Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 43 additions & 59 deletions dojo/finding/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
do_false_positive_history,
get_current_user,
get_object_or_none,
mass_model_updater,
to_str_typed,
)

Expand Down Expand Up @@ -578,20 +577,6 @@ def finding_post_delete(sender, instance, **kwargs):
logger.debug("finding post_delete, sender: %s instance: %s", to_str_typed(sender), to_str_typed(instance))


def reset_duplicate_before_delete(dupe):
    """Clear the duplicate linkage on a single finding so the original it
    points to can be deleted without dangling references."""
    dupe.duplicate = False
    dupe.duplicate_finding = None


def reset_duplicates_before_delete(qs):
    """Bulk-clear the duplicate flag and duplicate_finding link for every
    finding in *qs*, batching the writes via ``mass_model_updater``."""
    changed_fields = ["duplicate", "duplicate_finding"]
    mass_model_updater(Finding, qs, reset_duplicate_before_delete, fields=changed_fields)


def set_new_original(finding, new_original):
    """Re-point *finding* at *new_original*, but only when it is actually
    marked as a duplicate; non-duplicates are left untouched."""
    if not finding.duplicate:
        return
    finding.duplicate_finding = new_original


# can't use model to id here due to the queryset
# @dojo_async_task
# @app.task
Expand All @@ -617,64 +602,58 @@ def reconfigure_duplicate_cluster(original, cluster_outside):
new_original.save_no_options()
new_original.found_by.set(original.found_by.all())

# if the cluster is size 1, there's only the new original left
# Re-point remaining duplicates to the new original in a single query
if new_original and len(cluster_outside) > 1:
# for find in cluster_outside:
# if find != new_original:
# find.duplicate_finding = new_original
# find.save_no_options()

mass_model_updater(Finding, cluster_outside, lambda f: set_new_original(f, new_original), fields=["duplicate_finding"])
cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original)


def prepare_duplicates_for_delete(test=None, engagement=None):
logger.debug("prepare duplicates for delete, test: %s, engagement: %s", test.id if test else None, engagement.id if engagement else None)
if test is None and engagement is None:
logger.warning("nothing to prepare as test and engagement are None")
return

# should not be needed in normal healthy instances.
# but in that case it's a cheap count query and we might as well run it to be safe
fix_loop_duplicates()

# get all originals in the test/engagement
originals = Finding.objects.filter(original_finding__isnull=False)
# Build scope filter
scope_filter = {}
if engagement:
originals = originals.filter(test__engagement=engagement)
scope_filter["test__engagement"] = engagement
if test:
originals = originals.filter(test=test)
scope_filter["test"] = test

# use distinct to flatten the join result
originals = originals.distinct()

if len(originals) == 0:
logger.debug("no originals found, so no duplicates to prepare for deletion of original")
scope_finding_ids = set(
Finding.objects.filter(**scope_filter).values_list("id", flat=True),
)
if not scope_finding_ids:
logger.debug("no findings in scope, nothing to prepare")
return

# remove the link to the original from the duplicates inside the cluster so they can be safely deleted by the django framework
total = len(originals)
# logger.debug('originals: %s', [original.id for original in originals])
for i, original in enumerate(originals):
logger.debug("%d/%d: preparing duplicate cluster for deletion of original: %d", i + 1, total, original.id)
cluster_inside = original.original_finding.all()
if engagement:
cluster_inside = cluster_inside.filter(test__engagement=engagement)

if test:
cluster_inside = cluster_inside.filter(test=test)

if len(cluster_inside) > 0:
reset_duplicates_before_delete(cluster_inside)

# reconfigure duplicates outside test/engagement
cluster_outside = original.original_finding.all()
if engagement:
cluster_outside = cluster_outside.exclude(test__engagement=engagement)

if test:
cluster_outside = cluster_outside.exclude(test=test)

if len(cluster_outside) > 0:
reconfigure_duplicate_cluster(original, cluster_outside)

logger.debug("done preparing duplicate cluster for deletion of original: %d", original.id)
# Bulk-reset inside-scope duplicates: single UPDATE instead of per-original mass_model_updater.
# Clears the duplicate_finding FK so Django's Collector won't trip over dangling references
# when deleting findings in this scope.
inside_reset_count = Finding.objects.filter(
duplicate=True,
duplicate_finding_id__in=scope_finding_ids,
id__in=scope_finding_ids,
).update(duplicate_finding=None, duplicate=False)
logger.debug("bulk-reset %d inside-scope duplicates", inside_reset_count)

# Reconfigure outside-scope duplicates: still per-original because each cluster
# needs a new original chosen, status copied, and found_by updated.
# Pre-filter to only originals that have at least one duplicate outside scope,
# avoiding a per-original .exists() check.
originals_with_outside_dupes = Finding.objects.filter(
id__in=scope_finding_ids,
original_finding__in=Finding.objects.exclude(id__in=scope_finding_ids),
).distinct().prefetch_related("original_finding")

for original in originals_with_outside_dupes:
# Inside-scope duplicates were already unlinked by the bulk UPDATE above,
# so original_finding.all() now only contains outside-scope duplicates.
reconfigure_duplicate_cluster(original, original.original_finding.all())


@receiver(pre_delete, sender=Test)
Expand Down Expand Up @@ -709,9 +688,10 @@ def fix_loop_duplicates():
loop_count = loop_qs.count()

if loop_count > 0:
deduplicationLogger.info(f"Identified {loop_count} Findings with Loops")
deduplicationLogger.warning("fix_loop_duplicates: found %d findings with duplicate loops", loop_count)
# Stream IDs only in descending order to avoid loading full Finding rows
for find_id in loop_qs.order_by("-id").values_list("id", flat=True).iterator(chunk_size=1000):
deduplicationLogger.warning("fix_loop_duplicates: fixing loop for finding %d", find_id)
removeLoop(find_id, 50)

new_originals = Finding.objects.filter(duplicate_finding__isnull=True, duplicate=True)
Expand All @@ -726,6 +706,10 @@ def fix_loop_duplicates():


def removeLoop(finding_id, counter):
# NOTE: This function is recursive and does per-finding DB queries without prefetching.
# It could be optimized to load the duplicate graph as ID pairs in memory and process
# in bulk, but loops are rare (only from past bugs or high parallel load) so the
# current implementation is acceptable.
# get latest status
finding = Finding.objects.get(id=finding_id)
real_original = finding.duplicate_finding
Expand Down
Loading
Loading