From b5d4b7ff48da0bfe91dda8bc9ca5ea3493070efc Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 10:04:31 +0100 Subject: [PATCH 01/17] Optimize prepare_duplicates_for_delete and add test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace per-original O(n×m) loop with a single bulk UPDATE for inside-scope duplicate reset. Outside-scope reconfiguration still runs per-original but now uses .iterator() and .exists() to avoid loading full querysets into memory. Also adds WARN-level logging to fix_loop_duplicates for visibility into how often duplicate loops occur in production, and a comment on removeLoop explaining the optimization opportunity. --- dojo/finding/helper.py | 72 ++--- .../test_prepare_duplicates_for_delete.py | 297 ++++++++++++++++++ 2 files changed, 333 insertions(+), 36 deletions(-) create mode 100644 unittests/test_prepare_duplicates_for_delete.py diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index b390ff17be6..7669704ca08 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -631,51 +631,46 @@ def prepare_duplicates_for_delete(test=None, engagement=None): logger.debug("prepare duplicates for delete, test: %s, engagement: %s", test.id if test else None, engagement.id if engagement else None) if test is None and engagement is None: logger.warning("nothing to prepare as test and engagement are None") + return fix_loop_duplicates() - # get all originals in the test/engagement - originals = Finding.objects.filter(original_finding__isnull=False) + # Build scope filter + scope_filter = {} if engagement: - originals = originals.filter(test__engagement=engagement) + scope_filter["test__engagement"] = engagement if test: - originals = originals.filter(test=test) - - # use distinct to flatten the join result - originals = originals.distinct() + scope_filter["test"] = test - if len(originals) == 0: - logger.debug("no originals found, so no duplicates to 
prepare for deletion of original") + scope_finding_ids = set( + Finding.objects.filter(**scope_filter).values_list("id", flat=True), + ) + if not scope_finding_ids: + logger.debug("no findings in scope, nothing to prepare") return - # remove the link to the original from the duplicates inside the cluster so they can be safely deleted by the django framework - total = len(originals) - # logger.debug('originals: %s', [original.id for original in originals]) - for i, original in enumerate(originals): - logger.debug("%d/%d: preparing duplicate cluster for deletion of original: %d", i + 1, total, original.id) - cluster_inside = original.original_finding.all() - if engagement: - cluster_inside = cluster_inside.filter(test__engagement=engagement) - - if test: - cluster_inside = cluster_inside.filter(test=test) - - if len(cluster_inside) > 0: - reset_duplicates_before_delete(cluster_inside) - - # reconfigure duplicates outside test/engagement - cluster_outside = original.original_finding.all() - if engagement: - cluster_outside = cluster_outside.exclude(test__engagement=engagement) - - if test: - cluster_outside = cluster_outside.exclude(test=test) - - if len(cluster_outside) > 0: + # Bulk-reset inside-scope duplicates: single UPDATE instead of per-original mass_model_updater. + # Clears the duplicate_finding FK so Django's Collector won't trip over dangling references + # when deleting findings in this scope. + inside_reset_count = Finding.objects.filter( + duplicate=True, + duplicate_finding_id__in=scope_finding_ids, + id__in=scope_finding_ids, + ).update(duplicate_finding=None, duplicate=False) + logger.debug("bulk-reset %d inside-scope duplicates", inside_reset_count) + + # Reconfigure outside-scope duplicates: still per-original because each cluster + # needs a new original chosen, status copied, and found_by updated. 
+ originals_in_scope = Finding.objects.filter( + id__in=scope_finding_ids, + original_finding__isnull=False, + ).distinct() + + for original in originals_in_scope.iterator(): + cluster_outside = original.original_finding.exclude(id__in=scope_finding_ids) + if cluster_outside.exists(): reconfigure_duplicate_cluster(original, cluster_outside) - logger.debug("done preparing duplicate cluster for deletion of original: %d", original.id) - @receiver(pre_delete, sender=Test) def test_pre_delete(sender, instance, **kwargs): @@ -709,9 +704,10 @@ def fix_loop_duplicates(): loop_count = loop_qs.count() if loop_count > 0: - deduplicationLogger.info(f"Identified {loop_count} Findings with Loops") + deduplicationLogger.warning("fix_loop_duplicates: found %d findings with duplicate loops", loop_count) # Stream IDs only in descending order to avoid loading full Finding rows for find_id in loop_qs.order_by("-id").values_list("id", flat=True).iterator(chunk_size=1000): + deduplicationLogger.warning("fix_loop_duplicates: fixing loop for finding %d", find_id) removeLoop(find_id, 50) new_originals = Finding.objects.filter(duplicate_finding__isnull=True, duplicate=True) @@ -726,6 +722,10 @@ def fix_loop_duplicates(): def removeLoop(finding_id, counter): + # NOTE: This function is recursive and does per-finding DB queries without prefetching. + # It could be optimized to load the duplicate graph as ID pairs in memory and process + # in bulk, but loops are rare (only from past bugs or high parallel load) so the + # current implementation is acceptable. # get latest status finding = Finding.objects.get(id=finding_id) real_original = finding.duplicate_finding diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py new file mode 100644 index 00000000000..e2f6a4c08ef --- /dev/null +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -0,0 +1,297 @@ +""" +Tests for prepare_duplicates_for_delete() in dojo.finding.helper. 
+ +These tests verify that duplicate clusters are properly handled before +Test/Engagement deletion: inside-scope duplicates get their FK cleared, +outside-scope duplicates get a new original chosen. +""" + +import logging + +from crum import impersonate +from django.test.utils import override_settings +from django.utils import timezone + +from dojo.finding.deduplication import set_duplicate +from dojo.finding.helper import prepare_duplicates_for_delete +from dojo.models import Engagement, Finding, Product, Product_Type, Test, Test_Type, User, UserContactInfo + +from .dojo_test_case import DojoTestCase + +logger = logging.getLogger(__name__) + + +@override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=False) +class TestPrepareDuplicatesForDelete(DojoTestCase): + """Tests for prepare_duplicates_for_delete().""" + + def setUp(self): + super().setUp() + + self.testuser = User.objects.create( + username="test_prepare_dupes_user", + is_staff=True, + is_superuser=True, + ) + UserContactInfo.objects.create(user=self.testuser, block_execution=True) + + self.system_settings(enable_deduplication=False) + self.system_settings(enable_product_grade=False) + + self.product_type = Product_Type.objects.create(name="Test PT for Prepare Dupes") + self.product = Product.objects.create( + name="Test Product", + description="Test", + prod_type=self.product_type, + ) + self.test_type = Test_Type.objects.get_or_create(name="Manual Test")[0] + + # Engagement 1 with Test 1 and Test 2 + self.engagement1 = Engagement.objects.create( + name="Engagement 1", + product=self.product, + target_start=timezone.now(), + target_end=timezone.now(), + ) + self.test1 = Test.objects.create( + engagement=self.engagement1, + test_type=self.test_type, + target_start=timezone.now(), + target_end=timezone.now(), + ) + self.test2 = Test.objects.create( + engagement=self.engagement1, + test_type=self.test_type, + target_start=timezone.now(), + target_end=timezone.now(), + ) + + # Engagement 2 with Test 3 (for 
cross-engagement tests) + self.engagement2 = Engagement.objects.create( + name="Engagement 2", + product=self.product, + target_start=timezone.now(), + target_end=timezone.now(), + ) + self.test3 = Test.objects.create( + engagement=self.engagement2, + test_type=self.test_type, + target_start=timezone.now(), + target_end=timezone.now(), + ) + + def _create_finding(self, test, title="Finding"): + return Finding.objects.create( + test=test, + title=title, + severity="High", + description="Test", + mitigation="Test", + impact="Test", + reporter=self.testuser, + ) + + def _make_duplicate(self, duplicate, original): + """Set duplicate relationship directly, bypassing set_duplicate safeguards.""" + duplicate.duplicate = True + duplicate.duplicate_finding = original + duplicate.active = False + super(Finding, duplicate).save(skip_validation=True) + + def test_no_duplicates(self): + """Deleting a test with no duplicate relationships is a no-op.""" + f1 = self._create_finding(self.test1, "F1") + f2 = self._create_finding(self.test1, "F2") + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + f1.refresh_from_db() + f2.refresh_from_db() + self.assertFalse(f1.duplicate) + self.assertFalse(f2.duplicate) + self.assertIsNone(f1.duplicate_finding) + self.assertIsNone(f2.duplicate_finding) + + def test_inside_scope_duplicates_reset(self): + """Duplicates inside the deletion scope have their duplicate FK cleared.""" + original = self._create_finding(self.test1, "Original") + dupe = self._create_finding(self.test1, "Duplicate") + self._make_duplicate(dupe, original) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + dupe.refresh_from_db() + self.assertIsNone(dupe.duplicate_finding) + self.assertFalse(dupe.duplicate) + + def test_outside_scope_duplicates_get_new_original(self): + """Duplicates outside the deletion scope get a new original.""" + original = self._create_finding(self.test1, "Original") + 
original.active = True + original.is_mitigated = False + super(Finding, original).save(skip_validation=True) + + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + self._make_duplicate(outside_dupe, original) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + outside_dupe.refresh_from_db() + # Outside dupe becomes the new original + self.assertFalse(outside_dupe.duplicate) + self.assertIsNone(outside_dupe.duplicate_finding) + # Inherits active/mitigated status from old original + self.assertTrue(outside_dupe.active) + self.assertFalse(outside_dupe.is_mitigated) + + def test_outside_scope_cluster_repointed(self): + """Multiple outside-scope duplicates are re-pointed to the new original.""" + original = self._create_finding(self.test1, "Original") + dupe_b = self._create_finding(self.test2, "Dupe B") + dupe_c = self._create_finding(self.test2, "Dupe C") + dupe_d = self._create_finding(self.test2, "Dupe D") + self._make_duplicate(dupe_b, original) + self._make_duplicate(dupe_c, original) + self._make_duplicate(dupe_d, original) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + dupe_b.refresh_from_db() + dupe_c.refresh_from_db() + dupe_d.refresh_from_db() + + # Lowest ID becomes new original + new_original = dupe_b + self.assertFalse(new_original.duplicate) + self.assertIsNone(new_original.duplicate_finding) + + # Others re-pointed to new original + self.assertTrue(dupe_c.duplicate) + self.assertEqual(dupe_c.duplicate_finding_id, new_original.id) + self.assertTrue(dupe_d.duplicate) + self.assertEqual(dupe_d.duplicate_finding_id, new_original.id) + + def test_engagement_scope_inside_reset(self): + """Inside-scope reset works at engagement level.""" + original = self._create_finding(self.test1, "Original") + dupe = self._create_finding(self.test2, "Dupe in same engagement") + self._make_duplicate(dupe, original) + + with impersonate(self.testuser): + 
prepare_duplicates_for_delete(engagement=self.engagement1) + + dupe.refresh_from_db() + self.assertIsNone(dupe.duplicate_finding) + self.assertFalse(dupe.duplicate) + + def test_engagement_scope_outside_reconfigure(self): + """Outside-scope reconfiguration works at engagement level.""" + original = self._create_finding(self.test1, "Original in Eng 1") + outside_dupe = self._create_finding(self.test3, "Dupe in Eng 2") + self._make_duplicate(outside_dupe, original) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(engagement=self.engagement1) + + outside_dupe.refresh_from_db() + self.assertFalse(outside_dupe.duplicate) + self.assertIsNone(outside_dupe.duplicate_finding) + + def test_mixed_inside_and_outside_duplicates(self): + """Original with duplicates both inside and outside scope.""" + original = self._create_finding(self.test1, "Original") + inside_dupe = self._create_finding(self.test1, "Inside Dupe") + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + self._make_duplicate(inside_dupe, original) + self._make_duplicate(outside_dupe, original) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + inside_dupe.refresh_from_db() + outside_dupe.refresh_from_db() + + # Inside dupe: FK cleared + self.assertIsNone(inside_dupe.duplicate_finding) + self.assertFalse(inside_dupe.duplicate) + + # Outside dupe: becomes new original + self.assertFalse(outside_dupe.duplicate) + self.assertIsNone(outside_dupe.duplicate_finding) + + @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) + def test_cascade_delete_setting(self): + """When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside duplicates are deleted.""" + original = self._create_finding(self.test1, "Original") + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + self._make_duplicate(outside_dupe, original) + outside_dupe_id = outside_dupe.id + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + 
self.assertFalse( + Finding.objects.filter(id=outside_dupe_id).exists(), + "Outside duplicate should be cascade-deleted", + ) + + def test_multiple_originals(self): + """Multiple originals in the same test each get their clusters handled.""" + original_a = self._create_finding(self.test1, "Original A") + original_b = self._create_finding(self.test1, "Original B") + dupe_of_a = self._create_finding(self.test2, "Dupe of A") + dupe_of_b = self._create_finding(self.test2, "Dupe of B") + self._make_duplicate(dupe_of_a, original_a) + self._make_duplicate(dupe_of_b, original_b) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + dupe_of_a.refresh_from_db() + dupe_of_b.refresh_from_db() + + # Both become new originals + self.assertFalse(dupe_of_a.duplicate) + self.assertIsNone(dupe_of_a.duplicate_finding) + self.assertFalse(dupe_of_b.duplicate) + self.assertIsNone(dupe_of_b.duplicate_finding) + + def test_original_status_copied_to_new_original(self): + """New original inherits active/is_mitigated status from deleted original.""" + original = self._create_finding(self.test1, "Original") + original.active = False + original.is_mitigated = True + super(Finding, original).save(skip_validation=True) + + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + self._make_duplicate(outside_dupe, original) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + outside_dupe.refresh_from_db() + self.assertFalse(outside_dupe.duplicate) + self.assertFalse(outside_dupe.active) + self.assertTrue(outside_dupe.is_mitigated) + + def test_found_by_copied_to_new_original(self): + """New original inherits found_by from deleted original.""" + original = self._create_finding(self.test1, "Original") + test_type_2 = Test_Type.objects.get_or_create(name="ZAP Scan")[0] + original.found_by.add(self.test_type) + original.found_by.add(test_type_2) + + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + 
self._make_duplicate(outside_dupe, original) + + with impersonate(self.testuser): + prepare_duplicates_for_delete(test=self.test1) + + outside_dupe.refresh_from_db() + found_by_ids = set(outside_dupe.found_by.values_list("id", flat=True)) + self.assertIn(self.test_type.id, found_by_ids) + self.assertIn(test_type_2.id, found_by_ids) From 6622ff9449f21dd484dab3ccbbab750a886ecd7a Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 10:08:08 +0100 Subject: [PATCH 02/17] fix: remove unused import and fix docstring lint warning --- unittests/test_prepare_duplicates_for_delete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index e2f6a4c08ef..82d891485ea 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -12,7 +12,6 @@ from django.test.utils import override_settings from django.utils import timezone -from dojo.finding.deduplication import set_duplicate from dojo.finding.helper import prepare_duplicates_for_delete from dojo.models import Engagement, Finding, Product, Product_Type, Test, Test_Type, User, UserContactInfo @@ -23,6 +22,7 @@ @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=False) class TestPrepareDuplicatesForDelete(DojoTestCase): + """Tests for prepare_duplicates_for_delete().""" def setUp(self): From de7ddaf8d6caeb09b141f2b704fa8283ba5a7c64 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 10:40:37 +0100 Subject: [PATCH 03/17] perf: eliminate per-original queries with prefetch and bulk reset Remove redundant .exclude() and .exists() calls by leveraging the bulk UPDATE that already unlinks inside-scope duplicates. Add prefetch_related to fetch all reverse relations in a single query. 
--- dojo/finding/helper.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 7669704ca08..270ba54d59b 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -661,15 +661,17 @@ def prepare_duplicates_for_delete(test=None, engagement=None): # Reconfigure outside-scope duplicates: still per-original because each cluster # needs a new original chosen, status copied, and found_by updated. - originals_in_scope = Finding.objects.filter( + # Pre-filter to only originals that have at least one duplicate outside scope, + # avoiding a per-original .exists() check. + originals_with_outside_dupes = Finding.objects.filter( id__in=scope_finding_ids, - original_finding__isnull=False, - ).distinct() + original_finding__in=Finding.objects.exclude(id__in=scope_finding_ids), + ).distinct().prefetch_related("original_finding") - for original in originals_in_scope.iterator(): - cluster_outside = original.original_finding.exclude(id__in=scope_finding_ids) - if cluster_outside.exists(): - reconfigure_duplicate_cluster(original, cluster_outside) + for original in originals_with_outside_dupes: + # Inside-scope duplicates were already unlinked by the bulk UPDATE above, + # so original_finding.all() now only contains outside-scope duplicates. 
+ reconfigure_duplicate_cluster(original, original.original_finding.all()) @receiver(pre_delete, sender=Test) From 422ccbfebd3f4362207c75ac3246cde7b988cc62 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 10:41:32 +0100 Subject: [PATCH 04/17] add comment --- dojo/finding/helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 270ba54d59b..e0068e8edef 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -633,6 +633,8 @@ def prepare_duplicates_for_delete(test=None, engagement=None): logger.warning("nothing to prepare as test and engagement are None") return + # should not be needed in normal healthy instances. + # but in that case it's a cheap count query and we might as well run it to be safe fix_loop_duplicates() # Build scope filter From a0eb64f850428158923f494d3bfe13ed6489c711 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 16:34:23 +0100 Subject: [PATCH 05/17] perf: replace per-object async delete with SQL cascade walker Replace the per-object obj.delete() approach in async_delete_crawl_task with a recursive SQL cascade walker that compiles QuerySets to raw SQL and walks model._meta.related_objects bottom-up. This auto-discovers all FK relations at runtime, including those added by plugins. 
Key changes: - New dojo/utils_cascade_delete.py: cascade_delete() utility - New dojo/signals.py: pre_bulk_delete_findings signal for extensibility - New bulk_clear_finding_m2m() in finding/helper.py for M2M cleanup with FileUpload disk cleanup and orphaned Notes deletion - Rewritten async_delete_crawl_task with chunked cascade deletion - Removed async_delete_chunk_task (no longer needed) - Product grading recalculated once at end instead of per-object --- dojo/finding/helper.py | 56 +++++++++++ dojo/signals.py | 6 ++ dojo/utils.py | 183 ++++++++++++++++++----------------- dojo/utils_cascade_delete.py | 151 +++++++++++++++++++++++++++++ 4 files changed, 309 insertions(+), 87 deletions(-) create mode 100644 dojo/signals.py create mode 100644 dojo/utils_cascade_delete.py diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index e0068e8edef..7d74c8942fd 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -31,6 +31,7 @@ Endpoint, Endpoint_Status, Engagement, + FileUpload, Finding, Finding_Group, JIRA_Instance, @@ -698,6 +699,61 @@ def engagement_post_delete(sender, instance, **kwargs): logger.debug("engagement post_delete, sender: %s instance: %s", to_str_typed(sender), to_str_typed(instance)) +def bulk_clear_finding_m2m(finding_qs): + """ + Bulk-clear M2M through tables for a queryset of findings. + + Must be called BEFORE cascade_delete since M2M through tables + are not discovered by _meta.related_objects. + + Special handling for FileUpload: deletes via ORM so the custom + FileUpload.delete() fires and removes files from disk storage. 
+ """ + finding_ids = finding_qs.values_list("id", flat=True) + + # Collect FileUpload IDs before deleting through table entries + file_ids = list( + Finding.files.through.objects.filter( + finding_id__in=finding_ids, + ).values_list("fileupload_id", flat=True), + ) + + # Collect Note IDs before deleting through table entries + note_ids = list( + Finding.notes.through.objects.filter( + finding_id__in=finding_ids, + ).values_list("notes_id", flat=True), + ) + + # Auto-discover and delete all M2M through tables + for m2m_field in Finding._meta.many_to_many: + through_model = m2m_field.remote_field.through + # Find the FK column that points to Finding + fk_column = None + for field in through_model._meta.get_fields(): + if hasattr(field, "related_model") and field.related_model is Finding: + fk_column = field.column + break + if fk_column: + count, _ = through_model.objects.filter( + **{f"{fk_column}__in": finding_ids}, + ).delete() + if count: + logger.debug( + "bulk_clear_finding_m2m: deleted %d rows from %s", + count, through_model._meta.db_table, + ) + + # Delete FileUpload objects via ORM so custom delete() removes files from disk + if file_ids: + for file_upload in FileUpload.objects.filter(id__in=file_ids).iterator(): + file_upload.delete() + + # Delete orphaned Notes + if note_ids: + Notes.objects.filter(id__in=note_ids).delete() + + def fix_loop_duplicates(): """Due to bugs in the past and even currently when under high parallel load, there can be transitive duplicates.""" """ i.e. A -> B -> C. This can lead to problems when deleting findingns, performing deduplication, etc """ diff --git a/dojo/signals.py b/dojo/signals.py new file mode 100644 index 00000000000..351873d34ba --- /dev/null +++ b/dojo/signals.py @@ -0,0 +1,6 @@ +from django.dispatch import Signal + +# Sent before bulk-deleting findings via cascade_delete. +# Receivers can dispatch integrator notifications, collect metrics, etc. 
+# Provides: finding_qs (QuerySet of findings about to be deleted) +pre_bulk_delete_findings = Signal() diff --git a/dojo/utils.py b/dojo/utils.py index a5d8a13ed81..c698106a423 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -6,11 +6,10 @@ import mimetypes import os import pathlib -import random import re -import time from calendar import monthrange from collections.abc import Callable +from contextlib import suppress from datetime import date, datetime, timedelta from functools import cached_property from math import pi, sqrt @@ -21,7 +20,6 @@ import cvss import vobject from amqp.exceptions import ChannelError -from auditlog.models import LogEntry from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes from cvss import CVSS2, CVSS3, CVSS4 @@ -30,9 +28,8 @@ from django.conf import settings from django.contrib import messages from django.contrib.auth.signals import user_logged_in, user_logged_out, user_login_failed -from django.contrib.contenttypes.models import ContentType from django.core.paginator import Paginator -from django.db import OperationalError +from django.db import transaction from django.db.models import Case, Count, F, IntegerField, Q, Sum, Value, When from django.db.models.query import QuerySet from django.db.models.signals import post_save @@ -2056,96 +2053,108 @@ def _get_object_name(obj): @app.task -def async_delete_chunk_task(objects, **kwargs): +def async_delete_crawl_task(obj, model_list, **kwargs): """ - Module-level Celery task to delete a chunk of objects. + Delete an object and all its related objects using the SQL cascade walker. + + Handles Python-level concerns (duplicates, integrators, M2M, file cleanup, + product grading) explicitly, then uses cascade_delete() for efficient + bottom-up SQL deletion of all FK-related tables. Accepts **kwargs for _pgh_context injected by dojo_dispatch_task. 
Uses PgHistoryTask base class (default) to preserve pghistory context for audit trail. """ - max_retries = 3 - for obj in objects: - retry_count = 0 - while retry_count < max_retries: - try: - obj.delete() - break # Success, exit retry loop - except OperationalError as e: - error_msg = str(e) - if "deadlock detected" in error_msg.lower(): - retry_count += 1 - if retry_count < max_retries: - # Exponential backoff with jitter - wait_time = (2 ** retry_count) + random.uniform(0, 1) # noqa: S311 - logger.warning( - f"ASYNC_DELETE: Deadlock detected deleting {_get_object_name(obj)} {obj.pk}, " - f"retrying ({retry_count}/{max_retries}) after {wait_time:.2f}s", - ) - time.sleep(wait_time) - # Refresh object from DB before retry - obj.refresh_from_db() - else: - logger.error( - f"ASYNC_DELETE: Deadlock persisted after {max_retries} retries for {_get_object_name(obj)} {obj.pk}: {e}", - ) - raise - else: - # Not a deadlock, re-raise - raise - except AssertionError: - logger.debug("ASYNC_DELETE: object has already been deleted elsewhere. 
Skipping") - # The id must be None - # The object has already been deleted elsewhere - break - except LogEntry.MultipleObjectsReturned: - # Delete the log entrys first, then delete - LogEntry.objects.filter( - content_type=ContentType.objects.get_for_model(obj.__class__), - object_pk=str(obj.pk), - action=LogEntry.Action.DELETE, - ).delete() - # Now delete the object again (no retry needed for this case) - obj.delete() - break - + from dojo.finding.helper import ( # noqa: PLC0415 circular import + bulk_clear_finding_m2m, + prepare_duplicates_for_delete, + ) + from dojo.signals import pre_bulk_delete_findings # noqa: PLC0415 circular import + from dojo.utils_cascade_delete import cascade_delete # noqa: PLC0415 circular import + + obj_name = _get_object_name(obj) + logger.info("ASYNC_DELETE: Starting deletion of %s: %s", obj_name, obj) + + # Capture product reference before deletion for product grading at the end + product = None + with suppress(Product.DoesNotExist, Engagement.DoesNotExist, Test.DoesNotExist): + if isinstance(obj, Engagement): + product = obj.product + elif isinstance(obj, Test): + product = obj.engagement.product + + # Step 1: Determine finding scope + finding_filter = None + for model, query_path in model_list: + if model is Finding: + finding_filter = {query_path: obj.id} + break + + if finding_filter: + finding_qs = Finding.objects.filter(**finding_filter) + + # Step 2: Prepare duplicate clusters (must happen before any deletion) + if isinstance(obj, Engagement): + prepare_duplicates_for_delete(engagement=obj) + elif isinstance(obj, Product): + for engagement in obj.engagement_set.all(): + prepare_duplicates_for_delete(engagement=engagement) + elif isinstance(obj, Test): + prepare_duplicates_for_delete(test=obj) + + # Step 3: Send pre_bulk_delete signal (for Pro integrator dispatch, metering, etc.) 
+ pre_bulk_delete_findings.send(sender=Finding, finding_qs=finding_qs) + + # Step 4: Bulk-clear M2M through tables and clean up files/notes + # (M2M through tables are not discovered by _meta.related_objects) + bulk_clear_finding_m2m(finding_qs) + + # Step 5: Delete findings in chunks using cascade_delete + # skip_relations={Finding} skips the duplicate_finding self-FK (handled in Step 2) + chunk_size = get_setting("ASYNC_OBEJECT_DELETE_CHUNK_SIZE") + + if finding_filter: + finding_ids = list( + Finding.objects.filter(**finding_filter) + .values_list("id", flat=True) + .order_by("id"), + ) -@app.task -def async_delete_crawl_task(obj, model_list, **kwargs): - """ - Module-level Celery task to crawl and delete related objects. + total_chunks = (len(finding_ids) + chunk_size - 1) // chunk_size + for i in range(0, len(finding_ids), chunk_size): + chunk = finding_ids[i:i + chunk_size] + chunk_qs = Finding.objects.filter(id__in=chunk) + with transaction.atomic(): + cascade_delete( + Finding, chunk_qs, + skip_relations={Finding}, + ) + logger.info( + "ASYNC_DELETE: Deleted finding chunk %d/%d (%d findings)", + i // chunk_size + 1, total_chunks, len(chunk), + ) - Accepts **kwargs for _pgh_context injected by dojo_dispatch_task. - Uses PgHistoryTask base class (default) to preserve pghistory context for audit trail. 
- """ - from dojo.celery_dispatch import dojo_dispatch_task # noqa: PLC0415 circular import + # Delete remaining non-Finding children (Tests, Engagements, Endpoints) + # These are now lightweight since their Findings are gone + for model, query_path in model_list: + if model is not Finding: + filter_dict = {query_path: obj.id} + qs = model.objects.filter(**filter_dict) + if qs.exists(): + with transaction.atomic(): + cascade_delete(model, qs, skip_relations={Finding}) + + # Step 6: Delete the top-level object itself + pk_query = type(obj).objects.filter(pk=obj.pk) + with transaction.atomic(): + cascade_delete(type(obj), pk_query, skip_relations={Finding}) + + # Step 7: Recalculate product grade once (not per-object) + # The custom delete() methods on Finding/Test/Engagement each call + # perform_product_grading — cascade_delete bypasses custom delete(). + if product: + perform_product_grading(product) - logger.debug("ASYNC_DELETE: Crawling " + _get_object_name(obj) + ": " + str(obj)) - for model_info in model_list: - task_results = [] - model = model_info[0] - model_query = model_info[1] - filter_dict = {model_query: obj.id} - # Only fetch the IDs since we will make a list of IDs in the following function call - objects_to_delete = model.objects.only("id").filter(**filter_dict).distinct().order_by("id") - logger.debug("ASYNC_DELETE: Deleting " + str(len(objects_to_delete)) + " " + _get_object_name(model) + "s in chunks") - chunk_size = get_setting("ASYNC_OBEJECT_DELETE_CHUNK_SIZE") - chunks = [objects_to_delete[i:i + chunk_size] for i in range(0, len(objects_to_delete), chunk_size)] - logger.debug("ASYNC_DELETE: Split " + _get_object_name(model) + " into " + str(len(chunks)) + " chunks of " + str(chunk_size)) - for chunk in chunks: - logger.debug(f"deleting {len(chunk)} {_get_object_name(model)}") - result = dojo_dispatch_task(async_delete_chunk_task, list(chunk)) - # Collect async task results to wait for them all at once - if hasattr(result, "get"): - 
task_results.append(result) - # Wait for all chunk deletions to complete (they run in parallel) - for task_result in task_results: - task_result.get(timeout=300) # 5 minute timeout per chunk - # Now delete the main object after all chunks are done - result = dojo_dispatch_task(async_delete_chunk_task, [obj]) - # Wait for final deletion to complete - if hasattr(result, "get"): - result.get(timeout=300) # 5 minute timeout - logger.debug("ASYNC_DELETE: Successfully deleted " + _get_object_name(obj) + ": " + str(obj)) + logger.info("ASYNC_DELETE: Successfully deleted %s: %s", obj_name, obj) @app.task diff --git a/dojo/utils_cascade_delete.py b/dojo/utils_cascade_delete.py new file mode 100644 index 00000000000..24e1d43e315 --- /dev/null +++ b/dojo/utils_cascade_delete.py @@ -0,0 +1,151 @@ +""" +Efficient cascade delete utility for Django models. + +Uses compiled SQL (via SQLDeleteCompiler/SQLUpdateCompiler) to perform cascade +DELETE and SET_NULL operations by walking model._meta.related_objects recursively. +This bypasses Django's Collector and per-object signal overhead. 
+ +Based on: https://dev.to/redhap/efficient-django-delete-cascade-43i5 +""" + +import logging + +from django.db import models, transaction +from django.db.models.sql.compiler import SQLDeleteCompiler + +logger = logging.getLogger(__name__) + + +def get_delete_sql(query): + """Compile a DELETE SQL statement from a QuerySet.""" + return SQLDeleteCompiler( + query.query, transaction.get_connection(), query.db, + ).as_sql() + + +def get_update_sql(query, **updatespec): + """Compile an UPDATE SQL statement from a QuerySet with the given column values.""" + if not query.query.can_filter(): + msg = "Cannot filter this query" + raise ValueError(msg) + query.for_write = True + q = query.query.chain(models.sql.UpdateQuery) + q.add_update_values(updatespec) + q._annotations = None + return q.get_compiler(query.db).as_sql() + + +def execute_compiled_sql(sql, params=None): + """Execute compiled SQL directly via connection.cursor().""" + with transaction.get_connection().cursor() as cur: + cur.execute(sql, params or None) + return cur.rowcount + + +def execute_delete_sql(query): + """Compile and execute a DELETE statement from a QuerySet.""" + return execute_compiled_sql(*get_delete_sql(query)) + + +def execute_update_sql(query, **updatespec): + """Compile and execute an UPDATE statement from a QuerySet.""" + return execute_compiled_sql(*get_update_sql(query, **updatespec)) + + +def cascade_delete(from_model, instance_pk_query, skip_relations=None, base_model=None, level=0): + """ + Recursively walk Django model relations and execute compiled SQL + to perform cascade DELETE / SET_NULL without the Collector. + + Walks from_model._meta.related_objects to discover all FK relations, + recurses into CASCADE children first (bottom-up), then deletes at the + current level. No query execution until recursion unwinds. + + Args: + from_model: The model class to delete from. + instance_pk_query: QuerySet selecting the records to delete. + skip_relations: Set of model classes to skip (e.g. 
self-referential FKs). + base_model: Root model class (set automatically on first call). + level: Recursion depth (for logging only). + + Returns: + Number of records deleted at this level. + + """ + if skip_relations is None: + skip_relations = set() + if base_model is None: + base_model = from_model + + instance_pk_query = instance_pk_query.values_list("pk").order_by() + + logger.debug( + "cascade_delete level %d for %s: checking relations of %s", + level, base_model.__name__, from_model.__name__, + ) + + for relation in from_model._meta.related_objects: + related_model = relation.related_model + if related_model in skip_relations: + logger.debug("cascade_delete: skipping %s", related_model.__name__) + continue + + on_delete = relation.on_delete + if on_delete is None: + logger.debug( + "cascade_delete: no on_delete for %s -> %s, skipping", + from_model.__name__, related_model.__name__, + ) + continue + + on_delete_name = on_delete.__name__ + fk_column = relation.remote_field.column + filterspec = {f"{fk_column}__in": models.Subquery(instance_pk_query)} + + if on_delete_name == "SET_NULL": + count = execute_update_sql( + related_model.objects.filter(**filterspec), + **{fk_column: None}, + ) + logger.debug( + "cascade_delete: SET NULL on %d %s records", + count, related_model.__name__, + ) + + elif on_delete_name == "CASCADE": + related_pk_query = related_model.objects.filter(**filterspec).values_list( + related_model._meta.pk.name, + ) + # Recurse into children first (bottom-up deletion) + cascade_delete( + related_model, related_pk_query, + skip_relations=skip_relations, + base_model=base_model, + level=level + 1, + ) + + elif on_delete_name == "DO_NOTHING": + logger.debug( + "cascade_delete: DO_NOTHING for %s, skipping", + related_model.__name__, + ) + + else: + logger.warning( + "cascade_delete: unhandled on_delete=%s for %s -> %s, skipping", + on_delete_name, from_model.__name__, related_model.__name__, + ) + + # After all children are deleted, delete records 
at this level + if level == 0: + del_query = instance_pk_query + else: + filterspec = {f"{from_model._meta.pk.name}__in": models.Subquery(instance_pk_query)} + del_query = from_model.objects.filter(**filterspec) + + count = execute_delete_sql(del_query) + logger.debug( + "cascade_delete level %d: deleted %d %s records", + level, count, from_model.__name__, + ) + return count From 972f6468e1d46f852ee6ee2cfa50f453aba9af51 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 17:24:17 +0100 Subject: [PATCH 06/17] perf: replace mass_model_updater with single UPDATE in reconfigure_duplicate_cluster Use QuerySet.update() instead of mass_model_updater to re-point duplicates to the new original. Single SQL query instead of loading all findings into Python and calling bulk_update. --- dojo/finding/helper.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 7d74c8942fd..8ea472c8840 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -618,14 +618,9 @@ def reconfigure_duplicate_cluster(original, cluster_outside): new_original.save_no_options() new_original.found_by.set(original.found_by.all()) - # if the cluster is size 1, there's only the new original left + # Re-point remaining duplicates to the new original in a single query if new_original and len(cluster_outside) > 1: - # for find in cluster_outside: - # if find != new_original: - # find.duplicate_finding = new_original - # find.save_no_options() - - mass_model_updater(Finding, cluster_outside, lambda f: set_new_original(f, new_original), fields=["duplicate_finding"]) + cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) def prepare_duplicates_for_delete(test=None, engagement=None): From 547ee6a602ee08b7d31df19114e77969da75d664 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 17:29:55 +0100 Subject: [PATCH 07/17] cleanup: remove dead code from duplicate 
handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove reset_duplicate_before_delete, reset_duplicates_before_delete, and set_new_original — all replaced by bulk UPDATE in prepare_duplicates_for_delete and .update() in reconfigure_duplicate_cluster. Remove unused mass_model_updater import. --- dojo/finding/helper.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 8ea472c8840..9f4dd518d06 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -50,7 +50,6 @@ do_false_positive_history, get_current_user, get_object_or_none, - mass_model_updater, to_str_typed, ) @@ -579,20 +578,6 @@ def finding_post_delete(sender, instance, **kwargs): logger.debug("finding post_delete, sender: %s instance: %s", to_str_typed(sender), to_str_typed(instance)) -def reset_duplicate_before_delete(dupe): - dupe.duplicate_finding = None - dupe.duplicate = False - - -def reset_duplicates_before_delete(qs): - mass_model_updater(Finding, qs, reset_duplicate_before_delete, fields=["duplicate", "duplicate_finding"]) - - -def set_new_original(finding, new_original): - if finding.duplicate: - finding.duplicate_finding = new_original - - # can't use model to id here due to the queryset # @dojo_async_task # @app.task From 02b9d839dd6165851371d4facf78a922ecce7f80 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 18:33:47 +0100 Subject: [PATCH 08/17] fix: delete outside-scope duplicates before main scope to avoid FK violations When bulk-deleting findings in chunks, an original in an earlier chunk could fail to delete because its duplicate (higher ID) in a later chunk still references it via duplicate_finding FK. Fix by deleting outside-scope duplicates first, then the main scope. Also moves pre_bulk_delete_findings signal into bulk_delete_findings so it fires automatically. 
--- dojo/finding/helper.py | 66 +++++++++++++++++++++++++++++++----------- dojo/utils.py | 46 ++++++++++------------------- 2 files changed, 64 insertions(+), 48 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 9f4dd518d06..6893731b502 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -4,6 +4,7 @@ from time import strftime from django.conf import settings +from django.db import transaction from django.db.models.query_utils import Q from django.db.models.signals import post_delete, pre_delete from django.db.utils import IntegrityError @@ -561,7 +562,11 @@ def finding_delete(instance, **kwargs): duplicate_cluster = instance.original_finding.all() if duplicate_cluster: - reconfigure_duplicate_cluster(instance, duplicate_cluster) + if settings.DUPLICATE_CLUSTER_CASCADE_DELETE: + # Delete the entire duplicate cluster efficiently via bulk_delete_findings + bulk_delete_findings(duplicate_cluster) + else: + reconfigure_duplicate_cluster(instance, duplicate_cluster) else: logger.debug("no duplicate cluster found for finding: %d, so no need to reconfigure", instance.id) @@ -588,24 +593,25 @@ def reconfigure_duplicate_cluster(original, cluster_outside): return if settings.DUPLICATE_CLUSTER_CASCADE_DELETE: - cluster_outside.order_by("-id").delete() - else: - logger.debug("reconfigure_duplicate_cluster: cluster_outside: %s", cluster_outside) - # set new original to first finding in cluster (ordered by id) - new_original = cluster_outside.order_by("id").first() - if new_original: - logger.debug("changing original of duplicate cluster %d to: %s:%s", original.id, new_original.id, new_original.title) + # Don't delete here — the caller (async_delete_crawl_task or finding_delete) + # handles deletion of outside-scope duplicates efficiently via bulk_delete_findings. 
+ return + logger.debug("reconfigure_duplicate_cluster: cluster_outside: %s", cluster_outside) + # set new original to first finding in cluster (ordered by id) + new_original = cluster_outside.order_by("id").first() + if new_original: + logger.debug("changing original of duplicate cluster %d to: %s:%s", original.id, new_original.id, new_original.title) - new_original.duplicate = False - new_original.duplicate_finding = None - new_original.active = original.active - new_original.is_mitigated = original.is_mitigated - new_original.save_no_options() - new_original.found_by.set(original.found_by.all()) + new_original.duplicate = False + new_original.duplicate_finding = None + new_original.active = original.active + new_original.is_mitigated = original.is_mitigated + new_original.save_no_options() + new_original.found_by.set(original.found_by.all()) - # Re-point remaining duplicates to the new original in a single query - if new_original and len(cluster_outside) > 1: - cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) + # Re-point remaining duplicates to the new original in a single query + if new_original and len(cluster_outside) > 1: + cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) def prepare_duplicates_for_delete(test=None, engagement=None): @@ -734,6 +740,32 @@ def bulk_clear_finding_m2m(finding_qs): Notes.objects.filter(id__in=note_ids).delete() +def bulk_delete_findings(finding_qs, chunk_size=1000): + """ + Delete findings and all related objects efficiently. + + Sends the pre_bulk_delete signal, clears M2M through tables (not + discovered by _meta.related_objects), then uses cascade_delete for + all FK relations via raw SQL. + Chunked with per-chunk transaction.atomic() for crash safety. 
+ """ + from dojo.signals import pre_bulk_delete_findings # noqa: PLC0415 circular import + from dojo.utils_cascade_delete import cascade_delete # noqa: PLC0415 circular import + + pre_bulk_delete_findings.send(sender=Finding, finding_qs=finding_qs) + bulk_clear_finding_m2m(finding_qs) + finding_ids = list(finding_qs.values_list("id", flat=True).order_by("id")) + total_chunks = (len(finding_ids) + chunk_size - 1) // chunk_size + for i in range(0, len(finding_ids), chunk_size): + chunk = finding_ids[i:i + chunk_size] + with transaction.atomic(): + cascade_delete(Finding, Finding.objects.filter(id__in=chunk), skip_relations={Finding}) + logger.info( + "bulk_delete_findings: deleted chunk %d/%d (%d findings)", + i // chunk_size + 1, total_chunks, len(chunk), + ) + + def fix_loop_duplicates(): """Due to bugs in the past and even currently when under high parallel load, there can be transitive duplicates.""" """ i.e. A -> B -> C. This can lead to problems when deleting findingns, performing deduplication, etc """ diff --git a/dojo/utils.py b/dojo/utils.py index c698106a423..c144d960c06 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -2065,10 +2065,9 @@ def async_delete_crawl_task(obj, model_list, **kwargs): Uses PgHistoryTask base class (default) to preserve pghistory context for audit trail. 
""" from dojo.finding.helper import ( # noqa: PLC0415 circular import - bulk_clear_finding_m2m, + bulk_delete_findings, prepare_duplicates_for_delete, ) - from dojo.signals import pre_bulk_delete_findings # noqa: PLC0415 circular import from dojo.utils_cascade_delete import cascade_delete # noqa: PLC0415 circular import obj_name = _get_object_name(obj) @@ -2093,6 +2092,8 @@ def async_delete_crawl_task(obj, model_list, **kwargs): finding_qs = Finding.objects.filter(**finding_filter) # Step 2: Prepare duplicate clusters (must happen before any deletion) + # When CASCADE_DELETE=True, reconfigure_duplicate_cluster skips deletion — + # we handle that below by expanding scope to include outside duplicates. if isinstance(obj, Engagement): prepare_duplicates_for_delete(engagement=obj) elif isinstance(obj, Product): @@ -2101,37 +2102,20 @@ def async_delete_crawl_task(obj, model_list, **kwargs): elif isinstance(obj, Test): prepare_duplicates_for_delete(test=obj) - # Step 3: Send pre_bulk_delete signal (for Pro integrator dispatch, metering, etc.) - pre_bulk_delete_findings.send(sender=Finding, finding_qs=finding_qs) - - # Step 4: Bulk-clear M2M through tables and clean up files/notes - # (M2M through tables are not discovered by _meta.related_objects) - bulk_clear_finding_m2m(finding_qs) - - # Step 5: Delete findings in chunks using cascade_delete - # skip_relations={Finding} skips the duplicate_finding self-FK (handled in Step 2) - chunk_size = get_setting("ASYNC_OBEJECT_DELETE_CHUNK_SIZE") - - if finding_filter: - finding_ids = list( - Finding.objects.filter(**finding_filter) - .values_list("id", flat=True) - .order_by("id"), + # Step 3: Delete outside-scope duplicates first — these point to findings + # in the main scope via duplicate_finding FK, so they must be removed before + # the originals to avoid FK violations during chunked deletion. 
+ outside_dupes_qs = ( + Finding.objects.filter(duplicate_finding_id__in=finding_qs.values_list("id", flat=True)) + .exclude(id__in=finding_qs.values_list("id", flat=True)) ) + chunk_size = get_setting("ASYNC_OBEJECT_DELETE_CHUNK_SIZE") + if outside_dupes_qs.exists(): + logger.info("ASYNC_DELETE: Deleting %d outside-scope duplicates first", outside_dupes_qs.count()) + bulk_delete_findings(outside_dupes_qs, chunk_size=chunk_size) - total_chunks = (len(finding_ids) + chunk_size - 1) // chunk_size - for i in range(0, len(finding_ids), chunk_size): - chunk = finding_ids[i:i + chunk_size] - chunk_qs = Finding.objects.filter(id__in=chunk) - with transaction.atomic(): - cascade_delete( - Finding, chunk_qs, - skip_relations={Finding}, - ) - logger.info( - "ASYNC_DELETE: Deleted finding chunk %d/%d (%d findings)", - i // chunk_size + 1, total_chunks, len(chunk), - ) + # Step 4: Delete the main scope findings + bulk_delete_findings(finding_qs, chunk_size=chunk_size) # Delete remaining non-Finding children (Tests, Engagements, Endpoints) # These are now lightweight since their Findings are gone From 9faae752c2ffadf5e6d0a92fc2b9dedfe3795981 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 18:38:59 +0100 Subject: [PATCH 09/17] fix: use bool type for DD_DUPLICATE_CLUSTER_CASCADE_DELETE env var --- dojo/settings/settings.dist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 4bf0fbc651e..c03e5635f87 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -302,7 +302,7 @@ # Initial behaviour in Defect Dojo was to delete all duplicates when an original was deleted # New behaviour is to leave the duplicates in place, but set the oldest of duplicates as new original # Set to True to revert to the old behaviour where all duplicates are deleted - DD_DUPLICATE_CLUSTER_CASCADE_DELETE=(str, False), + DD_DUPLICATE_CLUSTER_CASCADE_DELETE=(bool, False), 
# Enable Rate Limiting for the login page DD_RATE_LIMITER_ENABLED=(bool, False), # Examples include 5/m 100/h and more https://django-ratelimit.readthedocs.io/en/stable/rates.html#simple-rates From 4a79e34fd3814f551bc33f165193657d982e50a8 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 19:35:48 +0100 Subject: [PATCH 10/17] fix: replace save_no_options with .update() in reconfigure_duplicate_cluster Avoids triggering Finding.save() signals (pre_save_changed, execute_prioritization_calculations) when reconfiguring duplicate clusters during deletion. Adds tests for cross-engagement duplicate reconfiguration and product deletion with duplicates. --- dojo/finding/helper.py | 15 ++--- .../test_prepare_duplicates_for_delete.py | 61 +++++++++++++++++++ 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 6893731b502..782d3b8d8ba 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -602,15 +602,16 @@ def reconfigure_duplicate_cluster(original, cluster_outside): if new_original: logger.debug("changing original of duplicate cluster %d to: %s:%s", original.id, new_original.id, new_original.title) - new_original.duplicate = False - new_original.duplicate_finding = None - new_original.active = original.active - new_original.is_mitigated = original.is_mitigated - new_original.save_no_options() + # Use .update() to avoid triggering Finding.save() signals + Finding.objects.filter(id=new_original.id).update( + duplicate=False, + duplicate_finding=None, + active=original.active, + is_mitigated=original.is_mitigated, + ) new_original.found_by.set(original.found_by.all()) - # Re-point remaining duplicates to the new original in a single query - if new_original and len(cluster_outside) > 1: + # Re-point remaining duplicates to the new original in a single query cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) diff --git 
a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index 82d891485ea..d59efeda933 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -295,3 +295,64 @@ def test_found_by_copied_to_new_original(self): found_by_ids = set(outside_dupe.found_by.values_list("id", flat=True)) self.assertIn(self.test_type.id, found_by_ids) self.assertIn(test_type_2.id, found_by_ids) + + def test_delete_finding_reconfigures_cross_engagement_duplicate(self): + """Deleting an original finding makes its cross-engagement duplicate standalone. + + Setup: product with eng A (finding A, original) and eng B (finding B, duplicate of A). + Action: delete finding A. + Expected: finding B becomes a standalone finding (not duplicate, active, no duplicate_finding). + """ + finding_a = self._create_finding(self.test1, "Original A") + finding_a.active = True + finding_a.is_mitigated = False + super(Finding, finding_a).save(skip_validation=True) + + finding_b = self._create_finding(self.test3, "Duplicate B") + self._make_duplicate(finding_b, finding_a) + + # Verify setup + finding_b.refresh_from_db() + self.assertTrue(finding_b.duplicate) + self.assertEqual(finding_b.duplicate_finding_id, finding_a.id) + + # Delete finding A — triggers finding_delete signal -> reconfigure_duplicate_cluster + with impersonate(self.testuser): + finding_a.delete() + + # Finding B should now be standalone + finding_b.refresh_from_db() + self.assertFalse(finding_b.duplicate) + self.assertIsNone(finding_b.duplicate_finding) + self.assertTrue(finding_b.active) + self.assertFalse(finding_b.is_mitigated) + + def test_delete_product_with_cross_engagement_duplicates(self): + """Deleting a product with cross-engagement duplicates succeeds without FK violations. + + Setup: product with eng A (finding A, original) and eng B (finding B, duplicate of A). + Action: delete the entire product via async_delete_crawl_task. 
+ Expected: product and all findings are deleted without errors. + """ + from dojo.utils import ASYNC_DELETE_MAPPING, async_delete_crawl_task + + finding_a = self._create_finding(self.test1, "Original A") + finding_a.active = True + finding_a.is_mitigated = False + super(Finding, finding_a).save(skip_validation=True) + + finding_b = self._create_finding(self.test3, "Duplicate B") + self._make_duplicate(finding_b, finding_a) + + product_id = self.product.id + finding_a_id = finding_a.id + finding_b_id = finding_b.id + + model_list = ASYNC_DELETE_MAPPING["Product"] + with impersonate(self.testuser): + async_delete_crawl_task(self.product, model_list) + + # Everything should be gone + self.assertFalse(Product.objects.filter(id=product_id).exists()) + self.assertFalse(Finding.objects.filter(id=finding_a_id).exists()) + self.assertFalse(Finding.objects.filter(id=finding_b_id).exists()) From 2282fb6578a4076448d9c3a2eae015513c873316 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 20:10:32 +0100 Subject: [PATCH 11/17] refactor: scope prepare_duplicates_for_delete to full object, not per-engagement Adds product= and product_type= parameters so the entire deletion scope is handled in one call, avoiding unnecessary reconfiguration of findings that are about to be deleted anyway. Uses subqueries instead of materializing ID sets, and chunks the originals loop with prefetch to bound memory. Reverts finding_delete to use ORM .delete() for single finding cascade deletes. 
--- dojo/finding/helper.py | 74 +++++++++++-------- dojo/utils.py | 10 ++- .../test_prepare_duplicates_for_delete.py | 17 +++-- 3 files changed, 59 insertions(+), 42 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 782d3b8d8ba..781d721b278 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -1,6 +1,7 @@ import logging from contextlib import suppress from datetime import datetime +from itertools import batched from time import strftime from django.conf import settings @@ -563,8 +564,7 @@ def finding_delete(instance, **kwargs): duplicate_cluster = instance.original_finding.all() if duplicate_cluster: if settings.DUPLICATE_CLUSTER_CASCADE_DELETE: - # Delete the entire duplicate cluster efficiently via bulk_delete_findings - bulk_delete_findings(duplicate_cluster) + duplicate_cluster.order_by("-id").delete() else: reconfigure_duplicate_cluster(instance, duplicate_cluster) else: @@ -615,53 +615,65 @@ def reconfigure_duplicate_cluster(original, cluster_outside): cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) -def prepare_duplicates_for_delete(test=None, engagement=None): - logger.debug("prepare duplicates for delete, test: %s, engagement: %s", test.id if test else None, engagement.id if engagement else None) - if test is None and engagement is None: - logger.warning("nothing to prepare as test and engagement are None") +def prepare_duplicates_for_delete(test=None, engagement=None, product=None, product_type=None): + logger.debug( + "prepare duplicates for delete, test: %s, engagement: %s, product: %s, product_type: %s", + test.id if test else None, + engagement.id if engagement else None, + product.id if product else None, + product_type.id if product_type else None, + ) + if test is None and engagement is None and product is None and product_type is None: + logger.warning("nothing to prepare as no scope object provided") return # should not be needed in normal healthy instances. 
# but in that case it's a cheap count query and we might as well run it to be safe fix_loop_duplicates() - # Build scope filter - scope_filter = {} - if engagement: - scope_filter["test__engagement"] = engagement - if test: - scope_filter["test"] = test + # Build scope as a subquery — never materialized into Python memory + if product_type: + scope_filter = {"test__engagement__product__prod_type": product_type} + elif product: + scope_filter = {"test__engagement__product": product} + elif engagement: + scope_filter = {"test__engagement": engagement} + else: + scope_filter = {"test": test} - scope_finding_ids = set( - Finding.objects.filter(**scope_filter).values_list("id", flat=True), - ) - if not scope_finding_ids: + scope_ids_subquery = Finding.objects.filter(**scope_filter).values_list("id", flat=True) + + if not scope_ids_subquery.exists(): logger.debug("no findings in scope, nothing to prepare") return # Bulk-reset inside-scope duplicates: single UPDATE instead of per-original mass_model_updater. - # Clears the duplicate_finding FK so Django's Collector won't trip over dangling references - # when deleting findings in this scope. + # Clears the duplicate_finding FK so cascade_delete won't trip over dangling self-references. inside_reset_count = Finding.objects.filter( duplicate=True, - duplicate_finding_id__in=scope_finding_ids, - id__in=scope_finding_ids, + duplicate_finding_id__in=scope_ids_subquery, + id__in=scope_ids_subquery, ).update(duplicate_finding=None, duplicate=False) logger.debug("bulk-reset %d inside-scope duplicates", inside_reset_count) # Reconfigure outside-scope duplicates: still per-original because each cluster # needs a new original chosen, status copied, and found_by updated. - # Pre-filter to only originals that have at least one duplicate outside scope, - # avoiding a per-original .exists() check. 
- originals_with_outside_dupes = Finding.objects.filter( - id__in=scope_finding_ids, - original_finding__in=Finding.objects.exclude(id__in=scope_finding_ids), - ).distinct().prefetch_related("original_finding") - - for original in originals_with_outside_dupes: - # Inside-scope duplicates were already unlinked by the bulk UPDATE above, - # so original_finding.all() now only contains outside-scope duplicates. - reconfigure_duplicate_cluster(original, original.original_finding.all()) + # Chunked with prefetch_related to bound memory while avoiding N+1 queries. + originals_ids = ( + Finding.objects.filter( + id__in=scope_ids_subquery, + original_finding__in=Finding.objects.exclude(id__in=scope_ids_subquery), + ) + .distinct() + .values_list("id", flat=True) + .iterator(chunk_size=500) + ) + + for chunk_ids in batched(originals_ids, 500): + for original in Finding.objects.filter(id__in=chunk_ids).prefetch_related("original_finding"): + # Inside-scope duplicates were already unlinked by the bulk UPDATE above, + # so original_finding.all() now only contains outside-scope duplicates. + reconfigure_duplicate_cluster(original, original.original_finding.all()) @receiver(pre_delete, sender=Test) diff --git a/dojo/utils.py b/dojo/utils.py index c144d960c06..e858bbfbfcb 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -69,6 +69,7 @@ Languages, Notifications, Product, + Product_Type, System_Settings, Test, Test_Type, @@ -2094,11 +2095,12 @@ def async_delete_crawl_task(obj, model_list, **kwargs): # Step 2: Prepare duplicate clusters (must happen before any deletion) # When CASCADE_DELETE=True, reconfigure_duplicate_cluster skips deletion — # we handle that below by expanding scope to include outside duplicates. 
- if isinstance(obj, Engagement): - prepare_duplicates_for_delete(engagement=obj) + if isinstance(obj, Product_Type): + prepare_duplicates_for_delete(product_type=obj) elif isinstance(obj, Product): - for engagement in obj.engagement_set.all(): - prepare_duplicates_for_delete(engagement=engagement) + prepare_duplicates_for_delete(product=obj) + elif isinstance(obj, Engagement): + prepare_duplicates_for_delete(engagement=obj) elif isinstance(obj, Test): prepare_duplicates_for_delete(test=obj) diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index d59efeda933..2a15dde1bcc 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -224,20 +224,23 @@ def test_mixed_inside_and_outside_duplicates(self): self.assertIsNone(outside_dupe.duplicate_finding) @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) - def test_cascade_delete_setting(self): - """When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside duplicates are deleted.""" + def test_cascade_delete_skips_outside_reconfigure(self): + """When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside duplicates are left untouched. + + The caller (async_delete_crawl_task) handles deletion of outside-scope + duplicates separately via bulk_delete_findings. 
+ """ original = self._create_finding(self.test1, "Original") outside_dupe = self._create_finding(self.test2, "Outside Dupe") self._make_duplicate(outside_dupe, original) - outside_dupe_id = outside_dupe.id with impersonate(self.testuser): prepare_duplicates_for_delete(test=self.test1) - self.assertFalse( - Finding.objects.filter(id=outside_dupe_id).exists(), - "Outside duplicate should be cascade-deleted", - ) + outside_dupe.refresh_from_db() + # Outside dupe is still a duplicate — not reconfigured or deleted + self.assertTrue(outside_dupe.duplicate) + self.assertEqual(outside_dupe.duplicate_finding_id, original.id) def test_multiple_originals(self): """Multiple originals in the same test each get their clusters handled.""" From 5d1f96bd689808ba45ed17af85bb05d724c1496b Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 20:16:52 +0100 Subject: [PATCH 12/17] refactor: remove ASYNC_DELETE_MAPPING, use FINDING_SCOPE_FILTERS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the model_list-based mapping with a simple scope filter dict. prepare_duplicates_for_delete now accepts a single object and derives the scope via FINDING_SCOPE_FILTERS. Removes the redundant non-Finding model deletion loop — cascade_delete on the top-level object handles all remaining children. Cleans up async_delete class. 
--- dojo/finding/helper.py | 39 ++++---- dojo/models.py | 2 +- dojo/utils.py | 90 +++++-------------- .../test_prepare_duplicates_for_delete.py | 27 +++--- 4 files changed, 55 insertions(+), 103 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 781d721b278..e43c6af55fe 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -615,33 +615,28 @@ def reconfigure_duplicate_cluster(original, cluster_outside): cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) -def prepare_duplicates_for_delete(test=None, engagement=None, product=None, product_type=None): - logger.debug( - "prepare duplicates for delete, test: %s, engagement: %s, product: %s, product_type: %s", - test.id if test else None, - engagement.id if engagement else None, - product.id if product else None, - product_type.id if product_type else None, - ) - if test is None and engagement is None and product is None and product_type is None: - logger.warning("nothing to prepare as no scope object provided") +def prepare_duplicates_for_delete(obj): + """Prepare duplicate clusters before deleting a Test, Engagement, Product, or Product_Type. + + Resets inside-scope duplicate FKs and reconfigures outside-scope clusters + so that cascade_delete won't hit FK violations on the self-referential + duplicate_finding field. + """ + from dojo.utils import FINDING_SCOPE_FILTERS # noqa: PLC0415 circular import + + scope_field = FINDING_SCOPE_FILTERS.get(type(obj)) + if scope_field is None: + logger.warning("prepare_duplicates_for_delete: unsupported object type %s", type(obj).__name__) return + logger.debug("prepare_duplicates_for_delete: %s %d", type(obj).__name__, obj.id) + # should not be needed in normal healthy instances. 
# but in that case it's a cheap count query and we might as well run it to be safe fix_loop_duplicates() # Build scope as a subquery — never materialized into Python memory - if product_type: - scope_filter = {"test__engagement__product__prod_type": product_type} - elif product: - scope_filter = {"test__engagement__product": product} - elif engagement: - scope_filter = {"test__engagement": engagement} - else: - scope_filter = {"test": test} - - scope_ids_subquery = Finding.objects.filter(**scope_filter).values_list("id", flat=True) + scope_ids_subquery = Finding.objects.filter(**{scope_field: obj}).values_list("id", flat=True) if not scope_ids_subquery.exists(): logger.debug("no findings in scope, nothing to prepare") @@ -679,7 +674,7 @@ def prepare_duplicates_for_delete(test=None, engagement=None, product=None, prod @receiver(pre_delete, sender=Test) def test_pre_delete(sender, instance, **kwargs): logger.debug("test pre_delete, sender: %s instance: %s", to_str_typed(sender), to_str_typed(instance)) - prepare_duplicates_for_delete(test=instance) + prepare_duplicates_for_delete(instance) @receiver(post_delete, sender=Test) @@ -690,7 +685,7 @@ def test_post_delete(sender, instance, **kwargs): @receiver(pre_delete, sender=Engagement) def engagement_pre_delete(sender, instance, **kwargs): logger.debug("engagement pre_delete, sender: %s instance: %s", to_str_typed(sender), to_str_typed(instance)) - prepare_duplicates_for_delete(engagement=instance) + prepare_duplicates_for_delete(instance) @receiver(post_delete, sender=Engagement) diff --git a/dojo/models.py b/dojo/models.py index c2b8c2ee50a..f5b31c789c9 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -1647,7 +1647,7 @@ def is_ci_cd(self): def delete(self, *args, **kwargs): logger.debug("%d engagement delete", self.id) from dojo.finding import helper as finding_helper # noqa: PLC0415 circular import - finding_helper.prepare_duplicates_for_delete(engagement=self) + finding_helper.prepare_duplicates_for_delete(self) 
super().delete(*args, **kwargs) with suppress(Engagement.DoesNotExist, Product.DoesNotExist): # Suppressing a potential issue created from async delete removing diff --git a/dojo/utils.py b/dojo/utils.py index e858bbfbfcb..214faef45b7 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -2026,23 +2026,16 @@ def is_finding_groups_enabled(): return get_system_setting("enable_finding_groups") -# Mapping of object types to their related models for cascading deletes -ASYNC_DELETE_MAPPING = { - "Product_Type": [ - (Endpoint, "product__prod_type__id"), - (Finding, "test__engagement__product__prod_type__id"), - (Test, "engagement__product__prod_type__id"), - (Engagement, "product__prod_type__id"), - (Product, "prod_type__id")], - "Product": [ - (Endpoint, "product__id"), - (Finding, "test__engagement__product__id"), - (Test, "engagement__product__id"), - (Engagement, "product__id")], - "Engagement": [ - (Finding, "test__engagement__id"), - (Test, "engagement__id")], - "Test": [(Finding, "test__id")], +# Supported object types for async cascade deletion +ASYNC_DELETE_SUPPORTED_TYPES = (Product_Type, Product, Engagement, Test) + +# Finding scope filters per model type — used to build the finding queryset +# for bulk deletion before cascade_delete handles the rest. +FINDING_SCOPE_FILTERS = { + Product_Type: "test__engagement__product__prod_type", + Product: "test__engagement__product", + Engagement: "test__engagement", + Test: "test", } @@ -2054,7 +2047,7 @@ def _get_object_name(obj): @app.task -def async_delete_crawl_task(obj, model_list, **kwargs): +def async_delete_crawl_task(obj, **kwargs): """ Delete an object and all its related objects using the SQL cascade walker. 
@@ -2083,26 +2076,14 @@ def async_delete_crawl_task(obj, model_list, **kwargs): product = obj.engagement.product # Step 1: Determine finding scope - finding_filter = None - for model, query_path in model_list: - if model is Finding: - finding_filter = {query_path: obj.id} - break - - if finding_filter: - finding_qs = Finding.objects.filter(**finding_filter) + scope_field = FINDING_SCOPE_FILTERS.get(type(obj)) + if scope_field: + finding_qs = Finding.objects.filter(**{scope_field: obj}) # Step 2: Prepare duplicate clusters (must happen before any deletion) - # When CASCADE_DELETE=True, reconfigure_duplicate_cluster skips deletion — + # When CASCADE_DELETE=True, reconfigure_duplicate_cluster skips reconfiguration — # we handle that below by expanding scope to include outside duplicates. - if isinstance(obj, Product_Type): - prepare_duplicates_for_delete(product_type=obj) - elif isinstance(obj, Product): - prepare_duplicates_for_delete(product=obj) - elif isinstance(obj, Engagement): - prepare_duplicates_for_delete(engagement=obj) - elif isinstance(obj, Test): - prepare_duplicates_for_delete(test=obj) + prepare_duplicates_for_delete(obj) # Step 3: Delete outside-scope duplicates first — these point to findings # in the main scope via duplicate_finding FK, so they must be removed before @@ -2119,22 +2100,14 @@ def async_delete_crawl_task(obj, model_list, **kwargs): # Step 4: Delete the main scope findings bulk_delete_findings(finding_qs, chunk_size=chunk_size) - # Delete remaining non-Finding children (Tests, Engagements, Endpoints) - # These are now lightweight since their Findings are gone - for model, query_path in model_list: - if model is not Finding: - filter_dict = {query_path: obj.id} - qs = model.objects.filter(**filter_dict) - if qs.exists(): - with transaction.atomic(): - cascade_delete(model, qs, skip_relations={Finding}) - - # Step 6: Delete the top-level object itself + # Step 5: Delete the top-level object and all remaining children (Tests, + # 
Engagements, Endpoints, etc.) via cascade_delete. Findings are already + # gone, so skip_relations={Finding} avoids walking empty relations. pk_query = type(obj).objects.filter(pk=obj.pk) with transaction.atomic(): cascade_delete(type(obj), pk_query, skip_relations={Finding}) - # Step 7: Recalculate product grade once (not per-object) + # Step 6: Recalculate product grade once (not per-object) # The custom delete() methods on Finding/Test/Engagement each call # perform_product_grading — cascade_delete bypasses custom delete(). if product: @@ -2153,14 +2126,11 @@ def async_delete_task(obj, **kwargs): """ from dojo.celery_dispatch import dojo_dispatch_task # noqa: PLC0415 circular import - logger.debug("ASYNC_DELETE: Deleting " + _get_object_name(obj) + ": " + str(obj)) - model_list = ASYNC_DELETE_MAPPING.get(_get_object_name(obj)) - if model_list: - # The object to be deleted was found in the object list - dojo_dispatch_task(async_delete_crawl_task, obj, model_list) + logger.debug("ASYNC_DELETE: Deleting %s: %s", _get_object_name(obj), obj) + if isinstance(obj, ASYNC_DELETE_SUPPORTED_TYPES): + dojo_dispatch_task(async_delete_crawl_task, obj) else: - # The object is not supported in async delete, delete normally - logger.debug("ASYNC_DELETE: " + _get_object_name(obj) + " async delete not supported. Deleteing normally: " + str(obj)) + logger.debug("ASYNC_DELETE: %s async delete not supported. Deleting normally: %s", _get_object_name(obj), obj) obj.delete() @@ -2177,10 +2147,6 @@ class async_delete: which properly handles user context injection and pghistory context. """ - def __init__(self, *args, **kwargs): - # Keep mapping reference for backwards compatibility - self.mapping = ASYNC_DELETE_MAPPING - def delete(self, obj, **kwargs): """ Entry point to delete an object asynchronously. 
@@ -2192,18 +2158,10 @@ def delete(self, obj, **kwargs): dojo_dispatch_task(async_delete_task, obj, **kwargs) - # Keep helper methods for backwards compatibility and potential direct use @staticmethod def get_object_name(obj): return _get_object_name(obj) - @staticmethod - def chunk_list(model, full_list): - chunk_size = get_setting("ASYNC_OBEJECT_DELETE_CHUNK_SIZE") - chunk_list = [full_list[i:i + chunk_size] for i in range(0, len(full_list), chunk_size)] - logger.debug("ASYNC_DELETE: Split " + _get_object_name(model) + " into " + str(len(chunk_list)) + " chunks of " + str(chunk_size)) - return chunk_list - @receiver(user_logged_in) def log_user_login(sender, request, user, **kwargs): diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index 2a15dde1bcc..6e22615948b 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -104,7 +104,7 @@ def test_no_duplicates(self): f2 = self._create_finding(self.test1, "F2") with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) f1.refresh_from_db() f2.refresh_from_db() @@ -120,7 +120,7 @@ def test_inside_scope_duplicates_reset(self): self._make_duplicate(dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) dupe.refresh_from_db() self.assertIsNone(dupe.duplicate_finding) @@ -137,7 +137,7 @@ def test_outside_scope_duplicates_get_new_original(self): self._make_duplicate(outside_dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) outside_dupe.refresh_from_db() # Outside dupe becomes the new original @@ -158,7 +158,7 @@ def test_outside_scope_cluster_repointed(self): self._make_duplicate(dupe_d, original) with impersonate(self.testuser): - 
prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) dupe_b.refresh_from_db() dupe_c.refresh_from_db() @@ -182,7 +182,7 @@ def test_engagement_scope_inside_reset(self): self._make_duplicate(dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(engagement=self.engagement1) + prepare_duplicates_for_delete(self.engagement1) dupe.refresh_from_db() self.assertIsNone(dupe.duplicate_finding) @@ -195,7 +195,7 @@ def test_engagement_scope_outside_reconfigure(self): self._make_duplicate(outside_dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(engagement=self.engagement1) + prepare_duplicates_for_delete(self.engagement1) outside_dupe.refresh_from_db() self.assertFalse(outside_dupe.duplicate) @@ -210,7 +210,7 @@ def test_mixed_inside_and_outside_duplicates(self): self._make_duplicate(outside_dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) inside_dupe.refresh_from_db() outside_dupe.refresh_from_db() @@ -235,7 +235,7 @@ def test_cascade_delete_skips_outside_reconfigure(self): self._make_duplicate(outside_dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) outside_dupe.refresh_from_db() # Outside dupe is still a duplicate — not reconfigured or deleted @@ -252,7 +252,7 @@ def test_multiple_originals(self): self._make_duplicate(dupe_of_b, original_b) with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) dupe_of_a.refresh_from_db() dupe_of_b.refresh_from_db() @@ -274,7 +274,7 @@ def test_original_status_copied_to_new_original(self): self._make_duplicate(outside_dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) outside_dupe.refresh_from_db() 
self.assertFalse(outside_dupe.duplicate) @@ -292,7 +292,7 @@ def test_found_by_copied_to_new_original(self): self._make_duplicate(outside_dupe, original) with impersonate(self.testuser): - prepare_duplicates_for_delete(test=self.test1) + prepare_duplicates_for_delete(self.test1) outside_dupe.refresh_from_db() found_by_ids = set(outside_dupe.found_by.values_list("id", flat=True)) @@ -337,7 +337,7 @@ def test_delete_product_with_cross_engagement_duplicates(self): Action: delete the entire product via async_delete_crawl_task. Expected: product and all findings are deleted without errors. """ - from dojo.utils import ASYNC_DELETE_MAPPING, async_delete_crawl_task + from dojo.utils import async_delete_crawl_task finding_a = self._create_finding(self.test1, "Original A") finding_a.active = True @@ -351,9 +351,8 @@ def test_delete_product_with_cross_engagement_duplicates(self): finding_a_id = finding_a.id finding_b_id = finding_b.id - model_list = ASYNC_DELETE_MAPPING["Product"] with impersonate(self.testuser): - async_delete_crawl_task(self.product, model_list) + async_delete_crawl_task(self.product) # Everything should be gone self.assertFalse(Product.objects.filter(id=product_id).exists()) From b731d157c1035a4499987ae30077c74c6417d5f4 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 21:26:36 +0100 Subject: [PATCH 13/17] fix: resolve ruff lint violations in helper and tests --- dojo/finding/helper.py | 5 +++-- unittests/test_prepare_duplicates_for_delete.py | 11 +++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index e43c6af55fe..97b32bb5240 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -616,7 +616,8 @@ def reconfigure_duplicate_cluster(original, cluster_outside): def prepare_duplicates_for_delete(obj): - """Prepare duplicate clusters before deleting a Test, Engagement, Product, or Product_Type. 
+ """ + Prepare duplicate clusters before deleting a Test, Engagement, Product, or Product_Type. Resets inside-scope duplicate FKs and reconfigures outside-scope clusters so that cascade_delete won't hit FK violations on the self-referential @@ -664,7 +665,7 @@ def prepare_duplicates_for_delete(obj): .iterator(chunk_size=500) ) - for chunk_ids in batched(originals_ids, 500): + for chunk_ids in batched(originals_ids, 500, strict=False): for original in Finding.objects.filter(id__in=chunk_ids).prefetch_related("original_finding"): # Inside-scope duplicates were already unlinked by the bulk UPDATE above, # so original_finding.all() now only contains outside-scope duplicates. diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index 6e22615948b..26c5bf7031f 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -225,7 +225,8 @@ def test_mixed_inside_and_outside_duplicates(self): @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) def test_cascade_delete_skips_outside_reconfigure(self): - """When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside duplicates are left untouched. + """ + When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside duplicates are left untouched. The caller (async_delete_crawl_task) handles deletion of outside-scope duplicates separately via bulk_delete_findings. @@ -300,7 +301,8 @@ def test_found_by_copied_to_new_original(self): self.assertIn(test_type_2.id, found_by_ids) def test_delete_finding_reconfigures_cross_engagement_duplicate(self): - """Deleting an original finding makes its cross-engagement duplicate standalone. + """ + Deleting an original finding makes its cross-engagement duplicate standalone. Setup: product with eng A (finding A, original) and eng B (finding B, duplicate of A). Action: delete finding A. 
@@ -331,13 +333,14 @@ def test_delete_finding_reconfigures_cross_engagement_duplicate(self): self.assertFalse(finding_b.is_mitigated) def test_delete_product_with_cross_engagement_duplicates(self): - """Deleting a product with cross-engagement duplicates succeeds without FK violations. + """ + Deleting a product with cross-engagement duplicates succeeds without FK violations. Setup: product with eng A (finding A, original) and eng B (finding B, duplicate of A). Action: delete the entire product via async_delete_crawl_task. Expected: product and all findings are deleted without errors. """ - from dojo.utils import async_delete_crawl_task + from dojo.utils import async_delete_crawl_task # noqa: PLC0415 finding_a = self._create_finding(self.test1, "Original A") finding_a.active = True From 0f82c0d96e731dd4aeda0e9539485ba535b12d5a Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 22:02:30 +0100 Subject: [PATCH 14/17] remove obsolete test --- unittests/test_async_delete.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/unittests/test_async_delete.py b/unittests/test_async_delete.py index b8320d24707..1554432ef50 100644 --- a/unittests/test_async_delete.py +++ b/unittests/test_async_delete.py @@ -296,18 +296,3 @@ def test_async_delete_helper_methods(self): "Product", "get_object_name should work with model class", ) - - def test_async_delete_mapping_preserved(self): - """ - Test that the mapping attribute is preserved on async_delete instances. - - This ensures backwards compatibility for code that might access the mapping. 
- """ - async_del = async_delete() - - # Verify mapping exists and has expected keys - self.assertIsNotNone(async_del.mapping) - self.assertIn("Product", async_del.mapping) - self.assertIn("Product_Type", async_del.mapping) - self.assertIn("Engagement", async_del.mapping) - self.assertIn("Test", async_del.mapping) From c663ee09304376df626ea354b4cbdfb519ca6068 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 22:39:01 +0100 Subject: [PATCH 15/17] perf: add bulk_delete_findings, fix CASCADE_DELETE and scope expansion - Add bulk_delete_findings() wrapper: M2M cleanup + chunked cascade_delete - reconfigure_duplicate_cluster: return early when CASCADE_DELETE=True instead of calling Django .delete() which fires signals per finding - finding_delete: use bulk_delete_findings when CASCADE_DELETE=True - async_delete_crawl_task: expand scope to include outside-scope duplicates, use bulk_delete_findings instead of manual M2M + cascade_delete calls - Fix test to use async_delete class instead of direct task import --- dojo/finding/helper.py | 2 +- dojo/utils.py | 26 +++++-------------- dojo/utils_cascade_delete.py | 2 ++ .../test_prepare_duplicates_for_delete.py | 5 ++-- 4 files changed, 13 insertions(+), 22 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 97b32bb5240..c4474ec8425 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -751,7 +751,7 @@ def bulk_clear_finding_m2m(finding_qs): def bulk_delete_findings(finding_qs, chunk_size=1000): """ - Delete findings and all related objects efficiently. + Delete findings and all related objects efficiently. 
Including any related object in Dojo-Pro Sends the pre_bulk_delete signal, clears M2M through tables (not discovered by _meta.related_objects), then uses cascade_delete for diff --git a/dojo/utils.py b/dojo/utils.py index 214faef45b7..edfa8156f24 100644 --- a/dojo/utils.py +++ b/dojo/utils.py @@ -2047,7 +2047,7 @@ def _get_object_name(obj): @app.task -def async_delete_crawl_task(obj, **kwargs): +def async_delete_task(obj, **kwargs): """ Delete an object and all its related objects using the SQL cascade walker. @@ -2064,6 +2064,12 @@ def async_delete_crawl_task(obj, **kwargs): ) from dojo.utils_cascade_delete import cascade_delete # noqa: PLC0415 circular import + logger.debug("ASYNC_DELETE: Deleting %s: %s", _get_object_name(obj), obj) + if not isinstance(obj, ASYNC_DELETE_SUPPORTED_TYPES): + logger.debug("ASYNC_DELETE: %s async delete not supported. Deleting normally: %s", _get_object_name(obj), obj) + obj.delete() + return + obj_name = _get_object_name(obj) logger.info("ASYNC_DELETE: Starting deletion of %s: %s", obj_name, obj) @@ -2116,24 +2122,6 @@ def async_delete_crawl_task(obj, **kwargs): logger.info("ASYNC_DELETE: Successfully deleted %s: %s", obj_name, obj) -@app.task -def async_delete_task(obj, **kwargs): - """ - Module-level Celery task to delete an object and its related objects. - - Accepts **kwargs for _pgh_context injected by dojo_dispatch_task. - Uses PgHistoryTask base class (default) to preserve pghistory context for audit trail. - """ - from dojo.celery_dispatch import dojo_dispatch_task # noqa: PLC0415 circular import - - logger.debug("ASYNC_DELETE: Deleting %s: %s", _get_object_name(obj), obj) - if isinstance(obj, ASYNC_DELETE_SUPPORTED_TYPES): - dojo_dispatch_task(async_delete_crawl_task, obj) - else: - logger.debug("ASYNC_DELETE: %s async delete not supported. 
Deleting normally: %s", _get_object_name(obj), obj)
-        obj.delete()
-
-
 class async_delete:
 
     """
diff --git a/dojo/utils_cascade_delete.py b/dojo/utils_cascade_delete.py
index 24e1d43e315..f9b20216726 100644
--- a/dojo/utils_cascade_delete.py
+++ b/dojo/utils_cascade_delete.py
@@ -61,6 +61,8 @@ def cascade_delete(from_model, instance_pk_query, skip_relations=None, base_mode
     recurses into CASCADE children first (bottom-up), then deletes at the
     current level. No query execution until recursion unwinds.
 
+    Includes any related objects defined in Dojo-Pro.
+
     Args:
         from_model: The model class to delete from.
         instance_pk_query: QuerySet selecting the records to delete.
diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py
index 26c5bf7031f..4baf9135249 100644
--- a/unittests/test_prepare_duplicates_for_delete.py
+++ b/unittests/test_prepare_duplicates_for_delete.py
@@ -340,7 +340,7 @@ def test_delete_product_with_cross_engagement_duplicates(self):
         Action: delete the entire product via async_delete_crawl_task.
         Expected: product and all findings are deleted without errors.
""" - from dojo.utils import async_delete_crawl_task # noqa: PLC0415 + from dojo.utils import async_delete # noqa: PLC0415 finding_a = self._create_finding(self.test1, "Original A") finding_a.active = True @@ -355,7 +355,8 @@ def test_delete_product_with_cross_engagement_duplicates(self): finding_b_id = finding_b.id with impersonate(self.testuser): - async_delete_crawl_task(self.product) + async_del = async_delete() + async_del.delete(self.product) # Everything should be gone self.assertFalse(Product.objects.filter(id=product_id).exists()) From 86ea1eccd9cf43b8f061c4ae98bc4d7b4819cb71 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 21 Mar 2026 22:58:34 +0100 Subject: [PATCH 16/17] fix: handle M2M and tag cleanup in cascade_delete Adds generic M2M through-table cleanup to cascade_delete so tags and other M2M relations are cleared before row deletion. Introduces bulk_remove_all_tags in tag_utils to properly decrement tagulous tag counts during bulk deletion. Adds test for product deletion with tagged objects. --- dojo/finding/helper.py | 10 ++- dojo/tag_utils.py | 69 ++++++++++++++++++- dojo/utils_cascade_delete.py | 27 +++++++- .../test_prepare_duplicates_for_delete.py | 47 +++++++++++++ 4 files changed, 150 insertions(+), 3 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index c4474ec8425..b5c504db4a1 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -703,7 +703,10 @@ def bulk_clear_finding_m2m(finding_qs): Special handling for FileUpload: deletes via ORM so the custom FileUpload.delete() fires and removes files from disk storage. + Tags are handled via bulk_remove_all_tags to maintain tag counts. 
""" + from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import + finding_ids = finding_qs.values_list("id", flat=True) # Collect FileUpload IDs before deleting through table entries @@ -720,8 +723,13 @@ def bulk_clear_finding_m2m(finding_qs): ).values_list("notes_id", flat=True), ) - # Auto-discover and delete all M2M through tables + # Remove tags with proper count maintenance + bulk_remove_all_tags(Finding, finding_ids) + + # Auto-discover and delete remaining (non-tag) M2M through tables for m2m_field in Finding._meta.many_to_many: + if hasattr(m2m_field, "tag_options"): + continue through_model = m2m_field.remote_field.through # Find the FK column that points to Finding fk_column = None diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py index 054dc2b0a08..3c76180ab05 100644 --- a/dojo/tag_utils.py +++ b/dojo/tag_utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from collections.abc import Iterable from django.conf import settings @@ -8,6 +9,8 @@ from dojo.models import Product # local import to avoid circulars at import time +logger = logging.getLogger(__name__) + def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "tags", batch_size: int | None = None) -> int: """ @@ -161,4 +164,68 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta return total_created -__all__ = ["bulk_add_tags_to_instances"] +def bulk_remove_all_tags(model_class, instance_ids_qs, tag_field_name: str = "tags"): + """ + Remove all tags from instances identified by the given ID subquery. + + Decrements tag counts correctly and deletes through-table rows. + Accepts a QuerySet of IDs (as a subquery) to avoid materializing large ID lists. + + Args: + model_class: The model class (e.g. Finding, Product). + instance_ids_qs: A QuerySet producing instance PKs (used as subquery). + tag_field_name: Name of the TagField (default: "tags"). 
+ + """ + for field_name in [tag_field_name, "inherited_tags"]: + try: + tag_field = model_class._meta.get_field(field_name) + except Exception: # noqa: S112 + continue + + if not hasattr(tag_field, "tag_options"): + continue + + tag_model = tag_field.related_model + through_model = tag_field.remote_field.through + + # Find the FK column that points to the source model + source_field_name = None + target_field_name = None + for field in through_model._meta.get_fields(): + if hasattr(field, "remote_field") and field.remote_field: + if field.remote_field.model == model_class: + source_field_name = field.name + elif field.remote_field.model == tag_model: + target_field_name = field.name + + if not source_field_name or not target_field_name: + continue + + # Get affected tag IDs and their counts before deletion + affected_tags = ( + through_model.objects.filter(**{f"{source_field_name}__in": instance_ids_qs}) + .values(target_field_name) + .annotate(num=models.Count("id")) + ) + + # Decrement tag counts. Tag counts are not used in DefectDojo but we + # maintain them to avoid breaking tagulous's internal bookkeeping. 
+ for entry in affected_tags: + tag_model.objects.filter(pk=entry[target_field_name]).update( + count=models.F("count") - entry["num"], + ) + + # Delete through-table rows + count, _ = through_model.objects.filter( + **{f"{source_field_name}__in": instance_ids_qs}, + ).delete() + + if count: + logger.debug( + "bulk_remove_all_tags: removed %d %s.%s through-table rows", + count, model_class.__name__, field_name, + ) + + +__all__ = ["bulk_add_tags_to_instances", "bulk_remove_all_tags"] diff --git a/dojo/utils_cascade_delete.py b/dojo/utils_cascade_delete.py index f9b20216726..ccbb5eaa810 100644 --- a/dojo/utils_cascade_delete.py +++ b/dojo/utils_cascade_delete.py @@ -138,7 +138,32 @@ def cascade_delete(from_model, instance_pk_query, skip_relations=None, base_mode on_delete_name, from_model.__name__, related_model.__name__, ) - # After all children are deleted, delete records at this level + # Clear M2M through tables before deleting (not discovered by _meta.related_objects). + # Tag fields are handled via bulk_remove_all_tags to maintain tag counts correctly. 
+ from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import + + bulk_remove_all_tags(from_model, instance_pk_query) + + for m2m_field in from_model._meta.many_to_many: + # Skip tag fields — already handled above + if hasattr(m2m_field, "tag_options"): + continue + through_model = m2m_field.remote_field.through + fk_column = None + for field in through_model._meta.get_fields(): + if hasattr(field, "related_model") and field.related_model is from_model: + fk_column = field.column + break + if fk_column: + filterspec_m2m = {f"{fk_column}__in": models.Subquery(instance_pk_query)} + m2m_count = execute_delete_sql(through_model.objects.filter(**filterspec_m2m)) + if m2m_count: + logger.debug( + "cascade_delete: cleared %d rows from M2M %s", + m2m_count, through_model._meta.db_table, + ) + + # After all children and M2M are deleted, delete records at this level if level == 0: del_query = instance_pk_query else: diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index 4baf9135249..de786c50ad6 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -362,3 +362,50 @@ def test_delete_product_with_cross_engagement_duplicates(self): self.assertFalse(Product.objects.filter(id=product_id).exists()) self.assertFalse(Finding.objects.filter(id=finding_a_id).exists()) self.assertFalse(Finding.objects.filter(id=finding_b_id).exists()) + + def test_delete_product_with_tags(self): + """ + Deleting a product with tags on product and findings succeeds + and correctly decrements tag counts. 
+ """ + from dojo.utils import async_delete # noqa: PLC0415 + + # Add tags to product and findings + self.product.tags = "product-tag, shared-tag" + self.product.save() + + finding_a = self._create_finding(self.test1, "Tagged Finding A") + finding_a.tags = "finding-tag, shared-tag" + super(Finding, finding_a).save(skip_validation=True) + + finding_b = self._create_finding(self.test3, "Tagged Finding B") + finding_b.tags = "finding-tag" + super(Finding, finding_b).save(skip_validation=True) + + product_id = self.product.id + finding_a_id = finding_a.id + finding_b_id = finding_b.id + + # Get tag models to check counts after deletion + # Product and Finding have separate tag models in tagulous + product_tag_model = Product._meta.get_field("tags").related_model + finding_tag_model = Finding._meta.get_field("tags").related_model + product_shared_tag = product_tag_model.objects.get(name="shared-tag") + finding_shared_tag = finding_tag_model.objects.get(name="shared-tag") + + with impersonate(self.testuser): + async_del = async_delete() + async_del.delete(self.product) + + # Everything should be gone + self.assertFalse(Product.objects.filter(id=product_id).exists()) + self.assertFalse(Finding.objects.filter(id=finding_a_id).exists()) + self.assertFalse(Finding.objects.filter(id=finding_b_id).exists()) + + # Tag counts should be decremented to 0 (all referencing objects deleted). + # Tag counts are not used in DefectDojo, but we still verify them to ensure + # our bulk removal method doesn't break tagulous's internal bookkeeping. 
+ product_shared_tag.refresh_from_db() + self.assertEqual(product_shared_tag.count, 0) + finding_shared_tag.refresh_from_db() + self.assertEqual(finding_shared_tag.count, 0) From 161abd09bca28a044279d5c28043752ddc6d84a4 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Mon, 23 Mar 2026 18:17:54 +0100 Subject: [PATCH 17/17] refactor: auto-discover TagFields in bulk_remove_all_tags Instead of hardcoding field names, iterate over all fields on the model and select those with tag_options. This avoids unexpected side effects when callers pass a specific tag_field_name parameter. --- dojo/tag_utils.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py index 3c76180ab05..cf405034be4 100644 --- a/dojo/tag_utils.py +++ b/dojo/tag_utils.py @@ -164,27 +164,25 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta return total_created -def bulk_remove_all_tags(model_class, instance_ids_qs, tag_field_name: str = "tags"): +def bulk_remove_all_tags(model_class, instance_ids_qs): """ Remove all tags from instances identified by the given ID subquery. - Decrements tag counts correctly and deletes through-table rows. + Auto-discovers all TagFields on the model, decrements tag counts correctly, + and deletes through-table rows. Accepts a QuerySet of IDs (as a subquery) to avoid materializing large ID lists. Args: model_class: The model class (e.g. Finding, Product). instance_ids_qs: A QuerySet producing instance PKs (used as subquery). - tag_field_name: Name of the TagField (default: "tags"). 
""" - for field_name in [tag_field_name, "inherited_tags"]: - try: - tag_field = model_class._meta.get_field(field_name) - except Exception: # noqa: S112 - continue + tag_fields = [ + field for field in model_class._meta.get_fields() + if hasattr(field, "tag_options") + ] - if not hasattr(tag_field, "tag_options"): - continue + for tag_field in tag_fields: tag_model = tag_field.related_model through_model = tag_field.remote_field.through @@ -224,7 +222,7 @@ def bulk_remove_all_tags(model_class, instance_ids_qs, tag_field_name: str = "ta if count: logger.debug( "bulk_remove_all_tags: removed %d %s.%s through-table rows", - count, model_class.__name__, field_name, + count, model_class.__name__, tag_field.name, )