Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 49 additions & 8 deletions treeherder/model/data_cycling/removal_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,55 @@ def name(self) -> str:
return "try data removal strategy"

def __attempt_remove(self, using):
# NOTE(review): this method is shown here as a rendered diff without +/- markers.
# Going by the "49 additions & 8 deletions" summary, the eight ORM lines directly
# below appear to be the removed implementation, and the docstring plus raw SQL
# after them appear to be its replacement - confirm against the merged file.
#
# ORM variant: selects up to self._chunk_size PerformanceDatum ids matching the try
# repository, the timestamp cutoff and the target signatures, deletes those rows, and
# stores the first value returned by .delete() in using.rowcount.
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
repository_id=self.try_repo,
push_timestamp__lte=self._max_timestamp,
signature_id__in=self.target_signatures,
).values_list("id")[: self._chunk_size]
).delete()
using.rowcount = deleted
"""
Raw SQL is used to avoid Django ORM cascade deletes on performance_datum_replicate.
Although the WHERE clause in del_replicate looks redundant, it is intentionally kept to guide
the PostgreSQL planner toward a more efficient execution plan.
"""
# Raw-SQL variant, issued as a single statement:
#   - target_datum selects at most LIMIT performance_datum rows that match the
#     repository, timestamp cutoff and signature ids;
#   - del_replicate deletes performance_datum_replicate rows pointing at them;
#   - del_multi deletes perf_multicommitdatum rows pointing at them;
#   - the final DELETE removes the selected performance_datum rows themselves.
using.execute(
"""
WITH target_datum AS (
SELECT pd.id, pd.repository_id, pd.push_timestamp, pd.signature_id
FROM performance_datum pd
WHERE pd.repository_id = %s
AND pd.push_timestamp <= %s
AND pd.signature_id = ANY(%s)
LIMIT %s
),
del_replicate AS (
DELETE FROM performance_datum_replicate r1
WHERE r1.performance_datum_id IN (
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: It would be good to explain this WHERE condition in the comment above. At first glance, it looks like it's redundant since we do a similar select query above for target_datum.

SELECT td.id
FROM target_datum td
WHERE td.repository_id = %s
AND td.push_timestamp <= %s
AND td.signature_id = ANY(%s)
AND EXISTS (
SELECT 1
FROM performance_datum_replicate r2
WHERE r2.performance_datum_id = td.id
)
)
),
del_multi AS (
DELETE FROM perf_multicommitdatum pm
USING target_datum td
WHERE pm.perf_datum_id = td.id
)
DELETE FROM performance_datum pd
USING target_datum td
WHERE pd.id = td.id
""",
# Placeholders are positional: the values below must stay in the same order as
# the %s markers in the SQL above.
[
# target_datum: repository id, timestamp cutoff, signature ids, LIMIT
self.try_repo,
self._max_timestamp,
list(self.target_signatures),
self._chunk_size,
# del_replicate: the same three filter values, repeated for its inner WHERE
self.try_repo,
self._max_timestamp,
list(self.target_signatures),
],
)

def __lookup_new_signature(self):
self.__target_signatures = self.__try_signatures[: self.SIGNATURE_BULK_SIZE]
Expand Down