Skip to content

Commit 5248bb0

Browse files
committed
Bug 1944375 - Improve data cycling for TryDataRemoval
1 parent e82681c commit 5248bb0

1 file changed

Lines changed: 49 additions & 8 deletions

File tree

treeherder/model/data_cycling/removal_strategies.py

Lines changed: 49 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -175,14 +175,55 @@ def name(self) -> str:
175 175
return "try data removal strategy"
176 176

177 177
def __attempt_remove(self, using):
178-
deleted, _ = PerformanceDatum.objects.filter(
179-
id__in=PerformanceDatum.objects.filter(
180-
repository_id=self.try_repo,
181-
push_timestamp__lte=self._max_timestamp,
182-
signature_id__in=self.target_signatures,
183-
).values_list("id")[: self._chunk_size]
184-
).delete()
185-
using.rowcount = deleted
178+
"""
179+
Raw SQL is used to avoid Django ORM cascade deletes on performance_datum_replicate.
180+
Some conditions are intentionally repeated to help the DB planner reduce the candidate rows
181+
in the del_replicate CTE.
182+
"""
183+
using.execute(
184+
"""
185+
WITH target_datum AS (
186+
SELECT pd.id, pd.repository_id, pd.push_timestamp, pd.signature_id
187+
FROM performance_datum pd
188+
WHERE pd.repository_id = %s
189+
AND pd.push_timestamp <= %s
190+
AND pd.signature_id = ANY(%s)
191+
LIMIT %s
192+
),
193+
del_replicate AS (
194+
DELETE FROM performance_datum_replicate r1
195+
WHERE r1.performance_datum_id IN (
196+
SELECT td.id
197+
FROM target_datum td
198+
WHERE td.repository_id = %s
199+
AND td.push_timestamp <= %s
200+
AND td.signature_id = ANY(%s)
201+
AND EXISTS (
202+
SELECT 1
203+
FROM performance_datum_replicate r2
204+
WHERE r2.performance_datum_id = td.id
205+
)
206+
)
207+
),
208+
del_multi AS (
209+
DELETE FROM perf_multicommitdatum pm
210+
USING target_datum td
211+
WHERE pm.perf_datum_id = td.id
212+
)
213+
DELETE FROM performance_datum pd
214+
USING target_datum td
215+
WHERE pd.id = td.id
216+
""",
217+
[
218+
self.try_repo,
219+
self._max_timestamp,
220+
list(self.target_signatures),
221+
self._chunk_size,
222+
self.try_repo,
223+
self._max_timestamp,
224+
list(self.target_signatures),
225+
],
226+
)
186 227

187 228
def __lookup_new_signature(self):
188 229
self.__target_signatures = self.__try_signatures[: self.SIGNATURE_BULK_SIZE]

0 commit comments

Comments
 (0)