Skip to content

Commit 278daf6

Browse files
committed
fix: add retry for resume race condition (#3081)
1 parent 4c12914 commit 278daf6

File tree

1 file changed

+30
-5
lines changed

1 file changed

+30
-5
lines changed

apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,20 @@ export class PauseResumeManager {
164164
static async enqueueOrStartResume(args: EnqueueResumeArgs): Promise<EnqueueResumeResult> {
165165
const { executionId, contextId, resumeInput, userId } = args
166166

167-
return await db.transaction(async (tx) => {
168-
const pausedExecution = await tx
169-
.select()
170-
.from(pausedExecutions)
167+
// Retry to handle race condition where resume request arrives
168+
// before persistPauseResult commits the paused execution row.
169+
// persistPauseResult awaits its INSERT, so the row can only be missing
170+
// during the brief window before that await resolves (~10-50ms).
171+
const MAX_RETRIES = 3
172+
const RETRY_DELAY_MS = 200
173+
let lastError: Error | null = null
174+
175+
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
176+
try {
177+
return await db.transaction(async (tx) => {
178+
const pausedExecution = await tx
179+
.select()
180+
.from(pausedExecutions)
171181
.where(eq(pausedExecutions.executionId, executionId))
172182
.for('update')
173183
.limit(1)
@@ -277,7 +287,22 @@ export class PauseResumeManager {
277287
resumeInput,
278288
userId,
279289
}
280-
})
290+
})
291+
} catch (err: any) {
292+
lastError = err
293+
const isNotFound = err.message?.includes('Paused execution not found')
294+
const isLastAttempt = attempt === MAX_RETRIES
295+
296+
if (!isNotFound || isLastAttempt) {
297+
throw err
298+
}
299+
300+
await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS))
301+
}
302+
}
303+
304+
// Unreachable: the final attempt either returns or rethrows above, but TypeScript requires a terminating statement here
305+
throw lastError ?? new Error('enqueueOrStartResume failed after retries')
281306
}
282307

283308
static async startResumeExecution(args: StartResumeExecutionArgs): Promise<void> {

0 commit comments

Comments
 (0)