@@ -164,10 +164,20 @@ export class PauseResumeManager {
164164 static async enqueueOrStartResume ( args : EnqueueResumeArgs ) : Promise < EnqueueResumeResult > {
165165 const { executionId, contextId, resumeInput, userId } = args
166166
167- return await db . transaction ( async ( tx ) => {
168- const pausedExecution = await tx
169- . select ( )
170- . from ( pausedExecutions )
167+ // Retry to handle race condition where resume request arrives
168+ // before persistPauseResult commits the paused execution row.
169+ // The INSERT in persistPauseResult is awaited, so the race window
170+ // is only between the method call and the await returning (~10-50ms).
171+ const MAX_RETRIES = 3
172+ const RETRY_DELAY_MS = 200
173+ let lastError : Error | null = null
174+
175+ for ( let attempt = 0 ; attempt <= MAX_RETRIES ; attempt ++ ) {
176+ try {
177+ return await db . transaction ( async ( tx ) => {
178+ const pausedExecution = await tx
179+ . select ( )
180+ . from ( pausedExecutions )
171181 . where ( eq ( pausedExecutions . executionId , executionId ) )
172182 . for ( 'update' )
173183 . limit ( 1 )
@@ -277,7 +287,22 @@ export class PauseResumeManager {
277287 resumeInput,
278288 userId,
279289 }
280- } )
290+ } )
291+ } catch ( err : any ) {
292+ lastError = err
293+ const isNotFound = err . message ?. includes ( 'Paused execution not found' )
294+ const isLastAttempt = attempt === MAX_RETRIES
295+
296+ if ( ! isNotFound || isLastAttempt ) {
297+ throw err
298+ }
299+
300+ await new Promise ( ( resolve ) => setTimeout ( resolve , RETRY_DELAY_MS ) )
301+ }
302+ }
303+
304+ // This should never be reached due to the for loop logic, but TypeScript needs it
305+ throw lastError ?? new Error ( 'enqueueOrStartResume failed after retries' )
281306 }
282307
283308 static async startResumeExecution ( args : StartResumeExecutionArgs ) : Promise < void > {
0 commit comments