From 278daf650f980375350858eedcbfd8d309c0ecaa Mon Sep 17 00:00:00 2001 From: guoyangzhen Date: Sat, 14 Mar 2026 19:43:12 +0800 Subject: [PATCH 1/2] fix: add retry for resume race condition (#3081) --- .../executor/human-in-the-loop-manager.ts | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts index 5649cf40e65..afd32575515 100644 --- a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts +++ b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts @@ -164,10 +164,20 @@ export class PauseResumeManager { static async enqueueOrStartResume(args: EnqueueResumeArgs): Promise { const { executionId, contextId, resumeInput, userId } = args - return await db.transaction(async (tx) => { - const pausedExecution = await tx - .select() - .from(pausedExecutions) + // Retry to handle race condition where resume request arrives + // before persistPauseResult commits the paused execution row. + // The INSERT in persistPauseResult is awaited, so the race window + // is only between the method call and the await returning (~10-50ms). + const MAX_RETRIES = 3 + const RETRY_DELAY_MS = 200 + let lastError: Error | null = null + + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + return await db.transaction(async (tx) => { + const pausedExecution = await tx + .select() + .from(pausedExecutions) .where(eq(pausedExecutions.executionId, executionId)) .for('update') .limit(1) @@ -277,7 +287,22 @@ export class PauseResumeManager { resumeInput, userId, } - }) + }) + } catch (err: any) { + lastError = err + const isNotFound = err.message?.includes('Paused execution not found') + const isLastAttempt = attempt === MAX_RETRIES + + if (!isNotFound || isLastAttempt) { + throw err + } + + await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS)) + } + } + + // This should never be reached due to the for loop logic, but TypeScript needs it + throw lastError ?? new Error('enqueueOrStartResume failed after retries') } static async startResumeExecution(args: StartResumeExecutionArgs): Promise { From d16d7bdad53deb0858b476ad2c59e7da8e09e9e8 Mon Sep 17 00:00:00 2001 From: guoyangzhen Date: Sat, 14 Mar 2026 21:02:10 +0800 Subject: [PATCH 2/2] fix: correct off-by-one in retry attempts (addressing review feedback) - Change <= to < so MAX_RETRIES=3 means 3 attempts, not 4 - Update isLastAttempt check accordingly --- apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts index afd32575515..f51fec62994 100644 --- a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts +++ b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts @@ -172,7 +172,7 @@ export class PauseResumeManager { const RETRY_DELAY_MS = 200 let lastError: Error | null = null - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { try { return await db.transaction(async (tx) => { const pausedExecution = await tx @@ -291,7 +291,7 @@ export class PauseResumeManager { } catch (err: any) { lastError = err const isNotFound = err.message?.includes('Paused execution not found') - const isLastAttempt = attempt === MAX_RETRIES + const isLastAttempt = attempt === MAX_RETRIES - 1 if (!isNotFound || isLastAttempt) { throw err