@@ -7,19 +7,23 @@ import type { ExecutionContext, StreamingContext } from '@/lib/copilot/request/t
77
// Shared mock handles. They are created inside `vi.hoisted` so they already
// exist when the `vi.mock` factories in this file run.
const {
  mockCreateRunSegment,
  mockForceFailHungToolCall,
  mockGetEffectiveDecryptedEnv,
  mockGetMothershipBaseURL,
  mockGetMothershipSourceEnvHeaders,
  mockPrepareExecutionContext,
  mockRunStreamLoop,
  mockToolWatchdogTimeoutMs,
  mockUpdateRunStatus,
} = vi.hoisted(() => {
  return {
    mockCreateRunSegment: vi.fn(),
    mockForceFailHungToolCall: vi.fn(),
    mockGetEffectiveDecryptedEnv: vi.fn(),
    mockGetMothershipBaseURL: vi.fn(),
    mockGetMothershipSourceEnvHeaders: vi.fn(),
    mockPrepareExecutionContext: vi.fn(),
    mockRunStreamLoop: vi.fn(),
    // Default watchdog timeout reported to the code under test: 60 seconds.
    mockToolWatchdogTimeoutMs: vi.fn(() => 60_000),
    mockUpdateRunStatus: vi.fn(),
  }
})
2529
@@ -84,6 +88,8 @@ vi.mock('@/lib/copilot/request/tools/billing', () => ({
8488
// Replace the tool executor module: `executeToolAndReport` becomes an inert
// stub, while the hung-tool helpers are routed to the hoisted mocks so tests
// can script and inspect them.
vi.mock('@/lib/copilot/request/tools/executor', () => {
  return {
    executeToolAndReport: vi.fn(),
    forceFailHungToolCall: mockForceFailHungToolCall,
    toolWatchdogTimeoutMs: mockToolWatchdogTimeoutMs,
  }
})
8894
8995import { MothershipStreamV1ToolOutcome } from '@/lib/copilot/generated/mothership-stream-v1'
@@ -583,4 +589,102 @@ describe('runCopilotLifecycle', () => {
583589 // Final attempt (2) is terminal → not flagged, so Go bills + surfaces it.
584590 expect ( bodies [ 3 ] . willRetryOnStreamError ) . toBeUndefined ( )
585591 } )
592+
it('force-fails a hung tool promise and resumes with an error result instead of wedging', async () => {
  // Scenario: the initial stream leg checkpoints on an async tool whose promise
  // never settles. Once the wait budget elapses, the lifecycle is expected to
  // force-fail that tool call and resume with an error result for it.

  // Watchdog timeout reported by the mocked `toolWatchdogTimeoutMs`.
  const WATCHDOG_MS = 60_000
  // Extra grace added on top of the watchdog before the hung call is
  // force-failed (30s per the wait budget this test was written against —
  // NOTE(review): keep in sync with the lifecycle implementation).
  const RESUME_GRACE_MS = 30_000

  vi.useFakeTimers()
  try {
    const fetchUrls: string[] = []
    const bodies: Record<string, unknown>[] = []
    const executionContext: ExecutionContext = {
      userId: 'user-1',
      workflowId: '',
      workspaceId: 'ws-1',
      chatId: 'chat-1',
      decryptedEnvVars: {},
    }

    // Records the URL and parsed JSON body of each stream-loop invocation so
    // the assertions below can inspect what was sent on every leg.
    const recordRequest = (fetchUrl: string, fetchOptions: RequestInit): void => {
      fetchUrls.push(fetchUrl)
      bodies.push(JSON.parse(String(fetchOptions.body)))
    }

    // Pin the watchdog budget explicitly instead of relying on the hoisted
    // default, so a suite-level mock reset cannot silently change the wait
    // budget this test advances past.
    mockToolWatchdogTimeoutMs.mockReturnValue(WATCHDOG_MS)

    // Mirror the real helper: settle the tool call into a terminal error
    // state so the resume loop can serialize an error result for it.
    mockForceFailHungToolCall.mockImplementation(
      async (toolCallId: string, context: StreamingContext, message: string) => {
        const tool = context.toolCalls.get(toolCallId)
        if (!tool) return
        tool.status = MothershipStreamV1ToolOutcome.error
        tool.endTime = Date.now()
        tool.result = { success: false }
        tool.error = message
      }
    )

    // Initial leg checkpoints on an async tool whose promise NEVER settles —
    // the exact shape of the prod incident (claimed, marked running, hung).
    mockRunStreamLoop.mockImplementationOnce(
      async (
        fetchUrl: string,
        fetchOptions: RequestInit,
        context: StreamingContext
      ): Promise<void> => {
        recordRequest(fetchUrl, fetchOptions)
        context.toolCalls.set('tool-hung', {
          id: 'tool-hung',
          name: 'read',
          status: 'executing',
        })
        context.pendingToolPromises.set('tool-hung', new Promise(() => {}))
        context.awaitingAsyncContinuation = {
          checkpointId: 'ckpt-1',
          pendingToolCallIds: ['tool-hung'],
        }
      }
    )

    // Resume leg completes normally with the error result delivered.
    mockRunStreamLoop.mockImplementationOnce(
      async (
        fetchUrl: string,
        fetchOptions: RequestInit,
        context: StreamingContext
      ): Promise<void> => {
        recordRequest(fetchUrl, fetchOptions)
        context.accumulatedContent = 'The file read failed, but here is what I know.'
      }
    )

    // Start the lifecycle without awaiting it: it cannot finish until the
    // fake clock has been advanced past the wait budget.
    const lifecycle = runCopilotLifecycle(
      { message: 'hello', messageId: 'stream-1' },
      {
        userId: 'user-1',
        workspaceId: 'ws-1',
        chatId: 'chat-1',
        executionId: 'exec-1',
        runId: 'run-1',
        executionContext,
      }
    )

    // Wait budget = watchdog + resume grace. Advance one second past it.
    await vi.advanceTimersByTimeAsync(WATCHDOG_MS + RESUME_GRACE_MS + 1_000)
    const result = await lifecycle

    expect(mockForceFailHungToolCall).toHaveBeenCalledWith(
      'tool-hung',
      expect.anything(),
      expect.stringContaining('hung')
    )
    expect(fetchUrls[1]).toBe('http://mothership.test/api/tools/resume')
    expect(bodies[1].results).toEqual([
      expect.objectContaining({
        callId: 'tool-hung',
        name: 'read',
        success: false,
        data: { error: expect.stringContaining('hung') },
      }),
    ])
    expect(result.success).toBe(true)
  } finally {
    // Drop the persistent implementation installed above so it cannot leak
    // into later tests that share the hoisted mock.
    mockForceFailHungToolCall.mockReset()
    vi.useRealTimers()
  }
})
586690} )
0 commit comments