diff --git a/src/daemon/handlers/__tests__/interaction.test.ts b/src/daemon/handlers/__tests__/interaction.test.ts index 9e4184b1a..4b8d94b89 100644 --- a/src/daemon/handlers/__tests__/interaction.test.ts +++ b/src/daemon/handlers/__tests__/interaction.test.ts @@ -935,6 +935,67 @@ test('press coordinates appends touch-visualization events while recording', asy } }); +test('press coordinates on iOS recording captures a non-compact snapshot for the touch reference frame', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'ios-direct-press-frame'; + const session = makeSession(sessionName); + session.snapshot = undefined; + session.recording = { + platform: 'ios', + outPath: '/tmp/demo.mp4', + startedAt: Date.now() - 1_000, + showTouches: true, + gestureEvents: [], + child: { kill: () => {} } as any, + wait: Promise.resolve({ stdout: '', stderr: '', exitCode: 0 }), + }; + sessionStore.set(sessionName, session); + + mockDispatch.mockResolvedValue({ x: 220, y: 600 }); + // Regression: a compact snapshot has no Application/Window node, so viewport inference would + // return a leaf-element bounding box and the recording overlay would misplace tap markers. + mockCaptureSnapshotForSession.mockResolvedValueOnce({ + nodes: attachRefs([ + { + index: 0, + type: 'XCUIElementTypeApplication', + rect: { x: 0, y: 0, width: 440, height: 956 }, + }, + { + index: 1, + type: 'XCUIElementTypeCell', + rect: { x: 16, y: 156, width: 370, height: 52 }, + hittable: true, + }, + ]), + createdAt: Date.now(), + backend: 'xctest', + }); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['220', '600'], + flags: {}, + }, + sessionName, + sessionStore, + contextFromFlags, + }); + + expect(response?.ok).toBe(true); + expect(mockCaptureSnapshotForSession.mock.calls[0]?.[4]).toEqual({ + interactiveOnly: true, + compact: false, + }); + const event = sessionStore.get(sessionName)?.recording?.gestureEvents[0]; + expect(event?.kind).toBe('tap'); + expect(event?.referenceWidth).toBe(440); + expect(event?.referenceHeight).toBe(956); +}); + test('press coordinates on Android recording uses physical screen size when no snapshot exists', async () => { const sessionStore = makeSessionStore(); const sessionName = 'android-direct-press-frame'; diff --git a/src/daemon/handlers/interaction-snapshot.ts b/src/daemon/handlers/interaction-snapshot.ts index ad2ae1305..2675d20a8 100644 --- a/src/daemon/handlers/interaction-snapshot.ts +++ b/src/daemon/handlers/interaction-snapshot.ts @@ -11,7 +11,7 @@ export type CaptureSnapshotForSession = ( flags: CommandFlags | undefined, sessionStore: SessionStore, contextFromFlags: ContextFromFlags, - options: { interactiveOnly: boolean; androidFreshnessMode?: 'ref-refresh' }, + options: { interactiveOnly: boolean; compact?: boolean; androidFreshnessMode?: 'ref-refresh' }, ) => Promise; export async function captureSnapshotForSession( @@ -19,12 +19,12 @@ export async function captureSnapshotForSession( flags: CommandFlags | undefined, sessionStore: SessionStore, contextFromFlags: ContextFromFlags, - options: { interactiveOnly: boolean; androidFreshnessMode?: 'ref-refresh' }, + options: { interactiveOnly: boolean; compact?: boolean; androidFreshnessMode?: 'ref-refresh' }, ): Promise { const effectiveFlags = { ...(flags ?? {}), snapshotInteractiveOnly: options.interactiveOnly, - snapshotCompact: options.interactiveOnly, + snapshotCompact: options.compact ?? options.interactiveOnly, }; const dispatchContext = contextFromFlags( effectiveFlags, diff --git a/src/daemon/handlers/interaction-touch-reference-frame.ts b/src/daemon/handlers/interaction-touch-reference-frame.ts index ff2ea89c3..6be45221d 100644 --- a/src/daemon/handlers/interaction-touch-reference-frame.ts +++ b/src/daemon/handlers/interaction-touch-reference-frame.ts @@ -48,8 +48,11 @@ async function resolveDirectTouchReferenceFrame(params: { return undefined; } + // Compact snapshots prune Application/Window containers, leaving viewport inference to fall + // back to a bounding box of leaf elements — a garbage reference frame for screen-point touches. const snapshot = await captureSnapshotForSession(session, flags, sessionStore, contextFromFlags, { interactiveOnly: true, + compact: false, }); const referenceFrame = getSnapshotReferenceFrame(snapshot); if (referenceFrame && session.recording) {