diff --git a/docs/adr/0005-ios-runner-interaction-lifecycle.md b/docs/adr/0005-ios-runner-interaction-lifecycle.md index fb940e26a..e22bcb9ca 100644 --- a/docs/adr/0005-ios-runner-interaction-lifecycle.md +++ b/docs/adr/0005-ios-runner-interaction-lifecycle.md @@ -35,7 +35,7 @@ startup commands still skip that preflight because the first successful command proof for a newly launched runner. Readiness probe commands skip preflight to avoid recursion. The daemon may additionally skip the ready-session `uptime` preflight for an explicit allowlist of -mutating interactions (`tap`, `tapSeries`, `longPress`, `drag`, `dragSeries`, `swipe`) when the same +mutating interactions (`tap`, `tapSeries`, `longPress`, `drag`, `dragSeries`, `swipe`, `scroll`) when the same session produced a healthy mutating response — parsed ok and not carrying `runnerFatal` — for the same `appBundleId` within 5 seconds. This recency lives only on the `RunnerSession` object as `lastHealthyMutation`, so it dies with every invalidation/restart, and it is recorded only after the diff --git a/docs/ios-runner-protocol-optimizations.md b/docs/ios-runner-protocol-optimizations.md index 164955546..2f2ccf831 100644 --- a/docs/ios-runner-protocol-optimizations.md +++ b/docs/ios-runner-protocol-optimizations.md @@ -57,7 +57,7 @@ Acceptance criteria (as shipped): (conservative) commands still preflight; readiness probes and read-only startup commands keep their existing skips. - Recency is derived only from healthy (parsed ok, non-`runnerFatal`) responses of an explicit - mutating allowlist (`tap`, `tapSeries`, `longPress`, `drag`, `dragSeries`, `swipe`) for the same + mutating allowlist (`tap`, `tapSeries`, `longPress`, `drag`, `dragSeries`, `swipe`, `scroll`) for the same `appBundleId`, within a 5s freshness window, and lives only on the session object so it dies with every invalidation/restart. Snapshots and read-only responses never refresh it. - A transport failure after a skipped preflight clears the recency record and marks the error with diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift index ebe461aee..57eb42349 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift @@ -633,53 +633,62 @@ extension RunnerTests { guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else { return Response(ok: false, error: ErrorPayload(message: "drag requires x, y, x2, and y2")) } - let dragPoints = keyboardAvoidingDragPoints(app: activeApp, x: x, y: y, x2: x2, y2: y2) - let dragFrame = resolvedDragVisualizationFrame( - app: activeApp, - x: dragPoints.x, - y: dragPoints.y, - x2: dragPoints.x2, - y2: dragPoints.y2 + return executeDragGesture( + activeApp: activeApp, + x: x, + y: y, + x2: x2, + y2: y2, + durationMs: command.durationMs, + synthesized: command.synthesized == true, + message: "dragged" ) - var fallback: GestureFallback? - if command.synthesized == true { - let durationMs = min(max(command.durationMs ?? 250, 16), 10000) - let (timing, outcome) = performGesture(activeApp, idleTimeout: false) { - synthesizedDragAt( - app: activeApp, - x: dragPoints.x, - y: dragPoints.y, - x2: dragPoints.x2, - y2: dragPoints.y2, - durationMs: durationMs + case .scroll: + // Fused frame-resolve + drag scroll for non-tvOS. Resolves the interaction frame exactly + // like .interactionFrame, computes drag endpoints with the Swift port of + // buildScrollGesturePlan, then runs the same non-synthesized drag path scroll's drag used. + guard let direction = command.direction, + direction == "up" || direction == "down" || direction == "left" || direction == "right" + else { + return Response( + ok: false, + error: ErrorPayload( + code: "INVALID_ARGS", + message: "scroll requires direction up|down|left|right" ) - } - if case .performed = outcome { - return gestureResponse(message: "dragged", timing: timing, frame: .drag(dragFrame)) - } - fallback = gestureFallback(strategy: "xctest-coordinate-drag", from: outcome) + ) } - let holdDuration = command.synthesized == true - ? synthesizedSwipeFallbackHoldDuration(durationMs: command.durationMs ?? 250) - : coordinateDragHoldDuration() - let (timing, outcome) = performGesture(activeApp) { - dragAt( - app: activeApp, - x: dragPoints.x, - y: dragPoints.y, - x2: dragPoints.x2, - y2: dragPoints.y2, - holdDuration: holdDuration + let frame = resolvedTouchReferenceFrame(app: activeApp, appFrame: activeApp.frame) + guard frame.width > 0, frame.height > 0 else { + return Response( + ok: false, + error: ErrorPayload(message: "scroll could not resolve a usable interaction frame") ) } - if let response = unsupportedResponse(for: outcome) { - return response + guard let plan = runnerScrollGesturePlan( + direction: direction, + amount: command.amount, + pixels: command.pixels, + referenceWidth: frame.width, + referenceHeight: frame.height + ) else { + return Response( + ok: false, + error: ErrorPayload( + code: "INVALID_ARGS", + message: "scroll could not compute a gesture plan" + ) + ) } - return gestureResponse( - message: "dragged", - timing: timing, - frame: .drag(dragFrame), - fallback: fallback + return executeDragGesture( + activeApp: activeApp, + x: frame.minX + plan.x1, + y: frame.minY + plan.y1, + x2: frame.minX + plan.x2, + y2: frame.minY + plan.y2, + durationMs: nil, + synthesized: false, + message: "scrolled" ) case .dragSeries: guard let x = command.x, let y = command.y, let x2 = command.x2, let y2 = command.y2 else { @@ -1023,6 +1032,71 @@ extension RunnerTests { } } + /// Shared drag execution for `.drag` and the fused `.scroll`. Mirrors the original `.drag` body + /// exactly: keyboardAvoidingDragPoints -> resolvedDragVisualizationFrame -> synthesized branch + /// (16-10000ms clamp) or non-synthesized dragAt with coordinateDragHoldDuration -> + /// gestureResponse(.drag). `.scroll` always passes synthesized: false, pinning the same + /// non-synthesized drag path scroll's drag used today. + private func executeDragGesture( + activeApp: XCUIApplication, + x: Double, + y: Double, + x2: Double, + y2: Double, + durationMs: Double?, + synthesized: Bool, + message: String + ) -> Response { + let dragPoints = keyboardAvoidingDragPoints(app: activeApp, x: x, y: y, x2: x2, y2: y2) + let dragFrame = resolvedDragVisualizationFrame( + app: activeApp, + x: dragPoints.x, + y: dragPoints.y, + x2: dragPoints.x2, + y2: dragPoints.y2 + ) + var fallback: GestureFallback? + if synthesized { + let durationMs = min(max(durationMs ?? 250, 16), 10000) + let (timing, outcome) = performGesture(activeApp, idleTimeout: false) { + synthesizedDragAt( + app: activeApp, + x: dragPoints.x, + y: dragPoints.y, + x2: dragPoints.x2, + y2: dragPoints.y2, + durationMs: durationMs + ) + } + if case .performed = outcome { + return gestureResponse(message: message, timing: timing, frame: .drag(dragFrame)) + } + fallback = gestureFallback(strategy: "xctest-coordinate-drag", from: outcome) + } + let holdDuration = synthesized + ? synthesizedSwipeFallbackHoldDuration(durationMs: durationMs ?? 250) + : coordinateDragHoldDuration() + let (timing, outcome) = performGesture(activeApp) { + dragAt( + app: activeApp, + x: dragPoints.x, + y: dragPoints.y, + x2: dragPoints.x2, + y2: dragPoints.y2, + holdDuration: holdDuration + ) + } + if let response = unsupportedResponse(for: outcome) { + return response + } + return gestureResponse( + message: message, + timing: timing, + frame: .drag(dragFrame), + fallback: fallback + ) + } + private func currentXCTestFailureCount() -> Int { return testRun?.failureCount ?? 0 } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift index dee4606fd..d69ad666d 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift @@ -144,10 +144,10 @@ final class RunnerCommandJournal { case .snapshot, .screenshot: return false case .tap, .mouseClick, .tapSeries, .longPress, .interactionFrame, .drag, .dragSeries, - .remotePress, .type, .swipe, .findText, .querySelector, .readText, .back, .backInApp, - .backSystem, .home, .rotate, .appSwitcher, .keyboardDismiss, .keyboardReturn, .alert, - .pinch, .rotateGesture, .transformGesture, .recordStart, .recordStop, .status, .uptime, - .shutdown: + .remotePress, .type, .swipe, .scroll, .findText, .querySelector, .readText, .back, + .backInApp, .backSystem, .home, .rotate, .appSwitcher, .keyboardDismiss, .keyboardReturn, + .alert, .pinch, .rotateGesture, .transformGesture, .recordStart, .recordStop, .status, + .uptime, .shutdown: return true } } @@ -219,6 +219,38 @@ extension RunnerTests { XCTAssertEqual(screenshotStatus.lifecycleResponseOk, true) XCTAssertNil(screenshotStatus.lifecycleResponseJson) + let scroll = runnerJournalCommand("scroll", id: "scroll-drag") + journal.accept(command: scroll) + journal.finish( + command: scroll, + response: Response( + ok: true, + data: DataPayload( + message: "scrolled", + gestureStartUptimeMs: 1, + gestureEndUptimeMs: 2, + x: 155, + y: 420, + x2: 155, + y2: 301, + referenceWidth: 300, + referenceHeight: 600 + ) + ) + ) + + let scrollStatus = journal.status(commandId: "scroll-drag") + XCTAssertEqual(scrollStatus.lifecycleState, RunnerCommandLifecycleState.completed.rawValue) + XCTAssertEqual(scrollStatus.lifecycleResponseOk, true) + XCTAssertNotNil(scrollStatus.lifecycleResponseJson) + let scrollResponse = try decodeRunnerJournalResponse(scrollStatus.lifecycleResponseJson) + XCTAssertEqual(scrollResponse.data?.x, 155) + XCTAssertEqual(scrollResponse.data?.y, 420) + XCTAssertEqual(scrollResponse.data?.x2, 155) + XCTAssertEqual(scrollResponse.data?.y2, 301) + XCTAssertEqual(scrollResponse.data?.referenceWidth, 300) + XCTAssertEqual(scrollResponse.data?.referenceHeight, 600) + let largeRead = runnerJournalCommand("readText", id: "large-read") journal.accept(command: largeRead) journal.finish( diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift index e1ecdcf9b..feb26d4bb 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift @@ -11,6 +11,7 @@ enum CommandType: String, Codable { case remotePress case type case swipe + case scroll case findText case querySelector case readText @@ -71,7 +72,8 @@ extension CommandType { // tapSeries/dragSeries are the series forms of tap/drag; keyboardReturn is the sibling // of keyboardDismiss — all three were missing from the historical switch (drift the // table now prevents) and are classified as interactions here. - case .tap, .tapSeries, .longPress, .drag, .dragSeries, .remotePress, .type, .swipe, + // .scroll is the fused frame-resolve + drag scroll; same classification as .drag. + case .tap, .tapSeries, .longPress, .drag, .dragSeries, .remotePress, .type, .swipe, .scroll, .back, .backInApp, .backSystem, .rotate, .appSwitcher, .keyboardDismiss, .keyboardReturn, .pinch, .rotateGesture, .transformGesture: return CommandTraits(isInteraction: true, readOnly: .never, isLifecycle: false) @@ -134,6 +136,8 @@ struct Command: Codable { let dy: Double? let durationMs: Double? let direction: String? + let amount: Double? + let pixels: Double? let orientation: String? let scale: Double? let degrees: Double? diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScrollGesture.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScrollGesture.swift new file mode 100644 index 000000000..12f0d4d80 --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScrollGesture.swift @@ -0,0 +1,200 @@ +import XCTest + +// Swift port of buildScrollGesturePlan from src/core/scroll-gesture.ts. +// +// This is a deliberate two-place invariant: the daemon keeps the TS implementation (for Android, +// recording, and reported-pixels), and the runner places the gesture with this Swift copy. The +// parity test vectors at the bottom of this file mirror src/core/__tests__/scroll-gesture.test.ts — +// if you change the math in either language, update the other and both vector sets. +// +// All inputs here are positive (reference dims, travel, center), so Swift's `.rounded()` +// (half away from zero) matches JS `Math.round` (half up) on every value computed below. + +struct RunnerScrollGesturePlan { + let x1: Double + let y1: Double + let x2: Double + let y2: Double + let travelPixels: Double +} + +private let runnerDefaultScrollAmount = 0.6 +private let runnerDefaultEdgePaddingFraction = 0.05 + +func runnerScrollGesturePlan( + direction: String, + amount: Double?, + pixels: Double?, + referenceWidth: Double, + referenceHeight: Double +) -> RunnerScrollGesturePlan? { + // Mirror the TS INVALID_ARGS contract: non-positive or non-finite amount/pixels are rejected + // rather than clamped into a journaled 1px scroll. The daemon validates before sending, so + // this only triggers for non-daemon wire clients. + if let amount, !(amount.isFinite && amount > 0) { return nil } + if let pixels, !(pixels.isFinite && pixels > 0) { return nil } + let axisLength = (direction == "up" || direction == "down") ? referenceHeight : referenceWidth + let requestedAmount = amount ?? runnerDefaultScrollAmount + let requestedPixels: Double = + pixels.map { max(1, $0.rounded()) } ?? (axisLength * requestedAmount).rounded() + let edgePadding = max(1, (axisLength * runnerDefaultEdgePaddingFraction).rounded()) + let maxTravelPixels = max(1, axisLength - edgePadding * 2) + let travelPixels = max(1, min(requestedPixels, maxTravelPixels)) + let halfTravel = (travelPixels / 2).rounded() + let centerX = (referenceWidth / 2).rounded() + let centerY = (referenceHeight / 2).rounded() + + func plan(_ x1: Double, _ y1: Double, _ x2: Double, _ y2: Double) -> RunnerScrollGesturePlan { + RunnerScrollGesturePlan(x1: x1, y1: y1, x2: x2, y2: y2, travelPixels: travelPixels) + } + + switch direction { + case "up": + return plan(centerX, centerY - halfTravel, centerX, centerY + halfTravel) + case "down": + return plan(centerX, centerY + halfTravel, centerX, centerY - halfTravel) + case "left": + return plan(centerX - halfTravel, centerY, centerX + halfTravel, centerY) + case "right": + return plan(centerX + halfTravel, centerY, centerX - halfTravel, centerY) + default: + return nil + } +} + +extension RunnerTests { + // Cross-language parity vectors mirroring src/core/__tests__/scroll-gesture.test.ts. Keep these + // in sync with the vitest vectors so the two buildScrollGesturePlan implementations cannot drift. + + func testRunnerScrollGesturePlanMapsRelativeAmount() throws { + let plan = try XCTUnwrap( + runnerScrollGesturePlan( + direction: "down", + amount: 0.5, + pixels: nil, + referenceWidth: 400, + referenceHeight: 800 + ) + ) + XCTAssertEqual(plan.x1, 200) + XCTAssertEqual(plan.y1, 600) + XCTAssertEqual(plan.x2, 200) + XCTAssertEqual(plan.y2, 200) + XCTAssertEqual(plan.travelPixels, 400) + } + + func testRunnerScrollGesturePlanPixelsDown() throws { + // 300x600, down, pixels 120 -> (150,360)->(150,240), travel 120. + let plan = try XCTUnwrap( + runnerScrollGesturePlan( + direction: "down", + amount: nil, + pixels: 120, + referenceWidth: 300, + referenceHeight: 600 + ) + ) + XCTAssertEqual(plan.x1, 150) + XCTAssertEqual(plan.y1, 360) + XCTAssertEqual(plan.x2, 150) + XCTAssertEqual(plan.y2, 240) + XCTAssertEqual(plan.travelPixels, 120) + } + + func testRunnerScrollGesturePlanClampsAmountAboveOne() throws { + // 400x800, down, amount 2 -> requested 1600 clamps to the safe band (720): (200,760)->(200,40). + let plan = try XCTUnwrap( + runnerScrollGesturePlan( + direction: "down", + amount: 2, + pixels: nil, + referenceWidth: 400, + referenceHeight: 800 + ) + ) + XCTAssertEqual(plan.x1, 200) + XCTAssertEqual(plan.y1, 760) + XCTAssertEqual(plan.x2, 200) + XCTAssertEqual(plan.y2, 40) + XCTAssertEqual(plan.travelPixels, 720) + } + + func testRunnerScrollGesturePlanFloorsTinyFrames() throws { + // 2x2, down, pixels 10 engages every max(1, ...) floor and the .5 rounding cases the two + // ports must agree on (halfTravel 0.5 -> 1, center 1 from 2/2): (1,2)->(1,0), travel 1. + let plan = try XCTUnwrap( + runnerScrollGesturePlan( + direction: "down", + amount: nil, + pixels: 10, + referenceWidth: 2, + referenceHeight: 2 + ) + ) + XCTAssertEqual(plan.x1, 1) + XCTAssertEqual(plan.y1, 2) + XCTAssertEqual(plan.x2, 1) + XCTAssertEqual(plan.y2, 0) + XCTAssertEqual(plan.travelPixels, 1) + } + + func testRunnerScrollGesturePlanClampsToSafeBand() throws { + // 300x600, right, pixels 500 clamps travel to the safe band (270). + let plan = try XCTUnwrap( + runnerScrollGesturePlan( + direction: "right", + amount: nil, + pixels: 500, + referenceWidth: 300, + referenceHeight: 600 + ) + ) + XCTAssertEqual(plan.x1, 285) + XCTAssertEqual(plan.x2, 15) + XCTAssertEqual(plan.y1, 300) + XCTAssertEqual(plan.y2, 300) + XCTAssertEqual(plan.travelPixels, 270) + } + + func testRunnerScrollGesturePlanRejectsUnknownDirection() { + XCTAssertNil( + runnerScrollGesturePlan( + direction: "sideways", + amount: nil, + pixels: 100, + referenceWidth: 300, + referenceHeight: 600 + ) + ) + } + + func testRunnerScrollGesturePlanRejectsInvalidAmountAndPixels() { + XCTAssertNil( + runnerScrollGesturePlan( + direction: "down", + amount: 0, + pixels: nil, + referenceWidth: 300, + referenceHeight: 600 + ) + ) + XCTAssertNil( + runnerScrollGesturePlan( + direction: "down", + amount: nil, + pixels: -10, + referenceWidth: 300, + referenceHeight: 600 + ) + ) + XCTAssertNil( + runnerScrollGesturePlan( + direction: "down", + amount: .infinity, + pixels: nil, + referenceWidth: 300, + referenceHeight: 600 + ) + ) + } +} diff --git a/src/core/__tests__/scroll-gesture.test.ts b/src/core/__tests__/scroll-gesture.test.ts index de1485890..3b5fa639a 100644 --- a/src/core/__tests__/scroll-gesture.test.ts +++ b/src/core/__tests__/scroll-gesture.test.ts @@ -2,12 +2,16 @@ import { test } from 'vitest'; import assert from 'node:assert/strict'; import { AppError } from '../../utils/errors.ts'; import { + assertScrollGestureInput, buildScrollGesturePlan, buildSwipeGesturePlan, clampGestureCoordinate, pointFromPercent, } from '../scroll-gesture.ts'; +// The buildScrollGesturePlan vectors below are the canonical cross-language parity vectors, +// mirrored by RunnerTests+ScrollGesture.swift (runnerScrollGesturePlan). If you change the scroll +// math, update both this suite and the Swift parity test so the two ports cannot drift silently. test('buildScrollGesturePlan maps relative amount to viewport travel', () => { const plan = buildScrollGesturePlan({ direction: 'down', @@ -29,6 +33,71 @@ test('buildScrollGesturePlan maps relative amount to viewport travel', () => { }); }); +test('buildScrollGesturePlan maps explicit pixels below the safe band cap', () => { + const plan = buildScrollGesturePlan({ + direction: 'down', + pixels: 120, + referenceWidth: 300, + referenceHeight: 600, + }); + + assert.deepEqual(plan, { + direction: 'down', + x1: 150, + y1: 360, + x2: 150, + y2: 240, + referenceWidth: 300, + referenceHeight: 600, + amount: undefined, + pixels: 120, + }); +}); + +test('buildScrollGesturePlan clamps amounts above 1 to the safe gesture band', () => { + const plan = buildScrollGesturePlan({ + direction: 'down', + amount: 2, + referenceWidth: 400, + referenceHeight: 800, + }); + + assert.deepEqual(plan, { + direction: 'down', + x1: 200, + y1: 760, + x2: 200, + y2: 40, + referenceWidth: 400, + referenceHeight: 800, + amount: 2, + pixels: 720, + }); +}); + +test('buildScrollGesturePlan floors padding and travel on tiny frames', () => { + // 2x2 engages every max(1, ...) floor and the .5 rounding cases the two ports must agree on + // (halfTravel 0.5 -> 1, center 1 from 2/2). + const plan = buildScrollGesturePlan({ + direction: 'down', + pixels: 10, + referenceWidth: 2, + referenceHeight: 2, + }); + + assert.deepEqual(plan, { + direction: 'down', + x1: 1, + y1: 2, + x2: 1, + y2: 0, + referenceWidth: 2, + referenceHeight: 2, + amount: undefined, + pixels: 1, + }); +}); + test('buildScrollGesturePlan clamps pixel travel to the safe gesture band', () => { const plan = buildScrollGesturePlan({ direction: 'right', @@ -60,6 +129,36 @@ test('buildScrollGesturePlan rejects invalid amounts', () => { ); }); +test('assertScrollGestureInput accepts valid amount and pixels inputs', () => { + assert.doesNotThrow(() => assertScrollGestureInput({})); + assert.doesNotThrow(() => assertScrollGestureInput({ amount: 0.5 })); + assert.doesNotThrow(() => assertScrollGestureInput({ pixels: 120 })); +}); + +test('assertScrollGestureInput rejects non-positive or non-finite amounts', () => { + for (const amount of [0, -1, Number.NaN, Number.POSITIVE_INFINITY]) { + assert.throws( + () => assertScrollGestureInput({ amount }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + /amount must be a positive number/i.test(error.message), + ); + } +}); + +test('assertScrollGestureInput rejects non-positive or non-finite pixels', () => { + for (const pixels of [0, -10, Number.NaN, Number.POSITIVE_INFINITY]) { + assert.throws( + () => assertScrollGestureInput({ pixels }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + /pixels must be a positive integer/i.test(error.message), + ); + } +}); + test('buildSwipeGesturePlan maps finger direction through the shared scroll planner', () => { const plan = buildSwipeGesturePlan({ direction: 'left', diff --git a/src/core/scroll-gesture.ts b/src/core/scroll-gesture.ts index 32eb0efaf..80f0d67de 100644 --- a/src/core/scroll-gesture.ts +++ b/src/core/scroll-gesture.ts @@ -113,6 +113,19 @@ export function buildScrollGesturePlan(options: ScrollGestureOptions): ScrollGes } } +/** + * Validates pre-frame scroll inputs (amount/pixels) the same way buildScrollGesturePlan would, + * so the daemon throws INVALID_ARGS for bad inputs BEFORE sending the fused runner `scroll` + * command (previously validation ran between the frame request and the drag). The resolved + * values are discarded; only their throw-on-invalid behavior is reused. + */ +export function assertScrollGestureInput(options: { amount?: number; pixels?: number }): void { + resolveRequestedAmount(options.amount); + if (options.pixels !== undefined) { + normalizeRequestedPixels(options.pixels); + } +} + export function buildSwipeGesturePlan(options: SwipeGestureOptions): SwipeGesturePlan { const scrollPlan = buildScrollGesturePlan({ ...options, diff --git a/src/platforms/ios/__tests__/index.test.ts b/src/platforms/ios/__tests__/index.test.ts index 6922cbe7b..a29d79a05 100644 --- a/src/platforms/ios/__tests__/index.test.ts +++ b/src/platforms/ios/__tests__/index.test.ts @@ -282,19 +282,22 @@ for (const [name, device] of [ }); } -for (const [name, device, expectedGestureFields] of [ - ['iOS', IOS_TEST_SIMULATOR, { durationMs: 250 }], - ['macOS', MACOS_TEST_DEVICE, {}], +for (const [name, device] of [ + ['iOS', IOS_TEST_SIMULATOR], + ['macOS', MACOS_TEST_DEVICE], ] as const) { - test(`iosRunnerOverrides maps ${name} scroll to the expected drag path`, async () => { - mockRunIosRunnerCommand - .mockResolvedValueOnce({ - x: 0, - y: 0, - referenceWidth: 400, - referenceHeight: 800, - }) - .mockResolvedValueOnce({}); + test(`iosRunnerOverrides maps ${name} scroll to a single fused scroll command`, async () => { + // The fused scroll resolves the frame and performs the drag in one runner lifecycle command; + // no separate interactionFrame request and no durationMs (the runner pins the non-synthesized + // drag path that ignores it). + mockRunIosRunnerCommand.mockResolvedValueOnce({ + x: 200, + y: 640, + x2: 200, + y2: 160, + referenceWidth: 400, + referenceHeight: 800, + }); const { overrides } = iosRunnerOverrides(device, { appBundleId: 'com.example.App', @@ -302,13 +305,10 @@ for (const [name, device, expectedGestureFields] of [ await overrides.scroll('down'); - assert.deepEqual(mockRunIosRunnerCommand.mock.calls[1]?.[1], { - command: 'drag', - x: 200, - y: 640, - x2: 200, - y2: 160, - ...expectedGestureFields, + assert.equal(mockRunIosRunnerCommand.mock.calls.length, 1); + assert.deepEqual(mockRunIosRunnerCommand.mock.calls[0]?.[1], { + command: 'scroll', + direction: 'down', appBundleId: 'com.example.App', }); }); diff --git a/src/platforms/ios/__tests__/runner-client.test.ts b/src/platforms/ios/__tests__/runner-client.test.ts index a1b3438f2..224f44c70 100644 --- a/src/platforms/ios/__tests__/runner-client.test.ts +++ b/src/platforms/ios/__tests__/runner-client.test.ts @@ -33,7 +33,7 @@ import type { DeviceInfo } from '../../../utils/device.ts'; import { flushDiagnosticsToSessionFile, withDiagnosticsScope } from '../../../utils/diagnostics.ts'; import { AppError } from '../../../utils/errors.ts'; import type { RunnerCommand } from '../runner-contract.ts'; -import { withRunnerCommandId } from '../runner-contract.ts'; +import { isReadOnlyRunnerCommand, withRunnerCommandId } from '../runner-contract.ts'; import { assertSafeDerivedCleanup, isRetryableRunnerError, @@ -125,6 +125,7 @@ const runnerProtocolCommandFixtures: Record { assert.deepEqual(command, { command: 'uptime', commandId: 'runner-existing' }); }); +test('scroll is a mutating, command-id-tracked runner command', () => { + // Omission from isReadOnlyRunnerCommand classifies the fused scroll as mutating, routing it + // through single-send (no transport retry), command-id tracking, and status recovery. + assert.equal(isReadOnlyRunnerCommand('scroll'), false); + + const command = withRunnerCommandId({ command: 'scroll', direction: 'down', pixels: 120 }); + assert.match(command.commandId ?? '', /^runner-/); +}); + test('withRunnerCommandId does not add command ids to status probes', () => { const command = withRunnerCommandId({ command: 'status', diff --git a/src/platforms/ios/__tests__/runner-session.test.ts b/src/platforms/ios/__tests__/runner-session.test.ts index 3e2db7fe0..5a919fbfc 100644 --- a/src/platforms/ios/__tests__/runner-session.test.ts +++ b/src/platforms/ios/__tests__/runner-session.test.ts @@ -875,6 +875,7 @@ const ALLOWLISTED_MUTATIONS: { name: string; command: Record }[ command: { command: 'dragSeries', x: 1, y: 2, x2: 3, y2: 4, count: 2 }, }, { name: 'swipe', command: { command: 'swipe', x: 1, y: 2, x2: 3, y2: 4 } }, + { name: 'scroll', command: { command: 'scroll', direction: 'down' } }, ]; for (const { name, command } of ALLOWLISTED_MUTATIONS) { diff --git a/src/platforms/ios/interactions.ts b/src/platforms/ios/interactions.ts index aff2834e1..ddbc8148c 100644 --- a/src/platforms/ios/interactions.ts +++ b/src/platforms/ios/interactions.ts @@ -1,6 +1,9 @@ -import { AppError } from '../../utils/errors.ts'; import type { DeviceInfo } from '../../utils/device.ts'; -import { buildScrollGesturePlan, type ScrollDirection } from '../../core/scroll-gesture.ts'; +import { + assertScrollGestureInput, + buildScrollGesturePlan, + type ScrollDirection, +} from '../../core/scroll-gesture.ts'; import { runIosRunnerCommand } from './runner-client.ts'; import type { RunnerCommand } from './runner-contract.ts'; import type { @@ -15,13 +18,6 @@ type AppleRemoteButton = NonNullable; type RunIosRunnerCommand = typeof runIosRunnerCommand; type RunnerOpts = RunnerCallOptions; -type InteractionFrame = { - originX: number; - originY: number; - referenceWidth: number; - referenceHeight: number; -}; - const IOS_SWIPE_DEFAULT_DURATION_MS = 250; const IOS_SWIPE_MIN_DURATION_MS = 16; const IOS_SWIPE_MAX_DURATION_MS = 10_000; @@ -321,7 +317,6 @@ async function runAppleScroll( runnerOpts: RunnerOpts, direction: ScrollDirection, options?: { amount?: number; pixels?: number }, - interactionFrame?: InteractionFrame, ): Promise> { if (device.target === 'tv') { const runnerResult = await runRunnerCommand( @@ -332,61 +327,47 @@ async function runAppleScroll( return normalizeIosScrollResult(runnerResult, options); } - const frame = - interactionFrame ?? - (await resolveAppleInteractionFrame(runRunnerCommand, device, ctx, runnerOpts)); - const plan = buildScrollGesturePlan({ - direction, - amount: options?.amount, - pixels: options?.pixels, - referenceWidth: frame.referenceWidth, - referenceHeight: frame.referenceHeight, - }); - const runnerResult = await runRunnerCommand( - device, - iosDragCommand( - device, - ctx, - frame.originX + plan.x1, - frame.originY + plan.y1, - frame.originX + plan.x2, - frame.originY + plan.y2, - undefined, - { defaultDurationMs: IOS_SWIPE_DEFAULT_DURATION_MS }, - ), - runnerOpts, - ); - return normalizeIosScrollResult(runnerResult, { - amount: plan.amount, - pixels: plan.pixels, - preferProvidedPixels: true, - }); -} + // Validate amount/pixels up front so bad inputs throw INVALID_ARGS before any runner command + // is sent (previously validation ran between the frame request and the drag, so a bad amount + // could cost one runner request first). + assertScrollGestureInput(options ?? {}); -async function resolveAppleInteractionFrame( - runRunnerCommand: RunIosRunnerCommand, - device: DeviceInfo, - ctx: RunnerContext, - runnerOpts: RunnerOpts, -): Promise { + // Single fused lifecycle command: the runner resolves the interaction frame and runs the drag. + // durationMs is intentionally not sent — scroll's drag used 250ms today, but the runner's + // non-synthesized drag path ignores it (coordinateDragHoldDuration + XCTest default drag + // velocity), and the fused `scroll` handler pins that same non-synthesized path. const runnerResult = await runRunnerCommand( device, - { command: 'interactionFrame', appBundleId: ctx.appBundleId }, + { + command: 'scroll', + direction, + ...(options?.amount !== undefined ? { amount: options.amount } : {}), + ...(options?.pixels !== undefined ? { pixels: options.pixels } : {}), + appBundleId: ctx.appBundleId, + }, runnerOpts, ); - const originX = readFiniteNumber(runnerResult.x); - const originY = readFiniteNumber(runnerResult.y); + const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); - if ( - originX === undefined || - originY === undefined || - referenceWidth === undefined || - referenceHeight === undefined - ) { - throw new AppError('COMMAND_FAILED', 'interactionFrame did not return a usable frame'); + if (referenceWidth !== undefined && referenceHeight !== undefined) { + // Recompute the plan from the runner's resolved frame so reported pixels match the planned + // travel (TS keeps buildScrollGesturePlan for Android and recording anyway). + const plan = buildScrollGesturePlan({ + direction, + amount: options?.amount, + pixels: options?.pixels, + referenceWidth, + referenceHeight, + }); + return normalizeIosScrollResult(runnerResult, { + amount: options?.amount, + pixels: plan.pixels, + preferProvidedPixels: true, + }); } - return { originX, originY, referenceWidth, referenceHeight }; + // Missing frame dims: derive pixels from endpoint travel instead of throwing. + return normalizeIosScrollResult(runnerResult, { amount: options?.amount }); } function readFiniteNumber(value: unknown): number | undefined { diff --git a/src/platforms/ios/runner-contract.ts b/src/platforms/ios/runner-contract.ts index 1a1583c9d..6cffaedee 100644 --- a/src/platforms/ios/runner-contract.ts +++ b/src/platforms/ios/runner-contract.ts @@ -17,12 +17,18 @@ export type RunnerCommand = { | 'mouseClick' | 'tapSeries' | 'longPress' + // Runner-supported but no longer sent by this daemon (scroll fuses frame resolution into + // the runner-side `scroll` command); kept for wire compatibility with older daemons. | 'interactionFrame' | 'drag' | 'dragSeries' | 'remotePress' | 'type' | 'swipe' + // Fused frame-resolve + drag scroll (non-tvOS). Intentionally mutating: omitted from + // isReadOnlyRunnerCommand so it routes through single-send, command-id tracking, and + // lost-response status recovery like other gestures. + | 'scroll' | 'findText' | 'querySelector' | 'readText' @@ -70,6 +76,8 @@ export type RunnerCommand = { dy?: number; durationMs?: number; direction?: ScrollDirection; + amount?: number; + pixels?: number; orientation?: DeviceRotation; scale?: number; degrees?: number; diff --git a/src/platforms/ios/runner-session.ts b/src/platforms/ios/runner-session.ts index 44c8eb8ee..ab3b774c0 100644 --- a/src/platforms/ios/runner-session.ts +++ b/src/platforms/ios/runner-session.ts @@ -57,8 +57,6 @@ const runnerSessionLocks = new Map>(); const RUNNER_READY_PREFLIGHT_TIMEOUT_MS = 1_000; const RUNNER_STALE_BUNDLE_UNINSTALL_TIMEOUT_MS = 10_000; const RUNNER_PREFLIGHT_SKIP_FRESHNESS_MS = 5_000; -// Today's scroll verb is covered via 'drag'. The fused 'scroll' runner command -// (PR #760) must be added here when it lands, or hot scroll loops lose the skip. const PREFLIGHT_SKIP_ELIGIBLE_RUNNER_COMMANDS = new Set([ 'tap', 'tapSeries', @@ -66,6 +64,7 @@ const PREFLIGHT_SKIP_ELIGIBLE_RUNNER_COMMANDS = new Set { const actual = await importOriginal(); @@ -20,6 +21,15 @@ const iosSimulator: DeviceInfo = { booted: true, }; +const tvOsSimulator: DeviceInfo = { + platform: 'ios', + id: 'tv-sim-1', + name: 'Apple TV', + kind: 'simulator', + target: 'tv', + booted: true, +}; + const mockRunIosRunnerCommand = vi.mocked(runIosRunnerCommand); beforeEach(() => { @@ -36,22 +46,53 @@ test('resolveAppleBackRunnerCommand maps explicit back modes to runner commands' assert.equal(resolveAppleBackRunnerCommand('system'), 'backSystem'); }); -test('ios scroll reports planned pixels without recomputing from runner coordinates', async () => { +test('ios scroll sends a single fused scroll command and reports planned pixels', async () => { + const commands: RunnerCommand[] = []; mockRunIosRunnerCommand.mockImplementation(async (_device, command) => { - if (command.command === 'interactionFrame') { + commands.push(command); + if (command.command === 'scroll') { + // x2/y2 endpoint travel is 119 here; planned pixels (120) must be preferred. return { - x: 5, - y: 10, + x: 155, + y: 420, + x2: 155, + y2: 301, referenceWidth: 300, referenceHeight: 600, + gestureStartUptimeMs: 1, + gestureEndUptimeMs: 2, }; } - if (command.command === 'drag') { + throw new Error(`Unexpected runner command: ${command.command}`); + }); + const interactor = await getInteractor(iosSimulator, { appBundleId: 'com.example.app' }); + const result = await interactor.scroll('down', { pixels: 120 }); + + // The common iOS scroll path issues exactly one lifecycle command and NO 'interactionFrame'. + assert.deepEqual(commands, [ + { command: 'scroll', direction: 'down', pixels: 120, appBundleId: 'com.example.app' }, + ]); + assert.deepEqual(result, { + x1: 155, + y1: 420, + x2: 155, + y2: 301, + referenceWidth: 300, + referenceHeight: 600, + pixels: 120, + }); +}); + +test('ios amount-based scroll recomputes pixels from the runner reference frame', async () => { + const commands: RunnerCommand[] = []; + mockRunIosRunnerCommand.mockImplementation(async (_device, command) => { + commands.push(command); + if (command.command === 'scroll') { return { - x: 155, - y: 420, - x2: 155, - y2: 301, + x: 150, + y: 450, + x2: 150, + y2: 150, referenceWidth: 300, referenceHeight: 600, }; @@ -59,11 +100,63 @@ test('ios scroll reports planned pixels without recomputing from runner coordina throw new Error(`Unexpected runner command: ${command.command}`); }); const interactor = await getInteractor(iosSimulator, { appBundleId: 'com.example.app' }); + const result = await interactor.scroll('down', { amount: 0.5 }); + + assert.deepEqual(commands, [ + { command: 'scroll', direction: 'down', amount: 0.5, appBundleId: 'com.example.app' }, + ]); + // amount 0.5 against a 600px vertical axis -> 300 planned pixels. + const amount = + result && typeof result === 'object' && 'amount' in result ? result.amount : undefined; + const pixels = + result && typeof result === 'object' && 'pixels' in result ? result.pixels : undefined; + assert.equal(amount, 0.5); + assert.equal(pixels, 300); +}); + +test('tvOS scroll sends only a remotePress command (behavior unchanged)', async () => { + const commands: RunnerCommand[] = []; + mockRunIosRunnerCommand.mockImplementation(async (_device, command) => { + commands.push(command); + return {}; + }); + const interactor = await getInteractor(tvOsSimulator, { appBundleId: 'com.example.app' }); + + await interactor.scroll('down'); + + assert.deepEqual(commands, [ + { command: 'remotePress', remoteButton: 'down', appBundleId: 'com.example.app' }, + ]); +}); + +test('ios scroll rejects non-positive amount before sending any runner command', async () => { + mockRunIosRunnerCommand.mockImplementation(async () => ({})); + const interactor = await getInteractor(iosSimulator, { appBundleId: 'com.example.app' }); + + await assert.rejects( + () => interactor.scroll('down', { amount: 0 }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + /amount must be a positive number/i.test(error.message), + ); + assert.equal(mockRunIosRunnerCommand.mock.calls.length, 0); +}); + +test('ios scroll without reference dims derives pixels from endpoint travel', async () => { + mockRunIosRunnerCommand.mockImplementation(async (_device, command) => { + if (command.command === 'scroll') { + return { x: 150, y: 450, x2: 150, y2: 150 }; + } + throw new Error(`Unexpected runner command: ${command.command}`); + }); + const interactor = await getInteractor(iosSimulator, { appBundleId: 'com.example.app' }); const result = await interactor.scroll('down', { pixels: 120 }); const pixels = result && typeof result === 'object' && 'pixels' in result ? result.pixels : undefined; - assert.equal(pixels, 120); + // No referenceWidth/Height in the response -> pixels fall back to |y2 - y1| = 300. + assert.equal(pixels, 300); }); test('ios fill sends one verified replacement text-entry command at the target coordinates', async () => {