From 17345dd12f88bf77c922111b833d59777ba8d10d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 10 Jun 2026 22:10:59 +0200 Subject: [PATCH 1/7] fix: add iOS private AX snapshot fallback --- ...AgentDeviceRunnerUITests-Bridging-Header.h | 1 + .../RunnerAXSnapshotBridge.h | 14 + .../RunnerAXSnapshotBridge.m | 254 ++++++++++++++++++ .../RunnerTests+AXSnapshotFallback.swift | 184 +++++++++++++ .../RunnerTests+Snapshot.swift | 112 +++++++- .../RunnerTests+SystemModal.swift | 83 +++++- src/daemon/handlers/__tests__/find.test.ts | 111 ++++++++ src/daemon/handlers/find.ts | 48 +++- 8 files changed, 790 insertions(+), 17 deletions(-) create mode 100644 ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.h create mode 100644 ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m create mode 100644 ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h index 74bc7ce87..e888189c6 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h @@ -1,2 +1,3 @@ #import "RunnerObjCExceptionCatcher.h" +#import "RunnerAXSnapshotBridge.h" #import "RunnerSynthesizedGesture.h" diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.h b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.h new file mode 100644 index 000000000..24d95de74 --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.h @@ -0,0 +1,14 @@ +#import +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface RunnerAXSnapshotBridge : NSObject + ++ (NSDictionary *)snapshotTreeForApplication:(XCUIApplication *)application + maxDepth:(NSInteger)maxDepth + maxNodes:(NSInteger)maxNodes; + +@end + +NS_ASSUME_NONNULL_END diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m new file mode 100644 index 000000000..8f70f0f56 --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m @@ -0,0 +1,254 @@ +#import "RunnerAXSnapshotBridge.h" + +#import +#import + +static NSString *const RunnerAXSnapshotOkKey = @"ok"; +static NSString *const RunnerAXSnapshotErrorKey = @"error"; +static NSString *const RunnerAXSnapshotRootKey = @"root"; +static NSString *const RunnerAXSnapshotTruncatedKey = @"truncated"; + +typedef id (*RunnerAXObjectMsgSend)(id, SEL); +typedef NSInteger (*RunnerAXIntegerMsgSend)(id, SEL); +typedef id (*RunnerAXSnapshotMsgSend)(id, SEL, id, id, id, NSError **); + +@implementation RunnerAXSnapshotBridge + ++ (NSDictionary *)snapshotTreeForApplication:(XCUIApplication *)application + maxDepth:(NSInteger)maxDepth + maxNodes:(NSInteger)maxNodes +{ + @try { + id axClient = [self objectFrom:XCUIDevice.sharedDevice selectorName:@"accessibilityInterface"]; + if (nil == axClient) { + return [self failure:@"XCUIDevice accessibilityInterface is unavailable"]; + } + + id target = [self accessibilityApplicationForApplication:application axClient:axClient]; + if (nil == target) { + return [self failure:@"Could not match active AX application for XCTest application"]; + } + + NSMutableDictionary *parameters = [NSMutableDictionary dictionary]; + id defaults = [self objectFrom:axClient selectorName:@"defaultParameters"]; + if ([defaults isKindOfClass:NSDictionary.class]) { + [parameters addEntriesFromDictionary:(NSDictionary *)defaults]; + } + parameters[@"maxDepth"] = @(MAX(0, maxDepth)); + parameters[@"maxChildren"] = @(MAX(1, maxNodes)); + parameters[@"maxArrayCount"] = @(MAX(1, maxNodes)); + parameters[@"traverseFromParentsToChildren"] = @YES; + + SEL requestSelector = NSSelectorFromString(@"requestSnapshotForElement:attributes:parameters:error:"); + if (![axClient respondsToSelector:requestSelector]) { + return [self failure:@"AX client does not support requestSnapshotForElement"]; + } + + NSError *error = nil; + NSArray *attributes = @[ + @"elementType", + @"identifier", + @"label", + @"value", + @"frame", + @"enabled", + @"selected", + @"hasFocus", + @"children", + ]; + RunnerAXSnapshotMsgSend send = (RunnerAXSnapshotMsgSend)objc_msgSend; + id result = send(axClient, requestSelector, target, attributes, parameters.copy, &error); + if (nil == result) { + return [self failure:error.localizedDescription ?: @"AX snapshot request returned nil"]; + } + + id root = nil; + @try { + root = [result valueForKey:@"_rootElementSnapshot"]; + } @catch (NSException *exception) { + root = nil; + } + if (nil == root) { + root = result; + } + + BOOL truncated = NO; + NSInteger nodeCount = 0; + NSDictionary *rootNode = [self dictionaryForSnapshot:root + depth:0 + maxDepth:maxDepth + maxNodes:maxNodes + nodeCount:&nodeCount + truncated:&truncated]; + if (nil == rootNode) { + return [self failure:@"AX snapshot root could not be serialized"]; + } + + return @{ + RunnerAXSnapshotOkKey: @YES, + RunnerAXSnapshotRootKey: rootNode, + RunnerAXSnapshotTruncatedKey: @(truncated), + }; + } @catch (NSException *exception) { + return [self failure:exception.reason ?: exception.name ?: @"AX snapshot bridge exception"]; + } +} + ++ (NSDictionary *)failure:(NSString *)message +{ + return @{ + RunnerAXSnapshotOkKey: @NO, + RunnerAXSnapshotErrorKey: message, + }; +} + ++ (id)objectFrom:(id)target selectorName:(NSString *)selectorName +{ + SEL selector = NSSelectorFromString(selectorName); + if (![target respondsToSelector:selector]) { + return nil; + } + RunnerAXObjectMsgSend send = (RunnerAXObjectMsgSend)objc_msgSend; + return send(target, selector); +} + ++ (NSInteger)integerFrom:(id)target selectorName:(NSString *)selectorName +{ + SEL selector = NSSelectorFromString(selectorName); + if (![target respondsToSelector:selector]) { + return 0; + } + RunnerAXIntegerMsgSend send = (RunnerAXIntegerMsgSend)objc_msgSend; + return send(target, selector); +} + ++ (id)accessibilityApplicationForApplication:(XCUIApplication *)application axClient:(id)axClient +{ + NSInteger targetProcessID = [self integerFrom:application selectorName:@"processID"]; + id activeApplications = [self objectFrom:axClient selectorName:@"activeApplications"]; + if (![activeApplications isKindOfClass:NSArray.class]) { + return nil; + } + + for (id candidate in (NSArray *)activeApplications) { + NSInteger candidateProcessID = [self integerFrom:candidate selectorName:@"processIdentifier"]; + if (targetProcessID > 0 && candidateProcessID == targetProcessID) { + return candidate; + } + } + return nil; +} + ++ (nullable NSDictionary *)dictionaryForSnapshot:(id)snapshot + depth:(NSInteger)depth + maxDepth:(NSInteger)maxDepth + maxNodes:(NSInteger)maxNodes + nodeCount:(NSInteger *)nodeCount + truncated:(BOOL *)truncated +{ + if (nil == snapshot || *nodeCount >= maxNodes) { + *truncated = YES; + return nil; + } + + *nodeCount += 1; + NSMutableDictionary *result = [NSMutableDictionary dictionary]; + result[@"type"] = [self numberValueForKey:@"elementType" snapshot:snapshot] ?: @0; + result[@"identifier"] = [self stringValueForKey:@"identifier" snapshot:snapshot] ?: @""; + result[@"label"] = [self stringValueForKey:@"label" snapshot:snapshot] ?: @""; + result[@"value"] = [self stringValueForKey:@"value" snapshot:snapshot] ?: @""; + result[@"frame"] = [self frameValueForSnapshot:snapshot]; + result[@"enabled"] = [self boolNumberForKey:@"enabled" snapshot:snapshot defaultValue:YES]; + result[@"selected"] = [self boolNumberForKey:@"selected" snapshot:snapshot defaultValue:NO]; + result[@"focused"] = [self boolNumberForKey:@"hasFocus" snapshot:snapshot defaultValue:NO]; + + NSMutableArray *children = [NSMutableArray array]; + if (depth < maxDepth) { + for (id child in [self childrenForSnapshot:snapshot]) { + NSDictionary *childNode = [self dictionaryForSnapshot:child + depth:depth + 1 + maxDepth:maxDepth + maxNodes:maxNodes + nodeCount:nodeCount + truncated:truncated]; + if (nil != childNode) { + [children addObject:childNode]; + } + if (*nodeCount >= maxNodes) { + *truncated = YES; + break; + } + } + } + result[@"children"] = children; + return result.copy; +} + ++ (NSArray *)childrenForSnapshot:(id)snapshot +{ + id children = nil; + @try { + children = [snapshot valueForKey:@"children"]; + } @catch (NSException *exception) { + children = nil; + } + return [children isKindOfClass:NSArray.class] ? children : @[]; +} + ++ (nullable NSNumber *)numberValueForKey:(NSString *)key snapshot:(id)snapshot +{ + id value = nil; + @try { + value = [snapshot valueForKey:key]; + } @catch (NSException *exception) { + return nil; + } + return [value isKindOfClass:NSNumber.class] ? value : nil; +} + ++ (nullable NSString *)stringValueForKey:(NSString *)key snapshot:(id)snapshot +{ + id value = nil; + @try { + value = [snapshot valueForKey:key]; + } @catch (NSException *exception) { + return nil; + } + if (nil == value || value == NSNull.null) { + return nil; + } + if ([value isKindOfClass:NSString.class]) { + return [(NSString *)value stringByTrimmingCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet]; + } + return [[value description] stringByTrimmingCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet]; +} + ++ (NSNumber *)boolNumberForKey:(NSString *)key snapshot:(id)snapshot defaultValue:(BOOL)defaultValue +{ + NSNumber *value = [self numberValueForKey:key snapshot:snapshot]; + return nil == value ? @(defaultValue) : @([value boolValue]); +} + ++ (NSDictionary *)frameValueForSnapshot:(id)snapshot +{ + CGRect frame = CGRectZero; + @try { + id value = [snapshot valueForKey:@"frame"]; + if ([value isKindOfClass:NSValue.class]) { + [(NSValue *)value getValue:&frame]; + } + } @catch (NSException *exception) { + frame = CGRectZero; + } + if (CGRectIsNull(frame) || CGRectIsInfinite(frame)) { + frame = CGRectZero; + } + return @{ + @"x": @(CGRectGetMinX(frame)), + @"y": @(CGRectGetMinY(frame)), + @"width": @(CGRectGetWidth(frame)), + @"height": @(CGRectGetHeight(frame)), + }; +} + +@end diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift new file mode 100644 index 000000000..d2f161dab --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift @@ -0,0 +1,184 @@ +import XCTest + +extension RunnerTests { + private static let privateAXSnapshotMaxNodes = 5_000 + + func privateAXSnapshotFallback( + app: XCUIApplication, + options: SnapshotOptions, + reason: String + ) -> DataPayload? { + #if os(iOS) && targetEnvironment(simulator) + let maxDepth = options.depth ?? 64 + let response = RunnerAXSnapshotBridge.snapshotTree( + for: app, + maxDepth: maxDepth, + maxNodes: Self.privateAXSnapshotMaxNodes + ) + guard response["ok"] as? Bool == true else { + let error = response["error"] as? String ?? "unknown private AX snapshot failure" + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_FAILED=%@", error) + return nil + } + guard let root = response["root"] as? [String: Any] else { + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_FAILED=missing root") + return nil + } + + let viewport = safeSnapshotViewport(app: app) + var nodes: [SnapshotNode] = [] + appendPrivateAXNode( + root, + to: &nodes, + options: options, + viewport: viewport, + depth: 0, + parentIndex: nil + ) + if nodes.count <= 1 { + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_SPARSE=%ld", nodes.count) + return nil + } + + let truncated = (response["truncated"] as? Bool) == true + let message = + "Recovered iOS snapshot with private AX fallback after \(reason). This backend is simulator-only, experimental, and may expose a partial tree." + NSLog( + "AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_USED reason=%@ nodes=%ld truncated=%@", + reason, + nodes.count, + truncated ? "true" : "false" + ) + return DataPayload(message: message, nodes: nodes, truncated: truncated) + #else + return nil + #endif + } + + private func appendPrivateAXNode( + _ rawNode: [String: Any], + to nodes: inout [SnapshotNode], + options: SnapshotOptions, + viewport: CGRect, + depth: Int, + parentIndex: Int? + ) { + if let limit = options.depth, depth > limit { return } + + let rect = privateAXRect(rawNode["frame"]) + let label = privateAXString(rawNode["label"]) + let identifier = privateAXString(rawNode["identifier"]) + let value = privateAXString(rawNode["value"]) + let rawType = privateAXInt(rawNode["type"]) ?? 0 + let typeName = elementTypeName(rawElementType: rawType) + let enabled = privateAXBool(rawNode["enabled"]) ?? true + let visible = isVisibleInViewport(rect, viewport) + let hasContent = !label.isEmpty || !identifier.isEmpty || !value.isEmpty + let isRoot = parentIndex == nil + + let include: Bool + if isRoot { + include = true + } else if options.interactiveOnly && !visible { + include = false + } else if let scope = options.scope?.trimmingCharacters(in: .whitespacesAndNewlines), !scope.isEmpty { + let haystack = [label, identifier, value].joined(separator: "\n") + include = haystack.localizedCaseInsensitiveContains(scope) + } else if options.compact { + include = hasContent || privateAXLikelyInteractive(rawElementType: rawType) + } else { + include = true + } + + let currentIndex: Int? + if include { + currentIndex = nodes.count + nodes.append( + SnapshotNode( + index: nodes.count, + type: typeName, + label: label.isEmpty ? nil : label, + identifier: identifier.isEmpty ? nil : identifier, + value: value.isEmpty ? nil : value, + rect: snapshotRect(from: rect), + enabled: enabled, + focused: privateAXBool(rawNode["focused"]) == true ? true : nil, + selected: privateAXBool(rawNode["selected"]) == true ? true : nil, + hittable: visible && enabled && privateAXLikelyInteractive(rawElementType: rawType), + depth: depth, + parentIndex: parentIndex, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + ) + } else { + currentIndex = parentIndex + } + + guard let children = rawNode["children"] as? [[String: Any]] else { + return + } + for child in children { + appendPrivateAXNode( + child, + to: &nodes, + options: options, + viewport: viewport, + depth: depth + 1, + parentIndex: currentIndex + ) + } + } + + private func elementTypeName(rawElementType: Int) -> String { + if let type = XCUIElement.ElementType(rawValue: UInt(rawElementType)) { + return elementTypeName(type) + } + return "Element(\(rawElementType))" + } + + private func privateAXLikelyInteractive(rawElementType: Int) -> Bool { + guard let type = XCUIElement.ElementType(rawValue: UInt(rawElementType)) else { + return false + } + return interactiveTypes.contains(type) || Self.scrollContainerTypes.contains(type) + } + + private func privateAXString(_ value: Any?) -> String { + guard let value else { return "" } + if let string = value as? String { + return string.trimmingCharacters(in: .whitespacesAndNewlines) + } + return String(describing: value).trimmingCharacters(in: .whitespacesAndNewlines) + } + + private func privateAXInt(_ value: Any?) -> Int? { + if let value = value as? Int { return value } + if let value = value as? NSNumber { return value.intValue } + return nil + } + + private func privateAXBool(_ value: Any?) -> Bool? { + if let value = value as? Bool { return value } + if let value = value as? NSNumber { return value.boolValue } + return nil + } + + private func privateAXRect(_ value: Any?) -> CGRect { + guard let frame = value as? [String: Any] else { + return .zero + } + return CGRect( + x: privateAXDouble(frame["x"]) ?? 0, + y: privateAXDouble(frame["y"]) ?? 0, + width: privateAXDouble(frame["width"]) ?? 0, + height: privateAXDouble(frame["height"]) ?? 0 + ) + } + + private func privateAXDouble(_ value: Any?) -> Double? { + if let value = value as? Double { return value } + if let value = value as? NSNumber { return value.doubleValue } + return nil + } +} diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index 5b90f056e..63324e304 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -18,7 +18,7 @@ extension RunnerTests { .other, .staticText ] - private static let scrollContainerTypes: Set = [ + static let scrollContainerTypes: Set = [ .collectionView, .scrollView, .table @@ -99,12 +99,18 @@ extension RunnerTests { } func snapshotFast(app: XCUIApplication, options: SnapshotOptions) throws -> DataPayload { - if options.interactiveOnly && options.compact { - return snapshotFlatInteractive(app: app, options: options) - } if let blocking = blockingSystemAlertSnapshot() { return blocking } + if options.interactiveOnly && options.compact { + let payload = snapshotFlatInteractive(app: app, options: options) + return snapshotWithPrivateAXFallbackIfSparse( + payload, + app: app, + options: options, + reason: "compact interactive XCTest snapshot was sparse" + ) + } let capture = try captureSnapshotTraversalContext( app: app, @@ -243,10 +249,41 @@ extension RunnerTests { } - return DataPayload( + let payload = DataPayload( nodes: applyHiddenContentHints(hiddenContentHintsByNodeIndex, to: nodes), truncated: false ) + return snapshotWithPrivateAXFallbackIfSparse( + payload, + app: app, + options: options, + reason: "XCTest snapshot returned a sparse application/window tree" + ) + } + + private func snapshotWithPrivateAXFallbackIfSparse( + _ payload: DataPayload, + app: XCUIApplication, + options: SnapshotOptions, + reason: String + ) -> DataPayload { + guard let nodes = payload.nodes, Self.isSparseApplicationWindowTree(nodes), + let fallback = privateAXSnapshotFallback( + app: app, + options: options, + reason: reason + ) + else { + return payload + } + return fallback + } + + private static func isSparseApplicationWindowTree(_ nodes: [SnapshotNode]) -> Bool { + guard !nodes.isEmpty, nodes.count <= 2 else { return false } + return nodes.allSatisfy { node in + node.type == "Application" || node.type == "Window" + } } func snapshotRaw(app: XCUIApplication, options: SnapshotOptions) throws -> DataPayload { @@ -307,7 +344,12 @@ extension RunnerTests { } try walk(context.rootSnapshot, depth: 0, parentIndex: nil) - return DataPayload(nodes: nodes, truncated: false) + return snapshotWithPrivateAXFallbackIfSparse( + DataPayload(nodes: nodes, truncated: false), + app: app, + options: options, + reason: "XCTest raw snapshot returned a sparse application/window tree" + ) } private func snapshotFlatInteractive(app: XCUIApplication, options: SnapshotOptions) -> DataPayload { @@ -401,6 +443,15 @@ extension RunnerTests { } return .context(context) } catch let failure as SnapshotCaptureFailure { + if Self.isAxSnapshotFailure(failure), + let fallback = privateAXSnapshotFallback( + app: app, + options: options, + reason: failure.message + ) + { + return .fallback(fallback) + } if let fallback = snapshotDepthLimitedAccessibilityFallback( app: app, options: options, @@ -513,6 +564,47 @@ extension RunnerTests { XCTAssertTrue(message.contains(Self.axSnapshotHint)) } + func testSparseApplicationWindowTreeDetectionIsConservative() { + let root = compactInteractiveRootNode(rect: .zero) + let window = SnapshotNode( + index: 1, + type: "Window", + label: nil, + identifier: nil, + value: nil, + rect: snapshotRect(from: .zero), + enabled: true, + focused: nil, + selected: nil, + hittable: false, + depth: 1, + parentIndex: 0, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + let button = SnapshotNode( + index: 1, + type: "Button", + label: "Sign in", + identifier: nil, + value: nil, + rect: snapshotRect(from: .zero), + enabled: true, + focused: nil, + selected: nil, + hittable: true, + depth: 1, + parentIndex: 0, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + + XCTAssertTrue(Self.isSparseApplicationWindowTree([root])) + XCTAssertTrue(Self.isSparseApplicationWindowTree([root, window])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, button])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, window, button])) + } + func testRawSnapshotTooLargeFailureIsStructured() { let failure = rawSnapshotTooLargeFailure(nodeCount: Self.rawSnapshotMaxNodes + 1) @@ -692,7 +784,7 @@ extension RunnerTests { return nil } - private func safeSnapshotViewport(app: XCUIApplication) -> CGRect { + func safeSnapshotViewport(app: XCUIApplication) -> CGRect { safely("SNAPSHOT_VIEWPORT", CGRect.infinite) { snapshotViewport(app: app) } } @@ -725,6 +817,10 @@ extension RunnerTests { || (normalized.contains("illegal argument") && normalized.contains("snapshot")) } + private static func isAxSnapshotFailure(_ failure: SnapshotCaptureFailure) -> Bool { + failure.code == Self.axSnapshotErrorCode || isAxIllegalArgument(failure.message) + } + private func evaluateSnapshot( _ snapshot: XCUIElementSnapshot, in context: SnapshotTraversalContext @@ -855,7 +951,7 @@ extension RunnerTests { return nil } - private func isVisibleInViewport(_ rect: CGRect, _ viewport: CGRect) -> Bool { + func isVisibleInViewport(_ rect: CGRect, _ viewport: CGRect) -> Bool { if rect.isNull || rect.isEmpty { return false } return rect.intersects(viewport) } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift index 01f0344f2..b2dd8b2ec 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift @@ -29,12 +29,27 @@ extension RunnerTests { } var nodes: [SnapshotNode] = [modalNode] + for content in informativeElements(in: modal, excluding: actions) { + guard let contentNode = safeMakeSnapshotNode( + element: content, + index: nodes.count, + type: elementTypeName(content.elementType), + depth: 1, + parentIndex: 0, + hittableOverride: false + ) else { + continue + } + nodes.append(contentNode) + } + for action in actions { guard let actionNode = safeMakeSnapshotNode( element: action, index: nodes.count, type: elementTypeName(action.elementType), depth: 1, + parentIndex: 0, hittableOverride: true ) else { continue @@ -101,9 +116,7 @@ extension RunnerTests { func actionableElements(in element: XCUIElement) -> [XCUIElement] { var seen = Set() var actions: [XCUIElement] = [] - let descendants = safeElementsQuery { - element.descendants(matching: .any).allElementsBoundByIndex - } + let descendants = actionableTypes.flatMap { modalDescendants(in: element, matching: $0) } for candidate in descendants { if !safeIsActionableCandidate(candidate, seen: &seen) { continue } actions.append(candidate) @@ -124,6 +137,61 @@ extension RunnerTests { } } + private func informativeElements(in element: XCUIElement, excluding actions: [XCUIElement]) -> [XCUIElement] { + let actionKeys = Set(actions.map(systemModalElementKey)) + var seen = Set() + var contents: [XCUIElement] = [] + let descendants = readableSystemModalTypes.flatMap { + modalDescendants(in: element, matching: $0, limit: 2) + } + for candidate in descendants { + guard let key = safeInformativeElementKey(candidate, actionKeys: actionKeys) else { + continue + } + if seen.contains(key) { continue } + seen.insert(key) + contents.append(candidate) + } + return contents + } + + private var readableSystemModalTypes: [XCUIElement.ElementType] { + [.staticText, .textView] + } + + private func modalDescendants( + in element: XCUIElement, + matching type: XCUIElement.ElementType, + limit: Int? = nil + ) -> [XCUIElement] { + let elements = safeElementsQuery { + element.descendants(matching: type).allElementsBoundByIndex + } + guard let limit else { + return elements + } + return Array(elements.prefix(limit)) + } + + private func safeInformativeElementKey(_ candidate: XCUIElement, actionKeys: Set) -> String? { + safely("MODAL_CONTENT") { () -> String? in + let key = systemModalElementKey(candidate) + if actionKeys.contains(key) { return nil } + if actionableTypes.contains(candidate.elementType) { return nil } + if !candidate.exists { return nil } + let frame = candidate.frame + if frame.isNull || frame.isEmpty { return nil } + let label = candidate.label.trimmingCharacters(in: .whitespacesAndNewlines) + if label.isEmpty { return nil } + return key + } + } + + private func systemModalElementKey(_ element: XCUIElement) -> String { + let frame = element.frame + return "\(element.elementType.rawValue)-\(frame.origin.x)-\(frame.origin.y)-\(frame.size.width)-\(frame.size.height)-\(element.label)-\(element.identifier)" + } + private func preferredSystemModalTitle(_ element: XCUIElement) -> String { let label = element.label if !label.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { @@ -143,6 +211,7 @@ extension RunnerTests { labelOverride: String? = nil, identifierOverride: String? = nil, depth: Int, + parentIndex: Int? = nil, hittableOverride: Bool? = nil ) -> SnapshotNode { let label = (labelOverride ?? element.label).trimmingCharacters(in: .whitespacesAndNewlines) @@ -155,11 +224,11 @@ extension RunnerTests { value: nil, rect: snapshotRect(from: element.frame), enabled: element.isEnabled, - focused: elementHasFocus(element) ? true : nil, - selected: element.isSelected ? true : nil, + focused: nil, + selected: nil, hittable: hittableOverride ?? element.isHittable, depth: depth, - parentIndex: nil, + parentIndex: parentIndex, hiddenContentAbove: nil, hiddenContentBelow: nil ) @@ -172,6 +241,7 @@ extension RunnerTests { labelOverride: String? = nil, identifierOverride: String? = nil, depth: Int, + parentIndex: Int? = nil, hittableOverride: Bool? = nil ) -> SnapshotNode? { safely("MODAL_NODE") { @@ -182,6 +252,7 @@ extension RunnerTests { labelOverride: labelOverride, identifierOverride: identifierOverride, depth: depth, + parentIndex: parentIndex, hittableOverride: hittableOverride ) } diff --git a/src/daemon/handlers/__tests__/find.test.ts b/src/daemon/handlers/__tests__/find.test.ts index 4a64a6772..8f9773d22 100644 --- a/src/daemon/handlers/__tests__/find.test.ts +++ b/src/daemon/handlers/__tests__/find.test.ts @@ -160,6 +160,117 @@ test('handleFindCommands click prefers on-screen duplicate text matches', async expect(invokeCalls[0]!.positionals?.[0]).toBe('@e3'); }); +test('handleFindCommands click retries full snapshot when iOS compact snapshot is sparse', async () => { + const snapshotResponses = [ + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + rect: { x: 0, y: 0, width: 0, height: 0 }, + }, + ], + }, + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + hittable: false, + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + index: 1, + type: 'Button', + label: 'Search', + hittable: true, + rect: { x: 80, y: 792, width: 78, height: 48 }, + parentIndex: 0, + }, + ], + }, + ]; + mockDispatch.mockImplementation(async (_device, command) => { + if (command === 'snapshot') return snapshotResponses.shift() ?? { nodes: [] }; + return {}; + }); + + const { response, invokeCalls } = await runFindClickScenario({ + positionals: ['Search', 'click'], + }); + + expect(response.ok).toBe(true); + expect(invokeCalls[0]!.positionals?.[0]).toBe('@e2'); + const snapshotCalls = mockDispatch.mock.calls.filter((call) => call[1] === 'snapshot'); + expect(snapshotCalls).toHaveLength(2); + expect(snapshotCalls[0]![4]).toMatchObject({ + snapshotInteractiveOnly: true, + snapshotCompact: true, + }); + expect(snapshotCalls[1]![4]).toMatchObject({ + snapshotInteractiveOnly: false, + snapshotCompact: false, + }); +}); + +test('handleFindCommands click scopes full retry when unscoped iOS fallback fails', async () => { + const snapshotResponses = [ + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + rect: { x: 0, y: 0, width: 0, height: 0 }, + }, + ], + }, + new Error('unscoped snapshot failed'), + { + backend: 'xctest', + nodes: [ + { + index: 0, + type: 'Application', + hittable: false, + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + index: 1, + type: 'Button', + label: 'Search', + hittable: true, + rect: { x: 80, y: 792, width: 78, height: 48 }, + parentIndex: 0, + }, + ], + }, + ]; + mockDispatch.mockImplementation(async (_device, command) => { + if (command !== 'snapshot') return {}; + const response = snapshotResponses.shift(); + if (response instanceof Error) throw response; + return response ?? { nodes: [] }; + }); + + const { response, invokeCalls } = await runFindClickScenario({ + positionals: ['Search', 'click'], + }); + + expect(response.ok).toBe(true); + expect(invokeCalls[0]!.positionals?.[0]).toBe('@e1'); + expect(response.ok ? response.data : undefined).toMatchObject({ x: 119, y: 816 }); + const snapshotCalls = mockDispatch.mock.calls.filter((call) => call[1] === 'snapshot'); + expect(snapshotCalls).toHaveLength(3); + expect(snapshotCalls[2]![4]).toMatchObject({ + snapshotInteractiveOnly: false, + snapshotCompact: false, + snapshotScope: 'Search', + }); +}); + test('handleFindCommands click prefers semantic controls over matching containers', async () => { const { response, invokeCalls } = await runFindClickScenario({ positionals: ['Later', 'click'], diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index 457ffc27b..217fe9fd4 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -105,7 +105,7 @@ export async function handleFindCommands(params: { if (lastNodes && now - lastSnapshotAt < 750 && !getActiveAndroidSnapshotFreshness(session)) { return { nodes: lastNodes }; } - const { snapshot } = await captureSnapshot({ + let { snapshot } = await captureSnapshot({ device, session, flags: { @@ -117,6 +117,38 @@ export async function handleFindCommands(params: { logPath, snapshotScope: scope, }); + if (interactiveOnly && isSparseIosInteractiveSnapshot(snapshot)) { + try { + const fullCapture = await captureSnapshot({ + device, + session, + flags: { + ...req.flags, + snapshotInteractiveOnly: false, + snapshotCompact: false, + }, + outPath: req.flags?.out, + logPath, + snapshotScope: scope, + }); + snapshot = fullCapture.snapshot; + } catch (error) { + if (!shouldScopeFind(locator)) throw error; + const scopedFullCapture = await captureSnapshot({ + device, + session, + flags: { + ...req.flags, + snapshotInteractiveOnly: false, + snapshotCompact: false, + }, + outPath: req.flags?.out, + logPath, + snapshotScope: query, + }); + snapshot = scopedFullCapture.snapshot; + } + } const nodes = snapshot.nodes; lastSnapshotAt = now; lastNodes = nodes; @@ -174,6 +206,11 @@ export async function handleFindCommands(params: { return handler ? handler() : null; } +function isSparseIosInteractiveSnapshot(snapshot: SnapshotState): boolean { + if (snapshot.backend !== 'xctest' || snapshot.nodes.length !== 1) return false; + return snapshot.nodes[0]?.type === 'Application'; +} + // --- Per-action handlers --- function isReadOnlyFindAction(action: string): boolean { @@ -194,7 +231,10 @@ function resolveFindMatch(params: { flags: DaemonRequest['flags']; }): FindMatchResult { const { nodes, locator, query, requiresRect, flags } = params; - const bestMatches = findBestMatchesByLocator(nodes, locator, query, { + const searchableNodes = requiresRect + ? nodes.filter((node) => !isRootInteractionContainer(node, nodes[0])) + : nodes; + const bestMatches = findBestMatchesByLocator(searchableNodes, locator, query, { requireRect: requiresRect, }); if (requiresRect) { @@ -281,7 +321,9 @@ function resolveInteractiveMatchNode( nodes: SnapshotState['nodes'], node: SnapshotState['nodes'][number], ): SnapshotState['nodes'][number] { - return resolveActionableTouchNode(nodes, node); + const resolved = resolveActionableTouchNode(nodes, node); + if (isRootInteractionContainer(resolved, nodes[0]) && node.rect) return node; + return resolved; } function isRootInteractionContainer( From 9f42418dbfd07607412b42128b939ada6413e74e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 11 Jun 2026 10:49:18 +0200 Subject: [PATCH 2/7] fix: add public iOS snapshot query recovery --- .../RunnerTests+Snapshot.swift | 144 ++++++++++++++---- 1 file changed, 112 insertions(+), 32 deletions(-) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index 63324e304..f83764269 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -11,6 +11,14 @@ extension RunnerTests { private static let rawSnapshotMaxNodes = 5_000 private static let rawSnapshotTooLargeHint = "Raw iOS snapshot exceeded the runner payload guard. Use regular snapshot for visible UI, or scope/depth-limit raw snapshot when inspecting a large accessibility tree." + private static let publicQueryRecoveryMessage = + "Recovered iOS snapshot through XCTest accessibility element queries after the public snapshot tree was sparse. The recovered nodes are a flattened view of on-screen controls." + private static let structuralOnlyNodeTypes: Set = [ + "Application", + "Window", + "Other", + "ScrollView" + ] private static let collapsedTabCandidateTypes: Set = [ .button, .link, @@ -253,7 +261,7 @@ extension RunnerTests { nodes: applyHiddenContentHints(hiddenContentHintsByNodeIndex, to: nodes), truncated: false ) - return snapshotWithPrivateAXFallbackIfSparse( + return snapshotWithFallbackIfSparse( payload, app: app, options: options, @@ -261,6 +269,25 @@ extension RunnerTests { ) } + private func snapshotWithFallbackIfSparse( + _ payload: DataPayload, + app: XCUIApplication, + options: SnapshotOptions, + reason: String + ) -> DataPayload { + guard let nodes = payload.nodes, Self.isSparseApplicationWindowTree(nodes) else { + return payload + } + if let fallback = publicQuerySnapshotFallback( + app: app, + options: options, + reason: reason + ) { + return fallback + } + return privateAXSnapshotFallback(app: app, options: options, reason: reason) ?? payload + } + private func snapshotWithPrivateAXFallbackIfSparse( _ payload: DataPayload, app: XCUIApplication, @@ -280,12 +307,48 @@ extension RunnerTests { } private static func isSparseApplicationWindowTree(_ nodes: [SnapshotNode]) -> Bool { - guard !nodes.isEmpty, nodes.count <= 2 else { return false } + guard !nodes.isEmpty else { return false } return nodes.allSatisfy { node in - node.type == "Application" || node.type == "Window" + let hasContent = node.label?.isEmpty == false + || node.identifier?.isEmpty == false + || node.value?.isEmpty == false + return !hasContent + && !node.hittable + && Self.structuralOnlyNodeTypes.contains(node.type) } } + private func publicQuerySnapshotFallback( + app: XCUIApplication, + options: SnapshotOptions, + reason: String + ) -> DataPayload? { + let fallback = snapshotFlatInteractive( + app: app, + options: SnapshotOptions( + interactiveOnly: false, + compact: options.compact, + depth: options.depth, + scope: options.scope, + raw: false + ) + ) + guard let nodes = fallback.nodes, !Self.isSparseApplicationWindowTree(nodes) else { + return nil + } + NSLog( + "AGENT_DEVICE_RUNNER_PUBLIC_QUERY_SNAPSHOT_USED reason=%@ nodes=%ld truncated=%@", + reason, + nodes.count, + fallback.truncated == true ? "true" : "false" + ) + return DataPayload( + message: Self.publicQueryRecoveryMessage, + nodes: nodes, + truncated: true + ) + } + func snapshotRaw(app: XCUIApplication, options: SnapshotOptions) throws -> DataPayload { if let blocking = blockingSystemAlertSnapshot() { return blocking @@ -566,43 +629,60 @@ extension RunnerTests { func testSparseApplicationWindowTreeDetectionIsConservative() { let root = compactInteractiveRootNode(rect: .zero) - let window = SnapshotNode( - index: 1, - type: "Window", - label: nil, - identifier: nil, - value: nil, - rect: snapshotRect(from: .zero), - enabled: true, - focused: nil, - selected: nil, - hittable: false, - depth: 1, - parentIndex: 0, - hiddenContentAbove: nil, - hiddenContentBelow: nil - ) - let button = SnapshotNode( - index: 1, + func node( + index: Int, + type: String, + label: String? = nil, + identifier: String? = nil, + value: String? = nil, + hittable: Bool = false + ) -> SnapshotNode { + SnapshotNode( + index: index, + type: type, + label: label, + identifier: identifier, + value: value, + rect: snapshotRect(from: .zero), + enabled: true, + focused: nil, + selected: nil, + hittable: hittable, + depth: 1, + parentIndex: 0, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + } + let window = node(index: 1, type: "Window") + let structuralOther = node(index: 2, type: "Other") + let structuralScroll = node(index: 3, type: "ScrollView") + let labeledOther = node(index: 4, type: "Other", label: "Visible content") + let identifiedOther = node(index: 5, type: "Other", identifier: "test-id") + let valuedOther = node(index: 6, type: "Other", value: "Selected") + let hittableOther = node(index: 7, type: "Other", hittable: true) + let button = node( + index: 8, type: "Button", label: "Sign in", - identifier: nil, - value: nil, - rect: snapshotRect(from: .zero), - enabled: true, - focused: nil, - selected: nil, - hittable: true, - depth: 1, - parentIndex: 0, - hiddenContentAbove: nil, - hiddenContentBelow: nil + hittable: true ) XCTAssertTrue(Self.isSparseApplicationWindowTree([root])) XCTAssertTrue(Self.isSparseApplicationWindowTree([root, window])) + XCTAssertTrue(Self.isSparseApplicationWindowTree([root, window, structuralOther, structuralScroll])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, labeledOther])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, identifiedOther])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, valuedOther])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([root, hittableOther])) XCTAssertFalse(Self.isSparseApplicationWindowTree([root, button])) XCTAssertFalse(Self.isSparseApplicationWindowTree([root, window, button])) + XCTAssertFalse(Self.isSparseApplicationWindowTree([])) + } + + func testPublicQueryRecoveryMessageExplainsFlattenedFallback() { + XCTAssertTrue(Self.publicQueryRecoveryMessage.contains("XCTest accessibility element queries")) + XCTAssertTrue(Self.publicQueryRecoveryMessage.contains("flattened")) } func testRawSnapshotTooLargeFailureIsStructured() { From 59af49e119e8e75c6ac07533d6c781fefefd3606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 11 Jun 2026 12:22:46 +0200 Subject: [PATCH 3/7] fix(ios): make private AX snapshot fallback recover deep React Native trees Four fixes that turn the #758 private AX fallback from works-on-one-tree-shape into reliable on Bluesky Home: - Depth ladder: the AX server rejects bulk snapshot requests outright (kAXErrorIllegalArgument) once requested depth crosses a tree-size-dependent limit that moves with live content. Retry at 56/40/24/12 instead of giving up after one attempt at 64. - Real attribute identifiers: the server silently ignored the raw keypath strings the bridge passed, so every node came back with a zero frame (breaking ref taps and the interactive/compact filters, which is why 'snapshot -i -c' stayed sparse). Map keypaths through XCElementSnapshot.axAttributesForElementSnapshotKeyPaths (it returns an NSSet) and drop the mapper's expensive extras (automation type, window display id, base type) that pushed deep requests past the 30s main-thread watchdog. - Viewport from the private root frame when the public windows query degrades to an infinite viewport, so off-screen drawer content stops passing the visibility filter. - Runner source fingerprint now includes .m/.h, so bridge edits stop reusing stale cached runner builds. Also hardens the bridge per review: UInt(exactly:) for untrusted element types, pid_t-sized objc_msgSend for process id matching, and objCType-checked NSValue frame decoding. --- .../RunnerAXSnapshotBridge.m | 47 ++++++++++++- .../RunnerTests+AXSnapshotFallback.swift | 69 +++++++++++++++---- src/platforms/ios/runner-xctestrun.ts | 2 + 3 files changed, 103 insertions(+), 15 deletions(-) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m index 8f70f0f56..b6a34db49 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerAXSnapshotBridge.m @@ -45,7 +45,7 @@ @implementation RunnerAXSnapshotBridge } NSError *error = nil; - NSArray *attributes = @[ + NSArray *keyPaths = @[ @"elementType", @"identifier", @"label", @@ -56,6 +56,38 @@ @implementation RunnerAXSnapshotBridge @"hasFocus", @"children", ]; + // The AX server expects real accessibility attribute identifiers, not snapshot keypath + // strings; passing raw keypaths silently drops attributes it does not recognize (frame + // came back zeroed). XCElementSnapshot owns the keypath -> AX attribute mapping. + NSArray *attributes = keyPaths; + Class snapshotClass = NSClassFromString(@"XCElementSnapshot"); + SEL mapSelector = NSSelectorFromString(@"axAttributesForElementSnapshotKeyPaths:isMacOS:"); + if ([snapshotClass respondsToSelector:mapSelector]) { + typedef id (*RunnerAXMapMsgSend)(id, SEL, id, BOOL); + RunnerAXMapMsgSend mapSend = (RunnerAXMapMsgSend)objc_msgSend; + id mapped = mapSend(snapshotClass, mapSelector, keyPaths, NO); + if ([mapped isKindOfClass:NSSet.class]) { + mapped = [(NSSet *)mapped allObjects]; + } + if ([mapped isKindOfClass:NSArray.class] && [(NSArray *)mapped count] > 0) { + // The mapper expands keypaths with extra attributes (automation type, window display + // id, base type) that are disproportionately expensive for the AX server to compute + // on large React Native trees. Keep only the attributes we actually consume. + NSArray *needed = @[ @"ElementType", @"Identifier", @"Label", @"Value", @"Frame", + @"Enabled", @"Selected", @"Focus" ]; + NSMutableArray *filtered = [NSMutableArray array]; + for (id attribute in (NSArray *)mapped) { + NSString *name = [attribute description]; + for (NSString *suffix in needed) { + if ([name hasSuffix:suffix]) { + [filtered addObject:attribute]; + break; + } + } + } + attributes = filtered.count > 0 ? filtered : mapped; + } + } RunnerAXSnapshotMsgSend send = (RunnerAXSnapshotMsgSend)objc_msgSend; id result = send(axClient, requestSelector, target, attributes, parameters.copy, &error); if (nil == result) { @@ -118,6 +150,16 @@ + (NSInteger)integerFrom:(id)target selectorName:(NSString *)selectorName if (![target respondsToSelector:selector]) { return 0; } + // processID/processIdentifier return pid_t (int32); reading them through an + // NSInteger-returning cast is not upper-32-bit safe on arm64. Use the method + // signature to pick the correctly sized call. + NSMethodSignature *signature = [target methodSignatureForSelector:selector]; + const char *returnType = signature.methodReturnType; + if (returnType != NULL && strcmp(returnType, @encode(int)) == 0) { + typedef int (*RunnerAXIntMsgSend)(id, SEL); + RunnerAXIntMsgSend send = (RunnerAXIntMsgSend)objc_msgSend; + return (NSInteger)send(target, selector); + } RunnerAXIntegerMsgSend send = (RunnerAXIntegerMsgSend)objc_msgSend; return send(target, selector); } @@ -234,7 +276,8 @@ + (NSDictionary *)frameValueForSnapshot:(id)snapshot CGRect frame = CGRectZero; @try { id value = [snapshot valueForKey:@"frame"]; - if ([value isKindOfClass:NSValue.class]) { + if ([value isKindOfClass:NSValue.class] + && strcmp([(NSValue *)value objCType], @encode(CGRect)) == 0) { [(NSValue *)value getValue:&frame]; } } @catch (NSException *exception) { diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift index d2f161dab..de2ee0385 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift @@ -2,6 +2,13 @@ import XCTest extension RunnerTests { private static let privateAXSnapshotMaxNodes = 5_000 + /// Deep React Native trees make the AX server reject bulk snapshot requests outright with + /// kAXErrorIllegalArgument once the requested depth crosses a tree-size-dependent limit + /// (observed between depth 56 and 64 on the Bluesky Home feed; the limit moves with live + /// content). Retrying the same request at a shallower depth succeeds, so on failure we walk + /// this ladder instead of giving up. Capped at 4 attempts to bound worst-case latency on + /// apps where the AX surface is genuinely unavailable. + static let privateAXSnapshotDepthLadder = [56, 40, 24, 12] func privateAXSnapshotFallback( app: XCUIApplication, @@ -9,15 +16,33 @@ extension RunnerTests { reason: String ) -> DataPayload? { #if os(iOS) && targetEnvironment(simulator) - let maxDepth = options.depth ?? 64 - let response = RunnerAXSnapshotBridge.snapshotTree( - for: app, - maxDepth: maxDepth, - maxNodes: Self.privateAXSnapshotMaxNodes + let requestedDepth = options.depth ?? 64 + var attemptDepths = [requestedDepth] + attemptDepths.append( + contentsOf: Self.privateAXSnapshotDepthLadder.filter { $0 < requestedDepth } ) + var response: [String: Any] = [:] + var effectiveDepth = requestedDepth + var lastError = "unknown private AX snapshot failure" + for depth in attemptDepths { + response = RunnerAXSnapshotBridge.snapshotTree( + for: app, + maxDepth: depth, + maxNodes: Self.privateAXSnapshotMaxNodes + ) + if response["ok"] as? Bool == true { + effectiveDepth = depth + break + } + lastError = response["error"] as? String ?? lastError + NSLog( + "AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_DEPTH_RETRY depth=%ld error=%@", + depth, + lastError + ) + } guard response["ok"] as? Bool == true else { - let error = response["error"] as? String ?? "unknown private AX snapshot failure" - NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_FAILED=%@", error) + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_FAILED=%@", lastError) return nil } guard let root = response["root"] as? [String: Any] else { @@ -25,7 +50,15 @@ extension RunnerTests { return nil } - let viewport = safeSnapshotViewport(app: app) + // The public windows query backing safeSnapshotViewport can fail on the same apps that + // need this fallback, degrading to an infinite viewport that marks off-screen content + // (e.g. closed drawer menus at negative x) as visible and tappable. The private root's + // own frame is the reliable screen rect here. + var viewport = safeSnapshotViewport(app: app) + let rootFrame = privateAXRect(root["frame"]) + if viewport.isInfinite || viewport.isNull || viewport.isEmpty, !rootFrame.isEmpty { + viewport = rootFrame + } var nodes: [SnapshotNode] = [] appendPrivateAXNode( root, @@ -40,13 +73,19 @@ extension RunnerTests { return nil } - let truncated = (response["truncated"] as? Bool) == true - let message = + let depthLimited = effectiveDepth < requestedDepth + let truncated = (response["truncated"] as? Bool) == true || depthLimited + var message = "Recovered iOS snapshot with private AX fallback after \(reason). This backend is simulator-only, experimental, and may expose a partial tree." + if depthLimited { + message += + " The AX server rejected deeper requests; this tree is capped at depth \(effectiveDepth) — re-run with --depth \(effectiveDepth) --scope to inspect deeper content." + } NSLog( - "AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_USED reason=%@ nodes=%ld truncated=%@", + "AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_USED reason=%@ nodes=%ld depth=%ld truncated=%@", reason, nodes.count, + effectiveDepth, truncated ? "true" : "false" ) return DataPayload(message: message, nodes: nodes, truncated: truncated) @@ -131,14 +170,18 @@ extension RunnerTests { } private func elementTypeName(rawElementType: Int) -> String { - if let type = XCUIElement.ElementType(rawValue: UInt(rawElementType)) { + if let raw = UInt(exactly: rawElementType), + let type = XCUIElement.ElementType(rawValue: raw) + { return elementTypeName(type) } return "Element(\(rawElementType))" } private func privateAXLikelyInteractive(rawElementType: Int) -> Bool { - guard let type = XCUIElement.ElementType(rawValue: UInt(rawElementType)) else { + guard let raw = UInt(exactly: rawElementType), + let type = XCUIElement.ElementType(rawValue: raw) + else { return false } return interactiveTypes.contains(type) || Self.scrollContainerTypes.contains(type) diff --git a/src/platforms/ios/runner-xctestrun.ts b/src/platforms/ios/runner-xctestrun.ts index 618c594c6..7a5c31366 100644 --- a/src/platforms/ios/runner-xctestrun.ts +++ b/src/platforms/ios/runner-xctestrun.ts @@ -1044,6 +1044,8 @@ function isRunnerSourceFile(fileName: string, filePath: string): boolean { '.json', '.png', '.swift', + '.m', + '.h', '.plist', '.entitlements', '.xctestplan', From f0ede09c4abaed8495aa317af929c9180f98ab92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 11 Jun 2026 12:28:45 +0200 Subject: [PATCH 4/7] fix(ios): recover deadline-truncated near-empty compact snapshots The all-structural sparse detector misses the common large-RN-tree case where the typed-query sweep resolves one or two stray controls before its 1s deadline: the payload has 'content', so recovery never fires, yet 2 nodes is useless in practice. Treat deadline-truncated payloads with <= 8 nodes as needing recovery, and only replace the original payload when the recovered tree actually carries more nodes. Completed sweeps on legitimately minimal screens stay untouched (not truncated). --- .../RunnerTests+Snapshot.swift | 91 +++++++++++++++++-- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index f83764269..61a1ffffa 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -275,7 +275,7 @@ extension RunnerTests { options: SnapshotOptions, reason: String ) -> DataPayload { - guard let nodes = payload.nodes, Self.isSparseApplicationWindowTree(nodes) else { + guard Self.snapshotPayloadNeedsRecovery(payload) else { return payload } if let fallback = publicQuerySnapshotFallback( @@ -285,7 +285,10 @@ extension RunnerTests { ) { return fallback } - return privateAXSnapshotFallback(app: app, options: options, reason: reason) ?? payload + return betterSnapshotPayload( + payload, + recovered: privateAXSnapshotFallback(app: app, options: options, reason: reason) + ) } private func snapshotWithPrivateAXFallbackIfSparse( @@ -294,16 +297,41 @@ extension RunnerTests { options: SnapshotOptions, reason: String ) -> DataPayload { - guard let nodes = payload.nodes, Self.isSparseApplicationWindowTree(nodes), - let fallback = privateAXSnapshotFallback( - app: app, - options: options, - reason: reason - ) + guard Self.snapshotPayloadNeedsRecovery(payload) else { + return payload + } + return betterSnapshotPayload( + payload, + recovered: privateAXSnapshotFallback(app: app, options: options, reason: reason) + ) + } + + /// A payload needs recovery when the tree is structural-only, OR when the capture was cut + /// off by a budget/deadline with almost nothing collected. The second condition matters on + /// large React Native trees: the typed-query sweep can resolve one or two stray controls + /// before its deadline, which defeats an all-structural check while the payload is still + /// useless in practice. A legitimately minimal screen finishes the sweep without truncation, + /// so it never pays for recovery. + static let sparseRecoveryTruncatedNodeThreshold = 8 + + static func snapshotPayloadNeedsRecovery(_ payload: DataPayload) -> Bool { + guard let nodes = payload.nodes, !nodes.isEmpty else { return false } + if isSparseApplicationWindowTree(nodes) { return true } + return payload.truncated == true && nodes.count <= sparseRecoveryTruncatedNodeThreshold + } + + /// Keeps the original payload unless the recovered tree actually carries more nodes — + /// recovery must never replace a partial-but-real capture with something thinner. + private func betterSnapshotPayload( + _ payload: DataPayload, + recovered: DataPayload? + ) -> DataPayload { + guard let recovered, let recoveredNodes = recovered.nodes, + recoveredNodes.count > (payload.nodes?.count ?? 0) else { return payload } - return fallback + return recovered } private static func isSparseApplicationWindowTree(_ nodes: [SnapshotNode]) -> Bool { @@ -680,6 +708,51 @@ extension RunnerTests { XCTAssertFalse(Self.isSparseApplicationWindowTree([])) } + func testSnapshotPayloadNeedsRecoveryOnDeadlineTruncatedNearEmptySweep() { + let root = compactInteractiveRootNode(rect: .zero) + func node(index: Int, label: String) -> SnapshotNode { + SnapshotNode( + index: index, + type: "Button", + label: label, + identifier: nil, + value: nil, + rect: snapshotRect(from: .zero), + enabled: true, + focused: nil, + selected: nil, + hittable: true, + depth: 1, + parentIndex: 0, + hiddenContentAbove: nil, + hiddenContentBelow: nil + ) + } + let button = node(index: 1, label: "Home") + + // Deadline-truncated sweep with a stray control: still needs recovery. + XCTAssertTrue( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [root, button], truncated: true)) + ) + // Structural-only tree needs recovery regardless of truncation. + XCTAssertTrue( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [root], truncated: false)) + ) + // A completed sweep on a legitimately minimal screen does not. + XCTAssertFalse( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [root, button], truncated: false)) + ) + // A truncated but reasonably populated sweep does not. + var populated: [SnapshotNode] = [root] + for index in 1...Self.sparseRecoveryTruncatedNodeThreshold { + populated.append(node(index: index, label: "b\(index)")) + } + XCTAssertFalse( + Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: populated, truncated: true)) + ) + XCTAssertFalse(Self.snapshotPayloadNeedsRecovery(DataPayload(nodes: [], truncated: true))) + } + func testPublicQueryRecoveryMessageExplainsFlattenedFallback() { XCTAssertTrue(Self.publicQueryRecoveryMessage.contains("XCTest accessibility element queries")) XCTAssertTrue(Self.publicQueryRecoveryMessage.contains("flattened")) From 4db29124616e62f2836dca41d68c42981ba9bde0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 11 Jun 2026 12:44:18 +0200 Subject: [PATCH 5/7] chore: fix CI for the AX snapshot fallback branch - Sync the setup metadata script's fingerprint extension list with the runtime (.m/.h were added for the ObjC bridge), fixing the cache metadata parity test. - Reduce find.ts complexity flagged by fallow: hoist the node fetcher into createFindNodeFetcher with a recoverSparseInteractiveSnapshot helper, split match disambiguation and resolution scoring into narrowMultipleMatches/resolvedTouchScore, extract rectsMatch. --- scripts/write-xcuitest-cache-metadata.mjs | 2 + src/daemon/handlers/find.ts | 216 +++++++++++++--------- 2 files changed, 134 insertions(+), 84 deletions(-) diff --git a/scripts/write-xcuitest-cache-metadata.mjs b/scripts/write-xcuitest-cache-metadata.mjs index 13bf818e4..f27d7f3c0 100644 --- a/scripts/write-xcuitest-cache-metadata.mjs +++ b/scripts/write-xcuitest-cache-metadata.mjs @@ -90,6 +90,8 @@ function isRunnerSourceFile(fileName, filePath) { '.json', '.png', '.swift', + '.m', + '.h', '.plist', '.entitlements', '.xctestplan', diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index 217fe9fd4..13092544f 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -2,7 +2,7 @@ import { dispatchCommand, resolveTargetDevice } from '../../core/dispatch.ts'; import { sleep } from '../../utils/timeouts.ts'; import { findBestMatchesByLocator, parseFindArgs, type FindLocator } from '../../utils/finders.ts'; import { centerOfRect, type SnapshotState } from '../../utils/snapshot.ts'; -import type { DaemonInvokeFn, DaemonRequest, DaemonResponse } from '../types.ts'; +import type { DaemonInvokeFn, DaemonRequest, DaemonResponse, SessionState } from '../types.ts'; import { SessionStore } from '../session-store.ts'; import { contextFromFlags } from '../context.ts'; import { ensureDeviceReady } from '../device-ready.ts'; @@ -90,74 +90,18 @@ export async function handleFindCommands(params: { // Interaction targets need the full compact tree so duplicate labels can be // resolved against viewport visibility before an off-screen subtree wins. const scope = shouldScopeFind(locator) && !requiresRect ? query : undefined; - const interactiveOnly = requiresRect; - let lastSnapshotAt = 0; - let lastNodes: SnapshotState['nodes'] | null = null; - const fetchNodes = async (): Promise<{ - nodes: SnapshotState['nodes']; - truncated?: boolean; - backend?: SnapshotState['backend']; - }> => { - const now = Date.now(); - // Re-use a snapshot captured within the last 750 ms to avoid redundant dumps during - // rapid find iterations. Skipped when Android freshness tracking is active, because - // the cached tree may already be stale from a recent navigation action. - if (lastNodes && now - lastSnapshotAt < 750 && !getActiveAndroidSnapshotFreshness(session)) { - return { nodes: lastNodes }; - } - let { snapshot } = await captureSnapshot({ - device, - session, - flags: { - ...req.flags, - snapshotInteractiveOnly: interactiveOnly, - snapshotCompact: interactiveOnly, - }, - outPath: req.flags?.out, - logPath, - snapshotScope: scope, - }); - if (interactiveOnly && isSparseIosInteractiveSnapshot(snapshot)) { - try { - const fullCapture = await captureSnapshot({ - device, - session, - flags: { - ...req.flags, - snapshotInteractiveOnly: false, - snapshotCompact: false, - }, - outPath: req.flags?.out, - logPath, - snapshotScope: scope, - }); - snapshot = fullCapture.snapshot; - } catch (error) { - if (!shouldScopeFind(locator)) throw error; - const scopedFullCapture = await captureSnapshot({ - device, - session, - flags: { - ...req.flags, - snapshotInteractiveOnly: false, - snapshotCompact: false, - }, - outPath: req.flags?.out, - logPath, - snapshotScope: query, - }); - snapshot = scopedFullCapture.snapshot; - } - } - const nodes = snapshot.nodes; - lastSnapshotAt = now; - lastNodes = nodes; - if (session) { - setSessionSnapshot(session, snapshot); - sessionStore.set(sessionName, session); - } - return { nodes, truncated: snapshot.truncated, backend: snapshot.backend }; - }; + const fetchNodes = createFindNodeFetcher({ + device, + session, + req, + logPath, + locator, + query, + scope, + interactiveOnly: requiresRect, + sessionStore, + sessionName, + }); const ctx: FindContext = { req, @@ -223,6 +167,86 @@ function findActionRequiresRect(action: string): boolean { return action === 'click' || action === 'focus' || action === 'fill' || action === 'type'; } +type FindNodeFetcher = () => Promise<{ + nodes: SnapshotState['nodes']; + truncated?: boolean; + backend?: SnapshotState['backend']; +}>; + +function createFindNodeFetcher(params: { + device: SessionState['device']; + session: SessionState | undefined; + req: DaemonRequest; + logPath: string; + locator: FindLocator; + query: string; + scope: string | undefined; + interactiveOnly: boolean; + sessionStore: SessionStore; + sessionName: string; +}): FindNodeFetcher { + const { device, session, req, logPath, locator, query, scope, interactiveOnly } = params; + const { sessionStore, sessionName } = params; + let lastSnapshotAt = 0; + let lastNodes: SnapshotState['nodes'] | null = null; + const capture = async (snapshotScope: string | undefined, interactive: boolean) => { + const { snapshot } = await captureSnapshot({ + device, + session, + flags: { + ...req.flags, + snapshotInteractiveOnly: interactive, + snapshotCompact: interactive, + }, + outPath: req.flags?.out, + logPath, + snapshotScope, + }); + return snapshot; + }; + return async () => { + const now = Date.now(); + // Re-use a snapshot captured within the last 750 ms to avoid redundant dumps during + // rapid find iterations. Skipped when Android freshness tracking is active, because + // the cached tree may already be stale from a recent navigation action. + if (lastNodes && now - lastSnapshotAt < 750 && !getActiveAndroidSnapshotFreshness(session)) { + return { nodes: lastNodes }; + } + let snapshot = await capture(scope, interactiveOnly); + if (interactiveOnly && isSparseIosInteractiveSnapshot(snapshot)) { + snapshot = await recoverSparseInteractiveSnapshot({ capture, locator, query, scope }); + } + const nodes = snapshot.nodes; + lastSnapshotAt = now; + lastNodes = nodes; + if (session) { + setSessionSnapshot(session, snapshot); + sessionStore.set(sessionName, session); + } + return { nodes, truncated: snapshot.truncated, backend: snapshot.backend }; + }; +} + +/** + * A sparse compact-interactive iOS snapshot usually means the runner could not enumerate the + * tree, not that the screen is empty: retry with a full snapshot, and when even unscoped AX + * serialization fails on unrelated content, with a query-scoped full snapshot. + */ +async function recoverSparseInteractiveSnapshot(params: { + capture: (scope: string | undefined, interactive: boolean) => Promise; + locator: FindLocator; + query: string; + scope: string | undefined; +}): Promise { + const { capture, locator, query, scope } = params; + try { + return await capture(scope, false); + } catch (error) { + if (!shouldScopeFind(locator)) throw error; + return await capture(query, false); + } +} + function resolveFindMatch(params: { nodes: SnapshotState['nodes']; locator: FindLocator; @@ -242,13 +266,11 @@ function resolveFindMatch(params: { } if (requiresRect && bestMatches.matches.length > 1) { - if (flags?.findFirst) { - bestMatches.matches = [bestMatches.matches[0]!]; - } else if (flags?.findLast) { - bestMatches.matches = [bestMatches.matches[bestMatches.matches.length - 1]!]; - } else { + const narrowed = narrowMultipleMatches(bestMatches.matches, flags); + if (!narrowed) { return { ok: false, response: buildAmbiguousMatchError(bestMatches.matches, locator, query) }; } + bestMatches.matches = narrowed; } const node = bestMatches.matches[0] ?? null; @@ -261,6 +283,15 @@ function resolveFindMatch(params: { return { ok: true, node }; } +function narrowMultipleMatches( + matches: SnapshotState['nodes'], + flags: DaemonRequest['flags'], +): SnapshotState['nodes'] | null { + if (flags?.findFirst) return [matches[0]!]; + if (flags?.findLast) return [matches[matches.length - 1]!]; + return null; +} + function preferOnscreenMatches( matches: SnapshotState['nodes'], nodes: SnapshotState['nodes'], @@ -300,17 +331,27 @@ function interactiveMatchScore( ): number { const resolution = resolveActionableTouchResolution(nodes, node); if (resolution.reason === 'covered') return 0; - if (resolution.reason === 'semantic-target' && resolution.node.rect) return 4; - if (resolution.reason === 'same-rect-descendant' && resolution.node.rect) return 4; + const resolved = resolvedTouchScore(resolution, nodes[0]); + if (resolved > 0) return resolved; + if (node.hittable && node.rect && !isRootInteractionContainer(node, nodes[0])) return 3; + return node.rect ? 1 : 0; +} + +function resolvedTouchScore( + resolution: ReturnType, + root: SnapshotState['nodes'][number] | undefined, +): number { + if (!resolution.node.rect) return 0; + if (resolution.reason === 'semantic-target' || resolution.reason === 'same-rect-descendant') { + return 4; + } if ( resolution.reason === 'hittable-ancestor' && - resolution.node.rect && - !isRootInteractionContainer(resolution.node, nodes[0]) + !isRootInteractionContainer(resolution.node, root) ) { return 2; } - if (node.hittable && node.rect && !isRootInteractionContainer(node, nodes[0])) return 3; - return node.rect ? 1 : 0; + return 0; } function rectArea(node: SnapshotState['nodes'][number]): number { @@ -333,11 +374,18 @@ function isRootInteractionContainer( if (!root?.rect || !node.rect) return false; const type = node.type?.toLowerCase() ?? ''; if (!type.includes('application') && !type.includes('window')) return false; + return rectsMatch(node.rect, root.rect); +} + +function rectsMatch( + left: NonNullable, + right: NonNullable, +): boolean { return ( - node.rect.x === root.rect.x && - node.rect.y === root.rect.y && - node.rect.width === root.rect.width && - node.rect.height === root.rect.height + left.x === right.x && + left.y === right.y && + left.width === right.width && + left.height === right.height ); } From 55a9107e1bf4cf0be2e7e88f999768413aa13760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 11 Jun 2026 14:32:05 +0200 Subject: [PATCH 6/7] feat(ios): make accessibility fallbacks and collapsed containers visible in snapshot output Two transparency gaps from #701's 'no silent fallback' requirement: - Runner-attached snapshot messages now surface as snapshot warnings (readAppleSnapshotResult previously dropped them), so every recovery through the fallback accessibility backend or query tier is announced, states what it usually means (the app publishes an unhealthy accessibility tree - fixing the app is the real cure), and points to screenshot as visual truth. - A leaf whose label merges many comma-joined segments is flagged as a collapsed accessible container: the app marks a container accessible, hiding every descendant from assistive tech and automation alike. Nothing can be recovered below it (VoiceOver sees the same merged element), so the warning names the node, estimates the merged label count, and gives the app-side fix plus the screenshot/coordinate-tap workaround. Validated live on the lab stress fixture (adlab://stress?accessible=1): the 6-node tree now carries '@e5 [Other] merges ~126 labels...'. --- .../RunnerTests+AXSnapshotFallback.swift | 4 +- .../RunnerTests+Snapshot.swift | 2 +- src/__tests__/runtime-snapshot.test.ts | 47 +++++++++++++++++++ src/commands/capture-snapshot.ts | 30 ++++++++++++ src/core/interactors/apple.ts | 23 +++++++-- 5 files changed, 98 insertions(+), 8 deletions(-) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift index de2ee0385..671fbb415 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift @@ -76,10 +76,10 @@ extension RunnerTests { let depthLimited = effectiveDepth < requestedDepth let truncated = (response["truncated"] as? Bool) == true || depthLimited var message = - "Recovered iOS snapshot with private AX fallback after \(reason). This backend is simulator-only, experimental, and may expose a partial tree." + "Recovered this snapshot with the fallback accessibility backend after \(reason). This usually means the app publishes an unhealthy accessibility tree (too large or deep to serialize, or containers that hide their children) — fixing the app's accessibility is the real cure. The fallback is simulator-only and may expose a partial tree; treat screenshot as visual truth when this warning appears." if depthLimited { message += - " The AX server rejected deeper requests; this tree is capped at depth \(effectiveDepth) — re-run with --depth \(effectiveDepth) --scope to inspect deeper content." + " The accessibility server rejected deeper requests; this tree is capped at depth \(effectiveDepth) — re-run with --depth \(effectiveDepth) --scope to inspect deeper content." } NSLog( "AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_USED reason=%@ nodes=%ld depth=%ld truncated=%@", diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index 61a1ffffa..3770cc40d 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -12,7 +12,7 @@ extension RunnerTests { private static let rawSnapshotTooLargeHint = "Raw iOS snapshot exceeded the runner payload guard. Use regular snapshot for visible UI, or scope/depth-limit raw snapshot when inspecting a large accessibility tree." private static let publicQueryRecoveryMessage = - "Recovered iOS snapshot through XCTest accessibility element queries after the public snapshot tree was sparse. The recovered nodes are a flattened view of on-screen controls." + "Recovered iOS snapshot through XCTest accessibility element queries after the public snapshot tree was sparse. This usually means the app publishes an unhealthy accessibility tree - fixing the app accessibility is the real cure. The recovered nodes are a flattened view of on-screen controls; treat screenshot as visual truth when this warning appears." private static let structuralOnlyNodeTypes: Set = [ "Application", "Window", diff --git a/src/__tests__/runtime-snapshot.test.ts b/src/__tests__/runtime-snapshot.test.ts index 93df48239..b4c7f5288 100644 --- a/src/__tests__/runtime-snapshot.test.ts +++ b/src/__tests__/runtime-snapshot.test.ts @@ -144,6 +144,53 @@ test('runtime snapshot warns when iOS compact interactive output is root-only', ]); }); +test('runtime snapshot flags a merged accessibility leaf and surfaces backend warnings', async () => { + const mergedLabel = Array.from({ length: 30 }, (_, i) => `Row ${i}, Tap`).join(', '); + const device = createSnapshotOnlyDevice({ + nodes: [ + { ref: 'e1', index: 0, depth: 0, type: 'Application', label: 'App' }, + { ref: 'e2', index: 1, depth: 1, parentIndex: 0, type: 'Other', label: mergedLabel }, + { ref: 'e3', index: 2, depth: 1, parentIndex: 0, type: 'Button', label: 'Ok' }, + ], + truncated: false, + backend: 'xctest', + warnings: [ + 'Recovered this snapshot with the fallback accessibility backend after sparse tree.', + ], + }); + + const result = await device.capture.snapshot({ session: 'default' }); + + assert.equal(result.warnings?.length, 2); + assert.equal( + result.warnings?.[0], + 'Recovered this snapshot with the fallback accessibility backend after sparse tree.', + ); + assert.match(String(result.warnings?.[1]), /e2 \[Other\] merges ~60 labels/); + assert.match(String(result.warnings?.[1]), /marks a container as accessible/); + assert.match(String(result.warnings?.[1]), /screenshot as visual truth/); +}); + +test('runtime snapshot does not flag prose text or labeled containers with children', async () => { + const prose = Array.from({ length: 30 }, (_, i) => `clause ${i}`).join(', '); + const device = createSnapshotOnlyDevice({ + nodes: [ + { ref: 'e1', index: 0, depth: 0, type: 'Application', label: 'App' }, + // Long comma-joined prose on a text node: content, not a collapsed container. + { ref: 'e2', index: 1, depth: 1, parentIndex: 0, type: 'StaticText', label: prose }, + // Same label shape on a container WITH children: not a merged leaf. + { ref: 'e3', index: 2, depth: 1, parentIndex: 0, type: 'Other', label: prose }, + { ref: 'e4', index: 3, depth: 2, parentIndex: 2, type: 'Button', label: 'Ok' }, + ], + truncated: false, + backend: 'xctest', + }); + + const result = await device.capture.snapshot({ session: 'default' }); + + assert.deepEqual(result.warnings ?? [], []); +}); + test('runtime snapshot does not warn for a normal iOS compact interactive output', async () => { const device = createSnapshotOnlyDevice({ nodes: [ diff --git a/src/commands/capture-snapshot.ts b/src/commands/capture-snapshot.ts index 45868a56b..9896a703d 100644 --- a/src/commands/capture-snapshot.ts +++ b/src/commands/capture-snapshot.ts @@ -225,6 +225,8 @@ function buildSnapshotWarnings(params: { const reactNativeOverlayWarning = formatReactNativeOverlayWarning(params.snapshot.nodes); if (reactNativeOverlayWarning) warnings.push(reactNativeOverlayWarning); + warnings.push(...buildMergedAccessibilityLeafWarnings(params.snapshot.nodes)); + const recentDropWarning = formatRecentSnapshotDropWarning(params); if (recentDropWarning) warnings.push(recentDropWarning); @@ -253,6 +255,34 @@ function buildSparseIosInteractiveWarnings(params: { ]; } +const MERGED_LEAF_MIN_SEGMENTS = 10; + +/** + * A leaf whose label joins many short segments is the signature of a container marked as an + * accessibility element: the platform folds every descendant into one merged node, so the + * children exist on screen but cannot be addressed by assistive tech or automation. This is + * an app-side accessibility bug, not a snapshot failure — the same merged element is all + * VoiceOver users get. + */ +function buildMergedAccessibilityLeafWarnings(nodes: SnapshotState['nodes']): string[] { + const parents = new Set( + nodes.map((node) => node.parentIndex).filter((index) => index !== undefined), + ); + return nodes + .filter((node) => { + if (parents.has(node.index)) return false; + const type = node.type?.toLowerCase() ?? ''; + if (type.includes('text')) return false; + const label = node.label ?? ''; + return label.split(', ').length > MERGED_LEAF_MIN_SEGMENTS; + }) + .map((node) => { + const segments = (node.label ?? '').split(', ').length; + const name = node.identifier ? ` (${node.identifier})` : ''; + return `@${node.ref} [${node.type ?? 'element'}]${name} merges ~${segments} labels into a single accessibility element. The app likely marks a container as accessible, which hides every descendant from assistive tech and automation — the children cannot be addressed individually. Fix the app's accessibility (mark the rows, not the container); until then use screenshot as visual truth and coordinate taps.`; + }); +} + function buildEmptyAndroidInteractiveWarnings(params: { result: BackendSnapshotResult; snapshot: SnapshotState; diff --git a/src/core/interactors/apple.ts b/src/core/interactors/apple.ts index 744adcb01..46f56d8b0 100644 --- a/src/core/interactors/apple.ts +++ b/src/core/interactors/apple.ts @@ -18,7 +18,7 @@ import { withDiagnosticTimer } from '../../utils/diagnostics.ts'; import type { DeviceInfo } from '../../utils/device.ts'; import { AppError } from '../../utils/errors.ts'; import type { RawSnapshotNode } from '../../utils/snapshot.ts'; -import type { Interactor, RunnerContext, SnapshotResult } from '../interactor-types.ts'; +import type { Interactor, RunnerContext } from '../interactor-types.ts'; export function createAppleInteractor( device: DeviceInfo, @@ -70,7 +70,12 @@ export function createAppleInteractor( if (nodes.length === 0 && device.kind === 'simulator') { throw new AppError('COMMAND_FAILED', 'XCTest snapshot returned 0 nodes on iOS simulator.'); } - return { nodes, truncated: result.truncated ?? false, backend: 'xctest' }; + return { + nodes, + truncated: result.truncated ?? false, + backend: 'xctest', + ...(result.message ? { warnings: [result.message] } : {}), + }; }, back: async (mode) => { if (device.target === 'tv') { @@ -127,11 +132,19 @@ export function createAppleInteractor( }; } -function readAppleSnapshotResult( - result: Record, -): Pick { +function readAppleSnapshotResult(result: Record): { + nodes?: RawSnapshotNode[]; + truncated?: boolean; + message?: string; +} { return { nodes: Array.isArray(result.nodes) ? (result.nodes as RawSnapshotNode[]) : undefined, truncated: typeof result.truncated === 'boolean' ? result.truncated : undefined, + // Runner-attached context (e.g. "recovered with the fallback accessibility backend") + // surfaces as a snapshot warning so fallbacks are never silent. + message: + typeof result.message === 'string' && result.message.trim().length > 0 + ? result.message + : undefined, }; } From 476979bf2b67798a50c699ba82ecbd0477359d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 11 Jun 2026 14:50:55 +0200 Subject: [PATCH 7/7] fix(ios): detect sparse trees with labeled roots and surface warnings through the daemon Validated against a real-world repro (a production React Native app's login screen, simulator build provided privately by the reporter): a full-screen accessibilityViewIsModal overlay leaves the public snapshot with just Application+Window. Two gaps kept recovery off: - The sparse detector counted the Application label (the app's display name) as content and the full-screen root as hittable, so the app name alone defeated recovery. Application/Window labels and root hittability say nothing about tree health and no longer count. - Interactor-level snapshot warnings were dropped by the daemon capture chain (only the runtime/commands layer kept them); they now thread through CaptureSnapshotResult into BackendSnapshotResult. With both fixes that login screen recovers through the public query tier: 16 nodes with every control addressable (fill @ref + read-back verified), and the output carries the recovery warning. Bluesky-class trees still ladder into the private fallback unchanged. --- .../RunnerTests+Snapshot.swift | 19 +++++++++++++++++-- src/daemon/handlers/snapshot-capture.ts | 5 +++++ src/daemon/snapshot-runtime.ts | 1 + 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index 3770cc40d..d3c2c773b 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -337,11 +337,15 @@ extension RunnerTests { private static func isSparseApplicationWindowTree(_ nodes: [SnapshotNode]) -> Bool { guard !nodes.isEmpty else { return false } return nodes.allSatisfy { node in - let hasContent = node.label?.isEmpty == false + // Application/Window labels are just the app/window name, and full-screen roots + // compute as hittable; neither says anything about tree health, so neither counts + // as content for these types (a labeled app+window pair is still a sparse tree). + let isRootContainer = node.type == "Application" || node.type == "Window" + let hasContent = (!isRootContainer && node.label?.isEmpty == false) || node.identifier?.isEmpty == false || node.value?.isEmpty == false return !hasContent - && !node.hittable + && (isRootContainer || !node.hittable) && Self.structuralOnlyNodeTypes.contains(node.type) } } @@ -706,6 +710,17 @@ extension RunnerTests { XCTAssertFalse(Self.isSparseApplicationWindowTree([root, button])) XCTAssertFalse(Self.isSparseApplicationWindowTree([root, window, button])) XCTAssertFalse(Self.isSparseApplicationWindowTree([])) + // App/window name labels and full-screen-root hittability are not content: a labeled, + // hittable Application root over a bare Window is still a sparse tree (a shape seen on + // production React Native login screens behind full-screen modal overlays). + let labeledHittableRoot = node( + index: 0, type: "Application", label: "Example App", hittable: true) + XCTAssertTrue(Self.isSparseApplicationWindowTree([labeledHittableRoot, window])) + XCTAssertFalse( + Self.isSparseApplicationWindowTree([ + labeledHittableRoot, node(index: 1, type: "Application", identifier: "custom-id"), + ]) + ) } func testSnapshotPayloadNeedsRecoveryOnDeadlineTruncatedNearEmptySweep() { diff --git a/src/daemon/handlers/snapshot-capture.ts b/src/daemon/handlers/snapshot-capture.ts index f16a77084..8691c962e 100644 --- a/src/daemon/handlers/snapshot-capture.ts +++ b/src/daemon/handlers/snapshot-capture.ts @@ -63,6 +63,7 @@ type SnapshotData = { backend?: SnapshotBackend; analysis?: AndroidSnapshotAnalysis; androidSnapshot?: AndroidSnapshotBackendMetadata; + warnings?: string[]; }; type SnapshotAttempt = { @@ -76,6 +77,7 @@ type CaptureSnapshotResult = { analysis?: AndroidSnapshotAnalysis; androidSnapshot?: AndroidSnapshotBackendMetadata; freshness?: AndroidFreshnessCaptureMeta; + warnings?: string[]; }; type AndroidFreshnessReason = 'empty-interactive' | 'sharp-drop' | 'stuck-route'; @@ -94,6 +96,7 @@ export async function captureSnapshot( snapshot: buildSnapshotState(data, resolveSnapshotStateFlags(params)), analysis: data.analysis, androidSnapshot: data.androidSnapshot, + warnings: data.warnings, }; } @@ -245,6 +248,7 @@ async function captureAndroidFreshnessAwareSnapshot( analysis: latest.data.analysis, androidSnapshot: latest.data.androidSnapshot, freshness: latest.freshness, + warnings: latest.data.warnings, }; } @@ -298,6 +302,7 @@ async function capturePostGestureAwareSnapshot( analysis: latest.data.analysis, androidSnapshot: latest.data.androidSnapshot, freshness: latest.freshness, + warnings: latest.data.warnings, }; } diff --git a/src/daemon/snapshot-runtime.ts b/src/daemon/snapshot-runtime.ts index 86bd5f977..f652b8376 100644 --- a/src/daemon/snapshot-runtime.ts +++ b/src/daemon/snapshot-runtime.ts @@ -290,6 +290,7 @@ function createDaemonSnapshotBackend(params: { analysis: capture.analysis, androidSnapshot: capture.androidSnapshot, freshness: capture.freshness, + warnings: capture.warnings, appName: session?.appBundleId ? (session.appName ?? session.appBundleId) : undefined, appBundleId: session?.appBundleId, };