Skip to content

Commit 73a08cd

Browse files
Fix scroll and scrollUntilVisible on Android
Use ADB input swipe for reliable scrolling instead of Appium gestures, which are unreliable on many Android devices/emulators. Log a warning when falling back to the Appium scroll path due to missing ADB. Also distinguish "element not found" errors from infrastructure failures in scrollUntilVisible so connection errors are propagated immediately instead of being silently swallowed until all scrolls are exhausted. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent bb1ca90 commit 73a08cd

4 files changed

Lines changed: 273 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Fixed
1111
- `runFlow: when` conditions with variable expressions (e.g., `${output.element.id}`) were never expanded, causing conditions to always evaluate as false and silently skip conditional blocks
1212
- iOS real device: `acceptAlertButtonSelector` matched "Don't Allow" instead of "Allow" — `CONTAINS[c] 'Allow'` matched both buttons, causing WDA to reject permission dialogs. Changed to `BEGINSWITH[c] 'Allow'` with `OK` fallback for older iOS versions
13+
- Android: `scroll` and `scrollUntilVisible` did not scroll — Appium `/appium/gestures/scroll` endpoint is unreliable on many devices. Replaced with ADB `input swipe` for direct OS-level input injection (falls back to Appium if ADB is unavailable). Also added on-screen bounds verification to prevent false positives from off-screen elements in the Android view hierarchy
1314

1415
## [1.0.7] - 2026-02-20
1516

pkg/driver/uiautomator2/commands.go

Lines changed: 91 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package uiautomator2
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"strconv"
78
"strings"
@@ -395,23 +396,37 @@ func (d *Driver) scroll(step *flow.ScrollStep) *core.CommandResult {
395396
direction = "down"
396397
}
397398

398-
// Get screen size for dynamic scroll area
399+
// Get screen size for scroll coordinates
399400
width, height, err := d.screenSize()
400401
if err != nil {
401402
return errorResult(err, "Failed to get screen size")
402403
}
403404

404-
// Use most of screen for scroll area (leave margins)
405-
area := uiautomator2.NewRect(0, height/8, width, height*3/4)
406-
407-
// /appium/gestures/scroll already uses scroll semantics — no inversion needed
408-
if err := d.client.ScrollInArea(area, direction, 0.5, 0); err != nil {
405+
// Use ADB input swipe for reliable scrolling
406+
if err := d.scrollBySwipe(direction, width, height); err != nil {
409407
return errorResult(err, fmt.Sprintf("Failed to scroll: %v", err))
410408
}
411409

412410
return successResult(fmt.Sprintf("Scrolled %s", direction), nil)
413411
}
414412

413+
// isElementNotFoundError reports whether err only says that the element could
// not be located, which is an expected outcome while scrolling towards it.
//
// It returns true when err wraps context.DeadlineExceeded or when its message
// contains (case-insensitively) one of the known "not found" phrases. It
// returns false for a nil error and for everything else (connection refused,
// request failures, ...) so that callers can propagate those immediately
// instead of retrying.
func isElementNotFoundError(err error) bool {
	// A nil error is not a "not found" error; guarding here also avoids a
	// nil-pointer panic on err.Error() below.
	if err == nil {
		return false
	}
	if errors.Is(err, context.DeadlineExceeded) {
		return true
	}

	// Fall back to message matching for errors that do not wrap a sentinel.
	// "context deadline exceeded" is listed as well so that a deadline error
	// that was flattened into a plain string is still recognised.
	msg := strings.ToLower(err.Error())
	notFoundPhrases := []string{
		"not found",
		"no elements match",
		"no such element",
		"could not be located",
		"context deadline exceeded",
	}
	for _, phrase := range notFoundPhrases {
		if strings.Contains(msg, phrase) {
			return true
		}
	}
	return false
}
429+
415430
func (d *Driver) scrollUntilVisible(step *flow.ScrollUntilVisibleStep) *core.CommandResult {
416431
direction := strings.ToLower(step.Direction)
417432
if direction == "" {
@@ -428,26 +443,30 @@ func (d *Driver) scrollUntilVisible(step *flow.ScrollUntilVisibleStep) *core.Com
428443
}
429444
deadline := time.Now().Add(timeout)
430445

431-
// Get screen size for dynamic scroll area
446+
// Get screen size for scroll coordinates
432447
width, height, err := d.screenSize()
433448
if err != nil {
434449
return errorResult(err, "Failed to get screen size")
435450
}
436451

437-
// Use most of screen for scroll area (leave margins)
438-
area := uiautomator2.NewRect(0, height/8, width, height*3/4)
439-
440452
for i := 0; i < maxScrolls && time.Now().Before(deadline); i++ {
441453
// Try to find element (short timeout - includes page source fallback)
442454
_, info, err := d.findElement(step.Element, true, 1000)
443455
if err == nil && info != nil {
444-
// Element found - return success
445-
return successResult(fmt.Sprintf("Element found after %d scrolls", i), info)
456+
// On Android, UIAutomator can find elements that exist in the view hierarchy
457+
// but are off-screen (e.g., in ScrollView). Verify the element is actually
458+
// visible on screen by checking its bounds overlap with the viewport.
459+
if isElementOnScreen(info, width, height) {
460+
return successResult(fmt.Sprintf("Element found after %d scrolls", i), info)
461+
}
462+
// Element exists in hierarchy but is off-screen - continue scrolling
463+
} else if err != nil && info == nil && !isElementNotFoundError(err) {
464+
return errorResult(err, "Failed to find element")
446465
}
447466

448-
// /appium/gestures/scroll already uses scroll semantics — no inversion needed
449-
if err := d.client.ScrollInArea(area, direction, 0.3, 0); err != nil {
450-
return errorResult(err, fmt.Sprintf("Failed to scroll: %v", err))
467+
// Use ADB input swipe for reliable scrolling (Appium gestures/scroll is unreliable)
468+
if err := d.scrollBySwipe(direction, width, height); err != nil {
469+
return errorResult(err, "Failed to scroll")
451470
}
452471

453472
time.Sleep(300 * time.Millisecond)
@@ -456,6 +475,63 @@ func (d *Driver) scrollUntilVisible(step *flow.ScrollUntilVisibleStep) *core.Com
456475
return errorResult(fmt.Errorf("element not found"), fmt.Sprintf("Element not found after %d scrolls", maxScrolls))
457476
}
458477

478+
// scrollBySwipe performs a scroll gesture using ADB input swipe for reliability.
479+
// Falls back to Appium gestures/scroll if ADB is not available.
480+
func (d *Driver) scrollBySwipe(direction string, screenWidth, screenHeight int) error {
481+
centerX := screenWidth / 2
482+
startY := screenHeight * 3 / 5
483+
endY := screenHeight * 2 / 5
484+
durationMs := 300
485+
486+
// Calculate swipe coordinates based on direction
487+
// Swipe direction is opposite of scroll direction:
488+
// scroll DOWN (see content below) = swipe finger UP
489+
// scroll UP (see content above) = swipe finger DOWN
490+
var fromX, fromY, toX, toY int
491+
switch direction {
492+
case "up":
493+
fromX, fromY = centerX, endY
494+
toX, toY = centerX, startY
495+
case "down":
496+
fromX, fromY = centerX, startY
497+
toX, toY = centerX, endY
498+
case "left":
499+
centerY := screenHeight / 2
500+
fromX, fromY = screenWidth*2/5, centerY
501+
toX, toY = screenWidth*3/5, centerY
502+
case "right":
503+
centerY := screenHeight / 2
504+
fromX, fromY = screenWidth*3/5, centerY
505+
toX, toY = screenWidth*2/5, centerY
506+
default:
507+
fromX, fromY = centerX, startY
508+
toX, toY = centerX, endY
509+
}
510+
511+
// Prefer ADB shell for reliable input injection
512+
if d.device != nil {
513+
cmd := fmt.Sprintf("input swipe %d %d %d %d %d", fromX, fromY, toX, toY, durationMs)
514+
_, err := d.device.Shell(cmd)
515+
return err
516+
}
517+
518+
// Fallback to Appium gestures if no ADB access — this path is unreliable
519+
// on many Android devices/emulators, so log a warning to aid debugging.
520+
logger.Warn("ADB not available, falling back to Appium scroll (may be unreliable)")
521+
area := uiautomator2.NewRect(0, screenHeight/8, screenWidth, screenHeight*3/4)
522+
return d.client.ScrollInArea(area, direction, 0.5, 0)
523+
}
524+
525+
// isElementOnScreen checks if an element's bounds overlap with the visible screen area.
526+
// Returns false if bounds have no area (zero width or height) or are entirely off-screen.
527+
func isElementOnScreen(info *core.ElementInfo, screenWidth, screenHeight int) bool {
528+
b := info.Bounds
529+
if b.Width == 0 || b.Height == 0 {
530+
return false
531+
}
532+
return b.X+b.Width > 0 && b.X < screenWidth && b.Y+b.Height > 0 && b.Y < screenHeight
533+
}
534+
459535
func (d *Driver) swipe(step *flow.SwipeStep) *core.CommandResult {
460536
// Check if coordinate-based swipe (percentage or absolute)
461537
if step.Start != "" && step.End != "" {

pkg/driver/uiautomator2/commands_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package uiautomator2
22

33
import (
4+
"context"
45
"errors"
56
"fmt"
67
"net/http"
@@ -4175,6 +4176,34 @@ func TestScrollUntilVisibleDefaultMaxScrolls(t *testing.T) {
41754176
// Verify MockUIA2Client satisfies UIA2Client at compile time.
41764177
var _ UIA2Client = (*MockUIA2Client)(nil)
41774178

4179+
func TestIsElementNotFoundError(t *testing.T) {
4180+
tests := []struct {
4181+
name string
4182+
err error
4183+
expected bool
4184+
}{
4185+
{"context deadline exceeded", context.DeadlineExceeded, true},
4186+
{"wrapped deadline exceeded", fmt.Errorf("element 'x' not found: %w", context.DeadlineExceeded), true},
4187+
{"element not found", fmt.Errorf("element not found"), true},
4188+
{"no elements match", fmt.Errorf("no elements match selector"), true},
4189+
{"no such element", fmt.Errorf("no such element: An element could not be located"), true},
4190+
{"could not be located", fmt.Errorf("An element could not be located on the page"), true},
4191+
{"appium deadline with no such element", fmt.Errorf("context deadline exceeded: no such element: An element could not be located on the page using the given search parameters"), true},
4192+
{"connection refused", fmt.Errorf("connection refused"), false},
4193+
{"send request failed", fmt.Errorf("send request failed"), false},
4194+
{"EOF", fmt.Errorf("unexpected EOF"), false},
4195+
}
4196+
4197+
for _, tt := range tests {
4198+
t.Run(tt.name, func(t *testing.T) {
4199+
got := isElementNotFoundError(tt.err)
4200+
if got != tt.expected {
4201+
t.Errorf("isElementNotFoundError(%q) = %v, want %v", tt.err, got, tt.expected)
4202+
}
4203+
})
4204+
}
4205+
}
4206+
41784207
// Verify uiautomator2.DeviceInfo is used correctly.
41794208
var _ = &uiautomator2.DeviceInfo{}
41804209

pkg/driver/uiautomator2/driver_test.go

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1977,6 +1977,158 @@ func TestScrollUntilVisibleScrollError(t *testing.T) {
19771977
}
19781978
}
19791979

1980+
func TestScrollUntilVisibleOffScreenElement(t *testing.T) {
1981+
// Element is found by UiAutomator but is off-screen (y=3000 on 2400px screen).
1982+
// scrollUntilVisible should scroll via ADB until the element moves on-screen.
1983+
findCount := 0
1984+
server := setupMockServer(t, map[string]func(w http.ResponseWriter, r *http.Request){
1985+
"POST /element": func(w http.ResponseWriter, r *http.Request) {
1986+
writeJSON(w, map[string]interface{}{
1987+
"value": map[string]string{"ELEMENT": "elem-offscreen"},
1988+
})
1989+
},
1990+
"GET /element/elem-offscreen/text": func(w http.ResponseWriter, r *http.Request) {
1991+
writeJSON(w, map[string]interface{}{"value": "Target"})
1992+
},
1993+
"GET /element/elem-offscreen/rect": func(w http.ResponseWriter, r *http.Request) {
1994+
findCount++
1995+
// First 2 finds: element is off-screen (below viewport)
1996+
// Third find: element has scrolled into view
1997+
y := 3000
1998+
if findCount >= 3 {
1999+
y = 500
2000+
}
2001+
writeJSON(w, map[string]interface{}{
2002+
"value": map[string]int{"x": 100, "y": y, "width": 200, "height": 50},
2003+
})
2004+
},
2005+
"GET /source": func(w http.ResponseWriter, r *http.Request) {
2006+
writeJSON(w, map[string]interface{}{
2007+
"value": `<hierarchy><node text="Other" bounds="[0,0][100,100]"/></hierarchy>`,
2008+
})
2009+
},
2010+
})
2011+
defer server.Close()
2012+
2013+
shell := &MockShellExecutor{}
2014+
client := newMockHTTPClient(server.URL)
2015+
info := &core.PlatformInfo{ScreenWidth: 1080, ScreenHeight: 2400}
2016+
driver := New(client.Client, info, shell)
2017+
2018+
step := &flow.ScrollUntilVisibleStep{
2019+
Element: flow.Selector{ID: "target-button"},
2020+
Direction: "UP",
2021+
}
2022+
result := driver.Execute(step)
2023+
2024+
if !result.Success {
2025+
t.Errorf("expected success, got error: %v", result.Error)
2026+
}
2027+
// Should have issued at least 2 ADB swipe commands before finding on-screen
2028+
swipeCount := 0
2029+
for _, cmd := range shell.commands {
2030+
if strings.HasPrefix(cmd, "input swipe") {
2031+
swipeCount++
2032+
// Direction UP = finger moves down = fromY < toY
2033+
var fromX, fromY, toX, toY, dur int
2034+
fmt.Sscanf(cmd, "input swipe %d %d %d %d %d", &fromX, &fromY, &toX, &toY, &dur)
2035+
if fromY >= toY {
2036+
t.Errorf("scroll UP should produce finger-down swipe (fromY < toY), got fromY=%d toY=%d", fromY, toY)
2037+
}
2038+
}
2039+
}
2040+
if swipeCount < 2 {
2041+
t.Errorf("expected at least 2 ADB swipes for off-screen element, got %d", swipeCount)
2042+
}
2043+
}
2044+
2045+
func TestScrollUntilVisibleUsesADBSwipe(t *testing.T) {
2046+
server := setupMockServer(t, map[string]func(w http.ResponseWriter, r *http.Request){
2047+
"POST /element": func(w http.ResponseWriter, r *http.Request) {
2048+
writeJSON(w, map[string]interface{}{
2049+
"value": map[string]string{"ELEMENT": ""},
2050+
})
2051+
},
2052+
"GET /source": func(w http.ResponseWriter, r *http.Request) {
2053+
writeJSON(w, map[string]interface{}{
2054+
"value": `<hierarchy><node text="Other" bounds="[0,0][100,100]"/></hierarchy>`,
2055+
})
2056+
},
2057+
})
2058+
defer server.Close()
2059+
2060+
shell := &MockShellExecutor{}
2061+
client := newMockHTTPClient(server.URL)
2062+
info := &core.PlatformInfo{ScreenWidth: 1080, ScreenHeight: 2400}
2063+
driver := New(client.Client, info, shell)
2064+
2065+
step := &flow.ScrollUntilVisibleStep{
2066+
Element: flow.Selector{Text: "Target"},
2067+
Direction: "down",
2068+
MaxScrolls: 1,
2069+
}
2070+
driver.Execute(step)
2071+
2072+
if len(shell.commands) == 0 {
2073+
t.Fatal("expected ADB shell commands for scroll")
2074+
}
2075+
cmd := shell.commands[0]
2076+
if !strings.HasPrefix(cmd, "input swipe") {
2077+
t.Errorf("expected 'input swipe' command, got: %s", cmd)
2078+
}
2079+
// Direction DOWN = finger moves up = fromY > toY
2080+
var fromX, fromY, toX, toY, dur int
2081+
fmt.Sscanf(cmd, "input swipe %d %d %d %d %d", &fromX, &fromY, &toX, &toY, &dur)
2082+
if fromY <= toY {
2083+
t.Errorf("scroll DOWN should produce finger-up swipe (fromY > toY), got fromY=%d toY=%d", fromY, toY)
2084+
}
2085+
}
2086+
2087+
func TestScrollUntilVisibleConnectionError(t *testing.T) {
2088+
// Create a server that we'll shut down mid-scroll to simulate connection failure
2089+
callCount := 0
2090+
server := setupMockServer(t, map[string]func(w http.ResponseWriter, r *http.Request){
2091+
"POST /element": func(w http.ResponseWriter, r *http.Request) {
2092+
callCount++
2093+
writeJSON(w, map[string]interface{}{
2094+
"value": map[string]string{"ELEMENT": ""},
2095+
})
2096+
},
2097+
"GET /source": func(w http.ResponseWriter, r *http.Request) {
2098+
writeJSON(w, map[string]interface{}{
2099+
"value": `<hierarchy><node text="Other" bounds="[0,0][100,100]"/></hierarchy>`,
2100+
})
2101+
},
2102+
"GET /appium/device/info": func(w http.ResponseWriter, r *http.Request) {
2103+
writeJSON(w, map[string]interface{}{
2104+
"value": map[string]interface{}{"realDisplaySize": "1080x2400"},
2105+
})
2106+
},
2107+
})
2108+
2109+
client := newMockHTTPClient(server.URL)
2110+
info := &core.PlatformInfo{ScreenWidth: 1080, ScreenHeight: 2400}
2111+
driver := New(client.Client, info, nil)
2112+
2113+
// Shut down the server to simulate connection failure
2114+
server.Close()
2115+
2116+
step := &flow.ScrollUntilVisibleStep{
2117+
Element: flow.Selector{Text: "Target"},
2118+
Direction: "down",
2119+
MaxScrolls: 10,
2120+
}
2121+
result := driver.Execute(step)
2122+
2123+
if result.Success {
2124+
t.Fatal("expected failure on connection error")
2125+
}
2126+
// Should get a connection error, not "element not found after N scrolls"
2127+
if strings.Contains(result.Error.Error(), "not found after") {
2128+
t.Errorf("expected connection error to be propagated, got: %s", result.Error.Error())
2129+
}
2130+
}
2131+
19802132
// ============================================================================
19812133
// Relative Selector Tests (uses HTTP mock for anchor + page source for target)
19822134
// ============================================================================

0 commit comments

Comments
 (0)