Skip to content

Commit 4ee1c04

Browse files
authored
Merge pull request #188 from Predicate-Labs/scroll_verify
scroll verification
2 parents 2dfa07f + b8f5502 commit 4ee1c04

File tree

6 files changed

+278
-3
lines changed

6 files changed

+278
-3
lines changed

CHANGELOG.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,30 @@ All notable changes to `@predicatelabs/sdk` will be documented in this file.
44

55
## Unreleased
66

7+
### 2026-02-13
8+
9+
#### Expanded deterministic verifications (adaptive resnapshotting)
10+
11+
You can now make `.eventually()` verifications more reliable on long / virtualized pages by **automatically increasing the snapshot `limit` across retries** (so later attempts see more elements).
12+
13+
- **AgentRuntime assertions**: `AssertionHandle.eventually({ snapshotLimitGrowth: ... })`
14+
- **Expect-style verifications**: `expect(...).eventually({ snapshotLimitGrowth: ... })`
15+
- **Commit**: `5f011b878c9a1dcb8c5976b365f0f80b7abe135c`
16+
17+
**Example**
18+
19+
```ts
20+
await dbg.check(exists("text~'Checkout'"), 'checkout_visible', true).eventually({
21+
timeoutMs: 12_000,
22+
snapshotLimitGrowth: {
23+
startLimit: 60,
24+
step: 40,
25+
maxLimit: 220,
26+
applyOn: 'only_on_fail', // default; or "all"
27+
},
28+
});
29+
```
30+
731
### Deprecated
832

933
- Soft-deprecated legacy `Sentience*` class names in favor of `Predicate*` names:

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,24 @@ async function loginExample(): Promise<void> {
169169
- Fluent assertion DSL via `expect(...)`
170170
- Retrying verification via `runtime.check(...).eventually(...)`
171171

172+
### Scroll verification (prevent no-op scroll drift)
173+
174+
A common agent failure mode is “scrolling” without the UI actually advancing (for example, because of overlays, nested scrollers, or focus issues). Use `AgentRuntime.scrollBy(...)` to deterministically verify that the scroll _had an effect_ by comparing `scrollTop` before and after the scroll.
175+
176+
```ts
177+
runtime.beginStep('Scroll the page and verify it moved');
178+
const ok = await runtime.scrollBy(600, {
179+
verify: true,
180+
minDeltaPx: 50,
181+
label: 'scroll_effective',
182+
required: true,
183+
timeoutMs: 5_000,
184+
});
185+
if (!ok) {
186+
throw new Error('Scroll had no effect (likely blocked by overlay or nested scroller).');
187+
}
188+
```
189+
172190
### Explained failure
173191

174192
- JSONL trace events (`Tracer` + `JsonlTraceSink`)

src/agent-runtime.ts

Lines changed: 136 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,8 @@ export class AgentRuntime {
501501
label: string,
502502
required: boolean,
503503
extra: Record<string, any> | null,
504-
recordInStep: boolean
504+
recordInStep: boolean,
505+
kind: 'assert' | 'task_done' | 'captcha' | 'scroll' = 'assert'
505506
): void {
506507
const details = { ...(outcome.details || {}) } as Record<string, any>;
507508

@@ -541,7 +542,7 @@ export class AgentRuntime {
541542
this.tracer.emit(
542543
'verification',
543544
{
544-
kind: 'assert',
545+
kind,
545546
...record,
546547
},
547548
this.stepId || undefined
@@ -784,6 +785,139 @@ export class AgentRuntime {
784785
}
785786
}
786787

788+
private async getScrollTop(): Promise<number> {
789+
try {
790+
const v = await this.page.evaluate(
791+
"(() => { const el = document.scrollingElement || document.documentElement || document.body; return (el && typeof el.scrollTop === 'number') ? el.scrollTop : (typeof window.scrollY === 'number' ? window.scrollY : 0); })()"
792+
);
793+
const n = Number(v);
794+
return Number.isFinite(n) ? n : 0;
795+
} catch {
796+
return 0;
797+
}
798+
}
799+
800+
/**
801+
* Scroll by `deltaY` and, unless `verify` is false, confirm the scroll had an effect: the absolute change in `scrollTop` must reach `minDeltaPx` before `timeoutMs` elapses.
802+
*
803+
* This targets a common drift failure mode: "we scrolled" but the UI didn't advance.
804+
*/
805+
async scrollBy(
806+
deltaY: number,
807+
opts?: {
808+
verify?: boolean;
809+
minDeltaPx?: number;
810+
label?: string;
811+
required?: boolean;
812+
timeoutMs?: number;
813+
pollMs?: number;
814+
x?: number;
815+
y?: number;
816+
jsFallback?: boolean;
817+
}
818+
): Promise<boolean> {
819+
const verify = opts?.verify ?? true;
820+
const minDeltaPx = opts?.minDeltaPx ?? 50;
821+
const label = opts?.label ?? 'scroll_effective';
822+
const required = opts?.required ?? true;
823+
const timeoutMs = opts?.timeoutMs ?? 10_000;
824+
const pollMs = opts?.pollMs ?? 250;
825+
const jsFallback = opts?.jsFallback ?? true;
826+
827+
await this.recordAction(`scrollBy(deltaY=${deltaY})`, this.page?.url?.());
828+
829+
const doWheel = async (): Promise<void> => {
830+
const mouse: any = (this.page as any)?.mouse;
831+
if (mouse && typeof mouse.wheel === 'function') {
832+
// Playwright: mouse.wheel(deltaX, deltaY)
833+
await mouse.wheel(opts?.x ?? 0, deltaY);
834+
return;
835+
}
836+
// Fallback: request scroll via JS (best-effort)
837+
await this.page.evaluate(`window.scrollBy(0, ${Number(deltaY)})`);
838+
};
839+
840+
if (!verify) {
841+
await doWheel();
842+
return true;
843+
}
844+
845+
const beforeTop = await this.getScrollTop();
846+
await doWheel();
847+
848+
const start = Date.now();
849+
let usedJsFallback = false;
850+
851+
while (true) {
852+
const afterTop = await this.getScrollTop();
853+
const delta = afterTop - beforeTop;
854+
const passed = Math.abs(delta) >= minDeltaPx;
855+
856+
if (passed) {
857+
this._recordOutcome(
858+
{
859+
passed: true,
860+
reason: '',
861+
details: {
862+
deltaY,
863+
min_delta_px: minDeltaPx,
864+
before_top: beforeTop,
865+
after_top: afterTop,
866+
delta_px: delta,
867+
js_fallback_used: usedJsFallback,
868+
},
869+
} as any,
870+
label,
871+
required,
872+
null,
873+
true,
874+
'scroll'
875+
);
876+
return true;
877+
}
878+
879+
if (Date.now() - start >= timeoutMs) {
880+
this._recordOutcome(
881+
{
882+
passed: false,
883+
reason: `scroll delta ${delta.toFixed(1)}px < min_delta_px=${minDeltaPx.toFixed(1)}px`,
884+
details: {
885+
deltaY,
886+
min_delta_px: minDeltaPx,
887+
before_top: beforeTop,
888+
after_top: afterTop,
889+
delta_px: delta,
890+
js_fallback_used: usedJsFallback,
891+
timeout_ms: timeoutMs,
892+
},
893+
} as any,
894+
label,
895+
required,
896+
null,
897+
true,
898+
'scroll'
899+
);
900+
if (required) {
901+
this.persistFailureArtifacts(`scroll_failed:${label}`).catch(() => {
902+
// best-effort
903+
});
904+
}
905+
return false;
906+
}
907+
908+
if (jsFallback && !usedJsFallback && Math.abs(delta) < 1.0) {
909+
usedJsFallback = true;
910+
try {
911+
await this.page.evaluate(`window.scrollBy(0, ${Number(deltaY)})`);
912+
} catch {
913+
// ignore
914+
}
915+
}
916+
917+
await new Promise(resolve => setTimeout(resolve, pollMs));
918+
}
919+
}
920+
787921
/**
788922
* List open tabs in the current browser context.
789923
*/

src/tracing/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ export interface TraceEventData {
184184
verify?: VerifyData;
185185

186186
// Verification event fields (for assertion loop)
187-
kind?: 'assert' | 'task_done' | 'captcha';
187+
kind?: 'assert' | 'task_done' | 'captcha' | 'scroll';
188188
label?: string;
189189
passed?: boolean;
190190
required?: boolean;
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { AgentRuntime } from '../src/agent-runtime';
2+
import { TraceSink } from '../src/tracing/sink';
3+
import { Tracer } from '../src/tracing/tracer';
4+
import { MockPage } from './mocks/browser-mock';
5+
6+
class MockSink extends TraceSink {
7+
public events: any[] = [];
8+
emit(event: Record<string, any>): void {
9+
this.events.push(event);
10+
}
11+
async close(): Promise<void> {
12+
// no-op
13+
}
14+
getSinkType(): string {
15+
return 'MockSink';
16+
}
17+
}
18+
19+
describe('AgentRuntime.scrollBy() deterministic verification', () => {
20+
it('passes when scrollTop delta >= minDeltaPx', async () => {
21+
const sink = new MockSink();
22+
const tracer = new Tracer('test-run', sink);
23+
const page = new MockPage('https://example.com') as any;
24+
const browserLike = {
25+
snapshot: async () => ({
26+
status: 'success',
27+
url: 'https://example.com',
28+
elements: [],
29+
timestamp: 't1',
30+
}),
31+
};
32+
33+
const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
34+
runtime.beginStep('scroll');
35+
36+
const ok = await runtime.scrollBy(200, {
37+
verify: true,
38+
minDeltaPx: 50,
39+
timeoutMs: 1000,
40+
pollMs: 1,
41+
});
42+
expect(ok).toBe(true);
43+
44+
const hasScrollVerification = sink.events.some(
45+
e => e.type === 'verification' && e.data?.kind === 'scroll' && e.data?.passed === true
46+
);
47+
expect(hasScrollVerification).toBe(true);
48+
});
49+
50+
it('fails when scrollTop delta stays below minDeltaPx', async () => {
51+
const sink = new MockSink();
52+
const tracer = new Tracer('test-run', sink);
53+
const page = new MockPage('https://example.com') as any;
54+
55+
// Override wheel to be a no-op scroll (simulates blocked scroll).
56+
(page.mouse as any).wheel = async (_dx: number, _dy: number) => {
57+
// no-op
58+
};
59+
60+
const browserLike = {
61+
snapshot: async () => ({
62+
status: 'success',
63+
url: 'https://example.com',
64+
elements: [],
65+
timestamp: 't1',
66+
}),
67+
};
68+
69+
const runtime = new AgentRuntime(browserLike as any, page as any, tracer);
70+
runtime.beginStep('scroll');
71+
72+
const ok = await runtime.scrollBy(200, {
73+
verify: true,
74+
minDeltaPx: 50,
75+
timeoutMs: 30,
76+
pollMs: 1,
77+
jsFallback: false,
78+
});
79+
expect(ok).toBe(false);
80+
81+
const hasFailedScrollVerification = sink.events.some(
82+
e => e.type === 'verification' && e.data?.kind === 'scroll' && e.data?.passed === false
83+
);
84+
expect(hasFailedScrollVerification).toBe(true);
85+
});
86+
});

tests/mocks/browser-mock.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@ import { Page } from 'playwright';
1414
*/
1515
export class MockPage implements IPage {
1616
private _url: string = 'https://example.com';
17+
private _scrollTop: number = 0;
1718
public evaluateCalls: Array<{ script: string | Function; args: any[] }> = [];
1819
public gotoCalls: Array<{ url: string; options?: any }> = [];
1920
public waitForFunctionCalls: Array<{ fn: () => boolean | Promise<boolean>; options?: any }> = [];
2021
public waitForTimeoutCalls: number[] = [];
2122
public mouseClickCalls: Array<{ x: number; y: number }> = [];
23+
public mouseWheelCalls: Array<{ dx: number; dy: number }> = [];
2224
public keyboardTypeCalls: string[] = [];
2325
public keyboardPressCalls: string[] = [];
2426
public screenshotCalls: Array<{ options?: any }> = [];
@@ -51,6 +53,13 @@ export class MockPage implements IPage {
5153
} as T;
5254
}
5355

56+
if (
57+
typeof script === 'string' &&
58+
(script.includes('scrollTop') || script.includes('scrollY'))
59+
) {
60+
return this._scrollTop as any as T;
61+
}
62+
5463
return {} as T;
5564
}
5665

@@ -79,6 +88,10 @@ export class MockPage implements IPage {
7988
click: async (x: number, y: number): Promise<void> => {
8089
this.mouseClickCalls.push({ x, y });
8190
},
91+
wheel: async (dx: number, dy: number): Promise<void> => {
92+
this.mouseWheelCalls.push({ dx, dy });
93+
this._scrollTop += dy;
94+
},
8295
};
8396

8497
keyboard = {

0 commit comments

Comments
 (0)