Skip to content

Commit 490bea5

Browse files
k4cper-g and claude committed
Add pagination (page) feature for clipped tree content
Add Session.page() to serve slices of cached raw tree for offscreen items without UI scrolling. Includes page MCP tool, serializePage formatter, findNodeById utility, updated clipping hints to reference page(), streamlined ALL_ROLES set, and pagination tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6c46ba2 commit 490bea5

6 files changed

Lines changed: 427 additions & 16 deletions

File tree

docs/api-reference.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ Each node in the tree:
246246
}
247247
```
248248
249-
**Roles:** 54 ARIA-derived roles. See [schema/mappings.json](../schema/mappings.json) for the full list and per-platform mappings.
249+
**Roles:** 59 ARIA-derived roles. See [schema/mappings.json](../schema/mappings.json) for the full list and per-platform mappings.
250250
251251
**States:** `busy`, `checked`, `collapsed`, `disabled`, `editable`, `expanded`, `focused`, `hidden`, `mixed`, `modal`, `multiselectable`, `offscreen`, `pressed`, `readonly`, `required`, `selected`
252252

src/format.ts

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ function pruneNode(
255255
const childBounds = child.bounds;
256256
if (childViewport && childBounds && isOutsideViewport(childBounds, childViewport)) {
257257
const dir = clipDirection(childBounds, childViewport);
258-
clipped[dir as keyof typeof clipped] += countNodes([child]);
258+
clipped[dir as keyof typeof clipped] += 1;
259259
hasClipped = true;
260260
continue;
261261
}
@@ -306,6 +306,22 @@ export function pruneTree(
306306
return result;
307307
}
308308

309+
// ---------------------------------------------------------------------------
310+
// Tree utilities
311+
// ---------------------------------------------------------------------------
312+
313+
/**
 * Find a node by its element ID in a tree.
 *
 * Performs a pre-order depth-first search (a node is tested before its
 * children, siblings left to right) and returns the first match. The walk
 * uses an explicit stack instead of recursion so that very deep trees cannot
 * exhaust the JavaScript call stack.
 *
 * @param tree - Root nodes to search.
 * @param id - Element ID to look for (compared with `===` against `node.id`).
 * @returns The first matching node in pre-order, or `null` when none matches.
 */
export function findNodeById(tree: CupNode[], id: string): CupNode | null {
  const pending: CupNode[] = [];

  // Push right-to-left so the leftmost node ends up on top of the stack and
  // is visited first, which keeps the traversal order identical to a
  // recursive pre-order walk.
  const pushReversed = (nodes: readonly CupNode[]): void => {
    for (let i = nodes.length - 1; i >= 0; i--) {
      const candidate = nodes[i];
      if (candidate !== undefined) pending.push(candidate);
    }
  };

  pushReversed(tree);

  let node = pending.pop();
  while (node !== undefined) {
    if (node.id === id) return node;
    if (node.children) pushReversed(node.children);
    node = pending.pop();
  }

  return null;
}
324+
309325
// ---------------------------------------------------------------------------
310326
// Compact text serializer
311327
// ---------------------------------------------------------------------------
@@ -500,7 +516,8 @@ function emitCompact(node: CupNode, depth: number, lines: string[], counter: num
500516
if (left > 0) directions.push("left");
501517
if (right_ > 0) directions.push("right");
502518
const hintIndent = " ".repeat(depth + 1);
503-
lines.push(`${hintIndent}# ${total} more items — scroll ${directions.join("/")} to see`);
519+
const dir = directions[0] ?? "down";
520+
lines.push(`${hintIndent}# ${total} more items — page(element_id='${node.id}', direction='${dir}') to see`);
504521
}
505522
}
506523
}
@@ -573,3 +590,46 @@ export function serializeCompact(
573590

574591
return output;
575592
}
593+
594+
/**
 * Render one page of a container's children as compact text.
 *
 * The output has three parts joined by newlines and terminated by a newline:
 * a header naming the container and the 1-based item range shown, the compact
 * lines emitted for each page item, and an optional footer hinting how to
 * request the items after and/or before this page.
 *
 * @param container - Node whose children are being paged; supplies id, role and name.
 * @param pageItems - The children that make up this page.
 * @param offset - Zero-based index of the first page item within the container.
 * @param total - Total number of children in the container.
 * @returns The serialized page text.
 */
export function serializePage(
  container: CupNode,
  pageItems: CupNode[],
  offset: number,
  total: number,
): string {
  const lastShown = Math.min(offset + pageItems.length, total);
  const roleLabel = ROLE_CODES[container.role] ?? container.role;
  const quotedName = container.name ? ` "${container.name}"` : "";

  const header: string[] = [
    `# page ${container.id} | items ${offset + 1}-${lastShown} of ${total} | ${roleLabel}${quotedName}`,
    "",
  ];

  // Each item is emitted at depth 0 into its own buffer, sharing one counter.
  const body: string[] = [];
  const nodeCounter = [0];
  pageItems.forEach((child) => {
    emitCompact(child, 0, body, nodeCounter);
  });

  const footer: string[] = [];
  const itemsAfter = total - lastShown;
  if (itemsAfter > 0) {
    footer.push(
      "",
      `# ${itemsAfter} more — page(element_id='${container.id}', direction='down')`,
    );
  }
  if (offset > 0) {
    footer.push(`# ${offset} before — page(element_id='${container.id}', direction='up')`);
  }

  return `${header.concat(body, footer).join("\n")}\n`;
}

src/index.ts

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ import {
2525
pruneTree,
2626
serializeCompact,
2727
serializeOverview,
28+
serializePage,
29+
findNodeById,
2830
formatLine,
2931
} from "./format.js";
3032
import { searchTree } from "./search.js";
@@ -49,6 +51,7 @@ export class Session {
4951
private executor: ActionExecutor;
5052
private lastTree: CupNode[] | null = null;
5153
private lastRawTree: CupNode[] | null = null;
54+
private _pageCursors: Map<string, number> = new Map();
5255

5356
private constructor(adapter: PlatformAdapter) {
5457
this.adapter = adapter;
@@ -174,6 +177,7 @@ export class Session {
174177
// Store raw tree for search + pruned tree for compact
175178
this.lastRawTree = envelope.tree;
176179
this.lastTree = pruneTree(envelope.tree, { detail });
180+
this._pageCursors.clear();
177181

178182
if (compact) {
179183
return serializeCompact(envelope, { windowList, detail });
@@ -189,20 +193,23 @@ export class Session {
189193
actionName: string,
190194
params?: Record<string, unknown>,
191195
): Promise<ActionResult> {
196+
this._pageCursors.clear();
192197
return this.executor.action(elementId, actionName, params);
193198
}
194199

195200
/**
 * Send a keyboard shortcut to the focused window.
 *
 * Stored page cursors are dropped before the key combination is forwarded to
 * the executor.
 *
 * @param combo - Key combination, passed to the executor unchanged.
 * @returns The executor's result for the key press.
 */
async press(combo: string): Promise<ActionResult> {
  const cursors = this._pageCursors;
  cursors.clear();

  const outcome = this.executor.press(combo);
  return outcome;
}
201207

202208
/**
 * Open an application by name (fuzzy matched).
 *
 * Stored page cursors are dropped before the request is forwarded to the
 * executor.
 *
 * @param name - Application name, passed to the executor unchanged.
 * @returns The executor's result for the launch request.
 */
async openApp(name: string): Promise<ActionResult> {
  const cursors = this._pageCursors;
  cursors.clear();

  const outcome = this.executor.openApp(name);
  return outcome;
}
208215

@@ -231,6 +238,107 @@ export class Session {
231238
return results.map((r) => r.node);
232239
}
233240

241+
/**
 * Page through clipped content in a scrollable container.
 *
 * Serves slices of the cached raw tree — no UI scrolling needed. The start of
 * each page is chosen as follows:
 *  - `offset` given: start at that child index (takes precedence over `direction`);
 *  - `direction` given, no stored cursor for this element: start at the
 *    boundary of the clipped region in that direction;
 *  - `direction` given, cursor stored: move one page forward ("down"/"right")
 *    or backward ("up"/"left") from the stored cursor;
 *  - neither given: start at the first below-clipped child.
 * The resulting index is clamped into `[0, total - 1]` and remembered as the
 * cursor for this element.
 *
 * @param elementId - ID of the container whose children are paged.
 * @param options - Optional paging controls.
 * @param options.direction - Direction to page in.
 * @param options.offset - Zero-based child index to start at; must be an integer.
 * @param options.limit - Page size; must be a positive integer. Defaults to the
 *   number of children the container has in the pruned tree, or 20 if none.
 * @returns The page rendered by `serializePage`.
 * @throws Error if no tree has been captured, the element is not found, the
 *   container has no children, the container appears to use virtual
 *   scrolling, or `offset`/`limit` are not valid integers.
 */
page(
  elementId: string,
  options?: {
    direction?: "up" | "down" | "left" | "right";
    offset?: number;
    limit?: number;
  },
): string {
  if (this.lastRawTree === null || this.lastTree === null) {
    throw new Error("No tree captured. Call snapshot() first.");
  }

  const rawContainer = findNodeById(this.lastRawTree, elementId);
  if (!rawContainer) {
    throw new Error(`Element '${elementId}' not found in current tree.`);
  }

  const rawChildren = rawContainer.children ?? [];
  if (rawChildren.length === 0) {
    throw new Error(`Container '${elementId}' has no children to paginate.`);
  }

  // Reject malformed numeric options up front. A limit below 1 would yield an
  // empty page and a cursor that never advances; a NaN or fractional offset
  // would corrupt both the slice and the stored cursor.
  const direction = options?.direction;
  const requestedOffset = options?.offset;
  const requestedLimit = options?.limit;
  if (requestedLimit != null && (!Number.isInteger(requestedLimit) || requestedLimit < 1)) {
    throw new Error(`Invalid limit '${requestedLimit}': must be a positive integer.`);
  }
  if (requestedOffset != null && !Number.isInteger(requestedOffset)) {
    throw new Error(`Invalid offset '${requestedOffset}': must be an integer.`);
  }

  // Get pruned container for visible count and _clipped metadata
  const prunedContainer = findNodeById(this.lastTree, elementId);
  const visibleCount = prunedContainer?.children?.length ?? 0;
  const clipped = prunedContainer?._clipped;
  const clippedAbove = clipped?.above ?? 0;
  const clippedBelow = clipped?.below ?? 0;
  const clippedLeft = clipped?.left ?? 0;
  const clippedRight = clipped?.right ?? 0;
  const clippedCount = clippedAbove + clippedBelow + clippedLeft + clippedRight;

  // Virtual scroll detection: if raw tree has far fewer children than
  // visible + clipped, the content is likely lazy-loaded
  if (clippedCount > 0) {
    const expectedTotal = visibleCount + clippedCount;
    if (rawChildren.length < expectedTotal * 0.8) {
      throw new Error(
        `Container '${elementId}' appears to use virtual scrolling ` +
          `(raw: ${rawChildren.length}, expected: ~${expectedTotal}). ` +
          `Use action(action='scroll', element_id='${elementId}', direction='...') ` +
          `followed by snapshot() instead.`,
      );
    }
  }

  const total = rawChildren.length;
  const defaultPageSize = visibleCount > 0 ? visibleCount : 20;
  const pageSize = requestedLimit ?? defaultPageSize;

  // Compute directional start offsets from clipping metadata.
  // Clipped-above items are at the start of the children array (low indices),
  // clipped-below items are at the end (high indices), because children are
  // in document/spatial order.
  const startDown = total - clippedBelow; // first below-clipped child
  const startUp = clippedAbove - 1; // last above-clipped child (page backwards from here)
  const startRight = total - clippedRight;
  const startLeft = clippedLeft - 1;

  // Determine offset
  let currentOffset: number;
  if (requestedOffset != null) {
    currentOffset = requestedOffset;
  } else if (direction) {
    const cursor = this._pageCursors.get(elementId);
    if (cursor == null) {
      // First page call — start at the boundary of clipped content
      if (direction === "down") currentOffset = startDown;
      else if (direction === "right") currentOffset = startRight;
      else if (direction === "up") currentOffset = Math.max(0, startUp - pageSize + 1);
      else /* left */ currentOffset = Math.max(0, startLeft - pageSize + 1);
    } else {
      currentOffset =
        direction === "down" || direction === "right"
          ? cursor + pageSize
          : Math.max(0, cursor - pageSize);
    }
  } else {
    // No direction or offset — show first page of hidden content
    currentOffset = startDown;
  }

  // Clamp into the valid index range (total >= 1 is guaranteed above).
  // NOTE(review): paging forward past the end therefore re-serves the final
  // child rather than signalling "no more items" — confirm this is intended.
  currentOffset = Math.max(0, Math.min(currentOffset, total - 1));

  // Slice
  const pageItems = rawChildren.slice(currentOffset, currentOffset + pageSize);

  // Track cursor
  this._pageCursors.set(elementId, currentOffset);

  return serializePage(rawContainer, pageItems, currentOffset, total);
}
341+
234342
/**
235343
* Execute a sequence of actions, stopping on first failure.
236344
*/

src/mcp/server.ts

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,17 @@ export const server = new McpServer(
2020
"tree of the user's computer.\n\n" +
2121
"WORKFLOW — follow this pattern:\n" +
2222
"1. snapshot to capture the active window's UI\n" +
23-
"2. find to locate specific elements (PREFERRED over re-capturing)\n" +
24-
"3. action to interact (click, type, press, etc.)\n" +
25-
"4. Re-capture ONLY after actions change the UI\n\n" +
23+
"2. If you see 'N more items — page(...)', call page to see hidden items\n" +
24+
"3. find to locate specific elements (PREFERRED over re-capturing)\n" +
25+
"4. action to interact (click, type, press, etc.)\n" +
26+
"5. Re-capture ONLY after actions change the UI\n\n" +
2627
"TOOLS:\n" +
2728
"- snapshot() — active window tree + window list (most common)\n" +
2829
"- snapshot_app(app) — specific app by title (when not in foreground)\n" +
2930
"- overview() — just the window list, near-instant\n" +
3031
"- snapshot_desktop() — desktop icons and widgets\n" +
3132
"- find(role/name/state) — search last tree without re-capturing\n" +
33+
"- page(element_id, direction) — page through clipped items in a scrollable container\n" +
3234
"- action(action, ...) — interact with elements or press keys\n" +
3335
"- open_app(name) — open an app by name with fuzzy matching\n" +
3436
"- screenshot(region) — visual context when tree isn't enough\n\n" +
@@ -322,6 +324,52 @@ Both modes can be combined: query + state="focused" narrows to focused elements.
322324
},
323325
);
324326

327+
// ---------------------------------------------------------------------------
328+
// Pagination tool
329+
// ---------------------------------------------------------------------------
330+
331+
server.tool(
332+
"page",
333+
`Page through clipped content in a scrollable container.
334+
335+
When a snapshot shows "N more items — page(...) to see", use this
336+
tool to retrieve the next batch of hidden children from the cached tree.
337+
338+
This does NOT scroll the actual UI — it serves from the cached tree.
339+
For guaranteed contiguity, call with just element_id and direction.
340+
341+
After any action or new snapshot, pagination resets.`,
342+
{
343+
element_id: z.string().describe("Scrollable container element ID (e.g., 'e5')"),
344+
direction: z
345+
.enum(["up", "down", "left", "right"])
346+
.optional()
347+
.describe("Page direction — advance or retreat one page"),
348+
offset: z.number().int().optional().describe("Jump to a specific child index (overrides direction)"),
349+
limit: z.number().int().optional().describe("Override page size (default: match viewport count)"),
350+
},
351+
async ({ element_id, direction, offset, limit }) => {
352+
const session = await getSession();
353+
try {
354+
const result = session.page(element_id, { direction, offset, limit });
355+
return { content: [{ type: "text", text: result }] };
356+
} catch (err: any) {
357+
return {
358+
content: [
359+
{
360+
type: "text",
361+
text: JSON.stringify({
362+
success: false,
363+
message: "",
364+
error: err.message ?? String(err),
365+
}),
366+
},
367+
],
368+
};
369+
}
370+
},
371+
);
372+
325373
// ---------------------------------------------------------------------------
326374
// Screenshot
327375
// ---------------------------------------------------------------------------

src/search.ts

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,14 @@ import type { CupNode, SearchResult } from "./types.js";
1616
// ---------------------------------------------------------------------------
1717

1818
export const ALL_ROLES: ReadonlySet<string> = new Set([
19-
"alert", "alertdialog", "application", "banner", "blockquote", "button",
20-
"caption", "cell", "checkbox", "code", "columnheader", "combobox",
21-
"complementary", "contentinfo", "deletion", "dialog", "document",
22-
"emphasis", "figure", "form", "generic", "grid", "group", "heading",
23-
"img", "insertion", "link", "list", "listitem", "log", "main", "marquee",
24-
"math", "menu", "menubar", "menuitem", "menuitemcheckbox", "menuitemradio",
25-
"navigation", "none", "note", "option", "paragraph", "progressbar",
26-
"radio", "region", "row", "rowheader", "scrollbar", "search", "searchbox",
27-
"separator", "slider", "spinbutton", "status", "strong", "subscript",
28-
"superscript", "switch", "tab", "table", "tablist", "tabpanel", "text",
19+
"alert", "alertdialog", "application", "banner", "button", "cell",
20+
"checkbox", "columnheader", "combobox", "complementary", "contentinfo",
21+
"dialog", "document", "form", "generic", "grid", "group", "heading",
22+
"img", "link", "list", "listitem", "log", "main", "marquee", "menu",
23+
"menubar", "menuitem", "menuitemcheckbox", "menuitemradio", "navigation",
24+
"none", "option", "progressbar", "radio", "region", "row", "rowheader",
25+
"scrollbar", "search", "searchbox", "separator", "slider", "spinbutton",
26+
"status", "switch", "tab", "table", "tablist", "tabpanel", "text",
2927
"textbox", "timer", "titlebar", "toolbar", "tooltip", "tree", "treeitem",
3028
"window",
3129
]);

0 commit comments

Comments
 (0)