Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions packages/cli/src/commands/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ interface RunOptions {
model: string;
provider: string;
headless: boolean;
stepLimit: number;
maxSteps: number;
verbose: boolean;
noCost: boolean;
}
Expand Down Expand Up @@ -68,15 +68,15 @@ export function registerRunCommand(program: Command): void {
.command('run')
.description('Run an AI agent to complete a browser task')
.argument('<task>', 'Description of the task for the agent to complete')
.option('-m, --model <model>', 'Model ID to use', 'gpt-4o')
.option('-p, --provider <provider>', 'LLM provider (openai, anthropic, google)', 'openai')
.option('-m, --model <model>', 'Model ID to use', 'claude-haiku-4-5-20251001')
.option('-p, --provider <provider>', 'LLM provider (openai, anthropic, google)', 'anthropic')
.option('--headless', 'Run browser in headless mode', true)
.option('--no-headless', 'Show the browser window')
.option('--max-steps <n>', 'Maximum number of agent steps', '25')
.option('-v, --verbose', 'Show detailed step information', false)
.option('--no-cost', 'Hide cost tracking information')
.action(async (task: string, options: RunOptions) => {
const stepLimit = Number.parseInt(String(options.stepLimit), 10);
const stepLimit = Number.parseInt(String(options.maxSteps), 10);

displayHeader(`Agent Task: ${task}`);
console.log(
Expand Down
Empty file modified packages/cli/src/index.ts
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"@ai-sdk/google": "^1.1.0",
"zod": "^3.24.0",
"playwright": "^1.51.0",
"mitt": "^3.0.2",
"mitt": "^3.0.1",
"nanoid": "^5.1.0",
"turndown": "^7.2.1",
"dotenv": "^16.5.0"
Expand Down
22 changes: 12 additions & 10 deletions packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -418,16 +418,18 @@ export class Agent {
}

// Build state message
const stateText = InstructionBuilder.buildStatePrompt(
browserState.url,
browserState.title,
browserState.tabs,
domState.tree,
step,
stepLimit,
domState.pixelsAbove,
domState.pixelsBelow,
);
const stateText =
InstructionBuilder.buildTaskPrompt(this.settings.task) + '\n\n' +
InstructionBuilder.buildStatePrompt(
browserState.url,
browserState.title,
browserState.tabs,
domState.tree,
step,
stepLimit,
domState.pixelsAbove,
domState.pixelsBelow,
);

// Check for loop
const loopCheck = this.loopDetector.isStuck();
Expand Down
21 changes: 17 additions & 4 deletions packages/core/src/page/snapshot-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ export class SnapshotBuilder {
};
}

const { nodes, layout, strings } = doc;
const { nodes, layout } = doc;
// strings may be on the document or at the top-level (newer Chromium)
const strings = doc.strings ?? snapshot.strings ?? [];

// Build backend node ID → AX node map
const axNodeMap = new Map<number, AXNode>();
Expand All @@ -76,7 +78,7 @@ export class SnapshotBuilder {
layoutMap.set(nodeIdx, {
bounds: layout.bounds[i],
text: layout.text[i] !== -1 ? strings[layout.text[i]] : undefined,
paintOrder: layout.paintOrder?.[i],
paintOrder: (layout.paintOrders ?? layout.paintOrder)?.[i],
});
}

Expand Down Expand Up @@ -199,8 +201,9 @@ export class SnapshotBuilder {
node.highlightIndex = elementIndex(this.indexCounter++);
}

// Build children
const childIndexes: number[] = nodes.childNodeIndexes?.[nodeIndex] ?? [];
// Build children — use childNodeIndexes if available, otherwise derive from parentIndex
const childIndexes: number[] = nodes.childNodeIndexes?.[nodeIndex]
?? this.getChildIndexes(nodes.parentIndex, nodeIndex);
for (const childIdx of childIndexes) {
const child = this.buildNodeTree(
childIdx,
Expand All @@ -220,6 +223,16 @@ export class SnapshotBuilder {
return node;
}

/**
 * Derive the child node indexes of a node from a flat parent-index table.
 *
 * @param parentIndex - Table where the entry at position `i` is compared against `nodeIndex`.
 * @param nodeIndex - Index of the node whose children are wanted.
 * @returns Every position `i` with `parentIndex[i] === nodeIndex`, in ascending order;
 *          an empty array when nothing matches.
 */
private getChildIndexes(parentIndex: number[], nodeIndex: number): number[] {
  const matches: number[] = [];
  // One linear pass over the table; positions are visited in ascending order.
  for (const [position, parent] of parentIndex.entries()) {
    if (parent !== nodeIndex) {
      continue;
    }
    matches.push(position);
  }
  return matches;
}

private buildAXMap(node: AXNode, map: Map<number, AXNode>): void {
if (node.backendDOMNodeId) {
map.set(node.backendDOMNodeId, node);
Expand Down
5 changes: 4 additions & 1 deletion packages/core/src/page/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,17 @@ export interface CDPSnapshotResult {
text: number[];
stackingContexts?: { index: number[] };
paintOrder?: number[];
paintOrders?: number[];
styles: number[][];
};
textBoxes: {
layoutIndex: number[];
bounds: number[][];
};
strings: string[];
strings?: string[];
}>;
/** In newer Chromium versions, strings are at the top level */
strings?: string[];
}

export interface AXNode {
Expand Down
3 changes: 2 additions & 1 deletion packages/core/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,10 @@ const URL_REGEX = /https?:\/\/[^\s<>"{}|\\^`\[\]]+/g;

/**
 * Extract all URLs from a text string.
 * Strips trailing punctuation that is likely part of the surrounding sentence.
 *
 * @param text - Text to scan for URLs.
 * @returns Every match of `URL_REGEX` in order of appearance, each with any
 *          trailing run of `. , ; : ! ? )` characters removed.
 */
export function extractUrls(text: string): string[] {
  // Only the punctuation-stripping return is kept: an earlier plain
  // `m[0]` return placed before it made the stripping unreachable.
  return [...text.matchAll(URL_REGEX)].map((m) => m[0].replace(/[.,;:!?)]+$/, ''));
}

/**
Expand Down
Loading