diff --git a/bun.lock b/bun.lock index 3909f16..eab9ef4 100644 --- a/bun.lock +++ b/bun.lock @@ -4,37 +4,37 @@ "": { "name": "open-browser-monorepo", "devDependencies": { - "@biomejs/biome": "^1.9.0", - "@types/bun": "^1.1.0", - "typescript": "^5.6.0", + "@biomejs/biome": "^1.9.4", + "@types/bun": "^1.2.0", + "typescript": "^5.8.0", }, }, "packages/cli": { "name": "@open-browser/cli", - "version": "0.1.0", + "version": "1.1.0", "bin": { "open-browser": "src/index.ts", }, "dependencies": { - "chalk": "^5.3.0", - "commander": "^12.0.0", + "chalk": "^5.4.0", + "commander": "^12.1.0", "open-browser": "workspace:*", }, }, "packages/core": { "name": "open-browser", - "version": "0.1.0", + "version": "1.1.0", "dependencies": { - "@ai-sdk/anthropic": "^1.0.0", - "@ai-sdk/google": "^1.0.0", - "@ai-sdk/openai": "^1.0.0", - "ai": "^4.0.0", - "dotenv": "^16.4.0", + "@ai-sdk/anthropic": "^1.1.0", + "@ai-sdk/google": "^1.1.0", + "@ai-sdk/openai": "^1.1.0", + "ai": "^4.2.0", + "dotenv": "^16.5.0", "mitt": "^3.0.1", - "nanoid": "^5.0.0", - "playwright": "^1.48.0", - "turndown": "^7.2.0", - "zod": "^3.23.0", + "nanoid": "^5.1.0", + "playwright": "^1.51.0", + "turndown": "^7.2.1", + "zod": "^3.24.0", }, "devDependencies": { "@types/turndown": "^5.0.5", @@ -48,7 +48,7 @@ }, "packages/sandbox": { "name": "@open-browser/sandbox", - "version": "0.1.0", + "version": "1.1.0", "dependencies": { "open-browser": "workspace:*", }, diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts index b680e0c..1e07996 100644 --- a/packages/cli/src/commands/run.ts +++ b/packages/cli/src/commands/run.ts @@ -22,7 +22,7 @@ interface RunOptions { model: string; provider: string; headless: boolean; - stepLimit: number; + maxSteps: number; verbose: boolean; noCost: boolean; } @@ -68,15 +68,15 @@ export function registerRunCommand(program: Command): void { .command('run') .description('Run an AI agent to complete a browser task') .argument('', 'Description of the task for the agent to complete') - .option('-m, --model ', 'Model ID to use', 'gpt-4o') - .option('-p, --provider ', 'LLM provider (openai, anthropic, google)', 'openai') + .option('-m, --model ', 'Model ID to use', 'claude-haiku-4-5-20251001') + .option('-p, --provider ', 'LLM provider (openai, anthropic, google)', 'anthropic') .option('--headless', 'Run browser in headless mode', true) .option('--no-headless', 'Show the browser window') .option('--max-steps ', 'Maximum number of agent steps', '25') .option('-v, --verbose', 'Show detailed step information', false) .option('--no-cost', 'Hide cost tracking information') .action(async (task: string, options: RunOptions) => { - const stepLimit = Number.parseInt(String(options.stepLimit), 10); + const stepLimit = Number.parseInt(String(options.maxSteps), 10); displayHeader(`Agent Task: ${task}`); console.log( diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts old mode 100644 new mode 100755 diff --git a/packages/core/package.json b/packages/core/package.json index d8d5f35..91b4f82 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -20,7 +20,7 @@ "@ai-sdk/google": "^1.1.0", "zod": "^3.24.0", "playwright": "^1.51.0", - "mitt": "^3.0.2", + "mitt": "^3.0.1", "nanoid": "^5.1.0", "turndown": "^7.2.1", "dotenv": "^16.5.0" diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 8b98b71..1b25577 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -418,16 +418,18 @@ export class Agent { } // Build state message - const stateText = InstructionBuilder.buildStatePrompt( - browserState.url, - browserState.title, - browserState.tabs, - domState.tree, - step, - stepLimit, - domState.pixelsAbove, - domState.pixelsBelow, - ); + const stateText = + InstructionBuilder.buildTaskPrompt(this.settings.task) + '\n\n' + + InstructionBuilder.buildStatePrompt( + browserState.url, + browserState.title, + browserState.tabs, + domState.tree, + step, + stepLimit, + domState.pixelsAbove, + domState.pixelsBelow, + ); // Check for loop const loopCheck = this.loopDetector.isStuck(); diff --git a/packages/core/src/page/snapshot-builder.ts b/packages/core/src/page/snapshot-builder.ts index 52ecb5f..f5262b3 100644 --- a/packages/core/src/page/snapshot-builder.ts +++ b/packages/core/src/page/snapshot-builder.ts @@ -63,7 +63,9 @@ export class SnapshotBuilder { }; } - const { nodes, layout, strings } = doc; + const { nodes, layout } = doc; + // strings may be on the document or at the top-level (newer Chromium) + const strings = doc.strings ?? snapshot.strings ?? []; // Build backend node ID → AX node map const axNodeMap = new Map(); @@ -76,7 +78,7 @@ export class SnapshotBuilder { layoutMap.set(nodeIdx, { bounds: layout.bounds[i], text: layout.text[i] !== -1 ? strings[layout.text[i]] : undefined, - paintOrder: layout.paintOrder?.[i], + paintOrder: (layout.paintOrders ?? layout.paintOrder)?.[i], }); } @@ -199,8 +201,9 @@ export class SnapshotBuilder { node.highlightIndex = elementIndex(this.indexCounter++); } - // Build children - const childIndexes: number[] = nodes.childNodeIndexes?.[nodeIndex] ?? []; + // Build children — use childNodeIndexes if available, otherwise derive from parentIndex + const childIndexes: number[] = nodes.childNodeIndexes?.[nodeIndex] + ?? this.getChildIndexes(nodes.parentIndex, nodeIndex); for (const childIdx of childIndexes) { const child = this.buildNodeTree( childIdx, @@ -220,6 +223,16 @@ export class SnapshotBuilder { return node; } + private getChildIndexes(parentIndex: number[], nodeIndex: number): number[] { + const children: number[] = []; + for (let i = 0; i < parentIndex.length; i++) { + if (parentIndex[i] === nodeIndex) { + children.push(i); + } + } + return children; + } + private buildAXMap(node: AXNode, map: Map): void { if (node.backendDOMNodeId) { map.set(node.backendDOMNodeId, node); diff --git a/packages/core/src/page/types.ts b/packages/core/src/page/types.ts index 558d475..b6513ec 100644 --- a/packages/core/src/page/types.ts +++ b/packages/core/src/page/types.ts @@ -172,14 +172,17 @@ export interface CDPSnapshotResult { text: number[]; stackingContexts?: { index: number[] }; paintOrder?: number[]; + paintOrders?: number[]; styles: number[][]; }; textBoxes: { layoutIndex: number[]; bounds: number[][]; }; - strings: string[]; + strings?: string[]; }>; + /** In newer Chromium versions, strings are at the top level */ + strings?: string[]; } export interface AXNode { diff --git a/packages/core/src/utils.ts b/packages/core/src/utils.ts index 512fb77..995139d 100644 --- a/packages/core/src/utils.ts +++ b/packages/core/src/utils.ts @@ -233,9 +233,10 @@ const URL_REGEX = /https?:\/\/[^\s<>"{}|\\^`\[\]]+/g; /** * Extract all URLs from a text string. + * Strips trailing punctuation that is likely part of the surrounding sentence. */ export function extractUrls(text: string): string[] { - return [...text.matchAll(URL_REGEX)].map((m) => m[0]); + return [...text.matchAll(URL_REGEX)].map((m) => m[0].replace(/[.,;:!?)]+$/, '')); } /**