From 1e264bcd85a35d95c1c373e0f4b234aa9eae2d24 Mon Sep 17 00:00:00 2001 From: bcode Date: Sun, 10 May 2026 17:42:44 -0700 Subject: [PATCH 1/4] browser_execute: move description after code in parameter schema Providers stream tool-call args in schema-declared order, so a leading description field commits the model to a stated intent before code generation. Matches the shell tool's command-first, description-last shape. --- packages/bcode-browser/src/browser-execute.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/packages/bcode-browser/src/browser-execute.ts b/packages/bcode-browser/src/browser-execute.ts index 7f25447db..8ac675088 100644 --- a/packages/bcode-browser/src/browser-execute.ts +++ b/packages/bcode-browser/src/browser-execute.ts @@ -49,11 +49,11 @@ import { Skills } from "./skills" const DEFAULT_TIMEOUT_MS = 60 * 1000 const MAX_TIMEOUT_MS = 10 * 60 * 1000 +// Field order matters: providers stream tool-call args in schema-declared +// order, so the model commits to whichever field comes first. `code` is the +// substantive output; `description` is a summary written after the code +// exists, mirroring the shell tool's `command` → ... → `description` shape. export const parameters = Schema.Struct({ - description: Schema.String.annotate({ - description: - "Clear, concise summary of what this snippet does in 3-7 words. Examples:\nInput: code that connects to local Chrome\nOutput: Connect to local Chrome\n\nInput: scrape product titles from current page\nOutput: Scrape product titles\n\nInput: capture a screenshot of the homepage\nOutput: Screenshot homepage", - }), code: Schema.String.annotate({ description: "JavaScript source. Wrapped in an async function with `session` (CDP Session) and `console` (per-call capture; same `log/error/warn/info` API) bound.", @@ -61,6 +61,10 @@ export const parameters = Schema.Struct({ timeout: Schema.optional(Schema.Number).annotate({ description: `Timeout in milliseconds. 
Default ${DEFAULT_TIMEOUT_MS}, max ${MAX_TIMEOUT_MS}.`, }), + description: Schema.String.annotate({ + description: + "Clear, concise summary of what this snippet does in 3-7 words. Examples:\nInput: code that connects to local Chrome\nOutput: Connect to local Chrome\n\nInput: scrape product titles from current page\nOutput: Scrape product titles\n\nInput: capture a screenshot of the homepage\nOutput: Screenshot homepage", + }), }) export type Parameters = Schema.Schema.Type From 85fc1d339b796fa808e1f121586135bc7cd9c885 Mon Sep 17 00:00:00 2001 From: bcode Date: Sun, 10 May 2026 17:47:00 -0700 Subject: [PATCH 2/4] rename BROWSER.md to browser-execute-guide.md Renames the primary browser_execute guide and updates every code/doc reference. The file's role is the same; the name now states what it documents. --- packages/bcode-browser/README.md | 2 +- .../{BROWSER.md => browser-execute-guide.md} | 4 ++-- .../bcode-browser/skills/cloud-browser.md | 4 ++-- packages/bcode-browser/src/browser-execute.ts | 2 +- packages/bcode-browser/src/index.ts | 2 +- packages/bcode-browser/src/skills.ts | 2 +- packages/bcode-browser/test/skills.test.ts | 6 +++--- .../test/workspace-import.test.ts | 2 +- packages/opencode/src/agent/agent.ts | 2 +- packages/opencode/src/tool/browser-execute.ts | 2 +- .../opencode/src/tool/browser-execute.txt | 19 ++++++------------- 11 files changed, 20 insertions(+), 27 deletions(-) rename packages/bcode-browser/skills/{BROWSER.md => browser-execute-guide.md} (99%) diff --git a/packages/bcode-browser/README.md b/packages/bcode-browser/README.md index 3100920cb..1c700cd96 100644 --- a/packages/bcode-browser/README.md +++ b/packages/bcode-browser/README.md @@ -12,7 +12,7 @@ See `decisions.md §1c` (three-level model) and `§1d` (this package) in the Bro | `src/browser-execute.ts` | In-process JS-eval `browser_execute` body. | | `src/session-store.ts` | Per-opencode-session CDP `Session` map. 
The agent calls `session.connect(...)` from a snippet; subsequent snippets find the same Session. | | `src/skills.ts` | Runtime resolver for embedded skills (extract on first call in compiled mode; in-tree path in dev). | -| `skills/` | `BROWSER.md` (the agent's prompt for `browser_execute`) and `cloud-browser.md` (Way 3 — provision/stop a Browser Use cloud browser via raw HTTP from inside a snippet). Embedded into the binary by `script/embed-skills.ts`. The interaction-skills set inherited from the Python harness was archived 2026-05-09 — we'll reintroduce only what evals show is needed, one skill at a time. | +| `skills/` | `browser-execute-guide.md` (the agent's prompt for `browser_execute`) and `cloud-browser.md` (Way 3 — provision/stop a Browser Use cloud browser via raw HTTP from inside a snippet). Embedded into the binary by `script/embed-skills.ts`. The interaction-skills set inherited from the Python harness was archived 2026-05-09 — we'll reintroduce only what evals show is needed, one skill at a time. | | `script/embed-skills.ts` | Build-time embed; emits `bcode-skills.gen.ts` consumed by the compiled binary. | | `test/` | `bun test` smoke coverage for the workspace dynamic-import pattern. | diff --git a/packages/bcode-browser/skills/BROWSER.md b/packages/bcode-browser/skills/browser-execute-guide.md similarity index 99% rename from packages/bcode-browser/skills/BROWSER.md rename to packages/bcode-browser/skills/browser-execute-guide.md index 3ec919142..a955db3a9 100644 --- a/packages/bcode-browser/skills/BROWSER.md +++ b/packages/bcode-browser/skills/browser-execute-guide.md @@ -1,11 +1,11 @@ -# BROWSER.md — driving a real browser with `browser_execute` +# browser-execute-guide.md — driving a real browser with `browser_execute` Use the `browser_execute` tool to run JavaScript against a connected browser via the Chrome DevTools Protocol. 
The snippet runs in-process; `session` is bound to a long-lived CDP `Session` that persists across calls within the same bcode session. You connect once, drive many. **Locations:** - Workspace (read/write your reusable scripts): `/.bcode/agent-workspace/`. The bcode CLI runs from the project root, so `./.bcode/agent-workspace/foo.ts` works directly with the `read`/`write`/`edit` tools. -- Skills (read-only reference docs): `{{SKILLS_DIR}}/`. Currently `BROWSER.md` (this file) and `cloud-browser.md`. +- Skills (read-only reference docs): `{{SKILLS_DIR}}/`. Currently `browser-execute-guide.md` (this file) and `cloud-browser.md`. ## The model in one paragraph diff --git a/packages/bcode-browser/skills/cloud-browser.md b/packages/bcode-browser/skills/cloud-browser.md index ea25ac390..b922afe14 100644 --- a/packages/bcode-browser/skills/cloud-browser.md +++ b/packages/bcode-browser/skills/cloud-browser.md @@ -1,6 +1,6 @@ # cloud-browser.md — Browser Use cloud browser via raw HTTP -When BROWSER.md sent you here, the user wants a Browser Use cloud browser (Way 3): a clean isolated Chrome on BU's infrastructure, optionally with a geo-located proxy or a synced profile, with a `liveUrl` the user can open to watch you work. +When browser-execute-guide.md sent you here, the user wants a Browser Use cloud browser (Way 3): a clean isolated Chrome on BU's infrastructure, optionally with a geo-located proxy or a synced profile, with a `liveUrl` the user can open to watch you work. There is no `browser_open_cloud` tool. You write the HTTP calls yourself in a `browser_execute` snippet. This keeps the connection model symmetric (you also call `session.connect()` for local browsers in Way 1 and Way 2) and gives you full control over the BU API surface — provision, stop, swap profiles, change proxies, anything BU exposes. 
@@ -78,7 +78,7 @@ await fetch(`https://api.browser-use.com/api/v3/browsers/${id}`, { }) ``` -If you'll do this often within one project, save it as `./.bcode/agent-workspace/cloud.ts` (see BROWSER.md "Reusing code") and import it from later snippets. +If you'll do this often within one project, save it as `./.bcode/agent-workspace/cloud.ts` (see browser-execute-guide.md "Reusing code") and import it from later snippets. ## Swap diff --git a/packages/bcode-browser/src/browser-execute.ts b/packages/bcode-browser/src/browser-execute.ts index 8ac675088..397999745 100644 --- a/packages/bcode-browser/src/browser-execute.ts +++ b/packages/bcode-browser/src/browser-execute.ts @@ -145,7 +145,7 @@ const serialize = (v: unknown): string => { // Snippet executor. The CDP Session is resolved per-call from `SessionStore` // keyed on `ctx.sessionID`. The agent connects with `await session.connect(...)` -// in one snippet (Way 1 / Way 2 / Way 3 in BROWSER.md); the Session persists +// in one snippet (Way 1 / Way 2 / Way 3 in browser-execute-guide.md); the Session persists // for follow-up snippets in the same opencode session. // // `dataDir` is opencode's XDG_DATA_HOME for bcode (~/.local/share/bcode/ on diff --git a/packages/bcode-browser/src/index.ts b/packages/bcode-browser/src/index.ts index 5405a7a6d..cdb1067d1 100644 --- a/packages/bcode-browser/src/index.ts +++ b/packages/bcode-browser/src/index.ts @@ -11,7 +11,7 @@ // src/browser-execute.ts — in-process JS-eval browser_execute body // src/session-store.ts — per-opencode-session CDP Session map // src/skills.ts — runtime resolver for embedded skills -// skills/ — BROWSER.md + cloud-browser.md (embedded into binary) +// skills/ — browser-execute-guide.md + cloud-browser.md (embedded into binary) // // Cloud browser provisioning is intentionally NOT a separate Level-1 // surface. 
The agent reads `skills/cloud-browser.md` and writes the diff --git a/packages/bcode-browser/src/skills.ts b/packages/bcode-browser/src/skills.ts index b89f3c8ed..5b8c174f8 100644 --- a/packages/bcode-browser/src/skills.ts +++ b/packages/bcode-browser/src/skills.ts @@ -2,7 +2,7 @@ // // Materializes the skills tree to `/skills/` and substitutes the // `{{SKILLS_DIR}}` placeholder in every file with that absolute path so -// cross-references inside BROWSER.md (``read `{{SKILLS_DIR}}/cloud-browser.md` ``) +// cross-references inside browser-execute-guide.md (``read `{{SKILLS_DIR}}/cloud-browser.md` ``) // point at a real location. // // Compiled launches (the user-facing path) read a one-line sentinel at diff --git a/packages/bcode-browser/test/skills.test.ts b/packages/bcode-browser/test/skills.test.ts index aa65c83d2..94dff77c8 100644 --- a/packages/bcode-browser/test/skills.test.ts +++ b/packages/bcode-browser/test/skills.test.ts @@ -14,7 +14,7 @@ test("resolveSkillsDir materializes skills with {{SKILLS_DIR}} substituted", asy try { const dir = await Skills.resolveSkillsDir(dataDir) expect(dir).toBe(path.join(dataDir, "skills")) - const browser = (await fs.readFile(path.join(dir, "BROWSER.md"), "utf8")).replaceAll("\\", "/") + const browser = (await fs.readFile(path.join(dir, "browser-execute-guide.md"), "utf8")).replaceAll("\\", "/") expect(browser).not.toContain("{{SKILLS_DIR}}") expect(browser).toContain(`${dir.replaceAll("\\", "/")}/cloud-browser.md`) } finally { @@ -29,8 +29,8 @@ test("different dataDirs get their own substituted paths", async () => { const dirA = await Skills.resolveSkillsDir(a) const dirB = await Skills.resolveSkillsDir(b) const [browserA, browserB] = (await Promise.all([ - fs.readFile(path.join(dirA, "BROWSER.md"), "utf8"), - fs.readFile(path.join(dirB, "BROWSER.md"), "utf8"), + fs.readFile(path.join(dirA, "browser-execute-guide.md"), "utf8"), + fs.readFile(path.join(dirB, "browser-execute-guide.md"), "utf8"), ])).map((s) => 
s.replaceAll("\\", "/")) const [a2, b2] = [dirA.replaceAll("\\", "/"), dirB.replaceAll("\\", "/")] expect(browserA).toContain(a2) diff --git a/packages/bcode-browser/test/workspace-import.test.ts b/packages/bcode-browser/test/workspace-import.test.ts index 04ecb7f95..4b0de35d2 100644 --- a/packages/bcode-browser/test/workspace-import.test.ts +++ b/packages/bcode-browser/test/workspace-import.test.ts @@ -3,7 +3,7 @@ // at runtime from a `browser_execute` snippet via // `await import("/abs/path?t=" + Date.now())`. We don't run a real // `browser_execute` here — the point is to verify the dynamic-import -// mechanism behaves as the BROWSER.md prompt claims. +// mechanism behaves as the browser-execute-guide.md prompt claims. // // All four scenarios run against a real tmp dir, real .ts files, and // the real Bun module loader. No mocks. diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index 177ac97ff..7b046b1a1 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -93,7 +93,7 @@ export const layer = Layer.effect( const agentWorkspaceGlob = "**/.bcode/agent-workspace/**/*" // Browser-skills tree, materialized at runtime to // /skills/ in both dev and compiled modes (so the - // `{{SKILLS_DIR}}` placeholder in BROWSER.md gets substituted with a + // `{{SKILLS_DIR}}` placeholder in browser-execute-guide.md gets substituted with a // stable absolute path). Read-only baseline. 
const browserSkillsGlob = path.join(Skills.skillsDir(Global.Path.data), "*") const whitelistedDirs = [ diff --git a/packages/opencode/src/tool/browser-execute.ts b/packages/opencode/src/tool/browser-execute.ts index ec9c74e05..d00cf09f0 100644 --- a/packages/opencode/src/tool/browser-execute.ts +++ b/packages/opencode/src/tool/browser-execute.ts @@ -25,7 +25,7 @@ export const BrowserExecuteTool = Tool.define( Effect.gen(function* () { const impl = yield* BrowserExecute.make(Global.Path.data) return { - // Substitute the resolved skills path so BROWSER.md / cloud-browser.md + // Substitute the resolved skills path so browser-execute-guide.md / cloud-browser.md // references in the description point at concrete locations. Workspace // is per-project and agent-discoverable from cwd, so it's not // substituted here. diff --git a/packages/opencode/src/tool/browser-execute.txt b/packages/opencode/src/tool/browser-execute.txt index 66813091b..ce7b9067e 100644 --- a/packages/opencode/src/tool/browser-execute.txt +++ b/packages/opencode/src/tool/browser-execute.txt @@ -1,14 +1,7 @@ -Execute JavaScript against a connected browser via the BrowserCode CDP harness. +Executes JavaScript in a connected browser. -Use this tool whenever the task requires driving a real browser — automation, scraping, end-to-end testing, or interactive exploration. The snippet runs in-process with one persistent CDP `Session` object that survives across calls in the same opencode session. You connect once and drive many. - -Before the first `browser_execute` call of a session, you MUST read `{{SKILLS_DIR}}/BROWSER.md`. It defines the snippet model, the three connection methods (local user Chrome, isolated debug-port Chrome, Browser Use cloud browser), the workspace pattern, the `session` API surface, and gotchas. For cloud-browser specifics, also read `{{SKILLS_DIR}}/cloud-browser.md`. - -Always pass a clear, concise `description` of what the snippet does in 3-7 words (e.g. 
"Connect to local Chrome", "Scrape product titles", "Screenshot homepage"). It surfaces in the TUI as the call's title. - -Snippet scope: - -- `session` — the live CDP `Session`. You call `session.connect(...)` once at the start of your work; subsequent snippets reuse the same connection. Domain methods follow `session..(params)` and return Promises. -- standard JS globals (`console.log` etc. stream back to the user; `process.env` is available for reading `BROWSER_USE_API_KEY` etc.). - -Top-level `import` is not allowed inside a snippet. To reuse code across calls, save it as a `.ts` file under `./.bcode/agent-workspace/` (per-project, tracked-by-default in git) and `await import("/abs/path?t=" + Date.now())` it from a later snippet. +Usage: +- Use this tool whenever the task requires driving a real browser. +- Use this tool to read webpages that block the webfetch tool. +- IMPORTANT: you MUST use the Read tool first to read `{{SKILLS_DIR}}/browser-execute-guide.md`. This tool will fail if you did not read these directions first. +- Always pass a clear, precise, and low verbosity `description` of what the JS snippet does. From 6119a41fa4e321ec63593176b6dab889b249f4e5 Mon Sep 17 00:00:00 2001 From: bcode Date: Sun, 10 May 2026 23:00:13 -0700 Subject: [PATCH 3/4] remove cloud-browser.md, fold Way 3 into browser-execute-guide.md Single skill file is sufficient: agent can derive stop/swap behavior from the inline Way 3 example plus the BU API docs link. Rewrites guide intro and Way sections for concision, adds runtime self-discovery hints (Object.keys(session.domains), CdpError.data) so the agent can probe the CDP surface without docs round-trip. Updates README, src/index.ts, src/skills.ts, opencode comment, and skills test to drop cloud-browser.md references. 
--- packages/bcode-browser/README.md | 2 +- .../skills/browser-execute-guide.md | 157 +++++++----------- .../bcode-browser/skills/cloud-browser.md | 154 ----------------- packages/bcode-browser/src/index.ts | 8 +- packages/bcode-browser/src/skills.ts | 5 +- packages/bcode-browser/test/skills.test.ts | 2 +- packages/opencode/src/tool/browser-execute.ts | 8 +- 7 files changed, 75 insertions(+), 261 deletions(-) delete mode 100644 packages/bcode-browser/skills/cloud-browser.md diff --git a/packages/bcode-browser/README.md b/packages/bcode-browser/README.md index 1c700cd96..98ff17787 100644 --- a/packages/bcode-browser/README.md +++ b/packages/bcode-browser/README.md @@ -12,7 +12,7 @@ See `decisions.md §1c` (three-level model) and `§1d` (this package) in the Bro | `src/browser-execute.ts` | In-process JS-eval `browser_execute` body. | | `src/session-store.ts` | Per-opencode-session CDP `Session` map. The agent calls `session.connect(...)` from a snippet; subsequent snippets find the same Session. | | `src/skills.ts` | Runtime resolver for embedded skills (extract on first call in compiled mode; in-tree path in dev). | -| `skills/` | `browser-execute-guide.md` (the agent's prompt for `browser_execute`) and `cloud-browser.md` (Way 3 — provision/stop a Browser Use cloud browser via raw HTTP from inside a snippet). Embedded into the binary by `script/embed-skills.ts`. The interaction-skills set inherited from the Python harness was archived 2026-05-09 — we'll reintroduce only what evals show is needed, one skill at a time. | +| `skills/` | `browser-execute-guide.md` (the agent's prompt for `browser_execute`, covering all three connection Ways including Browser Use cloud provisioning via raw HTTP from inside a snippet). Embedded into the binary by `script/embed-skills.ts`. The interaction-skills set inherited from the Python harness was archived 2026-05-09 — we'll reintroduce only what evals show is needed, one skill at a time. 
| | `script/embed-skills.ts` | Build-time embed; emits `bcode-skills.gen.ts` consumed by the compiled binary. | | `test/` | `bun test` smoke coverage for the workspace dynamic-import pattern. | diff --git a/packages/bcode-browser/skills/browser-execute-guide.md b/packages/bcode-browser/skills/browser-execute-guide.md index a955db3a9..9c8bea20d 100644 --- a/packages/bcode-browser/skills/browser-execute-guide.md +++ b/packages/bcode-browser/skills/browser-execute-guide.md @@ -1,120 +1,109 @@ -# browser-execute-guide.md — driving a real browser with `browser_execute` +The `browser_execute` tool evaluates JavaScript against a connected browser `session` via the Chrome DevTools Protocol. +The snippet runs in-process; `session` is bound to a long-lived CDP `Session` that persists. Connect once, then drive many snippets. +There is no helper namespace, just `session`, `console`, and standard JS globals. -Use the `browser_execute` tool to run JavaScript against a connected browser via the Chrome DevTools Protocol. The snippet runs in-process; `session` is bound to a long-lived CDP `Session` that persists across calls within the same bcode session. You connect once, drive many. - -**Locations:** - -- Workspace (read/write your reusable scripts): `/.bcode/agent-workspace/`. The bcode CLI runs from the project root, so `./.bcode/agent-workspace/foo.ts` works directly with the `read`/`write`/`edit` tools. -- Skills (read-only reference docs): `{{SKILLS_DIR}}/`. Currently `browser-execute-guide.md` (this file) and `cloud-browser.md`. - -## The model in one paragraph - -`browser_execute` evaluates whatever JS you write against `session`. There is no auto-loaded library, no privileged file, no helper namespace — just `session` and standard JS globals. To reuse code from a previous snippet, save it as a `.ts` file under `./.bcode/agent-workspace/` (using the `write` tool) and `await import("/abs/path?t=" + Date.now())` it from a later snippet. 
The import takes an **absolute** path — construct it from `process.cwd()` inside the snippet. Same mechanism for a 5-line wrapper and a 500-line script. +Workspace: `/.bcode/agent-workspace/`. Read/write your reusable scripts here. +Skills: `{{SKILLS_DIR}}/`. Read-only browser execute reference docs. ## Connecting +Always call `session.connect(...)` once at the start of your work. There are three connection methods: -You always call `session.connect(...)` once at the start of your work. The `Session` is fresh on the first `browser_execute` call of an opencode session; subsequent calls reuse it. Three connection methods, in order of preference for typical tasks. - -For most tasks where the agent acts on behalf of the user in their normal browser, use **Way 1**. For automation that runs without the user watching, or any case where popup interruptions are unacceptable, use **Way 2** or a cloud browser. Cloud is only used when the user opts in. - -**Preconfigured environments (eval harnesses, CI).** If `BU_CDP_WS` (or its alias `BU_CDP_URL`) is set in the environment, `session.connect()` with no args connects to that endpoint directly — no OS scan, no cloud provision. The harness has already chosen the browser for you; just call `await session.connect()` and start driving. Explicit `{ wsUrl }` / `{ profileDir }` calls ignore the env var. - -**Way 1 — connect to the user's running Chrome (real profile, popup-gated).** Inherits the user's everyday Chrome logins, extensions, history, and bookmarks. Right choice when the task involves the user's actual logged-in sites. +#### Way 1: connect to the user's running Chrome or Chromium-based browser (real profile, popup-gated). +Choose when the task involves the user's logged-in sites, current browser state, cookies, saved data, etc. ```js -// Auto-detect the most-recently-launched Chrome with remote debugging enabled. +// Attempts to connect to every detected Chrome, most-recently-launched first. 
await session.connect() ``` -For this to work the user must have, **once**, navigated to `chrome://inspect/#remote-debugging` in their target Chrome and ticked "Allow remote debugging for this browser instance". This setting is per-profile and sticky: tick it once and it persists across every future Chrome launch of that profile. On Chrome 144 and later, the first attach also triggers an in-browser "Allow remote debugging?" popup that the user must click Allow on. The popup may reappear on later attaches under conditions that are not fully characterized — daemon restart, browser restart, time elapsed, version-dependent options like "Allow for N hours" — so be ready to ask the user to click Allow again if a previously working connection starts 403'ing. +For this to work the user must have navigated to `chrome://inspect/#remote-debugging` in their target Chrome and ticked "Allow remote debugging for this browser instance". This setting is per-profile and persists across every future launch of that profile. On Chrome 144 and later, the first attach also triggers an in-browser "Allow remote debugging?" popup that the user must click "Allow" on. The popup may reappear on later attaches under conditions that are not fully characterized — browser restart, time elapsed, new CDP session. Ask the user to click Allow again if a previously working connection starts 403'ing. -Failure modes and what they mean: +Failure modes: +- `connect()` throws "No running browser with remote debugging detected". The checkbox at `chrome://inspect/#remote-debugging` has not been ticked in any running Chrome profile, or no Chrome is running. +- `connect()` throws with "403" / "permission" / "WS closed before open". The checkbox is ticked but the user hasn't clicked Allow on the popup yet. By default `connect()` errors in 5s; pass `{ timeoutMs: 30000 }` to wait up to 30s for the click. 
-- **`connect()` throws "No running browser with remote debugging detected"** — the checkbox at `chrome://inspect/#remote-debugging` has not been ticked in any running Chrome profile, or no Chrome is running. Ask the user to open their target Chrome and tick the box. -- **`connect()` throws with "403" / "permission" / "WS closed before open"** — the checkbox is ticked but the user hasn't clicked Allow on the popup yet. By default `connect()` errors fast (5s per candidate). To wait up to 30s for the click: pass `{ profileDir: "", timeoutMs: 30000 }`. Passing `profileDir` skips the OS scan and reads the WebSocket URL straight from `/DevToolsActivePort`. Note: this works for Way 1 (the user's existing profile) on every Chrome version including 144+. For Way 2 (a fresh profile launched with `--user-data-dir`), Chrome 147+ has been observed to not write this file — see Way 2 below for the `/json/version` route. +#### Way 2: connect to a Chrome or Chromium-based browser launched with a debug port (isolated profile, no popups). +Choose for unattended automation, or for an isolated browser. -**Way 2 — connect to a Chrome you (or the user) launched with a debug port (isolated profile, no popups, ever).** Right choice for unattended automation, or whenever popup interruptions are unacceptable. - -Launch Chrome with `--remote-debugging-port= --user-data-dir=`. Pick any path the agent's tools can write to — a project-local directory like `./.bcode/way2-chrome` is a safe default; `/tmp/...` works wherever the sandbox allows it. +Launch Chrome with `--remote-debugging-port= --user-data-dir=`. Pick a directory you can access — e.g., a project-local one like `./.bcode/chrome-data-dir`. 
```bash # Linux -google-chrome --remote-debugging-port=9222 --user-data-dir=./.bcode/way2-chrome - +google-chrome --remote-debugging-port=9222 --user-data-dir=./.bcode/chrome-data-dir # macOS "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ - --remote-debugging-port=9222 --user-data-dir=./.bcode/way2-chrome - + --remote-debugging-port=9222 --user-data-dir=./.bcode/chrome-data-dir # Windows (cmd.exe) "C:\Program Files\Google\Chrome\Application\chrome.exe" ^ - --remote-debugging-port=9222 --user-data-dir=.\.bcode\way2-chrome - + --remote-debugging-port=9222 --user-data-dir=.\.bcode\chrome-data-dir # Windows (PowerShell) & "C:\Program Files\Google\Chrome\Application\chrome.exe" ` - --remote-debugging-port=9222 --user-data-dir=.\.bcode\way2-chrome + --remote-debugging-port=9222 --user-data-dir=.\.bcode\chrome-data-dir ``` -Then resolve the live WebSocket URL via `/json/version` and connect: - ```js +// Resolve the live WebSocket URL via `/json/version` and connect: const ver = await fetch("http://127.0.0.1:9222/json/version").then(r => r.json()) await session.connect({ wsUrl: ver.webSocketDebuggerUrl }) ``` -This is the canonical Way 2 path. Works on every Chrome that serves `/json/version` (every Chromium-based browser launched with `--remote-debugging-port`). - -**Older / alternate path: `{ profileDir }`.** On older Chrome (pre-147) and on the chrome://inspect Way 1 path, Chrome writes a `DevToolsActivePort` file inside the user-data-dir, and `session.connect({ profileDir: "" })` reads the WS URL directly from it — no HTTP probe. Chrome 147+ has been observed (macOS, Windows) to NOT write this file when launched with a custom `--user-data-dir`, so this path no longer works for Way 2 on modern Chrome. Use it only if `/json/version` is unavailable. - -Two precisions on the `--user-data-dir`: +`--user-data-dir` must not be Chrome's platform default. 
Chrome 136 and later silently no-ops the `--remote-debugging-port` flag when `--user-data-dir` is the platform default. The platform defaults are `%LOCALAPPDATA%\Google\Chrome\User Data` on Windows, `~/Library/Application Support/Google/Chrome` on macOS, `~/.config/google-chrome` on Linux. +You cannot reuse the user's everyday Chrome profile by copying its files into a custom directory. -- **It must not be Chrome's platform default.** Chrome 136 and later silently no-op the `--remote-debugging-port` flag when `--user-data-dir` is the platform default, even if you pass it explicitly. The platform defaults are `%LOCALAPPDATA%\Google\Chrome\User Data` on Windows, `~/Library/Application Support/Google/Chrome` on macOS, `~/.config/google-chrome` on Linux. An empty or new path gives a fresh clean profile that Chrome will persist there across future launches. -- **You cannot reuse the user's everyday Chrome profile by copying its files into a custom directory.** Chrome will accept the flag and start, so it looks like it works — but cookies are encrypted under a key bound to the *original* directory and will not survive the copy. Bookmarks and extensions transfer; logged-in sessions do not. If you need the user's real logins, use Way 1. +Failure modes: +- Chrome's launch log prints `DevTools listening on ws://...:<port>/...` immediately followed by `bind() failed: Address already in use` and Chrome exits. Confirm the port is actually open with `curl http://127.0.0.1:<port>/json/version` before connecting. +- `{ profileDir }` raises ENOENT on `DevToolsActivePort`. Chrome 147+ doesn't write this file under custom `--user-data-dir`; use the `/json/version` route above instead. +- Launch silently no-ops `--remote-debugging-port`. Launching a second Chrome that points at a `--user-data-dir` matching a running process ignores `--remote-debugging-port`. 
-The bare `ws://host:port/devtools/browser` form (no UUID suffix) does not work — Chrome's browser-level endpoint includes a per-process UUID. Always resolve via `/json/version` first. - -**Way 2 troubleshooting:** - -- **Chrome's launch log prints `DevTools listening on ws://...:/...` before the bind succeeds.** That line is not a reliable readiness signal: if the port is already taken, you'll see the line immediately followed by `bind() failed: Address already in use` and Chrome exits. Confirm the port is actually open with `curl http://127.0.0.1:/json/version` (or fetch from a snippet) before connecting. -- **Windows: launching Chrome while any other Chrome is already running silently hands the new flags off to the existing process** — `--remote-debugging-port` is ignored. Kill all `chrome.exe` first (or use a unique `--user-data-dir` and accept that some Windows builds still no-op). -- **`{ profileDir }` raises ENOENT on `DevToolsActivePort`** — Chrome 147+ doesn't write this file under custom `--user-data-dir`. Use the `/json/version` route above instead. - -**Way 3 — provision and connect to a Browser Use cloud browser.** Best when the user can't see the browser, you need a clean profile, geo-located proxy, or fingerprint isolation. BU cloud browsers also auto-solve captchas (Cloudflare Turnstile, reCAPTCHA, hCaptcha) — when you land on one, just stop driving and `await new Promise(r => setTimeout(r, 10000))`; the solver runs server-side and the page advances on its own. Local browsers (Way 1, Way 2) do not have this. Read `{{SKILLS_DIR}}/cloud-browser.md` for the full pattern (provision, stop, swap profile/proxy). Briefly: +#### Way 3: provision and connect to a Browser Use cloud browser. +Choose when the computer has no browser or to work in the background. 
```js +// Provision and connect to a cloud browser const r = await fetch("https://api.browser-use.com/api/v3/browsers", { method: "POST", headers: { "X-Browser-Use-API-Key": process.env.BROWSER_USE_API_KEY, "Content-Type": "application/json" }, body: "{}", }) +// Additional options: fetch https://docs.browser-use.com/cloud/api-v3/browsers/create-browser-session const { id, cdpUrl, liveUrl } = await r.json() -// BU's cdpUrl is the HTTPS discovery endpoint (e.g. https://cdpN.browser-use.com), -// not a WebSocket URL. Resolve it like a remote Chrome: fetch /json/version and -// use the webSocketDebuggerUrl field. The resolved URL is `wss://...` (secure); -// `session.connect({ wsUrl })` handles both `ws://` and `wss://` transparently. const ver = await fetch(`${cdpUrl}/json/version`).then(r => r.json()) await session.connect({ wsUrl: ver.webSocketDebuggerUrl }) console.log("liveUrl for the user to watch:", liveUrl) ``` -Requires `BROWSER_USE_API_KEY` in the environment (the user should have set this before launching bcode). If absent, tell the user to get a key at https://browser-use.com and `export BROWSER_USE_API_KEY=...`. +```js +// Browser Use cloud will eventually close idle browsers. An explicit stop frees the slot: +await fetch(`https://api.browser-use.com/api/v3/browsers/${id}`, { + method: "PATCH", + headers: { "X-Browser-Use-API-Key": process.env.BROWSER_USE_API_KEY, "Content-Type": "application/json" }, + body: JSON.stringify({ action: "stop" }), +}) +``` + +A cloud browser is a paid service and requires `BROWSER_USE_API_KEY` to be set in the environment. +Browser Use cloud browsers auto-solve captchas. When you land on one, just stop driving and `await new Promise(r => setTimeout(r, 10000))`; the solver runs server-side and the page advances on its own. +Browser Use has a free tier gated for intelligent and powerful agents. Unlimited free browsers and proxies, limited to 3 concurrent browsers. 
Agents can sign up and receive a key fully autonomously, but must pass a proof-of-agent-capability challenge. To begin, fetch https://browser-use.com and solve the challenge. -When `BROWSER_USE_API_KEY` is set, `webfetch` is automatically enhanced with `fetch-use` (Chrome TLS fingerprint + residential proxy + session cookies) — each request is free, but consumes a small amount of proxy bandwidth from the BU account. Disable in `opencode.json` with `experimental.fetch_use: false`. +#### Way 4: user-preconfigured endpoint +Not a method you choose — a way for the user to hand you a pre-set endpoint. +If `BU_CDP_WS` (or its alias `BU_CDP_URL`) is set in the environment, `session.connect()` with no args connects to that endpoint directly. Explicit `{ wsUrl }` / `{ profileDir }` calls ignore the env var. ## Attaching to a target - After `connect()`, attach to a page target before driving the browser: ```js const targets = (await session.Target.getTargets({})).targetInfos +// Pick the first non-internal tab if none was specified. const page = targets.find(t => t.type === "page" && !t.url.startsWith("chrome://")) await session.use(page.targetId) ``` -`session.use(targetId)` makes subsequent calls auto-route to that target. Switch with another `session.use`. - ## Driving a page - -Domain methods follow `session.<Domain>.<method>(params)` and return Promises. The full surface (652 commands) is the Chrome DevTools Protocol — see https://chromedevtools.github.io/devtools-protocol/. +Domain methods follow `session.<Domain>.<method>(params)` and return Promises. +The full surface (652 commands) is the Chrome DevTools Protocol. +`Object.keys(session.domains).sort()` lists every CDP domain bound on the session; `Object.keys(session.Page).sort()` lists the methods for `Page`. +For unknown param shapes, call with `{}` and inspect the thrown `CdpError` — `.data` carries the missing-field detail.
Common moves: @@ -148,26 +137,14 @@ await session.Page.captureScreenshot({ format: "png" }) // for the rare case you want to process it programmatically. ``` -## Switching browsers mid-session - -You own the connection. To swap: - -```js -await session.close() -await session.connect({ /* new opts */ }) -``` - -Cloud cleanup is your responsibility — if you're done with a cloud browser, stop it explicitly (see `{{SKILLS_DIR}}/cloud-browser.md` for the PATCH call). Otherwise it persists until your API quota or BU's idle timer reclaims it. - -## Reusing code: write to the workspace, import from snippet - -The agent-workspace is per-project: `./.bcode/agent-workspace/`. It's a directory of `.ts` files you own and edit with the standard `write`/`edit` tools — flat for small projects, organized into subdirectories (`scrape/`, `auth/`, `cloud/`, …) when you accumulate enough scripts that grouping helps. Imports work at any depth; pick whatever layout makes the project easiest to navigate. Saved scripts travel with the project (`.bcode/agent-workspace/` is committed by default), so `git clone && cd && bcode` shares them. - -Write once, import many: +## Reusing code +The agent-workspace is per-project: `./.bcode/agent-workspace/`. +Use this to write memory files, scripts, and helper functions. +Imports work at any depth; pick whatever layout makes the project easiest to navigate. ```ts // ./.bcode/agent-workspace/scrape_titles.ts (you write this with the `write` tool) -export async function run(session: any, urls: string[]) { +export async function scrapeTitles(session: any, urls: string[]) { const titles: string[] = [] await session.Page.enable() for (const url of urls) { @@ -181,26 +158,18 @@ export async function run(session: any, urls: string[]) { ``` ```js -// later snippet (browser_execute call) — construct the absolute path from cwd. 
+// later snippet const path = process.cwd() + "/.bcode/agent-workspace/scrape_titles.ts" +// Cache-bust (`?t=${Date.now()}`) is your responsibility: without it, edits to the file won't be picked up. const m = await import(`${path}?t=${Date.now()}`) -const titles = await m.run(session, ["https://example.com", "https://example.org"]) +const titles = await m.scrapeTitles(session, ["https://example.com", "https://example.org"]) console.log(JSON.stringify(titles)) ``` -Cache-bust (`?t=${Date.now()}`) is your responsibility: without it, edits to the file won't be picked up. The pattern is the same for any depth — save to `subdir/foo.ts`, import by full path. - ## Guardrails +- Top-level `import` statements inside the snippet body are not allowed. Use `await import(...)` instead. +- No CPU-bound infinite loops without `await` — they ignore the timeout. Insert `await new Promise(r => setTimeout(r, 0))` to yield. -- **Top-level `import`** statements inside the snippet body are **not allowed** — the snippet is wrapped in an async function. Use `await import(...)` instead. -- **No CPU-bound infinite loops without `await`.** JS Promises aren't preemptively cancellable; a `for (;;)` without an `await` yield-point will not respect the timeout. Insert `await new Promise(r => setTimeout(r, 0))` if you genuinely need a long compute loop. -- `console.log`, `console.error`, `console.warn`, `console.info`, `console.debug` are all captured and streamed to the user. Treat them as your stdout. Other `console.*` methods (`table`, `dir`, `trace`, …) work but write to bcode's stderr without being captured into the tool result. +## Console +- `console.log`, `console.error`, `console.warn`, `console.info`, `console.debug` are all captured and streamed to the user. Treat them as your stdout. Other `console.*` methods write to bcode's stderr without being captured into the tool result. - The snippet's `return` value is captured separately (JSON-serialized when possible). 
- -## When something doesn't work - -- **`session.Page.navigate` hangs forever** → the page is showing a native dialog. Use `session.Page.handleJavaScriptDialog({ accept: true })` to dismiss. -- **Selectors don't find elements that you can see** → likely an iframe or shadow DOM. Walk frames via `Page.getFrameTree` / `Target.attachToTarget`, or pierce shadow roots with `element.shadowRoot.querySelector(...)`. -- **Actions silently no-op** → the page is mid-load. After `Page.navigate`, await `session.waitFor("Page.loadEventFired")` before driving inputs. -- **Connection refused, 403, or `WS closed before open` on connect()** → see the Way 1 failure-mode list above. Most often: the `chrome://inspect/#remote-debugging` checkbox isn't ticked, or the Chrome 144+ "Allow remote debugging?" popup hasn't been clicked. Pass `{ profileDir, timeoutMs: 30000 }` (Way 1, user's profile) to wait up to 30s for the click, or fall back to Way 2. -- **Cloud `connect()` fails after a successful provision** → check that `cdp_url` came back in the POST response; some BU regions return `cdpUrl` (camelCase) — accept both. See `{{SKILLS_DIR}}/cloud-browser.md`. diff --git a/packages/bcode-browser/skills/cloud-browser.md b/packages/bcode-browser/skills/cloud-browser.md deleted file mode 100644 index b922afe14..000000000 --- a/packages/bcode-browser/skills/cloud-browser.md +++ /dev/null @@ -1,154 +0,0 @@ -# cloud-browser.md — Browser Use cloud browser via raw HTTP - -When browser-execute-guide.md sent you here, the user wants a Browser Use cloud browser (Way 3): a clean isolated Chrome on BU's infrastructure, optionally with a geo-located proxy or a synced profile, with a `liveUrl` the user can open to watch you work. - -There is no `browser_open_cloud` tool. You write the HTTP calls yourself in a `browser_execute` snippet. 
This keeps the connection model symmetric (you also call `session.connect()` for local browsers in Way 1 and Way 2) and gives you full control over the BU API surface — provision, stop, swap profiles, change proxies, anything BU exposes. - -## Authentication - -Every call to `https://api.browser-use.com/...` requires an API key in the `X-Browser-Use-API-Key` header. The key lives in the environment as `BROWSER_USE_API_KEY` (the user is expected to `export` it before launching bcode, the same way they'd set `AWS_BEDROCK_ACCESS_KEY_ID` for an LLM provider). - -Read it once, fail clearly if missing: - -```js -const apiKey = process.env.BROWSER_USE_API_KEY -if (!apiKey) { - throw new Error("BROWSER_USE_API_KEY is not set. Get a key at https://browser-use.com and re-launch bcode with the key exported.") -} -``` - -## Provision - -```js -const r = await fetch("https://api.browser-use.com/api/v3/browsers", { - method: "POST", - headers: { "X-Browser-Use-API-Key": apiKey, "Content-Type": "application/json" }, - body: JSON.stringify({ - // All optional — omit for an ephemeral fresh-profile browser with no proxy. - // profileId: "", // attach an existing BU profile - // proxyCountryCode: "us", // geo-located proxy (default "us"; null disables) - }), -}) -// Successful provision returns 201, not 200 — `!r.ok` covers both. -if (!r.ok) throw new Error(`provision failed: ${r.status} ${await r.text()}`) -const body = await r.json() -const { id, cdpUrl, liveUrl } = body -``` - -The response carries more than the three fields above. Other fields you may want: - -- `timeoutAt` — ISO timestamp when BU will auto-reclaim the browser. Use it to schedule a `stop` or warn the user before quota expiry. -- `recordingUrl` — playback URL for the session recording. Surface this to the user when handing back the run. -- `status`, `startedAt`, `finishedAt`, `proxyUsedMb`, `proxyCost`, `browserCost`, `agentSessionId` — observability fields, not needed to drive the browser. 
- -The `liveUrl` is a viewer URL the user can open in their own browser to watch the cloud browser's pixels. **Print it to console** so the user can see it: - -```js -console.log("Cloud browser ready. Live view:", liveUrl) -``` - -If the user later asks for the link in a clickable form (e.g. "give me the live url"), surface it in your reply as a markdown link — `[Live view]()` — which the TUI renders clickable. Tool stdout is not auto-linkified, but markdown in your assistant message is. - -Stash `id` somewhere (a `globalThis.cloudBrowserId = id` is fine, or the snippet's return value) — you need it to stop the browser later. - -## Connect - -The `cdpUrl` from BU is an HTTPS discovery endpoint (e.g. `https://cdpN.browser-use.com`), the same shape Chrome's `:9222` exposes locally, **not** a WebSocket URL. Resolve it via `/json/version`. The resolved URL is `wss://...` (secure WebSocket); `session.connect({ wsUrl })` handles `ws://` and `wss://` transparently, so the local-vs-cloud flow is identical from the snippet's perspective. - -```js -const ver = await fetch(`${cdpUrl}/json/version`).then(r => r.json()) -await session.connect({ wsUrl: ver.webSocketDebuggerUrl }) - -const targets = (await session.Target.getTargets({})).targetInfos -const page = targets.find(t => t.type === "page") -await session.use(page.targetId) -``` - -From here on `session..(...)` drives the cloud browser exactly like a local Chrome. - -## Stop - -When you're done, stop the browser. 
BU's quotas and idle reclaim will eventually clean it up if you forget, but explicit stop is faster and frees the slot: - -```js -await fetch(`https://api.browser-use.com/api/v3/browsers/${id}`, { - method: "PATCH", - headers: { "X-Browser-Use-API-Key": apiKey, "Content-Type": "application/json" }, - body: JSON.stringify({ action: "stop" }), -}) -``` - -If you'll do this often within one project, save it as `./.bcode/agent-workspace/cloud.ts` (see browser-execute-guide.md "Reusing code") and import it from later snippets. - -## Swap - -To switch from one cloud browser to another (e.g. different proxy country) within the same opencode session: - -```js -// Stop the old one first. -await fetch(`https://api.browser-use.com/api/v3/browsers/${oldId}`, { - method: "PATCH", - headers: { "X-Browser-Use-API-Key": apiKey, "Content-Type": "application/json" }, - body: JSON.stringify({ action: "stop" }), -}) - -// Close the local Session's WS so connect() opens a fresh one. -await session.close() - -// Provision and connect to the new one (provision block above, with new params). 
-``` - -## A reusable workspace helper - -Recommended pattern for any project that uses cloud browsers more than once: - -```ts -// ./.bcode/agent-workspace/cloud.ts -const API = "https://api.browser-use.com/api/v3/browsers" -const key = () => { - const k = process.env.BROWSER_USE_API_KEY - if (!k) throw new Error("BROWSER_USE_API_KEY is not set.") - return k -} - -export async function provision(opts: { profileId?: string; proxyCountryCode?: string } = {}) { - const r = await fetch(API, { - method: "POST", - headers: { "X-Browser-Use-API-Key": key(), "Content-Type": "application/json" }, - body: JSON.stringify({ - profileId: opts.profileId, - proxyCountryCode: opts.proxyCountryCode, - }), - }) - if (!r.ok) throw new Error(`provision failed: ${r.status} ${await r.text()}`) - const body = (await r.json()) as { id: string; cdpUrl: string; liveUrl: string } - // BU's cdpUrl is an HTTP discovery endpoint; resolve to the WS URL once - // here so callers can pass `wsUrl` straight to `session.connect`. - const ver = await fetch(`${body.cdpUrl}/json/version`).then(r => r.json()) - return { id: body.id, wsUrl: ver.webSocketDebuggerUrl as string, liveUrl: body.liveUrl } -} - -export async function stop(id: string) { - const r = await fetch(`${API}/${id}`, { - method: "PATCH", - headers: { "X-Browser-Use-API-Key": key(), "Content-Type": "application/json" }, - body: JSON.stringify({ action: "stop" }), - }) - if (!r.ok) throw new Error(`stop failed: ${r.status} ${await r.text()}`) -} -``` - -Then any snippet does: - -```js -const { provision, stop } = await import(`${process.cwd()}/.bcode/agent-workspace/cloud.ts?t=${Date.now()}`) -const { id, wsUrl, liveUrl } = await provision({ proxyCountryCode: "us" }) -console.log("Live view:", liveUrl) -await session.connect({ wsUrl }) -// ... do work ... -await stop(id) -``` - -## Other BU API endpoints - -The full BU cloud API (profile sync, profile list, custom proxies, recording on/off, etc.) 
is documented at https://browser-use.com — `read` the docs and write the matching `fetch` call. Anything BU's API exposes is reachable from a snippet without bcode-side wrapper code. diff --git a/packages/bcode-browser/src/index.ts b/packages/bcode-browser/src/index.ts index cdb1067d1..ac70f410e 100644 --- a/packages/bcode-browser/src/index.ts +++ b/packages/bcode-browser/src/index.ts @@ -11,12 +11,12 @@ // src/browser-execute.ts — in-process JS-eval browser_execute body // src/session-store.ts — per-opencode-session CDP Session map // src/skills.ts — runtime resolver for embedded skills -// skills/ — browser-execute-guide.md + cloud-browser.md (embedded into binary) +// skills/ — browser-execute-guide.md (embedded into binary) // // Cloud browser provisioning is intentionally NOT a separate Level-1 -// surface. The agent reads `skills/cloud-browser.md` and writes the -// fetch+connect snippet itself, matching how local-browser connect works -// (snippet-side, not tool-side). Decisions trail in +// surface. The agent reads Way 3 of `skills/browser-execute-guide.md` and +// writes the fetch+connect snippet itself, matching how local-browser +// connect works (snippet-side, not tool-side). Decisions trail in // `memory/browsercode/decisions.md` §3.4. // // Planned (per ROADMAP phase): diff --git a/packages/bcode-browser/src/skills.ts b/packages/bcode-browser/src/skills.ts index 5b8c174f8..946b21fda 100644 --- a/packages/bcode-browser/src/skills.ts +++ b/packages/bcode-browser/src/skills.ts @@ -1,9 +1,8 @@ // Skills directory resolver. // // Materializes the skills tree to `/skills/` and substitutes the -// `{{SKILLS_DIR}}` placeholder in every file with that absolute path so -// cross-references inside browser-execute-guide.md (``read `{{SKILLS_DIR}}/cloud-browser.md` ``) -// point at a real location. +// `{{SKILLS_DIR}}` placeholder in every file with that absolute path so any +// cross-references inside the skill files point at a real location. 
// // Compiled launches (the user-facing path) read a one-line sentinel at // `/.bcode-build` recording `:`. When it matches diff --git a/packages/bcode-browser/test/skills.test.ts b/packages/bcode-browser/test/skills.test.ts index 94dff77c8..80a852020 100644 --- a/packages/bcode-browser/test/skills.test.ts +++ b/packages/bcode-browser/test/skills.test.ts @@ -16,7 +16,7 @@ test("resolveSkillsDir materializes skills with {{SKILLS_DIR}} substituted", asy expect(dir).toBe(path.join(dataDir, "skills")) const browser = (await fs.readFile(path.join(dir, "browser-execute-guide.md"), "utf8")).replaceAll("\\", "/") expect(browser).not.toContain("{{SKILLS_DIR}}") - expect(browser).toContain(`${dir.replaceAll("\\", "/")}/cloud-browser.md`) + expect(browser).toContain(`${dir.replaceAll("\\", "/")}/`) } finally { await fs.rm(dataDir, { recursive: true, force: true }) } diff --git a/packages/opencode/src/tool/browser-execute.ts b/packages/opencode/src/tool/browser-execute.ts index d00cf09f0..65adb7566 100644 --- a/packages/opencode/src/tool/browser-execute.ts +++ b/packages/opencode/src/tool/browser-execute.ts @@ -25,10 +25,10 @@ export const BrowserExecuteTool = Tool.define( Effect.gen(function* () { const impl = yield* BrowserExecute.make(Global.Path.data) return { - // Substitute the resolved skills path so browser-execute-guide.md / cloud-browser.md - // references in the description point at concrete locations. Workspace - // is per-project and agent-discoverable from cwd, so it's not - // substituted here. + // Substitute the resolved skills path so `{{SKILLS_DIR}}` references in + // the description point at a concrete location. Workspace is + // per-project and agent-discoverable from cwd, so it's not substituted + // here. 
description: DESCRIPTION.replaceAll("{{SKILLS_DIR}}", impl.skillsDir), parameters: impl.parameters, execute: (args: Schema.Schema.Type, ctx: Tool.Context) => From 2aa3e2ae7adc5b4bbdb23afc959944d747f9d9fb Mon Sep 17 00:00:00 2001 From: bcode Date: Sun, 10 May 2026 23:00:22 -0700 Subject: [PATCH 4/4] browser_execute: standardize parameter descriptions and tool prose Aligns annotation style with the shell tool: code mentions the snippet model and points at the guide; timeout uses the canonical 'Optional X (default Y, max Z)' format; description swaps 'summary' for 'description' to match the field name. Tool top-line tightened and a Returns bullet added in browser-execute.txt. --- packages/bcode-browser/src/browser-execute.ts | 6 +++--- packages/opencode/src/tool/browser-execute.txt | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/bcode-browser/src/browser-execute.ts b/packages/bcode-browser/src/browser-execute.ts index 397999745..51c318f17 100644 --- a/packages/bcode-browser/src/browser-execute.ts +++ b/packages/bcode-browser/src/browser-execute.ts @@ -56,14 +56,14 @@ const MAX_TIMEOUT_MS = 10 * 60 * 1000 export const parameters = Schema.Struct({ code: Schema.String.annotate({ description: - "JavaScript source. Wrapped in an async function with `session` (CDP Session) and `console` (per-call capture; same `log/error/warn/info` API) bound.", + "The JavaScript snippet to execute. `session` (CDP Session) and `console` are in scope; see browser-execute-guide.md for the snippet model.", }), timeout: Schema.optional(Schema.Number).annotate({ - description: `Timeout in milliseconds. Default ${DEFAULT_TIMEOUT_MS}, max ${MAX_TIMEOUT_MS}.`, + description: `Optional timeout in milliseconds (default ${DEFAULT_TIMEOUT_MS}, max ${MAX_TIMEOUT_MS})`, }), description: Schema.String.annotate({ description: - "Clear, concise summary of what this snippet does in 3-7 words. 
Examples:\nInput: code that connects to local Chrome\nOutput: Connect to local Chrome\n\nInput: scrape product titles from current page\nOutput: Scrape product titles\n\nInput: capture a screenshot of the homepage\nOutput: Screenshot homepage", + "Clear, concise description of what this snippet does in 3-7 words. Examples:\nInput: code that connects to local Chrome\nOutput: Connect to local Chrome\n\nInput: scrape product titles from current page\nOutput: Scrape product titles\n\nInput: capture a screenshot of the homepage\nOutput: Screenshot homepage", }), }) diff --git a/packages/opencode/src/tool/browser-execute.txt b/packages/opencode/src/tool/browser-execute.txt index ce7b9067e..758aadd35 100644 --- a/packages/opencode/src/tool/browser-execute.txt +++ b/packages/opencode/src/tool/browser-execute.txt @@ -1,7 +1,7 @@ -Executes JavaScript in a connected browser. +Executes JavaScript in a browser via CDP. Usage: - Use this tool whenever the task requires driving a real browser. - Use this tool to read webpages that block the webfetch tool. - IMPORTANT: you MUST use the Read tool first to read `{{SKILLS_DIR}}/browser-execute-guide.md`. This tool will fail if you did not read these directions first. -- Always pass a clear, precise, and low verbosity `description` of what the JS snippet does. +- Returns console output from the snippet; screenshots taken attach automatically as images. \ No newline at end of file