From 961d4aae285f08dca73505157e695771a29d1220 Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 01:55:02 -0700 Subject: [PATCH 01/31] chat: deterministic bash command summarizer for friendly tool labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds summarizeBashCommand and summarizeBrowserHarnessJs — pure-regex pattern matchers that map common shell commands and CDP calls to plain-English labels ("Read AGENTS.md", "Visited https://x.com", "Connected to browser") for the chat-view tool pills. Patterns cover the cat/sed/head/tail/ls/find/grep family, git, package managers, mkdir/mv/cp/rm, curl/wget, cd-prefix stripping, and the browser-harness-js JS payload (Page.navigate / captureScreenshot / Input.* / DOM.* / connectToAssignedTarget). Only maps when the binary or method alone unambiguously determines the action; anything ambiguous falls through to null so the caller keeps the raw "Ran command" UI. Includes a corpus test that replays every distinct bash call from the local sessions.db through the summarizer to measure coverage on real agent output. --- app/src/renderer/hub/chat/toolLabels.ts | 540 ++++++++++++++++++ app/tests/unit/hub/_corpus.spec.ts | 38 ++ .../unit/hub/summarizeBashCommand.spec.ts | 226 ++++++++ 3 files changed, 804 insertions(+) create mode 100644 app/src/renderer/hub/chat/toolLabels.ts create mode 100644 app/tests/unit/hub/_corpus.spec.ts create mode 100644 app/tests/unit/hub/summarizeBashCommand.spec.ts diff --git a/app/src/renderer/hub/chat/toolLabels.ts b/app/src/renderer/hub/chat/toolLabels.ts new file mode 100644 index 00000000..f89879b3 --- /dev/null +++ b/app/src/renderer/hub/chat/toolLabels.ts @@ -0,0 +1,540 @@ +/** + * Tool name → friendly label + primary-parameter extractor. + * + * Mirrors browser_use_cloud/frontend/src/lib/experimental/utils/tool-labels.ts + * so chat output reads consistently across the Reagan stack. 
/**
 * - getToolType: normalizes raw tool names (bash, execute_command, shell, …)
 *   to a small canonical set used for icon/renderer dispatch.
 * - getToolLabel(name, status): returns "Running command" while in-flight,
 *   "Ran command" once finished. Falls back to Title Case of raw name.
 * - getToolDisplayValue(name, args): one-line primary parameter for the
 *   collapsed pill.
 */

/** Canonical tool categories that raw tool names are normalized to. */
export type ToolCallType =
  | 'bash' | 'read_file' | 'create_file' | 'edit_file'
  | 'glob' | 'grep' | 'python' | 'browse' | 'click'
  | 'scroll' | 'search' | 'type' | 'js' | 'send_keys'
  | 'go_back' | 'wait' | 'switch_tab' | 'close_tab'
  | 'upload' | 'dropdown' | 'move' | 'todo'
  | 'integration_search' | 'integration' | 'unknown';

/** Lifecycle state of a tool call; getToolLabel uses it to pick the tense. */
export type ToolStatus = 'pending' | 'running' | 'completed' | 'error';

/** In-flight / finished wording for one tool name. */
type ToolLabelPair = { active: string; completed: string };

/**
 * Own-property lookup. A bare `table[key]` also resolves names inherited from
 * Object.prototype ("constructor", "__proto__"), which would leak a function
 * or object out of tables that are supposed to hold only declared entries.
 */
function ownValue<T>(table: Readonly<Record<string, T>>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/** Lower-cased raw tool name → canonical category. */
const TOOL_TYPES: Record<string, ToolCallType> = {
  bash: 'bash',
  bash_output: 'bash',
  kill_bash: 'bash',
  execute_command: 'bash',
  run_command: 'bash',
  command_execution: 'bash',
  shell: 'bash',
  read: 'read_file',
  read_file: 'read_file',
  write: 'create_file',
  write_file: 'create_file',
  create_file: 'create_file',
  edit: 'edit_file',
  edit_file: 'edit_file',
  multi_edit: 'edit_file',
  multiedit: 'edit_file',
  replace_in_file: 'edit_file',
  glob: 'glob',
  glob_tool: 'glob',
  grep: 'grep',
  python: 'python',
  browser_navigate: 'browse',
  navigate: 'browse',
  go_to_url: 'browse',
  webfetch: 'browse',
  fetch: 'browse',
  browser_click: 'click',
  click: 'click',
  click_element: 'click',
  browser_scroll: 'scroll',
  scroll_down: 'scroll',
  scroll_up: 'scroll',
  scroll_to_bottom: 'scroll',
  scroll_to_top: 'scroll',
  web_search: 'search',
  websearch: 'search',
  browser_search: 'search',
  search: 'search',
  browser_input: 'type',
  input_text: 'type',
  type: 'type',
  browser_evaluate: 'js',
  browser_send_keys: 'send_keys',
  browser_go_back: 'go_back',
  go_back: 'go_back',
  browser_wait: 'wait',
  wait: 'wait',
  browser_switch_tab: 'switch_tab',
  browser_close_tab: 'close_tab',
  browser_upload_file: 'upload',
  file_upload: 'upload',
  browser_select_dropdown: 'dropdown',
  browser_dropdown_options: 'dropdown',
  browser_find_text: 'search',
  move_mouse: 'move',
  todo_write: 'todo',
};

/**
 * Normalize a raw tool name to its canonical category.
 *
 * @param toolName Raw tool name; matched case-insensitively.
 * @returns The mapped category, or 'unknown' for a missing or unlisted name.
 */
export function getToolType(toolName: string | undefined): ToolCallType {
  if (!toolName) return 'unknown';
  return ownValue(TOOL_TYPES, toolName.toLowerCase()) ?? 'unknown';
}

/** Lower-cased raw tool name → label wording. */
const TOOL_LABELS: Record<string, ToolLabelPair> = {
  bash: { active: 'Running command', completed: 'Ran command' },
  command_execution: { active: 'Running command', completed: 'Ran command' },
  shell: { active: 'Running command', completed: 'Ran command' },
  execute_command: { active: 'Running command', completed: 'Ran command' },
  run_command: { active: 'Running command', completed: 'Ran command' },
  bash_output: { active: 'Getting output', completed: 'Got output' },
  kill_bash: { active: 'Killing process', completed: 'Killed process' },
  read: { active: 'Reading file', completed: 'Read file' },
  read_file: { active: 'Reading file', completed: 'Read file' },
  write: { active: 'Writing file', completed: 'Wrote file' },
  write_file: { active: 'Writing file', completed: 'Wrote file' },
  create_file: { active: 'Creating file', completed: 'Created file' },
  edit: { active: 'Editing file', completed: 'Edited file' },
  edit_file: { active: 'Editing file', completed: 'Edited file' },
  multi_edit: { active: 'Editing file', completed: 'Edited file' },
  multiedit: { active: 'Editing file', completed: 'Edited file' },
  glob: { active: 'Finding files', completed: 'Found files' },
  grep: { active: 'Searching files', completed: 'Searched files' },
  python: { active: 'Running Python', completed: 'Ran Python' },
  navigate: { active: 'Opening page', completed: 'Opened page' },
  browser_navigate: { active: 'Opening page', completed: 'Opened page' },
  go_to_url: { active: 'Opening page', completed: 'Opened page' },
  webfetch: { active: 'Fetching page', completed: 'Fetched page' },
  fetch: { active: 'Fetching page', completed: 'Fetched page' },
  click: { active: 'Clicking', completed: 'Clicked' },
  browser_click: { active: 'Clicking', completed: 'Clicked' },
  click_element: { active: 'Clicking', completed: 'Clicked' },
  type: { active: 'Typing', completed: 'Typed' },
  input_text: { active: 'Typing', completed: 'Typed' },
  browser_input: { active: 'Typing', completed: 'Typed' },
  search: { active: 'Searching', completed: 'Searched' },
  web_search: { active: 'Searching web', completed: 'Searched web' },
  websearch: { active: 'Searching web', completed: 'Searched web' },
  screenshot: { active: 'Taking screenshot', completed: 'Took screenshot' },
  todo_write: { active: 'Updating todos', completed: 'Updated todos' },
  wait: { active: 'Waiting', completed: 'Waited' },
};

/**
 * Human-readable label for a tool call.
 *
 * @param toolName Raw tool name; matched case-insensitively.
 * @param status   Call state. 'completed' and 'error' both select the
 *                 finished wording; anything else selects the in-flight one.
 * @returns The label, 'Unknown action' for a missing name, or the raw name
 *          split on underscores/whitespace and Title-Cased when unlisted.
 */
export function getToolLabel(toolName: string | undefined, status: ToolStatus = 'pending'): string {
  if (!toolName) return 'Unknown action';
  const labels = ownValue(TOOL_LABELS, toolName.toLowerCase());
  if (!labels) {
    return toolName
      .split(/[_\s]+/)
      .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
      .join(' ');
  }
  const done = status === 'completed' || status === 'error';
  return done ? labels.completed : labels.active;
}

/**
 * Parse `str` as JSON and return it only when it is a plain (non-array)
 * object. Returns null for invalid JSON and for arrays, primitives and null.
 */
function tryParseJSON(str: string): Record<string, unknown> | null {
  try {
    const parsed: unknown = JSON.parse(str);
    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch { /* invalid JSON — the caller treats null as "not an object" */ }
  return null;
}

/** Return `v` when it is a non-empty string, otherwise undefined. */
function asString(v: unknown): string | undefined {
  return typeof v === 'string' && v.length > 0 ? v : undefined;
}

/** Cut `s` to at most `n` UTF-16 code units, appending '…' when shortened. */
function truncate(s: string, n: number): string {
  return s.length > n ? s.slice(0, n) + '…' : s;
}

/**
 * Pull a one-line "primary parameter" out of a tool_call args payload.
 * Handles flat (Claude) and nested (Codex `{id, type, command_execution: {command}}`)
 * shapes. Returns '' when nothing useful surfaces.
 */
/**
 * @param toolName    Raw tool name; normalized with getToolType to decide
 *                    which argument field counts as the primary one.
 * @param argsContent Tool-call arguments, normally a JSON object string.
 * @returns The primary parameter for the pill. Commands, scripts and the
 *          generic fallback are cut to their first line and truncated; an
 *          empty string means the JSON parsed but held nothing displayable.
 *          Input that is not a JSON object is itself treated as the value.
 */
export function getToolDisplayValue(toolName: string | undefined, argsContent: string): string {
  const parsed = tryParseJSON(argsContent);
  // Not JSON — argsContent IS the primary value.
  if (!parsed) return truncate(firstLine(argsContent), 80);

  const args = unwrapNestedArgs(parsed);
  switch (getToolType(toolName)) {
    case 'browse':
      return asString(args.url) ?? '';
    case 'search':
      return asString(args.query) ?? asString(args.q) ?? '';
    case 'read_file':
    case 'create_file':
    case 'edit_file':
      return asString(args.file_path) ?? asString(args.path) ?? asString(args.filename) ?? '';
    case 'bash': {
      const cmd = asString(args.command) ?? asString(args.cmd);
      return cmd ? truncate(firstLine(cmd), 80) : '';
    }
    case 'glob':
    case 'grep':
      return asString(args.pattern) ?? '';
    case 'type':
      return asString(args.text) ?? '';
    case 'js': {
      const code = asString(args.script) ?? asString(args.code) ?? asString(args.expression);
      return code ? truncate(code, 60) : '';
    }
    case 'send_keys':
      return asString(args.keys) ?? '';
    default: {
      // Generic fallback: first string-valued field that's short enough.
      for (const v of Object.values(args)) {
        const s = asString(v);
        if (s && s.length < 200) return truncate(firstLine(s), 80);
      }
      return '';
    }
  }
}

/**
 * Unwrap Codex-style nested payloads (`{id, type, command_execution: {...}}`):
 * return the first object-valued field that carries a `command`, `url` or
 * `file_path` key, or the payload itself when no field qualifies.
 */
function unwrapNestedArgs(parsed: Record<string, unknown>): Record<string, unknown> {
  for (const v of Object.values(parsed)) {
    if (v && typeof v === 'object' && !Array.isArray(v)) {
      const inner = v as Record<string, unknown>;
      if ('command' in inner || 'url' in inner || 'file_path' in inner) return inner;
    }
  }
  return parsed;
}

/** Text of `s` up to (not including) its first newline. */
function firstLine(s: string): string {
  // split() always yields at least one element; `?? ''` keeps this sound
  // when indexed access is typed as possibly undefined.
  return s.split('\n')[0] ?? '';
}

/**
 * Parse bash backend wrappers. Codex (and browser-harness) wrap shell results
 * as { stdout, stderr, exit_code, status, duration_ms, aggregated_output, ... }.
 * Returns the human-meaningful output, an error flag, and the duration if
 * present.
 */
/**
 * Parsed form of a bash tool result. Parsing
 * falls back to raw text when the wrapper shape isn't recognized.
 */
export interface BashResult {
  /** Human-meaningful output text. */
  output: string;
  /** Error flag for the result. */
  isError: boolean;
  /** Duration in milliseconds, when present. */
  durationMs?: number;
}

/** Single-character JSON escapes (the character after the backslash → value). */
const JSON_SIMPLE_ESCAPES: ReadonlyMap<string, string> = new Map([
  ['"', '"'],
  ['\\', '\\'],
  ['/', '/'],
  ['b', '\b'],
  ['f', '\f'],
  ['n', '\n'],
  ['r', '\r'],
  ['t', '\t'],
]);

/**
 * Decode a JSON-encoded string field manually — handles \n, \r, \t, \b, \f,
 * \/, \", \\ and \uXXXX. Used as a fallback when JSON.parse fails because the
 * codex adapter sliced the result to 2000 chars and broke the JSON tail.
 *
 * Escapes are consumed left to right in ONE pass. Chained per-escape replaces
 * get `\\n` (an escaped backslash followed by the letter n) wrong: the second
 * backslash pairs with the `n` and a newline is produced.
 *
 * @param s Raw text between the quotes of a JSON string.
 * @returns The decoded text. Unknown escapes and a trailing lone backslash
 *          (possible in a truncated payload) are left as they are.
 */
function decodeJsonString(s: string): string {
  return s.replace(
    /\\(?:u([0-9a-fA-F]{4})|([\s\S]))/g,
    (whole: string, hex: string | undefined, ch: string | undefined): string => {
      if (hex !== undefined) return String.fromCharCode(parseInt(hex, 16));
      return (ch !== undefined ? JSON_SIMPLE_ESCAPES.get(ch) : undefined) ?? whole;
    },
  );
}

/**
 * Extract a single string field from a JSON-ish blob even when truncated.
 * Looks for `"FIELD":"..."` and returns the decoded inner string. The value
 * ends at the first unescaped `"`, or at the end of input when the string is
 * unterminated.
 *
 * @param raw   Possibly truncated JSON text.
 * @param field Key to look for; matched literally.
 * @returns The decoded value of the first occurrence, or undefined if none.
 */
function extractJsonField(raw: string, field: string): string | undefined {
  // Escape regex metacharacters so a key such as "a.b" cannot match "axb".
  const literalField = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(`"${literalField}"\\s*:\\s*"((?:\\\\.|[^"\\\\])*)`);
  const m = raw.match(re);
  if (!m) return undefined;
  return decodeJsonString(m[1] ?? '');
}

/**
 * Friendly summary of a bash command for the collapsed tool pill.
 *
 * Strips the surrounding shell wrapper (e.g. `/bin/zsh -lc "..."`) and
 * pattern-matches the inner command against ~20 common shapes (cat, sed, ls,
 * git, grep, curl, npm, heredocs, …). Returns `null` when nothing matches so
 * callers can fall back to the default "Ran command" + raw-cmd display.
 *
 * Deterministic and offline. No semantic interpretation of filenames — `cat
 * AGENTS.md` becomes "Read AGENTS.md", not "Read agent instructions".
 */
export interface BashSummary {
  /** Label while the command is in flight. */
  active: string;
  /** Label once the command has finished. */
  completed: string;
  /** Specific identifier shown next to the label; '' when there is none. */
  value: string;
}

/**
 * Last path segment of `p` with every single and double quote removed.
 * A path ending in `/` has no last segment, so the unquoted path is returned
 * whole instead of an empty string.
 */
function basename(p: string): string {
  const unquoted = p.replace(/['"]/g, '');
  const lastSlash = unquoted.lastIndexOf('/');
  if (lastSlash === -1) return unquoted;
  return unquoted.slice(lastSlash + 1) || unquoted;
}

/**
 * Unwrap `/bin/zsh -lc "CMD"` / `bash -c 'CMD'` / etc. to surface the
 * actual command. Returns the original string (trimmed) if no wrapper is
 * detected.
 *
 * Implemented without a backreferenced regex on the inner body because real
 * sessions.db payloads contain heredocs with embedded quote-switching tricks
 * (`"'`...`'"`) that defeat backreference matching. Instead: locate the prefix
 * + opening quote, then require the final character to be the same quote.
 */
export function stripShellWrapper(cmd: string): string {
  const trimmed = cmd.trim();
  const prefix = trimmed.match(/^(?:\S*\/)?(?:zsh|bash|sh|dash)\s+-[lc]+\s+(['"])/);
  if (!prefix) return trimmed;
  // prefix[0] ends with the opening quote; the body starts right after it.
  const quote = prefix[1] ?? '';
  const body = trimmed.slice(prefix[0].length);
  // Strict: body ends with the same quote that opened — strip exactly one.
  if (body.endsWith(quote)) return body.slice(0, -1).trim();
  // Permissive: real sessions.db payloads sometimes have malformed wrappers
  // (e.g. heredoc body containing `EOF'` before the closing `"`). Strip any
  // single trailing quote rather than losing the whole command.
  return body.replace(/['"]\s*$/, '').trim();
}

/** A regex plus the builder that turns its match into a summary. */
type Pattern = { re: RegExp; build: (m: RegExpMatchArray) => BashSummary };

/**
 * Pattern-match the JS body of a `browser-harness-js 'CODE'` (or heredoc)
 * invocation. The harness API is well-defined in
 * `app/src/main/hl/stock/browser-harness-js/SKILL.md` — each match below
 * corresponds to a single, unambiguous CDP method. Anything not matched
 * returns null so the caller can fall through rather than guess.
 */
// Rendering convention for these summaries:
// • The `value` slot is shown muted/secondary in the chat pill — reserve it
//   for SPECIFIC IDENTIFIERS (URLs, filenames, branches, search patterns).
+// • When the action's target is a generic noun ("browser", "page", "tests"), +// fold the noun into the bold label and leave value empty. Otherwise the +// chip reads awkwardly: bold "Connected to" + muted "the browser". +// Ordered most-specific to least. Scripts often bundle `connect + navigate + +// evaluate` — when that happens the *user-visible* action (navigate, click, +// screenshot) should label the pill, not the scaffolding (`connect`). So +// connect/auto-detect sit at the bottom. +const BROWSER_JS_PATTERNS: Pattern[] = [ + // page.goto(URL) — Puppeteer-style call. Not in our harness API, but agents + // try it routinely and the intent (navigate) is unambiguous from the call + // shape. Treat as a navigate. + { re: /\bpage\.goto\s*\(\s*['"`]([^'"`]+)['"`]/, build: (m) => ({ active: 'Visiting', completed: 'Visited', value: m[1] }) }, + // Input — explicit user-driven actions + { re: /\bInput\.dispatchMouseEvent\b/, build: () => ({ active: 'Clicking on page', completed: 'Clicked on page', value: '' }) }, + { re: /\bInput\.insertText\b/, build: () => ({ active: 'Typing on page', completed: 'Typed on page', value: '' }) }, + { re: /\bInput\.dispatchKeyEvent\b/, build: () => ({ active: 'Pressing key', completed: 'Pressed key', value: '' }) }, + // Navigation — surface URL as the specific value when present + { re: /\bPage\.navigate\b[\s\S]*?url\s*:\s*['"`]([^'"`]+)['"`]/, build: (m) => ({ active: 'Visiting', completed: 'Visited', value: m[1] }) }, + { re: /\bPage\.navigate\b/, build: () => ({ active: 'Visiting page', completed: 'Visited page', value: '' }) }, + { re: /\bPage\.reload\b/, build: () => ({ active: 'Reloading page', completed: 'Reloaded page', value: '' }) }, + // Captures + { re: /\bPage\.captureScreenshot\b/, build: () => ({ active: 'Taking screenshot', completed: 'Took screenshot', value: '' }) }, + { re: /\bPage\.printToPDF\b/, build: () => ({ active: 'Saving page as PDF', completed: 'Saved page as PDF', value: '' }) }, + // DOM inspection + 
{ re: /\bDOM\.(?:querySelector|getDocument|describeNode|getAttributes|getOuterHTML)\b/, build: () => ({ active: 'Inspecting page', completed: 'Inspected page', value: '' }) }, + // Runtime.evaluate — only mapped when the expression reads well-known + // document/location properties (the agent's bread-and-butter state read). + // Anything else falls through — arbitrary JS, intent unknowable. + { + re: /\bRuntime\.evaluate\b[\s\S]*?expression\s*:\s*['"`][\s\S]*?(?:document\.title|document\.body|document\.readyState|location\.href|location\.host)/, + build: () => ({ active: 'Looking at page', completed: 'Looked at page', value: '' }), + }, + // Tabs / targets + { re: /\blistPageTargets\s*\(/, build: () => ({ active: 'Listing open tabs', completed: 'Listed open tabs', value: '' }) }, + { re: /\bsession\.use\s*\(/, build: () => ({ active: 'Switching tab', completed: 'Switched tab', value: '' }) }, + { re: /\bTarget\.closeTarget\b/, build: () => ({ active: 'Closing tab', completed: 'Closed tab', value: '' }) }, + { re: /\bdetectBrowsers\s*\(/, build: () => ({ active: 'Looking for open browsers', completed: 'Looked for open browsers', value: '' }) }, + // Connection — scaffolding, lowest priority + { re: /\bconnectToAssignedTarget\s*\(/, build: () => ({ active: 'Connecting to browser', completed: 'Connected to browser', value: '' }) }, + { re: /\bsession\.connect\s*\(/, build: () => ({ active: 'Connecting to browser', completed: 'Connected to browser', value: '' }) }, +]; + +function summarizeBrowserHarnessJs(code: string): BashSummary | null { + for (const { re, build } of BROWSER_JS_PATTERNS) { + const m = code.match(re); + if (m) return build(m); + } + return null; +} + +/** + * Extract the JS payload from a `browser-harness-js` invocation, handling + * both inline (`browser-harness-js 'CODE'`) and heredoc (`< ({ active: 'Reading', completed: 'Read', value: basename(m[1]) }), + }, + { + re: /^(?:cat|less|more|bat)\s+(\S+)\s*$/, + build: (m) => ({ active: 'Reading', 
completed: 'Read', value: basename(m[1]) }), + }, + { + re: /^(?:head|tail)(?:\s+-n\s+\d+)?\s+(\S+)\s*$/, + build: (m) => ({ active: 'Reading', completed: 'Read', value: basename(m[1]) }), + }, + // ls [flags] [path] + { + re: /^ls(?:\s+-\S+)*(?:\s+(\S+))?\s*$/, + build: (m) => m[1] + ? { active: 'Looking at', completed: 'Looked at', value: basename(m[1]) } + : { active: 'Looking at files', completed: 'Looked at files', value: '' }, + }, + // find PATH … + { + re: /^find\s+/, + build: () => ({ active: 'Looking for files', completed: 'Looked for files', value: '' }), + }, + // grep / rg / ag PATTERN + { + re: /^(?:grep|rg|ag)\s+(?:-\S+\s+)*(?:(['"])([^'"]+)\1|(\S+))/, + build: (m) => ({ active: 'Searching for', completed: 'Searched for', value: m[2] ?? m[3] ?? '' }), + }, + // git → plain-English equivalents (generic targets folded into label) + { re: /^git\s+(?:status|diff|log|show)\b/, build: () => ({ active: 'Reviewing recent changes', completed: 'Reviewed recent changes', value: '' }) }, + { re: /^git\s+branch\b/, build: () => ({ active: 'Looking at versions', completed: 'Looked at versions', value: '' }) }, + { re: /^git\s+add\b/, build: () => ({ active: 'Marking changes to save', completed: 'Marked changes to save', value: '' }) }, + { re: /^git\s+commit\b/, build: () => ({ active: 'Saving progress', completed: 'Saved progress', value: '' }) }, + { re: /^git\s+push\b/, build: () => ({ active: 'Sending changes to the cloud', completed: 'Sent changes to the cloud', value: '' }) }, + { re: /^git\s+pull\b/, build: () => ({ active: 'Getting latest changes', completed: 'Got latest changes', value: '' }) }, + { re: /^git\s+checkout\s+(\S+)/, build: (m) => ({ active: 'Switching to', completed: 'Switched to', value: m[1] }) }, + { re: /^git\s+blame\s+(\S+)/, build: (m) => ({ active: 'Looking at history of', completed: 'Looked at history of', value: basename(m[1]) }) }, + { re: /^git\s+stash\b/, build: () => ({ active: 'Setting aside changes', completed: 'Set aside 
changes', value: '' }) }, + { re: /^git\s+(?:rebase|merge|fetch)\b/, build: () => ({ active: 'Syncing changes', completed: 'Synced changes', value: '' }) }, + // network → "Visited URL" + { re: /^(?:curl|wget|http|httpie)\s+(?:-\S+\s+)*(\S+)/, build: (m) => ({ active: 'Visiting', completed: 'Visited', value: m[1] }) }, + // file ops in plain English + { re: /^mkdir\s+(?:-\S+\s+)*(\S+)/, build: (m) => ({ active: 'Creating folder', completed: 'Created folder', value: basename(m[1]) }) }, + { re: /^touch\s+(\S+)/, build: (m) => ({ active: 'Creating', completed: 'Created', value: basename(m[1]) }) }, + { re: /^mv\s+\S+\s+(\S+)/, build: (m) => ({ active: 'Moving file to', completed: 'Moved file to', value: basename(m[1]) }) }, + { re: /^cp\s+(?:-\S+\s+)*\S+\s+(\S+)/, build: (m) => ({ active: 'Copying file to', completed: 'Copied file to', value: basename(m[1]) }) }, + { re: /^rm\s+(?:-\S+\s+)*(\S+)/, build: (m) => ({ active: 'Deleting', completed: 'Deleted', value: basename(m[1]) }) }, + { re: /^chmod\s+\S+\s+(\S+)/, build: (m) => ({ active: 'Updating access for', completed: 'Updated access for', value: basename(m[1]) }) }, + // redirection → "Saved to FILE" + { re: /^echo\s+.+\s+>>\s+(\S+)/, build: (m) => ({ active: 'Adding to', completed: 'Added to', value: basename(m[1]) }) }, + { re: /^echo\s+.+\s+>\s+(\S+)/, build: (m) => ({ active: 'Saving to', completed: 'Saved to', value: basename(m[1]) }) }, + // package managers + { re: /^(?:npm|yarn|pnpm|bun)\s+(?:install|add|i)\b/, build: () => ({ active: 'Installing tools', completed: 'Installed tools', value: '' }) }, + { re: /^(?:npm|yarn|pnpm|bun)\s+(?:run\s+)?(test|tests)\b/, build: () => ({ active: 'Running tests', completed: 'Ran tests', value: '' }) }, + { re: /^(?:npm|yarn|pnpm|bun)\s+(?:run\s+)?build\b/, build: () => ({ active: 'Building project', completed: 'Built project', value: '' }) }, + { re: /^(?:npm|yarn|pnpm|bun)\s+(?:run\s+)?(lint|typecheck|format)\b/, build: () => ({ active: 'Checking code', completed: 
'Checked code', value: '' }) }, + { re: /^(?:npm|yarn|pnpm|bun)\s+run\s+(\S+)/, build: (m) => ({ active: 'Running', completed: 'Ran', value: m[1] }) }, + // process / env / navigation + { re: /^cd\s+(?:"([^"]*)"|'([^']*)'|(\S+))\s*$/, build: (m) => ({ active: 'Changing folder to', completed: 'Changed folder to', value: basename(m[1] ?? m[2] ?? m[3] ?? '') }) }, + { re: /^pwd\b/, build: () => ({ active: 'Checking current folder', completed: 'Checked current folder', value: '' }) }, + { re: /^which\s+(\S+)/, build: (m) => ({ active: 'Finding', completed: 'Found', value: m[1] }) }, + { re: /^env\b/, build: () => ({ active: 'Checking settings', completed: 'Checked settings', value: '' }) }, + // Known scripting binaries — only map when we can be 100% sure of the + // intent from the binary alone. Browser-harness-js calls vary too widely + // (connect / navigate / click / DOM check), so it's intentionally absent + // and falls through to the generic "Ran " path below. + { + re: /^(?:\S*\/)?python3?\b(?:.*-c\b|.*<<)?/, + build: () => ({ active: 'Running Python code', completed: 'Ran Python code', value: '' }), + }, + { + re: /^(?:\S*\/)?node\b(?:.*-e\b|.*<<)?/, + build: () => ({ active: 'Running JavaScript code', completed: 'Ran JavaScript code', value: '' }), + }, + { + re: /^(?:\S*\/)?(?:psql|sqlite3|mysql)\b/, + build: () => ({ active: 'Querying database', completed: 'Queried database', value: '' }), + }, + { + re: /^ssh\b/, + build: () => ({ active: 'Running remote command', completed: 'Ran remote command', value: '' }), + }, + // heredoc → known binaries get plain-English mapping, unknowns fall back to + // the binary name (better than the vague "a script" — at least it's + // distinguishable from other actions). 
+ { + re: /^(\S+)[\s\S]*<<-?\s*['"]?\w+['"]?/, + build: (m) => { + const bin = basename(m[1]); + const known: Record = { + python: { active: 'Running Python code', completed: 'Ran Python code', value: '' }, + python3: { active: 'Running Python code', completed: 'Ran Python code', value: '' }, + node: { active: 'Running JavaScript code', completed: 'Ran JavaScript code', value: '' }, + psql: { active: 'Querying database', completed: 'Queried database', value: '' }, + sqlite3: { active: 'Querying database', completed: 'Queried database', value: '' }, + ssh: { active: 'Running remote command', completed: 'Ran remote command', value: '' }, + }; + return known[bin] ?? { active: 'Running', completed: 'Ran', value: bin }; + }, + }, +]; + +export function summarizeBashCommand(rawCmd: string | undefined): BashSummary | null { + if (!rawCmd) return null; + let inner = stripShellWrapper(rawCmd); + // `cd /path && ` — the real action is what comes after; strip + // the cd prefix so the label reflects the actual intent. Accept quoted + // paths (cd "/Application Support/…") since those have embedded spaces. + inner = inner.replace(/^cd\s+(?:"[^"]*"|'[^']*'|\S+)\s*(?:&&|;)\s*/, ''); + + // browser-harness-js: pattern-match the JS payload to identify the CDP call. + // When the inline quoting is malformed (truncated previews, multi-statement + // scripts with unbalanced quotes, etc.) extractBrowserHarnessJs gives up — + // fall back to scanning the raw inner string, since the same method calls + // we look for are unambiguous wherever they appear. + if (/^(?:\S*\/)?browser-harness(?:-js)?\b/.test(inner)) { + const js = extractBrowserHarnessJs(inner) ?? 
inner; + const browserSummary = summarizeBrowserHarnessJs(js); + if (browserSummary) return browserSummary; + return null; + } + + for (const { re, build } of BASH_PATTERNS) { + const m = inner.match(re); + if (m) return build(m); + } + return null; +} + +export function parseBashResult(raw: string | undefined): BashResult { + if (!raw) return { output: '', isError: false }; + const parsed = tryParseJSON(raw); + + // Try the structured fields first. If JSON.parse fails (often because the + // upstream adapter sliced the JSON to a 2000-char preview and the tail is + // missing), fall back to regex extraction of the known keys. + const stdout = asString(parsed?.stdout)?.trim() ?? extractJsonField(raw, 'stdout')?.trim() ?? ''; + const stderr = asString(parsed?.stderr)?.trim() ?? extractJsonField(raw, 'stderr')?.trim() ?? ''; + const aggregated = asString(parsed?.aggregated_output)?.trim() + ?? extractJsonField(raw, 'aggregated_output')?.trim() ?? ''; + const exit = typeof parsed?.exit_code === 'number' ? parsed.exit_code : null; + const status = asString(parsed?.status); + const duration = typeof parsed?.duration_ms === 'number' ? 
parsed.duration_ms : undefined; + const isError = (exit !== null && exit !== 0) || status === 'failed' || (!!stderr && !stdout && !aggregated); + + let output = ''; + if (isError && stderr) output = stderr; + else if (stdout) output = stdout; + else if (aggregated) output = aggregated; + else if (stderr) output = stderr; + else if (parsed) output = JSON.stringify(parsed, null, 2); + else output = raw.trim(); // truly unparseable — show raw + + return { output, isError, durationMs: duration }; +} diff --git a/app/tests/unit/hub/_corpus.spec.ts b/app/tests/unit/hub/_corpus.spec.ts new file mode 100644 index 00000000..fb5b320a --- /dev/null +++ b/app/tests/unit/hub/_corpus.spec.ts @@ -0,0 +1,38 @@ +import { describe, it } from 'vitest'; +import { execSync } from 'node:child_process'; +import { summarizeBashCommand } from '../../../src/renderer/hub/chat/toolLabels'; + +const DB = `${process.env.HOME}/Library/Application Support/Browser Use/sessions.db`; + +describe('corpus coverage', () => { + it('runs the summarizer against every real bash call in sessions.db and prints stats', () => { + let rows: { command: string }[] = []; + try { + const json = execSync( + `sqlite3 -json "${DB}" "SELECT DISTINCT json_extract(payload,'$.args.command') AS command FROM session_events WHERE type='tool_call' AND json_extract(payload,'$.name')='Bash'"`, + { encoding: 'utf8', maxBuffer: 20 * 1024 * 1024 }, + ); + rows = JSON.parse(json); + } catch { + // No DB on this machine — skip silently. + console.log('corpus: sessions.db not available, skipping'); + return; + } + + const stats: Record = {}; + const unmapped: string[] = []; + for (const { command } of rows) { + if (!command) continue; + const s = summarizeBashCommand(command); + const key = s ? s.completed + (s.value ? 
` · ${s.value.slice(0, 50)}` : '') : '__UNMAPPED__'; + stats[key] = (stats[key] || 0) + 1; + if (!s) unmapped.push(command.split('\n')[0].slice(0, 140)); + } + + const sorted = Object.entries(stats).sort((a, b) => b[1] - a[1]); + console.log(`\n=== corpus: ${rows.length} unique commands ===`); + for (const [k, n] of sorted) console.log(`${String(n).padStart(4)} ${k}`); + console.log('\n=== first 15 unmapped ==='); + for (const u of unmapped.slice(0, 15)) console.log(' ' + u); + }); +}); diff --git a/app/tests/unit/hub/summarizeBashCommand.spec.ts b/app/tests/unit/hub/summarizeBashCommand.spec.ts new file mode 100644 index 00000000..e3baae1a --- /dev/null +++ b/app/tests/unit/hub/summarizeBashCommand.spec.ts @@ -0,0 +1,226 @@ +import { describe, expect, it } from 'vitest'; +import { summarizeBashCommand } from '../../../src/renderer/hub/chat/toolLabels'; + +describe('summarizeBashCommand', () => { + it('returns null for unknown commands', () => { + expect(summarizeBashCommand('weirdtool --do-stuff')).toBeNull(); + }); + + it('unwraps /bin/zsh -lc "..." 
before matching', () => { + expect(summarizeBashCommand(`/bin/zsh -lc "sed -n '1,260p' AGENTS.md"`)).toEqual({ + active: 'Reading', completed: 'Read', value: 'AGENTS.md', + }); + }); + + it('summarizes cat as Read FILE', () => { + expect(summarizeBashCommand('cat package.json')).toEqual({ + active: 'Reading', completed: 'Read', value: 'package.json', + }); + }); + + it('summarizes ls as Looked at files (folded into label)', () => { + expect(summarizeBashCommand('ls -la')).toEqual({ + active: 'Looking at files', completed: 'Looked at files', value: '', + }); + }); + + it('summarizes find as Looked for files (folded into label)', () => { + expect(summarizeBashCommand('find ./src -name "*.ts"')).toEqual({ + active: 'Looking for files', completed: 'Looked for files', value: '', + }); + }); + + it('summarizes grep with quoted pattern', () => { + expect(summarizeBashCommand(`grep -r "useState" src/`)).toEqual({ + active: 'Searching for', completed: 'Searched for', value: 'useState', + }); + }); + + it('summarizes git status/diff/log/show as Reviewed recent changes', () => { + expect(summarizeBashCommand('git status --short')?.completed).toBe('Reviewed recent changes'); + expect(summarizeBashCommand('git diff')?.completed).toBe('Reviewed recent changes'); + expect(summarizeBashCommand('git log')?.completed).toBe('Reviewed recent changes'); + }); + + it('summarizes git commit as Saved progress', () => { + expect(summarizeBashCommand('git commit -m "hi"')).toEqual({ + active: 'Saving progress', completed: 'Saved progress', value: '', + }); + }); + + it('summarizes git push as Sent changes to the cloud', () => { + expect(summarizeBashCommand('git push origin main')?.completed).toBe('Sent changes to the cloud'); + }); + + it('summarizes git checkout BRANCH', () => { + expect(summarizeBashCommand('git checkout feature/chat-view')).toEqual({ + active: 'Switching to', completed: 'Switched to', value: 'feature/chat-view', + }); + }); + + it('summarizes curl as Visited URL', () => 
{ + expect(summarizeBashCommand('curl https://example.com')).toEqual({ + active: 'Visiting', completed: 'Visited', value: 'https://example.com', + }); + }); + + it('summarizes npm install as Installed tools', () => { + expect(summarizeBashCommand('npm install lodash')?.completed).toBe('Installed tools'); + }); + + it('summarizes npm test as Ran tests', () => { + expect(summarizeBashCommand('yarn test')?.completed).toBe('Ran tests'); + }); + + it('summarizes npm run build as Built project', () => { + expect(summarizeBashCommand('npm run build')?.completed).toBe('Built project'); + }); + + it('strips dirname from basename targets', () => { + expect(summarizeBashCommand('cat /a/b/c.md')?.value).toBe('c.md'); + }); + + it('maps browser-harness connectToAssignedTarget to "Connected to browser"', () => { + const cmd = `/bin/zsh -lc "browser-harness-js 'await connectToAssignedTarget()'"`; + expect(summarizeBashCommand(cmd)).toEqual({ + active: 'Connecting to browser', completed: 'Connected to browser', value: '', + }); + }); + + it('maps browser-harness session.connect to "Connected to browser"', () => { + expect(summarizeBashCommand(`browser-harness-js 'await session.connect()'`)?.completed) + .toBe('Connected to browser'); + }); + + it('extracts the URL from browser-harness Page.navigate (URL is a specific identifier, stays in value)', () => { + const cmd = `browser-harness-js 'await session.Page.navigate({url:"https://linkedin.com/mynetwork"})'`; + expect(summarizeBashCommand(cmd)).toEqual({ + active: 'Visiting', completed: 'Visited', value: 'https://linkedin.com/mynetwork', + }); + }); + + it('maps browser-harness Page.captureScreenshot to "Took screenshot"', () => { + expect(summarizeBashCommand(`browser-harness-js 'await session.Page.captureScreenshot()'`)?.completed) + .toBe('Took screenshot'); + }); + + it('maps browser-harness listPageTargets to "Listed open tabs"', () => { + expect(summarizeBashCommand(`browser-harness-js 'await listPageTargets()'`)?.completed) 
+ .toBe('Listed open tabs'); + }); + + it('maps browser-harness DOM.querySelector to "Inspected page"', () => { + const cmd = `browser-harness-js 'await session.DOM.querySelector({nodeId:1,selector:"h1"})'`; + expect(summarizeBashCommand(cmd)?.completed).toBe('Inspected page'); + }); + + it('maps browser-harness Input.dispatchMouseEvent to "Clicked on page"', () => { + expect(summarizeBashCommand(`browser-harness-js 'await session.Input.dispatchMouseEvent({type:"mousePressed",x:1,y:1})'`)?.completed) + .toBe('Clicked on page'); + }); + + it('returns null for arbitrary Runtime.evaluate (intent unknowable)', () => { + const cmd = `browser-harness-js 'await session.Runtime.evaluate({expression:"window.myCustom()"})'`; + expect(summarizeBashCommand(cmd)).toBeNull(); + }); + + it('maps Runtime.evaluate reading document.title to "Looked at page"', () => { + const cmd = `browser-harness-js 'await session.Runtime.evaluate({expression:"document.title", returnByValue:true})'`; + expect(summarizeBashCommand(cmd)?.completed).toBe('Looked at page'); + }); + + it('maps Runtime.evaluate reading location.href to "Looked at page"', () => { + const cmd = `browser-harness-js 'await session.Runtime.evaluate({expression:"({ url: location.href })", returnByValue:true})'`; + expect(summarizeBashCommand(cmd)?.completed).toBe('Looked at page'); + }); + + it('handles browser-harness inside a heredoc', () => { + const cmd = `/bin/zsh -lc "browser-harness-js <<'EOF'\nawait session.Page.navigate({url:'https://linkedin.com'});\nEOF"`; + expect(summarizeBashCommand(cmd)?.value).toBe('https://linkedin.com'); + }); + + it('multi-step script with connect+navigate labels the navigation (most informative wins)', () => { + const cmd = `/bin/zsh -lc "browser-harness-js <<'EOF'\nawait connectToAssignedTarget()\nawait session.Page.navigate({ url: 'https://x.com' })\nEOF"`; + expect(summarizeBashCommand(cmd)).toEqual({ + active: 'Visiting', completed: 'Visited', value: 'https://x.com', + }); + }); + + 
it('multi-step script with connect+screenshot labels the screenshot', () => { + const cmd = `/bin/zsh -lc "browser-harness-js <<'EOF'\nawait connectToAssignedTarget()\nawait session.Page.captureScreenshot({ format: 'png' })\nEOF"`; + expect(summarizeBashCommand(cmd)?.completed).toBe('Took screenshot'); + }); + + it('maps page.goto(URL) as "Visited URL" even though the API is Puppeteer-style', () => { + const cmd = `browser-harness-js 'await page.goto("https://x.com/home", {waitUntil: "domcontentloaded"})'`; + expect(summarizeBashCommand(cmd)).toEqual({ + active: 'Visiting', completed: 'Visited', value: 'https://x.com/home', + }); + }); + + it('falls back to scanning raw inner when inline quoting is broken (multi-statement scripts)', () => { + // No matching outer quotes — extractBrowserHarnessJs returns null, but the + // raw inner still contains identifiable CDP calls. + const cmd = `browser-harness-js 'await connectToAssignedTarget(); await page.goto("https://x.com/home", {waitUntil: "load"})'`; + expect(summarizeBashCommand(cmd)?.completed).toBe('Visited'); + }); + + it('connect-only script still labels as "Connected to browser"', () => { + const cmd = `/bin/zsh -lc "browser-harness-js 'await connectToAssignedTarget()'"`; + expect(summarizeBashCommand(cmd)?.completed).toBe('Connected to browser'); + }); + + it('maps python -c to Ran Python code', () => { + expect(summarizeBashCommand(`python3 -c "print('hi')"`)?.completed).toBe('Ran Python code'); + }); + + it('maps node -e to Ran JavaScript code', () => { + expect(summarizeBashCommand(`node -e "console.log(1)"`)?.completed).toBe('Ran JavaScript code'); + }); + + it('falls back to binary name for unknown heredoc', () => { + expect(summarizeBashCommand(`weird-tool < { + expect(summarizeBashCommand('head -n 50 src/index.ts')?.value).toBe('index.ts'); + }); + + it('summarizes mkdir as Created folder', () => { + expect(summarizeBashCommand('mkdir -p build/out')).toEqual({ + active: 'Creating folder', completed: 
'Created folder', value: 'out', + }); + }); + + it('summarizes rm as Deleted', () => { + expect(summarizeBashCommand('rm -rf node_modules')?.completed).toBe('Deleted'); + }); + + it('summarizes echo > FILE as Saved to', () => { + expect(summarizeBashCommand('echo "hello" > out.txt')).toEqual({ + active: 'Saving to', completed: 'Saved to', value: 'out.txt', + }); + }); + + it('summarizes pwd as Checked current folder', () => { + expect(summarizeBashCommand('pwd')?.completed).toBe('Checked current folder'); + }); + + it('summarizes standalone cd as Changed folder', () => { + expect(summarizeBashCommand('cd /tmp/foo')).toEqual({ + active: 'Changing folder to', completed: 'Changed folder to', value: 'foo', + }); + }); + + it('strips leading "cd X &&" so the chained command gets labeled', () => { + expect(summarizeBashCommand('cd /tmp/foo && git status')?.completed) + .toBe('Reviewed recent changes'); + }); + + it('strips leading "cd X &&" before a browser-harness call', () => { + const cmd = `/bin/zsh -lc "cd /work && browser-harness-js 'await connectToAssignedTarget()'"`; + expect(summarizeBashCommand(cmd)?.completed).toBe('Connected to browser'); + }); +}); From ba244ad5818be86209acac296de9b61a5e1ea51b Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 01:55:10 -0700 Subject: [PATCH 02/31] chat: collapsible tool group with live and prose summaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ToolGroup, a wrapper that batches consecutive tool calls in an agent turn into a single inline summary. While any tool in the group is in-flight the header shows the current tool's active label ("Reading file…", "Visiting x.com…") with a spinner; once complete it settles into a deduped prose summary ("Connected to browser, read file, and ran 1 command"). Click to expand and see each tool inline as a textual outline tied to the summary by a left rule. 
Unrecognized bash entries are pooled into "ran N command(s)" rather than showing raw shell. Specific identifiers (URLs, filenames, branches) stay in the value slot when meaningful; generic nouns (browser, page, tests) fold into the bold label so the chip never reads as "Bold verb — muted noun". --- app/src/renderer/hub/chat/ToolGroup.tsx | 141 ++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 app/src/renderer/hub/chat/ToolGroup.tsx diff --git a/app/src/renderer/hub/chat/ToolGroup.tsx b/app/src/renderer/hub/chat/ToolGroup.tsx new file mode 100644 index 00000000..2759dc33 --- /dev/null +++ b/app/src/renderer/hub/chat/ToolGroup.tsx @@ -0,0 +1,141 @@ +import React, { useState } from 'react'; +import type { OutputEntry } from '../types'; +import { ToolBlock } from './ToolBlock'; +import { TerminalSpinner } from './TerminalSpinner'; +import { + getToolType, + getToolLabel, + getToolDisplayValue, + summarizeBashCommand, +} from './toolLabels'; + +interface ToolGroupProps { + entries: OutputEntry[]; +} + +interface Phrase { + label: string; + value: string; +} + +function describeEntry(entry: OutputEntry, status: 'running' | 'completed'): Phrase { + // entry.content for a bash tool_call is JSON.stringify(args) — e.g. + // `{"preview":"/bin/zsh …","command":"/bin/zsh …"}`. Run it through + // getToolDisplayValue first so summarizeBashCommand sees the raw shell + // command, not the JSON wrapper. + const display = getToolDisplayValue(entry.tool, entry.content || ''); + if (getToolType(entry.tool) === 'bash') { + const s = summarizeBashCommand(display || entry.content || ''); + if (s) return { label: status === 'running' ? 
s.active : s.completed, value: s.value }; + } + return { + label: getToolLabel(entry.tool, status), + value: display, + }; +} + +function joinPhrases(phrases: string[]): string { + if (phrases.length === 0) return ''; + if (phrases.length === 1) return phrases[0]; + if (phrases.length === 2) return `${phrases[0]} and ${phrases[1]}`; + return `${phrases.slice(0, -1).join(', ')}, and ${phrases[phrases.length - 1]}`; +} + +/** + * Build a slug summary from completed entries: + * - One phrase per unique label (specific identifiers are dropped — too long). + * - Unrecognized bash commands are pooled into "ran N command(s)". + * - First phrase is capitalized; the rest are lowercased to read as prose. + * + * Example: [connect, read file, unrecognized bash] → "Connected to browser, + * read file, and ran 1 command". Click-to-expand reveals the per-tool details. + */ +function summarizeCompleted(entries: OutputEntry[]): string { + const seen = new Set(); + const phrases: string[] = []; + let unmappedBash = 0; + + for (const e of entries) { + const p = describeEntry(e, 'completed'); + // "Ran command" is the generic getToolLabel fallback for bash — the matcher + // didn't recognize it. Pool these rather than expose raw commands inline. + if (p.label === 'Ran command') { + unmappedBash++; + continue; + } + const key = p.label.toLowerCase(); + if (seen.has(key)) continue; + seen.add(key); + phrases.push(p.label); + } + + if (unmappedBash > 0) { + phrases.push(`ran ${unmappedBash} command${unmappedBash === 1 ? '' : 's'}`); + } + + const formatted = phrases.map((p, i) => (i === 0 ? 
p : p.charAt(0).toLowerCase() + p.slice(1))); + return joinPhrases(formatted); +} + +function ChevronDown({ rotated }: { rotated: boolean }): React.ReactElement { + return ( + + + + ); +} + +export function ToolGroup({ entries }: ToolGroupProps): React.ReactElement { + // A single tool isn't a group — render it as a standalone ToolBlock so the + // header doesn't read "Read AGENTS.md" twice (once in the group chip, once + // inside the expanded body). + if (entries.length === 1) return ; + + const runningIdx = entries.findIndex((e) => !e.result); + const isInFlight = runningIdx !== -1; + // Default open while in-flight; once expanded by the user (or auto-open on + // first mount), stay open. Manual click toggles in either direction. + const [expanded, setExpanded] = useState(true); + + const headerText = (() => { + if (isInFlight) { + const running = entries[runningIdx]; + const p = describeEntry(running, 'running'); + return p.value ? `${p.label} ${p.value}…` : `${p.label}…`; + } + return summarizeCompleted(entries); + })(); + + return ( +
+ + {expanded && ( +
+ {entries.map((e) => ( + + ))} +
+ )} +
+ ); +} From d624874616a13e6a0536ff9f9f447ca8b7893d5e Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 01:55:17 -0700 Subject: [PATCH 03/31] hub: add Back-to-chat button to AgentPane header When a session is opened in grid mode (typically because the user clicked Switch to browser from the chat view), there was no in-app affordance to get back to the chat. Adds an optional onOpenChat handler on AgentPaneProps and renders a "Back to chat" pill in the action row when it's provided. Button is conditional on the prop, so AgentPane consumers that don't pass it (none today besides HubApp's grid mode) are unaffected. --- app/src/renderer/hub/AgentPane.tsx | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/app/src/renderer/hub/AgentPane.tsx b/app/src/renderer/hub/AgentPane.tsx index caf3047d..c31d832e 100644 --- a/app/src/renderer/hub/AgentPane.tsx +++ b/app/src/renderer/hub/AgentPane.tsx @@ -699,11 +699,12 @@ interface AgentPaneProps { onSelect?: (sessionId: string) => void; onOpenFollowUp?: () => void; onOpenSettings?: () => void; + onOpenChat?: (sessionId: string) => void; followUpShortcut?: string; cycleShortcut?: string; } -export function AgentPane({ session, focused, onRerun, onResume, onPause, onFollowUp, onDismiss, onCancel, onSelect, onOpenFollowUp, onOpenSettings, followUpShortcut, cycleShortcut }: AgentPaneProps): React.ReactElement { +export function AgentPane({ session, focused, onRerun, onResume, onPause, onFollowUp, onDismiss, onCancel, onSelect, onOpenFollowUp, onOpenSettings, onOpenChat, followUpShortcut, cycleShortcut }: AgentPaneProps): React.ReactElement { const openaiLogo = useThemedAsset(openaiLogoDark, openaiLogoLight); const opencodeLogo = useThemedAsset(opencodeLogoDark, opencodeLogoLight); const paneRef = useRef(null); @@ -1033,6 +1034,19 @@ export function AgentPane({ session, focused, onRerun, onResume, onPause, onFoll Browser ended )} + {onOpenChat && ( + + )} + + ); + } + + // While running, 
still allow follow-ups — backend queues them (resume() + // returns `queued: true` if mid-step). Show a small hint above the input. + return ( + <> + {isBusy && ( +

+ Agent is {header.status === 'stuck' ? 'stuck' : 'running'} — your message will be queued. + {' '} + +

+ )} + + + ); + }, [header, onSubmit, onCancel, onExit]); + + if (!header) { + return ( +
+
Session not found.
+
+ ); + } + + const statusClass = `chat-pane__status chat-pane__status--${header.status}`; + + return ( +
+
+ +
+ +
+ +
+
{composer}
+
+
+ ); +} diff --git a/app/src/renderer/hub/chat/ChatTranscript.tsx b/app/src/renderer/hub/chat/ChatTranscript.tsx new file mode 100644 index 00000000..dbbd1056 --- /dev/null +++ b/app/src/renderer/hub/chat/ChatTranscript.tsx @@ -0,0 +1,128 @@ +import React, { useEffect, useLayoutEffect, useMemo, useRef } from 'react'; +import { useShallow } from 'zustand/react/shallow'; +import { useSessionsStore } from '../state/sessionsStore'; +import { adaptSession } from '../types'; +import type { AgentSession } from '../types'; +import { groupIntoTurns } from './groupIntoTurns'; +import { ChatTurn } from './ChatTurn'; +import { TerminalSpinner, Elapsed } from './TerminalSpinner'; + +function ThinkingIndicator({ since }: { since: number }): React.ReactElement { + return ( +
+ + Working + +
+ ); +} + +interface ChatTranscriptProps { + sessionId: string; +} + +const PIN_THRESHOLD_PX = 32; + +export function ChatTranscript({ sessionId }: ChatTranscriptProps): React.ReactElement | null { + // Subscribe only to this session's output + createdAt. Other sessions' + // updates do not re-render this component. + const sessionSlice = useSessionsStore( + useShallow((s): { output: AgentSession['output']; createdAt: number; status: AgentSession['status']; prompt: string } | null => { + const sess = s.byId[sessionId]; + if (!sess) return null; + return { output: sess.output, createdAt: sess.createdAt, status: sess.status, prompt: sess.prompt }; + }), + ); + + const containerRef = useRef(null); + const pinnedRef = useRef(true); + const lastTurnsLenRef = useRef(0); + + const turns = useMemo(() => { + if (!sessionSlice) return []; + const fake: AgentSession = { + id: sessionId, + prompt: sessionSlice.prompt, + status: 'idle', + createdAt: sessionSlice.createdAt, + output: sessionSlice.output, + }; + const { entries } = adaptSession(fake); + // SessionManager is supposed to emit `session.prompt` as a leading + // user_input event, but in older sessions and some adapter paths that + // entry is missing — leaving the chat with no opening user bubble. + // Synthesize one from session.prompt when needed so the kickoff message + // is always visible at the top. + if (sessionSlice.prompt && (entries.length === 0 || entries[0].type !== 'user_input')) { + entries.unshift({ + id: `prompt-${sessionId}`, + type: 'user_input', + timestamp: sessionSlice.createdAt, + content: sessionSlice.prompt, + }); + } + return groupIntoTurns(entries); + }, [sessionId, sessionSlice]); + + // Scroll-pin: stay glued to bottom when user is at the bottom; release + // when user scrolls up. New user_input forces re-pin. 
+ const onScroll = (): void => { + const el = containerRef.current; + if (!el) return; + const distance = el.scrollHeight - el.clientHeight - el.scrollTop; + pinnedRef.current = distance <= PIN_THRESHOLD_PX; + }; + + useLayoutEffect(() => { + const el = containerRef.current; + if (!el) return; + // Force-pin when a new user_input lands (new turn). + const newUserTurn = turns.length > lastTurnsLenRef.current + && turns[turns.length - 1]?.userEntry !== null; + if (newUserTurn) pinnedRef.current = true; + lastTurnsLenRef.current = turns.length; + + if (pinnedRef.current) { + el.scrollTop = el.scrollHeight; + } + }, [turns]); + + useEffect(() => { + // On session switch, snap to bottom. + const el = containerRef.current; + if (!el) return; + pinnedRef.current = true; + el.scrollTop = el.scrollHeight; + }, [sessionId]); + + if (!sessionSlice) return null; + + const isRunning = sessionSlice.status === 'running' || sessionSlice.status === 'stuck'; + // Show the thinking indicator while running unless the latest agent entry + // is an unpaired tool_call (that already has its own spinner) — avoids + // double-indicating activity. + const lastTurn = turns[turns.length - 1]; + const lastAgent = lastTurn?.agentEntries[lastTurn.agentEntries.length - 1]; + const lastIsInflightTool = lastAgent?.type === 'tool_call' && !lastAgent.result; + const showThinking = isRunning && !lastIsInflightTool; + // Elapsed counter resets at each turn — start counting from the last + // user_input timestamp, or session creation if there is none yet. + const since = lastTurn?.userEntry?.timestamp ?? sessionSlice.createdAt; + + if (turns.length === 0) { + return ( +
+ {showThinking ? :
No messages yet.
} +
+ ); + } + + return ( +
+ {turns.map((t) => ( + + ))} + {showThinking && } +
+ ); +} diff --git a/app/src/renderer/hub/chat/ChatTurn.tsx b/app/src/renderer/hub/chat/ChatTurn.tsx new file mode 100644 index 00000000..42063089 --- /dev/null +++ b/app/src/renderer/hub/chat/ChatTurn.tsx @@ -0,0 +1,179 @@ +import React, { useState } from 'react'; +import { Markdown } from '../Markdown'; +import type { OutputEntry } from '../types'; +import type { Turn } from './groupIntoTurns'; +import { ToolBlock } from './ToolBlock'; +import { ToolGroup } from './ToolGroup'; +import { useToast } from '@/renderer/components/base/Toast'; + +const USER_BUBBLE_CLAMP_LINES = 10; +const USER_BUBBLE_CLAMP_CHARS = 600; + +function CopyIcon(): React.ReactElement { + return ( + + ); +} + +function UserBubble({ content }: { content: string }): React.ReactElement { + const lines = content.split('\n').length; + const isLong = lines > USER_BUBBLE_CLAMP_LINES || content.length > USER_BUBBLE_CLAMP_CHARS; + const [expanded, setExpanded] = useState(false); + const clamped = isLong && !expanded; + const toast = useToast(); + + const handleCopy = async () => { + try { + await navigator.clipboard.writeText(content); + toast.show({ variant: 'success', title: 'Copied to clipboard' }); + } catch { + toast.show({ variant: 'error', title: 'Copy failed' }); + } + }; + + return ( +
+
+
{content}
+ {isLong && ( + + )} +
+
+ +
+
+ ); +} + +interface ChatTurnProps { + turn: Turn; +} + +function AgentEntry({ entry }: { entry: OutputEntry }): React.ReactElement | null { + switch (entry.type) { + case 'thinking': + return
{entry.content}
; + + case 'tool_call': + return ; + + case 'tool_result': { + // Orphaned tool_result (no preceding tool_call paired by adaptSession). + // Codex emits these for top-level error items ({type:"error", message}). + // Surface those as proper error cards; suppress all other orphans as noise. + const text = entry.content; + const errMatch = text.match(/"type"\s*:\s*"error"[\s\S]*?"message"\s*:\s*"((?:\\.|[^"\\])*)"/); + if (errMatch) { + const msg = errMatch[1] + .replace(/\\n/g, '\n') + .replace(/\\"/g, '"') + .replace(/\\\\/g, '\\'); + return
{msg}
; + } + return null; + } + + case 'done': + return ( +
+ +
+ ); + + case 'error': + return
{entry.content}
; + + case 'skill_used': + return skill · {entry.content}; + + case 'skill_written': + return wrote skill · {entry.content}; + + case 'harness_edited': + return edited {entry.content}; + + case 'file_output': + return file · {entry.content}; + + case 'notify': + if (entry.level === 'blocking') { + return
{entry.content}
; + } + return {entry.content}; + + default: + return null; + } +} + +/** + * Walk through agent entries, batching consecutive `tool_call` runs into + * `ToolGroup` blocks so a long agent turn renders as a few collapsed chips + * instead of dozens of stacked tool pills. Non-tool entries (thinking, done, + * skill_used, …) break the run and render in place. + */ +/** Normalize whitespace for comparing thinking/done content. */ +function normalizeProse(s: string): string { + return (s || '').trim().replace(/\s+/g, ' '); +} + +function renderAgentEntries(entries: OutputEntry[]): React.ReactElement[] { + const out: React.ReactElement[] = []; + let batch: OutputEntry[] = []; + const flush = () => { + if (batch.length === 0) return; + out.push(); + batch = []; + }; + for (let i = 0; i < entries.length; i++) { + const e = entries[i]; + if (e.type === 'tool_call') { + batch.push(e); + continue; + } + flush(); + // Agents (Claude Code in particular) often emit the same prose as a final + // `thinking` event AND a `done.summary` — which renders twice. When the + // immediately-following entry is `done` with identical content, skip the + // thinking so the markdown-rendered `done` wins. + if (e.type === 'thinking') { + const next = entries[i + 1]; + if (next && next.type === 'done' && normalizeProse(next.content) === normalizeProse(e.content)) { + continue; + } + } + const rendered = ; + if (rendered) out.push(rendered); + } + flush(); + return out; +} + +export function ChatTurn({ turn }: ChatTurnProps): React.ReactElement { + return ( +
+ {turn.userEntry && } + {turn.agentEntries.length > 0 && ( +
+ {renderAgentEntries(turn.agentEntries)} +
+ )} +
+ ); +} diff --git a/app/src/renderer/hub/chat/chat.css b/app/src/renderer/hub/chat/chat.css new file mode 100644 index 00000000..b9d51d7d --- /dev/null +++ b/app/src/renderer/hub/chat/chat.css @@ -0,0 +1,673 @@ +/* Chat view styles. Coexists with hub.css; uses theme tokens. */ + +.chat-pane { + display: flex; + flex-direction: column; + flex: 1; + min-height: 0; + background-color: var(--color-bg-elevated); + color: var(--color-fg-primary); +} + +.chat-pane__header { + display: flex; + align-items: center; + gap: 12px; + padding: 12px 24px; + border-bottom: 1px solid var(--color-border-subtle); + flex-shrink: 0; + background-color: var(--color-bg-elevated); +} + +.chat-pane__back, +.chat-pane__hdr-btn { + background: transparent; + border: 1px solid var(--color-border-subtle); + color: var(--color-fg-secondary); + border-radius: 6px; + padding: 5px 11px; + font-size: 13px; + cursor: pointer; + font-family: inherit; + line-height: 1.2; +} + +.chat-pane__back:hover, +.chat-pane__hdr-btn:hover { + background-color: var(--color-bg-overlay); + color: var(--color-fg-primary); +} + +.chat-pane__title { + flex: 1; + font-size: 14px; + font-weight: 500; + color: var(--color-fg-primary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.chat-pane__meta { + display: flex; + gap: 12px; + align-items: center; + font-size: 12px; + color: var(--color-fg-tertiary); + font-variant-numeric: tabular-nums; +} + +.chat-pane__status { + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.5px; + padding: 3px 9px; + border-radius: 6px; + border: 1px solid var(--color-border-subtle); + font-weight: 500; +} + +.chat-pane__status { border-color: currentColor; } +.chat-pane__status--running { color: var(--color-status-success); } +.chat-pane__status--stuck { color: var(--color-status-error); } +.chat-pane__status--idle { color: var(--color-status-warning); } +.chat-pane__status--paused { color: var(--color-accent-default); } 
+.chat-pane__status--stopped, +.chat-pane__status--draft { color: var(--color-fg-tertiary); } + +/* Full-width column so the scrollbar sits at the viewport's right edge. + Inner content is centered via symmetric horizontal padding that collapses + to 16px on narrow viewports. */ +.chat-pane__column { + flex: 1; + display: flex; + flex-direction: column; + width: 100%; + min-height: 0; + min-width: 0; +} + +.chat-transcript { + flex: 1; + overflow-y: auto; + /* Center column ~780px wide, with at least 16px gutter on small screens */ + padding: 28px max(16px, calc((100% - 780px) / 2)) 16px; + display: flex; + flex-direction: column; + gap: 28px; + min-height: 0; +} + +.chat-turn { + display: flex; + flex-direction: column; + gap: 12px; +} + +.chat-bubble__wrap { + display: flex; + flex-direction: column; + align-self: flex-end; + max-width: 78%; + gap: 6px; +} + +.chat-bubble { + /* Theme tokens don't go dark enough for a chat bubble, so layer a black + translucent tint over the page bg. Works in both modes — light gets a + mid-grey fill, dark gets a more recessed neutral. 
*/ + background-color: color-mix(in srgb, var(--color-fg-primary) 10%, var(--color-bg-elevated)); + border: none; + padding: 14px 18px; + border-radius: 18px; + font-size: 14px; + line-height: 1.55; + color: var(--color-fg-primary); + display: flex; + flex-direction: column; + gap: 6px; +} + +.chat-bubble__text { + white-space: pre-wrap; + word-break: break-word; +} + +.chat-bubble--clamped .chat-bubble__text { + display: -webkit-box; + -webkit-line-clamp: 10; + -webkit-box-orient: vertical; + overflow: hidden; + mask-image: linear-gradient(to bottom, black 70%, transparent 100%); + -webkit-mask-image: linear-gradient(to bottom, black 70%, transparent 100%); +} + +.chat-bubble__show-more { + background: transparent; + border: none; + color: var(--color-fg-primary); + font: inherit; + font-size: 14px; + cursor: pointer; + padding: 0; + align-self: flex-start; + display: inline-flex; + align-items: center; + gap: 4px; +} +.chat-bubble__show-more:hover { color: var(--color-fg-secondary); } + +.chat-bubble__actions { + display: flex; + gap: 4px; + align-self: flex-end; + color: var(--color-fg-tertiary); +} +.chat-bubble__actions button { + background: transparent; + border: none; + cursor: pointer; + color: inherit; + padding: 4px; + border-radius: 4px; + display: inline-flex; + align-items: center; + justify-content: center; +} +.chat-bubble__actions button:hover { + background-color: var(--color-bg-overlay); + color: var(--color-fg-primary); +} + +.chat-agent { + display: flex; + flex-direction: column; + gap: 10px; + align-self: stretch; + max-width: 100%; +} + +/* --------------------------------------------------------------------------- + Tool block — small inline pill that expands into prism-highlighted blocks + Mirrors browser_use_cloud/frontend/.../bash-code-block.tsx +--------------------------------------------------------------------------- */ +.chat-tool { + display: flex; + flex-direction: column; + gap: 8px; + align-self: stretch; +} + +.chat-tool__pill 
{ + display: inline-flex; + align-items: center; + gap: 8px; + align-self: flex-start; + max-width: 100%; + background-color: var(--color-bg-base); + border: 1px solid var(--color-border-subtle); + border-radius: 6px; + padding: 5px 10px 5px 6px; + font: inherit; + font-size: 13px; + cursor: pointer; + color: var(--color-fg-primary); + transition: background-color 80ms ease, border-color 80ms ease; + text-align: left; +} + +.chat-tool__pill:hover { + background-color: var(--color-bg-overlay); + border-color: var(--color-border-default); +} + +/* Group summary is plain text — no card chrome, no icon circle. Visually + subordinate to the agent's final output (smaller, dimmer) since the summary + is just a recap of the tools that ran. */ +.chat-tool--group > .chat-tool__pill { + background-color: transparent; + border: none; + padding: 0; + color: var(--color-fg-tertiary); + font-size: 12px; + font-weight: 400; +} + +.chat-tool--group > .chat-tool__pill:hover { + background-color: transparent; + border: none; + color: var(--color-fg-secondary); +} + +.chat-tool--group > .chat-tool__pill .chat-tool__icon--bare { + border: none; + width: 12px; + height: 12px; + background: transparent; +} + +.chat-tool--group > .chat-tool__pill .chat-tool__label { + font-weight: 400; + color: inherit; +} + +/* Expanded body of a group — strip its own card chrome and use a left-border + line to suggest "these are children of the summary above". Nested ToolBlocks + also drop their pill chrome so it reads as a textual outline, not stacked + cards. The individual tool's own expansion (Command/Output codeblocks) keeps + its card chrome because it sits one level deeper. 
*/ +.chat-tool--group > .chat-tool__expanded { + background-color: transparent; + border: none; + border-radius: 0; + padding: 4px 0 4px 14px; + margin-left: 4px; + border-left: 1px solid var(--color-border-subtle); + gap: 4px; +} + +.chat-tool--group .chat-tool__group-list > .chat-tool > .chat-tool__pill { + background-color: transparent; + border: none; + padding: 2px 0; + color: var(--color-fg-tertiary); + font-size: 12px; + font-weight: 400; +} + +.chat-tool--group .chat-tool__group-list > .chat-tool > .chat-tool__pill:hover { + background-color: transparent; + color: var(--color-fg-secondary); +} + +.chat-tool--group .chat-tool__group-list > .chat-tool > .chat-tool__pill .chat-tool__icon { + display: none; +} + +.chat-tool--group .chat-tool__group-list > .chat-tool > .chat-tool__pill .chat-tool__label { + font-weight: 400; + color: inherit; +} + +.chat-tool__icon { + width: 18px; + height: 18px; + border-radius: 50%; + border: 1px solid var(--color-border-default); + display: inline-flex; + align-items: center; + justify-content: center; + color: var(--color-fg-tertiary); + flex-shrink: 0; +} + +.chat-tool__label { + font-weight: 500; + color: var(--color-fg-primary); + flex-shrink: 0; +} + +.chat-tool__value { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 12px; + color: var(--color-fg-tertiary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 380px; +} + +.chat-tool__duration { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 11px; + color: var(--color-fg-tertiary); + font-variant-numeric: tabular-nums; + flex-shrink: 0; +} + +.chat-tool__chev { + color: var(--color-fg-tertiary); + flex-shrink: 0; +} + +.chat-tool__expanded { + display: flex; + flex-direction: column; + gap: 10px; + padding: 10px; + border: 1px solid var(--color-border-subtle); + border-radius: 10px; + background-color: var(--color-bg-base); +} + +/* 
--------------------------------------------------------------------------- + CodeBlock — bordered, label header, copy button, prism body +--------------------------------------------------------------------------- */ +.chat-code { + border: 1px solid var(--color-border-subtle); + border-radius: 10px; + overflow: hidden; + background-color: var(--color-bg-elevated); +} + +.chat-code__head { + display: flex; + align-items: center; + justify-content: space-between; + padding: 6px 10px; + border-bottom: 1px solid var(--color-border-subtle); + background-color: var(--color-bg-base); +} + +.chat-code__label { + font-size: 12px; + font-weight: 500; + color: var(--color-fg-secondary); +} + +.chat-code__copy { + display: inline-flex; + align-items: center; + gap: 5px; + padding: 2px 7px; + border: 0; + background: transparent; + color: var(--color-fg-tertiary); + font: inherit; + font-size: 11px; + cursor: pointer; + border-radius: 6px; +} + +.chat-code__copy:hover { + background-color: var(--color-bg-overlay); + color: var(--color-fg-primary); +} + +.chat-code__body { + max-height: 280px; + overflow: auto; + font-size: 13px; +} + +.chat-code__body--error { + color: #c84848; +} + +.chat-code__pre { + margin: 0; + padding: 10px 12px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 13px; + line-height: 1.55; + white-space: pre-wrap; + word-break: break-word; + background: transparent; +} + +/* Markdown rendering inside a CodeBlock body (asMarkdown). Override the + global .md font-size (which is var(--font-size-xs)) so chat prose reads + at chat scale instead of sidebar/tooltip scale. 
*/ +.chat-code__md { + padding: 10px 14px; +} +.chat-code__md .md, +.chat-step__assistant .md, +.chat-step__thinking .md { + font-size: 14px; + line-height: 1.65; + color: var(--color-fg-primary); +} +.chat-code__md .md h1 { font-size: 18px; } +.chat-code__md .md h2 { font-size: 16px; } +.chat-code__md .md h3, +.chat-code__md .md h4 { font-size: 14px; } +.chat-code__md .md code, +.chat-step__assistant .md code { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 12.5px; + background-color: var(--color-bg-base); + padding: 1px 5px; + border-radius: 4px; +} +.chat-code__md .md pre { + background-color: var(--color-bg-base); + border: 1px solid var(--color-border-subtle); + border-radius: 6px; + padding: 10px 12px; + overflow-x: auto; + font-size: 12.5px; + line-height: 1.5; +} + +.chat-step__thinking { + font-style: italic; + color: var(--color-fg-tertiary); + font-size: 14px; + line-height: 1.65; + padding: 4px 0; +} + +.chat-step__error { + background-color: rgba(220, 70, 70, 0.06); + border: 1px solid rgba(220, 70, 70, 0.30); + border-radius: 10px; + padding: 10px 14px; + color: #c84848; + font-size: 13px; + line-height: 1.5; +} + +.chat-step__chip { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 3px 10px; + font-size: 12px; + color: var(--color-fg-tertiary); + background-color: var(--color-bg-base); + border: 1px solid var(--color-border-subtle); + border-radius: 999px; + align-self: flex-start; +} + +.chat-step__assistant { + padding: 4px 0; + font-size: 14px; + line-height: 1.65; + color: var(--color-fg-primary); +} + +/* Bottom thinking indicator */ +.chat-thinking { + display: flex; + align-items: center; + gap: 8px; + padding: 6px 2px; + color: var(--color-fg-tertiary); + font-size: 13px; + align-self: flex-start; +} + +.chat-thinking__label { + color: var(--color-fg-secondary); +} + +.chat-elapsed { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 12px; + color: 
var(--color-fg-tertiary); +} + +.chat-spinner { + display: inline-block; + color: var(--color-accent-default); +} + +.chat-composer { + flex-shrink: 0; + padding: 12px max(16px, calc((100% - 780px) / 2)) 18px; + background-color: var(--color-bg-elevated); +} + +.chat-composer__hint { + font-size: 12px; + color: var(--color-fg-tertiary); + margin: 0 0 8px 4px; + display: flex; + align-items: center; + gap: 8px; +} + +.chat-composer__terminal { + font-size: 13px; + color: var(--color-fg-tertiary); + padding: 11px 15px; + border: 1px dashed var(--color-border-subtle); + border-radius: 10px; + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; +} + +.chat-composer__cancel { + background: transparent; + border: 1px solid var(--color-border-subtle); + color: var(--color-fg-secondary); + border-radius: 6px; + padding: 4px 10px; + font-size: 12px; + cursor: pointer; + font-family: inherit; +} + +.chat-composer__cancel:hover { + color: var(--color-fg-primary); + background-color: var(--color-bg-overlay); +} + +/* Preview rail — left-aligned, lives in the centered 780px column directly + above the composer. Same horizontal padding as the transcript/composer so + it visually anchors to the same reading column. */ +.chat-preview-rail { + flex-shrink: 0; + padding: 0 max(16px, calc((100% - 780px) / 2)) 10px; + display: flex; + justify-content: flex-start; +} + +/* --------------------------------------------------------------------------- + BrowserPreview — live screencast thumbnail above the chat composer +--------------------------------------------------------------------------- */ +.browser-preview { + position: relative; + width: 200px; + height: 125px; + border: 1px solid var(--color-border-subtle); + border-radius: 10px; + overflow: hidden; + background-color: var(--color-bg-base); + padding: 0; + cursor: pointer; + display: block; + /* Two-layer shadow: long soft ambient + short crisper for depth. 
*/ + box-shadow: + 0 1px 2px rgba(0, 0, 0, 0.05), + 0 8px 24px -10px rgba(0, 0, 0, 0.14); + transition: transform 220ms cubic-bezier(.2,.7,.2,1), + opacity 220ms ease, + box-shadow 160ms ease, + border-color 160ms ease; + transform-origin: bottom left; +} + +.browser-preview:hover { + border-color: var(--color-border-default); + box-shadow: + 0 2px 4px rgba(0, 0, 0, 0.06), + 0 14px 32px -10px rgba(0, 0, 0, 0.22); +} + +.browser-preview__img { + display: block; + width: 100%; + height: 100%; + object-fit: cover; + user-select: none; + -webkit-user-drag: none; +} + +.browser-preview__placeholder { + position: absolute; + inset: 0; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: 6px; + color: var(--color-fg-tertiary); + font-size: 11px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.browser-preview__placeholder-icon { + font-size: 22px; + line-height: 1; + opacity: 0.5; +} + +.browser-preview__placeholder-label { + letter-spacing: 0.3px; +} + +.browser-preview--disabled { + cursor: default; + opacity: 0.75; +} +.browser-preview--disabled:hover { + box-shadow: none; +} + +.browser-preview__overlay { + position: absolute; + inset: 0; + display: flex; + align-items: flex-end; + justify-content: flex-end; + padding: 6px; + background: linear-gradient(to bottom, rgba(0,0,0,0) 60%, rgba(0,0,0,0.22) 100%); + opacity: 0; + transition: opacity 120ms ease; +} + +.browser-preview:hover .browser-preview__overlay { opacity: 1; } + +.browser-preview__expand-hint { + font-size: 10px; + color: white; + background-color: rgba(0,0,0,0.55); + padding: 2px 7px; + border-radius: 4px; + letter-spacing: 0.3px; + font-weight: 500; +} + +/* Click → quick zoom-out into the destination view. Real animation handled + by HubApp swapping viewMode 220ms later; this card visually "lifts off". 
*/ +.browser-preview--expanding { + transform: scale(2.2) translate(-30%, 50%); + opacity: 0.4; + pointer-events: none; +} + +.chat-empty { + display: flex; + flex-direction: column; + gap: 12px; + align-items: center; + justify-content: center; + height: 100%; + color: var(--color-fg-tertiary); + font-size: 14px; +} diff --git a/app/src/renderer/hub/chat/groupIntoTurns.ts b/app/src/renderer/hub/chat/groupIntoTurns.ts new file mode 100644 index 00000000..dbc74265 --- /dev/null +++ b/app/src/renderer/hub/chat/groupIntoTurns.ts @@ -0,0 +1,36 @@ +import type { OutputEntry } from '../types'; + +export type Turn = { + id: string; + userEntry: OutputEntry | null; + agentEntries: OutputEntry[]; +}; + +/** + * Group a flat OutputEntry[] into conversational turns. + * + * A turn starts at a `user_input` entry and continues until the next + * `user_input` (or end). Entries before the first user_input go into a + * leading "system" turn with userEntry = null (rare — session.prompt is + * always emitted as user_input by SessionManager, so this is mostly defensive). 
/**
 * Partition a flat transcript into conversational turns.
 *
 * Each `user_input` entry opens a new turn and becomes that turn's
 * `userEntry`; every entry after it, up to the next `user_input`, is appended
 * to the turn's `agentEntries`. Entries that appear before the first
 * `user_input` are collected into a leading turn whose `userEntry` is `null`
 * and whose id is `pre-` followed by the id of the first such entry.
 *
 * @param entries Transcript entries in display order. The array is not mutated.
 * @returns Turns in the order they were opened; an empty array for empty input.
 */
export function groupIntoTurns(entries: readonly OutputEntry[]): Turn[] {
  const turns: Turn[] = [];

  for (const entry of entries) {
    // A user message always starts a fresh turn, even directly after another one.
    if (entry.type === 'user_input') {
      turns.push({ id: entry.id, userEntry: entry, agentEntries: [] });
      continue;
    }

    // The most recently opened turn (if any) is the one still accepting entries.
    const open = turns.length > 0 ? turns[turns.length - 1] : undefined;
    if (open === undefined) {
      // Nothing opened yet: start the defensive "no user message" turn.
      turns.push({ id: `pre-${entry.id}`, userEntry: null, agentEntries: [entry] });
    } else {
      open.agentEntries.push(entry);
    }
  }

  return turns;
}
toolLabels mirrors browser_use_cloud's tool-labels.ts — TOOL_TYPES / TOOL_LABELS / getToolDisplayValue + parseBashResult that unwraps codex {stdout, stderr, aggregated_output, exit_code} envelopes (regex-based fallback for the case where codex slices preview to 2000 chars and truncates the JSON tail). TerminalSpinner is a JS-driven braille frame cycler ⠋⠙⠹⠸…, paired with Elapsed which ticks an / counter every second. BrowserPreview subscribes to hasBrowser from the sessions store, starts the screencast when true via sessions.previewStart, listens for session-preview-frame events, and renders the latest base64 JPEG as a 200×125 thumbnail above the composer. Click triggers a 220ms zoom-out animation then onExpand — the parent switches to grid mode. --- app/src/renderer/hub/chat/BrowserPreview.tsx | 121 +++++++++ app/src/renderer/hub/chat/CodeBlock.tsx | 104 ++++++++ app/src/renderer/hub/chat/TerminalSpinner.tsx | 42 ++++ app/src/renderer/hub/chat/ToolBlock.tsx | 231 ++++++++++++++++++ 4 files changed, 498 insertions(+) create mode 100644 app/src/renderer/hub/chat/BrowserPreview.tsx create mode 100644 app/src/renderer/hub/chat/CodeBlock.tsx create mode 100644 app/src/renderer/hub/chat/TerminalSpinner.tsx create mode 100644 app/src/renderer/hub/chat/ToolBlock.tsx diff --git a/app/src/renderer/hub/chat/BrowserPreview.tsx b/app/src/renderer/hub/chat/BrowserPreview.tsx new file mode 100644 index 00000000..9036c93c --- /dev/null +++ b/app/src/renderer/hub/chat/BrowserPreview.tsx @@ -0,0 +1,121 @@ +import React, { useCallback, useEffect, useRef, useState } from 'react'; +import { useShallow } from 'zustand/react/shallow'; +import { useSessionsStore } from '../state/sessionsStore'; + +interface BrowserPreviewProps { + sessionId: string; + onExpand: () => void; +} + +const PREVIEW_W = 200; +const PREVIEW_H = 125; + +/** + * Live browser thumbnail driven by CDP Page.startScreencast on the main side. + * Always rendered above the composer so the user has a stable target. 
The + * browser-attached signal comes from session.hasBrowser, which the main + * process derives from BrowserPool.getWebContents(id) — single source of + * truth, no shadow state. + */ +export function BrowserPreview({ sessionId, onExpand }: BrowserPreviewProps): React.ReactElement { + const sessionInfo = useSessionsStore( + useShallow((s) => { + const sess = s.byId[sessionId]; + if (!sess) return { hasBrowser: false, status: 'idle' as const }; + return { hasBrowser: !!sess.hasBrowser, status: sess.status }; + }), + ); + + const [frame, setFrame] = useState(null); + const [expanding, setExpanding] = useState(false); + const cardRef = useRef(null); + + // Listen for frames unconditionally — cheap, and a late-arriving stream + // attaches without a remount. + useEffect(() => { + const api = window.electronAPI; + if (!api) return; + let count = 0; + let lastLog = 0; + return api.on.sessionPreviewFrame((id, dataB64) => { + if (id !== sessionId) return; + count += 1; + const now = Date.now(); + if (now - lastLog > 5000) { + lastLog = now; + console.log('[BrowserPreview] frames received', { sessionId, count, bytes: dataB64.length }); + } + setFrame(dataB64); + }); + }, [sessionId]); + + // Start/stop the screencast in lockstep with hasBrowser. When the agent + // creates a browser later, hasBrowser flips true and this effect re-runs. 
+ useEffect(() => { + const api = window.electronAPI; + if (!api || !sessionInfo.hasBrowser) { + setFrame(null); + return; + } + let cancelled = false; + api.sessions.previewStart(sessionId, { maxWidth: PREVIEW_W * 2, maxHeight: PREVIEW_H * 2 }) + .then((res) => { + if (cancelled) { + api.sessions.previewStop(sessionId).catch(() => { /* ignore */ }); + return; + } + if (!res.ok) { + console.warn('[BrowserPreview] previewStart not ok', { sessionId, reason: res.reason }); + } + }) + .catch((err) => console.error('[BrowserPreview] previewStart threw', err)); + + return () => { + cancelled = true; + api.sessions.previewStop(sessionId).catch(() => { /* ignore */ }); + }; + }, [sessionId, sessionInfo.hasBrowser]); + + const onClick = useCallback(() => { + if (!sessionInfo.hasBrowser) return; + setExpanding(true); + setTimeout(() => onExpand(), 220); + }, [sessionInfo.hasBrowser, onExpand]); + + const disabled = !sessionInfo.hasBrowser; + return ( + + ); +} diff --git a/app/src/renderer/hub/chat/CodeBlock.tsx b/app/src/renderer/hub/chat/CodeBlock.tsx new file mode 100644 index 00000000..dc8557e3 --- /dev/null +++ b/app/src/renderer/hub/chat/CodeBlock.tsx @@ -0,0 +1,104 @@ +import React, { useState } from 'react'; +import { Highlight, themes, type Language } from 'prism-react-renderer'; +import { Markdown } from '../Markdown'; + +interface CodeBlockProps { + label: string; + code: string; + language?: Language; + /** When true, render body via the Markdown renderer instead of mono pre. */ + asMarkdown?: boolean; + /** When true, render in destructive color regardless of theme. */ + isError?: boolean; +} + +function CopyIcon(): React.ReactElement { + return ( + + + + + ); +} + +function CheckIcon(): React.ReactElement { + return ( + + + + ); +} + +/** + * Bordered, syntax-highlighted code block with a header bar (label + copy). + * The header is sticky-feeling: matches the cloud's bash-code-block pattern. 
+ * Theme is dark-on-light by default and re-tints automatically via + * prefers-color-scheme through the parent .chat-pane theme variables. + */ +export function CodeBlock({ label, code, language, asMarkdown, isError }: CodeBlockProps): React.ReactElement { + const [copied, setCopied] = useState(false); + // Detect dark mode via the parent's resolved color. We can't easily get the + // theme name here so we sniff the bg color brightness on mount; not perfect + // but cheap. The hub themes set --color-bg-base which we read. + const isDark = (() => { + if (typeof window === 'undefined') return false; + const v = getComputedStyle(document.documentElement).getPropertyValue('--color-bg-base').trim(); + if (!v) return false; + // Parse hex / rgb to brightness < 128 → dark + const m = v.match(/#?([0-9a-f]{6}|[0-9a-f]{3})/i); + if (m) { + const hex = m[1].length === 3 ? m[1].split('').map((c) => c + c).join('') : m[1]; + const r = parseInt(hex.slice(0, 2), 16); + const g = parseInt(hex.slice(2, 4), 16); + const b = parseInt(hex.slice(4, 6), 16); + return (r * 299 + g * 587 + b * 114) / 1000 < 128; + } + return false; + })(); + + const onCopy = async (e: React.MouseEvent): Promise => { + e.stopPropagation(); + try { + await navigator.clipboard.writeText(code); + setCopied(true); + setTimeout(() => setCopied(false), 1500); + } catch (err) { + console.error('[CodeBlock] copy failed', err); + } + }; + + return ( +
+
+ {label} + +
+
+ {asMarkdown ? ( +
+ +
+ ) : language ? ( + + {({ tokens, getLineProps, getTokenProps }) => ( +
+                {tokens.map((line, i) => (
+                  
+ {line.map((token, key) => ( + + ))} +
+ ))} +
+ )} +
+ ) : ( +
{code}
+ )} +
+
+ ); +} diff --git a/app/src/renderer/hub/chat/TerminalSpinner.tsx b/app/src/renderer/hub/chat/TerminalSpinner.tsx new file mode 100644 index 00000000..1e5b7138 --- /dev/null +++ b/app/src/renderer/hub/chat/TerminalSpinner.tsx @@ -0,0 +1,42 @@ +import React, { useEffect, useState } from 'react'; + +const FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; +const FRAME_MS = 80; + +interface TerminalSpinnerProps { + size?: number; +} + +export function TerminalSpinner({ size = 13 }: TerminalSpinnerProps): React.ReactElement { + const [i, setI] = useState(0); + useEffect(() => { + const t = setInterval(() => setI((v) => (v + 1) % FRAMES.length), FRAME_MS); + return () => clearInterval(t); + }, []); + return ( + + {FRAMES[i]} + + ); +} + +interface ElapsedProps { + since: number; // ms epoch +} + +export function Elapsed({ since }: ElapsedProps): React.ReactElement { + const [now, setNow] = useState(() => Date.now()); + useEffect(() => { + const t = setInterval(() => setNow(Date.now()), 1000); + return () => clearInterval(t); + }, []); + const secs = Math.max(0, Math.floor((now - since) / 1000)); + const m = Math.floor(secs / 60); + const s = secs % 60; + const label = m > 0 ? 
/**
 * Format a duration given in milliseconds as a short label.
 *
 * Values that round to less than one second are shown as whole milliseconds
 * (`"42ms"`); everything else is shown in seconds with one decimal (`"1.5s"`).
 *
 * The sub-second branch rounds to an integer so a fractional input cannot leak
 * a long decimal tail into the label (e.g. `12.345678ms`).
 * NOTE(review): whether callers ever pass fractional milliseconds is not
 * visible from here; for integer input the rounding is a no-op.
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted label.
 */
function formatDuration(ms: number): string {
  const wholeMs = Math.round(ms);
  if (wholeMs < 1000) return `${wholeMs}ms`;
  return `${(ms / 1000).toFixed(1)}s`;
}

/**
 * Pretty-print a result string when it is a JSON object or array.
 *
 * - Blank / whitespace-only input yields `''`.
 * - Input that (after trimming) is wrapped in `{…}` or `[…]` and parses as
 *   JSON is re-serialized with two-space indentation.
 * - Anything else, including bracketed text that fails to parse, is returned
 *   exactly as received (untrimmed).
 *
 * @param raw Raw tool argument or result text.
 * @returns The pretty-printed JSON, the original text, or `''`.
 */
function formatGenericResult(raw: string): string {
  const trimmed = raw.trim();
  if (trimmed === '') return '';

  const looksLikeObject = trimmed.startsWith('{') && trimmed.endsWith('}');
  const looksLikeArray = trimmed.startsWith('[') && trimmed.endsWith(']');
  if (!looksLikeObject && !looksLikeArray) return raw;

  try {
    return JSON.stringify(JSON.parse(trimmed), null, 2);
  } catch {
    // Bracketed but not valid JSON: show it verbatim.
    return raw;
  }
}

/** How a tool's output should be rendered, together with the text to render. */
type OutputRender =
  | { mode: 'markdown'; code: string }
  | { mode: 'json'; code: string }
  | { mode: 'yaml'; code: string }
  | { mode: 'text'; code: string };
/** True for an array with at least one element or an object with at least one key. */
function isNonEmptyContainer(value: unknown): boolean {
  if (Array.isArray(value)) return value.length > 0;
  return typeof value === 'object' && value !== null && Object.keys(value).length > 0;
}

/**
 * Render a parsed JSON value as YAML-like text meant for reading, not for
 * round-tripping (keys are emitted unquoted).
 *
 * - `null`, numbers and booleans are printed literally.
 * - Strings containing a newline, or longer than 80 characters, become a `|`
 *   block scalar so embedded newlines show up as real line breaks; shorter
 *   strings are printed JSON-quoted.
 * - Empty arrays / objects are printed inline as `[]` / `{}`.
 * - A non-empty array or object that is the value of a key starts on the line
 *   after `key:`; one that is an array item starts right after the `- `, with
 *   its remaining lines aligned under it.
 *
 * @param value  Value to render (typically the result of `JSON.parse`).
 * @param indent Nesting depth; each level indents by two spaces.
 * @returns The rendered text, without a trailing newline.
 */
function toReadableYaml(value: unknown, indent = 0): string {
  const pad = '  '.repeat(indent);

  if (value === null) return 'null';
  if (typeof value === 'number' || typeof value === 'boolean') return String(value);

  if (typeof value === 'string') {
    if (value.includes('\n') || value.length > 80) {
      // Block scalar: content sits two spaces past this level's pad.
      const inner = value.split('\n').map((line) => `${pad}  ${line}`).join('\n');
      return `|\n${inner}`;
    }
    // Short string: inline and quoted, for visual distinction from keys.
    return JSON.stringify(value);
  }

  if (Array.isArray(value)) {
    if (value.length === 0) return '[]';
    const childPad = '  '.repeat(indent + 1);
    return value
      .map((item) => {
        const child = toReadableYaml(item, indent + 1);
        // A nested container comes back with its own leading pad on the first
        // line; drop it so the content follows "- " and later lines line up.
        const body = isNonEmptyContainer(item) ? child.slice(childPad.length) : child;
        return `${pad}- ${body}`;
      })
      .join('\n');
  }

  if (typeof value === 'object') {
    const entries = Object.entries(value as Record<string, unknown>);
    if (entries.length === 0) return '{}';
    return entries
      .map(([key, entryValue]) => {
        const child = toReadableYaml(entryValue, indent + 1);
        // Nested containers go on their own lines under the key; scalars,
        // block scalars and empty containers stay on the key's line.
        const separator = isNonEmptyContainer(entryValue) ? '\n' : ' ';
        return `${pad}${key}:${separator}${child}`;
      })
      .join('\n');
  }

  return String(value);
}

/**
 * Heuristically pick how to render bash/script output.
 *
 * - Blank input → `text` with an empty body.
 * - Valid JSON object/array whose top-level values include a string that
 *   contains a newline or is longer than 200 characters → `yaml`, so the
 *   newlines inside strings render as line breaks instead of `\n` escapes.
 * - Any other valid JSON object/array → `json`, pretty-printed.
 * - Text with a markdown heading, a code fence, or a bullet list combined
 *   with sentence punctuation → `markdown` (trimmed).
 * - Everything else → `text`, returned untrimmed.
 *
 * @param raw Raw output text.
 * @returns The chosen render mode together with the text to display.
 */
function detectOutputRender(raw: string): OutputRender {
  const trimmed = raw.trim();
  if (!trimmed) return { mode: 'text', code: '' };

  const looksLikeJson =
    (trimmed.startsWith('{') && trimmed.endsWith('}')) ||
    (trimmed.startsWith('[') && trimmed.endsWith(']'));

  if (looksLikeJson) {
    try {
      const parsed: unknown = JSON.parse(trimmed);
      // Only top-level values are inspected.
      const values: unknown[] = Array.isArray(parsed)
        ? parsed
        : parsed !== null && typeof parsed === 'object'
          ? Object.values(parsed)
          : [];
      const hasMultiline = values.some(
        (v) => typeof v === 'string' && (v.includes('\n') || v.length > 200),
      );
      if (hasMultiline) {
        return { mode: 'yaml', code: toReadableYaml(parsed) };
      }
      return { mode: 'json', code: JSON.stringify(parsed, null, 2) };
    } catch {
      // Not valid JSON after all: fall through to the text heuristics.
    }
  }

  const hasHeading = /^#{1,6}\s/m.test(trimmed);
  const hasFence = /```/.test(trimmed);
  const hasListAndProse = /^\s*[-*]\s/m.test(trimmed) && /[.!?]\s/.test(trimmed);
  if (hasHeading || hasFence || hasListAndProse) {
    return { mode: 'markdown', code: trimmed };
  }

  return { mode: 'text', code: raw };
}
entry.duration; + + const expandedBody = (() => { + if (type === 'bash') { + const command = rawBashCommand; + const output = bash && bash.output ? detectOutputRender(bash.output) : null; + return ( + <> + + {output && output.code && ( + output.mode === 'markdown' + ? + : output.mode === 'json' + ? + : output.mode === 'yaml' + ? + : + )} + + ); + } + + // Generic: pretty-print args + result + const args = formatGenericResult(entry.content); + const result = entry.result ? formatGenericResult(entry.result.content) : ''; + return ( + <> + + {result && } + + ); + })(); + + return ( +
+ + {expanded && ( +
+ {expandedBody} +
+ )} +
+ ); +} From 77d8273f36df5b108feaeaaf3becbeedb3c44b6d Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 01:58:49 -0700 Subject: [PATCH 09/31] feat(hub): wire chat view and entry points Extends ViewMode with chat. Mounts useSessionsBridge once at the root. Dashboard submit, dashboard session selection, and sidebar row clicks all enter chat for the chosen session via enterChat(id), which writes chatSessionId to uiStore and flips viewMode. Browser views are now hidden everywhere except grid (was: everywhere except settings) so they don't bleed through the chat UI. ChatPane receives onExit (sets viewMode to dashboard) and onSwitchToBrowser (sets viewMode to grid for the same session). --- app/src/renderer/hub/HubApp.tsx | 53 ++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/app/src/renderer/hub/HubApp.tsx b/app/src/renderer/hub/HubApp.tsx index 7814edb0..2729bb23 100644 --- a/app/src/renderer/hub/HubApp.tsx +++ b/app/src/renderer/hub/HubApp.tsx @@ -13,8 +13,11 @@ import type { AgentSession, HlEvent } from './types'; import type { ActionId } from './keybindings'; import type { SettingsOpenIntent, SettingsSectionId } from './SettingsPane'; import { orderSessionsForSidebar } from './sessionOrdering'; +import { ChatPane } from './chat/ChatPane'; +import { useUIStore } from './state/uiStore'; +import { useSessionsBridge } from './state/useSessionsBridge'; -type ViewMode = 'dashboard' | 'grid' | 'settings'; +type ViewMode = 'dashboard' | 'grid' | 'chat' | 'settings'; type SettingsOpenPayload = { sectionId?: SettingsSectionId; focusBrowserCodeProvider?: string; @@ -115,6 +118,20 @@ export function HubApp(): React.ReactElement { const sessions = isMock ? mockSessions : (sessionsQuery.data ?? []); const setSessions = isMock ? setMockSessions : () => {}; + // Mirror sessions into Zustand for the chat view + future fine-grained + // subscribers. 
Uses the same per-event `session-output` IPC stream that the + // logs pane uses (not the heavier `session-updated` snapshot channel), so + // chat updates are true push events. Old consumers (Sidebar, AgentPane, + // Dashboard) keep reading from useSessionsQuery — no behavior change for + // grid mode. + useSessionsBridge(); + + // Chat target lives in useUIStore so the selection persists across reloads. + // viewMode itself remains HubApp-local for now (avoids a full migration of + // every other view-mode consumer); we just extend it with 'chat'. + const chatSessionId = useUIStore((s) => s.chatSessionId); + const setChatSession = useUIStore((s) => s.setChatSession); + useEffect(() => { console.log('[HubApp] sessions changed', { count: sessions.length, ts: Date.now(), ids: sessions.map((s) => s.id.slice(0, 8)) }); }, [sessions.length]); @@ -133,11 +150,19 @@ export function HubApp(): React.ReactElement { }); const setViewMode = useCallback((mode: ViewMode) => { setViewModeRaw(mode); - window.electronAPI?.sessions?.viewsSetVisible?.(mode !== 'settings')?.catch(() => {}); + // Browser views are only used by AgentPane (grid mode). Hide everywhere else + // so they don't bleed through the chat/dashboard/settings UI. 
+ const shouldShowBrowserViews = mode === 'grid'; + window.electronAPI?.sessions?.viewsSetVisible?.(shouldShowBrowserViews)?.catch(() => {}); if (mode === 'dashboard' || mode === 'grid') { try { window.localStorage.setItem('hub-view-mode', mode); } catch { /* ignore */ } } }, []); + const enterChat = useCallback((id: string) => { + console.log('[HubApp] enterChat', { id }); + setChatSession(id); + setViewMode('chat'); + }, [setChatSession, setViewMode]); const openPill = useCallback(() => { window.electronAPI?.pill.toggle(); }, []); const [helpOpen, setHelpOpen] = useState(false); const [settingsIntent, setSettingsIntent] = useState(null); @@ -419,7 +444,7 @@ export function HubApp(): React.ReactElement { }; console.log('[HubApp] createSession (mock)', { id, prompt }); pendingFocusIdRef.current = id; - setViewMode('grid'); + enterChat(id); setSessions((prev) => [...prev, newSession]); const pushEvent = (event: HlEvent, statusOverride?: AgentSession['status']) => { @@ -453,13 +478,13 @@ export function HubApp(): React.ReactElement { ); console.log('[HubApp] session created', { id }); pendingFocusIdRef.current = id; - setViewMode('grid'); + enterChat(id); await api.sessions.start(id); console.log('[HubApp] session started', { id }); } catch (err) { console.error('[HubApp] createSession failed', err); } - }, [isMock, setViewMode]); + }, [isMock, setViewMode, enterChat]); const handleFollowUp = useCallback(async ( @@ -590,10 +615,10 @@ export function HubApp(): React.ReactElement { { handleSelectSession(id); - if (viewMode !== 'grid') setViewMode('grid'); + enterChat(id); }} onNewAgent={() => openPill()} onRowAction={(id, action) => { @@ -625,6 +650,17 @@ export function HubApp(): React.ReactElement { onResetAll={vim.resetAll} formatShortcut={vim.formatShortcut} /> + ) : viewMode === 'chat' ? ( + chatSessionId + ? setViewMode('dashboard')} + onSwitchToBrowser={() => { + handleSelectSession(chatSessionId); + setViewMode('grid'); + }} + /> + :
No session selected.
) : viewMode === 'dashboard' ? ( { handleSelectSession(id); sessionsQuery.refetch(); - setViewMode('grid'); + enterChat(id); }} /> ) : ( @@ -693,6 +729,7 @@ export function HubApp(): React.ReactElement { onOpenSettings={() => { openSettingsPage(); }} + onOpenChat={enterChat} followUpShortcut={shortcutFor('action.followUp')} /> ); From ce75b686af0f5ecd15f0ca08b1bc2f4e8ac2043d Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 01:58:50 -0700 Subject: [PATCH 10/31] test(chat): groupIntoTurns unit tests Covers empty input, single-turn grouping, multi-turn user_input boundaries, the leading null-user turn for entries that arrive before the first user_input, and consecutive user_inputs producing empty-agent-entry turns. --- app/tests/unit/hub/groupIntoTurns.spec.ts | 65 +++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 app/tests/unit/hub/groupIntoTurns.spec.ts diff --git a/app/tests/unit/hub/groupIntoTurns.spec.ts b/app/tests/unit/hub/groupIntoTurns.spec.ts new file mode 100644 index 00000000..7db8db69 --- /dev/null +++ b/app/tests/unit/hub/groupIntoTurns.spec.ts @@ -0,0 +1,65 @@ +import { describe, expect, it } from 'vitest'; +import { groupIntoTurns } from '../../../src/renderer/hub/chat/groupIntoTurns'; +import type { OutputEntry } from '../../../src/renderer/hub/types'; + +function entry(id: string, type: OutputEntry['type'], content = ''): OutputEntry { + return { id, type, timestamp: 0, content }; +} + +describe('groupIntoTurns', () => { + it('returns empty array for empty input', () => { + expect(groupIntoTurns([])).toEqual([]); + }); + + it('groups entries into a single turn when there is one user_input', () => { + const entries = [ + entry('1', 'user_input', 'hello'), + entry('2', 'thinking', 'pondering'), + entry('3', 'done', 'all set'), + ]; + const turns = groupIntoTurns(entries); + expect(turns).toHaveLength(1); + expect(turns[0].userEntry?.id).toBe('1'); + expect(turns[0].agentEntries.map((e) => e.id)).toEqual(['2', 
'3']); + }); + + it('starts a new turn at each user_input', () => { + const entries = [ + entry('u1', 'user_input', 'first'), + entry('t1', 'thinking'), + entry('d1', 'done'), + entry('u2', 'user_input', 'second'), + entry('t2', 'thinking'), + ]; + const turns = groupIntoTurns(entries); + expect(turns).toHaveLength(2); + expect(turns[0].userEntry?.id).toBe('u1'); + expect(turns[0].agentEntries.map((e) => e.id)).toEqual(['t1', 'd1']); + expect(turns[1].userEntry?.id).toBe('u2'); + expect(turns[1].agentEntries.map((e) => e.id)).toEqual(['t2']); + }); + + it('emits a leading null-user turn when entries precede the first user_input', () => { + const entries = [ + entry('orphan', 'thinking'), + entry('u1', 'user_input', 'hello'), + entry('d1', 'done'), + ]; + const turns = groupIntoTurns(entries); + expect(turns).toHaveLength(2); + expect(turns[0].userEntry).toBeNull(); + expect(turns[0].agentEntries.map((e) => e.id)).toEqual(['orphan']); + expect(turns[1].userEntry?.id).toBe('u1'); + }); + + it('handles consecutive user_inputs (each starts a new turn even with no agent entries)', () => { + const entries = [ + entry('u1', 'user_input', 'a'), + entry('u2', 'user_input', 'b'), + ]; + const turns = groupIntoTurns(entries); + expect(turns).toHaveLength(2); + expect(turns[0].agentEntries).toEqual([]); + expect(turns[1].agentEntries).toEqual([]); + }); +}); From 8ea1b7f5d190827cefeebb44fe07b608a83f82cd Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 02:00:39 -0700 Subject: [PATCH 11/31] feat(toast): wire toasts into settings, connections, and cookie sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hook the existing ToastProvider up to user-visible save/sync actions so changes get explicit confirmation instead of dying silently. Restyle the toast itself to match the app's frosted-glass surfaces — layered shadow, backdrop blur, and per-variant tinted borders and glows. 
--- app/src/renderer/components/base/Toast.tsx | 7 + .../renderer/components/base/components.css | 125 +++++++++++++----- app/src/renderer/hub/ConnectionsPane.tsx | 14 +- app/src/renderer/hub/SettingsPane.tsx | 13 +- app/src/renderer/shared/CookieBrowser.tsx | 16 ++- 5 files changed, 136 insertions(+), 39 deletions(-) diff --git a/app/src/renderer/components/base/Toast.tsx b/app/src/renderer/components/base/Toast.tsx index 9d9319cf..912c1f93 100644 --- a/app/src/renderer/components/base/Toast.tsx +++ b/app/src/renderer/components/base/Toast.tsx @@ -3,6 +3,13 @@ * Variants: info | success | warning | error | agent * Renders a stack of toasts via ToastProvider + useToast hook. * No !important, no Inter references. + * + * Usage: + * const toast = useToast(); + * toast.show({ variant: 'success', title: 'Copied to clipboard' }); + * toast.show({ variant: 'error', title: 'Save failed', message: 'Try again.' }); + * const id = toast.show({ variant: 'info', title: 'Uploading...', persistent: true }); + * toast.update(id, { variant: 'success', title: 'Upload complete', persistent: false }); */ import React, { diff --git a/app/src/renderer/components/base/components.css b/app/src/renderer/components/base/components.css index 39b531d8..cf517a1f 100644 --- a/app/src/renderer/components/base/components.css +++ b/app/src/renderer/components/base/components.css @@ -333,36 +333,63 @@ .agb-toast__stack { position: fixed; - bottom: 20px; - right: 20px; + bottom: 24px; + right: 24px; z-index: var(--z-toast); display: flex; flex-direction: column; - gap: 8px; + gap: 10px; pointer-events: none; + max-width: 400px; } +/* Toast: frosted-glass panel matching the app's elevated surfaces. + Layered shadow = depth (drop) + ambient ring + variant-tinted glow. 
*/ .agb-toast { + position: relative; display: flex; - align-items: center; - gap: 10px; - padding: 10px 14px; + align-items: flex-start; + gap: 11px; + padding: 12px 14px 12px 13px; border-radius: var(--radius-lg); - background-color: var(--color-bg-overlay); + background-color: color-mix(in srgb, var(--color-bg-overlay) 88%, transparent); border: 1px solid var(--color-border-default); - box-shadow: var(--shadow-lg); - min-width: 280px; - max-width: 380px; + backdrop-filter: blur(14px) saturate(140%); + -webkit-backdrop-filter: blur(14px) saturate(140%); + box-shadow: + 0 1px 0 rgba(255, 255, 255, 0.04) inset, + 0 12px 32px -8px rgba(0, 0, 0, 0.55), + 0 4px 12px -4px rgba(0, 0, 0, 0.4), + 0 0 0 1px rgba(0, 0, 0, 0.2); + min-width: 300px; + max-width: 400px; pointer-events: auto; - animation: slide-up var(--duration-normal) var(--ease-spring) forwards; + transform-origin: bottom right; + animation: agb-toast-in 280ms var(--ease-spring) forwards; +} + +@keyframes agb-toast-in { + from { + opacity: 0; + transform: translateY(8px) scale(0.98); + } + to { + opacity: 1; + transform: translateY(0) scale(1); + } } .agb-toast__icon { flex-shrink: 0; - font-size: var(--font-size-sm); - line-height: 1.6; - width: 16px; - text-align: center; + display: flex; + align-items: center; + justify-content: center; + width: 14px; + height: 14px; + font-size: 13px; + font-weight: var(--font-weight-semibold); + line-height: 1; + margin-top: 3px; } .agb-toast__content { @@ -371,6 +398,7 @@ gap: 2px; flex: 1; min-width: 0; + padding-top: 1px; } .agb-toast__title { @@ -378,6 +406,7 @@ font-weight: var(--font-weight-medium); color: var(--color-fg-primary); line-height: var(--line-height-snug); + letter-spacing: -0.005em; } .agb-toast__message { @@ -393,11 +422,13 @@ justify-content: center; width: 20px; height: 20px; - border-radius: var(--radius-xs); + border-radius: var(--radius-sm); color: var(--color-fg-tertiary); + background: transparent; transition: background-color 
var(--duration-fast) var(--ease-out), - color var(--duration-fast) var(--ease-out); + color var(--duration-fast) var(--ease-out), + transform var(--duration-fast) var(--ease-out); margin-top: 1px; } @@ -405,26 +436,58 @@ background-color: var(--color-surface-interactive-hover); color: var(--color-fg-primary); } - -/* Variants — tinted borders for each status type. - why-not-a-token: these alpha variants (30%) of the status colors are - toast-specific border tints. A full --color-toast-border-{type} family - would be the clean solution; for now they are documented here. */ -.agb-toast--info { border-color: rgba(96, 165, 250, 0.30); } -.agb-toast--info .agb-toast__icon { color: var(--color-status-info); } - -.agb-toast--success { border-color: rgba(74, 222, 128, 0.30); } +.agb-toast__dismiss:active { transform: scale(0.92); } + +/* Variants — each picks up a tinted border, a tinted icon chip, and a + subtle outer glow in the variant color. The glow rides on top of the + base layered shadow defined on .agb-toast. 
*/ +.agb-toast--info { + border-color: color-mix(in srgb, var(--color-status-info) 35%, var(--color-border-default)); + box-shadow: + 0 1px 0 rgba(255, 255, 255, 0.04) inset, + 0 12px 32px -8px rgba(0, 0, 0, 0.55), + 0 4px 12px -4px rgba(0, 0, 0, 0.4), + 0 0 24px -8px color-mix(in srgb, var(--color-status-info) 50%, transparent); +} +.agb-toast--info .agb-toast__icon { color: var(--color-status-info); } + +.agb-toast--success { + border-color: color-mix(in srgb, var(--color-status-success) 38%, var(--color-border-default)); + box-shadow: + 0 1px 0 rgba(255, 255, 255, 0.04) inset, + 0 12px 32px -8px rgba(0, 0, 0, 0.55), + 0 4px 12px -4px rgba(0, 0, 0, 0.4), + 0 0 24px -8px color-mix(in srgb, var(--color-status-success) 50%, transparent); +} .agb-toast--success .agb-toast__icon { color: var(--color-status-success); } -.agb-toast--warning { border-color: rgba(245, 158, 11, 0.30); } +.agb-toast--warning { + border-color: color-mix(in srgb, var(--color-status-warning) 38%, var(--color-border-default)); + box-shadow: + 0 1px 0 rgba(255, 255, 255, 0.04) inset, + 0 12px 32px -8px rgba(0, 0, 0, 0.55), + 0 4px 12px -4px rgba(0, 0, 0, 0.4), + 0 0 24px -8px color-mix(in srgb, var(--color-status-warning) 55%, transparent); +} .agb-toast--warning .agb-toast__icon { color: var(--color-status-warning); } -.agb-toast--error { border-color: rgba(248, 113, 113, 0.30); } -.agb-toast--error .agb-toast__icon { color: var(--color-status-error); } +.agb-toast--error { + border-color: color-mix(in srgb, var(--color-status-error) 40%, var(--color-border-default)); + box-shadow: + 0 1px 0 rgba(255, 255, 255, 0.04) inset, + 0 12px 32px -8px rgba(0, 0, 0, 0.55), + 0 4px 12px -4px rgba(0, 0, 0, 0.4), + 0 0 28px -8px color-mix(in srgb, var(--color-status-error) 55%, transparent); +} +.agb-toast--error .agb-toast__icon { color: var(--color-status-error); } .agb-toast--agent { - border-color: var(--color-pill-border, #2e2e38); - box-shadow: var(--glow-accent); + border-color: color-mix(in srgb, 
var(--color-accent-default) 40%, var(--color-border-default)); + box-shadow: + 0 1px 0 rgba(255, 255, 255, 0.04) inset, + 0 12px 32px -8px rgba(0, 0, 0, 0.55), + 0 4px 12px -4px rgba(0, 0, 0, 0.4), + 0 0 28px -8px color-mix(in srgb, var(--color-accent-default) 60%, transparent); } .agb-toast--agent .agb-toast__icon { color: var(--color-accent-default); } diff --git a/app/src/renderer/hub/ConnectionsPane.tsx b/app/src/renderer/hub/ConnectionsPane.tsx index 37be2d21..79b5c385 100644 --- a/app/src/renderer/hub/ConnectionsPane.tsx +++ b/app/src/renderer/hub/ConnectionsPane.tsx @@ -14,6 +14,7 @@ import minimaxLogo from './minimax-color.svg'; import { useThemedAsset } from '../design/useThemedAsset'; import { CookieBrowser, type CookieBrowserApi } from '../shared/CookieBrowser'; import { pollInstalledStatus } from '../shared/installStatus'; +import { useToast } from '@/renderer/components/base/Toast'; type WaStatus = 'disconnected' | 'connecting' | 'qr_ready' | 'connected' | 'error'; type AuthType = 'oauth' | 'apiKey' | 'none'; @@ -103,6 +104,7 @@ export function ConnectionsPane({ browserSyncSectionId, focusBrowserCodeProvider, }: ConnectionsPaneProps): React.ReactElement { + const toast = useToast(); const openaiLogo = useThemedAsset(openaiLogoDark, openaiLogoLight); const opencodeLogo = useThemedAsset(opencodeLogoDark, opencodeLogoLight); const codexLogo = useThemedAsset(codexLogoDark, codexLogoLight); @@ -444,7 +446,8 @@ export function ConnectionsPane({ setOpenaiDraft(''); setOpenaiEditing(false); await refreshOpenai(); - }, [codexStatus.installed, openaiDraft, refreshOpenai]); + toast.show({ variant: 'success', title: 'OpenAI API key saved' }); + }, [codexStatus.installed, openaiDraft, refreshOpenai, toast]); const handleDeleteOpenai = useCallback(async () => { const api = window.electronAPI; @@ -453,7 +456,8 @@ export function ConnectionsPane({ setOpenaiKeyStatus('idle'); setOpenaiError(null); await refreshOpenai(); - }, [refreshOpenai]); + toast.show({ variant: 
'success', title: 'OpenAI API key removed' }); + }, [refreshOpenai, toast]); const handleStartEditBrowserCode = useCallback((providerId: string) => { setEditingProviderId(providerId); @@ -497,7 +501,8 @@ export function ConnectionsPane({ setBrowserCodeKeyDraft(''); setEditingProviderId(null); await refreshBrowserCode(); - }, [browserCodeKeyDraft, browserCodeStatus.installed?.installed, refreshBrowserCode]); + toast.show({ variant: 'success', title: 'Provider key saved', message: providerId }); + }, [browserCodeKeyDraft, browserCodeStatus.installed?.installed, refreshBrowserCode, toast]); const handleRemoveBrowserCodeKey = useCallback(async (providerId: string) => { const api = window.electronAPI; @@ -509,7 +514,8 @@ export function ConnectionsPane({ setBrowserCodeErrorProviderId(null); if (editingProviderId === providerId) setEditingProviderId(null); await refreshBrowserCode(); - }, [editingProviderId, refreshBrowserCode]); + toast.show({ variant: 'success', title: 'Provider key removed', message: providerId }); + }, [editingProviderId, refreshBrowserCode, toast]); const [testingProviderId, setTestingProviderId] = useState(null); const [testResultByProvider, setTestResultByProvider] = useState>({}); diff --git a/app/src/renderer/hub/SettingsPane.tsx b/app/src/renderer/hub/SettingsPane.tsx index a38af75b..b44f1370 100644 --- a/app/src/renderer/hub/SettingsPane.tsx +++ b/app/src/renderer/hub/SettingsPane.tsx @@ -4,6 +4,7 @@ import type { ActionId, KeyBinding } from './keybindings'; import { fallbackShortcutPlatform, keyboardEventToShortcut } from '../../shared/hotkeys'; import { useThemeMode } from '../design/useThemeMode'; import type { ThemeMode } from '../design/themeMode'; +import { useToast } from '@/renderer/components/base/Toast'; /** * Generic settings primitives. 
Add a new option type and every section that @@ -362,6 +363,7 @@ function PrivacySection(): React.ReactElement { const [telemetry, setTelemetry] = useState(null); const [saving, setSaving] = useState(false); const api = (window as unknown as { electronAPI: { settings: { privacy: ElectronPrivacyAPI } } }).electronAPI.settings.privacy; + const toast = useToast(); useEffect(() => { let cancelled = false; @@ -379,12 +381,21 @@ function PrivacySection(): React.ReactElement { try { const res = await api.setTelemetry(next); setTelemetry(res.telemetry); + toast.show({ + variant: 'success', + title: res.telemetry ? 'Telemetry enabled' : 'Telemetry disabled', + }); } catch { setTelemetry(!next); // revert + toast.show({ + variant: 'error', + title: 'Could not save setting', + message: 'Telemetry change could not be saved. Please try again.', + }); } finally { setSaving(false); } - }, [telemetry, saving, api]); + }, [telemetry, saving, api, toast]); return (
diff --git a/app/src/renderer/shared/CookieBrowser.tsx b/app/src/renderer/shared/CookieBrowser.tsx index d6c75bc3..d96e4ecb 100644 --- a/app/src/renderer/shared/CookieBrowser.tsx +++ b/app/src/renderer/shared/CookieBrowser.tsx @@ -2,6 +2,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'; import { extractHostname, getFaviconUrl, isDefaultFavicon, sortDomains } from './domain-utils'; import { BrowserLogoAvatar } from './BrowserLogoAvatar'; import { userFacingIpcError } from './ipcErrors'; +import { useToast } from '@/renderer/components/base/Toast'; import './CookieBrowser.css'; const MAX_VISIBLE_DOMAINS = 2000; @@ -96,6 +97,7 @@ function relativeTime(iso: string): string { } export function CookieBrowser({ api, hideHeader }: Props): React.ReactElement { + const toast = useToast(); const [profiles, setProfiles] = useState([]); const [profilesLoading, setProfilesLoading] = useState(false); const [profilesError, setProfilesError] = useState(null); @@ -177,17 +179,25 @@ export function CookieBrowser({ api, hideHeader }: Props): React.ReactElement { setSyncingProfile(profileId); setSyncError(null); try { - await api.importCookies(profileId); + const result = await api.importCookies(profileId); // Slight delay so the writes have flushed before we re-list. setTimeout(() => { void refreshCookies(); }, REFRESH_AFTER_SYNC_MS); // Pull the freshly-persisted record (timestamp + counts) from main. void refreshSyncs(); + const domainCount = result.domains?.length ?? 0; + toast.show({ + variant: 'success', + title: `Synced ${result.imported} cookies`, + message: `${result.browserName} · ${domainCount} ${domainCount === 1 ? 
'site' : 'sites'}`, + }); } catch (err) { - setSyncError(userFacingIpcError(err) || 'Cookie sync failed'); + const message = userFacingIpcError(err) || 'Cookie sync failed'; + setSyncError(message); + toast.show({ variant: 'error', title: 'Cookie sync failed', message }); } finally { setSyncingProfile(null); } - }, [api, refreshCookies, refreshSyncs]); + }, [api, refreshCookies, refreshSyncs, toast]); // Collapse the cookie list into one entry per unique domain. The raw list is // 5–10× longer (each site sets multiple cookies); the user just wants to see From db2313d151c52b0d5eb12716f578a1ce9e0a0714 Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 12:04:09 -0700 Subject: [PATCH 12/31] chat: clickable harness-output paths via shell.showItemInFolder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Linkify component that scans plain-text chat content (thinking blocks, error messages, notify chips, file_output text) for paths shaped like `outputs/<sessionId>/<name>.<ext>` and turns them into clickable links. Click invokes `sessions:reveal-output` (already exposed via preload as `electronAPI.sessions.revealOutput`), which uses Electron's `shell.showItemInFolder` — Finder on macOS, Explorer on Windows, the desktop environment's file manager on Linux. No renderer-side platform branching required. Detection is intentionally narrow: only paths that resolve inside the harness outputs root, which is the only thing the IPC handler will reveal. Other paths fall through as plain text so clicks are never misleading. Inline styles keep the component self-contained. The renderer integration (wrapping ChatTurn's plain-text branches with `<Linkify>`) lives in the working tree alongside other in-progress edits to that file; it'll land in the next commit that takes ChatTurn.
--- app/src/renderer/hub/chat/Linkify.tsx | 84 +++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 app/src/renderer/hub/chat/Linkify.tsx diff --git a/app/src/renderer/hub/chat/Linkify.tsx b/app/src/renderer/hub/chat/Linkify.tsx new file mode 100644 index 00000000..95e3042a --- /dev/null +++ b/app/src/renderer/hub/chat/Linkify.tsx @@ -0,0 +1,84 @@ +import React from 'react'; + +/** + * Detects file paths in chat prose and turns them into clickable links that + * reveal the file in the OS file manager (Finder / Explorer / nautilus etc.). + * + * Scope: only `outputs/<sessionId>/<name>.<ext>` paths (with optional path prefix and + * leading drive/slash) — that matches what the harness writes and what + * `sessions:reveal-output` is allowed to reveal. Other paths are ignored; + * showing them as links would set up dead clicks (the backend rejects + * anything outside the harness outputs root). + * + * Cross-platform: the backend uses `shell.showItemInFolder`, which on macOS + * opens Finder, on Windows opens Explorer, on Linux defers to the desktop + * environment's file manager. No renderer-side platform branching needed. + */ + +// One regex covers: +// outputs/<sessionId>/file.png +// ./outputs/<sessionId>/file.png +// /Users/.../outputs/<sessionId>/file.png +// C:\…\outputs\<sessionId>\file.png +// +// The path body stops at whitespace, common terminators, or a closing quote. +const PATH_RE = /((?:[A-Za-z]:[\\/]|\/|\.{1,2}[\\/])?(?:[\w.~+-]+[\\/])*outputs[\\/][\w-]{4,}[\\/][^\s"'`)\]<>,;]+?\.[A-Za-z0-9]{1,8})/g; + +function reveal(rawPath: string): void { + // Trim a stray trailing punctuation that escaped the regex (rare — the regex + // tries to avoid trailing dots/commas, but markdown-y prose can still grab + // a `.` after a filename when the extension is also a sentence end).
+ const clean = rawPath.replace(/[.,;:!?)\]]+$/, ''); + console.log('[Linkify] reveal', clean); + void window.electronAPI?.sessions + ?.revealOutput?.(clean) + .catch((err) => console.warn('[Linkify] revealOutput failed', err)); +} + +interface LinkifyProps { + children: string; +} + +// Inline styles keep this self-contained — no chat.css coupling required. +const LINK_STYLE: React.CSSProperties = { + background: 'transparent', + border: 'none', + padding: 0, + margin: 0, + font: 'inherit', + color: 'var(--color-accent-default)', + cursor: 'pointer', + textDecoration: 'underline', + textDecorationStyle: 'dotted', + textUnderlineOffset: '2px', + wordBreak: 'break-all', +}; + +export function Linkify({ children }: LinkifyProps): React.ReactElement { + const text = children ?? ''; + if (!text) return <>; + + const parts: React.ReactNode[] = []; + let lastIdx = 0; + PATH_RE.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = PATH_RE.exec(text)) !== null) { + if (m.index > lastIdx) parts.push(text.slice(lastIdx, m.index)); + const p = m[1]; + parts.push( + , + ); + lastIdx = m.index + p.length; + } + if (lastIdx < text.length) parts.push(text.slice(lastIdx)); + return <>{parts}; +} From bd1cd89e0f421841fa0487815878e03f9be5093b Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 14:21:34 -0700 Subject: [PATCH 13/31] =?UTF-8?q?chat:=20header=20polish=20=E2=80=94=20eng?= =?UTF-8?q?ine=20icons,=20badges,=20status=20colors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace plain text engine label with provider logo (claude-code, codex, browsercode) - Add SUBSCRIPTION/KEY badge and cost chip mirroring AgentPane - Status pill border now picks up the status color (running/idle/stuck/paused/stopped) - Remove redundant back button --- app/src/renderer/hub/chat/ChatPane.tsx | 88 +++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/app/src/renderer/hub/chat/ChatPane.tsx 
b/app/src/renderer/hub/chat/ChatPane.tsx index 177445b9..6b80e9d4 100644 --- a/app/src/renderer/hub/chat/ChatPane.tsx +++ b/app/src/renderer/hub/chat/ChatPane.tsx @@ -1,10 +1,13 @@ -import React, { useCallback, useEffect, useMemo } from 'react'; +import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { useShallow } from 'zustand/react/shallow'; import { TaskInput, type TaskInputSubmission } from '../TaskInput'; import { ChatTranscript } from './ChatTranscript'; import { BrowserPreview } from './BrowserPreview'; import { useSessionsStore } from '../state/sessionsStore'; import { STATUS_LABEL } from '../constants'; +import { useTextSelection } from './useTextSelection'; +import { QuoteSelectionButton } from './QuoteSelectionButton'; +import { formatUserMessageWithQuote } from './parseUserMessage'; import claudeCodeLogo from '../claude-code-logo.svg'; import openaiLogo from '../openai-logo.svg'; import opencodeLogo from '../opencode-logo-light.svg'; @@ -72,6 +75,20 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps }), ); + // Text-selection quote system. Scoped to the transcript only — selecting in + // the composer or sidebar doesn't trigger the floating Quote button. + const transcriptRef = useRef(null); + const selection = useTextSelection(transcriptRef); + const [quotedText, setQuotedText] = useState(null); + + // Clear the active quote when switching sessions so it doesn't leak across. 
+ useEffect(() => { setQuotedText(null); }, [sessionId]); + + const onQuote = useCallback((text: string) => { + console.log('[ChatPane] quote', { length: text.length }); + setQuotedText(text); + }, []); + const onSubmit = useCallback( async (sub: TaskInputSubmission) => { const api = window.electronAPI; @@ -79,16 +96,23 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps console.warn('[ChatPane] no electronAPI'); return; } - console.log('[ChatPane] resume submit', { sessionId, promptLength: sub.prompt.length, attachments: sub.attachments.length }); + const composed = formatUserMessageWithQuote(quotedText, sub.prompt); + console.log('[ChatPane] resume submit', { + sessionId, + promptLength: composed.length, + attachments: sub.attachments.length, + hasQuote: !!quotedText, + }); try { - const res = await api.sessions.resume(sessionId, sub.prompt, sub.attachments); + const res = await api.sessions.resume(sessionId, composed, sub.attachments); console.log('[ChatPane] resume result', res); if (res.error) console.error('[ChatPane] resume error', res.error); + else setQuotedText(null); } catch (err) { console.error('[ChatPane] resume threw', err); } }, - [sessionId], + [sessionId, quotedText], ); const onCancel = useCallback(() => { @@ -98,6 +122,23 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps api.sessions.cancel(sessionId).catch((err) => console.error('[ChatPane] cancel failed', err)); }, [sessionId]); + const onRerun = useCallback(() => { + const api = window.electronAPI; + if (!api) return; + console.log('[ChatPane] rerun', { sessionId }); + api.sessions.rerun(sessionId).catch((err) => console.error('[ChatPane] rerun failed', err)); + }, [sessionId]); + + const onResumeRun = useCallback(() => { + const api = window.electronAPI; + if (!api) return; + console.log('[ChatPane] resume (no new prompt)', { sessionId }); + // Mirror HubApp.handleResume's canned-prompt pattern so paused sessions + // can be 
picked up without making the user type something. + api.sessions.resume(sessionId, 'Continue from where you left off', []) + .catch((err) => console.error('[ChatPane] resume failed', err)); + }, [sessionId]); + const composer = useMemo(() => { if (!header) return null; const isTerminal = header.canResume === false || header.status === 'stopped'; @@ -107,11 +148,16 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps return (
This session is finished. Start a new task from the dashboard. - +
+ + +
); } + const isPaused = header.status === 'paused'; + // While running, still allow follow-ups — backend queues them (resume() // returns `queued: true` if mid-step). Show a small hint above the input. return ( @@ -127,10 +173,35 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps >Cancel run

)} - + {isPaused && ( +

+ Agent is paused. + {' '} + +

+ )} + +
+
{quotedText}
+ +
+ ) : undefined} + /> ); - }, [header, onSubmit, onCancel, onExit]); + }, [header, onSubmit, onCancel, onExit, onRerun, onResumeRun, quotedText]); if (!header) { return ( @@ -190,12 +261,13 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps
- +
{composer}
+ ); } From 87abe0135c2cb95e804be930f5e4c204a863d9cd Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 14:21:43 -0700 Subject: [PATCH 14/31] screenshots: inline-render user-facing captures via chatfile:// protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add BU_OUTPUTS_DIR env so the harness can write screenshots into the session's watched outputs dir (<harnessDir>/outputs/<sessionId>/). - Log every file detected by the outputs watcher with abs path, bytes, and mime so users can locate artifacts. - Register a chatfile:// protocol scoped to the harness outputs root, so the renderer can safely load them via `<img src="chatfile://…">`. Path is canonicalized via realpathSync and rejected if it escapes the root. - Allow chatfile: in the hub CSP img-src. - AGENTS.md: explain when to save (judgment-based — task confirmation, unexpected findings, stuck states, mid-progress checkpoints) vs. keep in memory (selectors / state inspection). --- app/src/main/hl/engines/browserHarnessEnv.ts | 5 ++ app/src/main/hl/engines/runEngine.ts | 10 ++- app/src/main/hl/stock/AGENTS.md | 26 ++++++- app/src/main/index.ts | 5 ++ app/src/main/protocols/chatfile.ts | 74 ++++++++++++++++++++ app/src/renderer/hub/hub.html | 2 +- 6 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 app/src/main/protocols/chatfile.ts diff --git a/app/src/main/hl/engines/browserHarnessEnv.ts b/app/src/main/hl/engines/browserHarnessEnv.ts index 25c6de00..9ff32883 100644 --- a/app/src/main/hl/engines/browserHarnessEnv.ts +++ b/app/src/main/hl/engines/browserHarnessEnv.ts @@ -13,5 +13,10 @@ export function applyBrowserHarnessEnv(ctx: SpawnContext, env: NodeJS.ProcessEnv env.CDP_REPL_PORT = env.CDP_REPL_PORT ?? browserHarnessReplPort(ctx.sessionId, ctx.targetId); env.CDP_REPL_LOG = env.CDP_REPL_LOG ??
path.join(ctx.harnessDir, `browser-harness-js-${ctx.sessionId}.log`); env.BU_SESSION_ID = ctx.sessionId; + // Watched session outputs dir — any file written here triggers a `file_output` + // event in runEngine. The Page.captureScreenshot wrapper in repl.ts auto-saves + // PNGs into this dir so screenshots surface in the chat instead of being + // dumped as base64 into stdout. + env.BU_OUTPUTS_DIR = path.join(ctx.harnessDir, 'outputs', ctx.sessionId); return env; } diff --git a/app/src/main/hl/engines/runEngine.ts b/app/src/main/hl/engines/runEngine.ts index 733f9cf4..15225fe5 100644 --- a/app/src/main/hl/engines/runEngine.ts +++ b/app/src/main/hl/engines/runEngine.ts @@ -454,12 +454,20 @@ export async function runEngine(opts: RunEngineOptions): Promise { if (!stat.isFile()) return; if (seenOutputs.get(filename) === stat.size) return; seenOutputs.set(filename, stat.size); + const mime = mimeFromExt(filename); + engineLogger.info('engines.run.outputs.fileDetected', { + sessionId: opts.sessionId, + filename, + absPath: filePath, + bytes: stat.size, + mime, + }); opts.onEvent({ type: 'file_output', name: filename, path: filePath, size: stat.size, - mime: mimeFromExt(filename), + mime, }); }); } catch (err) { diff --git a/app/src/main/hl/stock/AGENTS.md b/app/src/main/hl/stock/AGENTS.md index 6215c066..0f22a144 100644 --- a/app/src/main/hl/stock/AGENTS.md +++ b/app/src/main/hl/stock/AGENTS.md @@ -140,14 +140,36 @@ Verify after every meaningful browser action: For screenshots: ```bash +# Internal screenshot (for your own vision — not shown to the user): browser-harness-js <<'EOF' await connectToAssignedTarget() const { data } = await session.Page.captureScreenshot({ format: 'png' }) -await Bun.write('/tmp/browser-use-shot.png', Buffer.from(data, 'base64')) -return '/tmp/browser-use-shot.png' +// inspect `data` (base64 PNG) however you need; do NOT save unless the user +// explicitly asked to see the screenshot. 
+EOF + +# User-facing screenshot (renders inline in the chat): +browser-harness-js <<'EOF' +await connectToAssignedTarget() +const { data } = await session.Page.captureScreenshot({ format: 'png' }) +await Bun.write(`${process.env.BU_OUTPUTS_DIR}/screenshot-${Date.now()}.png`, Buffer.from(data, 'base64')) EOF ``` +**When a screenshot is worth showing the user:** save to `$BU_OUTPUTS_DIR` when +the user genuinely benefits from seeing the page. Use your own judgment — these +are guideposts, not rules: +- Confirming a delegated task finished (a post went up, a message sent, a form + submitted, a checkout completed). +- Mid-progress check-in on a long task, so the user knows you haven't stalled. +- Something unexpected or interesting showed up that's worth flagging visually. +- You're stuck on a captcha, login wall, or page state you can't resolve, and + showing it helps the user see what you see. + +Don't save screenshots you took purely to look at the page yourself (finding +a selector, checking element state, verifying navigation) — those clutter the +chat without giving the user new information. + ## Uploads And Outputs - Uploads from the user appear under `./uploads//`. diff --git a/app/src/main/index.ts b/app/src/main/index.ts index ac70c771..9c6abd12 100644 --- a/app/src/main/index.ts +++ b/app/src/main/index.ts @@ -18,6 +18,10 @@ loadDotEnv({ path: path.resolve(__dirname, '..', '..', '.env') }); import { app, BrowserWindow, crashReporter, globalShortcut, ipcMain, Menu, MenuItemConstructorOptions, nativeImage, shell } from 'electron'; import { mergeChromiumFeature } from './startup/chromiumFeatures'; +import { registerChatfilePrivileges, registerChatfileHandler } from './protocols/chatfile'; + +// Must run before app.whenReady — Electron caches scheme privileges at startup. 
+registerChatfilePrivileges(); if (process.platform === 'linux') { app.commandLine.appendSwitch( @@ -390,6 +394,7 @@ function openShellAndWire(): BrowserWindow { // --------------------------------------------------------------------------- app.whenReady().then(async () => { mainLogger.info('main.appReady', { msg: 'Electron app ready — initializing Browser Use' }); + registerChatfileHandler(); startResourceMonitor(resourceMonitorContext); // Verify the CDP endpoint at our announced port is actually OUR Electron diff --git a/app/src/main/protocols/chatfile.ts b/app/src/main/protocols/chatfile.ts new file mode 100644 index 00000000..c2005d6b --- /dev/null +++ b/app/src/main/protocols/chatfile.ts @@ -0,0 +1,74 @@ +/** + * `chatfile://` protocol — serves files that live under the harness outputs dir + * so the renderer can `` screenshots and other + * agent-produced media without granting blanket filesystem access. + * + * Security: the requested abs path is canonicalized, then required to live + * under `/outputs/`. Anything else returns 403. Symlink escapes are + * blocked by `fs.realpathSync`. + * + * Register order matters in Electron: `registerSchemesAsPrivileged` MUST run + * before `app.whenReady`, while `protocol.handle` must run after. + */ + +import { protocol, net } from 'electron'; +import fs from 'node:fs'; +import path from 'node:path'; +import { pathToFileURL } from 'node:url'; +import { mainLogger } from '../logger'; +import { harnessDir } from '../hl/harness'; + +export const CHATFILE_SCHEME = 'chatfile'; + +export function registerChatfilePrivileges(): void { + protocol.registerSchemesAsPrivileged([ + { + scheme: CHATFILE_SCHEME, + privileges: { + // `standard: true` makes the URL parser treat `chatfile://` like http — + // so `chatfile:///abs/path` reliably parses with hostname="" and + // pathname="/abs/path" rather than the looser opaque-path semantics + // non-standard schemes get. 
+ standard: true, + secure: true, + supportFetchAPI: true, + bypassCSP: false, + stream: true, + }, + }, + ]); +} + +export function registerChatfileHandler(): void { + const root = path.resolve(path.join(harnessDir(), 'outputs')) + path.sep; + + protocol.handle(CHATFILE_SCHEME, async (req) => { + let absPath: string; + try { + const url = new URL(req.url); + absPath = decodeURIComponent(url.pathname); + } catch (err) { + mainLogger.warn('chatfile.badUrl', { url: req.url, error: (err as Error).message }); + return new Response('bad url', { status: 400 }); + } + + let realPath: string; + try { + realPath = fs.realpathSync(absPath); + } catch (err) { + mainLogger.warn('chatfile.notFound', { url: req.url, absPath, error: (err as Error).message }); + return new Response('not found', { status: 404 }); + } + + if (!realPath.startsWith(root)) { + mainLogger.warn('chatfile.deniedOutsideRoot', { requested: absPath, realPath, root }); + return new Response('forbidden', { status: 403 }); + } + + // Use pathToFileURL so paths containing spaces ("Application Support") and + // other URL-significant chars get encoded correctly. + return net.fetch(pathToFileURL(realPath).toString()); + }); + + mainLogger.info('chatfile.registered', { root }); +} diff --git a/app/src/renderer/hub/hub.html b/app/src/renderer/hub/hub.html index 2201f2ba..ab50240d 100644 --- a/app/src/renderer/hub/hub.html +++ b/app/src/renderer/hub/hub.html @@ -6,7 +6,7 @@ Hub From 5e534d9198ed6022a8b569c7421ebc4f034cd1d6 Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 14:21:54 -0700 Subject: [PATCH 15/31] chat: streaming prose, hoisted images, layout-shift fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - StreamingProse + useTypewriter: smooth out chunky provider streams (claude-code, codex, browsercode) by revealing chars at a steady rate with adaptive catch-up. Same component spans the thinking->done swap (stable key) so the cursor doesn't reset. 
- stableMarkdown: temporarily close unbalanced code fences / inline ticks while typing so markdown rendering stays locally stable instead of re-rewriting as new chars arrive. - Skip the typewriter entirely on re-entry to settled turns — drove the 'everything re-streams' bug when reopening a finished chat. - Hoist image file_outputs into the trailing done block as floated insets (magazine layout). Defer rendering until done lands so the image appears in its final position, not as a standalone block that later jumps. - 'Working' indicator now lives at the top of the live agent turn and never hides — old logic toggled it off whenever the last entry was an in-flight tool_call, which made it flicker on/off as tool calls resolved. - Narrow transcript column to 640px, stable scrollbar gutter. - Strip tool pill chrome (no border/bg/icon) so solo tools and grouped tools read as the same plain-text outline. - Bump assistant prose weight and size for readability. --- app/src/renderer/hub/chat/ChatTranscript.tsx | 45 ++- app/src/renderer/hub/chat/ChatTurn.tsx | 248 +++++++++++- app/src/renderer/hub/chat/chat.css | 380 ++++++++++++++----- 3 files changed, 550 insertions(+), 123 deletions(-) diff --git a/app/src/renderer/hub/chat/ChatTranscript.tsx b/app/src/renderer/hub/chat/ChatTranscript.tsx index dbbd1056..cf6b8e1f 100644 --- a/app/src/renderer/hub/chat/ChatTranscript.tsx +++ b/app/src/renderer/hub/chat/ChatTranscript.tsx @@ -1,4 +1,4 @@ -import React, { useEffect, useLayoutEffect, useMemo, useRef } from 'react'; +import React, { forwardRef, useEffect, useImperativeHandle, useLayoutEffect, useMemo, useRef } from 'react'; import { useShallow } from 'zustand/react/shallow'; import { useSessionsStore } from '../state/sessionsStore'; import { adaptSession } from '../types'; @@ -23,7 +23,7 @@ interface ChatTranscriptProps { const PIN_THRESHOLD_PX = 32; -export function ChatTranscript({ sessionId }: ChatTranscriptProps): React.ReactElement | null { +export const ChatTranscript = 
forwardRef(function ChatTranscript({ sessionId }, fwdRef): React.ReactElement | null { // Subscribe only to this session's output + createdAt. Other sessions' // updates do not re-render this component. const sessionSlice = useSessionsStore( @@ -35,6 +35,7 @@ export function ChatTranscript({ sessionId }: ChatTranscriptProps): React.ReactE ); const containerRef = useRef(null); + useImperativeHandle(fwdRef, () => containerRef.current as HTMLDivElement, []); const pinnedRef = useRef(true); const lastTurnsLenRef = useRef(0); @@ -98,16 +99,27 @@ export function ChatTranscript({ sessionId }: ChatTranscriptProps): React.ReactE if (!sessionSlice) return null; const isRunning = sessionSlice.status === 'running' || sessionSlice.status === 'stuck'; - // Show the thinking indicator while running unless the latest agent entry - // is an unpaired tool_call (that already has its own spinner) — avoids - // double-indicating activity. + // Always show the Working indicator while running. Earlier we hid it + // whenever the latest entry was an in-flight tool_call (to avoid double + // indicators), but that caused the indicator to flicker on/off as tool + // calls landed and resolved — the layout shift was worse than the duplication. const lastTurn = turns[turns.length - 1]; - const lastAgent = lastTurn?.agentEntries[lastTurn.agentEntries.length - 1]; - const lastIsInflightTool = lastAgent?.type === 'tool_call' && !lastAgent.result; - const showThinking = isRunning && !lastIsInflightTool; - // Elapsed counter resets at each turn — start counting from the last - // user_input timestamp, or session creation if there is none yet. - const since = lastTurn?.userEntry?.timestamp ?? sessionSlice.createdAt; + const showThinking = isRunning; + // Elapsed counter shows time since the most recent activity — prefer an + // in-flight tool_call (what the user is waiting on), then the latest agent + // entry of any kind, then the turn-start user_input, then session creation. 
+ let since = lastTurn?.userEntry?.timestamp ?? sessionSlice.createdAt; + if (lastTurn && lastTurn.agentEntries.length > 0) { + const last = lastTurn.agentEntries[lastTurn.agentEntries.length - 1]; + since = last.timestamp; + for (let i = lastTurn.agentEntries.length - 1; i >= 0; i--) { + const e = lastTurn.agentEntries[i]; + if (e.type === 'tool_call' && !e.result) { + since = e.timestamp; + break; + } + } + } if (turns.length === 0) { return ( @@ -119,10 +131,13 @@ export function ChatTranscript({ sessionId }: ChatTranscriptProps): React.ReactE return (
- {turns.map((t) => ( - + {turns.map((t, i) => ( + ))} - {showThinking && }
); -} +}); diff --git a/app/src/renderer/hub/chat/ChatTurn.tsx b/app/src/renderer/hub/chat/ChatTurn.tsx index 42063089..835654da 100644 --- a/app/src/renderer/hub/chat/ChatTurn.tsx +++ b/app/src/renderer/hub/chat/ChatTurn.tsx @@ -1,10 +1,13 @@ -import React, { useState } from 'react'; +import React, { useEffect, useRef, useState } from 'react'; import { Markdown } from '../Markdown'; import type { OutputEntry } from '../types'; import type { Turn } from './groupIntoTurns'; import { ToolBlock } from './ToolBlock'; import { ToolGroup } from './ToolGroup'; +import { Linkify } from './Linkify'; import { useToast } from '@/renderer/components/base/Toast'; +import { TerminalSpinner, Elapsed } from './TerminalSpinner'; +import { parseUserMessage } from './parseUserMessage'; const USER_BUBBLE_CLAMP_LINES = 10; const USER_BUBBLE_CLAMP_CHARS = 600; @@ -19,8 +22,10 @@ function CopyIcon(): React.ReactElement { } function UserBubble({ content }: { content: string }): React.ReactElement { - const lines = content.split('\n').length; - const isLong = lines > USER_BUBBLE_CLAMP_LINES || content.length > USER_BUBBLE_CLAMP_CHARS; + const { quote, message } = parseUserMessage(content); + const body = message || ''; // message can be empty if user sent quote-only + const lines = body.split('\n').length; + const isLong = lines > USER_BUBBLE_CLAMP_LINES || body.length > USER_BUBBLE_CLAMP_CHARS; const [expanded, setExpanded] = useState(false); const clamped = isLong && !expanded; const toast = useToast(); @@ -37,7 +42,10 @@ function UserBubble({ content }: { content: string }): React.ReactElement { return (
-
{content}
+ {quote && ( +
{quote}
+ )} + {body &&
{body}
} {isLong && (

)} + {editing && ( +

+ Editing your first message — submitting will rewrite the conversation from here. + {' '} + +

+ )} @@ -201,7 +246,7 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps /> ); - }, [header, onSubmit, onCancel, onExit, onRerun, onResumeRun, quotedText]); + }, [header, onSubmit, onCancel, onExit, onRerun, onResumeRun, quotedText, editing]); if (!header) { return ( @@ -261,11 +306,18 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps
- -
- + +
+
+ +
+ {composer}
-
{composer}
diff --git a/app/src/renderer/hub/chat/ChatTranscript.tsx b/app/src/renderer/hub/chat/ChatTranscript.tsx index cf6b8e1f..4a93f8c9 100644 --- a/app/src/renderer/hub/chat/ChatTranscript.tsx +++ b/app/src/renderer/hub/chat/ChatTranscript.tsx @@ -19,11 +19,13 @@ function ThinkingIndicator({ since }: { since: number }): React.ReactElement { interface ChatTranscriptProps { sessionId: string; + onEditMessage?: (text: string) => void; + onShare?: () => void; } const PIN_THRESHOLD_PX = 32; -export const ChatTranscript = forwardRef(function ChatTranscript({ sessionId }, fwdRef): React.ReactElement | null { +export const ChatTranscript = forwardRef(function ChatTranscript({ sessionId, onEditMessage, onShare }, fwdRef): React.ReactElement | null { // Subscribe only to this session's output + createdAt. Other sessions' // updates do not re-render this component. const sessionSlice = useSessionsStore( @@ -129,6 +131,12 @@ export const ChatTranscript = forwardRef(fu ); } + // Only the very first user_input can be edited end-to-end today — the + // backend rerun primitive replays the conversation from session.prompt, so + // editing a follow-up message would silently discard everything after it. + // Find the index of the first turn with a real user entry. + const firstUserTurnIdx = turns.findIndex((t) => t.userEntry !== null); + return (
{turns.map((t, i) => ( @@ -136,6 +144,8 @@ export const ChatTranscript = forwardRef(fu key={t.id} turn={t} inflightSince={showThinking && i === turns.length - 1 ? since : undefined} + onEditMessage={i === firstUserTurnIdx ? onEditMessage : undefined} + onShare={i === firstUserTurnIdx ? onShare : undefined} /> ))}
diff --git a/app/src/renderer/hub/chat/ChatTurn.tsx b/app/src/renderer/hub/chat/ChatTurn.tsx index 835654da..9e56d6e4 100644 --- a/app/src/renderer/hub/chat/ChatTurn.tsx +++ b/app/src/renderer/hub/chat/ChatTurn.tsx @@ -21,7 +21,28 @@ function CopyIcon(): React.ReactElement { ); } -function UserBubble({ content }: { content: string }): React.ReactElement { +function ShareIcon(): React.ReactElement { + return ( + + ); +} + +function EditIcon(): React.ReactElement { + return ( + + ); +} + +function UserBubble({ content, onEdit, onShare }: { + content: string; + onEdit?: (text: string) => void; + onShare?: () => void; +}): React.ReactElement { const { quote, message } = parseUserMessage(content); const body = message || ''; // message can be empty if user sent quote-only const lines = body.split('\n').length; @@ -60,10 +81,31 @@ function UserBubble({ content }: { content: string }): React.ReactElement { + {onShare && ( + + )} + {onEdit && ( + + )} ); @@ -72,6 +114,8 @@ function UserBubble({ content }: { content: string }): React.ReactElement { interface ChatTurnProps { turn: Turn; inflightSince?: number; + onEditMessage?: (text: string) => void; + onShare?: () => void; } /** @@ -90,38 +134,52 @@ function useTypewriter(target: string, baseCharsPerSec = 70, startInstant = fals // animation and render full-text immediately. Otherwise start at 0 and let // the raf loop type it out. const [shownLen, setShownLen] = useState(() => (startInstant ? target.length : 0)); - const lastResetTargetRef = useRef(target); + const targetRef = useRef(target); + const shownLenRef = useRef(shownLen); + targetRef.current = target; + shownLenRef.current = shownLen; - if (target.length < shownLen && target !== lastResetTargetRef.current) { - lastResetTargetRef.current = target; + // If the target swaps to something shorter than what we've already shown + // (rare — happens on rerun / quick edits), restart from 0. 
+ if (target.length < shownLen) { + shownLenRef.current = 0; setShownLen(0); } + // Single persistent raf loop. We deliberately do NOT depend on `shownLen` so + // the loop is not torn down + recreated every frame (which was wiping the + // dt/accum state and stalling progress to ~1 char per re-render). useEffect(() => { - if (shownLen >= target.length) return; let raf = 0; let last: number | null = null; - let accum = 0; // fractional character budget + let accum = 0; const tick = (ts: number): void => { const dt = last == null ? 16 : ts - last; last = ts; - setShownLen((prev) => { - if (prev >= target.length) return prev; - const gap = target.length - prev; - // Adaptive rate: the further behind we are, the faster we catch up. - // Cap at 6× base so a huge late chunk doesn't snap instantly. + const tgt = targetRef.current; + const prev = shownLenRef.current; + if (prev < tgt.length) { + const gap = tgt.length - prev; const rate = Math.min(baseCharsPerSec * 2.5, baseCharsPerSec + gap * 0.4); accum += (dt / 1000) * rate; const advance = Math.floor(accum); - if (advance <= 0) return prev; - accum -= advance; - return Math.min(target.length, prev + advance); - }); + if (advance > 0) { + accum -= advance; + const next = Math.min(tgt.length, prev + advance); + shownLenRef.current = next; + setShownLen(next); + } + } else { + // Caught up — keep ticking cheaply so we resume immediately when more + // text arrives. raf is ~1KHz of wall-clock budget; this is fine. 
+ accum = 0; + last = ts; + } raf = requestAnimationFrame(tick); }; raf = requestAnimationFrame(tick); return () => cancelAnimationFrame(raf); - }, [target, shownLen, baseCharsPerSec]); + }, [baseCharsPerSec]); return target.slice(0, Math.min(shownLen, target.length)); } @@ -373,11 +431,17 @@ function renderAgentEntries(entries: OutputEntry[], isLive: boolean): React.Reac return out; } -export function ChatTurn({ turn, inflightSince }: ChatTurnProps): React.ReactElement { +export function ChatTurn({ turn, inflightSince, onEditMessage, onShare }: ChatTurnProps): React.ReactElement { const showInflight = inflightSince !== undefined; return (
- {turn.userEntry && } + {turn.userEntry && ( + + )} {(showInflight || turn.agentEntries.length > 0) && (
{showInflight && ( diff --git a/app/src/renderer/hub/chat/QuoteSelectionButton.tsx b/app/src/renderer/hub/chat/QuoteSelectionButton.tsx new file mode 100644 index 00000000..1857a58c --- /dev/null +++ b/app/src/renderer/hub/chat/QuoteSelectionButton.tsx @@ -0,0 +1,64 @@ +import React from 'react'; +import { clearSelection, type TextSelectionSnapshot } from './useTextSelection'; + +interface QuoteSelectionButtonProps { + selection: TextSelectionSnapshot | null; + onQuote: (text: string) => void; +} + +const BTN_HEIGHT = 28; +const GAP = 8; +const HORIZONTAL_MARGIN = 8; + +function QuoteIcon(): React.ReactElement { + return ( + + + + ); +} + +/** + * Floating "Quote" button anchored to the current selection's bounding rect. + * Centered horizontally above the selection; falls back below when above + * would clip the viewport. Horizontally clamped to viewport bounds. + * + * Click prevents-default on mousedown so the browser doesn't drop the + * selection before our handler runs. + */ +export function QuoteSelectionButton({ selection, onQuote }: QuoteSelectionButtonProps): React.ReactElement | null { + if (!selection) return null; + + const { rect, text } = selection; + // Measure-after-paint isn't easy here; use a known approximate width. The + // CSS sets min-width and the inner content centers, so a slight + // mis-measurement just shifts a few px. 
+ const approxWidth = 76; + + let top = rect.top - BTN_HEIGHT - GAP; + if (top < HORIZONTAL_MARGIN) top = rect.bottom + GAP; + const centerX = rect.left + rect.width / 2; + let left = centerX - approxWidth / 2; + const maxLeft = window.innerWidth - approxWidth - HORIZONTAL_MARGIN; + if (left < HORIZONTAL_MARGIN) left = HORIZONTAL_MARGIN; + if (left > maxLeft) left = maxLeft; + + return ( + + ); +} diff --git a/app/src/renderer/hub/chat/chat.css b/app/src/renderer/hub/chat/chat.css index a423fd7f..78ce6660 100644 --- a/app/src/renderer/hub/chat/chat.css +++ b/app/src/renderer/hub/chat/chat.css @@ -182,36 +182,38 @@ } /* Dismissible quote chip rendered inside the TaskInput's box (via topSlot) - so it visually extends the input rather than floating above it. No own - border or radius — the parent box owns those. */ + so it visually extends the input rather than floating above it. Tinted + slightly so it reads as a distinct region from the textarea below; no own + border or radius (parent box owns those). 
*/ .chat-quote-preview { display: flex; align-items: flex-start; gap: 10px; margin: 0; - padding: 8px 10px; + padding: 10px 12px; border-bottom: 1px solid var(--color-border-subtle); - background-color: transparent; + background-color: color-mix(in srgb, var(--color-accent-default) 8%, var(--color-bg-elevated)); } .chat-quote-preview__bar { width: 3px; align-self: stretch; border-radius: 2px; - background-color: color-mix(in srgb, var(--color-fg-primary) 28%, transparent); + background-color: var(--color-accent-default); flex-shrink: 0; } .chat-quote-preview__text { flex: 1; font-size: 12px; - line-height: 1.5; - color: var(--color-fg-secondary); + line-height: 1.55; + color: var(--color-fg-primary); font-style: italic; white-space: pre-wrap; word-break: break-word; - display: -webkit-box; - -webkit-line-clamp: 4; - -webkit-box-orient: vertical; - overflow: hidden; + max-height: 110px; + overflow-y: auto; + /* Soft mask at the bottom edge to suggest "more below" when scrollable. */ + mask-image: linear-gradient(to bottom, black 85%, transparent 100%); + -webkit-mask-image: linear-gradient(to bottom, black 85%, transparent 100%); } .chat-quote-preview__close { background: transparent; @@ -701,6 +703,9 @@ padding: 12px max(16px, calc((100% - 780px) / 2)) 18px; background-color: transparent; pointer-events: none; + display: flex; + flex-direction: column; + gap: 10px; } .chat-composer > * { pointer-events: auto; @@ -743,19 +748,18 @@ background-color: var(--color-bg-overlay); } -/* Preview rail — overlays the transcript anchored just above the composer. - Same horizontal padding as the composer so it visually anchors to the same - reading column. */ +/* Preview rail — sits inside .chat-composer's flex column, naturally above + the TaskInput. Grows/shrinks with the composer instead of being pinned to + a fixed bottom offset (which used to leave the rail behind the composer + whenever the input expanded from a quote chip or multi-line typing). 
*/ .chat-preview-rail { - position: absolute; - left: 0; - right: 0; - bottom: 86px; - z-index: 2; - padding: 0 max(16px, calc((100% - 780px) / 2)); display: flex; justify-content: flex-start; pointer-events: none; + /* Match .task-input's horizontal padding so the preview's left edge aligns + with the TaskInput's visible bordered box (which is inset from .task-input + by --space-3 on each side). */ + padding: 0 var(--space-3); } .chat-preview-rail > * { pointer-events: auto; diff --git a/app/src/renderer/hub/chat/parseUserMessage.ts b/app/src/renderer/hub/chat/parseUserMessage.ts new file mode 100644 index 00000000..2b8b6f1b --- /dev/null +++ b/app/src/renderer/hub/chat/parseUserMessage.ts @@ -0,0 +1,43 @@ +/** + * Wire format for an in-chat quote — matches browser_use_cloud's user-message + * encoding (minus the [scroll:N] metadata line). A user_input event whose + * content starts with consecutive `> ` lines, followed by a blank line, then + * prose, is split into `{ quote, message }`. + * + * > First line of quote + * > Second line + * + * User's actual reply here. + */ + +export interface ParsedUserMessage { + quote: string | null; + message: string; +} + +export function parseUserMessage(content: string): ParsedUserMessage { + const lines = content.split('\n'); + const quoteLines: string[] = []; + let i = 0; + while (i < lines.length && lines[i].startsWith('> ')) { + quoteLines.push(lines[i].slice(2)); + i++; + } + if (quoteLines.length === 0) return { quote: null, message: content }; + // Allow an optional single blank-line separator between quote and message. + if (i < lines.length && lines[i] === '') i++; + const message = lines.slice(i).join('\n'); + return { quote: quoteLines.join('\n'), message }; +} + +/** + * Inverse — prepend a quote to the user's message in the canonical wire form. 
+ */ +export function formatUserMessageWithQuote(quote: string | null, message: string): string { + if (!quote) return message; + const quoteBlock = quote + .split('\n') + .map((l) => `> ${l}`) + .join('\n'); + return message ? `${quoteBlock}\n\n${message}` : quoteBlock; +} diff --git a/app/src/renderer/hub/chat/useTextSelection.ts b/app/src/renderer/hub/chat/useTextSelection.ts new file mode 100644 index 00000000..c04a64b9 --- /dev/null +++ b/app/src/renderer/hub/chat/useTextSelection.ts @@ -0,0 +1,79 @@ +import { useEffect, useState, type RefObject } from 'react'; + +export interface TextSelectionSnapshot { + text: string; + rect: DOMRect; +} + +/** + * Track the current text selection inside a scoped container. Returns + * `null` when the selection is collapsed, zero-dimension, or falls outside + * the container. + * + * Mirrors `browser_use_cloud/.../useTextSelection.ts`: + * - listens to selectionchange + mouseup on document + * - uses Selection API + range.getBoundingClientRect() + * - filters out collapsed and zero-dim selections (programmatic + * selections, focus/blur artifacts) + * + * Scope: only fires when the entire selection is anchored *and* extended + * inside `containerRef`. That prevents the floating Quote button from + * showing when the user selects text in the composer textarea or the + * sidebar. + */ +export function useTextSelection(containerRef: RefObject): TextSelectionSnapshot | null { + const [snap, setSnap] = useState(null); + + useEffect(() => { + const compute = (): void => { + const sel = window.getSelection(); + if (!sel || sel.isCollapsed || sel.rangeCount === 0) { + setSnap(null); + return; + } + const container = containerRef.current; + if (!container) { + setSnap(null); + return; + } + // Restrict to selections fully inside the transcript container. 
+ const anchor = sel.anchorNode; + const focus = sel.focusNode; + if (!anchor || !focus) { + setSnap(null); + return; + } + if (!container.contains(anchor) || !container.contains(focus)) { + setSnap(null); + return; + } + const text = sel.toString(); + if (!text.trim()) { + setSnap(null); + return; + } + const range = sel.getRangeAt(0); + const rect = range.getBoundingClientRect(); + if (rect.width === 0 && rect.height === 0) { + setSnap(null); + return; + } + setSnap({ text, rect }); + }; + + const onSelectionChange = (): void => compute(); + const onMouseUp = (): void => compute(); + document.addEventListener('selectionchange', onSelectionChange); + document.addEventListener('mouseup', onMouseUp); + return () => { + document.removeEventListener('selectionchange', onSelectionChange); + document.removeEventListener('mouseup', onMouseUp); + }; + }, [containerRef]); + + return snap; +} + +export function clearSelection(): void { + try { window.getSelection()?.removeAllRanges(); } catch { /* ignore */ } +} From 03cd5638aca1845a287220ae3c94175c4442bc62 Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Mon, 11 May 2026 14:49:24 -0700 Subject: [PATCH 17/31] chat: fix streaming typewriter stall + clear transcript on rerun Typewriter was overwriting shownLenRef with stale React state on every render, slamming the reveal back to ~3 chars per commit. Refs are now written only by the raf loop; shrink/resume handled in effects. Rerun clears session.output in main but emits via session-updated, which the bridge strips output from. Detect rerun via createdAt bump and reset output alongside the patch. 
--- app/src/renderer/hub/chat/ChatTurn.tsx | 116 +++++++++++------- .../renderer/hub/state/useSessionsBridge.ts | 9 +- 2 files changed, 80 insertions(+), 45 deletions(-) diff --git a/app/src/renderer/hub/chat/ChatTurn.tsx b/app/src/renderer/hub/chat/ChatTurn.tsx index 9e56d6e4..28b2c36d 100644 --- a/app/src/renderer/hub/chat/ChatTurn.tsx +++ b/app/src/renderer/hub/chat/ChatTurn.tsx @@ -129,59 +129,87 @@ interface ChatTurnProps { * once caught up. */ function useTypewriter(target: string, baseCharsPerSec = 70, startInstant = false): string { - // Lazy init: if the prose is already finalized when this component mounts - // (re-opening a completed task, scrolling back to an old turn), skip the - // animation and render full-text immediately. Otherwise start at 0 and let - // the raf loop type it out. + // shownLen is ONLY used to trigger re-renders. The raf loop reads/writes + // shownLenRef exclusively — never shownLen directly — so React re-renders + // can't race with in-flight raf advances. const [shownLen, setShownLen] = useState(() => (startInstant ? target.length : 0)); - const targetRef = useRef(target); + + // shownLenRef is the single source of truth for the revealed position. + // Only written by the raf loop or the shrink handler. Never from render body. const shownLenRef = useRef(shownLen); + + // targetRef lets the raf loop read the latest target without a dep. + const targetRef = useRef(target); targetRef.current = target; - shownLenRef.current = shownLen; - // If the target swaps to something shorter than what we've already shown - // (rare — happens on rerun / quick edits), restart from 0. - if (target.length < shownLen) { - shownLenRef.current = 0; - setShownLen(0); - } + // rafRef holds the active requestAnimationFrame id (0 = idle). + const rafRef = useRef(0); + + // Shared tick logic stored in a ref so both the initial effect and the + // resume effect reuse the exact same function without duplication. 
+ const tickStateRef = useRef({ last: null as number | null, accum: 0 }); + const tickRef = useRef(() => {}); + tickRef.current = (ts: number) => { + const state = tickStateRef.current; + const dt = state.last == null ? 16 : ts - state.last; + state.last = ts; + + const tgt = targetRef.current; + const prev = shownLenRef.current; + + if (prev < tgt.length) { + const gap = tgt.length - prev; + // Adaptive rate: catch up faster when far behind, cap at 2.5×. + const rate = Math.min(baseCharsPerSec * 2.5, baseCharsPerSec + gap * 0.4); + state.accum += (dt / 1000) * rate; + const advance = Math.floor(state.accum); + if (advance > 0) { + state.accum -= advance; + const next = Math.min(tgt.length, prev + advance); + shownLenRef.current = next; + setShownLen(next); + } + rafRef.current = requestAnimationFrame(tickRef.current); + } else { + // Caught up — idle. Resume effect restarts when target grows. + state.accum = 0; + state.last = null; + rafRef.current = 0; + } + }; - // Single persistent raf loop. We deliberately do NOT depend on `shownLen` so - // the loop is not torn down + recreated every frame (which was wiping the - // dt/accum state and stalling progress to ~1 char per re-render). + // Start the raf loop once on mount (or if baseCharsPerSec changes). useEffect(() => { - let raf = 0; - let last: number | null = null; - let accum = 0; - const tick = (ts: number): void => { - const dt = last == null ? 16 : ts - last; - last = ts; - const tgt = targetRef.current; - const prev = shownLenRef.current; - if (prev < tgt.length) { - const gap = tgt.length - prev; - const rate = Math.min(baseCharsPerSec * 2.5, baseCharsPerSec + gap * 0.4); - accum += (dt / 1000) * rate; - const advance = Math.floor(accum); - if (advance > 0) { - accum -= advance; - const next = Math.min(tgt.length, prev + advance); - shownLenRef.current = next; - setShownLen(next); - } - } else { - // Caught up — keep ticking cheaply so we resume immediately when more - // text arrives. 
raf is ~1KHz of wall-clock budget; this is fine. - accum = 0; - last = ts; - } - raf = requestAnimationFrame(tick); + tickStateRef.current = { last: null, accum: 0 }; + rafRef.current = requestAnimationFrame(tickRef.current); + return () => { + if (rafRef.current) cancelAnimationFrame(rafRef.current); + rafRef.current = 0; }; - raf = requestAnimationFrame(tick); - return () => cancelAnimationFrame(raf); }, [baseCharsPerSec]); - return target.slice(0, Math.min(shownLen, target.length)); + // Handle target shrinking (rerun / edit): reset to 0. + // useEffect, not render body, to avoid setState-during-render warning. + useEffect(() => { + if (target.length < shownLenRef.current) { + shownLenRef.current = 0; + setShownLen(0); + // Restart the loop from the beginning. + if (rafRef.current) cancelAnimationFrame(rafRef.current); + tickStateRef.current = { last: null, accum: 0 }; + rafRef.current = requestAnimationFrame(tickRef.current); + } + }, [target]); + + // Restart the idle loop when new target text arrives. + useEffect(() => { + if (target.length > shownLenRef.current && rafRef.current === 0) { + tickStateRef.current = { last: null, accum: 0 }; + rafRef.current = requestAnimationFrame(tickRef.current); + } + }, [target.length]); + + return target.slice(0, shownLenRef.current); } /** diff --git a/app/src/renderer/hub/state/useSessionsBridge.ts b/app/src/renderer/hub/state/useSessionsBridge.ts index 9bd58a30..e5dae356 100644 --- a/app/src/renderer/hub/state/useSessionsBridge.ts +++ b/app/src/renderer/hub/state/useSessionsBridge.ts @@ -62,7 +62,14 @@ export function useSessionsBridge(): void { // stored on the in-memory session record). // eslint-disable-next-line @typescript-eslint/no-unused-vars const { output: _o, hasBrowser: _hb, ...rest } = session; - store.patchSession(session.id, rest); + // Rerun: SessionManager bumps createdAt and clears session.output, then + // emits session-updated. 
Without resetting here the transcript keeps + // showing the prior run's messages until new events arrive. + if (session.createdAt > prev.createdAt) { + store.patchSession(session.id, { ...rest, output: [] }); + } else { + store.patchSession(session.id, rest); + } }); const unsubBrowserGone = api.on.sessionBrowserGone((id) => { From 246f972df7f6c4724ea1c2f0ffc1bf6abbe00113 Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Wed, 13 May 2026 17:10:40 -0700 Subject: [PATCH 18/31] Preserve product direction for chat view Add the product blueprint, product soul, and user-transcript handoff docs so the branch keeps the direction artifacts separate from implementation work. Constraint: User explicitly asked to keep the PRODUCT_ markdown files while restarting the code changes Confidence: high Scope-risk: narrow Tested: Documentation-only commit; no runtime tests required Not-tested: Rendered documentation preview --- app/docs/PRODUCT_BLUEPRINT.md | 334 +++++++++++++++++++++++ app/docs/PRODUCT_SOUL.md | 123 +++++++++ app/docs/PRODUCT_SOUL_USER_TRANSCRIPT.md | 73 +++++ 3 files changed, 530 insertions(+) create mode 100644 app/docs/PRODUCT_BLUEPRINT.md create mode 100644 app/docs/PRODUCT_SOUL.md create mode 100644 app/docs/PRODUCT_SOUL_USER_TRANSCRIPT.md diff --git a/app/docs/PRODUCT_BLUEPRINT.md b/app/docs/PRODUCT_BLUEPRINT.md new file mode 100644 index 00000000..e336271c --- /dev/null +++ b/app/docs/PRODUCT_BLUEPRINT.md @@ -0,0 +1,334 @@ +# Product Blueprint + +Operational detail for Browser Use Desktop. Companion to `PRODUCT_SOUL.md`. + +The soul doc holds direction and judgment. This doc holds shapes, flows, and +specifics. This one evolves as product decisions get made. If something here +contradicts the soul doc, the soul doc wins. + +## Mental Model + +- **Execution layer** runs the work. Browser (most common), local action, + direct model answer. Provider-neutral behind a stable session contract. +- **Session** is the durable work unit. 
Records task, state, events, outputs, + costs, errors, source evidence, follow-up path. Survives restarts. +- **Chat** is the spine. Users read top-to-bottom. Text narrates. **Typed + blocks are embedded inline** to carry structure, evidence, and outputs. + There is no parallel "execution view" surface — execution and output both + live in the chat as blocks. +- **Block vocabulary** is owned by the UI. The agent emits typed events with + labels; the UI renders them consistently with rich skeleton components. +- **Action queue** is the agency layer. Candidate actions, approval state, + history. Surfaced both inline (as blocks in chat) and aggregated in the + sidebar. +- **Job sidebar** is the orchestration layer. Many parallel sessions at a + glance. +- **Personalization surface** makes the app the user's. Onboarding, + preferences, dashboard, notification rules — all first-class. + +## The Chat Is The Document + +The chat is not a transcript next to a panel. It is a rich, top-to-bottom +document where text and structured blocks alternate. The agent narrates, +emits a `search` block, narrates again, emits a `decide` block, etc. + +Reading patterns: + +- Skim the text and block headers to follow the shape of the work. +- Expand a block to inspect what happened (raw results, evidence, + reasoning). +- For large output blocks (a 100-row table, a long draft), use **pop to + canvas** to open the block in a side panel without leaving the chat. + +This is the answer to "what shape is an agent run." Mostly linear, vertical, +text-spined, with structure where structure earns its keep. Graph view is a +toggle for power users and for genuinely graph-shaped jobs — not the default. + +## Inline Typed Block Vocabulary + +The UI owns the structure. The agent picks the type and the label. First-pass +vocabulary (extend as needed): + +### Process Blocks + +- **`step`** — a labeled unit grouping sub-events. Title, status, one-line + summary, expandable detail. 
+- **`search`** — query, result count, top results, expandable list, source + links. +- **`navigate`** — URL, page title, screenshot thumbnail, what was sought. +- **`extract`** — what was extracted, count, sample rows, link to full data. +- **`decide`** — options considered, chosen path, reason. +- **`retry`** — what failed, attempt count, new approach. +- **`branch`** — path taken, paths abandoned with one-line reasons. +- **`wait`** — what the agent is waiting on (auth, user input, rate limit). + +### Output Blocks + +- **`table`** — structured rows with provenance per row. Pop-to-canvas for + large tables. +- **`comparison`** — side-by-side cells with differentiators highlighted. +- **`clusters`** — grouped items with category labels and pattern notes. +- **`draft`** — editable text or structured object, approval-gated when it + would be sent or saved externally. +- **`plan`** — list of proposed operations with reasons, editable, single + approval executes. +- **`briefing`** — what changed, why it matters, evidence, recommended + response. +- **`summary`** — short closing block at end of a turn or session. + +### Agency Blocks + +- **`action`** — a single candidate action with description, evidence link, + approval state, undo path. Aggregated into the action queue. + +Every block carries: human-readable title, status, evidence link where +applicable, expand-collapse, and (optionally, behind a toggle) duration and +cost. + +## Plan Revisions + +The agent may revise its plan mid-flight. Revisions are visible. + +- A `step` that gets replaced renders as struck-through with a one-line + reason and a pointer to the replacement. +- A `step` that gets skipped renders as collapsed with the reason. +- A `branch` block makes path choices explicit when there were real + alternatives. + +The user always knows what changed, why, and what the agent is doing now. + +## Task Shapes And Block Weight + +Same chat-with-blocks pattern, different density. 
+ +| Shape | Example | Block density | +| --- | --- | --- | +| Trivial Q&A | "What's the weather?" | Pure text. No blocks. | +| Lookup | "Find the latest invoice from X." | Light: one `search`, one `summary`. | +| Small structured | "Draft a reply to this email." | One `draft` block, one `action`. | +| Local action | "Clean up my Downloads folder." | One `plan` block + `action` items. | +| Medium delegated | "Compare these five products." | Several process blocks + one `comparison`. | +| Large delegated | "Research 100 leads." | Many process blocks + `table` + `clusters` + drafts. | +| Monitoring | "Tell me when this page changes." | Recurring `briefing` blocks. | + +Adding a new task type means picking the right block weight, not inventing a +new surface. + +## Session Lifecycle + +States (human-readable, product-relevant): + +- `idle` — session created, not yet started +- `running` — actively executing +- `waiting-for-user` — needs clarification or approval +- `waiting-for-auth` — needs login or credential +- `blocked` — site unreachable, captcha, rate limit, unrecoverable site state +- `paused` — user paused; resumable +- `failed` — terminal failure; may be re-runnable +- `completed` — terminal success +- `archived` — completed and out of active view + +Never collapse these into generic success/failure. Each warrants distinct UI +treatment in the job sidebar. + +## Job Sidebar + +The primary orchestration surface. Sections, roughly: + +- **Needs you now** — waiting on user, waiting on auth, blocked. Top. +- **Running** — actively executing, with current step. +- **Scheduled / Monitoring** — recurring jobs and their next check. +- **Recently completed** — items finished within a recent window whose + output is worth a glance. +- **Archived** — collapsed by default. + +Each row: title, current state, last meaningful event, shortcuts to open the +chat or jump to the latest block. + +Background jobs run quietly. 
They surface only when something happens the +user has opted to see — meaningful change, required approval, completion, +blocker. + +## Action Queue + +Each `action` block in chat also appears in an aggregated queue. Examples: + +- Send a message +- Save records to a list +- Apply labels +- Purchase an item +- Book a time +- Submit a form +- Delete or move files + +Each action has: description, evidence link, status (`drafted`, +`awaiting-approval`, `approved`, `executed`, `failed`, `cancelled`), +approval rule (auto, single approval, requires confirmation phrase), and a +clear undo path where possible. + +Actions are never hidden inside chat text. They are always rendered as +`action` blocks. + +## Onboarding + +Goal: fast, rewarding, thorough. + +Constraints: + +- 60-90 seconds for the fast path. The user is using the app immediately + after. +- Each answer visibly changes something on screen — dashboard preview, + sample briefing in chosen tone, sidebar layout. No invisible questions. +- "Thorough" is opt-in and incremental, surfaced after the user has felt + value — not as a wall before they get any. + +Question shape (illustrative, not final): + +1. What kinds of things do you most want help with? (Multi-select with + examples — research, inbox, shopping, monitoring, file organization, + etc.) +2. How proactive should I be? (Quiet / Calm / Active — with examples of + what each looks like.) +3. What should I bring to your attention? (Completions only / Important + changes / Suggestions / Drafts to review.) +4. How do you want to be notified? (In-app only / OS notifications / Daily + digest.) +5. Preferred tone for briefings. (Terse / Neutral / Conversational.) + +Passive learning extends every answer over time. There is always a global +quiet switch. + +## Proactivity Rules + +- **Earned, not default.** Start mostly reactive. Earn background work by + doing reactive work well first. +- **Scoped.** Every proactive behavior has a user-set boundary. 
+- **Briefings, not pings.** Digests beat toast streams. +- **Contextual moments.** Surface suggestions at the moment of relevance + (opening a session, finishing a task, asking a related question), not as + ambient nags. + +Failure modes to avoid: + +- Notification spam +- Vague "still working" pings with no content +- Suggestions the user didn't ask for and wouldn't want +- Background work that surprises on next app open +- Updates without evidence + +## Personalization Levels + +1. **Preferences.** Theme, density, tone, panel visibility, notification + scope. Table stakes. +2. **Layout.** Pinned widgets, rearranged panes, dashboard reflecting actual + work patterns. Core product. +3. **Generative UI.** Custom views built per user from work and data. + Roadmap, not v1. + +Onboarding seeds (1) and (2). Passive learning extends both over time. + +## Example Walkthroughs + +### Contact Research (delegated) + +User asks: "Research 100 contacts from this list and tell me who's worth +reaching out to." + +In the chat, top-to-bottom: + +1. Agent text: "Starting. I'll work through the list and group results." +2. `step` block: "Loading contact list — 100 entries." +3. `search` blocks for each cohort lookup, grouped under labeled steps. +4. `extract` blocks summarizing what was pulled per source. +5. Agent text: "Here's what I found." +6. `clusters` block: founders, hiring managers, investors, etc. +7. `table` block (pop-to-canvas): full contacts with provenance. +8. `briefing` block: pattern notes, outliers. +9. `draft` blocks: outreach drafts for chosen clusters. +10. `action` blocks: send / save / label, each approval-gated. +11. `summary` block: one-line closing. + +### Desktop Cleanup (local action) + +User asks: "Clean up my Downloads folder." + +1. Agent text: "Inspecting the folder." +2. `extract` block: file inventory (count, types, ages). +3. `plan` block: delete candidates, archive candidates, leave-alone — each + with a reason. Editable. +4. 
User edits, approves. +5. `action` blocks: each file operation, status `executed` with undo links. +6. `summary` block. + +### Monitoring (proactive) + +User asks: "Tell me when this page changes." + +1. `step` block: monitor configured, schedule shown. +2. Recurring `briefing` blocks appear in the chat at each check — change + summary, severity, evidence, recommended response. Or a no-change record + collapsed by default. +3. User can adjust scope, frequency, or end the monitor from inside a + briefing block. + +## Implementation Principles + +- **Chat is the document.** Text spine, typed blocks inline. No parallel + execution panel. +- **UI owns the vocabulary.** Agent emits typed events with labels; UI + renders consistently. +- **Legibility over completeness.** The user can always glance and know what + is going on. +- **Plan revisions are visible.** No silent rewrites. +- **Provenance on everything.** Clusters, rankings, charts, recommendations + retain source links. +- **Execution as ground truth.** Inspection and takeover always nearby on + the relevant block. +- **Proactive but permissioned.** Consequential actions require explicit + authority. When in doubt, prepare and ask. +- **Durable work units.** Sessions survive restarts, failures, follow-ups. +- **Human-readable state.** Never collapse important states into generic + success/failure. +- **Local-first trust.** Credentials, cookies, browser state, local logs are + sensitive local data. +- **Provider-neutral sessions.** Engine details stay behind a stable session + contract. +- **Cross-platform is core.** Installers, shortcuts, shells, logs, credential + storage, profiles, browser behavior — all first-class per OS. +- **Personalization is core.** The personalization surface is part of the + product spine, not buried in settings. 
+ +## Do / Avoid Cheat Sheet + +Prefer: + +- Command input that starts work quickly +- A job sidebar that makes parallel work coherent +- Inline typed blocks that make any moment in a session glance-able +- Source-backed summaries instead of unsupported prose +- Visible plan revisions +- Explicit action states and approval affordances +- Pop-to-canvas for large output blocks +- Session recovery near the session that needs it +- Concise updates that say what changed and what matters +- Onboarding and personalization surfaces that feel rewarding + +Avoid: + +- Building a generic browser shell +- Putting execution detail in a parallel panel away from the chat +- Free-form agent output that bypasses the block vocabulary +- Long prose where structure would help +- Hiding source evidence +- Silent plan rewrites +- Ambient proactivity that hasn't been opted into +- Vague "still working" pings +- Provider-specific UI where a session-level concept would work +- Treating logs as a user-facing substitute for product state +- Burying personalization in settings + +## Public-Facing Language Care + +When writing public-facing examples, avoid implying scraping of specific +third-party sites in ways that violate their terms. Prefer "research contacts +from authorized sources" over "scrape LinkedIn." diff --git a/app/docs/PRODUCT_SOUL.md b/app/docs/PRODUCT_SOUL.md new file mode 100644 index 00000000..cdecce4b --- /dev/null +++ b/app/docs/PRODUCT_SOUL.md @@ -0,0 +1,123 @@ +# Product Soul + +For agents and humans making product decisions in Browser Use Desktop. Not +marketing copy. Read this first to orient on direction. For concrete shapes, +flows, and operational detail, see `PRODUCT_BLUEPRINT.md`. + +## Thesis + +Browser Use Desktop is for **anyone who wants something done on their +computer**. Shopping, research, desktop cleanup, file search, form filling, +monitoring, general questions, drafting, booking. 
+ +The user does not care whether the work runs through a browser, a local +action, or a direct model answer. They care that they can ask, get a result +they trust, and stay in control. + +Most work routes through the browser, so browser is the **most common** +execution layer — not the privileged one. Local actions and direct answers +are first-class. + +The product is not a desktop app with chat. Chat is necessary, not sufficient. +The app turns delegated work into structured understanding and controlled +action. + +## Scope + +In: anything a person wants done on their computer that does not require deep +IDE integration. + +Coding is **allowed but not marketed**. The positioning fight against Cursor, +Conductor, Codex app, and similar is intentionally skipped. The win is the +much larger non-coding surface. + +## Moats + +Three things to defend. Evaluate features against them. + +1. **User experience.** Faster, calmer, more thoughtful than the + everything-app features the big labs ship. Taste is the product, not decoration. +2. **Personalized proactivity.** Scoped to the user, evidence-backed, earned. + Never spam, never vague, never presumptuous. +3. **Multi-session orchestration.** Many durable jobs running at once, + surfaced through a coherent interface. Structural advantage over + single-conversation chat products. + +Local-first trust and browser depth are real strengths but serve the three +above, not the headline. + +## Promise + +> "I can hand work to this app, get useful updates while it runs, understand +> the result quickly, let it act for me when the boundaries are clear, and +> trust that it is calibrated to me — not to a generic average user." + +Four parts: **delegation**, **understanding**, **personalized proactivity**, +**control**. + +## Interface Pattern + +One pattern for everything: **chat with inline typed blocks**. Text is the +spine — the user reads top-to-bottom. 
Blocks (search, navigate, extract, +decide, table, draft, plan, briefing, action queue, and more) are embedded +*inside* the chat, not in a parallel surface. They carry structure where +structure helps; text carries narration and clarification. + +Block weight scales with task complexity. Trivial Q&A: pure text. Large +delegated work: many embedded blocks, with a "pop to canvas" affordance for +outputs that deserve more room. + +The UI owns the block vocabulary. The agent emits typed events with labels; +the UI renders them consistently. Agents stay free; users stay oriented. + +## Autonomy Ladder + +Observe → Organize → Recommend → Draft → Act → Monitor. Always make the rung +explicit. "Act for me" is only safe with defined scope, allowed actions, +confidence threshold, and approval rules. + +## Personalization + +The app is the user's. Preferences, layout, dashboard, notification scope, +tone — all bend to them. Generative UI is on the roadmap, not v1. + +Calibration is fast, rewarding, and thorough at onboarding, and continues +passively through use. There is always a global quiet switch that pauses +output without resetting learning. + +## Non-Negotiables + +- **Legibility over completeness.** The user can always feel they know what + is going on. Glance-ability beats exhaustive logs. When agent freedom and + user legibility conflict, legibility wins. +- **Structure over prose** for any task with structure. +- **Provenance** on every cluster, ranking, chart, recommendation. +- **Execution as ground truth.** Inspection and takeover are always nearby. +- **Permissioned proactivity.** Consequential actions need explicit authority. +- **Durable sessions.** Work survives restarts, failures, follow-ups. +- **Local-first trust.** Credentials, cookies, state are sensitive local data. +- **Cross-platform is core.** macOS, Windows, Linux are first-class. + +## Decision Filter + +Before implementing a feature, ask: + +1. Does it help the user delegate computer work? +2. 
Does it convert raw activity into understanding or controlled action? +3. Does it fit chat + artifacts, or invent a parallel pattern? +4. Does it respect or extend the user's personalization? +5. Does it serve at least one moat (UX, personalized proactivity, + multi-session orchestration)? +6. Does it preserve evidence, provenance, and user control? + +If mostly no, the feature is off-direction. + +## Short Version + +Browser Use Desktop is the interface for delegating computer work to agents. +Any person, any task that touches their computer. Browser is the most common +execution layer, not the only one. Chat plus artifacts. Many sessions in +parallel. Proactive on the user's terms. + +The goal is not to make users read more chat. The goal is to make delegated +computer work understandable, resumable, actionable, and *theirs*. diff --git a/app/docs/PRODUCT_SOUL_USER_TRANSCRIPT.md b/app/docs/PRODUCT_SOUL_USER_TRANSCRIPT.md new file mode 100644 index 00000000..2bca8898 --- /dev/null +++ b/app/docs/PRODUCT_SOUL_USER_TRANSCRIPT.md @@ -0,0 +1,73 @@ +# Product Soul Direction - User Transcript + +This file preserves Reagan's side of the product-direction discussion so another +agent can continue from the source statements instead of from a rewritten +interpretation. + +## Initial Request + +> let's write a sort of soul document in terms of what direction this desktop +> app is meant to go. + +## Reshape Around Browser Automation And Knowledge Work + +> well, let's reshape that. +> +> the thing is, it is generally true that most automations for users will occur +> on a browser. this means that this desktop app is a good place to automate all +> knowledge, background work for users. it is going to be soething proactive, +> that gives users meaningful updates and act on the behalf of users in a +> meaningful way. 
+> +> this is because browser agents have come such a long way that they are now +> very accurate - and now it is time to ask, what is the ideal interface for +> automation? +> +> a desktop app with a chat is not enough. humans were not meant to read text to +> text. diagrams and text are both needed for humans to fully engage with +> knowledge work. +> +> in this, we aim to make a chat that, yes, in the end has text to text, but +> also abstracts information in a high level for users to understand, making it a +> pleasant automation experence. +> +> an example is scraping 100 linkedin contacts, and automatically sorting these +> contacts in such a way that the user understands what abstract category each +> of those 100 contacts falls under, or interesting patterns in the data that +> may not have been found otherwise. +> +> does this make sense? you should be very critical here in terms of +> specificity, clarity, and general direction. I want you to brainstorm with me. + +## Audience Is Agents + +> well this is mainly for agents to read and understand. + +## Need Brainstorming, Not Just Writing + +> I want you to brainstorm with me in terms of direction, not just write for me +> man + +## Everything App Outside Coding, With Proactivity Baked In + +> yes. but this doesnt really touch on the proactive nature of things. this app +> is meant to try to be the everything app for everything but coding, while it +> can be used for coding. +> +> the reason for this is that I believe coding apps like conudctor, codex app, +> etc. are all fighting to be that. we do not have the energy nor resources to +> try to compete, nor do we want to compete. we want to capture everything else +> on the market, which is a large majority. + +## Background Completion, Not Only Interface + +> once more, this doesnt really touch on the proactiveness of helping users +> complete things in the background and such. 
+ +## Not Only Proactive, Avoid Overweighting One Suggestion + +> but it's not only proactive. once more, it's the everything app that happens to +> have proactivity baked in. you keep going all-in on all of my suggestions. +> just copy and save this chat transcript of what I said so I can chat to a +> different agernt instead. + From 262ba2c27b7c77fbd95592a33dd8ff9d5454a61a Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Wed, 13 May 2026 17:10:55 -0700 Subject: [PATCH 19/31] Let chat edits rerun with a replaced prompt The chat edit affordance needs a renderer-safe API that can rerun an existing session with updated prompt text. Route editAndRerun through the existing rerun path and persist the prompt override before clearing the transcript. Constraint: Keep this to the first-message rerun path; follow-up truncation/resume was not restored from the stash Rejected: Reapply the larger edit-and-resume stash path | it pulled in broader session-history changes outside the requested UI restore Confidence: high Scope-risk: moderate Tested: yarn vitest run tests/unit/sessions/SessionManager.persistence.test.ts Tested: yarn typecheck Not-tested: Manual Electron edit-rerun flow --- app/src/main/index.ts | 11 +++++++---- app/src/main/sessions/SessionManager.ts | 6 +++++- app/src/preload/shell.ts | 2 ++ app/src/renderer/globals.d.ts | 1 + .../SessionManager.persistence.test.ts | 19 +++++++++++++++++++ 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/app/src/main/index.ts b/app/src/main/index.ts index 0a931d3a..bc699da5 100644 --- a/app/src/main/index.ts +++ b/app/src/main/index.ts @@ -1301,10 +1301,13 @@ app.whenReady().then(async () => { return resumeSessionWithAgent(validatedId, validatedPrompt, resumeAttachments, 'resume'); }); - ipcMain.handle('sessions:rerun', async (_event, id: string) => { - const validatedId = assertString(id, 'id', 100); + ipcMain.handle('sessions:rerun', async (_event, payload: string | { id?: unknown; prompt?: unknown }) => { + const 
idRaw = typeof payload === 'string' ? payload : payload?.id; + const promptRaw = typeof payload === 'string' ? undefined : payload?.prompt; + const validatedId = assertString(idRaw, 'id', 100); + const promptOverride = promptRaw == null ? undefined : assertString(promptRaw, 'prompt', 10000); const t0 = Date.now(); - mainLogger.info('main.sessions:rerun', { id: validatedId }); + mainLogger.info('main.sessions:rerun', { id: validatedId, edited: promptOverride !== undefined }); const session = sessionManager.getSession(validatedId); if (!session) return { error: 'Session not found' }; @@ -1314,7 +1317,7 @@ app.whenReady().then(async () => { const engineId = sessionManager.getSessionEngine(validatedId) ?? DEFAULT_ENGINE_ID; await stampConfiguredSessionModel(validatedId, engineId, 'rerun'); - const abortController = sessionManager.rerunSession(validatedId); + const abortController = sessionManager.rerunSession(validatedId, promptOverride); captureEvent('session_rerun', { engine: engineId, }); diff --git a/app/src/main/sessions/SessionManager.ts b/app/src/main/sessions/SessionManager.ts index 51b2db8d..964b8fbc 100644 --- a/app/src/main/sessions/SessionManager.ts +++ b/app/src/main/sessions/SessionManager.ts @@ -493,7 +493,7 @@ export class SessionManager extends EventEmitter { mainLogger.info('SessionManager.deleteSession', { id }); } - rerunSession(id: string): AbortController { + rerunSession(id: string, promptOverride?: string): AbortController { const session = this.sessions.get(id); if (!session) throw new Error(`Session not found: ${id}`); @@ -501,6 +501,10 @@ export class SessionManager extends EventEmitter { if (ctrl) { ctrl.abort(); this.abortControllers.delete(id); } this.clearStuckTimer(id); + if (promptOverride !== undefined) { + session.prompt = promptOverride; + this.db.updateSessionPrompt(id, promptOverride); + } session.output = []; session.error = undefined; session.status = 'running'; diff --git a/app/src/preload/shell.ts b/app/src/preload/shell.ts 
index cff5691f..908e9cc0 100644 --- a/app/src/preload/shell.ts +++ b/app/src/preload/shell.ts @@ -263,6 +263,8 @@ contextBridge.exposeInMainWorld('electronAPI', { ipcRenderer.invoke('sessions:resume', { id, prompt, attachments }), rerun: (id: string): Promise<{ rerun?: boolean; error?: string }> => ipcRenderer.invoke('sessions:rerun', id), + editAndRerun: (id: string, prompt: string): Promise<{ rerun?: boolean; error?: string }> => + ipcRenderer.invoke('sessions:rerun', { id, prompt }), previewStart: (id: string, opts?: { maxWidth?: number; maxHeight?: number }): Promise<{ ok: boolean; reason?: string }> => ipcRenderer.invoke('sessions:preview-start', { id, ...(opts ?? {}) }), previewStop: (id: string): Promise => diff --git a/app/src/renderer/globals.d.ts b/app/src/renderer/globals.d.ts index f1660701..481e993e 100644 --- a/app/src/renderer/globals.d.ts +++ b/app/src/renderer/globals.d.ts @@ -73,6 +73,7 @@ interface ElectronSessionAPI { attachments?: Array<{ name: string; mime: string; bytes: Uint8Array }>, ) => Promise<{ resumed?: boolean; queued?: boolean; error?: string }>; rerun: (id: string) => Promise<{ rerun?: boolean; error?: string }>; + editAndRerun: (id: string, prompt: string) => Promise<{ rerun?: boolean; error?: string }>; previewStart: (id: string, opts?: { maxWidth?: number; maxHeight?: number }) => Promise<{ ok: boolean; reason?: string }>; previewStop: (id: string) => Promise; list: () => Promise; diff --git a/app/tests/unit/sessions/SessionManager.persistence.test.ts b/app/tests/unit/sessions/SessionManager.persistence.test.ts index 6e21d8e6..767c6e21 100644 --- a/app/tests/unit/sessions/SessionManager.persistence.test.ts +++ b/app/tests/unit/sessions/SessionManager.persistence.test.ts @@ -215,6 +215,25 @@ describe('SessionManager persistence', () => { manager.destroy(); }); + it('reruns a session with an edited prompt as a fresh conversation', () => { + const manager = new SessionManager(tempDbPath()); + const id = manager.createSession('Open 
example.com'); + + manager.startSession(id); + manager.setEngineSessionId(id, 'thread-123'); + const abortController = manager.rerunSession(id, 'Open example.org instead'); + const session = manager.getSession(id); + + expect(abortController.signal.aborted).toBe(false); + expect(session?.prompt).toBe('Open example.org instead'); + expect(session?.status).toBe('running'); + expect(session?.canResume).toBe(false); + expect(session?.output).toEqual([]); + expect(manager.getEngineSessionId(id)).toBeUndefined(); + + manager.destroy(); + }); + it('pauses a running session without aborting the live run and resumes it in place', () => { const manager = new SessionManager(tempDbPath()); const id = manager.createSession('Open example.com'); From 9f08d6e560de3c4fa1800af7f41d0cbb50e4fc07 Mon Sep 17 00:00:00 2001 From: Reagan Hsu Date: Wed, 13 May 2026 17:12:51 -0700 Subject: [PATCH 20/31] Restore chat-first UI polish without changing streaming Recover the stash's chat-view UI work while deliberately leaving the committed typewriter and Markdown streaming path intact. New sessions now open in chat, the center view toggle is gone, the composer locks to the session engine, terminal quoting can seed a dashboard prompt, latest-turn scrolling gets stable spacing and timestamps, browser previews wait for real navigations, and chat rows regain edit, action, and file-card polish. 
Constraint: Do not reintroduce the depth-trail or Streamdown streaming experiment Rejected: Checkout the stash's Markdown.tsx and depth-trail ChatTurn/css hunks | user said the old committed streaming felt better Confidence: high Scope-risk: moderate Tested: yarn typecheck Tested: yarn eslint src/renderer/components/empty/ErrorBoundary.tsx src/renderer/hub/Dashboard.tsx src/renderer/hub/HubApp.tsx src/renderer/hub/TaskInput.tsx src/renderer/hub/chat/BrowserPreview.tsx src/renderer/hub/chat/ChatPane.tsx src/renderer/hub/chat/ChatTranscript.tsx src/renderer/hub/chat/ChatTurn.tsx src/renderer/hub/chat/QuoteSelectionButton.tsx src/renderer/hub/chat/ToolGroup.tsx src/renderer/hub/state/sessionsStore.ts src/renderer/hub/state/uiStore.ts src/renderer/hub/types.ts Not-tested: Manual Electron chat UX pass Co-authored-by: OmX --- .../components/empty/ErrorBoundary.tsx | 12 +- app/src/renderer/hub/Dashboard.tsx | 16 + app/src/renderer/hub/HubApp.tsx | 114 +----- app/src/renderer/hub/TaskInput.tsx | 34 +- app/src/renderer/hub/chat/BrowserPreview.tsx | 223 +++++++--- app/src/renderer/hub/chat/ChatPane.tsx | 158 +++++--- app/src/renderer/hub/chat/ChatTranscript.tsx | 151 ++++++- app/src/renderer/hub/chat/ChatTurn.tsx | 197 ++++++++- .../hub/chat/QuoteSelectionButton.tsx | 11 +- app/src/renderer/hub/chat/ToolGroup.tsx | 5 +- app/src/renderer/hub/chat/chat.css | 382 +++++++++++++++++- app/src/renderer/hub/hub.css | 20 +- app/src/renderer/hub/keybindings.ts | 8 +- app/src/renderer/hub/state/sessionsStore.ts | 19 +- app/src/renderer/hub/state/uiStore.ts | 14 + app/src/renderer/hub/types.ts | 32 +- 16 files changed, 1113 insertions(+), 283 deletions(-) diff --git a/app/src/renderer/components/empty/ErrorBoundary.tsx b/app/src/renderer/components/empty/ErrorBoundary.tsx index 8ce0a155..10880f89 100644 --- a/app/src/renderer/components/empty/ErrorBoundary.tsx +++ b/app/src/renderer/components/empty/ErrorBoundary.tsx @@ -46,11 +46,13 @@ export class ErrorBoundary extends Component { 
diff --git a/app/src/renderer/hub/Dashboard.tsx b/app/src/renderer/hub/Dashboard.tsx index 8009bf66..83b666ef 100644 --- a/app/src/renderer/hub/Dashboard.tsx +++ b/app/src/renderer/hub/Dashboard.tsx @@ -8,6 +8,7 @@ import { ParentSize } from '@visx/responsive'; import { TaskInput } from './TaskInput'; import type { TaskInputHandle } from './TaskInput'; import { DashboardBackground } from './DashboardBackground'; +import { useUIStore } from './state/uiStore'; import type { AgentSession } from './types'; const HOUR = 3600 * 1000; @@ -117,6 +118,21 @@ export function Dashboard({ sessions, onSubmitTask }: DashboardProps): React.Rea return () => window.clearTimeout(id); }, []); + // Consume a one-shot pending prompt from the UI store. Set by ChatPane + // when the user quotes from a terminal session via "Reference in new + // chat" — we drop the quoted block into the input and clear so the + // signal doesn't replay on a later mount. + useEffect(() => { + const pending = useUIStore.getState().pendingDashboardPrompt; + if (!pending) return; + console.log('[Dashboard] consuming pendingDashboardPrompt', { length: pending.length }); + const id = window.setTimeout(() => { + taskInputRef.current?.setText(pending); + useUIStore.getState().setPendingDashboardPrompt(null); + }, 0); + return () => window.clearTimeout(id); + }, []); + useEffect(() => { const hasFiles = (e: DragEvent) => !!e.dataTransfer && Array.from(e.dataTransfer.types).includes('Files'); diff --git a/app/src/renderer/hub/HubApp.tsx b/app/src/renderer/hub/HubApp.tsx index 3227761c..b6d55c44 100644 --- a/app/src/renderer/hub/HubApp.tsx +++ b/app/src/renderer/hub/HubApp.tsx @@ -24,11 +24,6 @@ type SettingsOpenPayload = { }; let sessionCounter = MOCK_SESSIONS.length + 1; -let entryCounter = 1000; - -function uid(prefix: string): string { - return `${prefix}-${++entryCounter}`; -} function PlusIcon(): React.ReactElement { return ( @@ -38,78 +33,6 @@ function PlusIcon(): React.ReactElement { ); } -function 
GridIcon(): React.ReactElement { - return ( - - - - - - - ); -} - -function DashboardIcon(): React.ReactElement { - return ( - - - - - - ); -} - -function SettingsIcon(): React.ReactElement { - return ( - - - - - ); -} - -interface HubViewToggleProps { - viewMode: ViewMode; - setViewMode: (mode: ViewMode) => void; - onOpenSettings: () => void; - tipDashboard: string; - tipGrid: string; - tipSettings: string; -} - -const HubViewToggle = React.memo(function HubViewToggle({ - viewMode, setViewMode, onOpenSettings, tipDashboard, tipGrid, tipSettings, -}: HubViewToggleProps): React.ReactElement { - return ( -
- - - -
- ); -}); - export function HubApp(): React.ReactElement { const isMock = import.meta.env.VITE_MOCK_MODE === '1'; const [mockSessions, setMockSessions] = useState(isMock ? MOCK_SESSIONS : []); @@ -184,10 +107,6 @@ export function HubApp(): React.ReactElement { return saved === 'top' ? 'top' : 'side'; } catch { return 'side'; } }); - const setTabsPosition = useCallback((pos: 'side' | 'top') => { - setTabsPositionRaw(pos); - try { window.localStorage.setItem('hub-tabs-position', pos); } catch { /* ignore */ } - }, []); useEffect(() => { const onChange = (e: Event): void => { const next = (e as CustomEvent<{ position: 'side' | 'top' }>).detail?.position; @@ -411,10 +330,10 @@ export function HubApp(): React.ReactElement { knownIdsRef.current = new Set(sessions.map((s) => s.id)); if (!newSession) return; const globalIdx = sessions.findIndex((s) => s.id === newSession.id); - console.log('[HubApp] new session detected -> focus', { id: newSession.id, globalIdx }); - setViewMode('grid'); + console.log('[HubApp] new session detected -> chat', { id: newSession.id, globalIdx }); + enterChat(newSession.id); setFocusIndex(globalIdx); - }, [sessions, setViewMode]); + }, [sessions, enterChat]); useEffect(() => { const visible = sessions; @@ -545,21 +464,6 @@ export function HubApp(): React.ReactElement { console.log('[HubApp] selectSession', { id }); }, [sessions]); - const visibleCount = sessions.length; - const gridPageSize = Math.max(1, gridColumns); - const gridTotalPages = Math.max(1, Math.ceil(visibleCount / gridPageSize)); - const gridSafePage = Math.min(gridPage, gridTotalPages - 1); - const goToPage = useCallback((target: number) => { - const clamped = Math.max(0, Math.min(target, gridTotalPages - 1)); - const visible = sessions; - const firstOnPage = visible[clamped * gridPageSize]; - if (firstOnPage) { - const globalIdx = sessions.findIndex((s) => s.id === firstOnPage.id); - if (globalIdx >= 0) setFocusIndex(globalIdx); - } - setGridPage(clamped); - }, [gridTotalPages, 
gridPageSize, sessions]); - const selectedSessionId = sessions[focusIndex]?.id ?? null; return ( @@ -572,16 +476,7 @@ export function HubApp(): React.ReactElement { settingsShortcut={shortcutFor('goto.settings')} />
-
- openSettingsPage()} - tipDashboard={tip('Dashboard', 'goto.dashboard')} - tipGrid={tip('Grid view', 'goto.agents')} - tipSettings={tip('Settings', 'goto.settings')} - /> -
+
{/* Grid density + pager removed — single-pane (1x1) layout. */} {viewMode !== 'dashboard' && ( @@ -599,7 +494,6 @@ export function HubApp(): React.ReactElement { - + {lockedEngine + ? ( + + + + {ENGINE_DISPLAY_NAMES[lockedEngine] ?? lockedEngine} + + + ) + : + } { const sess = s.byId[sessionId]; - if (!sess) return { hasBrowser: false, status: 'idle' as const }; - return { hasBrowser: !!sess.hasBrowser, status: sess.status }; + if (!sess) return { hasBrowser: false, status: 'idle' as const, lastUrl: null as string | null }; + return { hasBrowser: !!sess.hasBrowser, status: sess.status, lastUrl: sess.lastUrl ?? null }; }), ); + // Treat only real navigations as "we have a browser to show". about:blank, + // chrome://newtab, data: / file: URLs and any pre-navigation state get + // filtered out so the preview doesn't render an empty white tile. + const isRealUrl = (() => { + const u = sessionInfo.lastUrl; + if (!u) return false; + try { + const parsed = new URL(u); + if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false; + if (!parsed.host) return false; + return true; + } catch { + return false; + } + })(); + + const hostLabel = (() => { + if (!isRealUrl) return null; + const u = sessionInfo.lastUrl as string; + try { return new URL(u).host; } catch { return u; } + })(); + + const shouldShow = sessionInfo.hasBrowser && isRealUrl; + const [frame, setFrame] = useState(null); const [expanding, setExpanding] = useState(false); const cardRef = useRef(null); + // Diagnostic refs — kept out of state to avoid re-rendering on every frame. + const lastFrameAtRef = useRef(null); + const lastFingerprintRef = useRef(null); + const dupCountRef = useRef(0); + const frameCountRef = useRef(0); + const startedAtRef = useRef(null); + const stallWarnedRef = useRef(false); + // Listen for frames unconditionally — cheap, and a late-arriving stream // attaches without a remount. 
useEffect(() => { const api = window.electronAPI; if (!api) return; - let count = 0; let lastLog = 0; return api.on.sessionPreviewFrame((id, dataB64) => { - if (id !== sessionId) return; - count += 1; + if (id !== sessionId) { + // Different session's frame arriving on this subscription — diagnostic + // signal in case routing ever crosses streams. + console.debug('[BrowserPreview] frame.dropped.foreign-session', { mySession: sessionId, frameFor: id }); + return; + } const now = Date.now(); + const prevAt = lastFrameAtRef.current; + const gapMs = prevAt == null ? null : now - prevAt; + lastFrameAtRef.current = now; + frameCountRef.current += 1; + + // Cheap content fingerprint to catch "screencast running but page not + // repainting" — captureScreenshot keeps returning the same JPEG bytes + // when nothing on the page has changed (or when the WebContents is + // suspended because its BrowserView is detached). + const fp = `${dataB64.length}:${dataB64.slice(0, 32)}:${dataB64.slice(-16)}`; + const dup = fp === lastFingerprintRef.current; + if (dup) dupCountRef.current += 1; else dupCountRef.current = 0; + lastFingerprintRef.current = fp; + + stallWarnedRef.current = false; + if (now - lastLog > 5000) { lastLog = now; - console.log('[BrowserPreview] frames received', { sessionId, count, bytes: dataB64.length }); + console.log('[BrowserPreview] frames.received', { + sessionId, + count: frameCountRef.current, + bytes: dataB64.length, + gapMs, + dupCount: dupCountRef.current, + }); + } + if (dupCountRef.current === 5) { + console.warn('[BrowserPreview] frames.stale.duplicates', { + sessionId, + dupCount: dupCountRef.current, + message: 'Same screenshot bytes 5x in a row — page likely suspended or not repainting', + }); } setFrame(dataB64); }); }, [sessionId]); - // Start/stop the screencast in lockstep with hasBrowser. When the agent - // creates a browser later, hasBrowser flips true and this effect re-runs. + // Start/stop the screencast in lockstep with shouldShow. 
We wait for an + // actual http(s) navigation before starting — no point attaching the + // debugger to capture about:blank. useEffect(() => { const api = window.electronAPI; - if (!api || !sessionInfo.hasBrowser) { + if (!api || !shouldShow) { + if (!shouldShow && startedAtRef.current != null) { + console.log('[BrowserPreview] hide: clearing frame', { + sessionId, + framesSeen: frameCountRef.current, + }); + } + startedAtRef.current = null; + lastFrameAtRef.current = null; + frameCountRef.current = 0; + dupCountRef.current = 0; + lastFingerprintRef.current = null; + stallWarnedRef.current = false; setFrame(null); return; } let cancelled = false; + const startedAt = Date.now(); + startedAtRef.current = startedAt; + console.log('[BrowserPreview] previewStart.requested', { sessionId }); api.sessions.previewStart(sessionId, { maxWidth: PREVIEW_W * 2, maxHeight: PREVIEW_H * 2 }) .then((res) => { + const elapsedMs = Date.now() - startedAt; if (cancelled) { + console.log('[BrowserPreview] previewStart.cancelled-after-resolve', { sessionId, elapsedMs, ok: res.ok }); api.sessions.previewStop(sessionId).catch(() => { /* ignore */ }); return; } - if (!res.ok) { - console.warn('[BrowserPreview] previewStart not ok', { sessionId, reason: res.reason }); + if (res.ok) { + console.log('[BrowserPreview] previewStart.ok', { sessionId, elapsedMs }); + } else { + console.warn('[BrowserPreview] previewStart.not-ok', { sessionId, elapsedMs, reason: res.reason }); } }) - .catch((err) => console.error('[BrowserPreview] previewStart threw', err)); + .catch((err) => console.error('[BrowserPreview] previewStart.threw', { sessionId, error: (err as Error).message })); return () => { cancelled = true; api.sessions.previewStop(sessionId).catch(() => { /* ignore */ }); }; - }, [sessionId, sessionInfo.hasBrowser]); + }, [sessionId, shouldShow]); + + // Stall watchdog — when shouldShow is true and previewStart resolved, we + // expect a frame within ~1.5s (poll interval is 1s). 
If nothing arrives + // within 3s, log a STALL warning so the user can grep for it. The + // common cause is the WebContents being suspended while detached from + // the window — clicking through to the browser view forces a paint and + // the next captureScreenshot picks it up. + useEffect(() => { + if (!shouldShow) return; + const handle = setInterval(() => { + const startedAt = startedAtRef.current; + if (startedAt == null) return; + const now = Date.now(); + const sinceStart = now - startedAt; + const lastAt = lastFrameAtRef.current; + const sinceLast = lastAt == null ? null : now - lastAt; + + // Case 1: never received a frame. + if (lastAt == null && sinceStart > 3000 && !stallWarnedRef.current) { + stallWarnedRef.current = true; + console.warn('[BrowserPreview] STALL.no-first-frame', { + sessionId, + sinceStartMs: sinceStart, + message: 'previewStart resolved but no session-preview-frame received — check SessionScreencast logs', + }); + return; + } + // Case 2: had frames, then they stopped. + if (sinceLast != null && sinceLast > 4000 && !stallWarnedRef.current) { + stallWarnedRef.current = true; + console.warn('[BrowserPreview] STALL.frames-stopped', { + sessionId, + sinceLastFrameMs: sinceLast, + framesSeen: frameCountRef.current, + message: 'Frames were flowing then stopped — debugger may have errored, see SessionScreencast.capture.error logs', + }); + } + }, 1000); + return () => clearInterval(handle); + }, [sessionId, shouldShow]); const onClick = useCallback(() => { - if (!sessionInfo.hasBrowser) return; setExpanding(true); setTimeout(() => onExpand(), 220); - }, [sessionInfo.hasBrowser, onExpand]); + }, [onExpand]); + + // Don't render the rail at all until the agent has actually navigated to + // a real URL. Prevents the dead/blank tile that appears while the browser + // is still on about:blank. + if (!shouldShow) return null; - const disabled = !sessionInfo.hasBrowser; return ( - + +
); } diff --git a/app/src/renderer/hub/chat/ChatPane.tsx b/app/src/renderer/hub/chat/ChatPane.tsx index 0790aef8..fcc1d3db 100644 --- a/app/src/renderer/hub/chat/ChatPane.tsx +++ b/app/src/renderer/hub/chat/ChatPane.tsx @@ -4,6 +4,7 @@ import { TaskInput, type TaskInputHandle, type TaskInputSubmission } from '../Ta import { ChatTranscript } from './ChatTranscript'; import { BrowserPreview } from './BrowserPreview'; import { useSessionsStore } from '../state/sessionsStore'; +import { useUIStore } from '../state/uiStore'; import { STATUS_LABEL } from '../constants'; import { useTextSelection } from './useTextSelection'; import { QuoteSelectionButton } from './QuoteSelectionButton'; @@ -82,27 +83,49 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps const taskInputRef = useRef(null); const selection = useTextSelection(transcriptRef); const [quotedText, setQuotedText] = useState(null); - const [editing, setEditing] = useState(false); const toast = useToast(); // Clear the active quote when switching sessions so it doesn't leak across. 
- useEffect(() => { setQuotedText(null); setEditing(false); }, [sessionId]); + useEffect(() => { setQuotedText(null); }, [sessionId]); - const onEditMessage = useCallback((text: string) => { - console.log('[ChatPane] edit message', { length: text.length }); - setEditing(true); + const onEditMessage = useCallback(async (text: string) => { + const api = window.electronAPI; + if (!api) return; + console.log('[ChatPane] editAndRerun', { sessionId, length: text.length }); setQuotedText(null); - taskInputRef.current?.setText(text); - }, []); + try { + const res = await api.sessions.editAndRerun(sessionId, text); + if (res?.error) { + console.error('[ChatPane] editAndRerun error', res.error); + toast.show({ variant: 'error', title: 'Edit failed', message: res.error }); + } else { + toast.show({ variant: 'success', title: 'Conversation reset with edited prompt' }); + } + } catch (err) { + console.error('[ChatPane] editAndRerun threw', err); + toast.show({ variant: 'error', title: 'Edit failed', message: String(err) }); + } + }, [sessionId, toast]); const onShare = useCallback(() => { toast.show({ variant: 'info', title: 'Share coming soon', message: 'HTML export is wired but not yet implemented.' }); }, [toast]); + // Terminal sessions can't accept follow-ups, so the floating Quote button + // re-targets the Dashboard's TaskInput instead of this pane's composer. + // We seed a one-shot prompt in the UI store and navigate home; Dashboard + // consumes it on mount. 
+ const isTerminal = header?.canResume === false || header?.status === 'stopped'; const onQuote = useCallback((text: string) => { - console.log('[ChatPane] quote', { length: text.length }); + console.log('[ChatPane] quote', { length: text.length, isTerminal }); + if (isTerminal) { + const composed = formatUserMessageWithQuote(text, ''); + useUIStore.getState().setPendingDashboardPrompt(composed); + onExit(); + return; + } setQuotedText(text); - }, []); + }, [isTerminal, onExit]); const onSubmit = useCallback( async (sub: TaskInputSubmission) => { @@ -111,24 +134,6 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps console.warn('[ChatPane] no electronAPI'); return; } - if (editing) { - console.log('[ChatPane] editAndRerun submit', { sessionId, promptLength: sub.prompt.length }); - try { - const res = await api.sessions.editAndRerun(sessionId, sub.prompt); - if (res?.error) { - console.error('[ChatPane] editAndRerun error', res.error); - toast.show({ variant: 'error', title: 'Edit failed', message: res.error }); - } else { - setEditing(false); - setQuotedText(null); - toast.show({ variant: 'success', title: 'Conversation reset with edited prompt' }); - } - } catch (err) { - console.error('[ChatPane] editAndRerun threw', err); - toast.show({ variant: 'error', title: 'Edit failed', message: String(err) }); - } - return; - } const composed = formatUserMessageWithQuote(quotedText, sub.prompt); console.log('[ChatPane] resume submit', { sessionId, @@ -145,16 +150,9 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps console.error('[ChatPane] resume threw', err); } }, - [sessionId, quotedText, editing, toast], + [sessionId, quotedText], ); - const onCancel = useCallback(() => { - const api = window.electronAPI; - if (!api) return; - console.log('[ChatPane] cancel', { sessionId }); - api.sessions.cancel(sessionId).catch((err) => console.error('[ChatPane] cancel failed', err)); - }, [sessionId]); - const onRerun 
= useCallback(() => { const api = window.electronAPI; if (!api) return; @@ -172,6 +170,56 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps .catch((err) => console.error('[ChatPane] resume failed', err)); }, [sessionId]); + const onPause = useCallback(() => { + const api = window.electronAPI; + if (!api) return; + console.log('[ChatPane] pause', { sessionId }); + api.sessions.pause(sessionId).catch((err) => console.error('[ChatPane] pause failed', err)); + }, [sessionId]); + + // Two-step Escape to pause a running task. First press arms the action + // and shows a hint above the composer; second press within the timeout + // actually pauses. No-op when the agent is idle/paused/terminal. + const isBusy = header?.status === 'running' || header?.status === 'stuck'; + const [escArmed, setEscArmed] = useState(false); + const escTimerRef = useRef(null); + const disarmEsc = useCallback(() => { + setEscArmed(false); + if (escTimerRef.current != null) { + window.clearTimeout(escTimerRef.current); + escTimerRef.current = null; + } + }, []); + // Disarm whenever the agent leaves the busy state (e.g. it finished on + // its own) so a stale arm doesn't carry over into the next run. 
+ useEffect(() => { + if (!isBusy && escArmed) disarmEsc(); + }, [isBusy, escArmed, disarmEsc]); + useEffect(() => { + if (!isBusy) return; + const handler = (e: KeyboardEvent): void => { + if (e.key !== 'Escape') return; + const target = e.target as HTMLElement | null; + const tag = target?.tagName; + if (tag === 'INPUT' || tag === 'TEXTAREA' || target?.isContentEditable) return; + e.preventDefault(); + if (escArmed) { + disarmEsc(); + onPause(); + return; + } + setEscArmed(true); + if (escTimerRef.current != null) window.clearTimeout(escTimerRef.current); + escTimerRef.current = window.setTimeout(() => { + setEscArmed(false); + escTimerRef.current = null; + }, 2500); + }; + window.addEventListener('keydown', handler); + return () => window.removeEventListener('keydown', handler); + }, [isBusy, escArmed, onPause, disarmEsc]); + useEffect(() => () => { if (escTimerRef.current != null) window.clearTimeout(escTimerRef.current); }, []); + const composer = useMemo(() => { if (!header) return null; const isTerminal = header.canResume === false || header.status === 'stopped'; @@ -191,24 +239,20 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps const isPaused = header.status === 'paused'; - // While running, still allow follow-ups — backend queues them (resume() - // returns `queued: true` if mid-step). Show a small hint above the input. + // While running, follow-ups are queued by the backend. No persistent hint + // — Escape pauses the run instead (see the global keydown effect above). + // First Esc shows a one-shot "press again" confirmation; second Esc within + // the timeout actually pauses. return ( <> - {isBusy && ( -

- Agent is {header.status === 'stuck' ? 'stuck' : 'running'} — your message will be queued. - {' '} - + {escArmed && isBusy && ( +

+ Press Esc again to pause chat

)} {isPaused && (

- Agent is paused. + Chat paused. {' '}

)} - {editing && ( -

- Editing your first message — submitting will rewrite the conversation from here. - {' '} - -

- )}
@@ -246,7 +280,7 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps /> ); - }, [header, onSubmit, onCancel, onExit, onRerun, onResumeRun, quotedText, editing]); + }, [header, onSubmit, onExit, onRerun, onResumeRun, quotedText, escArmed]); if (!header) { return ( @@ -319,7 +353,11 @@ export function ChatPane({ sessionId, onSwitchToBrowser, onExit }: ChatPaneProps {composer}
- +
); } diff --git a/app/src/renderer/hub/chat/ChatTranscript.tsx b/app/src/renderer/hub/chat/ChatTranscript.tsx index 4a93f8c9..192328be 100644 --- a/app/src/renderer/hub/chat/ChatTranscript.tsx +++ b/app/src/renderer/hub/chat/ChatTranscript.tsx @@ -19,20 +19,55 @@ function ThinkingIndicator({ since }: { since: number }): React.ReactElement { interface ChatTranscriptProps { sessionId: string; - onEditMessage?: (text: string) => void; + onEditMessage?: (text: string, rawIdx?: number) => void; onShare?: () => void; } const PIN_THRESHOLD_PX = 32; +/** + * Native `scrollTo({ behavior: 'smooth' })` is fast and uses a snappy curve + * we can't tune. For the "new user message pushes everything up" moment we + * want a slightly longer, softer ease so the prior turn feels like it + * glides off rather than snapping. raf-based + ease-out-cubic. + * + * Aborts on user wheel/touch so manual scrolling always wins. + */ +function smoothScrollTo(el: HTMLElement, top: number, durationMs: number): void { + const start = el.scrollTop; + const delta = top - start; + if (Math.abs(delta) < 1) { el.scrollTop = top; return; } + const t0 = performance.now(); + let aborted = false; + const abort = (): void => { aborted = true; cleanup(); }; + const cleanup = (): void => { + el.removeEventListener('wheel', abort); + el.removeEventListener('touchmove', abort); + }; + el.addEventListener('wheel', abort, { passive: true }); + el.addEventListener('touchmove', abort, { passive: true }); + const ease = (x: number): number => 1 - Math.pow(1 - x, 3); + const tick = (now: number): void => { + if (aborted) return; + const x = Math.min(1, (now - t0) / durationMs); + el.scrollTop = start + delta * ease(x); + if (x < 1) { + requestAnimationFrame(tick); + } else { + cleanup(); + } + }; + requestAnimationFrame(tick); +} + export const ChatTranscript = forwardRef(function ChatTranscript({ sessionId, onEditMessage, onShare }, fwdRef): React.ReactElement | null { // Subscribe only to this session's output + 
createdAt. Other sessions' // updates do not re-render this component. const sessionSlice = useSessionsStore( - useShallow((s): { output: AgentSession['output']; createdAt: number; status: AgentSession['status']; prompt: string } | null => { + useShallow((s): { output: AgentSession['output']; outputTimestamps: number[] | undefined; createdAt: number; status: AgentSession['status']; prompt: string } | null => { const sess = s.byId[sessionId]; if (!sess) return null; - return { output: sess.output, createdAt: sess.createdAt, status: sess.status, prompt: sess.prompt }; + return { output: sess.output, outputTimestamps: sess.outputTimestamps, createdAt: sess.createdAt, status: sess.status, prompt: sess.prompt }; }), ); @@ -40,6 +75,12 @@ export const ChatTranscript = forwardRef(fu useImperativeHandle(fwdRef, () => containerRef.current as HTMLDivElement, []); const pinnedRef = useRef(true); const lastTurnsLenRef = useRef(0); + // Tracks the wall-clock time of the most recent agent activity (any change + // to the latest entry — new entry, streamed token, tool_result landing). + // Entry timestamps are set at creation, so a long streaming "text" entry + // would otherwise leave the Working timer counting from when streaming + // started; we want it to read time-since-last-token instead. 
+ const lastActivityRef = useRef<{ key: string; at: number }>({ key: '', at: 0 }); const turns = useMemo(() => { if (!sessionSlice) return []; @@ -49,6 +90,7 @@ export const ChatTranscript = forwardRef(fu status: 'idle', createdAt: sessionSlice.createdAt, output: sessionSlice.output, + outputTimestamps: sessionSlice.outputTimestamps, }; const { entries } = adaptSession(fake); // SessionManager is supposed to emit `session.prompt` as a leading @@ -67,6 +109,38 @@ export const ChatTranscript = forwardRef(fu return groupIntoTurns(entries); }, [sessionId, sessionSlice]); + // Maintain a CSS variable for the latest turn's agent area min-height so + // that, when the new user bubble snaps to TOP_GAP_PX below the viewport + // top, the agent area below it fills the viewport and its bottom rests + // exactly at the transcript's bottom padding (which keeps content above + // the composer). Recomputes when the container resizes OR when the + // latest user bubble's height changes. + useEffect(() => { + const el = containerRef.current; + if (!el) return; + const apply = (): void => { + const cs = getComputedStyle(el); + const padTop = parseFloat(cs.paddingTop) || 0; + const padBot = parseFloat(cs.paddingBottom) || 0; + const latest = el.querySelector('.chat-turn--latest') as HTMLElement | null; + const bubble = latest?.querySelector('.chat-bubble__wrap') as HTMLElement | null; + const bubbleH = bubble ? bubble.offsetHeight : 0; + // After scrolling so the bubble's TOP sits at padTop in viewport, + // the agent area starts at padTop + bubbleH and should extend to + // clientHeight - padBot. So required agent height = clientHeight - + // padTop - bubbleH - padBot. + const needed = el.clientHeight - padTop - bubbleH - padBot; + el.style.setProperty('--chat-agent-latest-h', `${Math.max(0, needed)}px`); + }; + apply(); + const ro = new ResizeObserver(apply); + ro.observe(el); + // Re-measure when the latest bubble itself resizes (long messages, edits). 
+ const bubble = el.querySelector('.chat-turn--latest .chat-bubble__wrap') as HTMLElement | null; + if (bubble) ro.observe(bubble); + return () => ro.disconnect(); + }, [turns]); + // Scroll-pin: stay glued to bottom when user is at the bottom; release // when user scrolls up. New user_input forces re-pin. const onScroll = (): void => { @@ -79,12 +153,45 @@ export const ChatTranscript = forwardRef(fu useLayoutEffect(() => { const el = containerRef.current; if (!el) return; - // Force-pin when a new user_input lands (new turn). + // When a new user turn lands, scroll so the latest turn sits at the top + // of the viewport (ChatGPT-style). The .chat-turn--latest min-height in + // chat.css reserves enough space below for that scroll to be possible. const newUserTurn = turns.length > lastTurnsLenRef.current - && turns[turns.length - 1]?.userEntry !== null; - if (newUserTurn) pinnedRef.current = true; + && turns[turns.length - 1]?.userEntry != null; lastTurnsLenRef.current = turns.length; + if (newUserTurn) { + const latest = el.querySelector('.chat-turn--latest') as HTMLElement | null; + if (latest) { + // Anchor on the *top* of the latest user bubble. The transcript's + // top padding (~28px) acts as TOP_GAP between viewport top and the + // bubble. The agent area min-height (set by the ResizeObserver + // above) guarantees there's enough scroll content to reach this + // target, so the bubble lands exactly TOP_GAP from the viewport + // top and the agent's response fills the rest of the viewport + // above the composer-clearance bottom padding. 
+ const latestUserBubble = latest.querySelector('.chat-bubble__wrap') as HTMLElement | null; + + const containerRect = el.getBoundingClientRect(); + const padTop = parseFloat(getComputedStyle(el).paddingTop) || 0; + let top: number; + if (latestUserBubble) { + const bubbleRect = latestUserBubble.getBoundingClientRect(); + // Bubble top in scroll-content coords: + const bubbleTop = bubbleRect.top - containerRect.top + el.scrollTop; + // Place that top at exactly padTop below viewport top. + top = bubbleTop - padTop; + } else { + top = latest.offsetTop - padTop; + } + smoothScrollTo(el, Math.max(0, top), 520); + } else { + el.scrollTop = el.scrollHeight; + } + pinnedRef.current = false; + return; + } + if (pinnedRef.current) { el.scrollTop = el.scrollHeight; } @@ -107,21 +214,24 @@ export const ChatTranscript = forwardRef(fu // calls landed and resolved — the layout shift was worse than the duplication. const lastTurn = turns[turns.length - 1]; const showThinking = isRunning; - // Elapsed counter shows time since the most recent activity — prefer an - // in-flight tool_call (what the user is waiting on), then the latest agent - // entry of any kind, then the turn-start user_input, then session creation. - let since = lastTurn?.userEntry?.timestamp ?? sessionSlice.createdAt; + // Elapsed counter shows time since the current step began. A "step" boundary + // is a new agent entry appearing OR a tool_call's result landing. We do NOT + // key on streamed content length — otherwise every token would reset the + // marker and the timer would stick at 0s during long streams. + let activityKey = `turn:${lastTurn?.id ?? ''}|user:${lastTurn?.userEntry?.id ?? 
''}`; if (lastTurn && lastTurn.agentEntries.length > 0) { const last = lastTurn.agentEntries[lastTurn.agentEntries.length - 1]; - since = last.timestamp; - for (let i = lastTurn.agentEntries.length - 1; i >= 0; i--) { - const e = lastTurn.agentEntries[i]; - if (e.type === 'tool_call' && !e.result) { - since = e.timestamp; - break; - } - } + activityKey += `|n:${lastTurn.agentEntries.length}|id:${last.id}|r:${last.result ? '1' : '0'}`; + } + const now = Date.now(); + if (lastActivityRef.current.key !== activityKey) { + lastActivityRef.current = { key: activityKey, at: now }; } + // Fallback chain: last activity → first user message → session createdAt. + // (Activity ref is 0 on first render before any output exists.) + const since = lastActivityRef.current.at > 0 + ? lastActivityRef.current.at + : (lastTurn?.userEntry?.timestamp ?? sessionSlice.createdAt); if (turns.length === 0) { return ( @@ -131,10 +241,6 @@ export const ChatTranscript = forwardRef(fu ); } - // Only the very first user_input can be edited end-to-end today — the - // backend rerun primitive replays the conversation from session.prompt, so - // editing a follow-up message would silently discard everything after it. - // Find the index of the first turn with a real user entry. const firstUserTurnIdx = turns.findIndex((t) => t.userEntry !== null); return ( @@ -146,6 +252,7 @@ export const ChatTranscript = forwardRef(fu inflightSince={showThinking && i === turns.length - 1 ? since : undefined} onEditMessage={i === firstUserTurnIdx ? onEditMessage : undefined} onShare={i === firstUserTurnIdx ? 
onShare : undefined} + isLatest={turns.length > 1 && i === turns.length - 1} /> ))} diff --git a/app/src/renderer/hub/chat/ChatTurn.tsx b/app/src/renderer/hub/chat/ChatTurn.tsx index 28b2c36d..2f414354 100644 --- a/app/src/renderer/hub/chat/ChatTurn.tsx +++ b/app/src/renderer/hub/chat/ChatTurn.tsx @@ -50,6 +50,28 @@ function UserBubble({ content, onEdit, onShare }: { const [expanded, setExpanded] = useState(false); const clamped = isLong && !expanded; const toast = useToast(); + const [editing, setEditing] = useState(false); + const [draft, setDraft] = useState(body); + const editTextareaRef = useRef(null); + + const resizeEditArea = (): void => { + const ta = editTextareaRef.current; + if (!ta) return; + ta.style.height = 'auto'; + ta.style.height = `${ta.scrollHeight}px`; + }; + + useEffect(() => { if (!editing) setDraft(body); }, [body, editing]); + useEffect(() => { + if (!editing) return; + requestAnimationFrame(() => { + const ta = editTextareaRef.current; + if (!ta) return; + resizeEditArea(); + ta.focus(); + ta.setSelectionRange(ta.value.length, ta.value.length); + }); + }, [editing]); const handleCopy = async () => { try { @@ -60,6 +82,58 @@ function UserBubble({ content, onEdit, onShare }: { } }; + const startEdit = (): void => { + setDraft(body); + setEditing(true); + }; + const cancelEdit = (): void => { + setEditing(false); + setDraft(body); + }; + const submitEdit = (): void => { + const next = draft.trim(); + if (!next || !onEdit) return; + onEdit(next); + setEditing(false); + }; + + if (editing) { + return ( +
+
+ {quote && ( +
{quote}
+ )} +