From 2c9d7b2e8cc4aa25638a3160c68ae870934b897c Mon Sep 17 00:00:00 2001 From: Devin Oldenburg Date: Sun, 21 Jun 2026 14:56:04 +0200 Subject: [PATCH 1/2] fix(config): reference the guard by its installed path, not the package name The plugin options in opencode.json must reference ./plugins/goal-guard.js (the guard); the package name resolves to the TUI sidebar export, so the previous snippets in the goal-config tool, the customization skill, and the /goal-mode-customize command would not have applied guard options. Align all of them (and the tool's explain output) with ARCHITECTURE.md's canonical form. --- commands/goal-mode-customize.md | 2 +- scripts/goal-config.mjs | 5 ++++- skills/goal-mode-customization/SKILL.md | 2 +- tests/goal-config.test.mjs | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/commands/goal-mode-customize.md b/commands/goal-mode-customize.md index 34d5f62..a095556 100644 --- a/commands/goal-mode-customize.md +++ b/commands/goal-mode-customize.md @@ -23,7 +23,7 @@ env var, or the plugin `options` object in `opencode.json`. A bundled tool, 2. **Apply** — edit the user's `opencode.json` so the plugin entry carries the options: ```jsonc - "plugin": [["opencode-goal-mode", { "yolo": true, "allowCommands": ["^ruff( |$)"] }]] + "plugin": [["./plugins/goal-guard.js", { "yolo": true, "allowCommands": ["^ruff( |$)"] }]] ``` (or export the `GOAL_GUARD_*` env equivalents). Lists accept an array or a comma/newline string; an invalid regex is ignored, never fatal. 
diff --git a/scripts/goal-config.mjs b/scripts/goal-config.mjs index bbb2031..86f8c8f 100644 --- a/scripts/goal-config.mjs +++ b/scripts/goal-config.mjs @@ -17,7 +17,10 @@ */ import { DEFAULT_CONFIG, CONFIG_DOCS, envVarFor, resolveConfig } from "../plugins/goal-guard/config.js"; -const PLUGIN = "opencode-goal-mode"; +// The guard plugin is referenced by its installed path in opencode.json's plugin +// array (this is how OpenCode passes it options); the package name resolves to the +// TUI sidebar, not the guard. See ARCHITECTURE.md → Configuration. +const PLUGIN = "./plugins/goal-guard.js"; function typeOf(key) { const d = DEFAULT_CONFIG[key]; diff --git a/skills/goal-mode-customization/SKILL.md b/skills/goal-mode-customization/SKILL.md index 2ce2d41..7235635 100644 --- a/skills/goal-mode-customization/SKILL.md +++ b/skills/goal-mode-customization/SKILL.md @@ -23,7 +23,7 @@ preview, and verification for you: **`scripts/goal-config.mjs`**. (the tool prints the exact snippet; `recipe ` gives a paste-ready one): ```jsonc "plugin": [ - ["opencode-goal-mode", { "yolo": true, "allowCommands": ["^ruff( |$)"] }] + ["./plugins/goal-guard.js", { "yolo": true, "allowCommands": ["^ruff( |$)"] }] ] ``` Equivalent env vars work too (`GOAL_GUARD_YOLO=1`, …) — `explain` shows both. 
diff --git a/tests/goal-config.test.mjs b/tests/goal-config.test.mjs index 8fa1bea..c1a4840 100644 --- a/tests/goal-config.test.mjs +++ b/tests/goal-config.test.mjs @@ -35,7 +35,7 @@ test("goal-config list mentions every config key", () => { test("goal-config explain shows how to set + verify a key", () => { const r = cli("explain", "yolo"); assert.equal(r.status, 0, r.stderr); - assert.match(r.stdout, /opencode-goal-mode/); + assert.match(r.stdout, /goal-guard\.js/); assert.match(r.stdout, /GOAL_GUARD_YOLO/); assert.match(r.stdout, /effective/); }); From 93ffd1a4aea4ee6e5567073f54769cdc9c9582d9 Mon Sep 17 00:00:00 2001 From: Devin Oldenburg Date: Sun, 21 Jun 2026 14:56:05 +0200 Subject: [PATCH 2/2] =?UTF-8?q?docs:=20rewrite=20user-facing=20docs=20?= =?UTF-8?q?=E2=80=94=20accurate,=20current,=20professional,=20emoji-free?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README: full rewrite in a professional, marketing voice with no decorative emoji; corrected the headline benchmark numbers to the current run (92.3% detection / 0.8% false positives, ~1.35 us/cmd), completed the slash-command list, documented YOLO + allowCommands/extraDestructive + the goal-config tool, and used the canonical ./plugins/goal-guard.js config form throughout. - SECURITY: corrected the fail-open rate to ~7.7% (detection 92.3%, 8 of 104 destructive), clarified that a parser error fails closed while un-resolvable commands fail open, bumped supported versions to 0.7.x, removed emoji. - ARCHITECTURE: added the YOLO/allow-list config keys, the goal-config tool, and the skills/ installer component; completed the command list; removed a drift-prone hardcoded test-file count. - research/benchmarks.md: corrected the results table and the honest reading of the 8 misses and 5 (mostly debatable) false positives. - Regenerated the benchmark charts so the SVGs match the corrected numbers. 
--- ARCHITECTURE.md | 21 +- README.md | 378 +++++++++++++----------- SECURITY.md | 33 ++- docs/benchmarks/detection-by-family.svg | 38 ++- docs/benchmarks/external-scorecard.svg | 33 ++- docs/benchmarks/latency.svg | 14 +- docs/benchmarks/overall-scorecard.svg | 33 ++- docs/benchmarks/results.json | 86 +++++- docs/benchmarks/truthfulness-score.svg | 18 +- research/benchmarks.md | 12 +- 10 files changed, 440 insertions(+), 226 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 1a48e39..67a53c8 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -22,9 +22,9 @@ configuration directory: gates). Each is a Markdown file: YAML frontmatter (mode, permissions, color, temperature) over a system-prompt body. 2. **Commands** (`commands/*.md`) — slash commands (`/goal`, `/goal-contract`, - `/goal-review`, `/goal-evidence-map`, `/goal-status`, `/goal-repair`, - `/goal-final`) that bind a prompt template to an agent, some forced to run as - subtasks. + `/goal-review`, `/goal-evidence`, `/goal-evidence-map`, `/goal-status`, + `/goal-repair`, `/goal-reset`, `/goal-final`, `/goal-mode-customize`) that bind a + prompt template to an agent, some forced to run as subtasks. 3. **The `goal-guard` plugin** (`plugins/goal-guard.js` + `plugins/goal-guard/`) — a runtime guard that enforces review discipline, blocks destructive shell commands, preserves state across compaction and restarts, and exposes @@ -224,13 +224,18 @@ destructive blocking, network-exec blocking, completion enforcement, `autoContinue`, `programmaticReview`, `reviewIdleDeferMs`, `reviewIdleRetryMs`, `maxReviewIdleRetries`, review timeouts/polling, `maxReviewCycles`, system-state injection, persistence, contextual gates, subagent restriction, -session cache size/TTL, sidebar colours, and toasts. See README.md for the full -option table. +session cache size/TTL, sidebar colours, and toasts. 
The guard is also fully +relaxable: `yolo` drops the soft gates, `allowDestructive` drops destructive +guarding, and `allowCommands` / `extraDestructive` are per-command regex allow/deny +lists; `CONFIG_DOCS` documents every key. `scripts/goal-config.mjs` (the +`opencode-goal-mode-config` bin) lists, explains, and previews any configuration, +and a no-drift test keeps `CONFIG_DOCS` aligned with `DEFAULT_CONFIG`. See README.md +for the full option table. ## Installer -`scripts/install.mjs` recursively copies `agents/`, `commands/`, and `plugins/` -(including the nested module directory) into the target config dir, merge-registers +`scripts/install.mjs` recursively copies `agents/`, `commands/`, `skills/`, and +`plugins/` (including the nested module directory) into the target config dir, merge-registers the sidebar package in `tui.json`, clears stale TUI plugin cache entries, and records a manifest of the file hashes it wrote. Global `npm install -g` also triggers the same installer via `postinstall.mjs`. On upgrade it distinguishes @@ -240,7 +245,7 @@ supports `--uninstall` (which leaves locally-modified files in place). ## Testing -`node --test` runs the suite across 20 files: +`node --test` runs the full suite across the `tests/` directory, including: - `tests/shell.test.mjs` / `tests/shell.property.test.mjs` — analyzer against bypass and false-positive corpora. - `tests/plugin.test.mjs` — hook behavior, gating, verdicts, completion, tools, isolation. diff --git a/README.md b/README.md index 64a4695..66afbff 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@
- + # OpenCode Goal Mode -### The OpenCode agent that can't fake "done" — and can't wreck your repo doing it. +### The OpenCode agent that cannot fake "done" — and cannot wreck your repository getting there. -Give it a goal. It writes a contract, does the work, reviews itself with a fleet of -specialist subagents, and **physically cannot tell you it's finished until those reviews -actually pass**. Reach for `rm -rf` mid-run and it stops the command cold. +Give it a goal. It writes a contract, does the work, reviews itself with a bench of +specialist subagents, and **cannot tell you it is finished until those reviews +actually pass**. Reach for `rm -rf` mid-run and the command is stopped before it +executes. [![npm version](https://img.shields.io/npm/v/opencode-goal-mode?color=2da44e&label=npm)](https://www.npmjs.com/package/opencode-goal-mode) [![npm downloads](https://img.shields.io/npm/dm/opencode-goal-mode?color=2da44e)](https://www.npmjs.com/package/opencode-goal-mode) @@ -20,35 +21,36 @@ npm install -g opencode-goal-mode
-Every coding agent will happily announce "✅ Done!" over a half-finished feature and a -red test suite. Goal Mode ends that. It moves the discipline out of the prompt — where a -confident model just talks past it — and into the **harness**, where it's enforced in -code. The agent's "Goal Completed" is intercepted and rewritten to "Goal Not Completed" -unless every required review gate has a *fresh* pass. Dangerous shell commands are -blocked before they ever execute. +Most coding agents will cheerfully announce success over a half-finished feature and +a red test suite. Goal Mode ends that. It moves the discipline out of the prompt — +where a confident model simply talks past it — and into the **harness**, where it is +enforced in code. A "Goal Completed" claim is intercepted and rewritten to +"Goal Not Completed" unless every required review gate holds a *fresh* pass. +Dangerous shell commands are blocked before they ever run. -It's the difference between *asking* an agent to be careful and *making* it. +It is the difference between *asking* an agent to be careful and *making* it. ## The pitch in one minute -Goal Mode is a drop-in OpenCode package for people who want agentic coding to feel -auditable instead of theatrical. It gives your agent a contract, a live ledger, a -specialist review bench, and a command guard that all live outside the model's -memory. The result is a sharper workflow for real repositories: fewer premature -victory laps, fewer stale approvals, and a clear trail showing what changed, how -it was checked, and which gates passed. +Goal Mode is a drop-in OpenCode package for people who want agentic coding to be +auditable rather than theatrical. It gives your agent a contract, a live ledger, a +specialist review bench, and a command guard — all of which live outside the model's +memory, so the model cannot argue its way past them. 
The result is a sharper workflow +for real repositories: fewer premature victory laps, fewer stale approvals, and a +clear trail of what changed, how it was checked, and which gates passed. -What you get after install: +What you get after installing: -- A primary `goal` agent that turns requests into acceptance criteria before it edits. -- Programmatic review cycles that the guard launches automatically when work is ready. -- Freshness tracking that invalidates every review pass after the next edit. -- A quote-aware shell analyzer that blocks common destructive and remote-exec commands. -- A TUI sidebar that turns the hidden ledger into visible progress. +- A primary `goal` agent that turns a request into acceptance criteria before it edits. +- Programmatic review cycles that the guard launches automatically when the work is ready. +- Freshness tracking that invalidates every review pass the moment the next edit lands. +- A quote-aware shell analyzer that blocks destructive and remote-execution commands. +- A live TUI sidebar that turns the hidden ledger into visible progress. ## Watch it refuse to lie -The agent tries to declare victory early. The guard catches it and hands back the truth: +The agent tries to declare victory early. The guard intercepts the claim and replaces +it with the truth: ```diff - Goal Completed @@ -59,100 +61,108 @@ The agent tries to declare victory early. The guard catches it and hands back th + reviewCycles=1; missingGates=goal-security-reviewer goal-final-auditor ``` -The agent reaches for something irreversible. The guard kills it before it runs: +The agent reaches for something irreversible. The guard stops it before it runs: ```text $ rm -rf build -✕ Goal Guard blocked a destructive or high-risk bash command - (rm with recursive force deletion). Use a safer, reversible command - or ask the user to confirm. +Goal Guard blocked a destructive or high-risk bash command: `rm -rf build` +(rm with recursive force deletion). 
Use a safer, reversible command or ask the +user to confirm. ``` ![OpenCode Goal Mode sidebar preview](docs/sidebar-preview.png) -↑ While a goal runs, Goal Mode takes over the TUI sidebar with a live, evidence-aware -todo list: the goal title, gate progress, and a row per acceptance criterion and missing -reviewer — each ticking off as the work gets verified. +While a goal runs, Goal Mode takes over the TUI sidebar with a live, +evidence-aware todo list: the goal title, gate progress, and a row for each +acceptance criterion and outstanding reviewer, each one resolving as the work is +verified. -## Why you'll want it +## Why you will want it -🔒 **"Done" actually means done.** Completion is gated on real review verdicts, not vibes. -The model can't answer `Goal Completed` until every required reviewer returns -`Verdict: PASS` *after* the last edit — and the claimed `Review cycles: N` has to match -the counter the guard kept. +**"Done" actually means done.** Completion is gated on real review verdicts, not +self-assessment. The model cannot emit `Goal Completed` until every required reviewer +has returned `Verdict: PASS` *after* the last edit — and the claimed `Review cycles: N` +must match the counter the guard kept itself. -🤖 **The reviews run themselves.** When the agent stops with work outstanding, the guard -*launches the reviewer subagents itself* — security, diff, verification, and more — reads -their verdicts, and loops fix → review until they pass. You never rely on the model to -remember to check its own work. +**The reviews run themselves.** When the agent stops with work outstanding, the guard +launches the reviewer subagents on its own — security, diff, verification, and more — +reads their verdicts, and loops fix then review until they pass. You never depend on +the model remembering to check its own work. 
-♻️ **One edit reopens the gates.** Approvals are stamped with a monotonic sequence, so any -change after a review instantly goes stale and forces the relevant reviews to re-run. -There's no sneaking a "fix" in after the green light. +**One edit reopens the gates.** Approvals are stamped with a monotonic sequence, so +any change after a review immediately goes stale and forces the relevant reviews to +re-run. There is no slipping a "fix" in after the green light. -🧠 **It knows which experts to call.** Touch auth and the security reviewer becomes -mandatory. Touch a migration and the data reviewer joins. API, performance, tests, UX, -ops, docs, quality — the right specialist gates are required automatically from your goal -and your diff. +**It knows which experts to call.** Touch authentication and the security reviewer +becomes mandatory. Touch a migration and the data reviewer joins. API, performance, +tests, UX, operations, documentation, and quality each have a gate that is required +automatically from your goal and your diff. -🛡️ **Your repo survives.** A real shell tokenizer — not a brittle regex — blocks the -destructive stuff even when it's disguised: `$(rm -rf …)`, `bash -c "…"`, `/bin/rm`, -`busybox rm -rf`, `git reset --hard`, `curl | sh`. Harmless look-alikes like -`git checkout -b` sail right through. +**Your repository survives.** A real shell tokenizer — not a brittle regular +expression — blocks destructive commands even when they are disguised: `$(rm -rf …)`, +`bash -c "…"`, `/bin/rm`, `busybox rm -rf`, `git reset --hard`, and `curl | sh`. +Harmless look-alikes such as `git checkout -b` pass straight through. -🚀 **It doesn't quit on you.** An idle-but-unfinished goal gets automatically pushed -forward — told exactly what's left — until it's genuinely complete, with hard caps and a -no-progress breaker so it can never spin. 
+**It does not quit on you.** An idle but unfinished goal is pushed forward +automatically — told exactly what remains — until it is genuinely complete, with hard +caps and a no-progress breaker so it can never spin. ## The numbers -Tested on **704 real-world commands** from [tldr-pages](https://github.com/tldr-pages/tldr) -(common/linux/osx) — commands written by hundreds of contributors who've never seen this -guard: +Measured on **704 real-world commands** from [tldr-pages](https://github.com/tldr-pages/tldr) +(common, Linux, and macOS pages) — commands written by hundreds of contributors who +have never seen this guard. Reproduce these numbers with `npm run bench`. -| On 704 commands it has never seen | Regex guard | **Goal Mode** | +| On 704 commands it has never seen | Regex baseline | **Goal Mode** | | --- | :---: | :---: | -| Dangerous commands caught | 53.8% | **93.3%** | -| Safe commands wrongly blocked | 0.2% | **0.2%** | +| Dangerous commands caught | 53.8% | **92.3%** | +| Safe commands wrongly blocked | 0.2% | **0.8%** | ![Guard accuracy on real third-party commands](docs/benchmarks/external-scorecard.svg) -And it's effectively free: **~1µs per command**, hundreds of thousands of classifications -a second. Run it yourself with `npm run bench`. +Goal Mode catches **roughly three-quarters more** dangerous commands than a regex +baseline, in exchange for a small, deliberate increase in false positives. Eight of +the 104 dangerous commands remain unflagged, mostly single-target `rm`. 
Classification +is effectively free: **about 1.35 microseconds per command — over 700,000 commands a +second.** ![Per-command analysis latency](docs/benchmarks/latency.svg) -## How it stacks up +## How it compares -| | **Goal Mode** | Claude Code | Codex | +| Capability | Goal Mode | Claude Code | Codex | | --- | :---: | :---: | :---: | -| Blocks a premature "done" out of the box | **Yes** | Only via a custom hook | Review is advisory | -| Edits auto-invalidate stale approvals | **Yes** | — | — | -| Specialist reviews auto-required from the task | **Yes** | — | — | -| Destructive commands blocked by a real shell parser | **Yes** | Regex ("fragile") | Sandbox | +| Blocks a premature "done" out of the box | **Enforced** | Custom hook required | Review is advisory | +| Edits auto-invalidate stale approvals | **Enforced** | Not built in | Not built in | +| Specialist reviews auto-required from the task | **Enforced** | Not built in | Not built in | +| Destructive-command blocking by a real shell parser | **Enforced (tokenizer)** | Partial (regex) | Partial (sandbox) | -![Mechanically-enforced goal discipline vs. Claude Code and Codex](docs/benchmarks/capability-matrix.svg) +![Mechanically enforced goal discipline versus Claude Code and Codex](docs/benchmarks/capability-matrix.svg) -Full side-by-side with citations: [research/goal-mode-comparison.md](research/goal-mode-comparison.md). +Claude Code and Codex are capable tools with real mechanical surfaces of their own; +this is a comparison of one specific axis — built-in, enforced goal discipline. The +full side-by-side, with sources and review dates, is in +[research/goal-mode-comparison.md](research/goal-mode-comparison.md). ## Install -One command. Needs [Node](https://nodejs.org) 20.11+ and [OpenCode](https://opencode.ai). -macOS and Linux: +One command. Requires [Node](https://nodejs.org) 20.11 or newer and +[OpenCode](https://opencode.ai). 
Supported on macOS and Linux: ```bash npm install -g opencode-goal-mode ``` -Then **restart OpenCode**. Global installs auto-run the installer via -`postinstall`; re-run `opencode-goal-mode --global` if auto-setup fails or you -need `--force`. The installer drops the Goal agent, its reviewer subagents, slash -commands, and the guard plugin into `~/.config/opencode`, and registers the live -sidebar in `tui.json`. In the agent picker you'll see just **`goal`** — the -reviewers are subagents it drives for you. It's idempotent (re-run to upgrade), -never overwrites agents/commands/plugins you've edited (but merge-adds the -sidebar entry in `tui.json`), and `--uninstall` removes exactly what it added. -Goal Mode uses whatever model and provider OpenCode is already set up with. +Then **restart OpenCode**. A global install runs the installer automatically through +`postinstall`; re-run `opencode-goal-mode --global` if auto-setup did not finish, or +add `--force` to replace files you have edited. The installer copies the Goal agent, +its reviewer subagents, the slash commands, the customization skill, and the guard +plugin into `~/.config/opencode`, and registers the live sidebar in `tui.json`. In the +agent picker you will see a single entry, `goal` — the reviewers are subagents it +drives for you. The installer is idempotent (re-run to upgrade), records a manifest, +never overwrites files you have edited unless `--force` is passed, and `--uninstall` +removes exactly what it installed. Goal Mode uses whatever model and provider OpenCode +is already configured with.
Other ways to install @@ -176,58 +186,57 @@ cd opencode-goal-mode && npm ci && npm run install:global ``` `--global` writes to `~/.config/opencode`; no flag writes to `./.opencode`; `--target` -writes to the directory you pass. On upgrade it replaces files it owns but refuses to -clobber files you've modified unless `--force` is passed. +writes to a directory you pass. On upgrade it replaces the files it owns but refuses to +overwrite files you have modified unless `--force` is passed.
## Quick start ```bash -# After installing + restarting OpenCode, confirm the primary agent loaded: +# After installing and restarting OpenCode, confirm the primary agent loaded: opencode agent list | grep '^goal ' ``` -`opencode agent list` shows `goal (primary)` — the one agent you select. The -`goal-*` reviewer specialists also appear, each tagged `(subagent)`: those are -invoked by the Goal agent, not picked by you. A bare `grep goal` therefore prints -the whole `goal-*` family (and, depending on your config, the -`"pattern": "goal-*"` permission line that locks the subagents to the Goal -agent); the anchored `grep '^goal '` above isolates just the primary. +`opencode agent list` shows `goal (primary)` — the single agent you select. The +`goal-*` reviewer specialists also appear, each tagged `(subagent)`; those are invoked +by the Goal agent, not chosen by you. A bare `grep goal` therefore prints the whole +`goal-*` family, so the anchored `grep '^goal '` above isolates just the primary. -Then, in OpenCode, just give it a goal: +Then, inside OpenCode, give it a goal: -``` +```text /goal add rate limiting to the login endpoint and prove it works ``` It writes a contract, delegates research to subagents, implements, and verifies — then -stops and lets the guard run the reviews. It won't say `Goal Completed` until they pass. -Want to feel the seatbelt? Ask it to `rm -rf build` mid-session and watch the guard slap -it down. +stops and lets the guard run the reviews. It will not say `Goal Completed` until they +pass. To feel the guardrail directly, ask it to `rm -rf build` mid-session and watch +the command get stopped. -See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and [research/](research/) for -the platform reference, comparison, and threat model. +See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and +[research/](research/) for the platform reference, the comparison, and the threat +model. 
### What the first run looks like -- **You're in Goal Mode when the sidebar shows the goal banner.** Goal Mode is the - `goal` agent plus its guard; the live banner (objective, todos, review status) in - the TUI sidebar is the always-on indicator that it's active. Keep the sidebar - open — OpenCode's status bar doesn't expose a per-agent mode label, so the - sidebar banner is the canonical signal. -- **It won't claim done until the gates pass.** After it implements and verifies, - the guard runs the review gates; a premature `Goal Completed` is rewritten to a - visible blocked marker until every required gate passes. -- **Blocked commands tell you what and why.** When the guard stops a destructive - command it names both the offending command and the reason, e.g. - `Goal Guard blocked a destructive or high-risk bash command: `rm -rf build` (rm - -rf on a path). Use a safer, reversible command…` — so you can adjust rather - than guess. Tune this with `blockDestructive` / `toastOnBlock` (see below). - -## Configure it (or don't) - -Goal Mode works great with zero configuration. When you want to tune it, set options in -`opencode.json` or `GOAL_GUARD_*` environment variables: +- **You are in Goal Mode when the sidebar shows the goal banner.** Goal Mode is the + `goal` agent plus its guard; the live banner — objective, todos, review status — in + the TUI sidebar is the persistent indicator that it is active. Keep the sidebar open: + OpenCode's status bar does not expose a per-agent mode label, so the sidebar banner is + the canonical signal. +- **It will not claim done until the gates pass.** After it implements and verifies, the + guard runs the review gates; a premature `Goal Completed` is rewritten to a visible + blocked marker until every required gate passes. +- **Blocked commands explain what and why.** When the guard stops a destructive command + it names both the offending command and the reason, so you can adjust rather than + guess. 
Tune this behavior with `blockDestructive` and `toastOnBlock` (see below), or + turn it off entirely with YOLO mode. + +## Configure it (or do not) + +Goal Mode works with zero configuration. When you want to tune it, set options in +`opencode.json` or through `GOAL_GUARD_*` environment variables. The plugin is +referenced by its installed path, which is how OpenCode passes it options: ```jsonc { @@ -241,8 +250,8 @@ Goal Mode works great with zero configuration. When you want to tune it, set opt | --- | --- | --- | | `blockDestructive` / `GOAL_GUARD_BLOCK_DESTRUCTIVE` | `true` | Block destructive bash before execution. | | `blockNetworkExec` / `GOAL_GUARD_BLOCK_NETWORK_EXEC` | `true` | Block `curl \| sh`-style remote execution. | -| `enforceCompletion` / `GOAL_GUARD_ENFORCE_COMPLETION` | `true` | Rewrite premature `Goal Completed`. | -| `autoContinue` / `GOAL_GUARD_AUTO_CONTINUE` | `true` | Auto-continue an idle goal that isn't complete yet. | +| `enforceCompletion` / `GOAL_GUARD_ENFORCE_COMPLETION` | `true` | Rewrite a premature `Goal Completed`. | +| `autoContinue` / `GOAL_GUARD_AUTO_CONTINUE` | `true` | Auto-continue an idle goal that is not complete yet. | | `maxAutoContinue` / `GOAL_GUARD_MAX_AUTO_CONTINUE` | `50` | Hard cap on automatic continuations per goal session. | | `programmaticReview` / `GOAL_GUARD_PROGRAMMATIC_REVIEW` | `true` | Have the guard launch the required reviewers itself on idle (as subtasks on the goal session). | | `reviewTimeoutMs` / `GOAL_GUARD_REVIEW_TIMEOUT_MS` | `360000` | Per-reviewer wall-clock cap (ms) for a programmatic review. | @@ -253,105 +262,116 @@ Goal Mode works great with zero configuration. When you want to tune it, set opt | `maxReviewCycles` / `GOAL_GUARD_MAX_REVIEW_CYCLES` | `12` | Hard cap on programmatic review runs per goal; on reaching it the guard pauses for you. | | `abortGraceMs` / `GOAL_GUARD_ABORT_GRACE_MS` | `1200` | Grace (ms) before an idle goal auto-continues, so a user cancel is always honored. 
| | `injectSystemState` / `GOAL_GUARD_INJECT_SYSTEM_STATE` | `true` | Inject live guard state into the prompt. | -| `persist` / `GOAL_GUARD_PERSIST` | `true` | Persist state under the XDG state dir. | -| `contextualGates` / `GOAL_GUARD_CONTEXTUAL_GATES` | `true` | Require specialist gates by goal keywords. | +| `persist` / `GOAL_GUARD_PERSIST` | `true` | Persist state under the XDG state directory. | +| `contextualGates` / `GOAL_GUARD_CONTEXTUAL_GATES` | `true` | Require specialist gates by goal keywords and changed files. | | `restrictSubagents` / `GOAL_GUARD_RESTRICT_SUBAGENTS` | `true` | Lock the `goal-*` subagents to the Goal agent. | | `maxSessions` / `GOAL_GUARD_MAX_SESSIONS` | `200` | Session cache size. | -| `sessionTtlMs` / `GOAL_GUARD_SESSION_TTL_MS` | `86400000` | Idle session TTL. | +| `sessionTtlMs` / `GOAL_GUARD_SESSION_TTL_MS` | `86400000` | Idle session TTL (ms). | | `toastOnBlock` / `GOAL_GUARD_TOAST_ON_BLOCK` | `true` | Toast when something is blocked. | | `toastOnReview` / `GOAL_GUARD_TOAST_ON_REVIEW` | `true` | Toast on each review verdict and when completion unlocks. | | `sidebarBanner` / `GOAL_GUARD_SIDEBAR_BANNER` | `true` | Show the live Goal todo section in the TUI sidebar. | -| `sidebarColor` / `GOAL_GUARD_SIDEBAR_COLOR` | `#FFD700` | Colour of the GOAL label for a **running** goal. | -| `sidebarDoneColor` / `GOAL_GUARD_SIDEBAR_DONE_COLOR` | `#FF5555` | Colour of a **done** goal in the sidebar. | -| `sidebarMutedColor` / `GOAL_GUARD_SIDEBAR_MUTED_COLOR` | `#808080` | Foreground colour for **pending** Goal todo rows (□ items) while a goal is running. | +| `sidebarColor` / `GOAL_GUARD_SIDEBAR_COLOR` | `#FFD700` | Color of the GOAL label for a **running** goal. | +| `sidebarDoneColor` / `GOAL_GUARD_SIDEBAR_DONE_COLOR` | `#FF5555` | Color of a **done** goal in the sidebar. | +| `sidebarMutedColor` / `GOAL_GUARD_SIDEBAR_MUTED_COLOR` | `#808080` | Color for **pending** Goal todo rows while a goal is running. 
| | `completionMarker` / `GOAL_GUARD_COMPLETION_MARKER` | `Goal Completed` | Phrase that, at the start of a message, claims completion. | | `blockedMarker` / `GOAL_GUARD_BLOCKED_MARKER` | `Goal Not Completed` | Replacement written when a completion claim is blocked. | -| `yolo` / `GOAL_GUARD_YOLO` | `false` | **YOLO mode.** Relax the guard so it never blocks/nags for ordinary work — turns off network-exec blocking, completion enforcement, the Goal-only subagent lock, and block toasts. Destructive guarding stays on unless `allowDestructive` is also set. Any key you set explicitly still wins. | -| `allowDestructive` / `GOAL_GUARD_ALLOW_DESTRUCTIVE` | `false` | Turn **off** destructive-command guarding. With `yolo: true` this is "full YOLO" — nothing is blocked and the agent has ALL rights. Works standalone too. Dangerous. | -| `allowCommands` / `GOAL_GUARD_ALLOW_COMMANDS` | `[]` | Custom **allow-list**: a bash command matching ANY of these JS regex patterns is never blocked, whatever the analyzer thinks. Array, or a comma/newline-separated string (env). | -| `extraDestructive` / `GOAL_GUARD_EXTRA_DESTRUCTIVE` | `[]` | Custom **deny-list**: a bash command matching ANY of these JS regex patterns is treated as destructive, extending the built-in analyzer with your own rules. | +| `yolo` / `GOAL_GUARD_YOLO` | `false` | **YOLO mode.** Relax the soft gates — network-exec blocking, completion enforcement, the Goal-only subagent lock, and block toasts. Destructive guarding stays on unless `allowDestructive` is also set. Any key you set explicitly still wins. | +| `allowDestructive` / `GOAL_GUARD_ALLOW_DESTRUCTIVE` | `false` | Turn **off** destructive-command guarding. With `yolo: true` this is full YOLO — nothing is blocked. Works on its own as well. Use with care. | +| `allowCommands` / `GOAL_GUARD_ALLOW_COMMANDS` | `[]` | Allow-list: a bash command matching any of these JavaScript regular expressions is never blocked. 
Array, or a comma- or newline-separated string for the env var. | +| `extraDestructive` / `GOAL_GUARD_EXTRA_DESTRUCTIVE` | `[]` | Deny-list: a bash command matching any of these JavaScript regular expressions is treated as destructive, extending the built-in analyzer. | -### YOLO mode +### YOLO mode and per-command rules -Every gate is individually tunable, but YOLO is the one-switch escape hatch: +Every gate is individually tunable, and YOLO mode is the one-switch escape hatch: ```jsonc -// opencode.json — never blocks or asks for anything (ALL rights): +// opencode.json — never blocks anything (full rights): ["./plugins/goal-guard.js", { "yolo": true, "allowDestructive": true }] ``` ```bash -# Or via env (e.g. for a throwaway sandbox): +# Or via environment, for a throwaway sandbox: GOAL_GUARD_YOLO=1 GOAL_GUARD_ALLOW_DESTRUCTIVE=1 opencode ``` -- `yolo: true` alone → no completion gating, no subagent lock, no network-exec block, no toasts — but a destructive `rm -rf /` is **still** stopped. -- add `allowDestructive: true` → that last guard drops too: full YOLO. -- Prefer surgical control? Leave YOLO off and use `allowCommands` (whitelist exactly the commands you want to wave through) and/or `extraDestructive` (block extra ones), e.g. `{ "allowCommands": ["^docker compose ", "^rm -rf \\./tmp/"] }`. +- `yolo: true` alone removes completion gating, the subagent lock, network-exec + blocking, and toasts — but a destructive `rm -rf /` is **still** stopped. +- Add `allowDestructive: true` and that last guard drops too: full YOLO. +- For surgical control, leave YOLO off and use `allowCommands` to wave specific + commands through, or `extraDestructive` to block additional ones, for example + `{ "allowCommands": ["^docker compose ", "^rm -rf \\./tmp/"] }`. + +YOLO only relaxes keys you did not set explicitly, so a per-key option always wins. 
-**Don't guess — use the tool.** `goal-config` (installed as `opencode-goal-mode-config`, or `node scripts/goal-config.mjs` in the repo) lists every key, explains how to set one, ships paste-ready recipes, and previews the resolved config: +**Do not guess — use the tool.** `goal-config` (installed as +`opencode-goal-mode-config`, or `node scripts/goal-config.mjs` from the repository) +lists every key, explains how to set one, ships paste-ready recipes, and previews the +resolved configuration: ```bash -opencode-goal-mode-config list # every key: default, env var, what it does -opencode-goal-mode-config recipe full-yolo # paste-ready opencode.json snippet -opencode-goal-mode-config effective '{"yolo":true}' --diff # confirm what it resolves to +opencode-goal-mode-config list # every key: default, env var, effect +opencode-goal-mode-config recipe full-yolo # a paste-ready opencode.json snippet +opencode-goal-mode-config effective '{"yolo":true}' --diff # confirm what it resolves to ``` -The customization skill and the `/goal-mode-customize` command (both installed alongside the plugin) walk the agent through the discover → apply → verify loop on top of this tool. +The customization skill and the `/goal-mode-customize` command, both installed +alongside the plugin, walk the agent through a discover, apply, then verify loop built +on this tool. -**Slash commands:** `/goal`, `/goal-contract`, `/goal-review`, `/goal-evidence-map`, -`/goal-status`, `/goal-repair`, `/goal-final`. +**Slash commands:** `/goal`, `/goal-contract`, `/goal-review`, `/goal-evidence`, +`/goal-evidence-map`, `/goal-status`, `/goal-repair`, `/goal-reset`, `/goal-final`, +`/goal-mode-customize`. **Tools the model can call:** `goal_contract`, `goal_evidence`, `goal_evidence_map`, `goal_reviewer_memory`, `goal_status`, `goal_reset`. 
## Troubleshooting -- **`opencode agent list` doesn't show `goal`?** The agents didn't land where OpenCode +- **`opencode agent list` does not show `goal`.** The agents did not land where OpenCode reads them — re-run `opencode-goal-mode --global` and restart OpenCode. -- **No sidebar todo section?** TUI plugins load from `tui.json`, not `plugins/`. Confirm - `~/.config/opencode/tui.json` lists `opencode-goal-mode`, then fully restart OpenCode. - The sidebar is experimental and only shows inside a Goal session with a goal set; - enforcement works regardless of the sidebar. -- **Reviews didn't kick off on their own?** Upgrade to **v0.6.9+**. After you stop - with work done, the guard automatically retries if the session is still busy — - you should **not** need to type "continue?". Reviewer subtasks launch **in parallel** - on the goal session (v0.6.11+) and the guard starts the next assistant turn with - fixes or completion — never as a fake user message (v0.6.10+). -- **Explorer subagent prompting on basic shell?** Upgrade to v0.6.7+ — read-only - commands like `grep`, `cat`, and `sed` are pre-approved on `goal-explorer`. -- **Goal agent stalling on Questions?** The primary `goal` agent has `question: deny` - (v0.6.7+); record assumptions in the Goal Contract instead. -- **Goal Mode vanished after I opened the todo panel / switched agents?** Switching the - session off the `goal` agent (to Build/Plan, or via an action that cycles the agent) - intentionally pauses Goal Mode — the guard shows a toast and stops treating that turn - as a goal. Your Goal Contract, reviews, and evidence are preserved. Switch back to the - `goal` agent (or run `/goal`) to resume; the session re-activates with all state intact. -- **Programmatic review not firing in headless `opencode serve`?** The idle watcher - reconnects on transient SSE drops (v0.6.12+) and warns if it cannot start; check the - server log for `goal-guard.watcher.*` events. 
-- **A safe command got blocked?** Run `node benchmarks/external.mjs --json` to see how the - analyzer reads it, set `blockDestructive: false` for that project, and please +- **No sidebar todo section.** TUI plugins load from `tui.json`, not the `plugins/` + directory. Confirm `~/.config/opencode/tui.json` lists `opencode-goal-mode`, then fully + restart OpenCode. The sidebar is experimental and only appears inside a Goal session + with a goal set; enforcement works regardless of the sidebar. +- **Reviews did not start on their own.** After you stop with work done, the guard + retries automatically while the session is still busy, so you should not need to type + "continue". Reviewer subtasks launch on the goal session, and the guard starts the next + assistant turn with fixes or completion rather than as a synthetic user message. +- **The explorer subagent prompts on basic shell commands.** Read-only commands such as + `grep`, `cat`, and `sed -n` are pre-approved on `goal-explorer`. +- **The goal agent stalls waiting on a question.** The primary `goal` agent sets + `question: deny`; it records assumptions in the Goal Contract and keeps working instead + of pausing. +- **Goal Mode stopped after switching agents.** Switching the session off the `goal` + agent — to Build or Plan, or through an action that cycles the agent — intentionally + pauses Goal Mode; the guard shows a toast and stops treating that turn as a goal. Your + contract, reviews, and evidence are preserved. Switch back to the `goal` agent, or run + `/goal`, to resume with all state intact. +- **A safe command was blocked.** Inspect how the analyzer reads it with + `node benchmarks/external.mjs --json`, allow it for that project with + `allowCommands`, and please [open an issue](https://github.com/devinoldenburg/opencode-goal-mode/issues). 
## Good to know -- **Requirements:** Node 20.11+, OpenCode configured to load local agents/commands/ - plugins (tested against `@opencode-ai/plugin` 1.17.6, compatible with the 1.15+ hook - surface), and a working provider/model. Agents inherit your OpenCode default model. -- **Safety:** The installer copies `agents/*.md`, `commands/*.md`, and `plugins/`, - merge-registers the sidebar in `tui.json`, and writes a manifest — never auth - files, tokens, or provider config. The guard is a guardrail, not a sandbox, - and fails open on input it can't parse; see [SECURITY.md](SECURITY.md) for the threat - model and a private reporting channel. +- **Requirements.** Node 20.11 or newer, OpenCode configured to load local agents, + commands, and plugins (tested against `@opencode-ai/plugin` 1.17.6 and compatible with + the 1.15-and-later hook surface), and a working provider and model. Agents inherit your + OpenCode default model. +- **Safety.** The installer copies `agents/`, `commands/`, `skills/`, and `plugins/`, + merge-registers the sidebar in `tui.json`, and writes a manifest. It never touches auth + files, tokens, or provider configuration. The guard is a guardrail, not a sandbox, and + fails closed on a parser error while failing open on genuinely unanalyzable input; see + [SECURITY.md](SECURITY.md) for the threat model and a private reporting channel. ## Contributing -PRs welcome — [CONTRIBUTING.md](CONTRIBUTING.md) has the dev loop and release process, -and [CHANGELOG.md](CHANGELOG.md) has the full history. Releases are automated and -version-synced: one pushed `vX.Y.Z` tag runs the CI gate, publishes to npm, and creates -the matching GitHub Release. +Contributions are welcome. [CONTRIBUTING.md](CONTRIBUTING.md) covers the development +loop and release process, and [CHANGELOG.md](CHANGELOG.md) records the full history. 
+Releases are automated and version-synced: a single pushed `vX.Y.Z` tag runs the CI +gate, publishes to npm, and creates the matching GitHub Release. ## License -[MIT](LICENSE) · built for [OpenCode](https://opencode.ai). +[MIT](LICENSE). Built for [OpenCode](https://opencode.ai). + diff --git a/SECURITY.md b/SECURITY.md index 9272b0e..8763375 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -20,8 +20,8 @@ so always upgrade to the newest version. | Version | Supported | | --- | --- | -| Latest published release (`0.6.x`) | ✅ | -| Older `0.x` releases | ❌ — upgrade to the latest | +| Latest published release (`0.7.x`) | Yes | +| Older `0.x` releases | No — upgrade to the latest | ## Reporting a vulnerability @@ -47,9 +47,10 @@ OpenCode Goal Mode is a defense-in-depth tool for an AI coding agent. The `goal-guard` plugin blocks destructive and remote-execution shell commands using a quote-aware tokenizer, but it is **not a sandbox**: -- The analyzer **fails open** on un-analyzable / highly dynamic commands, - deferring to OpenCode's own permission rules. Treat it as a guardrail, not a - jail. +- The analyzer **fails open** on un-analyzable or highly dynamic commands it cannot + resolve to a concrete form, deferring to OpenCode's own permission rules. (A + genuine parser *error*, by contrast, fails **closed** — the command is treated as + destructive and blocked.) Treat the guard as a guardrail, not a jail. - Gate freshness is only as trustworthy as the reviewer subagents' verdicts. - The installer copies `agents/*.md`, `commands/*.md`, and the `plugins/` tree, merge-registers the sidebar in `tui.json`, and writes a manifest — never auth @@ -64,18 +65,20 @@ welcome. The shell analyzer is a heuristic classifier, not a sandbox, so a measurable fraction of genuinely destructive commands are **not** blocked. 
-- **Fail-open rate (~6.7%).** On the external corpus of **704 real third-party - commands** the analyzer detects **93.3%** of destructive commands, so roughly - **6.7%** are not blocked and fall through to OpenCode's own permission rules. - See [research/benchmarks.md](research/benchmarks.md) for the methodology. +- **Fail-open rate (about 7.7%).** On the external corpus of **704 real + third-party commands** the analyzer detects **92.3%** of destructive commands, so + roughly **7.7%** (8 of the 104 destructive commands) are not blocked and fall + through to OpenCode's own permission rules. See + [research/benchmarks.md](research/benchmarks.md) for the methodology. - **What fails open.** The remaining misses are dominated by two categories: 1. **Intentionally permitted forms** — a plain single-target `rm <file>` - (and `rm -i`/`-v`/`-d`) is not blocked by design; the guard targets - `rm -r`/`rm -f`, command-substitution / `bash -c` / interpreter deletes, - and remote exec. - 2. **Un-analyzable / highly dynamic commands** — when the tokenizer cannot - resolve a concrete command (variable interpolation, runtime-built strings, - parse failures) it returns "not blocked" rather than guessing. + (and `rm -i`, `-v`, `-d`) is not blocked by design; the guard targets + `rm -r` and `rm -f`; command-substitution, `bash -c`, and interpreter + deletes; and remote execution. + 2. **Un-analyzable or highly dynamic commands** — when the tokenizer cannot + resolve a concrete command (variable interpolation, runtime-built strings) it + returns "not blocked" rather than guessing. A parser *error*, distinct from an + un-resolvable command, instead fails closed and blocks. 
- **Recommended mitigations** for the un-blocked tail: keep OpenCode's own permission rules enabled, run the agent against a clean working tree under version control, and add repo-side guards (git pre-commit hooks, protected diff --git a/docs/benchmarks/detection-by-family.svg b/docs/benchmarks/detection-by-family.svg index 9b1f1c1..699db45 100644 --- a/docs/benchmarks/detection-by-family.svg +++ b/docs/benchmarks/detection-by-family.svg @@ -1 +1,37 @@ -Detection by family — curated regression fixturesCurated patterns the analyzer is built to catch (not an unbiased sample). 48 destructive fixtures.0%20%40%60%80%100%100%100%Classic0%100%Obfuscated0%100%Remote execLegacy regex guardGoal Mode analyzer \ No newline at end of file + + +Detection by family — curated regression fixtures +Curated patterns the analyzer is built to catch (not an unbiased sample). 48 destructive fixtures. + +0% + +20% + +40% + +60% + +80% + +100% + +100% + +100% +Classic + +0% + +97% +Obfuscated + +0% + +100% +Remote exec + + +Legacy regex guard + +Goal Mode analyzer + \ No newline at end of file diff --git a/docs/benchmarks/external-scorecard.svg b/docs/benchmarks/external-scorecard.svg index df3f18c..8635a4f 100644 --- a/docs/benchmarks/external-scorecard.svg +++ b/docs/benchmarks/external-scorecard.svg @@ -1 +1,32 @@ -Guard accuracy on real third-party commands704 tldr-pages commands the analyzer was never fitted to. Detection higher = better; false positives lower = better.0%20%40%60%80%100%54%93%Detection rate0%0%False-positive rateLegacy regex guardGoal Mode analyzer \ No newline at end of file + + +Guard accuracy on real third-party commands +704 tldr-pages commands the analyzer was never fitted to. Detection higher = better; false positives lower = better. 
+ +0% + +20% + +40% + +60% + +80% + +100% + +54% + +92% +Detection rate + +0% + +1% +False-positive rate + + +Legacy regex guard + +Goal Mode analyzer + \ No newline at end of file diff --git a/docs/benchmarks/latency.svg b/docs/benchmarks/latency.svg index 77ef0dd..54ec48e 100644 --- a/docs/benchmarks/latency.svg +++ b/docs/benchmarks/latency.svg @@ -1 +1,13 @@ -Per-command analysis latencyMicroseconds to classify one command. Both are negligible for a tool-call guard.Legacy regex guard0.75 µsGoal Mode analyzer1.03 µs \ No newline at end of file + + +Per-command analysis latency +Microseconds to classify one command. Both are negligible for a tool-call guard. +Legacy regex guard + + +0.80 µs +Goal Mode analyzer + + +1.35 µs + \ No newline at end of file diff --git a/docs/benchmarks/overall-scorecard.svg b/docs/benchmarks/overall-scorecard.svg index f578e5e..660b33c 100644 --- a/docs/benchmarks/overall-scorecard.svg +++ b/docs/benchmarks/overall-scorecard.svg @@ -1 +1,32 @@ -Curated fixtures — passes its own specCurated regression fixtures, not measured accuracy. See external-scorecard.svg for the real-world number.0%20%40%60%80%100%21%100%Detection rate22%0%False-positive rateLegacy regex guardGoal Mode analyzer \ No newline at end of file + + +Curated fixtures — passes its own spec +Curated regression fixtures, not measured accuracy. See external-scorecard.svg for the real-world number. 
+ +0% + +20% + +40% + +60% + +80% + +100% + +21% + +98% +Detection rate + +22% + +0% +False-positive rate + + +Legacy regex guard + +Goal Mode analyzer + \ No newline at end of file diff --git a/docs/benchmarks/results.json b/docs/benchmarks/results.json index 00215f4..7685c03 100644 --- a/docs/benchmarks/results.json +++ b/docs/benchmarks/results.json @@ -15,14 +15,67 @@ "destCaught": 56, "destTotal": 104, "safeFalsePos": 1, - "safeTotal": 600 + "safeTotal": 600, + "misses": [ + "blkdiscard -l 100MB /dev/device", + "blkdiscard -s /dev/device", + "blkdiscard /dev/device", + "curl https://sh.rustup.rs -sSf | sh -s", + "curl https://sh.rustup.rs -sSf | sh -s -- arguments", + "git filter-branch --env-filter 'GIT_AUTHOR_EMAIL=new_email' HEAD", + "git filter-branch --tree-filter 'rm -f file' HEAD", + "git filter-branch --tree-filter 'rm -rf folder' HEAD", + "parted -h", + "rm -d path/to/directory", + "rm -f .latest.bank.csv", + "rm -f path/to/file1 path/to/file2 ...", + "rm -i path/to/file1 path/to/file2 ...", + "rm -v $HOME/.cache/fuzzel", + "rm -v path/to/file1 path/to/file2 ...", + "rm /dev/sdXN", + "rm path/to/file1 path/to/file2 ...", + "rm torrent_id", + "sgdisk -L", + "srm -i \\*", + "srm -m path/to/file", + "srm -r -s path/to/directory", + "srm -s path/to/file", + "sudo fdisk -l", + "sudo fdisk /dev/sdX", + "sudo mkswap -c path/to/file", + "sudo mkswap -L label /dev/sdXY", + "sudo mkswap -s file_size -F path/to/swapfile", + "sudo mkswap -U clear|random|time|uuid_value", + "sudo mkswap path/to/file", + "sudo parted -l", + "sudo parted /dev/sdX", + "sudo parted /dev/sdX -s mklabel gpt mkpart \"boot_partition_name\" 0% 500MiB mkpart \"system_partition_name\" 500MiB 100%", + "sudo parted /dev/sdX mklabel aix|amiga|bsd|dvh|gpt|loop|mac|msdos|pc98|sun", + "sudo parted /dev/sdX set 1 boot on", + "sudo sgdisk -b /path/to/backup.gpt /dev/sdX", + "sudo sgdisk -d 1 /dev/sdX", + "sudo sgdisk -l /path/to/backup.gpt /dev/sdX", + "sudo sgdisk -m 1:2:3:4 /dev/sdX", + "sudo 
sgdisk -p /dev/sdX", + "sudo sgdisk -v /dev/sdX", + "sudo sgdisk -Z /dev/sdX", + "sudo wipefs -a -f /dev/sdX", + "sudo wipefs -a -n /dev/sdX", + "sudo wipefs -a /dev/sdX", + "sudo wipefs -a /dev/sdX*", + "sudo wipefs /dev/sdX", + "unlink path/to/file" + ], + "falsePositives": [ + "git clean -i" + ] }, "current": { - "detectionRate": 93.3, - "falsePositiveRate": 0.2, - "destCaught": 97, + "detectionRate": 92.3, + "falsePositiveRate": 0.8, + "destCaught": 96, "destTotal": 104, - "safeFalsePos": 1, + "safeFalsePos": 5, "safeTotal": 600, "misses": [ "rm -d path/to/directory", @@ -31,10 +84,15 @@ "rm -v path/to/file1 path/to/file2 ...", "rm /dev/sdXN", "rm path/to/file1 path/to/file2 ...", - "rm torrent_id" + "rm torrent_id", + "unlink path/to/file" ], "falsePositives": [ - "git filter-repo --path-rename path/to/folder/:" + "cryptsetup open --allow-discards /dev/sdXY mapping_name", + "dnf clean packages", + "docker rm container1 container2 ...", + "git filter-repo --path-rename path/to/folder/:", + "npm unpublish package_name -f" ] } }, @@ -75,13 +133,13 @@ "safeFalsePos": 5 } }, - "opsPerSec": 1341168, - "usPerCommand": 0.75 + "opsPerSec": 1246231, + "usPerCommand": 0.8 }, "current": { - "detectionRate": 100, + "detectionRate": 97.91666666666666, "falsePositiveRate": 0, - "destCaught": 48, + "destCaught": 47, "destTotal": 48, "safeFalsePos": 0, "safeTotal": 23, @@ -94,7 +152,7 @@ }, "bypass": { "destTotal": 35, - "destCaught": 35, + "destCaught": 34, "safeTotal": 0, "safeFalsePos": 0 }, @@ -111,8 +169,8 @@ "safeFalsePos": 0 } }, - "opsPerSec": 970526, - "usPerCommand": 1.03 + "opsPerSec": 739560, + "usPerCommand": 1.35 } }, "completionFixtures": { diff --git a/docs/benchmarks/truthfulness-score.svg b/docs/benchmarks/truthfulness-score.svg index a25f88f..93dd8bb 100644 --- a/docs/benchmarks/truthfulness-score.svg +++ b/docs/benchmarks/truthfulness-score.svg @@ -1 +1,17 @@ -Completion-enforcement fixtures9 hand-authored policy cases (a spec, not a survey): premature 
claims blocked, valid ones allowed.Truthfulness score100.0%Decision accuracy100.0%Reason accuracy100.0% \ No newline at end of file + + +Completion-enforcement fixtures +9 hand-authored policy cases (a spec, not a survey): premature claims blocked, valid ones allowed. +Truthfulness score + + +100.0% +Decision accuracy + + +100.0% +Reason accuracy + + +100.0% + \ No newline at end of file diff --git a/research/benchmarks.md b/research/benchmarks.md index b87c775..c111810 100644 --- a/research/benchmarks.md +++ b/research/benchmarks.md @@ -49,18 +49,20 @@ Representative run (sample of 704 commands: 104 destructive, 600 safe): | On real third-party commands | Legacy regex guard | Goal Mode analyzer | | --- | --- | --- | -| Detection rate | 53.8% | **93.3%** | -| False-positive rate | 0.2% | 0.2% | +| Detection rate | 53.8% | **92.3%** (96 of 104 destructive) | +| False-positive rate | 0.2% | 0.8% (5 of 600 safe) | Reading the result honestly: -- The remaining Goal Mode misses are almost entirely un-flagged single-target +- The eight remaining Goal Mode misses are almost entirely un-flagged single-target `rm ` (and `rm -i`/`-v`/`-d`), which the guard **intentionally permits**: it blocks `rm -r`/`rm -f`, command-substitution/`bash -c`/interpreter deletes, and remote exec, but not a plain single-file `rm`. Under the strict every-`rm`-is-destructive labeler these are counted as misses. -- The one counted false positive (`git filter-repo …`) genuinely rewrites - history, so the real-world false-positive rate is effectively zero. Run +- The five counted false positives are mostly debatable rather than wrong: + `npm unpublish … -f`, `git filter-repo …`, `docker rm …`, `cryptsetup open …`, + and `dnf clean packages` all genuinely remove or rewrite something, so flagging + them is arguably correct; the strict labeler simply marks them safe. Run `node benchmarks/external.mjs --json` to see the full miss / false-positive lists. 
- This benchmark directly drove real fixes: `mkfs.` variants, `srm`, and