From 2c9d7b2e8cc4aa25638a3160c68ae870934b897c Mon Sep 17 00:00:00 2001
From: Devin Oldenburg <devin.oldenburg@icloud.com>
Date: Sun, 21 Jun 2026 14:56:04 +0200
Subject: [PATCH 1/2] fix(config): reference the guard by its installed path,
 not the package name

The plugin options in opencode.json must reference ./plugins/goal-guard.js (the
guard); the package name resolves to the TUI sidebar export, so the previous
snippets in the goal-config tool, the customization skill, and the
/goal-mode-customize command would not have applied guard options. Align all of
them (and the tool's explain output) with ARCHITECTURE.md's canonical form.
---
 commands/goal-mode-customize.md         | 2 +-
 scripts/goal-config.mjs                 | 5 ++++-
 skills/goal-mode-customization/SKILL.md | 2 +-
 tests/goal-config.test.mjs              | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/commands/goal-mode-customize.md b/commands/goal-mode-customize.md
index 34d5f62..a095556 100644
--- a/commands/goal-mode-customize.md
+++ b/commands/goal-mode-customize.md
@@ -23,7 +23,7 @@ env var, or the plugin `options` object in `opencode.json`. A bundled tool,
 
 2. **Apply** — edit the user's `opencode.json` so the plugin entry carries the options:
    ```jsonc
-   "plugin": [["opencode-goal-mode", { "yolo": true, "allowCommands": ["^ruff( |$)"] }]]
+   "plugin": [["./plugins/goal-guard.js", { "yolo": true, "allowCommands": ["^ruff( |$)"] }]]
    ```
    (or export the `GOAL_GUARD_*` env equivalents). Lists accept an array or a comma/newline string; an invalid regex is ignored, never fatal.
 
diff --git a/scripts/goal-config.mjs b/scripts/goal-config.mjs
index bbb2031..86f8c8f 100644
--- a/scripts/goal-config.mjs
+++ b/scripts/goal-config.mjs
@@ -17,7 +17,10 @@
  */
 import { DEFAULT_CONFIG, CONFIG_DOCS, envVarFor, resolveConfig } from "../plugins/goal-guard/config.js";
 
-const PLUGIN = "opencode-goal-mode";
+// The guard plugin is referenced by its installed path in opencode.json's plugin
+// array (this is how OpenCode passes it options); the package name resolves to the
+// TUI sidebar, not the guard. See ARCHITECTURE.md → Configuration.
+const PLUGIN = "./plugins/goal-guard.js";
 
 function typeOf(key) {
   const d = DEFAULT_CONFIG[key];
diff --git a/skills/goal-mode-customization/SKILL.md b/skills/goal-mode-customization/SKILL.md
index 2ce2d41..7235635 100644
--- a/skills/goal-mode-customization/SKILL.md
+++ b/skills/goal-mode-customization/SKILL.md
@@ -23,7 +23,7 @@ preview, and verification for you: **`scripts/goal-config.mjs`**.
    (the tool prints the exact snippet; `recipe <name>` gives a paste-ready one):
    ```jsonc
    "plugin": [
-     ["opencode-goal-mode", { "yolo": true, "allowCommands": ["^ruff( |$)"] }]
+     ["./plugins/goal-guard.js", { "yolo": true, "allowCommands": ["^ruff( |$)"] }]
    ]
    ```
    Equivalent env vars work too (`GOAL_GUARD_YOLO=1`, …) — `explain` shows both.
diff --git a/tests/goal-config.test.mjs b/tests/goal-config.test.mjs
index 8fa1bea..c1a4840 100644
--- a/tests/goal-config.test.mjs
+++ b/tests/goal-config.test.mjs
@@ -35,7 +35,7 @@ test("goal-config list mentions every config key", () => {
 test("goal-config explain shows how to set + verify a key", () => {
   const r = cli("explain", "yolo");
   assert.equal(r.status, 0, r.stderr);
-  assert.match(r.stdout, /opencode-goal-mode/);
+  assert.match(r.stdout, /goal-guard\.js/);
   assert.match(r.stdout, /GOAL_GUARD_YOLO/);
   assert.match(r.stdout, /effective/);
 });

From 93ffd1a4aea4ee6e5567073f54769cdc9c9582d9 Mon Sep 17 00:00:00 2001
From: Devin Oldenburg <devin.oldenburg@icloud.com>
Date: Sun, 21 Jun 2026 14:56:05 +0200
Subject: [PATCH 2/2] =?UTF-8?q?docs:=20rewrite=20user-facing=20docs=20?=
 =?UTF-8?q?=E2=80=94=20accurate,=20current,=20professional,=20emoji-free?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- README: full rewrite in a professional, marketing voice with no decorative
  emoji; corrected the headline benchmark numbers to the current run (92.3%
  detection / 0.8% false positives, ~1.35 us/cmd), completed the slash-command
  list, documented YOLO + allowCommands/extraDestructive + the goal-config tool,
  and used the canonical ./plugins/goal-guard.js config form throughout.
- SECURITY: corrected the fail-open rate to ~7.7% (detection 92.3%, 8 of 104
  destructive), clarified that a parser error fails closed while un-resolvable
  commands fail open, bumped supported versions to 0.7.x, removed emoji.
- ARCHITECTURE: added the YOLO/allow-list config keys, the goal-config tool, and
  the skills/ installer component; completed the command list; removed a
  drift-prone hardcoded test-file count.
- research/benchmarks.md: corrected the results table and the honest reading of
  the 8 misses and 5 (mostly debatable) false positives.
- Regenerated the benchmark charts so the SVGs match the corrected numbers.
---
 ARCHITECTURE.md                         |  21 +-
 README.md                               | 378 +++++++++++++-----------
 SECURITY.md                             |  33 ++-
 docs/benchmarks/detection-by-family.svg |  38 ++-
 docs/benchmarks/external-scorecard.svg  |  33 ++-
 docs/benchmarks/latency.svg             |  14 +-
 docs/benchmarks/overall-scorecard.svg   |  33 ++-
 docs/benchmarks/results.json            |  86 +++++-
 docs/benchmarks/truthfulness-score.svg  |  18 +-
 research/benchmarks.md                  |  12 +-
 10 files changed, 440 insertions(+), 226 deletions(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 1a48e39..67a53c8 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -22,9 +22,9 @@ configuration directory:
    gates). Each is a Markdown file: YAML frontmatter (mode, permissions, color,
    temperature) over a system-prompt body.
 2. **Commands** (`commands/*.md`) — slash commands (`/goal`, `/goal-contract`,
-   `/goal-review`, `/goal-evidence-map`, `/goal-status`, `/goal-repair`,
-   `/goal-final`) that bind a prompt template to an agent, some forced to run as
-   subtasks.
+   `/goal-review`, `/goal-evidence`, `/goal-evidence-map`, `/goal-status`,
+   `/goal-repair`, `/goal-reset`, `/goal-final`, `/goal-mode-customize`) that bind a
+   prompt template to an agent, some forced to run as subtasks.
 3. **The `goal-guard` plugin** (`plugins/goal-guard.js` + `plugins/goal-guard/`)
    — a runtime guard that enforces review discipline, blocks destructive shell
    commands, preserves state across compaction and restarts, and exposes
@@ -224,13 +224,18 @@ destructive blocking, network-exec blocking, completion enforcement,
 `autoContinue`, `programmaticReview`, `reviewIdleDeferMs`, `reviewIdleRetryMs`,
 `maxReviewIdleRetries`, review timeouts/polling, `maxReviewCycles`,
 system-state injection, persistence, contextual gates, subagent restriction,
-session cache size/TTL, sidebar colours, and toasts. See README.md for the full
-option table.
+session cache size/TTL, sidebar colours, and toasts. The guard is also fully
+relaxable: `yolo` drops the soft gates, `allowDestructive` drops destructive
+guarding, and `allowCommands` / `extraDestructive` are per-command regex allow/deny
+lists; `CONFIG_DOCS` documents every key. `scripts/goal-config.mjs` (the
+`opencode-goal-mode-config` bin) lists, explains, and previews any configuration,
+and a no-drift test keeps `CONFIG_DOCS` aligned with `DEFAULT_CONFIG`. See README.md
+for the full option table.
 
 ## Installer
 
-`scripts/install.mjs` recursively copies `agents/`, `commands/`, and `plugins/`
-(including the nested module directory) into the target config dir, merge-registers
+`scripts/install.mjs` recursively copies `agents/`, `commands/`, `skills/`, and
+`plugins/` (including the nested module directory) into the target config dir, merge-registers
 the sidebar package in `tui.json`, clears stale TUI plugin cache entries, and
 records a manifest of the file hashes it wrote. Global `npm install -g` also
 triggers the same installer via `postinstall.mjs`. On upgrade it distinguishes
@@ -240,7 +245,7 @@ supports `--uninstall` (which leaves locally-modified files in place).
 
 ## Testing
 
-`node --test` runs the suite across 20 files:
+`node --test` runs the full suite across the `tests/` directory, including:
 
 - `tests/shell.test.mjs` / `tests/shell.property.test.mjs` — analyzer against bypass and false-positive corpora.
 - `tests/plugin.test.mjs` — hook behavior, gating, verdicts, completion, tools, isolation.
diff --git a/README.md b/README.md
index 64a4695..66afbff 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,13 @@
 <div align="center">
-  
+
 # OpenCode Goal Mode
 
-### The OpenCode agent that can't fake "done" — and can't wreck your repo doing it.
+### The OpenCode agent that cannot fake "done" — and cannot wreck your repository getting there.
 
-Give it a goal. It writes a contract, does the work, reviews itself with a fleet of
-specialist subagents, and **physically cannot tell you it's finished until those reviews
-actually pass**. Reach for `rm -rf` mid-run and it stops the command cold.
+Give it a goal. It writes a contract, does the work, reviews itself with a bench of
+specialist subagents, and **cannot tell you it is finished until those reviews
+actually pass**. Reach for `rm -rf` mid-run and the command is stopped before it
+executes.
 
 [![npm version](https://img.shields.io/npm/v/opencode-goal-mode?color=2da44e&label=npm)](https://www.npmjs.com/package/opencode-goal-mode)
 [![npm downloads](https://img.shields.io/npm/dm/opencode-goal-mode?color=2da44e)](https://www.npmjs.com/package/opencode-goal-mode)
@@ -20,35 +21,36 @@ npm install -g opencode-goal-mode
 
 </div>
 
-Every coding agent will happily announce "✅ Done!" over a half-finished feature and a
-red test suite. Goal Mode ends that. It moves the discipline out of the prompt — where a
-confident model just talks past it — and into the **harness**, where it's enforced in
-code. The agent's "Goal Completed" is intercepted and rewritten to "Goal Not Completed"
-unless every required review gate has a *fresh* pass. Dangerous shell commands are
-blocked before they ever execute.
+Most coding agents will cheerfully announce success over a half-finished feature and
+a red test suite. Goal Mode ends that. It moves the discipline out of the prompt —
+where a confident model simply talks past it — and into the **harness**, where it is
+enforced in code. A "Goal Completed" claim is intercepted and rewritten to
+"Goal Not Completed" unless every required review gate holds a *fresh* pass.
+Dangerous shell commands are blocked before they ever run.
 
-It's the difference between *asking* an agent to be careful and *making* it.
+It is the difference between *asking* an agent to be careful and *making* it.
 
 ## The pitch in one minute
 
-Goal Mode is a drop-in OpenCode package for people who want agentic coding to feel
-auditable instead of theatrical. It gives your agent a contract, a live ledger, a
-specialist review bench, and a command guard that all live outside the model's
-memory. The result is a sharper workflow for real repositories: fewer premature
-victory laps, fewer stale approvals, and a clear trail showing what changed, how
-it was checked, and which gates passed.
+Goal Mode is a drop-in OpenCode package for people who want agentic coding to be
+auditable rather than theatrical. It gives your agent a contract, a live ledger, a
+specialist review bench, and a command guard — all of which live outside the model's
+memory, so the model cannot argue its way past them. The result is a sharper workflow
+for real repositories: fewer premature victory laps, fewer stale approvals, and a
+clear trail of what changed, how it was checked, and which gates passed.
 
-What you get after install:
+What you get after installing:
 
-- A primary `goal` agent that turns requests into acceptance criteria before it edits.
-- Programmatic review cycles that the guard launches automatically when work is ready.
-- Freshness tracking that invalidates every review pass after the next edit.
-- A quote-aware shell analyzer that blocks common destructive and remote-exec commands.
-- A TUI sidebar that turns the hidden ledger into visible progress.
+- A primary `goal` agent that turns a request into acceptance criteria before it edits.
+- Programmatic review cycles that the guard launches automatically when the work is ready.
+- Freshness tracking that invalidates every review pass the moment the next edit lands.
+- A quote-aware shell analyzer that blocks destructive and remote-execution commands.
+- A live TUI sidebar that turns the hidden ledger into visible progress.
 
 ## Watch it refuse to lie
 
-The agent tries to declare victory early. The guard catches it and hands back the truth:
+The agent tries to declare victory early. The guard intercepts the claim and replaces
+it with the truth:
 
 ```diff
 - Goal Completed
@@ -59,100 +61,108 @@ The agent tries to declare victory early. The guard catches it and hands back th
 + reviewCycles=1; missingGates=goal-security-reviewer goal-final-auditor
 ```
 
-The agent reaches for something irreversible. The guard kills it before it runs:
+The agent reaches for something irreversible. The guard stops it before it runs:
 
 ```text
 $ rm -rf build
-✕ Goal Guard blocked a destructive or high-risk bash command
-  (rm with recursive force deletion). Use a safer, reversible command
-  or ask the user to confirm.
+Goal Guard blocked a destructive or high-risk bash command: `rm -rf build`
+(rm with recursive force deletion). Use a safer, reversible command or ask the
+user to confirm.
 ```
 
 ![OpenCode Goal Mode sidebar preview](docs/sidebar-preview.png)
 
-<sub>↑ While a goal runs, Goal Mode takes over the TUI sidebar with a live, evidence-aware
-todo list: the goal title, gate progress, and a row per acceptance criterion and missing
-reviewer — each ticking off as the work gets verified.</sub>
+<sub>While a goal runs, Goal Mode takes over the TUI sidebar with a live,
+evidence-aware todo list: the goal title, gate progress, and a row for each
+acceptance criterion and outstanding reviewer, each one resolving as the work is
+verified.</sub>
 
-## Why you'll want it
+## Why you will want it
 
-🔒 **"Done" actually means done.** Completion is gated on real review verdicts, not vibes.
-The model can't answer `Goal Completed` until every required reviewer returns
-`Verdict: PASS` *after* the last edit — and the claimed `Review cycles: N` has to match
-the counter the guard kept.
+**"Done" actually means done.** Completion is gated on real review verdicts, not
+self-assessment. The model cannot emit `Goal Completed` until every required reviewer
+has returned `Verdict: PASS` *after* the last edit — and the claimed `Review cycles: N`
+must match the counter the guard kept itself.
 
-🤖 **The reviews run themselves.** When the agent stops with work outstanding, the guard
-*launches the reviewer subagents itself* — security, diff, verification, and more — reads
-their verdicts, and loops fix → review until they pass. You never rely on the model to
-remember to check its own work.
+**The reviews run themselves.** When the agent stops with work outstanding, the guard
+launches the reviewer subagents on its own — security, diff, verification, and more —
+reads their verdicts, and repeats the fix-and-review loop until they pass. You never depend on
+the model remembering to check its own work.
 
-♻️ **One edit reopens the gates.** Approvals are stamped with a monotonic sequence, so any
-change after a review instantly goes stale and forces the relevant reviews to re-run.
-There's no sneaking a "fix" in after the green light.
+**One edit reopens the gates.** Approvals are stamped with a monotonic sequence, so
+any change after a review immediately goes stale and forces the relevant reviews to
+re-run. There is no slipping a "fix" in after the green light.
 
-🧠 **It knows which experts to call.** Touch auth and the security reviewer becomes
-mandatory. Touch a migration and the data reviewer joins. API, performance, tests, UX,
-ops, docs, quality — the right specialist gates are required automatically from your goal
-and your diff.
+**It knows which experts to call.** Touch authentication and the security reviewer
+becomes mandatory. Touch a migration and the data reviewer joins. API, performance,
+tests, UX, operations, documentation, and quality each have a gate that is required
+automatically from your goal and your diff.
 
-🛡️ **Your repo survives.** A real shell tokenizer — not a brittle regex — blocks the
-destructive stuff even when it's disguised: `$(rm -rf …)`, `bash -c "…"`, `/bin/rm`,
-`busybox rm -rf`, `git reset --hard`, `curl | sh`. Harmless look-alikes like
-`git checkout -b` sail right through.
+**Your repository survives.** A real shell tokenizer — not a brittle regular
+expression — blocks destructive commands even when they are disguised: `$(rm -rf …)`,
+`bash -c "…"`, `/bin/rm`, `busybox rm -rf`, `git reset --hard`, and `curl | sh`.
+Harmless look-alikes such as `git checkout -b` pass straight through.
 
-🚀 **It doesn't quit on you.** An idle-but-unfinished goal gets automatically pushed
-forward — told exactly what's left — until it's genuinely complete, with hard caps and a
-no-progress breaker so it can never spin.
+**It does not quit on you.** An idle but unfinished goal is pushed forward
+automatically — told exactly what remains — until it is genuinely complete, with hard
+caps and a no-progress breaker so it can never spin.
 
 ## The numbers
 
-Tested on **704 real-world commands** from [tldr-pages](https://github.com/tldr-pages/tldr)
-(common/linux/osx) — commands written by hundreds of contributors who've never seen this
-guard:
+Measured on **704 real-world commands** from [tldr-pages](https://github.com/tldr-pages/tldr)
+(common, Linux, and macOS pages) — commands written by hundreds of contributors who
+have never seen this guard. Reproduce the results with `npm run bench`.
 
-| On 704 commands it has never seen | Regex guard | **Goal Mode** |
+| On 704 commands it has never seen | Regex baseline | **Goal Mode** |
 | --- | :---: | :---: |
-| Dangerous commands caught | 53.8% | **93.3%** |
-| Safe commands wrongly blocked | 0.2% | **0.2%** |
+| Dangerous commands caught | 53.8% | **92.3%** |
+| Safe commands wrongly blocked | 0.2% | **0.8%** |
 
 ![Guard accuracy on real third-party commands](docs/benchmarks/external-scorecard.svg)
 
-And it's effectively free: **~1µs per command**, hundreds of thousands of classifications
-a second. Run it yourself with `npm run bench`.
+Goal Mode catches **roughly 70% more** dangerous commands than the regex baseline
+(92.3% versus 53.8%), in exchange for a small, deliberate increase in false positives.
+Eight of the 104 destructive commands remain unflagged, mostly single-target `rm`.
+Classification is effectively free: **about 1.35 microseconds per command — over
+700,000 commands a second.**
 
 ![Per-command analysis latency](docs/benchmarks/latency.svg)
 
-## How it stacks up
+## How it compares
 
-| | **Goal Mode** | Claude Code | Codex |
+| Capability | Goal Mode | Claude Code | Codex |
 | --- | :---: | :---: | :---: |
-| Blocks a premature "done" out of the box | **Yes** | Only via a custom hook | Review is advisory |
-| Edits auto-invalidate stale approvals | **Yes** | — | — |
-| Specialist reviews auto-required from the task | **Yes** | — | — |
-| Destructive commands blocked by a real shell parser | **Yes** | Regex ("fragile") | Sandbox |
+| Blocks a premature "done" out of the box | **Enforced** | Custom hook required | Review is advisory |
+| Edits auto-invalidate stale approvals | **Enforced** | Not built in | Not built in |
+| Specialist reviews auto-required from the task | **Enforced** | Not built in | Not built in |
+| Destructive-command blocking by a real shell parser | **Enforced (tokenizer)** | Partial (regex) | Partial (sandbox) |
 
-![Mechanically-enforced goal discipline vs. Claude Code and Codex](docs/benchmarks/capability-matrix.svg)
+![Mechanically enforced goal discipline versus Claude Code and Codex](docs/benchmarks/capability-matrix.svg)
 
-Full side-by-side with citations: [research/goal-mode-comparison.md](research/goal-mode-comparison.md).
+Claude Code and Codex are capable tools with real mechanical surfaces of their own;
+this is a comparison of one specific axis — built-in, enforced goal discipline. The
+full side-by-side, with sources and review dates, is in
+[research/goal-mode-comparison.md](research/goal-mode-comparison.md).
 
 ## Install
 
-One command. Needs [Node](https://nodejs.org) 20.11+ and [OpenCode](https://opencode.ai).
-macOS and Linux:
+One command. Requires [Node](https://nodejs.org) 20.11 or newer and
+[OpenCode](https://opencode.ai). Supported on macOS and Linux:
 
 ```bash
 npm install -g opencode-goal-mode
 ```
 
-Then **restart OpenCode**. Global installs auto-run the installer via
-`postinstall`; re-run `opencode-goal-mode --global` if auto-setup fails or you
-need `--force`. The installer drops the Goal agent, its reviewer subagents, slash
-commands, and the guard plugin into `~/.config/opencode`, and registers the live
-sidebar in `tui.json`. In the agent picker you'll see just **`goal`** — the
-reviewers are subagents it drives for you. It's idempotent (re-run to upgrade),
-never overwrites agents/commands/plugins you've edited (but merge-adds the
-sidebar entry in `tui.json`), and `--uninstall` removes exactly what it added.
-Goal Mode uses whatever model and provider OpenCode is already set up with.
+Then **restart OpenCode**. A global install runs the installer automatically through
+`postinstall`; re-run `opencode-goal-mode --global` if auto-setup did not finish, or
+add `--force` to replace files you have edited. The installer copies the Goal agent,
+its reviewer subagents, the slash commands, the customization skill, and the guard
+plugin into `~/.config/opencode`, and registers the live sidebar in `tui.json`. In the
+agent picker you will see a single entry, `goal` — the reviewers are subagents it
+drives for you. The installer is idempotent (re-run to upgrade), records a manifest,
+and never overwrites files you have edited unless `--force` is passed; `--uninstall`
+removes exactly what it installed. Goal Mode uses whatever model and provider OpenCode
+is already configured with.
 
 <details>
 <summary>Other ways to install</summary>
@@ -176,58 +186,57 @@ cd opencode-goal-mode && npm ci && npm run install:global
 ```
 
 `--global` writes to `~/.config/opencode`; no flag writes to `./.opencode`; `--target`
-writes to the directory you pass. On upgrade it replaces files it owns but refuses to
-clobber files you've modified unless `--force` is passed.
+writes to a directory you pass. On upgrade it replaces the files it owns but refuses to
+overwrite files you have modified unless `--force` is passed.
 </details>
 
 ## Quick start
 
 ```bash
-# After installing + restarting OpenCode, confirm the primary agent loaded:
+# After installing and restarting OpenCode, confirm the primary agent loaded:
 opencode agent list | grep '^goal '
 ```
 
-`opencode agent list` shows `goal (primary)` — the one agent you select. The
-`goal-*` reviewer specialists also appear, each tagged `(subagent)`: those are
-invoked by the Goal agent, not picked by you. A bare `grep goal` therefore prints
-the whole `goal-*` family (and, depending on your config, the
-`"pattern": "goal-*"` permission line that locks the subagents to the Goal
-agent); the anchored `grep '^goal '` above isolates just the primary.
+`opencode agent list` shows `goal (primary)` — the single agent you select. The
+`goal-*` reviewer specialists also appear, each tagged `(subagent)`; those are invoked
+by the Goal agent, not chosen by you. A bare `grep goal` would therefore print the whole
+`goal-*` family; the anchored `grep '^goal '` above isolates just the primary.
 
-Then, in OpenCode, just give it a goal:
+Then, inside OpenCode, give it a goal:
 
-```
+```text
 /goal add rate limiting to the login endpoint and prove it works
 ```
 
 It writes a contract, delegates research to subagents, implements, and verifies — then
-stops and lets the guard run the reviews. It won't say `Goal Completed` until they pass.
-Want to feel the seatbelt? Ask it to `rm -rf build` mid-session and watch the guard slap
-it down.
+stops and lets the guard run the reviews. It will not say `Goal Completed` until they
+pass. To feel the guardrail directly, ask it to `rm -rf build` mid-session and watch
+the command get stopped.
 
-See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and [research/](research/) for
-the platform reference, comparison, and threat model.
+See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and
+[research/](research/) for the platform reference, the comparison, and the threat
+model.
 
 ### What the first run looks like
 
-- **You're in Goal Mode when the sidebar shows the goal banner.** Goal Mode is the
-  `goal` agent plus its guard; the live banner (objective, todos, review status) in
-  the TUI sidebar is the always-on indicator that it's active. Keep the sidebar
-  open — OpenCode's status bar doesn't expose a per-agent mode label, so the
-  sidebar banner is the canonical signal.
-- **It won't claim done until the gates pass.** After it implements and verifies,
-  the guard runs the review gates; a premature `Goal Completed` is rewritten to a
-  visible blocked marker until every required gate passes.
-- **Blocked commands tell you what and why.** When the guard stops a destructive
-  command it names both the offending command and the reason, e.g.
-  `Goal Guard blocked a destructive or high-risk bash command: `rm -rf build` (rm
-  -rf on a path). Use a safer, reversible command…` — so you can adjust rather
-  than guess. Tune this with `blockDestructive` / `toastOnBlock` (see below).
-
-## Configure it (or don't)
-
-Goal Mode works great with zero configuration. When you want to tune it, set options in
-`opencode.json` or `GOAL_GUARD_*` environment variables:
+- **You are in Goal Mode when the sidebar shows the goal banner.** Goal Mode is the
+  `goal` agent plus its guard; the live banner — objective, todos, review status — in
+  the TUI sidebar is the persistent indicator that it is active. Keep the sidebar open:
+  OpenCode's status bar does not expose a per-agent mode label, so the sidebar banner is
+  the canonical signal.
+- **It will not claim done until the gates pass.** After it implements and verifies, the
+  guard runs the review gates; a premature `Goal Completed` is rewritten to a visible
+  blocked marker until every required gate passes.
+- **Blocked commands explain what and why.** When the guard stops a destructive command
+  it names both the offending command and the reason, so you can adjust rather than
+  guess. Tune this behavior with `blockDestructive` and `toastOnBlock` (see below), or
+  relax it with `allowCommands` or `allowDestructive`; YOLO mode (`yolo`) drops the soft gates.
+
+## Configure it (or do not)
+
+Goal Mode works with zero configuration. When you want to tune it, set options in
+`opencode.json` or through `GOAL_GUARD_*` environment variables. The plugin is
+referenced by its installed path, which is how OpenCode passes it options:
 
 ```jsonc
 {
@@ -241,8 +250,8 @@ Goal Mode works great with zero configuration. When you want to tune it, set opt
 | --- | --- | --- |
 | `blockDestructive` / `GOAL_GUARD_BLOCK_DESTRUCTIVE` | `true` | Block destructive bash before execution. |
 | `blockNetworkExec` / `GOAL_GUARD_BLOCK_NETWORK_EXEC` | `true` | Block `curl \| sh`-style remote execution. |
-| `enforceCompletion` / `GOAL_GUARD_ENFORCE_COMPLETION` | `true` | Rewrite premature `Goal Completed`. |
-| `autoContinue` / `GOAL_GUARD_AUTO_CONTINUE` | `true` | Auto-continue an idle goal that isn't complete yet. |
+| `enforceCompletion` / `GOAL_GUARD_ENFORCE_COMPLETION` | `true` | Rewrite a premature `Goal Completed`. |
+| `autoContinue` / `GOAL_GUARD_AUTO_CONTINUE` | `true` | Auto-continue an idle goal that is not complete yet. |
 | `maxAutoContinue` / `GOAL_GUARD_MAX_AUTO_CONTINUE` | `50` | Hard cap on automatic continuations per goal session. |
 | `programmaticReview` / `GOAL_GUARD_PROGRAMMATIC_REVIEW` | `true` | Have the guard launch the required reviewers itself on idle (as subtasks on the goal session). |
 | `reviewTimeoutMs` / `GOAL_GUARD_REVIEW_TIMEOUT_MS` | `360000` | Per-reviewer wall-clock cap (ms) for a programmatic review. |
@@ -253,105 +262,116 @@ Goal Mode works great with zero configuration. When you want to tune it, set opt
 | `maxReviewCycles` / `GOAL_GUARD_MAX_REVIEW_CYCLES` | `12` | Hard cap on programmatic review runs per goal; on reaching it the guard pauses for you. |
 | `abortGraceMs` / `GOAL_GUARD_ABORT_GRACE_MS` | `1200` | Grace (ms) before an idle goal auto-continues, so a user cancel is always honored. |
 | `injectSystemState` / `GOAL_GUARD_INJECT_SYSTEM_STATE` | `true` | Inject live guard state into the prompt. |
-| `persist` / `GOAL_GUARD_PERSIST` | `true` | Persist state under the XDG state dir. |
-| `contextualGates` / `GOAL_GUARD_CONTEXTUAL_GATES` | `true` | Require specialist gates by goal keywords. |
+| `persist` / `GOAL_GUARD_PERSIST` | `true` | Persist state under the XDG state directory. |
+| `contextualGates` / `GOAL_GUARD_CONTEXTUAL_GATES` | `true` | Require specialist gates by goal keywords and changed files. |
 | `restrictSubagents` / `GOAL_GUARD_RESTRICT_SUBAGENTS` | `true` | Lock the `goal-*` subagents to the Goal agent. |
 | `maxSessions` / `GOAL_GUARD_MAX_SESSIONS` | `200` | Session cache size. |
-| `sessionTtlMs` / `GOAL_GUARD_SESSION_TTL_MS` | `86400000` | Idle session TTL. |
+| `sessionTtlMs` / `GOAL_GUARD_SESSION_TTL_MS` | `86400000` | Idle session TTL (ms). |
 | `toastOnBlock` / `GOAL_GUARD_TOAST_ON_BLOCK` | `true` | Toast when something is blocked. |
 | `toastOnReview` / `GOAL_GUARD_TOAST_ON_REVIEW` | `true` | Toast on each review verdict and when completion unlocks. |
 | `sidebarBanner` / `GOAL_GUARD_SIDEBAR_BANNER` | `true` | Show the live Goal todo section in the TUI sidebar. |
-| `sidebarColor` / `GOAL_GUARD_SIDEBAR_COLOR` | `#FFD700` | Colour of the GOAL label for a **running** goal. |
-| `sidebarDoneColor` / `GOAL_GUARD_SIDEBAR_DONE_COLOR` | `#FF5555` | Colour of a **done** goal in the sidebar. |
-| `sidebarMutedColor` / `GOAL_GUARD_SIDEBAR_MUTED_COLOR` | `#808080` | Foreground colour for **pending** Goal todo rows (□ items) while a goal is running. |
+| `sidebarColor` / `GOAL_GUARD_SIDEBAR_COLOR` | `#FFD700` | Color of the GOAL label for a **running** goal. |
+| `sidebarDoneColor` / `GOAL_GUARD_SIDEBAR_DONE_COLOR` | `#FF5555` | Color of a **done** goal in the sidebar. |
+| `sidebarMutedColor` / `GOAL_GUARD_SIDEBAR_MUTED_COLOR` | `#808080` | Color for **pending** Goal todo rows while a goal is running. |
 | `completionMarker` / `GOAL_GUARD_COMPLETION_MARKER` | `Goal Completed` | Phrase that, at the start of a message, claims completion. |
 | `blockedMarker` / `GOAL_GUARD_BLOCKED_MARKER` | `Goal Not Completed` | Replacement written when a completion claim is blocked. |
-| `yolo` / `GOAL_GUARD_YOLO` | `false` | **YOLO mode.** Relax the guard so it never blocks/nags for ordinary work — turns off network-exec blocking, completion enforcement, the Goal-only subagent lock, and block toasts. Destructive guarding stays on unless `allowDestructive` is also set. Any key you set explicitly still wins. |
-| `allowDestructive` / `GOAL_GUARD_ALLOW_DESTRUCTIVE` | `false` | Turn **off** destructive-command guarding. With `yolo: true` this is "full YOLO" — nothing is blocked and the agent has ALL rights. Works standalone too. Dangerous. |
-| `allowCommands` / `GOAL_GUARD_ALLOW_COMMANDS` | `[]` | Custom **allow-list**: a bash command matching ANY of these JS regex patterns is never blocked, whatever the analyzer thinks. Array, or a comma/newline-separated string (env). |
-| `extraDestructive` / `GOAL_GUARD_EXTRA_DESTRUCTIVE` | `[]` | Custom **deny-list**: a bash command matching ANY of these JS regex patterns is treated as destructive, extending the built-in analyzer with your own rules. |
+| `yolo` / `GOAL_GUARD_YOLO` | `false` | **YOLO mode.** Relax the soft gates — network-exec blocking, completion enforcement, the Goal-only subagent lock, and block toasts. Destructive guarding stays on unless `allowDestructive` is also set. Any key you set explicitly still wins. |
+| `allowDestructive` / `GOAL_GUARD_ALLOW_DESTRUCTIVE` | `false` | Turn **off** destructive-command guarding. With `yolo: true` this is full YOLO — nothing is blocked. Works on its own as well. Use with care. |
+| `allowCommands` / `GOAL_GUARD_ALLOW_COMMANDS` | `[]` | Allow-list: a bash command matching any of these JavaScript regular expressions is never blocked. Array, or a comma- or newline-separated string for the env var. |
+| `extraDestructive` / `GOAL_GUARD_EXTRA_DESTRUCTIVE` | `[]` | Deny-list: a bash command matching any of these JavaScript regular expressions is treated as destructive, extending the built-in analyzer. |
 
-### YOLO mode
+### YOLO mode and per-command rules
 
-Every gate is individually tunable, but YOLO is the one-switch escape hatch:
+Every gate is individually tunable, and YOLO mode is the one-switch escape hatch:
 
 ```jsonc
-// opencode.json — never blocks or asks for anything (ALL rights):
+// opencode.json — never blocks anything (full rights):
 ["./plugins/goal-guard.js", { "yolo": true, "allowDestructive": true }]
 ```
 
 ```bash
-# Or via env (e.g. for a throwaway sandbox):
+# Or via environment, for a throwaway sandbox:
 GOAL_GUARD_YOLO=1 GOAL_GUARD_ALLOW_DESTRUCTIVE=1 opencode
 ```
 
-- `yolo: true` alone → no completion gating, no subagent lock, no network-exec block, no toasts — but a destructive `rm -rf /` is **still** stopped.
-- add `allowDestructive: true` → that last guard drops too: full YOLO.
-- Prefer surgical control? Leave YOLO off and use `allowCommands` (whitelist exactly the commands you want to wave through) and/or `extraDestructive` (block extra ones), e.g. `{ "allowCommands": ["^docker compose ", "^rm -rf \\./tmp/"] }`.
+- `yolo: true` alone removes completion gating, the subagent lock, network-exec
+  blocking, and block toasts — but a destructive `rm -rf /` is **still** stopped.
+- Add `allowDestructive: true` and that last guard drops too: full YOLO.
+- For surgical control, leave YOLO off and use `allowCommands` to wave specific
+  commands through, or `extraDestructive` to block additional ones, for example
+  `{ "allowCommands": ["^docker compose ", "^rm -rf \\./tmp/"] }`.
+
+YOLO only relaxes keys you did not set explicitly, so a per-key option always wins.
 
-**Don't guess — use the tool.** `goal-config` (installed as `opencode-goal-mode-config`, or `node scripts/goal-config.mjs` in the repo) lists every key, explains how to set one, ships paste-ready recipes, and previews the resolved config:
+**Do not guess — use the tool.** `goal-config` (installed as
+`opencode-goal-mode-config`, or `node scripts/goal-config.mjs` from the repository)
+lists every key, explains how to set one, ships paste-ready recipes, and previews the
+resolved configuration:
 
 ```bash
-opencode-goal-mode-config list                         # every key: default, env var, what it does
-opencode-goal-mode-config recipe full-yolo             # paste-ready opencode.json snippet
-opencode-goal-mode-config effective '{"yolo":true}' --diff   # confirm what it resolves to
+opencode-goal-mode-config list                              # every key: default, env var, effect
+opencode-goal-mode-config recipe full-yolo                  # a paste-ready opencode.json snippet
+opencode-goal-mode-config effective '{"yolo":true}' --diff  # confirm what it resolves to
 ```
 
-The customization skill and the `/goal-mode-customize` command (both installed alongside the plugin) walk the agent through the discover → apply → verify loop on top of this tool.
+The customization skill and the `/goal-mode-customize` command, both installed
+alongside the plugin, walk the agent through a discover, apply, then verify loop built
+on this tool.
 
-**Slash commands:** `/goal`, `/goal-contract`, `/goal-review`, `/goal-evidence-map`,
-`/goal-status`, `/goal-repair`, `/goal-final`.
+**Slash commands:** `/goal`, `/goal-contract`, `/goal-review`, `/goal-evidence`,
+`/goal-evidence-map`, `/goal-status`, `/goal-repair`, `/goal-reset`, `/goal-final`,
+`/goal-mode-customize`.
 
 **Tools the model can call:** `goal_contract`, `goal_evidence`, `goal_evidence_map`,
 `goal_reviewer_memory`, `goal_status`, `goal_reset`.
 
 ## Troubleshooting
 
-- **`opencode agent list` doesn't show `goal`?** The agents didn't land where OpenCode
+- **`opencode agent list` does not show `goal`.** The agents did not land where OpenCode
   reads them — re-run `opencode-goal-mode --global` and restart OpenCode.
-- **No sidebar todo section?** TUI plugins load from `tui.json`, not `plugins/`. Confirm
-  `~/.config/opencode/tui.json` lists `opencode-goal-mode`, then fully restart OpenCode.
-  The sidebar is experimental and only shows inside a Goal session with a goal set;
-  enforcement works regardless of the sidebar.
-- **Reviews didn't kick off on their own?** Upgrade to **v0.6.9+**. After you stop
-  with work done, the guard automatically retries if the session is still busy —
-  you should **not** need to type "continue?". Reviewer subtasks launch **in parallel**
-  on the goal session (v0.6.11+) and the guard starts the next assistant turn with
-  fixes or completion — never as a fake user message (v0.6.10+).
-- **Explorer subagent prompting on basic shell?** Upgrade to v0.6.7+ — read-only
-  commands like `grep`, `cat`, and `sed` are pre-approved on `goal-explorer`.
-- **Goal agent stalling on Questions?** The primary `goal` agent has `question: deny`
-  (v0.6.7+); record assumptions in the Goal Contract instead.
-- **Goal Mode vanished after I opened the todo panel / switched agents?** Switching the
-  session off the `goal` agent (to Build/Plan, or via an action that cycles the agent)
-  intentionally pauses Goal Mode — the guard shows a toast and stops treating that turn
-  as a goal. Your Goal Contract, reviews, and evidence are preserved. Switch back to the
-  `goal` agent (or run `/goal`) to resume; the session re-activates with all state intact.
-- **Programmatic review not firing in headless `opencode serve`?** The idle watcher
-  reconnects on transient SSE drops (v0.6.12+) and warns if it cannot start; check the
-  server log for `goal-guard.watcher.*` events.
-- **A safe command got blocked?** Run `node benchmarks/external.mjs --json` to see how the
-  analyzer reads it, set `blockDestructive: false` for that project, and please
+- **No sidebar todo section.** TUI plugins load from `tui.json`, not the `plugins/`
+  directory. Confirm `~/.config/opencode/tui.json` lists `opencode-goal-mode`, then fully
+  restart OpenCode. The sidebar is experimental and only appears inside a Goal session
+  with a goal set; enforcement works regardless of the sidebar.
+- **Reviews did not start on their own.** After you stop with work done, the guard
+  retries automatically while the session is still busy, so you should not need to type
+  "continue". Reviewer subtasks launch on the goal session, and the guard starts the next
+  assistant turn with fixes or completion, never by posting a synthetic user message.
+- **The explorer subagent prompts on basic shell commands.** Read-only commands such as
+  `grep`, `cat`, and `sed -n` are pre-approved on `goal-explorer`.
+- **The goal agent stalls waiting on a question.** The primary `goal` agent sets
+  `question: deny`; it records assumptions in the Goal Contract and keeps working instead
+  of pausing.
+- **Goal Mode stopped after switching agents.** Switching the session off the `goal`
+  agent — to Build or Plan, or through an action that cycles the agent — intentionally
+  pauses Goal Mode; the guard shows a toast and stops treating that turn as a goal. Your
+  contract, reviews, and evidence are preserved. Switch back to the `goal` agent, or run
+  `/goal`, to resume with all state intact.
+- **A safe command was blocked.** Inspect how the analyzer reads it with
+  `node benchmarks/external.mjs --json`, allow it for that project with
+  `allowCommands`, and please
   [open an issue](https://github.com/devinoldenburg/opencode-goal-mode/issues).
 
 ## Good to know
 
-- **Requirements:** Node 20.11+, OpenCode configured to load local agents/commands/
-  plugins (tested against `@opencode-ai/plugin` 1.17.6, compatible with the 1.15+ hook
-  surface), and a working provider/model. Agents inherit your OpenCode default model.
-- **Safety:** The installer copies `agents/*.md`, `commands/*.md`, and `plugins/`,
-  merge-registers the sidebar in `tui.json`, and writes a manifest — never auth
-  files, tokens, or provider config. The guard is a guardrail, not a sandbox,
-  and fails open on input it can't parse; see [SECURITY.md](SECURITY.md) for the threat
-  model and a private reporting channel.
+- **Requirements.** Node 20.11 or newer, OpenCode configured to load local agents,
+  commands, and plugins (tested against `@opencode-ai/plugin` 1.17.6 and compatible with
+  the 1.15-and-later hook surface), and a working provider and model. Agents inherit your
+  OpenCode default model.
+- **Safety.** The installer copies `agents/`, `commands/`, `skills/`, and `plugins/`,
+  merge-registers the sidebar in `tui.json`, and writes a manifest. It never touches auth
+  files, tokens, or provider configuration. The guard is a guardrail, not a sandbox, and
+  fails closed on a parser error while failing open on genuinely unanalyzable input; see
+  [SECURITY.md](SECURITY.md) for the threat model and a private reporting channel.
 
 ## Contributing
 
-PRs welcome — [CONTRIBUTING.md](CONTRIBUTING.md) has the dev loop and release process,
-and [CHANGELOG.md](CHANGELOG.md) has the full history. Releases are automated and
-version-synced: one pushed `vX.Y.Z` tag runs the CI gate, publishes to npm, and creates
-the matching GitHub Release.
+Contributions are welcome. [CONTRIBUTING.md](CONTRIBUTING.md) covers the development
+loop and release process, and [CHANGELOG.md](CHANGELOG.md) records the full history.
+Releases are automated and version-synced: a single pushed `vX.Y.Z` tag runs the CI
+gate, publishes to npm, and creates the matching GitHub Release.
 
 ## License
 
-[MIT](LICENSE) · built for [OpenCode](https://opencode.ai).
+[MIT](LICENSE). Built for [OpenCode](https://opencode.ai).
+
diff --git a/SECURITY.md b/SECURITY.md
index 9272b0e..8763375 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -20,8 +20,8 @@ so always upgrade to the newest version.
 
 | Version | Supported |
 | --- | --- |
-| Latest published release (`0.6.x`) | ✅ |
-| Older `0.x` releases | ❌ — upgrade to the latest |
+| Latest published release (`0.7.x`) | Yes |
+| Older `0.x` releases | No — upgrade to the latest |
 
 ## Reporting a vulnerability
 
@@ -47,9 +47,10 @@ OpenCode Goal Mode is a defense-in-depth tool for an AI coding agent. The
 `goal-guard` plugin blocks destructive and remote-execution shell commands using
 a quote-aware tokenizer, but it is **not a sandbox**:
 
-- The analyzer **fails open** on un-analyzable / highly dynamic commands,
-  deferring to OpenCode's own permission rules. Treat it as a guardrail, not a
-  jail.
+- The analyzer **fails open** on un-analyzable or highly dynamic commands it cannot
+  resolve to a concrete form, deferring to OpenCode's own permission rules. (A
+  genuine parser *error*, by contrast, fails **closed** — the command is treated as
+  destructive and blocked.) Treat the guard as a guardrail, not a jail.
 - Gate freshness is only as trustworthy as the reviewer subagents' verdicts.
 - The installer copies `agents/*.md`, `commands/*.md`, and the `plugins/` tree,
   merge-registers the sidebar in `tui.json`, and writes a manifest — never auth
@@ -64,18 +65,20 @@ welcome.
 The shell analyzer is a heuristic classifier, not a sandbox, so a measurable
 fraction of genuinely destructive commands are **not** blocked.
 
-- **Fail-open rate (~6.7%).** On the external corpus of **704 real third-party
-  commands** the analyzer detects **93.3%** of destructive commands, so roughly
-  **6.7%** are not blocked and fall through to OpenCode's own permission rules.
-  See [research/benchmarks.md](research/benchmarks.md) for the methodology.
+- **Fail-open rate (about 7.7%).** On the external corpus of **704 real
+  third-party commands** the analyzer detects **92.3%** of destructive commands, so
+  roughly **7.7%** (8 of the 104 destructive commands) are not blocked and fall
+  through to OpenCode's own permission rules. See
+  [research/benchmarks.md](research/benchmarks.md) for the methodology.
 - **What fails open.** The remaining misses are dominated by two categories:
   1. **Intentionally permitted forms** — a plain single-target `rm <file>`
-     (and `rm -i`/`-v`/`-d`) is not blocked by design; the guard targets
-     `rm -r`/`rm -f`, command-substitution / `bash -c` / interpreter deletes,
-     and remote exec.
-  2. **Un-analyzable / highly dynamic commands** — when the tokenizer cannot
-     resolve a concrete command (variable interpolation, runtime-built strings,
-     parse failures) it returns "not blocked" rather than guessing.
+     (and `rm -i`, `-v`, `-d`) is not blocked by design; the guard targets
+     `rm -r` and `rm -f`; command-substitution, `bash -c`, and interpreter
+     deletes; and remote execution.
+  2. **Un-analyzable or highly dynamic commands** — when the tokenizer cannot
+     resolve a concrete command (variable interpolation, runtime-built strings) it
+     returns "not blocked" rather than guessing. A parser *error*, distinct from an
+     un-resolvable command, instead fails closed and blocks.
 - **Recommended mitigations** for the un-blocked tail: keep OpenCode's own
   permission rules enabled, run the agent against a clean working tree under
   version control, and add repo-side guards (git pre-commit hooks, protected
diff --git a/docs/benchmarks/detection-by-family.svg b/docs/benchmarks/detection-by-family.svg
index 9b1f1c1..699db45 100644
--- a/docs/benchmarks/detection-by-family.svg
+++ b/docs/benchmarks/detection-by-family.svg
@@ -1 +1,37 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="720" height="380" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif" viewBox="0 0 720 380"><rect width="720" height="380" fill="#fff"/><text x="48" y="28" fill="#1f2328" font-size="17" font-weight="700">Detection by family — curated regression fixtures</text><text x="48" y="47" fill="#656d76" font-size="12">Curated patterns the analyzer is built to catch (not an unbiased sample). 48 destructive fixtures.</text><line x1="48" x2="700" y1="296" y2="296" stroke="#eaeef2" stroke-width="1"/><text x="40" y="300" fill="#656d76" font-size="11" text-anchor="end">0%</text><line x1="48" x2="700" y1="249.6" y2="249.6" stroke="#eaeef2" stroke-width="1"/><text x="40" y="253.6" fill="#656d76" font-size="11" text-anchor="end">20%</text><line x1="48" x2="700" y1="203.2" y2="203.2" stroke="#eaeef2" stroke-width="1"/><text x="40" y="207.2" fill="#656d76" font-size="11" text-anchor="end">40%</text><line x1="48" x2="700" y1="156.8" y2="156.8" stroke="#eaeef2" stroke-width="1"/><text x="40" y="160.8" fill="#656d76" font-size="11" text-anchor="end">60%</text><line x1="48" x2="700" y1="110.4" y2="110.4" stroke="#eaeef2" stroke-width="1"/><text x="40" y="114.4" fill="#656d76" font-size="11" text-anchor="end">80%</text><line x1="48" x2="700" y1="64" y2="64" stroke="#eaeef2" stroke-width="1"/><text x="40" y="68" fill="#656d76" font-size="11" text-anchor="end">100%</text><rect width="96.7" height="232" x="56" y="64" fill="#9aa0a6" rx="3"/><text x="104.3" y="59" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">100%</text><rect width="96.7" height="232" x="160.7" y="64" fill="#2da44e" rx="3"/><text x="209" y="59" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">100%</text><text x="156.7" y="314" fill="#1f2328" font-size="11" text-anchor="middle">Classic</text><rect width="96.7" height="0" x="273.3" y="296" fill="#9aa0a6" rx="3"/><text x="321.7" y="291" fill="#1f2328" font-size="11" 
font-weight="600" text-anchor="middle">0%</text><rect width="96.7" height="232" x="378" y="64" fill="#2da44e" rx="3"/><text x="426.3" y="59" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">100%</text><text x="374" y="314" fill="#1f2328" font-size="11" text-anchor="middle">Obfuscated</text><rect width="96.7" height="0" x="490.7" y="296" fill="#9aa0a6" rx="3"/><text x="539" y="291" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">0%</text><rect width="96.7" height="232" x="595.3" y="64" fill="#2da44e" rx="3"/><text x="643.7" y="59" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">100%</text><text x="591.3" y="314" fill="#1f2328" font-size="11" text-anchor="middle">Remote exec</text><line x1="48" x2="700" y1="296" y2="296" stroke="#d0d7de" stroke-width="1.5"/><rect width="12" height="12" x="48" y="344" fill="#9aa0a6" rx="2"/><text x="66" y="354" fill="#1f2328" font-size="12">Legacy regex guard</text><rect width="12" height="12" x="201.6" y="344" fill="#2da44e" rx="2"/><text x="219.6" y="354" fill="#1f2328" font-size="12">Goal Mode analyzer</text></svg>
\ No newline at end of file
+<svg xmlns="http://www.w3.org/2000/svg" width="720" height="380" viewBox="0 0 720 380" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif">
+<rect width="720" height="380" fill="#ffffff"/>
+<text x="48" y="28" font-size="17" font-weight="700" fill="#1f2328">Detection by family — curated regression fixtures</text>
+<text x="48" y="47" font-size="12" fill="#656d76">Curated patterns the analyzer is built to catch (not an unbiased sample). 48 destructive fixtures.</text>
+<line x1="48" y1="296.0" x2="700" y2="296.0" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="300.0" font-size="11" text-anchor="end" fill="#656d76">0%</text>
+<line x1="48" y1="249.6" x2="700" y2="249.6" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="253.6" font-size="11" text-anchor="end" fill="#656d76">20%</text>
+<line x1="48" y1="203.2" x2="700" y2="203.2" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="207.2" font-size="11" text-anchor="end" fill="#656d76">40%</text>
+<line x1="48" y1="156.8" x2="700" y2="156.8" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="160.8" font-size="11" text-anchor="end" fill="#656d76">60%</text>
+<line x1="48" y1="110.4" x2="700" y2="110.4" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="114.4" font-size="11" text-anchor="end" fill="#656d76">80%</text>
+<line x1="48" y1="64.0" x2="700" y2="64.0" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="68.0" font-size="11" text-anchor="end" fill="#656d76">100%</text>
+<rect x="56.0" y="64.0" width="96.7" height="232.0" rx="3" fill="#9aa0a6"/>
+<text x="104.3" y="59.0" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">100%</text>
+<rect x="160.7" y="64.0" width="96.7" height="232.0" rx="3" fill="#2da44e"/>
+<text x="209.0" y="59.0" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">100%</text>
+<text x="156.7" y="314.0" font-size="11" text-anchor="middle" fill="#1f2328">Classic</text>
+<rect x="273.3" y="296.0" width="96.7" height="0.0" rx="3" fill="#9aa0a6"/>
+<text x="321.7" y="291.0" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">0%</text>
+<rect x="378.0" y="70.6" width="96.7" height="225.4" rx="3" fill="#2da44e"/>
+<text x="426.3" y="65.6" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">97%</text>
+<text x="374.0" y="314.0" font-size="11" text-anchor="middle" fill="#1f2328">Obfuscated</text>
+<rect x="490.7" y="296.0" width="96.7" height="0.0" rx="3" fill="#9aa0a6"/>
+<text x="539.0" y="291.0" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">0%</text>
+<rect x="595.3" y="64.0" width="96.7" height="232.0" rx="3" fill="#2da44e"/>
+<text x="643.7" y="59.0" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">100%</text>
+<text x="591.3" y="314.0" font-size="11" text-anchor="middle" fill="#1f2328">Remote exec</text>
+<line x1="48" y1="296" x2="700" y2="296" stroke="#d0d7de" stroke-width="1.5"/>
+<rect x="48" y="344" width="12" height="12" rx="2" fill="#9aa0a6"/>
+<text x="66" y="354" font-size="12" fill="#1f2328">Legacy regex guard</text>
+<rect x="201.6" y="344" width="12" height="12" rx="2" fill="#2da44e"/>
+<text x="219.6" y="354" font-size="12" fill="#1f2328">Goal Mode analyzer</text>
+</svg>
\ No newline at end of file
diff --git a/docs/benchmarks/external-scorecard.svg b/docs/benchmarks/external-scorecard.svg
index df3f18c..8635a4f 100644
--- a/docs/benchmarks/external-scorecard.svg
+++ b/docs/benchmarks/external-scorecard.svg
@@ -1 +1,32 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="720" height="380" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif" viewBox="0 0 720 380"><rect width="720" height="380" fill="#fff"/><text x="48" y="28" fill="#1f2328" font-size="17" font-weight="700">Guard accuracy on real third-party commands</text><text x="48" y="47" fill="#656d76" font-size="12">704 tldr-pages commands the analyzer was never fitted to. Detection higher = better; false positives lower = better.</text><line x1="48" x2="700" y1="296" y2="296" stroke="#eaeef2" stroke-width="1"/><text x="40" y="300" fill="#656d76" font-size="11" text-anchor="end">0%</text><line x1="48" x2="700" y1="249.6" y2="249.6" stroke="#eaeef2" stroke-width="1"/><text x="40" y="253.6" fill="#656d76" font-size="11" text-anchor="end">20%</text><line x1="48" x2="700" y1="203.2" y2="203.2" stroke="#eaeef2" stroke-width="1"/><text x="40" y="207.2" fill="#656d76" font-size="11" text-anchor="end">40%</text><line x1="48" x2="700" y1="156.8" y2="156.8" stroke="#eaeef2" stroke-width="1"/><text x="40" y="160.8" fill="#656d76" font-size="11" text-anchor="end">60%</text><line x1="48" x2="700" y1="110.4" y2="110.4" stroke="#eaeef2" stroke-width="1"/><text x="40" y="114.4" fill="#656d76" font-size="11" text-anchor="end">80%</text><line x1="48" x2="700" y1="64" y2="64" stroke="#eaeef2" stroke-width="1"/><text x="40" y="68" fill="#656d76" font-size="11" text-anchor="end">100%</text><rect width="151" height="124.9" x="56" y="171.1" fill="#9aa0a6" rx="3"/><text x="131.5" y="166.1" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">54%</text><rect width="151" height="216.4" x="215" y="79.6" fill="#2da44e" rx="3"/><text x="290.5" y="74.6" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">93%</text><text x="211" y="314" fill="#1f2328" font-size="11" text-anchor="middle">Detection rate</text><rect width="151" height=".4" x="382" y="295.6" fill="#9aa0a6" rx="3"/><text x="457.5" y="290.6" 
fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">0%</text><rect width="151" height=".4" x="541" y="295.6" fill="#2da44e" rx="3"/><text x="616.5" y="290.6" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">0%</text><text x="537" y="314" fill="#1f2328" font-size="11" text-anchor="middle">False-positive rate</text><line x1="48" x2="700" y1="296" y2="296" stroke="#d0d7de" stroke-width="1.5"/><rect width="12" height="12" x="48" y="344" fill="#9aa0a6" rx="2"/><text x="66" y="354" fill="#1f2328" font-size="12">Legacy regex guard</text><rect width="12" height="12" x="201.6" y="344" fill="#2da44e" rx="2"/><text x="219.6" y="354" fill="#1f2328" font-size="12">Goal Mode analyzer</text></svg>
\ No newline at end of file
+<svg xmlns="http://www.w3.org/2000/svg" width="720" height="380" viewBox="0 0 720 380" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif">
+<rect width="720" height="380" fill="#ffffff"/>
+<text x="48" y="28" font-size="17" font-weight="700" fill="#1f2328">Guard accuracy on real third-party commands</text>
+<text x="48" y="47" font-size="12" fill="#656d76">704 tldr-pages commands the analyzer was never fitted to. Detection higher = better; false positives lower = better.</text>
+<line x1="48" y1="296.0" x2="700" y2="296.0" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="300.0" font-size="11" text-anchor="end" fill="#656d76">0%</text>
+<line x1="48" y1="249.6" x2="700" y2="249.6" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="253.6" font-size="11" text-anchor="end" fill="#656d76">20%</text>
+<line x1="48" y1="203.2" x2="700" y2="203.2" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="207.2" font-size="11" text-anchor="end" fill="#656d76">40%</text>
+<line x1="48" y1="156.8" x2="700" y2="156.8" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="160.8" font-size="11" text-anchor="end" fill="#656d76">60%</text>
+<line x1="48" y1="110.4" x2="700" y2="110.4" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="114.4" font-size="11" text-anchor="end" fill="#656d76">80%</text>
+<line x1="48" y1="64.0" x2="700" y2="64.0" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="68.0" font-size="11" text-anchor="end" fill="#656d76">100%</text>
+<rect x="56.0" y="171.1" width="151.0" height="124.9" rx="3" fill="#9aa0a6"/>
+<text x="131.5" y="166.1" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">54%</text>
+<rect x="215.0" y="81.8" width="151.0" height="214.2" rx="3" fill="#2da44e"/>
+<text x="290.5" y="76.8" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">92%</text>
+<text x="211.0" y="314.0" font-size="11" text-anchor="middle" fill="#1f2328">Detection rate</text>
+<rect x="382.0" y="295.6" width="151.0" height="0.4" rx="3" fill="#9aa0a6"/>
+<text x="457.5" y="290.6" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">0%</text>
+<rect x="541.0" y="294.1" width="151.0" height="1.9" rx="3" fill="#2da44e"/>
+<text x="616.5" y="289.1" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">1%</text>
+<text x="537.0" y="314.0" font-size="11" text-anchor="middle" fill="#1f2328">False-positive rate</text>
+<line x1="48" y1="296" x2="700" y2="296" stroke="#d0d7de" stroke-width="1.5"/>
+<rect x="48" y="344" width="12" height="12" rx="2" fill="#9aa0a6"/>
+<text x="66" y="354" font-size="12" fill="#1f2328">Legacy regex guard</text>
+<rect x="201.6" y="344" width="12" height="12" rx="2" fill="#2da44e"/>
+<text x="219.6" y="354" font-size="12" fill="#1f2328">Goal Mode analyzer</text>
+</svg>
\ No newline at end of file
diff --git a/docs/benchmarks/latency.svg b/docs/benchmarks/latency.svg
index 77ef0dd..54ec48e 100644
--- a/docs/benchmarks/latency.svg
+++ b/docs/benchmarks/latency.svg
@@ -1 +1,13 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="720" height="164" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif" viewBox="0 0 720 164"><rect width="720" height="164" fill="#fff"/><text x="20" y="28" fill="#1f2328" font-size="17" font-weight="700">Per-command analysis latency</text><text x="20" y="47" fill="#656d76" font-size="12">Microseconds to classify one command. Both are negligible for a tool-call guard.</text><text x="218" y="87" fill="#1f2328" font-size="12" text-anchor="end">Legacy regex guard</text><rect width="420" height="22" x="230" y="70" fill="#eaeef2" rx="3"/><rect width="217.1" height="22" x="230" y="70" fill="#9aa0a6" rx="3"/><text x="455.1" y="87" fill="#1f2328" font-size="12" font-weight="600">0.75 µs</text><text x="218" y="125" fill="#1f2328" font-size="12" text-anchor="end">Goal Mode analyzer</text><rect width="420" height="22" x="230" y="108" fill="#eaeef2" rx="3"/><rect width="300" height="22" x="230" y="108" fill="#2da44e" rx="3"/><text x="538" y="125" fill="#1f2328" font-size="12" font-weight="600">1.03 µs</text></svg>
\ No newline at end of file
+<svg xmlns="http://www.w3.org/2000/svg" width="720" height="164" viewBox="0 0 720 164" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif">
+<rect width="720" height="164" fill="#ffffff"/>
+<text x="20" y="28" font-size="17" font-weight="700" fill="#1f2328">Per-command analysis latency</text>
+<text x="20" y="47" font-size="12" fill="#656d76">Microseconds to classify one command. Both are negligible for a tool-call guard.</text>
+<text x="218" y="87" font-size="12" text-anchor="end" fill="#1f2328">Legacy regex guard</text>
+<rect x="230" y="70" width="420" height="22" rx="3" fill="#eaeef2"/>
+<rect x="230" y="70" width="178.0" height="22" rx="3" fill="#9aa0a6"/>
+<text x="416.0" y="87" font-size="12" font-weight="600" fill="#1f2328">0.80 µs</text>
+<text x="218" y="125" font-size="12" text-anchor="end" fill="#1f2328">Goal Mode analyzer</text>
+<rect x="230" y="108" width="420" height="22" rx="3" fill="#eaeef2"/>
+<rect x="230" y="108" width="300.0" height="22" rx="3" fill="#2da44e"/>
+<text x="538.0" y="125" font-size="12" font-weight="600" fill="#1f2328">1.35 µs</text>
+</svg>
\ No newline at end of file
diff --git a/docs/benchmarks/overall-scorecard.svg b/docs/benchmarks/overall-scorecard.svg
index f578e5e..660b33c 100644
--- a/docs/benchmarks/overall-scorecard.svg
+++ b/docs/benchmarks/overall-scorecard.svg
@@ -1 +1,32 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="720" height="380" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif" viewBox="0 0 720 380"><rect width="720" height="380" fill="#fff"/><text x="48" y="28" fill="#1f2328" font-size="17" font-weight="700">Curated fixtures — passes its own spec</text><text x="48" y="47" fill="#656d76" font-size="12">Curated regression fixtures, not measured accuracy. See external-scorecard.svg for the real-world number.</text><line x1="48" x2="700" y1="296" y2="296" stroke="#eaeef2" stroke-width="1"/><text x="40" y="300" fill="#656d76" font-size="11" text-anchor="end">0%</text><line x1="48" x2="700" y1="249.6" y2="249.6" stroke="#eaeef2" stroke-width="1"/><text x="40" y="253.6" fill="#656d76" font-size="11" text-anchor="end">20%</text><line x1="48" x2="700" y1="203.2" y2="203.2" stroke="#eaeef2" stroke-width="1"/><text x="40" y="207.2" fill="#656d76" font-size="11" text-anchor="end">40%</text><line x1="48" x2="700" y1="156.8" y2="156.8" stroke="#eaeef2" stroke-width="1"/><text x="40" y="160.8" fill="#656d76" font-size="11" text-anchor="end">60%</text><line x1="48" x2="700" y1="110.4" y2="110.4" stroke="#eaeef2" stroke-width="1"/><text x="40" y="114.4" fill="#656d76" font-size="11" text-anchor="end">80%</text><line x1="48" x2="700" y1="64" y2="64" stroke="#eaeef2" stroke-width="1"/><text x="40" y="68" fill="#656d76" font-size="11" text-anchor="end">100%</text><rect width="151" height="48.3" x="56" y="247.7" fill="#9aa0a6" rx="3"/><text x="131.5" y="242.7" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">21%</text><rect width="151" height="232" x="215" y="64" fill="#2da44e" rx="3"/><text x="290.5" y="59" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">100%</text><text x="211" y="314" fill="#1f2328" font-size="11" text-anchor="middle">Detection rate</text><rect width="151" height="50.4" x="382" y="245.6" fill="#9aa0a6" rx="3"/><text x="457.5" y="240.6" fill="#1f2328" 
font-size="11" font-weight="600" text-anchor="middle">22%</text><rect width="151" height="0" x="541" y="296" fill="#2da44e" rx="3"/><text x="616.5" y="291" fill="#1f2328" font-size="11" font-weight="600" text-anchor="middle">0%</text><text x="537" y="314" fill="#1f2328" font-size="11" text-anchor="middle">False-positive rate</text><line x1="48" x2="700" y1="296" y2="296" stroke="#d0d7de" stroke-width="1.5"/><rect width="12" height="12" x="48" y="344" fill="#9aa0a6" rx="2"/><text x="66" y="354" fill="#1f2328" font-size="12">Legacy regex guard</text><rect width="12" height="12" x="201.6" y="344" fill="#2da44e" rx="2"/><text x="219.6" y="354" fill="#1f2328" font-size="12">Goal Mode analyzer</text></svg>
\ No newline at end of file
+<svg xmlns="http://www.w3.org/2000/svg" width="720" height="380" viewBox="0 0 720 380" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif">
+<rect width="720" height="380" fill="#ffffff"/>
+<text x="48" y="28" font-size="17" font-weight="700" fill="#1f2328">Curated fixtures — passes its own spec</text>
+<text x="48" y="47" font-size="12" fill="#656d76">Curated regression fixtures, not measured accuracy. See external-scorecard.svg for the real-world number.</text>
+<line x1="48" y1="296.0" x2="700" y2="296.0" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="300.0" font-size="11" text-anchor="end" fill="#656d76">0%</text>
+<line x1="48" y1="249.6" x2="700" y2="249.6" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="253.6" font-size="11" text-anchor="end" fill="#656d76">20%</text>
+<line x1="48" y1="203.2" x2="700" y2="203.2" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="207.2" font-size="11" text-anchor="end" fill="#656d76">40%</text>
+<line x1="48" y1="156.8" x2="700" y2="156.8" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="160.8" font-size="11" text-anchor="end" fill="#656d76">60%</text>
+<line x1="48" y1="110.4" x2="700" y2="110.4" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="114.4" font-size="11" text-anchor="end" fill="#656d76">80%</text>
+<line x1="48" y1="64.0" x2="700" y2="64.0" stroke="#eaeef2" stroke-width="1"/>
+<text x="40" y="68.0" font-size="11" text-anchor="end" fill="#656d76">100%</text>
+<rect x="56.0" y="247.7" width="151.0" height="48.3" rx="3" fill="#9aa0a6"/>
+<text x="131.5" y="242.7" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">21%</text>
+<rect x="215.0" y="68.8" width="151.0" height="227.2" rx="3" fill="#2da44e"/>
+<text x="290.5" y="63.8" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">98%</text>
+<text x="211.0" y="314.0" font-size="11" text-anchor="middle" fill="#1f2328">Detection rate</text>
+<rect x="382.0" y="245.6" width="151.0" height="50.4" rx="3" fill="#9aa0a6"/>
+<text x="457.5" y="240.6" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">22%</text>
+<rect x="541.0" y="296.0" width="151.0" height="0.0" rx="3" fill="#2da44e"/>
+<text x="616.5" y="291.0" font-size="11" font-weight="600" text-anchor="middle" fill="#1f2328">0%</text>
+<text x="537.0" y="314.0" font-size="11" text-anchor="middle" fill="#1f2328">False-positive rate</text>
+<line x1="48" y1="296" x2="700" y2="296" stroke="#d0d7de" stroke-width="1.5"/>
+<rect x="48" y="344" width="12" height="12" rx="2" fill="#9aa0a6"/>
+<text x="66" y="354" font-size="12" fill="#1f2328">Legacy regex guard</text>
+<rect x="201.6" y="344" width="12" height="12" rx="2" fill="#2da44e"/>
+<text x="219.6" y="354" font-size="12" fill="#1f2328">Goal Mode analyzer</text>
+</svg>
\ No newline at end of file
diff --git a/docs/benchmarks/results.json b/docs/benchmarks/results.json
index 00215f4..7685c03 100644
--- a/docs/benchmarks/results.json
+++ b/docs/benchmarks/results.json
@@ -15,14 +15,67 @@
       "destCaught": 56,
       "destTotal": 104,
       "safeFalsePos": 1,
-      "safeTotal": 600
+      "safeTotal": 600,
+      "misses": [
+        "blkdiscard -l 100MB /dev/device",
+        "blkdiscard -s /dev/device",
+        "blkdiscard /dev/device",
+        "curl https://sh.rustup.rs -sSf | sh -s",
+        "curl https://sh.rustup.rs -sSf | sh -s -- arguments",
+        "git filter-branch --env-filter 'GIT_AUTHOR_EMAIL=new_email' HEAD",
+        "git filter-branch --tree-filter 'rm -f file' HEAD",
+        "git filter-branch --tree-filter 'rm -rf folder' HEAD",
+        "parted -h",
+        "rm -d path/to/directory",
+        "rm -f .latest.bank.csv",
+        "rm -f path/to/file1 path/to/file2 ...",
+        "rm -i path/to/file1 path/to/file2 ...",
+        "rm -v $HOME/.cache/fuzzel",
+        "rm -v path/to/file1 path/to/file2 ...",
+        "rm /dev/sdXN",
+        "rm path/to/file1 path/to/file2 ...",
+        "rm torrent_id",
+        "sgdisk -L",
+        "srm -i \\*",
+        "srm -m path/to/file",
+        "srm -r -s path/to/directory",
+        "srm -s path/to/file",
+        "sudo fdisk -l",
+        "sudo fdisk /dev/sdX",
+        "sudo mkswap -c path/to/file",
+        "sudo mkswap -L label /dev/sdXY",
+        "sudo mkswap -s file_size -F path/to/swapfile",
+        "sudo mkswap -U clear|random|time|uuid_value",
+        "sudo mkswap path/to/file",
+        "sudo parted -l",
+        "sudo parted /dev/sdX",
+        "sudo parted /dev/sdX -s mklabel gpt mkpart \"boot_partition_name\" 0% 500MiB mkpart \"system_partition_name\" 500MiB 100%",
+        "sudo parted /dev/sdX mklabel aix|amiga|bsd|dvh|gpt|loop|mac|msdos|pc98|sun",
+        "sudo parted /dev/sdX set 1 boot on",
+        "sudo sgdisk -b /path/to/backup.gpt /dev/sdX",
+        "sudo sgdisk -d 1 /dev/sdX",
+        "sudo sgdisk -l /path/to/backup.gpt /dev/sdX",
+        "sudo sgdisk -m 1:2:3:4 /dev/sdX",
+        "sudo sgdisk -p /dev/sdX",
+        "sudo sgdisk -v /dev/sdX",
+        "sudo sgdisk -Z /dev/sdX",
+        "sudo wipefs -a -f /dev/sdX",
+        "sudo wipefs -a -n /dev/sdX",
+        "sudo wipefs -a /dev/sdX",
+        "sudo wipefs -a /dev/sdX*",
+        "sudo wipefs /dev/sdX",
+        "unlink path/to/file"
+      ],
+      "falsePositives": [
+        "git clean -i"
+      ]
     },
     "current": {
-      "detectionRate": 93.3,
-      "falsePositiveRate": 0.2,
-      "destCaught": 97,
+      "detectionRate": 92.3,
+      "falsePositiveRate": 0.8,
+      "destCaught": 96,
       "destTotal": 104,
-      "safeFalsePos": 1,
+      "safeFalsePos": 5,
       "safeTotal": 600,
       "misses": [
         "rm -d path/to/directory",
@@ -31,10 +84,15 @@
         "rm -v path/to/file1 path/to/file2 ...",
         "rm /dev/sdXN",
         "rm path/to/file1 path/to/file2 ...",
-        "rm torrent_id"
+        "rm torrent_id",
+        "unlink path/to/file"
       ],
       "falsePositives": [
-        "git filter-repo --path-rename path/to/folder/:"
+        "cryptsetup open --allow-discards /dev/sdXY mapping_name",
+        "dnf clean packages",
+        "docker rm container1 container2 ...",
+        "git filter-repo --path-rename path/to/folder/:",
+        "npm unpublish package_name -f"
       ]
     }
   },
@@ -75,13 +133,13 @@
           "safeFalsePos": 5
         }
       },
-      "opsPerSec": 1341168,
-      "usPerCommand": 0.75
+      "opsPerSec": 1246231,
+      "usPerCommand": 0.8
     },
     "current": {
-      "detectionRate": 100,
+      "detectionRate": 97.91666666666666,
       "falsePositiveRate": 0,
-      "destCaught": 48,
+      "destCaught": 47,
       "destTotal": 48,
       "safeFalsePos": 0,
       "safeTotal": 23,
@@ -94,7 +152,7 @@
         },
         "bypass": {
           "destTotal": 35,
-          "destCaught": 35,
+          "destCaught": 34,
           "safeTotal": 0,
           "safeFalsePos": 0
         },
@@ -111,8 +169,8 @@
           "safeFalsePos": 0
         }
       },
-      "opsPerSec": 970526,
-      "usPerCommand": 1.03
+      "opsPerSec": 739560,
+      "usPerCommand": 1.35
     }
   },
   "completionFixtures": {
diff --git a/docs/benchmarks/truthfulness-score.svg b/docs/benchmarks/truthfulness-score.svg
index a25f88f..93dd8bb 100644
--- a/docs/benchmarks/truthfulness-score.svg
+++ b/docs/benchmarks/truthfulness-score.svg
@@ -1 +1,17 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="720" height="202" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif" viewBox="0 0 720 202"><rect width="720" height="202" fill="#fff"/><text x="20" y="28" fill="#1f2328" font-size="17" font-weight="700">Completion-enforcement fixtures</text><text x="20" y="47" fill="#656d76" font-size="12">9 hand-authored policy cases (a spec, not a survey): premature claims blocked, valid ones allowed.</text><text x="218" y="87" fill="#1f2328" font-size="12" text-anchor="end">Truthfulness score</text><rect width="420" height="22" x="230" y="70" fill="#eaeef2" rx="3"/><rect width="420" height="22" x="230" y="70" fill="#2da44e" rx="3"/><text x="658" y="87" fill="#1f2328" font-size="12" font-weight="600">100.0%</text><text x="218" y="125" fill="#1f2328" font-size="12" text-anchor="end">Decision accuracy</text><rect width="420" height="22" x="230" y="108" fill="#eaeef2" rx="3"/><rect width="420" height="22" x="230" y="108" fill="#0969da" rx="3"/><text x="658" y="125" fill="#1f2328" font-size="12" font-weight="600">100.0%</text><text x="218" y="163" fill="#1f2328" font-size="12" text-anchor="end">Reason accuracy</text><rect width="420" height="22" x="230" y="146" fill="#eaeef2" rx="3"/><rect width="420" height="22" x="230" y="146" fill="#bf8700" rx="3"/><text x="658" y="163" fill="#1f2328" font-size="12" font-weight="600">100.0%</text></svg>
\ No newline at end of file
+<svg xmlns="http://www.w3.org/2000/svg" width="720" height="202" viewBox="0 0 720 202" font-family="-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif">
+<rect width="720" height="202" fill="#ffffff"/>
+<text x="20" y="28" font-size="17" font-weight="700" fill="#1f2328">Completion-enforcement fixtures</text>
+<text x="20" y="47" font-size="12" fill="#656d76">9 hand-authored policy cases (a spec, not a survey): premature claims blocked, valid ones allowed.</text>
+<text x="218" y="87" font-size="12" text-anchor="end" fill="#1f2328">Truthfulness score</text>
+<rect x="230" y="70" width="420" height="22" rx="3" fill="#eaeef2"/>
+<rect x="230" y="70" width="420.0" height="22" rx="3" fill="#2da44e"/>
+<text x="658.0" y="87" font-size="12" font-weight="600" fill="#1f2328">100.0%</text>
+<text x="218" y="125" font-size="12" text-anchor="end" fill="#1f2328">Decision accuracy</text>
+<rect x="230" y="108" width="420" height="22" rx="3" fill="#eaeef2"/>
+<rect x="230" y="108" width="420.0" height="22" rx="3" fill="#0969da"/>
+<text x="658.0" y="125" font-size="12" font-weight="600" fill="#1f2328">100.0%</text>
+<text x="218" y="163" font-size="12" text-anchor="end" fill="#1f2328">Reason accuracy</text>
+<rect x="230" y="146" width="420" height="22" rx="3" fill="#eaeef2"/>
+<rect x="230" y="146" width="420.0" height="22" rx="3" fill="#bf8700"/>
+<text x="658.0" y="163" font-size="12" font-weight="600" fill="#1f2328">100.0%</text>
+</svg>
\ No newline at end of file
diff --git a/research/benchmarks.md b/research/benchmarks.md
index b87c775..c111810 100644
--- a/research/benchmarks.md
+++ b/research/benchmarks.md
@@ -49,18 +49,20 @@ Representative run (sample of 704 commands: 104 destructive, 600 safe):
 
 | On real third-party commands | Legacy regex guard | Goal Mode analyzer |
 | --- | --- | --- |
-| Detection rate | 53.8% | **93.3%** |
-| False-positive rate | 0.2% | 0.2% |
+| Detection rate | 53.8% | **92.3%** (96 of 104 destructive) |
+| False-positive rate | 0.2% | 0.8% (5 of 600 safe) |
 
 Reading the result honestly:
 
-- The remaining Goal Mode misses are almost entirely un-flagged single-target
+- The eight remaining Goal Mode misses are almost entirely un-flagged single-target
   `rm <file>` (and `rm -i`/`-v`/`-d`), which the guard **intentionally permits**:
   it blocks `rm -r`/`rm -f`, command-substitution/`bash -c`/interpreter deletes,
   and remote exec, but not a plain single-file `rm`. Under the strict
   every-`rm`-is-destructive labeler these are counted as misses.
-- The one counted false positive (`git filter-repo …`) genuinely rewrites
-  history, so the real-world false-positive rate is effectively zero. Run
+- The five counted false positives are debatable rather than clearly wrong:
+  `npm unpublish … -f`, `git filter-repo …`, `docker rm …`, `cryptsetup open …`,
+  and `dnf clean packages` each remove, rewrite, or permit discarding data, so flagging
+  them is arguably correct; the strict labeler simply marks them safe. Run
   `node benchmarks/external.mjs --json` to see the full miss / false-positive
   lists.
 - This benchmark directly drove real fixes: `mkfs.<fstype>` variants, `srm`, and