From 8e14b13cba074db9756af47fa82c8891ffa9861e Mon Sep 17 00:00:00 2001
From: Devin Oldenburg <devin@logicplanes.com>
Date: Sun, 21 Jun 2026 13:31:50 +0200
Subject: [PATCH 1/2] fix(core): state perms, contract validation, cleanup +
 unit-test coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- #72: write guard state files with mode 0600 (owner-only); they hold goal
  text/contract/evidence and were landing at the default umask (world-readable).
- #151: validate/coerce goal_contract string-array fields (acceptanceCriteria,
  requirements, inferred, nonGoals) via coerceStringList so a non-string element
  is dropped instead of stored as "[object Object]".
- #218: add persistence cleanup()/remove() to prune orphaned per-worktree state
  files by on-disk TTL (DEFAULT_PERSIST_TTL_MS = 7d), preserving the current
  worktree and fresh siblings; sweep stray .tmp files; ignore unrelated files.
- #244: prefix every blocked-completion rewrite with an unambiguous guard
  sentinel ("⛔ [Goal Guard · blocked completion]") so an intervention can't be
  confused with a benign "I am not done yet" message; marker flip is preserved.
- #262: deepen logger.js coverage (synthetic-turn edge cases, guardPrompt error
  path/return values, emitGoalCompleted format, best-effort info/warn/toast).
- #291: add direct unit tests for completion/events/system/summary/agents.
- #373: make plugin.test.mjs assert loudly with an ERR_MODULE_NOT_FOUND hint
  instead of a vague falsy check when the tool hook is absent.
- #374: add tests/concurrency.test.mjs proving concurrent goal sessions complete
  independently with no cross-contamination.
- #375: add case-insensitive last-wins parseVerdict coverage (no source change).
- #67: clean up temp dirs in install.test.mjs and persistence.test.mjs (t.after).

Note: guard.js is owned elsewhere; the persistence cleanup primitive is added and
unit-tested here, to be wired into the guard dispose/flush path by that owner.
---
 plugins/goal-guard/completion.js  |  18 ++++
 plugins/goal-guard/persistence.js | 127 +++++++++++++++++++++-
 plugins/goal-guard/tools.js       |  53 ++++++++-
 tests/agents-unit.test.mjs        | 100 +++++++++++++++++
 tests/completion.test.mjs         | 149 +++++++++++++++++++++++++
 tests/concurrency.test.mjs        | 126 ++++++++++++++++++++++
 tests/events.test.mjs             | 163 ++++++++++++++++++++++++++++
 tests/install.test.mjs            |  63 ++++++-----
 tests/logger.test.mjs             | 143 ++++++++++++++++++++++++
 tests/persistence.test.mjs        | 144 ++++++++++++++++++++++++-
 tests/plugin.test.mjs             |  13 ++-
 tests/summary.test.mjs            | 173 ++++++++++++++++++++++++++++++
 tests/system.test.mjs             | 118 ++++++++++++++++++++
 tests/tools.test.mjs              | 127 ++++++++++++++++++++++
 tests/verdicts.test.mjs           |  11 ++
 15 files changed, 1493 insertions(+), 35 deletions(-)
 create mode 100644 tests/agents-unit.test.mjs
 create mode 100644 tests/completion.test.mjs
 create mode 100644 tests/concurrency.test.mjs
 create mode 100644 tests/events.test.mjs
 create mode 100644 tests/summary.test.mjs
 create mode 100644 tests/system.test.mjs
 create mode 100644 tests/tools.test.mjs

diff --git a/plugins/goal-guard/completion.js b/plugins/goal-guard/completion.js
index e0b4244..27b9a7d 100644
--- a/plugins/goal-guard/completion.js
+++ b/plugins/goal-guard/completion.js
@@ -33,6 +33,19 @@ import { summarizeState } from "./summary.js";
 
 const CYCLES_RE = /Review cycles:\s*(\d+)/gi;
 
+/**
+ * Unambiguous guard sentinel prefixed to every blocked-completion rewrite (issue
+ * #244). The blocked MARKER itself (`Goal Not Completed` by default) is textually
+ * identical to a normal "I am honestly not done yet" agent sentence, so a user
+ * skimming the transcript cannot tell a guard intervention from benign agent
+ * prose. This sentinel is something ONLY the guard emits — it carries the
+ * bracketed `[Goal Guard]` namespace plus a stop glyph and the word "blocked" — so
+ * `rg "Goal Guard · blocked"` (or the bracket) finds exactly the interventions
+ * and never an ordinary message. It is emitted on its OWN leading line, ABOVE the
+ * (still-flipped) marker line, so existing marker/markdown-prefix detection is
+ * unaffected. */
+export const GUARD_BLOCK_SENTINEL = "⛔ [Goal Guard · blocked completion]";
+
 // Invisible / formatting code points that carry no glyph and must never break up
 // or hide the marker: zero-width space/joiners, bidi controls, word-joiner, soft
 // hyphen, BOM.
@@ -136,6 +149,11 @@ export function evaluateCompletionClaim(state, config, text) {
   if (replacement === text) {
     replacement = `${blockedMarker}\n\n${replacement}`;
   }
+  // Prefix the unambiguous guard sentinel on its OWN leading line so the rewrite is
+  // visibly a guard artifact and searchable, never conflated with a normal "I am not
+  // done yet" agent message (issue #244). The marker line itself is left flipped in
+  // place below it, so existing marker/markdown-prefix detection still holds.
+  replacement = `${GUARD_BLOCK_SENTINEL}\n${replacement}`;
   replacement += `\n\nGoal Guard blocked completion: ${reason}. State: ${summary}`;
   return { blocked: true, reason, replacement, claimedCycles };
 }
diff --git a/plugins/goal-guard/persistence.js b/plugins/goal-guard/persistence.js
index 93b3498..f5033d8 100644
--- a/plugins/goal-guard/persistence.js
+++ b/plugins/goal-guard/persistence.js
@@ -14,10 +14,22 @@
  */
 
 import { createHash } from "node:crypto";
-import { mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync } from "node:fs";
+import { mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync, readdirSync, statSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
 
+/**
+ * On-disk retention for orphaned state files (issue #218). The in-memory
+ * `sessionTtlMs` only governs when an in-memory record may be dropped; it does
+ * NOT remove the per-worktree file from disk. Without an independent on-disk TTL,
+ * `$XDG_STATE_HOME/opencode/goal-guard/` accumulates one `<hash>.json` file per
+ * worktree the user has EVER opened a goal in, holding the verbatim goal text,
+ * the contract, and the evidence — forever. Default is 7 days, deliberately
+ * longer than the 24h in-memory TTL so a long-paused goal is not pruned while a
+ * sibling worktree is still actively saving.
+ */
+export const DEFAULT_PERSIST_TTL_MS = 7 * 24 * 60 * 60 * 1000;
+
 /** Resolve the base directory for guard state files. */
 export function stateBaseDir(env = process.env) {
   const xdg = env.XDG_STATE_HOME && env.XDG_STATE_HOME.trim();
@@ -41,6 +53,7 @@ export function projectKey(worktree) {
  * @param {Record<string,string|undefined>} [opts.env]
  * @param {(fn: () => void, ms: number) => any} [opts.setTimer]  Injectable for tests.
  * @param {(handle: any) => void} [opts.clearTimer]
+ * @param {number} [opts.persistTtlMs]  On-disk TTL for orphaned files (issue #218).
  */
 export function createPersistence({
   worktree,
@@ -49,11 +62,13 @@ export function createPersistence({
   env = process.env,
   setTimer = (fn, ms) => setTimeout(fn, ms),
   clearTimer = (h) => clearTimeout(h),
+  persistTtlMs = DEFAULT_PERSIST_TTL_MS,
   fs: fsImpl,
 } = {}) {
-  // Injectable fs (tests exercise the EXDEV path); defaults to node:fs.
+  // Injectable fs (tests exercise the EXDEV/cleanup paths); defaults to node:fs.
   const fs = {
     mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync,
+    readdirSync, statSync,
     ...(fsImpl || {}),
   };
   const dir = stateBaseDir(env);
@@ -79,7 +94,13 @@ export function createPersistence({
     if (!enabled || degraded) return false;
     try {
       fs.mkdirSync(dir, { recursive: true });
-      fs.writeFileSync(tmp, JSON.stringify(data), "utf8");
+      // mode 0600 (owner read/write only): the snapshot holds the verbatim goal
+      // text, the contract, the changed-file paths, the verification evidence and
+      // the reviewer findings — all of which would otherwise land at the default
+      // umask (typically 0644, world-readable) and leak to any local user on a
+      // shared/multi-user host (issue #72). The mode is set on the temp file BEFORE
+      // the rename so the target is never momentarily world-readable.
+      fs.writeFileSync(tmp, JSON.stringify(data), { encoding: "utf8", mode: 0o600 });
       try {
         fs.renameSync(tmp, file);
       } catch (err) {
@@ -141,11 +162,111 @@ export function createPersistence({
     return writeNow(fn()); // true on success, false on a real write failure
   }
 
+  /**
+   * Prune orphaned state files (issue #218).
+   *
+   * The store is keyed one file per worktree, so the directory grows unbounded:
+   * one `<hash>.json` per worktree the user has ever opened a goal in, never
+   * removed. This sweep deletes any OTHER worktree's file (never the current
+   * one) when it is stale, where "stale" means EITHER:
+   *   - its mtime is older than `persistTtlMs` (default 7 days), OR
+   *   - its embedded `updatedAt` is older than `persistTtlMs`.
+   * The current worktree's file is always preserved so an in-flight goal is never
+   * dropped out from under a live session. Also removes the leftover `.tmp` files.
+   *
+   * All disk access is best-effort: a read-only/sandboxed FS, a missing dir, or an
+   * unreadable entry degrades to a no-op rather than throwing into a hook.
+   *
+   * @param {object} [opts]
+   * @param {number} [opts.now]  Injectable wall clock (ms) for tests.
+   * @returns {{ removed: string[], scanned: number }}
+   */
+  function cleanup({ now = Date.now() } = {}) {
+    const removed = [];
+    if (!enabled || degraded) return { removed, scanned: 0 };
+    let entries;
+    try {
+      entries = fs.readdirSync(dir);
+    } catch {
+      return { removed, scanned: 0 }; // dir does not exist yet → nothing to prune
+    }
+    const cutoff = now - persistTtlMs;
+    const currentName = `${projectKey(worktree)}.json`;
+    let scanned = 0;
+    for (const name of entries) {
+      // Only touch our own JSON snapshots (and their temp siblings); never any
+      // unrelated file a user may have placed in the dir.
+      const isTmp = name.endsWith(".json.tmp");
+      const isJson = name.endsWith(".json");
+      if (!isTmp && !isJson) continue;
+      // Never prune the file backing the CURRENT worktree — it may be actively saving.
+      if (name === currentName) continue;
+      scanned += 1;
+      const full = join(dir, name);
+      // A stray temp file is always removable (an interrupted write left it behind).
+      if (isTmp) {
+        try {
+          fs.rmSync(full, { force: true });
+          removed.push(full);
+        } catch {
+          /* ignore */
+        }
+        continue;
+      }
+      let stale = false;
+      try {
+        const st = fs.statSync(full);
+        const mtime = st.mtimeMs !== undefined ? st.mtimeMs : (st.mtime ? new Date(st.mtime).getTime() : 0);
+        if (mtime < cutoff) stale = true;
+        if (!stale) {
+          // mtime is recent, but the embedded updatedAt may still be ancient (the
+          // file was touched without a logical update). Parse and check it too.
+          try {
+            const parsed = JSON.parse(fs.readFileSync(full, "utf8"));
+            const ts = parsed && parsed.updatedAt ? new Date(parsed.updatedAt).getTime() : NaN;
+            if (Number.isFinite(ts) && ts < cutoff) stale = true;
+          } catch {
+            /* unparseable → leave the mtime decision stand */
+          }
+        }
+      } catch {
+        continue; // unreadable → skip rather than risk deleting something live
+      }
+      if (!stale) continue;
+      try {
+        fs.rmSync(full, { force: true });
+        removed.push(full);
+      } catch {
+        /* ignore */
+      }
+    }
+    return { removed, scanned };
+  }
+
+  /** Remove the current worktree's state file (and its temp sibling). Used on a
+   * full goal_reset / dispose when the caller wants the on-disk snapshot gone too. */
+  function remove() {
+    if (!enabled) return false;
+    let ok = false;
+    for (const target of [file, tmp]) {
+      try {
+        fs.rmSync(target, { force: true });
+        ok = true;
+      } catch {
+        /* ignore */
+      }
+    }
+    return ok;
+  }
+
   return {
     file,
+    dir,
     load,
     save,
     flush,
+    cleanup,
+    remove,
     isDegraded: () => degraded,
   };
 }
diff --git a/plugins/goal-guard/tools.js b/plugins/goal-guard/tools.js
index 9a5cf9b..15c5969 100644
--- a/plugins/goal-guard/tools.js
+++ b/plugins/goal-guard/tools.js
@@ -57,6 +57,34 @@ export function goalSimilarity(a, b) {
 // same goal that PRESERVES accrued review progress.
 export const SAME_GOAL_THRESHOLD = 0.4;
 
+/**
+ * Coerce an array of contract strings (acceptance criteria, requirements, …) into
+ * a clean list of non-empty strings (issue #151).
+ *
+ * The tool schema declares these as `array(string())`, but a model can still pass
+ * a malformed element — an object, a number, null — which previously flowed
+ * straight into storage. `String({foo:'bar'})` is the literal "[object Object]",
+ * which then rendered as a garbled todo row in the sidebar and as a meaningless
+ * acceptance criterion in the contract. Stringify the elements that have a sensible
+ * string form (string/number/boolean), and DROP the ones that do not (objects,
+ * arrays, null/undefined) rather than persisting "[object Object]". Each survivor
+ * is whitespace-collapsed and empties are removed.
+ */
+export function coerceStringList(value) {
+  if (!Array.isArray(value)) return [];
+  const out = [];
+  for (const item of value) {
+    let str;
+    if (typeof item === "string") str = item;
+    else if (typeof item === "number" && Number.isFinite(item)) str = String(item);
+    else if (typeof item === "boolean") str = String(item);
+    else continue; // object/array/null/undefined/NaN → has no meaningful string form
+    const trimmed = str.replace(/\s+/g, " ").trim();
+    if (trimmed) out.push(trimmed);
+  }
+  return out;
+}
+
 /**
  * @param {object} deps
  * @param {ReturnType<import("./state.js").createStore>} deps.store
@@ -193,6 +221,16 @@ export function createGoalTools({ store, config, persist, onReset }) {
         const state = store.stateFor(key);
         if (!requireGoalMode(state)) return goalModeOnlyResult();
         state.active = true;
+        // Validate/coerce the string-array fields up front (issue #151): a model can
+        // pass a non-string element (object/number/null) that the schema does not
+        // catch, which otherwise stored "[object Object]" as an acceptance criterion
+        // and rendered a garbled sidebar todo. coerceStringList drops the unstringifiable
+        // ones and normalizes the rest. Use the cleaned lists for BOTH the similarity
+        // corpus and the stored contract so the two never disagree.
+        const cleanRequirements = coerceStringList(args.requirements);
+        const cleanInferred = coerceStringList(args.inferred);
+        const cleanNonGoals = coerceStringList(args.nonGoals);
+        const cleanAcceptanceCriteria = coerceStringList(args.acceptanceCriteria);
         // A genuinely NEW goal in this session (a different verbatim request) must
         // not inherit the previous goal's gates, verdicts, dirty flags, review
         // cycles, or accumulated goal text — otherwise the TUI sidebar keeps showing
@@ -222,7 +260,12 @@ export function createGoalTools({ store, config, persist, onReset }) {
             [c?.original, c?.title, ...(c?.requirements || []), ...(c?.acceptanceCriteria || [])]
               .filter(Boolean)
               .join(" ");
-          const incoming = contractCorpus(args);
+          const incoming = contractCorpus({
+            original: args.original,
+            title: args.title,
+            requirements: cleanRequirements,
+            acceptanceCriteria: cleanAcceptanceCriteria,
+          });
           const existing = contractCorpus(state.contract);
           const sim = goalSimilarity(incoming, existing);
           const isNewGoal = sim < 0 ? true : sim < SAME_GOAL_THRESHOLD;
@@ -234,10 +277,10 @@ export function createGoalTools({ store, config, persist, onReset }) {
         state.contract = {
           title: String(args.title || "").replace(/\s+/g, " ").trim(),
           original: String(args.original || ""),
-          requirements: args.requirements || [],
-          inferred: args.inferred || [],
-          nonGoals: args.nonGoals || [],
-          acceptanceCriteria: args.acceptanceCriteria || [],
+          requirements: cleanRequirements,
+          inferred: cleanInferred,
+          nonGoals: cleanNonGoals,
+          acceptanceCriteria: cleanAcceptanceCriteria,
           at: store.nowIso(),
         };
         // Append the contract's original to the rolling goal text used for
diff --git a/tests/agents-unit.test.mjs b/tests/agents-unit.test.mjs
new file mode 100644
index 0000000..c0bf71b
--- /dev/null
+++ b/tests/agents-unit.test.mjs
@@ -0,0 +1,100 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import {
+  isReviewAgent,
+  isGoalAgent,
+  isPrimaryAgent,
+  goalSessionActiveForAgent,
+  prettyAgentName,
+  PRIMARY_AGENT,
+  REVIEW_AGENTS,
+  GOAL_AGENTS,
+  CYCLE_CLOSING_AGENT,
+  BASE_GATES,
+} from "../plugins/goal-guard/agents.js";
+import { createState } from "../plugins/goal-guard/state.js";
+
+// Direct unit coverage for agents.js classifiers (issue #291). Named
+// agents-unit.test.mjs because tests/agents.test.mjs validates the agent .md files.
+
+test("#291 isReviewAgent: true for review gates, false for workers/unknown/empty", () => {
+  assert.equal(isReviewAgent("goal-reviewer"), true);
+  assert.equal(isReviewAgent("goal-final-auditor"), true);
+  assert.equal(isReviewAgent("goal-implementer"), false, "a worker is not a reviewer");
+  assert.equal(isReviewAgent("goal"), false, "the primary is not a reviewer");
+  assert.equal(isReviewAgent("totally-unknown"), false);
+  assert.equal(isReviewAgent(""), false);
+  assert.equal(isReviewAgent(null), false);
+  assert.equal(isReviewAgent(undefined), false);
+});
+
+test("#291 isGoalAgent: true for primary/workers/reviewers, false for build/plan/unknown", () => {
+  assert.equal(isGoalAgent("goal"), true);
+  assert.equal(isGoalAgent("goal-explorer"), true);
+  assert.equal(isGoalAgent("goal-security-reviewer"), true);
+  assert.equal(isGoalAgent("build"), false);
+  assert.equal(isGoalAgent("plan"), false);
+  assert.equal(isGoalAgent("unknown"), false);
+  assert.equal(isGoalAgent(null), false);
+});
+
+test("#291 isPrimaryAgent: only the exact `goal` agent is primary", () => {
+  assert.equal(isPrimaryAgent("goal"), true);
+  assert.equal(isPrimaryAgent(PRIMARY_AGENT), true);
+  assert.equal(isPrimaryAgent("goal-implementer"), false);
+  assert.equal(isPrimaryAgent("goal-reviewer"), false);
+  assert.equal(isPrimaryAgent(""), false);
+  assert.equal(isPrimaryAgent(null), false);
+});
+
+test("#291 every BASE_GATE and CYCLE_CLOSING_AGENT is a known review agent", () => {
+  for (const gate of BASE_GATES) assert.equal(isReviewAgent(gate), true, `${gate} must be a review agent`);
+  assert.equal(isReviewAgent(CYCLE_CLOSING_AGENT), true);
+  // Sanity: the canonical lists are internally consistent.
+  for (const r of REVIEW_AGENTS) assert.ok(GOAL_AGENTS.includes(r), `${r} should be in GOAL_AGENTS`);
+});
+
+test("#291 goalSessionActiveForAgent: the primary agent always activates, even with null state", () => {
+  assert.equal(goalSessionActiveForAgent("goal", null), true);
+  assert.equal(goalSessionActiveForAgent("goal", undefined), true);
+});
+
+test("#291 goalSessionActiveForAgent: non-goal agents never activate, regardless of state", () => {
+  const st = createState();
+  st.active = true;
+  st.contract = { title: "x" };
+  assert.equal(goalSessionActiveForAgent("build", st), false);
+  assert.equal(goalSessionActiveForAgent("plan", st), false);
+  assert.equal(goalSessionActiveForAgent("unknown", st), false);
+});
+
+test("#291 goalSessionActiveForAgent: a goal worker keeps an ANCHORED session active, not a blank one", () => {
+  const blank = createState();
+  assert.equal(goalSessionActiveForAgent("goal-implementer", blank), false, "no anchored work → not active");
+  assert.equal(goalSessionActiveForAgent("goal-implementer", null), false, "null state → not active");
+
+  const withContract = createState();
+  withContract.contract = { title: "t" };
+  assert.equal(goalSessionActiveForAgent("goal-implementer", withContract), true);
+
+  const withEdit = createState();
+  withEdit.lastEditSeq = 5;
+  assert.equal(goalSessionActiveForAgent("goal-explorer", withEdit), true);
+
+  const withEvidence = createState();
+  withEvidence.evidence = [{ command: "x" }];
+  assert.equal(goalSessionActiveForAgent("goal-verifier", withEvidence), true);
+
+  const withFiles = createState();
+  withFiles.changedFiles = ["a.js"];
+  assert.equal(goalSessionActiveForAgent("goal-architect", withFiles), true);
+});
+
+test("#291 prettyAgentName: drops the goal- prefix, title-cases, keeps acronyms upper", () => {
+  assert.equal(prettyAgentName("goal-security-reviewer"), "Security Reviewer");
+  assert.equal(prettyAgentName("goal-api-reviewer"), "API Reviewer");
+  assert.equal(prettyAgentName("goal-ux-reviewer"), "UX Reviewer");
+  assert.equal(prettyAgentName("goal-final-auditor"), "Final Auditor");
+  assert.equal(prettyAgentName(""), "");
+  assert.equal(prettyAgentName(null), "");
+});
diff --git a/tests/completion.test.mjs b/tests/completion.test.mjs
new file mode 100644
index 0000000..541b5f3
--- /dev/null
+++ b/tests/completion.test.mjs
@@ -0,0 +1,149 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { evaluateCompletionClaim, GUARD_BLOCK_SENTINEL } from "../plugins/goal-guard/completion.js";
+import { createStore, createState } from "../plugins/goal-guard/state.js";
+import { recordVerdict } from "../plugins/goal-guard/verdicts.js";
+import { markEdit } from "../plugins/goal-guard/events.js";
+import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js";
+import { BASE_GATES } from "../plugins/goal-guard/agents.js";
+
+// Direct unit coverage for completion.js (issue #291) — previously only exercised
+// through the experimental.text.complete hook.
+
+// A config with NO contextual gates so the required set is exactly BASE_GATES.
+const CFG = { ...DEFAULT_CONFIG, contextualGates: false };
+
+/** Build an active goal state whose base gates have all PASSED with fresh seqs and
+ *  the requested number of recorded review cycles, so completion is ALLOWED. */
+function allowedState(cycles = 1) {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  st.goalText = "do a generic thing";
+  // Pass every base gate; the final-auditor pass is what increments reviewCycles.
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  // Drive reviewCycles up to the requested count with intervening edits.
+  while (st.reviewCycles < cycles) {
+    markEdit(store, st, "more work");
+    for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  }
+  st.dirty = false;
+  return { store, st };
+}
+
+test("#291 evaluateCompletionClaim: empty/no text is not blocked", () => {
+  assert.deepEqual(evaluateCompletionClaim(createState(), CFG, ""), { blocked: false });
+  assert.deepEqual(evaluateCompletionClaim(createState(), CFG, null), { blocked: false });
+});
+
+test("#291 a marker claim in an INACTIVE session is never policed", () => {
+  const st = createState(); // active=false
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 1");
+  assert.equal(res.blocked, false);
+});
+
+test("#291 a mid-sentence MENTION of the marker is not a claim", () => {
+  const st = createState();
+  st.active = true;
+  const res = evaluateCompletionClaim(st, CFG, "I have not Goal Completed this yet.");
+  assert.equal(res.blocked, false);
+});
+
+test("#291 active claim with a MISSING Review cycles line is blocked with the right reason", () => {
+  const st = createState();
+  st.active = true;
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed");
+  assert.equal(res.blocked, true);
+  assert.match(res.reason, /missing required Review cycles line/);
+});
+
+test("#291 active claim with 'Review cycles: 0' and no recorded cycles is blocked", () => {
+  const st = createState();
+  st.active = true;
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 0");
+  assert.equal(res.blocked, true);
+  assert.match(res.reason, /no review cycles recorded/);
+  assert.equal(res.claimedCycles, 0);
+});
+
+test("#291 claimed cycle count must match the recorded count", () => {
+  const { st } = allowedState(1);
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 7");
+  assert.equal(res.blocked, true);
+  assert.match(res.reason, /claimed review cycles \(7\) do not match recorded review cycles \(1\)/);
+});
+
+test("#291 the LAST 'Review cycles: N' line is the claim (earlier mentions ignored)", () => {
+  const { st } = allowedState(2);
+  // An earlier recap line cites a wrong number; the conclusion line is correct.
+  const text = "Earlier I noted Review cycles: 1 (a recap).\n\nGoal Completed\n\nReview cycles: 2";
+  const res = evaluateCompletionClaim(st, CFG, text);
+  assert.equal(res.blocked, false, "the trailing conclusive count matches, so completion is allowed");
+  assert.equal(res.claimedCycles, 2);
+});
+
+test("#291 a fully-satisfied active claim is ALLOWED (not blocked)", () => {
+  const { st } = allowedState(1);
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 1");
+  assert.equal(res.blocked, false);
+  assert.equal(res.claimedCycles, 1);
+});
+
+test("#291 a blocked claim with missing gates names the missing gate(s)", () => {
+  const st = createState();
+  st.active = true;
+  st.reviewCycles = 1; // pretend a cycle ran but gates are not fresh/passing
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 1");
+  assert.equal(res.blocked, true);
+  assert.match(res.reason, /required review gates are missing or stale/);
+});
+
+// --- issue #244: the rewrite is an unambiguous, searchable guard artifact ---
+
+test("#244/#291 a blocked rewrite carries the unambiguous guard sentinel on its own leading line", () => {
+  const st = createState();
+  st.active = true;
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 0");
+  assert.equal(res.blocked, true);
+  // The sentinel is something only the guard emits — distinct from any normal
+  // "I am not done yet" agent prose, and searchable.
+  assert.ok(res.replacement.startsWith(GUARD_BLOCK_SENTINEL), "sentinel must lead the rewrite");
+  assert.match(res.replacement, /\[Goal Guard · blocked completion\]/);
+  // The marker line is still flipped (existing detection contract preserved).
+  assert.match(res.replacement, /Goal Not Completed/);
+  assert.match(res.replacement, /Goal Guard blocked completion: /);
+});
+
+test("#244 the sentinel makes a guard rewrite distinguishable from a benign 'Goal Not Completed' message", () => {
+  // A normal agent message can literally contain "Goal Not Completed". The guard's
+  // rewrite must be set apart by a token that benign prose would never produce.
+  const benign = "I made a start but Goal Not Completed — I will continue next turn.";
+  assert.doesNotMatch(benign, /\[Goal Guard · blocked completion\]/);
+  const st = createState();
+  st.active = true;
+  const res = evaluateCompletionClaim(st, CFG, "Goal Completed");
+  assert.match(res.replacement, /\[Goal Guard · blocked completion\]/);
+});
+
+test("#291 the marker-line markdown prefix is preserved when flipped", () => {
+  const st = createState();
+  st.active = true;
+  const res = evaluateCompletionClaim(st, CFG, "## Goal Completed\n\nReview cycles: 0");
+  assert.match(res.replacement, /## Goal Not Completed/);
+});
+
+test("#291 a homoglyph-smuggled marker is still blocked (normalized detection)", () => {
+  const st = createState();
+  st.active = true;
+  // Cyrillic 'о' in "Gоal" — must be detected and blocked.
+  const res = evaluateCompletionClaim(st, CFG, "Gоal Completed\n\nReview cycles: 0");
+  assert.equal(res.blocked, true);
+  assert.ok(res.replacement.startsWith(GUARD_BLOCK_SENTINEL));
+});
+
+test("#291 a custom blockedMarker is honored in the flip", () => {
+  const st = createState();
+  st.active = true;
+  const res = evaluateCompletionClaim(st, { ...CFG, blockedMarker: "NOPE NOT DONE" }, "Goal Completed\n\nReview cycles: 0");
+  assert.match(res.replacement, /NOPE NOT DONE/);
+});
diff --git a/tests/concurrency.test.mjs b/tests/concurrency.test.mjs
new file mode 100644
index 0000000..143e4e9
--- /dev/null
+++ b/tests/concurrency.test.mjs
@@ -0,0 +1,126 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { createGuard } from "../plugins/goal-guard/guard.js";
+
+// Issue #374: two concurrent goal sessions must complete INDEPENDENTLY — each with
+// its own verdicts, gates, evidence, and review cycles — with no cross-contamination.
+// Integration tests covered a single session and `file.edited dirties the in-flight
+// goal`, but never proved two live sessions stay isolated through completion.
+
+const noopPersistence = { load: () => null, save: () => {}, flush: () => false, file: "", isDegraded: () => false };
+
+function makeGuard(options = {}) {
+  let t = 0;
+  return createGuard(
+    { client: { app: { log: async () => undefined }, tui: { showToast: async () => undefined } } },
+    options,
+    { persistence: noopPersistence, clock: () => (t += 1), syncIdle: true },
+  );
+}
+
+const BASE = ["goal-prompt-auditor", "goal-reviewer", "goal-diff-reviewer", "goal-verifier", "goal-final-auditor"];
+
+async function startGoal(hooks, sid, goalText) {
+  await hooks["chat.params"]({ sessionID: sid, agent: "goal" }, {});
+  await hooks["chat.message"]({ sessionID: sid, agent: "goal" }, { parts: [{ type: "text", text: goalText }] });
+}
+
+async function edit(hooks, sid, callID) {
+  await hooks["tool.execute.after"]({ tool: "edit", sessionID: sid, callID, args: {} }, { output: "", title: "", metadata: {} });
+}
+
+async function passGates(hooks, sid, gates) {
+  for (const agent of gates) {
+    await hooks["tool.execute.after"](
+      { tool: "task", sessionID: sid, callID: agent, args: { subagent_type: agent } },
+      { output: "Verdict: PASS", title: "", metadata: {} },
+    );
+  }
+}
+
+async function tryComplete(hooks, sid, cycles) {
+  const out = { text: `Goal Completed\n\nReview cycles: ${cycles}` };
+  await hooks["experimental.text.complete"]({ sessionID: sid, messageID: "m", partID: "p" }, out);
+  return out.text;
+}
+
+test("#374 two concurrent goal sessions complete independently without interference", async () => {
+  const { hooks, store } = makeGuard({ contextualGates: false });
+  const A = "sessA";
+  const B = "sessB";
+
+  // Both goals start and both implement (interleaved, simulating concurrency).
+  await startGoal(hooks, A, "Refactor the billing module");
+  await startGoal(hooks, B, "Improve the search ranking");
+  await edit(hooks, A, "a1");
+  await edit(hooks, B, "b1");
+
+  // Drive A all the way to a clean review; leave B unreviewed.
+  await passGates(hooks, A, BASE);
+
+  // A can complete; B is still blocked — proving no leakage of A's passes onto B.
+  assert.doesNotMatch(await tryComplete(hooks, A, 1), /Goal Not Completed/, "A is fully reviewed → allowed");
+  assert.match(await tryComplete(hooks, B, 1), /Goal Not Completed/, "B has no passing gates → blocked");
+
+  // Each session carries ITS OWN verdicts / cycle count.
+  const sa = store.stateFor(A);
+  const sb = store.stateFor(B);
+  assert.equal(sa.reviewCycles, 1, "A closed one cycle");
+  assert.equal(sb.reviewCycles, 0, "B closed no cycle");
+  assert.ok(sa.latestVerdict["goal-final-auditor"], "A has an auditor verdict");
+  assert.equal(sb.latestVerdict["goal-final-auditor"], undefined, "B has no auditor verdict");
+
+  // Now bring B to completion; A must remain complete and unaffected.
+  await passGates(hooks, B, BASE);
+  assert.doesNotMatch(await tryComplete(hooks, B, 1), /Goal Not Completed/, "B now completes on its own gates");
+  assert.doesNotMatch(await tryComplete(hooks, A, 1), /Goal Not Completed/, "A is still complete");
+  assert.equal(store.stateFor(A).reviewCycles, 1, "A's cycle count is untouched by B");
+  assert.equal(store.stateFor(B).reviewCycles, 1, "B's own cycle count");
+});
+
+test("#374 an edit in one session does not dirty or re-block the other", async () => {
+  const { hooks, store } = makeGuard({ contextualGates: false });
+  const A = "isolA";
+  const B = "isolB";
+  await startGoal(hooks, A, "Add export to CSV");
+  await startGoal(hooks, B, "Add import from CSV");
+  await edit(hooks, A, "a1");
+  await edit(hooks, B, "b1");
+  await passGates(hooks, A, BASE);
+  await passGates(hooks, B, BASE);
+
+  // A makes a NEW edit (re-opening A's gates). B must stay clean and completable.
+  await hooks["chat.params"]({ sessionID: A, agent: "goal" }, {});
+  await edit(hooks, A, "a2");
+
+  assert.equal(store.stateFor(A).dirty, true, "A is dirty again");
+  assert.equal(store.stateFor(B).dirty, false, "B is unaffected by A's edit");
+  assert.match(await tryComplete(hooks, A, 1), /Goal Not Completed/, "A re-blocked by its own fresh edit");
+  assert.doesNotMatch(await tryComplete(hooks, B, 1), /Goal Not Completed/, "B still completes independently");
+});
+
+test("#374 different contextual goals require DIFFERENT gates, scoped per session", async () => {
+  const { hooks, store } = makeGuard(); // contextual gates ON
+  const SEC = "secSess";
+  const DOC = "docSess";
+  // A security goal pulls in goal-security-reviewer; a docs goal pulls in goal-doc-reviewer.
+  await startGoal(hooks, SEC, "Harden authentication and fix the auth token vulnerability");
+  await startGoal(hooks, DOC, "Update the README and project documentation");
+  await edit(hooks, SEC, "s1");
+  await edit(hooks, DOC, "d1");
+
+  // Pass only the base gates for both.
+  await passGates(hooks, SEC, BASE);
+  await passGates(hooks, DOC, BASE);
+
+  // Each is still blocked on its OWN contextual gate, not the other's.
+  const secOut = await tryComplete(hooks, SEC, 1);
+  const docOut = await tryComplete(hooks, DOC, 1);
+  assert.match(secOut, /goal-security-reviewer/, "security session names its security gate");
+  assert.match(docOut, /goal-doc-reviewer/, "docs session names its doc gate");
+  // Cross-check: the security session is NOT blocked on the doc reviewer and vice-versa.
+  assert.doesNotMatch(secOut, /goal-doc-reviewer/);
+  assert.doesNotMatch(docOut, /goal-security-reviewer/);
+
+  void store;
+});
diff --git a/tests/events.test.mjs b/tests/events.test.mjs
new file mode 100644
index 0000000..ef4c3ea
--- /dev/null
+++ b/tests/events.test.mjs
@@ -0,0 +1,163 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import {
+  seededTitle,
+  maybeAutoSeedContract,
+  markEdit,
+  markVerification,
+  markFileChanged,
+  recordEvidence,
+  maybeClearDirtyOnFinalPass,
+} from "../plugins/goal-guard/events.js";
+import { createStore, createState } from "../plugins/goal-guard/state.js";
+import { recordVerdict } from "../plugins/goal-guard/verdicts.js";
+import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js";
+import { BASE_GATES, CYCLE_CLOSING_AGENT } from "../plugins/goal-guard/agents.js";
+
+// Direct unit coverage for events.js — the state-mutation layer (issue #291).
+
+const CFG = { ...DEFAULT_CONFIG, contextualGates: false };
+
+test("#291 seededTitle: short goals pass through, trailing punctuation stripped", () => {
+  assert.equal(seededTitle("Add a rate limiter"), "Add a rate limiter");
+  assert.equal(seededTitle("Fix the bug."), "Fix the bug");
+  assert.equal(seededTitle(""), "");
+  assert.equal(seededTitle(null), "");
+});
+
+test("#291 seededTitle: long goals are clipped near 8 words, not mid-connective", () => {
+  const t = seededTitle("Add a configurable token bucket rate limiter to the login endpoint for safety");
+  const words = t.split(" ");
+  assert.ok(words.length <= 8, `expected <=8 words, got ${words.length}: ${t}`);
+  // It must not dangle on a trailing connective like "to"/"for".
+  assert.doesNotMatch(t, /\b(to|for|with|and|of|in|on)$/i);
+});
+
+test("#291 maybeAutoSeedContract: no-op without an active session, goal text, or when a contract exists", () => {
+  const store = createStore();
+  assert.equal(maybeAutoSeedContract(store, null), false);
+  const inactive = createState();
+  inactive.goalText = "do a thing";
+  assert.equal(maybeAutoSeedContract(store, inactive), false, "inactive → no seed");
+  const noText = createState();
+  noText.active = true;
+  assert.equal(maybeAutoSeedContract(store, noText), false, "no goal text → no seed");
+  const hasContract = createState();
+  hasContract.active = true;
+  hasContract.goalText = "x";
+  hasContract.contract = { title: "t" };
+  assert.equal(maybeAutoSeedContract(store, hasContract), false, "existing contract → no seed");
+});
+
+test("#291 maybeAutoSeedContract: seeds a minimal auto contract with empty acceptance criteria", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  st.goalText = "  Add rate limiting   to the login endpoint  ";
+  assert.equal(maybeAutoSeedContract(store, st), true);
+  assert.equal(st.contract.auto, true);
+  assert.deepEqual(st.contract.acceptanceCriteria, []);
+  assert.ok(st.contract.title.length > 0);
+  assert.match(st.contract.original, /Add rate limiting to the login endpoint/);
+});
+
+test("#291 markEdit: activates, dirties, advances seq, and bounds dirtyReasons", () => {
+  const store = createStore();
+  const st = createState();
+  markEdit(store, st, "first");
+  assert.equal(st.active, true);
+  assert.equal(st.dirty, true);
+  assert.ok(st.lastEditSeq > 0);
+  const firstSeq = st.lastEditSeq;
+  markEdit(store, st, "second");
+  assert.ok(st.lastEditSeq > firstSeq, "seq is monotonic");
+  for (let i = 0; i < 80; i += 1) markEdit(store, st, `r${i}`);
+  assert.ok(st.dirtyReasons.length <= 50, "dirtyReasons is bounded to 50");
+});
+
+test("#291 markEdit: a missing reason falls back to a timestamped default", () => {
+  const store = createStore();
+  const st = createState();
+  markEdit(store, st);
+  assert.match(st.dirtyReasons[0], /^edit at /);
+});
+
+test("#291 markVerification: sets verificationSeen and returns the stamped seq", () => {
+  const store = createStore();
+  const st = createState();
+  const seq = markVerification(store, st);
+  assert.equal(st.verificationSeen, true);
+  assert.equal(st.lastVerificationSeq, seq);
+  assert.ok(seq > 0);
+});
+
+test("#291 markFileChanged: dedupes files, bounds the list, and marks an edit", () => {
+  const store = createStore();
+  const st = createState();
+  markFileChanged(store, st, "a.js");
+  markFileChanged(store, st, "a.js"); // duplicate must not be added twice
+  markFileChanged(store, st, "b.js");
+  assert.deepEqual(st.changedFiles, ["a.js", "b.js"]);
+  assert.equal(st.dirty, true);
+  // An empty/whitespace name is ignored.
+  markFileChanged(store, st, "   ");
+  assert.equal(st.changedFiles.length, 2);
+  for (let i = 0; i < 250; i += 1) markFileChanged(store, st, `f${i}.js`);
+  assert.ok(st.changedFiles.length <= 200, "changedFiles is bounded to 200");
+});
+
+test("#291 recordEvidence: pushes an entry, caps criteria to 50, and marks verification", () => {
+  const store = createStore();
+  const st = createState();
+  const manyCriteria = Array.from({ length: 80 }, (_, i) => `c${i}`);
+  recordEvidence(store, st, "npm test", "all green", manyCriteria);
+  assert.equal(st.evidence.length, 1);
+  const entry = st.evidence[0];
+  assert.equal(entry.command, "npm test");
+  assert.equal(entry.result, "all green");
+  assert.equal(entry.criteria.length, 50, "criteria truncated to 50");
+  assert.ok(entry.seq > 0, "evidence is stamped with the verification seq");
+  assert.equal(st.verificationSeen, true);
+});
+
+test("#291 recordEvidence: bounds the evidence log to 100 entries", () => {
+  const store = createStore();
+  const st = createState();
+  for (let i = 0; i < 130; i += 1) recordEvidence(store, st, `cmd${i}`, "ok", []);
+  assert.ok(st.evidence.length <= 100);
+});
+
+test("#291 recordEvidence: non-array criteria become an empty list", () => {
+  const store = createStore();
+  const st = createState();
+  recordEvidence(store, st, "cmd", "ok", "not-an-array");
+  assert.deepEqual(st.evidence[0].criteria, []);
+});
+
+test("#291 maybeClearDirtyOnFinalPass: clears dirty only when the auditor passed fresh AND completion is allowed", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  st.dirty = true;
+  // No fresh auditor pass yet → no clear.
+  assert.equal(maybeClearDirtyOnFinalPass(st, CFG), false);
+  assert.equal(st.dirty, true);
+
+  // Pass all base gates fresh (final auditor closes the cycle).
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  assert.equal(maybeClearDirtyOnFinalPass(st, CFG), true);
+  assert.equal(st.dirty, false);
+  assert.deepEqual(st.dirtyReasons, []);
+});
+
+test("#291 maybeClearDirtyOnFinalPass: a stale auditor pass (edit after pass) does not clear dirty", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  // A new edit AFTER the passes makes the auditor pass stale.
+  markEdit(store, st, "more changes after review");
+  assert.equal(maybeClearDirtyOnFinalPass(st, CFG), false);
+  assert.equal(st.dirty, true);
+  void CYCLE_CLOSING_AGENT;
+});
diff --git a/tests/install.test.mjs b/tests/install.test.mjs
index a2b6646..799abe5 100644
--- a/tests/install.test.mjs
+++ b/tests/install.test.mjs
@@ -11,6 +11,21 @@ import { readRepo } from "./helpers.mjs";
 const repoRoot = fileURLToPath(new URL("..", import.meta.url));
 const installer = join(repoRoot, "scripts", "install.mjs");
 
+// Track every temp dir created via mkTemp so the whole suite cleans up after
+// itself (issue #67: ~9 mkdtempSync calls × 2 files each were leaked into /tmp
+// on every run, accumulating over time and risking CI disk exhaustion).
+const TEMP_DIRS = [];
+function mkTemp(prefix) {
+  const dir = mkdtempSync(join(tmpdir(), prefix));
+  TEMP_DIRS.push(dir);
+  return dir;
+}
+test.after(() => {
+  for (const dir of TEMP_DIRS.splice(0)) {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
 function run(args, opts = {}) {
   return execFileSync("node", [installer, ...args], { stdio: "pipe", encoding: "utf8", ...opts });
 }
@@ -47,7 +62,7 @@ test("gitignore excludes secrets, dependencies, and local runtime data", () => {
 });
 
 test("installer copies components, including the nested plugin module directory", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-"));
+  const temp = mkTemp("goal-install-");
   run(["--target", join(temp, "cfg")]);
   const cfg = join(temp, "cfg");
   assert.equal(existsSync(join(cfg, "agents", "goal.md")), true);
@@ -67,7 +82,7 @@ test("installer help shows the public one-command install flow", () => {
 });
 
 test("installed plugin actually loads from its target location", async () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-load-"));
+  const temp = mkTemp("goal-install-load-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]);
   const mod = await import(join(cfg, "plugins", "goal-guard.js"));
@@ -78,13 +93,13 @@ test("installed plugin actually loads from its target location", async () => {
 });
 
 test("installer dry run does not write files", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-dry-"));
+  const temp = mkTemp("goal-install-dry-");
   run(["--target", join(temp, "cfg"), "--dry-run"]);
   assert.equal(existsSync(join(temp, "cfg")), false);
 });
 
 test("installer is idempotent (second run reports all unchanged)", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-idem-"));
+  const temp = mkTemp("goal-install-idem-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]);
   const out = run(["--target", cfg]);
@@ -92,7 +107,7 @@ test("installer is idempotent (second run reports all unchanged)", () => {
 });
 
 test("installer refuses to overwrite a locally-modified file", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-conflict-"));
+  const temp = mkTemp("goal-install-conflict-");
   const cfg = join(temp, "cfg");
   mkdirSync(join(cfg, "agents"), { recursive: true });
   writeFileSync(join(cfg, "agents", "goal.md"), "local change\n");
@@ -100,7 +115,7 @@ test("installer refuses to overwrite a locally-modified file", () => {
 });
 
 test("installer force replaces a locally-modified file", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-force-"));
+  const temp = mkTemp("goal-install-force-");
   const cfg = join(temp, "cfg");
   mkdirSync(join(cfg, "agents"), { recursive: true });
   const dest = join(cfg, "agents", "goal.md");
@@ -110,7 +125,7 @@ test("installer force replaces a locally-modified file", () => {
 });
 
 test("installer can upgrade a file it owns without --force", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-upgrade-"));
+  const temp = mkTemp("goal-install-upgrade-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]);
   // Simulate a previously-shipped version: dest matches a stale manifest hash.
@@ -129,7 +144,7 @@ test("installer can upgrade a file it owns without --force", () => {
 });
 
 test("installer supports an explicit target directory", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-install-target-"));
+  const temp = mkTemp("goal-install-target-");
   const target = join(temp, "custom-opencode");
   run(["--target", target]);
   assert.equal(existsSync(join(target, "agents", "goal.md")), true);
@@ -137,7 +152,7 @@ test("installer supports an explicit target directory", () => {
 });
 
 test("uninstall removes installed files but keeps locally-modified ones", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-uninstall-"));
+  const temp = mkTemp("goal-uninstall-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]);
   const modified = join(cfg, "agents", "goal.md");
@@ -149,7 +164,7 @@ test("uninstall removes installed files but keeps locally-modified ones", () =>
 });
 
 test("installer refuses invalid tui.json unless --force backs it up", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-invalid-tui-"));
+  const temp = mkTemp("goal-invalid-tui-");
   const cfg = join(temp, "cfg");
   mkdirSync(cfg, { recursive: true });
   writeFileSync(join(cfg, "tui.json"), "{bad json\n");
@@ -162,7 +177,7 @@ test("REGRESSION: a non-object tui.json (JSON array) is refused, not silently co
   // A JSON array is typeof "object", so the old `typeof existing === "object"`
   // check adopted it, set .plugin on the array (which JSON.stringify drops), and
   // reported success while the sidebar entry was silently lost. Refuse non-objects.
-  const temp = mkdtempSync(join(tmpdir(), "goal-array-tui-"));
+  const temp = mkTemp("goal-array-tui-");
   const cfg = join(temp, "cfg");
   mkdirSync(cfg, { recursive: true });
   writeFileSync(join(cfg, "tui.json"), "[]\n");
@@ -180,7 +195,7 @@ test("REGRESSION: uninstall cleans tui.json before removing the manifest (orphan
   // re-run can still reach the manifest and complete. Previously the manifest was
   // removed first, leaving a stuck sidebar reference that --uninstall could no
   // longer touch (the manifest was already gone).
-  const temp = mkdtempSync(join(tmpdir(), "goal-uninstall-order-"));
+  const temp = mkTemp("goal-uninstall-order-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]); // normal install
   const manifestPath = join(cfg, ".goal-mode-manifest.json");
@@ -211,7 +226,7 @@ test("REGRESSION: --target '~' is expanded to the home directory", () => {
 });
 
 test("uninstall ignores tampered manifest paths outside the target", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-manifest-safe-"));
+  const temp = mkTemp("goal-manifest-safe-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]);
   const manifest = JSON.parse(readFileSync(join(cfg, ".goal-mode-manifest.json"), "utf8"));
@@ -221,7 +236,7 @@ test("uninstall ignores tampered manifest paths outside the target", () => {
 });
 
 test("issue #20: a clean install + uninstall leaves no Goal Mode files behind (tui.json removed)", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-clean-cycle-"));
+  const temp = mkTemp("goal-clean-cycle-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]);
   assert.equal(existsSync(join(cfg, "tui.json")), true, "install creates tui.json");
@@ -235,7 +250,7 @@ test("issue #20: a clean install + uninstall leaves no Goal Mode files behind (t
 });
 
 test("issue #20: uninstall PRESERVES a user-authored tui.json with other plugins", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-keep-tui-"));
+  const temp = mkTemp("goal-keep-tui-");
   const cfg = join(temp, "cfg");
   mkdirSync(cfg, { recursive: true });
   writeFileSync(join(cfg, "tui.json"), JSON.stringify({ $schema: "https://opencode.ai/tui.json", plugin: ["someone-elses-plugin"] }, null, 2) + "\n");
@@ -262,7 +277,7 @@ test("issue #24: mutually-exclusive flags print a friendly error", () => {
 });
 
 test("issue #24: a target that is a file prints a friendly error", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-file-target-"));
+  const temp = mkTemp("goal-file-target-");
   const file = join(temp, "not-a-dir");
   writeFileSync(file, "x");
   const res = runFail(["--target", file]);
@@ -272,14 +287,14 @@ test("issue #24: a target that is a file prints a friendly error", () => {
 });
 
 test("issue #158: dry-run reports 'Would register' for the sidebar, never 'Registered'", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-dryrun-wording-"));
+  const temp = mkTemp("goal-dryrun-wording-");
   const out = run(["--target", join(temp, "cfg"), "--dry-run"]);
   assert.match(out, /Would register the experimental sidebar/);
   assert.doesNotMatch(out, /\bRegistered the experimental sidebar/);
 });
 
 test("issue #134: an unreadable existing dest (broken symlink) does not crash install", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-broken-symlink-"));
+  const temp = mkTemp("goal-broken-symlink-");
   const cfg = join(temp, "cfg");
   mkdirSync(join(cfg, "plugins"), { recursive: true });
   // A broken symlink where a component file will be copied: readFileSync throws on it.
@@ -301,7 +316,7 @@ test("issue #220: a symlink in the source tree is skipped with a warning", () =>
 });
 
 test("issue #293: uninstall --force removes locally-modified files (with a backup)", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-force-uninstall-"));
+  const temp = mkTemp("goal-force-uninstall-");
   const cfg = join(temp, "cfg");
   run(["--target", cfg]);
   const modified = join(cfg, "agents", "goal.md");
@@ -313,7 +328,7 @@ test("issue #293: uninstall --force removes locally-modified files (with a backu
 });
 
 test("issue #293: install --force backs up a locally-modified file before overwriting", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-force-backup-"));
+  const temp = mkTemp("goal-force-backup-");
   const cfg = join(temp, "cfg");
   mkdirSync(join(cfg, "agents"), { recursive: true });
   const dest = join(cfg, "agents", "goal.md");
@@ -325,7 +340,7 @@ test("issue #293: install --force backs up a locally-modified file before overwr
 });
 
 test("issue #133: a malformed tui.json does NOT leave a committed manifest claiming a full install", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-tui-before-manifest-"));
+  const temp = mkTemp("goal-tui-before-manifest-");
   const cfg = join(temp, "cfg");
   mkdirSync(cfg, { recursive: true });
   writeFileSync(join(cfg, "tui.json"), "{bad json\n");
@@ -340,7 +355,7 @@ test("issue #133: a malformed tui.json does NOT leave a committed manifest claim
 });
 
 test("issue #197: a dry-run never touches the OpenCode plugin cache", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-cache-dryrun-"));
+  const temp = mkTemp("goal-cache-dryrun-");
   const cache = join(temp, "cache");
   const pkgRoot = join(cache, "opencode", "packages");
   const canary = join(pkgRoot, "opencode-goal-mode-canary");
@@ -354,7 +369,7 @@ test("issue #197: a dry-run never touches the OpenCode plugin cache", () => {
 });
 
 test("issue #197: cache clearing never collateral-damages a sibling (name-canary) dir", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-cache-canary-"));
+  const temp = mkTemp("goal-cache-canary-");
   const cache = join(temp, "cache");
   const pkgRoot = join(cache, "opencode", "packages");
   const canary = join(pkgRoot, "opencode-goal-mode-canary");
@@ -369,7 +384,7 @@ test("issue #197: cache clearing never collateral-damages a sibling (name-canary
 });
 
 test("issue #197: an idempotent re-install does not churn the cache", () => {
-  const temp = mkdtempSync(join(tmpdir(), "goal-cache-idem-"));
+  const temp = mkTemp("goal-cache-idem-");
   const cache = join(temp, "cache");
   const cfg = join(temp, "cfg");
   const env = { ...process.env, XDG_CACHE_HOME: cache };
diff --git a/tests/logger.test.mjs b/tests/logger.test.mjs
index 8a89223..8bdc46b 100644
--- a/tests/logger.test.mjs
+++ b/tests/logger.test.mjs
@@ -47,3 +47,146 @@ test("guardPrompt sends synthetic body via promptAsync", async () => {
   assert.equal(sent[0].body.parts[0].synthetic, true);
   assert.match(sent[0].body.system, /Keep going\./);
 });
+
+// ---------------------------------------------------------------------------
+// #262 — deeper coverage of the guard's sole agent-communication module.
+// ---------------------------------------------------------------------------
+
+test("#262 buildGuardPromptBody omits model when none is supplied (no model key)", () => {
+  const body = buildGuardPromptBody("text", null);
+  assert.equal("model" in body, false, "a null model must not add a model key");
+  const withModel = buildGuardPromptBody("text", { providerID: "p", modelID: "m" });
+  assert.deepEqual(withModel.model, { providerID: "p", modelID: "m" });
+});
+
+test("#262 buildGuardPromptBody always pins the canonical primary agent", () => {
+  // The body must route the continuation to the `goal` primary agent so a guard turn
+  // can never be misrouted to Build/Plan and deactivate Goal Mode.
+  assert.equal(buildGuardPromptBody("a", null).agent, "goal");
+  assert.equal(buildGuardPromptBody("b", { modelID: "x" }).agent, "goal");
+});
+
+test("#262 buildGuardPromptBody stringifies a non-string directive text", () => {
+  const body = buildGuardPromptBody(12345, null);
+  assert.ok(body.system.includes("12345"));
+  assert.match(body.system, new RegExp(`^${GUARD_PREFIX.replace(/[[\]]/g, "\\$&")}`));
+});
+
+test("#262 isSyntheticUserTurn: mixed real+synthetic text is NOT a guard continuation", () => {
+  // `every` over text parts: one genuine (human) text part means it is a real turn.
+  assert.equal(
+    isSyntheticUserTurn([
+      { type: "text", text: "(cont)", synthetic: true },
+      { type: "text", text: "actually do this instead" },
+    ]),
+    false,
+  );
+});
+
+test("#262 isSyntheticUserTurn: tool-only / file-only turns are real (no text parts)", () => {
+  assert.equal(isSyntheticUserTurn([{ type: "tool", id: "t" }]), false);
+  assert.equal(isSyntheticUserTurn([{ type: "file", url: "u" }, { type: "file", url: "v" }]), false);
+});
+
+test("#262 isSyntheticUserTurn: malformed parts are handled safely", () => {
+  assert.equal(isSyntheticUserTurn(null), false);
+  assert.equal(isSyntheticUserTurn(undefined), false);
+  assert.equal(isSyntheticUserTurn("not an array"), false);
+  assert.equal(isSyntheticUserTurn([null, { type: "text", text: "hi", synthetic: true }]), true);
+});
+
+test("#262 guardPrompt returns false on missing client/session/text and never throws", async () => {
+  const noClient = createLogger({});
+  assert.equal(await noClient.guardPrompt("s", "go", null), false);
+  const ok = createLogger({ session: { promptAsync: async () => {} } });
+  assert.equal(await ok.guardPrompt("", "go", null), false, "missing sessionID → false");
+  assert.equal(await ok.guardPrompt("s", "", null), false, "missing text → false");
+});
+
+test("#262 guardPrompt returns false (and swallows) when promptAsync throws", async () => {
+  const logger = createLogger({
+    session: {
+      promptAsync: async () => {
+        throw new Error("SessionBusy");
+      },
+    },
+  });
+  // A failed continuation must degrade to false, not propagate into the hook.
+  assert.equal(await logger.guardPrompt("s", "go", null), false);
+});
+
+test("#262 guardPrompt returns true on a successful send", async () => {
+  const logger = createLogger({ session: { promptAsync: async () => ({ ok: true }) } });
+  assert.equal(await logger.guardPrompt("s", "go", null), true);
+});
+
+test("#262 emitGoalCompleted sends the final-deliverable directive with the cycle count and marker", async () => {
+  const sent = [];
+  const logger = createLogger({ session: { promptAsync: async (o) => sent.push(o) } });
+  const ok = await logger.emitGoalCompleted("sess", 3, { modelID: "m" });
+  assert.equal(ok, true);
+  assert.equal(sent.length, 1);
+  assert.equal(sent[0].path.id, "sess");
+  // The directive must carry the review-cycle count and the completion marker.
+  assert.match(sent[0].body.system, /Review cycles: 3/);
+  assert.match(sent[0].body.system, /Goal Completed/);
+  assert.equal(sent[0].body.parts[0].synthetic, true);
+});
+
+test("#262 emitGoalCompleted coerces a non-numeric/absent cycle count to 0 and honors a custom marker", async () => {
+  const sent = [];
+  const logger = createLogger({ session: { promptAsync: async (o) => sent.push(o) } }, { completionMarker: "SHIPPED" });
+  await logger.emitGoalCompleted("sess", undefined, null);
+  assert.match(sent[0].body.system, /Review cycles: 0/);
+  assert.match(sent[0].body.system, /SHIPPED/);
+});
+
+test("#262 continueSession is an alias of guardPrompt (deprecated path still works)", async () => {
+  const sent = [];
+  const logger = createLogger({ session: { promptAsync: async (o) => sent.push(o) } });
+  assert.equal(await logger.continueSession("s", "carry on", null), true);
+  assert.equal(sent[0].path.id, "s");
+  assert.match(sent[0].body.system, /carry on/);
+});
+
+test("#262 info/warn/toast are best-effort and never throw on a missing or failing client", async () => {
+  const silent = createLogger({}); // no app.log, no tui
+  await assert.doesNotReject(silent.info("hi"));
+  await assert.doesNotReject(silent.warn("careful"));
+  await assert.doesNotReject(silent.toast("blocked"));
+
+  const calls = [];
+  const failing = createLogger({
+    app: { log: async () => { throw new Error("log down"); } },
+    tui: { showToast: async () => { throw new Error("tui down"); } },
+  });
+  await assert.doesNotReject(failing.info("x"));
+  await assert.doesNotReject(failing.toast("y"));
+  void calls;
+});
+
+test("#262 info/warn/toast forward the expected payload shape to the client", async () => {
+  const logs = [];
+  const toasts = [];
+  const logger = createLogger({
+    app: { log: async (p) => logs.push(p) },
+    tui: { showToast: async (p) => toasts.push(p) },
+  });
+  await logger.info("started", { sessionID: "s" });
+  await logger.warn("slow");
+  await logger.toast("Completion blocked", "error");
+  assert.equal(logs[0].body.service, "goal-guard");
+  assert.equal(logs[0].body.level, "info");
+  assert.equal(logs[0].body.message, "started");
+  assert.deepEqual(logs[0].body.extra, { sessionID: "s" });
+  assert.equal(logs[1].body.level, "warn");
+  assert.equal(toasts[0].body.message, "Completion blocked");
+  assert.equal(toasts[0].body.variant, "error");
+});
+
+test("#262 toast defaults to the 'warning' variant", async () => {
+  const toasts = [];
+  const logger = createLogger({ tui: { showToast: async (p) => toasts.push(p) } });
+  await logger.toast("heads up");
+  assert.equal(toasts[0].body.variant, "warning");
+});
diff --git a/tests/persistence.test.mjs b/tests/persistence.test.mjs
index fbc5232..83d7bdc 100644
--- a/tests/persistence.test.mjs
+++ b/tests/persistence.test.mjs
@@ -1,16 +1,25 @@
 import test from "node:test";
 import assert from "node:assert/strict";
-import { mkdtempSync, existsSync, readFileSync, writeFileSync, mkdirSync, chmodSync, renameSync, rmSync, copyFileSync } from "node:fs";
+import { mkdtempSync, existsSync, readFileSync, writeFileSync, mkdirSync, chmodSync, renameSync, rmSync, copyFileSync, statSync, utimesSync } from "node:fs";
 import { join } from "node:path";
 import { tmpdir } from "node:os";
-import { createPersistence, projectKey, stateBaseDir } from "../plugins/goal-guard/persistence.js";
+import { createPersistence, projectKey, stateBaseDir, DEFAULT_PERSIST_TTL_MS } from "../plugins/goal-guard/persistence.js";
 import { createStore } from "../plugins/goal-guard/state.js";
 import { createGuard } from "../plugins/goal-guard/guard.js";
 
+// Track every temp dir so it is removed after the suite (issue #67: these were
+// previously leaked into /tmp, accumulating ~18 dirs per run and risking CI disk).
+const TEMP_DIRS = [];
 function tempEnv() {
   const dir = mkdtempSync(join(tmpdir(), "goal-persist-"));
+  TEMP_DIRS.push(dir);
   return { XDG_STATE_HOME: dir, __dir: dir };
 }
+test.after(() => {
+  for (const dir of TEMP_DIRS.splice(0)) {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
 
 /** A synchronous timer shim so debounced saves can be flushed deterministically. */
 function syncTimers() {
@@ -227,3 +236,134 @@ test("two different worktrees persist independently", () => {
   ra.restore(pa.load());
   assert.equal(ra.stateFor("x").reviewCycles, 1);
 });
+
+// ---------------------------------------------------------------------------
+// #72 — state files must be written owner-only (mode 0600), not world-readable.
+// ---------------------------------------------------------------------------
+
+test("#72 state file is written with owner-only (0600) permissions", () => {
+  const env = tempEnv();
+  const p = createPersistence({ worktree: "/secure-proj", env });
+  assert.equal(p.flush(() => ({ goal: "secret goal text", evidence: ["leaky"] })), true);
+  const mode = statSync(p.file).mode & 0o777;
+  // The owner keeps read/write; group and other get NOTHING. (On platforms whose
+  // umask/ACL further restricts owner bits this is still owner-only; the invariant
+  // we enforce is that no group/other bit is set.)
+  assert.equal(mode & 0o077, 0, `group/other bits must be clear, got mode ${mode.toString(8)}`);
+  assert.equal(mode & 0o600, 0o600, `owner read/write must be set, got mode ${mode.toString(8)}`);
+});
+
+test("#72 a debounced save also lands at 0600 (no world-readable window via tmp+rename)", () => {
+  const env = tempEnv();
+  const timers = syncTimers();
+  const p = createPersistence({ worktree: "/secure-debounced", env, setTimer: timers.setTimer, clearTimer: timers.clearTimer });
+  p.save(() => ({ a: 1 }));
+  timers.fire();
+  const mode = statSync(p.file).mode & 0o777;
+  assert.equal(mode & 0o077, 0, `group/other bits must be clear, got mode ${mode.toString(8)}`);
+});
+
+// ---------------------------------------------------------------------------
+// #218 — orphaned per-worktree state files must be pruned (unbounded growth).
+// ---------------------------------------------------------------------------
+
+test("#218 DEFAULT_PERSIST_TTL_MS is a 7-day on-disk retention, longer than the 24h in-memory TTL", () => {
+  assert.equal(DEFAULT_PERSIST_TTL_MS, 7 * 24 * 60 * 60 * 1000);
+});
+
+test("#218 cleanup() prunes stale OTHER-worktree files but preserves the current one and fresh files", () => {
+  const env = tempEnv();
+  const dir = stateBaseDir(env);
+  const current = createPersistence({ worktree: "/current", env });
+  const stale = createPersistence({ worktree: "/old-moved-project", env });
+  const fresh = createPersistence({ worktree: "/another-active", env });
+
+  // Write all three files.
+  current.flush(() => ({ updatedAt: new Date().toISOString(), keep: "me" }));
+  stale.flush(() => ({ updatedAt: "2000-01-01T00:00:00.000Z", orphan: true }));
+  fresh.flush(() => ({ updatedAt: new Date().toISOString(), active: true }));
+
+  // Age the stale file's mtime well past the TTL so the mtime check also fires.
+  const old = Date.now() / 1000 - (8 * 24 * 60 * 60);
+  utimesSync(stale.file, old, old);
+
+  assert.ok(existsSync(current.file) && existsSync(stale.file) && existsSync(fresh.file));
+  const res = current.cleanup();
+  assert.ok(res.removed.includes(stale.file), "the stale orphan must be removed");
+  assert.equal(existsSync(current.file), true, "the current worktree file is always preserved");
+  assert.equal(existsSync(fresh.file), true, "a fresh sibling worktree file is preserved");
+  assert.equal(existsSync(stale.file), false, "the stale orphan is gone");
+});
+
+test("#218 cleanup() removes a file whose embedded updatedAt is ancient even if mtime is fresh", () => {
+  const env = tempEnv();
+  const current = createPersistence({ worktree: "/cur", env });
+  const ancient = createPersistence({ worktree: "/ancient-by-content", env });
+  current.flush(() => ({ updatedAt: new Date().toISOString() }));
+  // Write with a recent mtime but a very old logical updatedAt.
+  ancient.flush(() => ({ updatedAt: "1999-01-01T00:00:00.000Z" }));
+  const res = current.cleanup();
+  assert.ok(res.removed.includes(ancient.file));
+  assert.equal(existsSync(ancient.file), false);
+});
+
+test("#218 cleanup() is a safe no-op when the state dir does not exist yet", () => {
+  const env = tempEnv();
+  const p = createPersistence({ worktree: "/never", env });
+  const res = p.cleanup();
+  assert.deepEqual(res, { removed: [], scanned: 0 });
+});
+
+test("#218 cleanup() respects a custom persistTtlMs and never touches the current file", () => {
+  const env = tempEnv();
+  const cur = createPersistence({ worktree: "/cur2", env, persistTtlMs: 1000 });
+  const other = createPersistence({ worktree: "/other2", env, persistTtlMs: 1000 });
+  cur.flush(() => ({ updatedAt: new Date().toISOString() }));
+  other.flush(() => ({ updatedAt: new Date(Date.now() - 5000).toISOString() }));
+  const res = cur.cleanup();
+  assert.ok(res.removed.includes(other.file), "older than the 1s TTL → pruned");
+  assert.equal(existsSync(cur.file), true);
+});
+
+test("#218 cleanup() removes orphaned .tmp files left by an interrupted write", () => {
+  const env = tempEnv();
+  const dir = stateBaseDir(env);
+  const cur = createPersistence({ worktree: "/cur3", env });
+  cur.flush(() => ({ updatedAt: new Date().toISOString() }));
+  mkdirSync(dir, { recursive: true });
+  const strayTmp = join(dir, `${projectKey("/abandoned")}.json.tmp`);
+  writeFileSync(strayTmp, "partial", "utf8");
+  const res = cur.cleanup();
+  assert.ok(res.removed.includes(strayTmp), "a stray temp file must be swept");
+  assert.equal(existsSync(strayTmp), false);
+});
+
+test("#218 cleanup() ignores non-goal-guard files in the directory", () => {
+  const env = tempEnv();
+  const dir = stateBaseDir(env);
+  const cur = createPersistence({ worktree: "/cur4", env });
+  cur.flush(() => ({ updatedAt: new Date().toISOString() }));
+  const unrelated = join(dir, "README.txt");
+  writeFileSync(unrelated, "not ours", "utf8");
+  cur.cleanup();
+  assert.equal(existsSync(unrelated), true, "an unrelated file is never touched");
+});
+
+test("#218 remove() deletes the current worktree's file and its tmp sibling", () => {
+  const env = tempEnv();
+  const dir = stateBaseDir(env);
+  const p = createPersistence({ worktree: "/disposable", env });
+  p.flush(() => ({ updatedAt: new Date().toISOString() }));
+  writeFileSync(`${p.file}.tmp`, "leftover", "utf8");
+  assert.equal(existsSync(p.file), true);
+  assert.equal(p.remove(), true);
+  assert.equal(existsSync(p.file), false);
+  assert.equal(existsSync(`${p.file}.tmp`), false);
+  void dir;
+});
+
+test("#218 cleanup() is a no-op when persistence is disabled", () => {
+  const env = tempEnv();
+  const p = createPersistence({ worktree: "/x", env, enabled: false });
+  assert.deepEqual(p.cleanup(), { removed: [], scanned: 0 });
+});
diff --git a/tests/plugin.test.mjs b/tests/plugin.test.mjs
index 6673a0c..7c6a119 100644
--- a/tests/plugin.test.mjs
+++ b/tests/plugin.test.mjs
@@ -589,7 +589,18 @@ test("compaction preserves concrete live state values", async () => {
 
 test("default export registers the goal_* tools", async () => {
   const hooks = await plugin({ client: { app: { log: async () => undefined } } });
-  assert.ok(hooks.tool);
+  // The default export imports tools.js (and its `@opencode-ai/plugin` dependency)
+  // lazily; when that import FAILS the plugin returns hooks WITHOUT a `tool` key.
+  // A bare `assert.ok(hooks.tool)` masked that as a vague falsy failure that read
+  // like a logic bug rather than a missing dependency (issue #373). Surface the
+  // real cause loudly: if `tool` is absent it is almost always ERR_MODULE_NOT_FOUND
+  // for `@opencode-ai/plugin`.
+  assert.ok(
+    hooks.tool,
+    "Expected the `tool` hook to be registered, but it is absent. This means tools.js " +
+      "failed to import — most likely ERR_MODULE_NOT_FOUND for `@opencode-ai/plugin`. " +
+      "Run: npm install @opencode-ai/plugin",
+  );
   for (const name of ["goal_status", "goal_evidence_map", "goal_reviewer_memory", "goal_contract", "goal_evidence", "goal_reset"]) {
     assert.equal(typeof hooks.tool[name].execute, "function", `${name} missing`);
   }
diff --git a/tests/summary.test.mjs b/tests/summary.test.mjs
new file mode 100644
index 0000000..e592ab9
--- /dev/null
+++ b/tests/summary.test.mjs
@@ -0,0 +1,173 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import {
+  shortGoalLabel,
+  summarizeState,
+  reviewerMemoryReport,
+  statusReport,
+  evidenceMapReport,
+  sidebarView,
+  NO_GOAL,
+} from "../plugins/goal-guard/summary.js";
+import { createStore, createState } from "../plugins/goal-guard/state.js";
+import { recordVerdict } from "../plugins/goal-guard/verdicts.js";
+import { markEdit, recordEvidence } from "../plugins/goal-guard/events.js";
+import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js";
+import { BASE_GATES } from "../plugins/goal-guard/agents.js";
+
+// Direct unit coverage for summary.js (issue #291).
+
+const CFG = { ...DEFAULT_CONFIG, contextualGates: false };
+
+test("#291 shortGoalLabel: prefers contract.title, then original, then goalText", () => {
+  assert.equal(shortGoalLabel({ contract: { title: "Migrate auth to JWT" } }), "Migrate auth to JWT");
+  assert.equal(shortGoalLabel({ contract: { original: "Add a rate limiter to login" } }), "Add a rate limiter to login");
+  assert.equal(shortGoalLabel({ goalText: "just the captured text" }), "just the captured text");
+  assert.equal(shortGoalLabel({}), "");
+  assert.equal(shortGoalLabel(null), "");
+});
+
+test("#291 shortGoalLabel: a long title is truncated with an ellipsis", () => {
+  const long = "x".repeat(200);
+  const out = shortGoalLabel({ contract: { title: long } }, 20);
+  assert.ok(out.length <= 20);
+  assert.match(out, /…$/);
+});
+
+test("#291 shortGoalLabel: contract with ONLY original (no title) falls back to original", () => {
+  // The edge case named in the issue.
+  const out = shortGoalLabel({ contract: { original: "Implement the export endpoint." } });
+  assert.equal(out, "Implement the export endpoint.");
+});
+
+test("#291 shortGoalLabel: prefers a short first sentence of a long original", () => {
+  const out = shortGoalLabel({ contract: { original: "Add rate limiting. Then a lot more detail follows that should be dropped." } }, 80);
+  assert.equal(out, "Add rate limiting.");
+});
+
+test("#291 summarizeState: renders the key invariants as a single semicolon line", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  markEdit(store, st, "did work");
+  recordVerdict(store, st, "goal-reviewer", "FAIL", "blocking: X is broken\nVerdict: FAIL");
+  const out = summarizeState(st, CFG);
+  assert.match(out, /active=true/);
+  assert.match(out, /dirty=true/);
+  assert.match(out, /reviewCycles=0/);
+  assert.match(out, /recentVerdicts=goal-reviewer:FAIL/);
+  assert.match(out, /missingGates=/);
+});
+
+test("#291 summarizeState: 'none' sentinels when there are no verdicts/reasons", () => {
+  const st = createState();
+  st.active = true;
+  const out = summarizeState(st, CFG);
+  assert.match(out, /recentVerdicts=none/);
+  assert.match(out, /dirtyReasons=none/);
+});
+
+test("#291 reviewerMemoryReport: partitions open vs resolved and flags freshness", () => {
+  const store = createStore();
+  const st = createState();
+  recordVerdict(store, st, "goal-doc-reviewer", "FAIL", "blocking: docs missing\nVerdict: FAIL");
+  let report = reviewerMemoryReport(st);
+  assert.equal(report.open.length, 1);
+  assert.equal(report.resolved.length, 0);
+  assert.equal(report.total, 1);
+  // Resolve it.
+  recordVerdict(store, st, "goal-doc-reviewer", "PASS", "Verdict: PASS");
+  report = reviewerMemoryReport(st);
+  assert.equal(report.open.length, 0);
+  assert.equal(report.resolved.length, 1);
+});
+
+test("#291 reviewerMemoryReport: tolerates a non-array reviewerMemory", () => {
+  const report = reviewerMemoryReport({ reviewerMemory: undefined, lastEditSeq: 0 });
+  assert.deepEqual(report, { open: [], resolved: [], total: 0 });
+});
+
+test("#291 statusReport: completionAllowed flips when all gates pass and reflects required/missing", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  let report = statusReport(st, CFG);
+  assert.equal(report.completionAllowed, false);
+  assert.ok(report.missingGates.length > 0);
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  report = statusReport(st, CFG);
+  assert.equal(report.completionAllowed, true);
+  assert.deepEqual(report.missingGates, []);
+  assert.equal(report.reviewCycles, 1);
+});
+
+test("#291 statusReport: inactive session is never completion-allowed", () => {
+  const st = createState();
+  st.contract = { acceptanceCriteria: [] };
+  const report = statusReport(st, CFG);
+  assert.equal(report.active, false);
+  assert.equal(report.completionAllowed, false);
+});
+
+test("#291 evidenceMapReport: a criterion with fresh matching evidence is 'covered' once gates pass", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  st.contract = { acceptanceCriteria: ["Rate limit the login endpoint"] };
+  recordEvidence(store, st, "run load test", "ok", ["Rate limit the login endpoint"]);
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  const report = evidenceMapReport(st, CFG);
+  assert.equal(report.criteria.length, 1);
+  assert.equal(report.criteria[0].status, "covered");
+});
+
+test("#291 evidenceMapReport: a criterion with no evidence is 'missing' with an actionable next step", () => {
+  const st = createState();
+  st.active = true;
+  st.contract = { acceptanceCriteria: ["Tokens expire after 1h"] };
+  const report = evidenceMapReport(st, CFG);
+  assert.equal(report.criteria[0].status, "missing");
+  assert.match(report.criteria[0].nextAction, /Run verification and record it with goal_evidence/);
+});
+
+test("#291 evidenceMapReport: evidence that matches no criterion is reported as unmappedEvidence", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  st.contract = { acceptanceCriteria: ["Login works"] };
+  recordEvidence(store, st, "unrelated check", "ok", ["something else entirely"]);
+  const report = evidenceMapReport(st, CFG);
+  assert.equal(report.unmappedEvidence.length, 1);
+  assert.equal(report.unmappedEvidence[0].command, "unrelated check");
+});
+
+test("#291 sidebarView: returns NO_GOAL for inactive/no-goal sessions", () => {
+  assert.equal(sidebarView(null, CFG), NO_GOAL);
+  assert.equal(sidebarView(createState(), CFG), NO_GOAL, "inactive → NO_GOAL");
+  const activeNoGoal = createState();
+  activeNoGoal.active = true; // active but no goal label
+  assert.equal(sidebarView(activeNoGoal, CFG), NO_GOAL);
+});
+
+test("#291 sidebarView: a running goal exposes gate counts and review-cycle status", () => {
+  const st = createState();
+  st.active = true;
+  st.contract = { title: "Ship the feature", acceptanceCriteria: ["it works"] };
+  const view = sidebarView(st, CFG);
+  assert.equal(view.state, "running");
+  assert.equal(view.goal, "Ship the feature");
+  assert.match(view.gates, /\/\d+ gates/);
+  assert.match(view.status, /in progress/);
+});
+
+test("#291 sidebarView: a fully-passed, clean goal renders as done", () => {
+  const store = createStore();
+  const st = createState();
+  st.active = true;
+  st.contract = { title: "Done goal", acceptanceCriteria: ["it works"] };
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  st.dirty = false;
+  const view = sidebarView(st, CFG);
+  assert.equal(view.state, "done");
+  assert.match(view.status, /completed/);
+});
diff --git a/tests/system.test.mjs b/tests/system.test.mjs
new file mode 100644
index 0000000..d31b671
--- /dev/null
+++ b/tests/system.test.mjs
@@ -0,0 +1,118 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { buildSystemInjection } from "../plugins/goal-guard/system.js";
+import { createStore, createState } from "../plugins/goal-guard/state.js";
+import { recordVerdict } from "../plugins/goal-guard/verdicts.js";
+import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js";
+import { BASE_GATES } from "../plugins/goal-guard/agents.js";
+
+// Direct unit coverage for system.js — the system-prompt injection (issue #291).
+// `bullet`/`exampleTaskCall` are internal, so they are exercised through the public
+// buildSystemInjection output (the only entry point the hook uses).
+
+const CFG = { ...DEFAULT_CONFIG, contextualGates: false };
+
+function activeState() {
+  const st = createState();
+  st.active = true;
+  st.goalText = "build a generic feature";
+  return st;
+}
+
+test("#291 buildSystemInjection returns null for an absent or inactive session", () => {
+  assert.equal(buildSystemInjection(null, CFG), null);
+  assert.equal(buildSystemInjection(createState(), CFG), null, "inactive → null");
+});
+
+test("#291 buildSystemInjection emits the live enforcement header and ground-truth framing", () => {
+  const out = buildSystemInjection(activeState(), CFG);
+  assert.match(out, /## Goal Guard — live enforcement state/);
+  assert.match(out, /Treat it as ground truth/);
+  assert.match(out, /Review cycles recorded: 0/);
+});
+
+test("#291 with no contract: it tells the agent to record the contract FIRST", () => {
+  const out = buildSystemInjection(activeState(), CFG);
+  assert.match(out, /Goal Contract: not yet recorded/);
+});
+
+test("#291 an auto-derived contract (no criteria) nudges adding acceptance criteria", () => {
+  const st = activeState();
+  st.contract = { title: "t", original: "x", acceptanceCriteria: [], auto: true };
+  const out = buildSystemInjection(st, CFG);
+  assert.match(out, /Goal Contract: auto-derived/);
+});
+
+test("#291 recorded acceptance criteria are listed (capped at 8) — issue #155", () => {
+  const st = activeState();
+  st.contract = {
+    title: "t",
+    original: "x",
+    acceptanceCriteria: Array.from({ length: 12 }, (_, i) => `criterion number ${i}`),
+  };
+  const out = buildSystemInjection(st, CFG);
+  assert.match(out, /12 acceptance criteria recorded/);
+  // Only the first 8 are inlined.
+  assert.match(out, /criterion number 0/);
+  assert.match(out, /criterion number 7/);
+  assert.doesNotMatch(out, /criterion number 8/);
+});
+
+test("#291 reviewer-supplied criteria/findings are escaped (no backticks/code-fences/newlines)", () => {
+  const st = activeState();
+  st.contract = {
+    title: "t",
+    original: "x",
+    acceptanceCriteria: ["use ```rm -rf``` and `eval` across\nmultiple lines"],
+  };
+  const out = buildSystemInjection(st, CFG);
+  // Backticks are folded to single quotes and newlines collapsed → no triple-fence.
+  assert.doesNotMatch(out, /```/);
+});
+
+test("#291 with programmaticReview ON: it says the guard runs reviews automatically on stop", () => {
+  const out = buildSystemInjection(activeState(), { ...CFG, programmaticReview: true });
+  assert.match(out, /runs the required reviews AUTOMATICALLY/);
+  // It must NOT tell the agent to invoke reviewers itself.
+  assert.doesNotMatch(out, /MANDATORY NEXT ACTION — run the outstanding reviews now using the task tool/);
+});
+
+test("#291 with programmaticReview OFF: it prescribes an exact task() call for the next gate", () => {
+  const out = buildSystemInjection(activeState(), { ...CFG, programmaticReview: false });
+  assert.match(out, /MANDATORY NEXT ACTION/);
+  // A concrete, copyable task tool call for the first missing reviewer.
+  assert.match(out, /task\(subagent_type: "goal-/);
+});
+
+test("#291 required/missing gates render as comma lists, 'none' when empty", () => {
+  const store = createStore();
+  const st = activeState();
+  // Pass every base gate so the missing set is empty → 'none'.
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  const out = buildSystemInjection(st, CFG);
+  assert.match(out, /Gates still missing or stale: none\./);
+  assert.match(out, /Required review gates: goal-/);
+});
+
+test("#291 completion ALLOWED vs BLOCKED line reflects the gate state and marker", () => {
+  const store = createStore();
+  const st = activeState();
+  const blocked = buildSystemInjection(st, CFG);
+  assert.match(blocked, /Completion is currently BLOCKED/);
+  assert.match(blocked, /Do NOT claim "Goal Completed" yet/);
+
+  for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
+  const allowed = buildSystemInjection(st, CFG);
+  assert.match(allowed, /Completion is currently ALLOWED/);
+  assert.match(allowed, /answer with "Goal Completed"/);
+});
+
+test("#291 open reviewer-memory findings are surfaced and escaped", () => {
+  const store = createStore();
+  const st = activeState();
+  recordVerdict(store, st, "goal-doc-reviewer", "FAIL", "Blocking findings\n- README `missing` tool docs\nVerdict: FAIL");
+  const out = buildSystemInjection(st, CFG);
+  assert.match(out, /Open Reviewer Memory/);
+  assert.match(out, /README/);
+  assert.doesNotMatch(out, /```/);
+});
diff --git a/tests/tools.test.mjs b/tests/tools.test.mjs
new file mode 100644
index 0000000..84cc0f2
--- /dev/null
+++ b/tests/tools.test.mjs
@@ -0,0 +1,127 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { createGoalTools, coerceStringList, goalSimilarity, SAME_GOAL_THRESHOLD } from "../plugins/goal-guard/tools.js";
+import { createGuard } from "../plugins/goal-guard/guard.js";
+
+const noopPersistence = { load: () => null, save: () => {}, flush: () => false, file: "", isDegraded: () => false };
+
+function makeGuard(options = {}) {
+  let t = 0;
+  return createGuard(
+    { client: { app: { log: async () => undefined }, tui: { showToast: async () => undefined } } },
+    options,
+    { persistence: noopPersistence, clock: () => (t += 1), syncIdle: true },
+  );
+}
+
+async function makeTools() {
+  const guard = makeGuard();
+  const tools = createGoalTools({ store: guard.store, config: guard.config, persist: guard.persist });
+  await guard.hooks["chat.params"]({ sessionID: "s", agent: "goal" }, {});
+  return { guard, tools };
+}
+
+// ---------------------------------------------------------------------------
+// #151 — coerceStringList: validate/coerce acceptanceCriteria (and friends).
+// ---------------------------------------------------------------------------
+
+test("#151 coerceStringList keeps non-empty strings and trims/collapses whitespace", () => {
+  assert.deepEqual(coerceStringList(["a", "  b  ", "c\n\nd"]), ["a", "b", "c d"]);
+});
+
+test("#151 coerceStringList stringifies finite numbers and booleans", () => {
+  assert.deepEqual(coerceStringList([1, 2.5, true, false]), ["1", "2.5", "true", "false"]);
+});
+
+test("#151 coerceStringList DROPS objects/arrays/null/NaN (never '[object Object]')", () => {
+  const out = coerceStringList([{ foo: "bar" }, ["x"], null, undefined, NaN, "real"]);
+  assert.deepEqual(out, ["real"]);
+  assert.ok(!out.includes("[object Object]"), "an object must never become '[object Object]'");
+});
+
+test("#151 coerceStringList drops empty/whitespace-only strings and tolerates non-arrays", () => {
+  assert.deepEqual(coerceStringList(["", "   ", "ok"]), ["ok"]);
+  assert.deepEqual(coerceStringList(undefined), []);
+  assert.deepEqual(coerceStringList("not an array"), []);
+  assert.deepEqual(coerceStringList(null), []);
+});
+
+test("#151 goal_contract sanitizes object acceptanceCriteria elements before storage", async () => {
+  const { guard, tools } = await makeTools();
+  await tools.goal_contract.execute(
+    {
+      title: "Do the thing",
+      original: "do the thing well",
+      acceptanceCriteria: ["login works", { malformed: "object" }, "", 42, ["nested"]],
+    },
+    { sessionID: "s" },
+  );
+  const st = guard.store.stateFor("s");
+  // The object and nested-array elements are dropped; the number is stringified; the
+  // empty string is removed. NOTHING becomes "[object Object]".
+  assert.deepEqual(st.contract.acceptanceCriteria, ["login works", "42"]);
+  for (const c of st.contract.acceptanceCriteria) {
+    assert.equal(typeof c, "string");
+    assert.notEqual(c, "[object Object]");
+  }
+});
+
+test("#151 the recorded-criteria count in the result reflects the cleaned list", async () => {
+  const { tools } = await makeTools();
+  const res = await tools.goal_contract.execute(
+    { title: "T", original: "build it", acceptanceCriteria: ["only this one", { x: 1 }, null] },
+    { sessionID: "s" },
+  );
+  assert.match(res.title, /1 acceptance criteria/);
+});
+
+test("#151 requirements / inferred / nonGoals are sanitized too", async () => {
+  const { guard, tools } = await makeTools();
+  await tools.goal_contract.execute(
+    {
+      title: "T",
+      original: "build it",
+      requirements: ["must log in", { bad: 1 }],
+      inferred: [null, "should be fast"],
+      nonGoals: [["x"], "no UI changes"],
+      acceptanceCriteria: ["works"],
+    },
+    { sessionID: "s" },
+  );
+  const c = guard.store.stateFor("s").contract;
+  assert.deepEqual(c.requirements, ["must log in"]);
+  assert.deepEqual(c.inferred, ["should be fast"]);
+  assert.deepEqual(c.nonGoals, ["no UI changes"]);
+});
+
+// ---------------------------------------------------------------------------
+// goalSimilarity — exported helper used to decide same-goal vs new-goal.
+// ---------------------------------------------------------------------------
+
+test("goalSimilarity: identical content scores 1, disjoint scores 0, undefined → -1", () => {
+  assert.equal(goalSimilarity("rate limiter login", "rate limiter login"), 1);
+  assert.equal(goalSimilarity("rate limiter login endpoint", "completely different banana topic"), 0);
+  assert.equal(goalSimilarity("", "anything"), -1, "no content tokens on one side → -1");
+});
+
+test("goalSimilarity: a faithful restatement scores above the same-goal threshold", () => {
+  const a = "Add a configurable token-bucket rate limiter to the login endpoint";
+  const b = "Add a rate limiter to login";
+  assert.ok(goalSimilarity(a, b) >= SAME_GOAL_THRESHOLD, "restatement should be treated as the SAME goal");
+});
+
+// ---------------------------------------------------------------------------
+// Tool gating sanity (session + goal-mode requirements).
+// ---------------------------------------------------------------------------
+
+test("goal_* tools refuse when the host supplies no session id", async () => {
+  const { tools } = await makeTools();
+  const res = await tools.goal_status.execute({}, {});
+  assert.equal(res.metadata.reason, "no_session");
+});
+
+test("goal_reset requires confirm=true", async () => {
+  const { tools } = await makeTools();
+  const res = await tools.goal_reset.execute({ confirm: false }, { sessionID: "s" });
+  assert.match(res.title, /not confirmed/i);
+});
diff --git a/tests/verdicts.test.mjs b/tests/verdicts.test.mjs
index fd7e919..d27f28a 100644
--- a/tests/verdicts.test.mjs
+++ b/tests/verdicts.test.mjs
@@ -59,6 +59,17 @@ test("parseVerdict is case-insensitive on the keyword", () => {
   assert.equal(parseVerdict("verdict: pass"), "PASS");
 });
 
+test("#375 parseVerdict is case-insensitive last-wins across mixed-case verdicts in one text", () => {
+  // Two verdicts in the same body, both in non-canonical case: the LAST one wins
+  // and the result is normalized to upper-case regardless of input casing.
+  assert.equal(parseVerdict("VERDICT: pass\nVERDICT: FAIL"), "FAIL");
+  assert.equal(parseVerdict("verdict: FAIL\nVerdict: Pass"), "PASS");
+  assert.equal(parseVerdict("Verdict: PASS\nSome text\nverdict: fail"), "FAIL");
+  // A lower-case earlier PASS must not beat a later upper-case FAIL, and vice-versa.
+  assert.equal(parseVerdict("Verdict: pass — looks good\nVERDICT: FAIL — blocking"), "FAIL");
+  assert.equal(parseVerdict("VERDICT: FAIL initial\nverdict: pass after fix"), "PASS");
+});
+
 test("textOf unwraps the task_result envelope", () => {
   const wrapped = '<task id="abc" state="completed"><task_result>Looks good. Verdict: PASS</task_result></task>';
   assert.match(textOf({ output: wrapped }), /Verdict: PASS/);

From 4b68d7bdce8ccf64afcc3ca535aebcc2af1e3c46 Mon Sep 17 00:00:00 2001
From: Devin Oldenburg <devin@logicplanes.com>
Date: Sun, 21 Jun 2026 13:35:11 +0200
Subject: [PATCH 2/2] test(logger): fully escape GUARD_PREFIX when building the
 RegExp

CodeQL js/incomplete-sanitization: the assertion escaped only [ and ] before
new RegExp(); escape all regex metacharacters so the anchored match is correct
regardless of GUARD_PREFIX content.
---
 tests/logger.test.mjs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/logger.test.mjs b/tests/logger.test.mjs
index 8bdc46b..884ee43 100644
--- a/tests/logger.test.mjs
+++ b/tests/logger.test.mjs
@@ -69,7 +69,7 @@ test("#262 buildGuardPromptBody always pins the canonical primary agent", () =>
 test("#262 buildGuardPromptBody stringifies a non-string directive text", () => {
   const body = buildGuardPromptBody(12345, null);
   assert.ok(body.system.includes("12345"));
-  assert.match(body.system, new RegExp(`^${GUARD_PREFIX.replace(/[[\]]/g, "\\$&")}`));
+  assert.match(body.system, new RegExp(`^${GUARD_PREFIX.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`));
 });
 
 test("#262 isSyntheticUserTurn: mixed real+synthetic text is NOT a guard continuation", () => {