From 8e14b13cba074db9756af47fa82c8891ffa9861e Mon Sep 17 00:00:00 2001 From: Devin Oldenburg Date: Sun, 21 Jun 2026 13:31:50 +0200 Subject: [PATCH 1/2] fix(core): state perms, contract validation, cleanup + unit-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - #72: write guard state files with mode 0600 (owner-only); they hold goal text/contract/evidence and were landing at the default umask (world-readable). - #151: validate/coerce goal_contract string-array fields (acceptanceCriteria, requirements, inferred, nonGoals) via coerceStringList so a non-string element is dropped instead of stored as "[object Object]". - #218: add persistence cleanup()/remove() to prune orphaned per-worktree state files by on-disk TTL (DEFAULT_PERSIST_TTL_MS = 7d), preserving the current worktree and fresh siblings; sweep stray .tmp files; ignore unrelated files. - #244: prefix every blocked-completion rewrite with an unambiguous guard sentinel ("⛔ [Goal Guard · blocked completion]") so an intervention can't be confused with a benign "I am not done yet" message; marker flip is preserved. - #262: deepen logger.js coverage (synthetic-turn edge cases, guardPrompt error path/return values, emitGoalCompleted format, best-effort info/warn/toast). - #291: add direct unit tests for completion/events/system/summary/agents. - #373: make plugin.test.mjs assert loudly with an ERR_MODULE_NOT_FOUND hint instead of a vague falsy check when the tool hook is absent. - #374: add tests/concurrency.test.mjs proving concurrent goal sessions complete independently with no cross-contamination. - #375: add case-insensitive last-wins parseVerdict coverage (no source change). - #67: clean up temp dirs in install.test.mjs and persistence.test.mjs (t.after). Note: guard.js is owned elsewhere; the persistence cleanup primitive is added and unit-tested here, to be wired into the guard dispose/flush path by that owner. --- plugins/goal-guard/completion.js | 18 ++++ plugins/goal-guard/persistence.js | 127 +++++++++++++++++++++- plugins/goal-guard/tools.js | 53 ++++++++- tests/agents-unit.test.mjs | 100 +++++++++++++++++ tests/completion.test.mjs | 149 +++++++++++++++++++++++++ tests/concurrency.test.mjs | 126 ++++++++++++++++++++++ tests/events.test.mjs | 163 ++++++++++++++++++++++++++++ tests/install.test.mjs | 63 ++++++----- tests/logger.test.mjs | 143 ++++++++++++++++++++++++ tests/persistence.test.mjs | 144 ++++++++++++++++++++++++- tests/plugin.test.mjs | 13 ++- tests/summary.test.mjs | 173 ++++++++++++++++++++++++++++++ tests/system.test.mjs | 118 ++++++++++++++++++++ tests/tools.test.mjs | 127 ++++++++++++++++++++++ tests/verdicts.test.mjs | 11 ++ 15 files changed, 1493 insertions(+), 35 deletions(-) create mode 100644 tests/agents-unit.test.mjs create mode 100644 tests/completion.test.mjs create mode 100644 tests/concurrency.test.mjs create mode 100644 tests/events.test.mjs create mode 100644 tests/summary.test.mjs create mode 100644 tests/system.test.mjs create mode 100644 tests/tools.test.mjs diff --git a/plugins/goal-guard/completion.js b/plugins/goal-guard/completion.js index e0b4244..27b9a7d 100644 --- a/plugins/goal-guard/completion.js +++ b/plugins/goal-guard/completion.js @@ -33,6 +33,19 @@ import { summarizeState } from "./summary.js"; const CYCLES_RE = /Review cycles:\s*(\d+)/gi; +/** + * Unambiguous guard sentinel prefixed to every blocked-completion rewrite (issue + * #244). The blocked MARKER itself (`Goal Not Completed` by default) is textually + * identical to a normal "I am honestly not done yet" agent sentence, so a user + * skimming the transcript cannot tell a guard intervention from benign agent + * prose. This sentinel is something ONLY the guard emits — it carries the + * bracketed `[Goal Guard]` namespace plus a stop glyph and the word "blocked" — so + * `rg "Goal Guard · blocked"` (or the bracket) finds exactly the interventions + * and never an ordinary message. It is emitted on its OWN leading line, ABOVE the + * (still-flipped) marker line, so existing marker/markdown-prefix detection is + * unaffected. */ +export const GUARD_BLOCK_SENTINEL = "⛔ [Goal Guard · blocked completion]"; + // Invisible / formatting code points that carry no glyph and must never break up // or hide the marker: zero-width space/joiners, bidi controls, word-joiner, soft // hyphen, BOM. @@ -136,6 +149,11 @@ export function evaluateCompletionClaim(state, config, text) { if (replacement === text) { replacement = `${blockedMarker}\n\n${replacement}`; } + // Prefix the unambiguous guard sentinel on its OWN leading line so the rewrite is + // visibly a guard artifact and searchable, never conflated with a normal "I am not + // done yet" agent message (issue #244). The marker line itself is left flipped in + // place below it, so existing marker/markdown-prefix detection still holds. + replacement = `${GUARD_BLOCK_SENTINEL}\n${replacement}`; replacement += `\n\nGoal Guard blocked completion: ${reason}. State: ${summary}`; return { blocked: true, reason, replacement, claimedCycles }; } diff --git a/plugins/goal-guard/persistence.js b/plugins/goal-guard/persistence.js index 93b3498..f5033d8 100644 --- a/plugins/goal-guard/persistence.js +++ b/plugins/goal-guard/persistence.js @@ -14,10 +14,22 @@ */ import { createHash } from "node:crypto"; -import { mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync } from "node:fs"; +import { mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync, readdirSync, statSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; +/** + * On-disk retention for orphaned state files (issue #218). The in-memory + * `sessionTtlMs` only governs when an in-memory record may be dropped; it does + * NOT remove the per-worktree file from disk. Without an independent on-disk TTL, + * `$XDG_STATE_HOME/opencode/goal-guard/` accumulates one `.json` file per + * worktree the user has EVER opened a goal in, holding the verbatim goal text, + * the contract, and the evidence — forever. Default is 7 days, deliberately + * longer than the 24h in-memory TTL so a long-paused goal is not pruned while a + * sibling worktree is still actively saving. + */ +export const DEFAULT_PERSIST_TTL_MS = 7 * 24 * 60 * 60 * 1000; + /** Resolve the base directory for guard state files. */ export function stateBaseDir(env = process.env) { const xdg = env.XDG_STATE_HOME && env.XDG_STATE_HOME.trim(); @@ -41,6 +53,7 @@ export function projectKey(worktree) { * @param {Record} [opts.env] * @param {(fn: () => void, ms: number) => any} [opts.setTimer] Injectable for tests. * @param {(handle: any) => void} [opts.clearTimer] + * @param {number} [opts.persistTtlMs] On-disk TTL for orphaned files (issue #218). */ export function createPersistence({ worktree, @@ -49,11 +62,13 @@ export function createPersistence({ env = process.env, setTimer = (fn, ms) => setTimeout(fn, ms), clearTimer = (h) => clearTimeout(h), + persistTtlMs = DEFAULT_PERSIST_TTL_MS, fs: fsImpl, } = {}) { - // Injectable fs (tests exercise the EXDEV path); defaults to node:fs. + // Injectable fs (tests exercise the EXDEV/cleanup paths); defaults to node:fs. const fs = { mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync, + readdirSync, statSync, ...(fsImpl || {}), }; const dir = stateBaseDir(env); @@ -79,7 +94,13 @@ export function createPersistence({ if (!enabled || degraded) return false; try { fs.mkdirSync(dir, { recursive: true }); - fs.writeFileSync(tmp, JSON.stringify(data), "utf8"); + // mode 0600 (owner read/write only): the snapshot holds the verbatim goal + // text, the contract, the changed-file paths, the verification evidence and + // the reviewer findings — all of which would otherwise land at the default + // umask (typically 0644, world-readable) and leak to any local user on a + // shared/multi-user host (issue #72). The mode is set on the temp file BEFORE + // the rename so the target is never momentarily world-readable. + fs.writeFileSync(tmp, JSON.stringify(data), { encoding: "utf8", mode: 0o600 }); try { fs.renameSync(tmp, file); } catch (err) { @@ -141,11 +162,111 @@ export function createPersistence({ return writeNow(fn()); // true on success, false on a real write failure } + /** + * Prune orphaned state files (issue #218). + * + * The store is keyed one file per worktree, so the directory grows unbounded: + * one `.json` per worktree the user has ever opened a goal in, never + * removed. This sweep deletes any OTHER worktree's file (never the current + * one) when it is stale, where "stale" means EITHER: + * - its mtime is older than `persistTtlMs` (default 7 days), OR + * - its embedded `updatedAt` is older than `persistTtlMs`. + * The current worktree's file is always preserved so an in-flight goal is never + * dropped out from under a live session. Also removes the leftover `.tmp` files. + * + * All disk access is best-effort: a read-only/sandboxed FS, a missing dir, or an + * unreadable entry degrades to a no-op rather than throwing into a hook. + * + * @param {object} [opts] + * @param {number} [opts.now] Injectable wall clock (ms) for tests. + * @returns {{ removed: string[], scanned: number }} + */ + function cleanup({ now = Date.now() } = {}) { + const removed = []; + if (!enabled || degraded) return { removed, scanned: 0 }; + let entries; + try { + entries = fs.readdirSync(dir); + } catch { + return { removed, scanned: 0 }; // dir does not exist yet → nothing to prune + } + const cutoff = now - persistTtlMs; + const currentName = `${projectKey(worktree)}.json`; + let scanned = 0; + for (const name of entries) { + // Only touch our own JSON snapshots (and their temp siblings); never any + // unrelated file a user may have placed in the dir. + const isTmp = name.endsWith(".json.tmp"); + const isJson = name.endsWith(".json"); + if (!isTmp && !isJson) continue; + // Never prune the file backing the CURRENT worktree — it may be actively saving. + if (name === currentName) continue; + scanned += 1; + const full = join(dir, name); + // A stray temp file is always removable (an interrupted write left it behind). + if (isTmp) { + try { + fs.rmSync(full, { force: true }); + removed.push(full); + } catch { + /* ignore */ + } + continue; + } + let stale = false; + try { + const st = fs.statSync(full); + const mtime = st.mtimeMs !== undefined ? st.mtimeMs : (st.mtime ? new Date(st.mtime).getTime() : 0); + if (mtime < cutoff) stale = true; + if (!stale) { + // mtime is recent, but the embedded updatedAt may still be ancient (the + // file was touched without a logical update). Parse and check it too. + try { + const parsed = JSON.parse(fs.readFileSync(full, "utf8")); + const ts = parsed && parsed.updatedAt ? new Date(parsed.updatedAt).getTime() : NaN; + if (Number.isFinite(ts) && ts < cutoff) stale = true; + } catch { + /* unparseable → leave the mtime decision stand */ + } + } + } catch { + continue; // unreadable → skip rather than risk deleting something live + } + if (!stale) continue; + try { + fs.rmSync(full, { force: true }); + removed.push(full); + } catch { + /* ignore */ + } + } + return { removed, scanned }; + } + + /** Remove the current worktree's state file (and its temp sibling). Used on a + * full goal_reset / dispose when the caller wants the on-disk snapshot gone too. */ + function remove() { + if (!enabled) return false; + let ok = false; + for (const target of [file, tmp]) { + try { + fs.rmSync(target, { force: true }); + ok = true; + } catch { + /* ignore */ + } + } + return ok; + } + return { file, + dir, load, save, flush, + cleanup, + remove, isDegraded: () => degraded, }; } diff --git a/plugins/goal-guard/tools.js b/plugins/goal-guard/tools.js index 9a5cf9b..15c5969 100644 --- a/plugins/goal-guard/tools.js +++ b/plugins/goal-guard/tools.js @@ -57,6 +57,34 @@ export function goalSimilarity(a, b) { // same goal that PRESERVES accrued review progress. export const SAME_GOAL_THRESHOLD = 0.4; +/** + * Coerce an array of contract strings (acceptance criteria, requirements, …) into + * a clean list of non-empty strings (issue #151). + * + * The tool schema declares these as `array(string())`, but a model can still pass + * a malformed element — an object, a number, null — which previously flowed + * straight into storage. `String({foo:'bar'})` is the literal "[object Object]", + * which then rendered as a garbled todo row in the sidebar and as a meaningless + * acceptance criterion in the contract. Stringify the elements that have a sensible + * string form (string/number/boolean), and DROP the ones that do not (objects, + * arrays, null/undefined) rather than persisting "[object Object]". Each survivor + * is whitespace-collapsed and empties are removed. + */ +export function coerceStringList(value) { + if (!Array.isArray(value)) return []; + const out = []; + for (const item of value) { + let str; + if (typeof item === "string") str = item; + else if (typeof item === "number" && Number.isFinite(item)) str = String(item); + else if (typeof item === "boolean") str = String(item); + else continue; // object/array/null/undefined/NaN → has no meaningful string form + const trimmed = str.replace(/\s+/g, " ").trim(); + if (trimmed) out.push(trimmed); + } + return out; +} + /** * @param {object} deps * @param {ReturnType} deps.store @@ -193,6 +221,16 @@ export function createGoalTools({ store, config, persist, onReset }) { const state = store.stateFor(key); if (!requireGoalMode(state)) return goalModeOnlyResult(); state.active = true; + // Validate/coerce the string-array fields up front (issue #151): a model can + // pass a non-string element (object/number/null) that the schema does not + // catch, which otherwise stored "[object Object]" as an acceptance criterion + // and rendered a garbled sidebar todo. coerceStringList drops the unstringifiable + // ones and normalizes the rest. Use the cleaned lists for BOTH the similarity + // corpus and the stored contract so the two never disagree. + const cleanRequirements = coerceStringList(args.requirements); + const cleanInferred = coerceStringList(args.inferred); + const cleanNonGoals = coerceStringList(args.nonGoals); + const cleanAcceptanceCriteria = coerceStringList(args.acceptanceCriteria); // A genuinely NEW goal in this session (a different verbatim request) must // not inherit the previous goal's gates, verdicts, dirty flags, review // cycles, or accumulated goal text — otherwise the TUI sidebar keeps showing @@ -222,7 +260,12 @@ export function createGoalTools({ store, config, persist, onReset }) { [c?.original, c?.title, ...(c?.requirements || []), ...(c?.acceptanceCriteria || [])] .filter(Boolean) .join(" "); - const incoming = contractCorpus(args); + const incoming = contractCorpus({ + original: args.original, + title: args.title, + requirements: cleanRequirements, + acceptanceCriteria: cleanAcceptanceCriteria, + }); const existing = contractCorpus(state.contract); const sim = goalSimilarity(incoming, existing); const isNewGoal = sim < 0 ? true : sim < SAME_GOAL_THRESHOLD; @@ -234,10 +277,10 @@ export function createGoalTools({ store, config, persist, onReset }) { state.contract = { title: String(args.title || "").replace(/\s+/g, " ").trim(), original: String(args.original || ""), - requirements: args.requirements || [], - inferred: args.inferred || [], - nonGoals: args.nonGoals || [], - acceptanceCriteria: args.acceptanceCriteria || [], + requirements: cleanRequirements, + inferred: cleanInferred, + nonGoals: cleanNonGoals, + acceptanceCriteria: cleanAcceptanceCriteria, at: store.nowIso(), }; // Append the contract's original to the rolling goal text used for diff --git a/tests/agents-unit.test.mjs b/tests/agents-unit.test.mjs new file mode 100644 index 0000000..c0bf71b --- /dev/null +++ b/tests/agents-unit.test.mjs @@ -0,0 +1,100 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { + isReviewAgent, + isGoalAgent, + isPrimaryAgent, + goalSessionActiveForAgent, + prettyAgentName, + PRIMARY_AGENT, + REVIEW_AGENTS, + GOAL_AGENTS, + CYCLE_CLOSING_AGENT, + BASE_GATES, +} from "../plugins/goal-guard/agents.js"; +import { createState } from "../plugins/goal-guard/state.js"; + +// Direct unit coverage for agents.js classifiers (issue #291). Named +// agents-unit.test.mjs because tests/agents.test.mjs validates the agent .md files. + +test("#291 isReviewAgent: true for review gates, false for workers/unknown/empty", () => { + assert.equal(isReviewAgent("goal-reviewer"), true); + assert.equal(isReviewAgent("goal-final-auditor"), true); + assert.equal(isReviewAgent("goal-implementer"), false, "a worker is not a reviewer"); + assert.equal(isReviewAgent("goal"), false, "the primary is not a reviewer"); + assert.equal(isReviewAgent("totally-unknown"), false); + assert.equal(isReviewAgent(""), false); + assert.equal(isReviewAgent(null), false); + assert.equal(isReviewAgent(undefined), false); +}); + +test("#291 isGoalAgent: true for primary/workers/reviewers, false for build/plan/unknown", () => { + assert.equal(isGoalAgent("goal"), true); + assert.equal(isGoalAgent("goal-explorer"), true); + assert.equal(isGoalAgent("goal-security-reviewer"), true); + assert.equal(isGoalAgent("build"), false); + assert.equal(isGoalAgent("plan"), false); + assert.equal(isGoalAgent("unknown"), false); + assert.equal(isGoalAgent(null), false); +}); + +test("#291 isPrimaryAgent: only the exact `goal` agent is primary", () => { + assert.equal(isPrimaryAgent("goal"), true); + assert.equal(isPrimaryAgent(PRIMARY_AGENT), true); + assert.equal(isPrimaryAgent("goal-implementer"), false); + assert.equal(isPrimaryAgent("goal-reviewer"), false); + assert.equal(isPrimaryAgent(""), false); + assert.equal(isPrimaryAgent(null), false); +}); + +test("#291 every BASE_GATE and CYCLE_CLOSING_AGENT is a known review agent", () => { + for (const gate of BASE_GATES) assert.equal(isReviewAgent(gate), true, `${gate} must be a review agent`); + assert.equal(isReviewAgent(CYCLE_CLOSING_AGENT), true); + // Sanity: the canonical lists are internally consistent. + for (const r of REVIEW_AGENTS) assert.ok(GOAL_AGENTS.includes(r), `${r} should be in GOAL_AGENTS`); +}); + +test("#291 goalSessionActiveForAgent: the primary agent always activates, even with null state", () => { + assert.equal(goalSessionActiveForAgent("goal", null), true); + assert.equal(goalSessionActiveForAgent("goal", undefined), true); +}); + +test("#291 goalSessionActiveForAgent: non-goal agents never activate, regardless of state", () => { + const st = createState(); + st.active = true; + st.contract = { title: "x" }; + assert.equal(goalSessionActiveForAgent("build", st), false); + assert.equal(goalSessionActiveForAgent("plan", st), false); + assert.equal(goalSessionActiveForAgent("unknown", st), false); +}); + +test("#291 goalSessionActiveForAgent: a goal worker keeps an ANCHORED session active, not a blank one", () => { + const blank = createState(); + assert.equal(goalSessionActiveForAgent("goal-implementer", blank), false, "no anchored work → not active"); + assert.equal(goalSessionActiveForAgent("goal-implementer", null), false, "null state → not active"); + + const withContract = createState(); + withContract.contract = { title: "t" }; + assert.equal(goalSessionActiveForAgent("goal-implementer", withContract), true); + + const withEdit = createState(); + withEdit.lastEditSeq = 5; + assert.equal(goalSessionActiveForAgent("goal-explorer", withEdit), true); + + const withEvidence = createState(); + withEvidence.evidence = [{ command: "x" }]; + assert.equal(goalSessionActiveForAgent("goal-verifier", withEvidence), true); + + const withFiles = createState(); + withFiles.changedFiles = ["a.js"]; + assert.equal(goalSessionActiveForAgent("goal-architect", withFiles), true); +}); + +test("#291 prettyAgentName: drops the goal- prefix, title-cases, keeps acronyms upper", () => { + assert.equal(prettyAgentName("goal-security-reviewer"), "Security Reviewer"); + assert.equal(prettyAgentName("goal-api-reviewer"), "API Reviewer"); + assert.equal(prettyAgentName("goal-ux-reviewer"), "UX Reviewer"); + assert.equal(prettyAgentName("goal-final-auditor"), "Final Auditor"); + assert.equal(prettyAgentName(""), ""); + assert.equal(prettyAgentName(null), ""); +}); diff --git a/tests/completion.test.mjs b/tests/completion.test.mjs new file mode 100644 index 0000000..541b5f3 --- /dev/null +++ b/tests/completion.test.mjs @@ -0,0 +1,149 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { evaluateCompletionClaim, GUARD_BLOCK_SENTINEL } from "../plugins/goal-guard/completion.js"; +import { createStore, createState } from "../plugins/goal-guard/state.js"; +import { recordVerdict } from "../plugins/goal-guard/verdicts.js"; +import { markEdit } from "../plugins/goal-guard/events.js"; +import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js"; +import { BASE_GATES } from "../plugins/goal-guard/agents.js"; + +// Direct unit coverage for completion.js (issue #291) — previously only exercised +// through the experimental.text.complete hook. + +// A config with NO contextual gates so the required set is exactly BASE_GATES. +const CFG = { ...DEFAULT_CONFIG, contextualGates: false }; + +/** Build an active goal state whose base gates have all PASSED with fresh seqs and + * the requested number of recorded review cycles, so completion is ALLOWED. */ +function allowedState(cycles = 1) { + const store = createStore(); + const st = createState(); + st.active = true; + st.goalText = "do a generic thing"; + // Pass every base gate; the final-auditor pass is what increments reviewCycles. + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + // Drive reviewCycles up to the requested count with intervening edits. + while (st.reviewCycles < cycles) { + markEdit(store, st, "more work"); + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + } + st.dirty = false; + return { store, st }; +} + +test("#291 evaluateCompletionClaim: empty/no text is not blocked", () => { + assert.deepEqual(evaluateCompletionClaim(createState(), CFG, ""), { blocked: false }); + assert.deepEqual(evaluateCompletionClaim(createState(), CFG, null), { blocked: false }); +}); + +test("#291 a marker claim in an INACTIVE session is never policed", () => { + const st = createState(); // active=false + const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 1"); + assert.equal(res.blocked, false); +}); + +test("#291 a mid-sentence MENTION of the marker is not a claim", () => { + const st = createState(); + st.active = true; + const res = evaluateCompletionClaim(st, CFG, "I have not Goal Completed this yet."); + assert.equal(res.blocked, false); +}); + +test("#291 active claim with a MISSING Review cycles line is blocked with the right reason", () => { + const st = createState(); + st.active = true; + const res = evaluateCompletionClaim(st, CFG, "Goal Completed"); + assert.equal(res.blocked, true); + assert.match(res.reason, /missing required Review cycles line/); +}); + +test("#291 active claim with 'Review cycles: 0' and no recorded cycles is blocked", () => { + const st = createState(); + st.active = true; + const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 0"); + assert.equal(res.blocked, true); + assert.match(res.reason, /no review cycles recorded/); + assert.equal(res.claimedCycles, 0); +}); + +test("#291 claimed cycle count must match the recorded count", () => { + const { st } = allowedState(1); + const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 7"); + assert.equal(res.blocked, true); + assert.match(res.reason, /claimed review cycles \(7\) do not match recorded review cycles \(1\)/); +}); + +test("#291 the LAST 'Review cycles: N' line is the claim (earlier mentions ignored)", () => { + const { st } = allowedState(2); + // An earlier recap line cites a wrong number; the conclusion line is correct. + const text = "Earlier I noted Review cycles: 1 (a recap).\n\nGoal Completed\n\nReview cycles: 2"; + const res = evaluateCompletionClaim(st, CFG, text); + assert.equal(res.blocked, false, "the trailing conclusive count matches, so completion is allowed"); + assert.equal(res.claimedCycles, 2); +}); + +test("#291 a fully-satisfied active claim is ALLOWED (not blocked)", () => { + const { st } = allowedState(1); + const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 1"); + assert.equal(res.blocked, false); + assert.equal(res.claimedCycles, 1); +}); + +test("#291 a blocked claim with missing gates names the missing gate(s)", () => { + const st = createState(); + st.active = true; + st.reviewCycles = 1; // pretend a cycle ran but gates are not fresh/passing + const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 1"); + assert.equal(res.blocked, true); + assert.match(res.reason, /required review gates are missing or stale/); +}); + +// --- issue #244: the rewrite is an unambiguous, searchable guard artifact --- + +test("#244/#291 a blocked rewrite carries the unambiguous guard sentinel on its own leading line", () => { + const st = createState(); + st.active = true; + const res = evaluateCompletionClaim(st, CFG, "Goal Completed\n\nReview cycles: 0"); + assert.equal(res.blocked, true); + // The sentinel is something only the guard emits — distinct from any normal + // "I am not done yet" agent prose, and searchable. + assert.ok(res.replacement.startsWith(GUARD_BLOCK_SENTINEL), "sentinel must lead the rewrite"); + assert.match(res.replacement, /\[Goal Guard · blocked completion\]/); + // The marker line is still flipped (existing detection contract preserved). + assert.match(res.replacement, /Goal Not Completed/); + assert.match(res.replacement, /Goal Guard blocked completion: /); +}); + +test("#244 the sentinel makes a guard rewrite distinguishable from a benign 'Goal Not Completed' message", () => { + // A normal agent message can literally contain "Goal Not Completed". The guard's + // rewrite must be set apart by a token that benign prose would never produce. + const benign = "I made a start but Goal Not Completed — I will continue next turn."; + assert.doesNotMatch(benign, /\[Goal Guard · blocked completion\]/); + const st = createState(); + st.active = true; + const res = evaluateCompletionClaim(st, CFG, "Goal Completed"); + assert.match(res.replacement, /\[Goal Guard · blocked completion\]/); +}); + +test("#291 the marker-line markdown prefix is preserved when flipped", () => { + const st = createState(); + st.active = true; + const res = evaluateCompletionClaim(st, CFG, "## Goal Completed\n\nReview cycles: 0"); + assert.match(res.replacement, /## Goal Not Completed/); +}); + +test("#291 a homoglyph-smuggled marker is still blocked (normalized detection)", () => { + const st = createState(); + st.active = true; + // Cyrillic 'о' in "Gоal" — must be detected and blocked. + const res = evaluateCompletionClaim(st, CFG, "Gоal Completed\n\nReview cycles: 0"); + assert.equal(res.blocked, true); + assert.ok(res.replacement.startsWith(GUARD_BLOCK_SENTINEL)); +}); + +test("#291 a custom blockedMarker is honored in the flip", () => { + const st = createState(); + st.active = true; + const res = evaluateCompletionClaim(st, { ...CFG, blockedMarker: "NOPE NOT DONE" }, "Goal Completed\n\nReview cycles: 0"); + assert.match(res.replacement, /NOPE NOT DONE/); +}); diff --git a/tests/concurrency.test.mjs b/tests/concurrency.test.mjs new file mode 100644 index 0000000..143e4e9 --- /dev/null +++ b/tests/concurrency.test.mjs @@ -0,0 +1,126 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { createGuard } from "../plugins/goal-guard/guard.js"; + +// Issue #374: two concurrent goal sessions must complete INDEPENDENTLY — each with +// its own verdicts, gates, evidence, and review cycles — with no cross-contamination. +// Integration tests covered a single session and `file.edited dirties the in-flight +// goal`, but never proved two live sessions stay isolated through completion. + +const noopPersistence = { load: () => null, save: () => {}, flush: () => false, file: "", isDegraded: () => false }; + +function makeGuard(options = {}) { + let t = 0; + return createGuard( + { client: { app: { log: async () => undefined }, tui: { showToast: async () => undefined } } }, + options, + { persistence: noopPersistence, clock: () => (t += 1), syncIdle: true }, + ); +} + +const BASE = ["goal-prompt-auditor", "goal-reviewer", "goal-diff-reviewer", "goal-verifier", "goal-final-auditor"]; + +async function startGoal(hooks, sid, goalText) { + await hooks["chat.params"]({ sessionID: sid, agent: "goal" }, {}); + await hooks["chat.message"]({ sessionID: sid, agent: "goal" }, { parts: [{ type: "text", text: goalText }] }); +} + +async function edit(hooks, sid, callID) { + await hooks["tool.execute.after"]({ tool: "edit", sessionID: sid, callID, args: {} }, { output: "", title: "", metadata: {} }); +} + +async function passGates(hooks, sid, gates) { + for (const agent of gates) { + await hooks["tool.execute.after"]( + { tool: "task", sessionID: sid, callID: agent, args: { subagent_type: agent } }, + { output: "Verdict: PASS", title: "", metadata: {} }, + ); + } +} + +async function tryComplete(hooks, sid, cycles) { + const out = { text: `Goal Completed\n\nReview cycles: ${cycles}` }; + await hooks["experimental.text.complete"]({ sessionID: sid, messageID: "m", partID: "p" }, out); + return out.text; +} + +test("#374 two concurrent goal sessions complete independently without interference", async () => { + const { hooks, store } = makeGuard({ contextualGates: false }); + const A = "sessA"; + const B = "sessB"; + + // Both goals start and both implement (interleaved, simulating concurrency). + await startGoal(hooks, A, "Refactor the billing module"); + await startGoal(hooks, B, "Improve the search ranking"); + await edit(hooks, A, "a1"); + await edit(hooks, B, "b1"); + + // Drive A all the way to a clean review; leave B unreviewed. + await passGates(hooks, A, BASE); + + // A can complete; B is still blocked — proving no leakage of A's passes onto B. + assert.doesNotMatch(await tryComplete(hooks, A, 1), /Goal Not Completed/, "A is fully reviewed → allowed"); + assert.match(await tryComplete(hooks, B, 1), /Goal Not Completed/, "B has no passing gates → blocked"); + + // Each session carries ITS OWN verdicts / cycle count. + const sa = store.stateFor(A); + const sb = store.stateFor(B); + assert.equal(sa.reviewCycles, 1, "A closed one cycle"); + assert.equal(sb.reviewCycles, 0, "B closed no cycle"); + assert.ok(sa.latestVerdict["goal-final-auditor"], "A has an auditor verdict"); + assert.equal(sb.latestVerdict["goal-final-auditor"], undefined, "B has no auditor verdict"); + + // Now bring B to completion; A must remain complete and unaffected. + await passGates(hooks, B, BASE); + assert.doesNotMatch(await tryComplete(hooks, B, 1), /Goal Not Completed/, "B now completes on its own gates"); + assert.doesNotMatch(await tryComplete(hooks, A, 1), /Goal Not Completed/, "A is still complete"); + assert.equal(store.stateFor(A).reviewCycles, 1, "A's cycle count is untouched by B"); + assert.equal(store.stateFor(B).reviewCycles, 1, "B's own cycle count"); +}); + +test("#374 an edit in one session does not dirty or re-block the other", async () => { + const { hooks, store } = makeGuard({ contextualGates: false }); + const A = "isolA"; + const B = "isolB"; + await startGoal(hooks, A, "Add export to CSV"); + await startGoal(hooks, B, "Add import from CSV"); + await edit(hooks, A, "a1"); + await edit(hooks, B, "b1"); + await passGates(hooks, A, BASE); + await passGates(hooks, B, BASE); + + // A makes a NEW edit (re-opening A's gates). B must stay clean and completable. + await hooks["chat.params"]({ sessionID: A, agent: "goal" }, {}); + await edit(hooks, A, "a2"); + + assert.equal(store.stateFor(A).dirty, true, "A is dirty again"); + assert.equal(store.stateFor(B).dirty, false, "B is unaffected by A's edit"); + assert.match(await tryComplete(hooks, A, 1), /Goal Not Completed/, "A re-blocked by its own fresh edit"); + assert.doesNotMatch(await tryComplete(hooks, B, 1), /Goal Not Completed/, "B still completes independently"); +}); + +test("#374 different contextual goals require DIFFERENT gates, scoped per session", async () => { + const { hooks, store } = makeGuard(); // contextual gates ON + const SEC = "secSess"; + const DOC = "docSess"; + // A security goal pulls in goal-security-reviewer; a docs goal pulls in goal-doc-reviewer. + await startGoal(hooks, SEC, "Harden authentication and fix the auth token vulnerability"); + await startGoal(hooks, DOC, "Update the README and project documentation"); + await edit(hooks, SEC, "s1"); + await edit(hooks, DOC, "d1"); + + // Pass only the base gates for both. + await passGates(hooks, SEC, BASE); + await passGates(hooks, DOC, BASE); + + // Each is still blocked on its OWN contextual gate, not the other's. + const secOut = await tryComplete(hooks, SEC, 1); + const docOut = await tryComplete(hooks, DOC, 1); + assert.match(secOut, /goal-security-reviewer/, "security session names its security gate"); + assert.match(docOut, /goal-doc-reviewer/, "docs session names its doc gate"); + // Cross-check: the security session is NOT blocked on the doc reviewer and vice-versa. + assert.doesNotMatch(secOut, /goal-doc-reviewer/); + assert.doesNotMatch(docOut, /goal-security-reviewer/); + + void store; +}); diff --git a/tests/events.test.mjs b/tests/events.test.mjs new file mode 100644 index 0000000..ef4c3ea --- /dev/null +++ b/tests/events.test.mjs @@ -0,0 +1,163 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { + seededTitle, + maybeAutoSeedContract, + markEdit, + markVerification, + markFileChanged, + recordEvidence, + maybeClearDirtyOnFinalPass, +} from "../plugins/goal-guard/events.js"; +import { createStore, createState } from "../plugins/goal-guard/state.js"; +import { recordVerdict } from "../plugins/goal-guard/verdicts.js"; +import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js"; +import { BASE_GATES, CYCLE_CLOSING_AGENT } from "../plugins/goal-guard/agents.js"; + +// Direct unit coverage for events.js — the state-mutation layer (issue #291). + +const CFG = { ...DEFAULT_CONFIG, contextualGates: false }; + +test("#291 seededTitle: short goals pass through, trailing punctuation stripped", () => { + assert.equal(seededTitle("Add a rate limiter"), "Add a rate limiter"); + assert.equal(seededTitle("Fix the bug."), "Fix the bug"); + assert.equal(seededTitle(""), ""); + assert.equal(seededTitle(null), ""); +}); + +test("#291 seededTitle: long goals are clipped near 8 words, not mid-connective", () => { + const t = seededTitle("Add a configurable token bucket rate limiter to the login endpoint for safety"); + const words = t.split(" "); + assert.ok(words.length <= 8, `expected <=8 words, got ${words.length}: ${t}`); + // It must not dangle on a trailing connective like "to"/"for". + assert.doesNotMatch(t, /\b(to|for|with|and|of|in|on)$/i); +}); + +test("#291 maybeAutoSeedContract: no-op without an active session, goal text, or when a contract exists", () => { + const store = createStore(); + assert.equal(maybeAutoSeedContract(store, null), false); + const inactive = createState(); + inactive.goalText = "do a thing"; + assert.equal(maybeAutoSeedContract(store, inactive), false, "inactive → no seed"); + const noText = createState(); + noText.active = true; + assert.equal(maybeAutoSeedContract(store, noText), false, "no goal text → no seed"); + const hasContract = createState(); + hasContract.active = true; + hasContract.goalText = "x"; + hasContract.contract = { title: "t" }; + assert.equal(maybeAutoSeedContract(store, hasContract), false, "existing contract → no seed"); +}); + +test("#291 maybeAutoSeedContract: seeds a minimal auto contract with empty acceptance criteria", () => { + const store = createStore(); + const st = createState(); + st.active = true; + st.goalText = " Add rate limiting to the login endpoint "; + assert.equal(maybeAutoSeedContract(store, st), true); + assert.equal(st.contract.auto, true); + assert.deepEqual(st.contract.acceptanceCriteria, []); + assert.ok(st.contract.title.length > 0); + assert.match(st.contract.original, /Add rate limiting to the login endpoint/); +}); + +test("#291 markEdit: activates, dirties, advances seq, and bounds dirtyReasons", () => { + const store = createStore(); + const st = createState(); + markEdit(store, st, "first"); + assert.equal(st.active, true); + assert.equal(st.dirty, true); + assert.ok(st.lastEditSeq > 0); + const firstSeq = st.lastEditSeq; + markEdit(store, st, "second"); + assert.ok(st.lastEditSeq > firstSeq, "seq is monotonic"); + for (let i = 0; i < 80; i += 1) markEdit(store, st, `r${i}`); + assert.ok(st.dirtyReasons.length <= 50, "dirtyReasons is bounded to 50"); +}); + +test("#291 markEdit: a missing reason falls back to a timestamped default", () => { + const store = createStore(); + const st = createState(); + markEdit(store, st); + assert.match(st.dirtyReasons[0], /^edit at /); +}); + +test("#291 markVerification: sets verificationSeen and returns the stamped seq", () => { + const store = createStore(); + const st = createState(); + const seq = markVerification(store, st); + assert.equal(st.verificationSeen, true); + assert.equal(st.lastVerificationSeq, seq); + assert.ok(seq > 0); +}); + +test("#291 markFileChanged: dedupes files, bounds the list, and marks an edit", () => { + const store = createStore(); + const st = createState(); + markFileChanged(store, st, "a.js"); + markFileChanged(store, st, "a.js"); // duplicate must not be added twice + markFileChanged(store, st, "b.js"); + assert.deepEqual(st.changedFiles, ["a.js", "b.js"]); + assert.equal(st.dirty, true); + // An empty/whitespace name is ignored. + markFileChanged(store, st, " "); + assert.equal(st.changedFiles.length, 2); + for (let i = 0; i < 250; i += 1) markFileChanged(store, st, `f${i}.js`); + assert.ok(st.changedFiles.length <= 200, "changedFiles is bounded to 200"); +}); + +test("#291 recordEvidence: pushes an entry, caps criteria to 50, and marks verification", () => { + const store = createStore(); + const st = createState(); + const manyCriteria = Array.from({ length: 80 }, (_, i) => `c${i}`); + recordEvidence(store, st, "npm test", "all green", manyCriteria); + assert.equal(st.evidence.length, 1); + const entry = st.evidence[0]; + assert.equal(entry.command, "npm test"); + assert.equal(entry.result, "all green"); + assert.equal(entry.criteria.length, 50, "criteria truncated to 50"); + assert.ok(entry.seq > 0, "evidence is stamped with the verification seq"); + assert.equal(st.verificationSeen, true); +}); + +test("#291 recordEvidence: bounds the evidence log to 100 entries", () => { + const store = createStore(); + const st = createState(); + for (let i = 0; i < 130; i += 1) recordEvidence(store, st, `cmd${i}`, "ok", []); + assert.ok(st.evidence.length <= 100); +}); + +test("#291 recordEvidence: non-array criteria become an empty list", () => { + const store = createStore(); + const st = createState(); + recordEvidence(store, st, "cmd", "ok", "not-an-array"); + assert.deepEqual(st.evidence[0].criteria, []); +}); + +test("#291 maybeClearDirtyOnFinalPass: clears dirty only when the auditor passed fresh AND completion is allowed", () => { + const store = createStore(); + const st = createState(); + st.active = true; + st.dirty = true; + // No fresh auditor pass yet → no clear. + assert.equal(maybeClearDirtyOnFinalPass(st, CFG), false); + assert.equal(st.dirty, true); + + // Pass all base gates fresh (final auditor closes the cycle). + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + assert.equal(maybeClearDirtyOnFinalPass(st, CFG), true); + assert.equal(st.dirty, false); + assert.deepEqual(st.dirtyReasons, []); +}); + +test("#291 maybeClearDirtyOnFinalPass: a stale auditor pass (edit after pass) does not clear dirty", () => { + const store = createStore(); + const st = createState(); + st.active = true; + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + // A new edit AFTER the passes makes the auditor pass stale. + markEdit(store, st, "more changes after review"); + assert.equal(maybeClearDirtyOnFinalPass(st, CFG), false); + assert.equal(st.dirty, true); + void CYCLE_CLOSING_AGENT; +}); diff --git a/tests/install.test.mjs b/tests/install.test.mjs index a2b6646..799abe5 100644 --- a/tests/install.test.mjs +++ b/tests/install.test.mjs @@ -11,6 +11,21 @@ import { readRepo } from "./helpers.mjs"; const repoRoot = fileURLToPath(new URL("..", import.meta.url)); const installer = join(repoRoot, "scripts", "install.mjs"); +// Track every temp dir created via mkTemp so the whole suite cleans up after +// itself (issue #67: ~9 mkdtempSync calls × 2 files each were leaked into /tmp +// on every run, accumulating over time and risking CI disk exhaustion). +const TEMP_DIRS = []; +function mkTemp(prefix) { + const dir = mkdtempSync(join(tmpdir(), prefix)); + TEMP_DIRS.push(dir); + return dir; +} +test.after(() => { + for (const dir of TEMP_DIRS.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } +}); + function run(args, opts = {}) { return execFileSync("node", [installer, ...args], { stdio: "pipe", encoding: "utf8", ...opts }); } @@ -47,7 +62,7 @@ test("gitignore excludes secrets, dependencies, and local runtime data", () => { }); test("installer copies components, including the nested plugin module directory", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-")); + const temp = mkTemp("goal-install-"); run(["--target", join(temp, "cfg")]); const cfg = join(temp, "cfg"); assert.equal(existsSync(join(cfg, "agents", "goal.md")), true); @@ -67,7 +82,7 @@ test("installer help shows the public one-command install flow", () => { }); test("installed plugin actually loads from its target location", async () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-load-")); + const temp = mkTemp("goal-install-load-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); const mod = await import(join(cfg, "plugins", "goal-guard.js")); @@ -78,13 +93,13 @@ test("installed plugin actually loads from its target location", async () => { }); test("installer dry run does not write files", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-dry-")); + const temp = mkTemp("goal-install-dry-"); run(["--target", join(temp, "cfg"), "--dry-run"]); assert.equal(existsSync(join(temp, "cfg")), false); }); test("installer is idempotent (second run reports all unchanged)", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-idem-")); + const temp = mkTemp("goal-install-idem-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); const out = run(["--target", cfg]); @@ -92,7 +107,7 @@ test("installer is idempotent (second run reports all unchanged)", () => { }); test("installer refuses to overwrite a locally-modified file", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-conflict-")); + const temp = mkTemp("goal-install-conflict-"); const cfg = join(temp, "cfg"); mkdirSync(join(cfg, "agents"), { recursive: true }); writeFileSync(join(cfg, "agents", "goal.md"), "local change\n"); @@ -100,7 +115,7 @@ test("installer refuses to overwrite a locally-modified file", () => { }); test("installer force replaces a locally-modified file", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-force-")); + const temp = mkTemp("goal-install-force-"); const cfg = join(temp, "cfg"); mkdirSync(join(cfg, "agents"), { recursive: true }); const dest = join(cfg, "agents", "goal.md"); @@ -110,7 +125,7 @@ test("installer force replaces a locally-modified file", () => { }); test("installer can upgrade a file it owns without --force", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-upgrade-")); + const temp = mkTemp("goal-install-upgrade-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); // Simulate a previously-shipped version: dest matches a stale manifest hash. @@ -129,7 +144,7 @@ test("installer can upgrade a file it owns without --force", () => { }); test("installer supports an explicit target directory", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-install-target-")); + const temp = mkTemp("goal-install-target-"); const target = join(temp, "custom-opencode"); run(["--target", target]); assert.equal(existsSync(join(target, "agents", "goal.md")), true); @@ -137,7 +152,7 @@ test("installer supports an explicit target directory", () => { }); test("uninstall removes installed files but keeps locally-modified ones", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-uninstall-")); + const temp = mkTemp("goal-uninstall-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); const modified = join(cfg, "agents", "goal.md"); @@ -149,7 +164,7 @@ test("uninstall removes installed files but keeps locally-modified ones", () => }); test("installer refuses invalid tui.json unless --force backs it up", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-invalid-tui-")); + const temp = mkTemp("goal-invalid-tui-"); const cfg = join(temp, "cfg"); mkdirSync(cfg, { recursive: true }); writeFileSync(join(cfg, "tui.json"), "{bad json\n"); @@ -162,7 +177,7 @@ test("REGRESSION: a non-object tui.json (JSON array) is refused, not silently co // A JSON array is typeof "object", so the old `typeof existing === "object"` // check adopted it, set .plugin on the array (which JSON.stringify drops), and // reported success while the sidebar entry was silently lost. Refuse non-objects. - const temp = mkdtempSync(join(tmpdir(), "goal-array-tui-")); + const temp = mkTemp("goal-array-tui-"); const cfg = join(temp, "cfg"); mkdirSync(cfg, { recursive: true }); writeFileSync(join(cfg, "tui.json"), "[]\n"); @@ -180,7 +195,7 @@ test("REGRESSION: uninstall cleans tui.json before removing the manifest (orphan // re-run can still reach the manifest and complete. Previously the manifest was // removed first, leaving a stuck sidebar reference that --uninstall could no // longer touch (the manifest was already gone). - const temp = mkdtempSync(join(tmpdir(), "goal-uninstall-order-")); + const temp = mkTemp("goal-uninstall-order-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); // normal install const manifestPath = join(cfg, ".goal-mode-manifest.json"); @@ -211,7 +226,7 @@ test("REGRESSION: --target '~' is expanded to the home directory", () => { }); test("uninstall ignores tampered manifest paths outside the target", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-manifest-safe-")); + const temp = mkTemp("goal-manifest-safe-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); const manifest = JSON.parse(readFileSync(join(cfg, ".goal-mode-manifest.json"), "utf8")); @@ -221,7 +236,7 @@ test("uninstall ignores tampered manifest paths outside the target", () => { }); test("issue #20: a clean install + uninstall leaves no Goal Mode files behind (tui.json removed)", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-clean-cycle-")); + const temp = mkTemp("goal-clean-cycle-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); assert.equal(existsSync(join(cfg, "tui.json")), true, "install creates tui.json"); @@ -235,7 +250,7 @@ test("issue #20: a clean install + uninstall leaves no Goal Mode files behind (t }); test("issue #20: uninstall PRESERVES a user-authored tui.json with other plugins", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-keep-tui-")); + const temp = mkTemp("goal-keep-tui-"); const cfg = join(temp, "cfg"); mkdirSync(cfg, { recursive: true }); writeFileSync(join(cfg, "tui.json"), JSON.stringify({ $schema: "https://opencode.ai/tui.json", plugin: ["someone-elses-plugin"] }, null, 2) + "\n"); @@ -262,7 +277,7 @@ test("issue #24: mutually-exclusive flags print a friendly error", () => { }); test("issue #24: a target that is a file prints a friendly error", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-file-target-")); + const temp = mkTemp("goal-file-target-"); const file = join(temp, "not-a-dir"); writeFileSync(file, "x"); const res = runFail(["--target", file]); @@ -272,14 +287,14 @@ test("issue #24: a target that is a file prints a friendly error", () => { }); test("issue #158: dry-run reports 'Would register' for the sidebar, never 'Registered'", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-dryrun-wording-")); + const temp = mkTemp("goal-dryrun-wording-"); const out = run(["--target", join(temp, "cfg"), "--dry-run"]); assert.match(out, /Would register the experimental sidebar/); assert.doesNotMatch(out, /\bRegistered the experimental sidebar/); }); test("issue #134: an unreadable existing dest (broken symlink) does not crash install", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-broken-symlink-")); + const temp = mkTemp("goal-broken-symlink-"); const cfg = join(temp, "cfg"); mkdirSync(join(cfg, "plugins"), { recursive: true }); // A broken symlink where a component file will be copied: readFileSync throws on it. @@ -301,7 +316,7 @@ test("issue #220: a symlink in the source tree is skipped with a warning", () => }); test("issue #293: uninstall --force removes locally-modified files (with a backup)", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-force-uninstall-")); + const temp = mkTemp("goal-force-uninstall-"); const cfg = join(temp, "cfg"); run(["--target", cfg]); const modified = join(cfg, "agents", "goal.md"); @@ -313,7 +328,7 @@ test("issue #293: uninstall --force removes locally-modified files (with a backu }); test("issue #293: install --force backs up a locally-modified file before overwriting", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-force-backup-")); + const temp = mkTemp("goal-force-backup-"); const cfg = join(temp, "cfg"); mkdirSync(join(cfg, "agents"), { recursive: true }); const dest = join(cfg, "agents", "goal.md"); @@ -325,7 +340,7 @@ test("issue #293: install --force backs up a locally-modified file before overwr }); test("issue #133: a malformed tui.json does NOT leave a committed manifest claiming a full install", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-tui-before-manifest-")); + const temp = mkTemp("goal-tui-before-manifest-"); const cfg = join(temp, "cfg"); mkdirSync(cfg, { recursive: true }); writeFileSync(join(cfg, "tui.json"), "{bad json\n"); @@ -340,7 +355,7 @@ test("issue #133: a malformed tui.json does NOT leave a committed manifest claim }); test("issue #197: a dry-run never touches the OpenCode plugin cache", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-cache-dryrun-")); + const temp = mkTemp("goal-cache-dryrun-"); const cache = join(temp, "cache"); const pkgRoot = join(cache, "opencode", "packages"); const canary = join(pkgRoot, "opencode-goal-mode-canary"); @@ -354,7 +369,7 @@ test("issue #197: a dry-run never touches the OpenCode plugin cache", () => { }); test("issue #197: cache clearing never collateral-damages a sibling (name-canary) dir", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-cache-canary-")); + const temp = mkTemp("goal-cache-canary-"); const cache = join(temp, "cache"); const pkgRoot = join(cache, "opencode", "packages"); const canary = join(pkgRoot, "opencode-goal-mode-canary"); @@ -369,7 +384,7 @@ test("issue #197: cache clearing never collateral-damages a sibling (name-canary }); test("issue #197: an idempotent re-install does not churn the cache", () => { - const temp = mkdtempSync(join(tmpdir(), "goal-cache-idem-")); + const temp = mkTemp("goal-cache-idem-"); const cache = join(temp, "cache"); const cfg = join(temp, "cfg"); const env = { ...process.env, XDG_CACHE_HOME: cache }; diff --git a/tests/logger.test.mjs b/tests/logger.test.mjs index 8a89223..8bdc46b 100644 --- a/tests/logger.test.mjs +++ b/tests/logger.test.mjs @@ -47,3 +47,146 @@ test("guardPrompt sends synthetic body via promptAsync", async () => { assert.equal(sent[0].body.parts[0].synthetic, true); assert.match(sent[0].body.system, /Keep going\./); }); + +// --------------------------------------------------------------------------- +// #262 — deeper coverage of the guard's sole agent-communication module. +// --------------------------------------------------------------------------- + +test("#262 buildGuardPromptBody omits model when none is supplied (no model key)", () => { + const body = buildGuardPromptBody("text", null); + assert.equal("model" in body, false, "a null model must not add a model key"); + const withModel = buildGuardPromptBody("text", { providerID: "p", modelID: "m" }); + assert.deepEqual(withModel.model, { providerID: "p", modelID: "m" }); +}); + +test("#262 buildGuardPromptBody always pins the canonical primary agent", () => { + // The body must route the continuation to the `goal` primary agent so a guard turn + // can never be misrouted to Build/Plan and deactivate Goal Mode. + assert.equal(buildGuardPromptBody("a", null).agent, "goal"); + assert.equal(buildGuardPromptBody("b", { modelID: "x" }).agent, "goal"); +}); + +test("#262 buildGuardPromptBody stringifies a non-string directive text", () => { + const body = buildGuardPromptBody(12345, null); + assert.ok(body.system.includes("12345")); + assert.match(body.system, new RegExp(`^${GUARD_PREFIX.replace(/[[\]]/g, "\\$&")}`)); +}); + +test("#262 isSyntheticUserTurn: mixed real+synthetic text is NOT a guard continuation", () => { + // `every` over text parts: one genuine (human) text part means it is a real turn. + assert.equal( + isSyntheticUserTurn([ + { type: "text", text: "(cont)", synthetic: true }, + { type: "text", text: "actually do this instead" }, + ]), + false, + ); +}); + +test("#262 isSyntheticUserTurn: tool-only / file-only turns are real (no text parts)", () => { + assert.equal(isSyntheticUserTurn([{ type: "tool", id: "t" }]), false); + assert.equal(isSyntheticUserTurn([{ type: "file", url: "u" }, { type: "file", url: "v" }]), false); +}); + +test("#262 isSyntheticUserTurn: malformed parts are handled safely", () => { + assert.equal(isSyntheticUserTurn(null), false); + assert.equal(isSyntheticUserTurn(undefined), false); + assert.equal(isSyntheticUserTurn("not an array"), false); + assert.equal(isSyntheticUserTurn([null, { type: "text", text: "hi", synthetic: true }]), true); +}); + +test("#262 guardPrompt returns false on missing client/session/text and never throws", async () => { + const noClient = createLogger({}); + assert.equal(await noClient.guardPrompt("s", "go", null), false); + const ok = createLogger({ session: { promptAsync: async () => {} } }); + assert.equal(await ok.guardPrompt("", "go", null), false, "missing sessionID → false"); + assert.equal(await ok.guardPrompt("s", "", null), false, "missing text → false"); +}); + +test("#262 guardPrompt returns false (and swallows) when promptAsync throws", async () => { + const logger = createLogger({ + session: { + promptAsync: async () => { + throw new Error("SessionBusy"); + }, + }, + }); + // A failed continuation must degrade to false, not propagate into the hook. + assert.equal(await logger.guardPrompt("s", "go", null), false); +}); + +test("#262 guardPrompt returns true on a successful send", async () => { + const logger = createLogger({ session: { promptAsync: async () => ({ ok: true }) } }); + assert.equal(await logger.guardPrompt("s", "go", null), true); +}); + +test("#262 emitGoalCompleted sends the final-deliverable directive with the cycle count and marker", async () => { + const sent = []; + const logger = createLogger({ session: { promptAsync: async (o) => sent.push(o) } }); + const ok = await logger.emitGoalCompleted("sess", 3, { modelID: "m" }); + assert.equal(ok, true); + assert.equal(sent.length, 1); + assert.equal(sent[0].path.id, "sess"); + // The directive must carry the review-cycle count and the completion marker. + assert.match(sent[0].body.system, /Review cycles: 3/); + assert.match(sent[0].body.system, /Goal Completed/); + assert.equal(sent[0].body.parts[0].synthetic, true); +}); + +test("#262 emitGoalCompleted coerces a non-numeric/absent cycle count to 0 and honors a custom marker", async () => { + const sent = []; + const logger = createLogger({ session: { promptAsync: async (o) => sent.push(o) } }, { completionMarker: "SHIPPED" }); + await logger.emitGoalCompleted("sess", undefined, null); + assert.match(sent[0].body.system, /Review cycles: 0/); + assert.match(sent[0].body.system, /SHIPPED/); +}); + +test("#262 continueSession is an alias of guardPrompt (deprecated path still works)", async () => { + const sent = []; + const logger = createLogger({ session: { promptAsync: async (o) => sent.push(o) } }); + assert.equal(await logger.continueSession("s", "carry on", null), true); + assert.equal(sent[0].path.id, "s"); + assert.match(sent[0].body.system, /carry on/); +}); + +test("#262 info/warn/toast are best-effort and never throw on a missing or failing client", async () => { + const silent = createLogger({}); // no app.log, no tui + await assert.doesNotReject(silent.info("hi")); + await assert.doesNotReject(silent.warn("careful")); + await assert.doesNotReject(silent.toast("blocked")); + + const calls = []; + const failing = createLogger({ + app: { log: async () => { throw new Error("log down"); } }, + tui: { showToast: async () => { throw new Error("tui down"); } }, + }); + await assert.doesNotReject(failing.info("x")); + await assert.doesNotReject(failing.toast("y")); + void calls; +}); + +test("#262 info/warn/toast forward the expected payload shape to the client", async () => { + const logs = []; + const toasts = []; + const logger = createLogger({ + app: { log: async (p) => logs.push(p) }, + tui: { showToast: async (p) => toasts.push(p) }, + }); + await logger.info("started", { sessionID: "s" }); + await logger.warn("slow"); + await logger.toast("Completion blocked", "error"); + assert.equal(logs[0].body.service, "goal-guard"); + assert.equal(logs[0].body.level, "info"); + assert.equal(logs[0].body.message, "started"); + assert.deepEqual(logs[0].body.extra, { sessionID: "s" }); + assert.equal(logs[1].body.level, "warn"); + assert.equal(toasts[0].body.message, "Completion blocked"); + assert.equal(toasts[0].body.variant, "error"); +}); + +test("#262 toast defaults to the 'warning' variant", async () => { + const toasts = []; + const logger = createLogger({ tui: { showToast: async (p) => toasts.push(p) } }); + await logger.toast("heads up"); + assert.equal(toasts[0].body.variant, "warning"); +}); diff --git a/tests/persistence.test.mjs b/tests/persistence.test.mjs index fbc5232..83d7bdc 100644 --- a/tests/persistence.test.mjs +++ b/tests/persistence.test.mjs @@ -1,16 +1,25 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { mkdtempSync, existsSync, readFileSync, writeFileSync, mkdirSync, chmodSync, renameSync, rmSync, copyFileSync } from "node:fs"; +import { mkdtempSync, existsSync, readFileSync, writeFileSync, mkdirSync, chmodSync, renameSync, rmSync, copyFileSync, statSync, utimesSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; -import { createPersistence, projectKey, stateBaseDir } from "../plugins/goal-guard/persistence.js"; +import { createPersistence, projectKey, stateBaseDir, DEFAULT_PERSIST_TTL_MS } from "../plugins/goal-guard/persistence.js"; import { createStore } from "../plugins/goal-guard/state.js"; import { createGuard } from "../plugins/goal-guard/guard.js"; +// Track every temp dir so it is removed after the suite (issue #67: these were +// previously leaked into /tmp, accumulating ~18 dirs per run and risking CI disk). +const TEMP_DIRS = []; function tempEnv() { const dir = mkdtempSync(join(tmpdir(), "goal-persist-")); + TEMP_DIRS.push(dir); return { XDG_STATE_HOME: dir, __dir: dir }; } +test.after(() => { + for (const dir of TEMP_DIRS.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } +}); /** A synchronous timer shim so debounced saves can be flushed deterministically. */ function syncTimers() { @@ -227,3 +236,134 @@ test("two different worktrees persist independently", () => { ra.restore(pa.load()); assert.equal(ra.stateFor("x").reviewCycles, 1); }); + +// --------------------------------------------------------------------------- +// #72 — state files must be written owner-only (mode 0600), not world-readable. +// --------------------------------------------------------------------------- + +test("#72 state file is written with owner-only (0600) permissions", () => { + const env = tempEnv(); + const p = createPersistence({ worktree: "/secure-proj", env }); + assert.equal(p.flush(() => ({ goal: "secret goal text", evidence: ["leaky"] })), true); + const mode = statSync(p.file).mode & 0o777; + // The owner keeps read/write; group and other get NOTHING. (On platforms whose + // umask/ACL further restricts owner bits this is still owner-only; the invariant + // we enforce is that no group/other bit is set.) + assert.equal(mode & 0o077, 0, `group/other bits must be clear, got mode ${mode.toString(8)}`); + assert.equal(mode & 0o600, 0o600, `owner read/write must be set, got mode ${mode.toString(8)}`); +}); + +test("#72 a debounced save also lands at 0600 (no world-readable window via tmp+rename)", () => { + const env = tempEnv(); + const timers = syncTimers(); + const p = createPersistence({ worktree: "/secure-debounced", env, setTimer: timers.setTimer, clearTimer: timers.clearTimer }); + p.save(() => ({ a: 1 })); + timers.fire(); + const mode = statSync(p.file).mode & 0o777; + assert.equal(mode & 0o077, 0, `group/other bits must be clear, got mode ${mode.toString(8)}`); +}); + +// --------------------------------------------------------------------------- +// #218 — orphaned per-worktree state files must be pruned (unbounded growth). +// --------------------------------------------------------------------------- + +test("#218 DEFAULT_PERSIST_TTL_MS is a 7-day on-disk retention, longer than the 24h in-memory TTL", () => { + assert.equal(DEFAULT_PERSIST_TTL_MS, 7 * 24 * 60 * 60 * 1000); +}); + +test("#218 cleanup() prunes stale OTHER-worktree files but preserves the current one and fresh files", () => { + const env = tempEnv(); + const dir = stateBaseDir(env); + const current = createPersistence({ worktree: "/current", env }); + const stale = createPersistence({ worktree: "/old-moved-project", env }); + const fresh = createPersistence({ worktree: "/another-active", env }); + + // Write all three files. + current.flush(() => ({ updatedAt: new Date().toISOString(), keep: "me" })); + stale.flush(() => ({ updatedAt: "2000-01-01T00:00:00.000Z", orphan: true })); + fresh.flush(() => ({ updatedAt: new Date().toISOString(), active: true })); + + // Age the stale file's mtime well past the TTL so the mtime check also fires. + const old = Date.now() / 1000 - (8 * 24 * 60 * 60); + utimesSync(stale.file, old, old); + + assert.ok(existsSync(current.file) && existsSync(stale.file) && existsSync(fresh.file)); + const res = current.cleanup(); + assert.ok(res.removed.includes(stale.file), "the stale orphan must be removed"); + assert.equal(existsSync(current.file), true, "the current worktree file is always preserved"); + assert.equal(existsSync(fresh.file), true, "a fresh sibling worktree file is preserved"); + assert.equal(existsSync(stale.file), false, "the stale orphan is gone"); +}); + +test("#218 cleanup() removes a file whose embedded updatedAt is ancient even if mtime is fresh", () => { + const env = tempEnv(); + const current = createPersistence({ worktree: "/cur", env }); + const ancient = createPersistence({ worktree: "/ancient-by-content", env }); + current.flush(() => ({ updatedAt: new Date().toISOString() })); + // Write with a recent mtime but a very old logical updatedAt. + ancient.flush(() => ({ updatedAt: "1999-01-01T00:00:00.000Z" })); + const res = current.cleanup(); + assert.ok(res.removed.includes(ancient.file)); + assert.equal(existsSync(ancient.file), false); +}); + +test("#218 cleanup() is a safe no-op when the state dir does not exist yet", () => { + const env = tempEnv(); + const p = createPersistence({ worktree: "/never", env }); + const res = p.cleanup(); + assert.deepEqual(res, { removed: [], scanned: 0 }); +}); + +test("#218 cleanup() respects a custom persistTtlMs and never touches the current file", () => { + const env = tempEnv(); + const cur = createPersistence({ worktree: "/cur2", env, persistTtlMs: 1000 }); + const other = createPersistence({ worktree: "/other2", env, persistTtlMs: 1000 }); + cur.flush(() => ({ updatedAt: new Date().toISOString() })); + other.flush(() => ({ updatedAt: new Date(Date.now() - 5000).toISOString() })); + const res = cur.cleanup(); + assert.ok(res.removed.includes(other.file), "older than the 1s TTL → pruned"); + assert.equal(existsSync(cur.file), true); +}); + +test("#218 cleanup() removes orphaned .tmp files left by an interrupted write", () => { + const env = tempEnv(); + const dir = stateBaseDir(env); + const cur = createPersistence({ worktree: "/cur3", env }); + cur.flush(() => ({ updatedAt: new Date().toISOString() })); + mkdirSync(dir, { recursive: true }); + const strayTmp = join(dir, `${projectKey("/abandoned")}.json.tmp`); + writeFileSync(strayTmp, "partial", "utf8"); + const res = cur.cleanup(); + assert.ok(res.removed.includes(strayTmp), "a stray temp file must be swept"); + assert.equal(existsSync(strayTmp), false); +}); + +test("#218 cleanup() ignores non-goal-guard files in the directory", () => { + const env = tempEnv(); + const dir = stateBaseDir(env); + const cur = createPersistence({ worktree: "/cur4", env }); + cur.flush(() => ({ updatedAt: new Date().toISOString() })); + const unrelated = join(dir, "README.txt"); + writeFileSync(unrelated, "not ours", "utf8"); + cur.cleanup(); + assert.equal(existsSync(unrelated), true, "an unrelated file is never touched"); +}); + +test("#218 remove() deletes the current worktree's file and its tmp sibling", () => { + const env = tempEnv(); + const dir = stateBaseDir(env); + const p = createPersistence({ worktree: "/disposable", env }); + p.flush(() => ({ updatedAt: new Date().toISOString() })); + writeFileSync(`${p.file}.tmp`, "leftover", "utf8"); + assert.equal(existsSync(p.file), true); + assert.equal(p.remove(), true); + assert.equal(existsSync(p.file), false); + assert.equal(existsSync(`${p.file}.tmp`), false); + void dir; +}); + +test("#218 cleanup() is a no-op when persistence is disabled", () => { + const env = tempEnv(); + const p = createPersistence({ worktree: "/x", env, enabled: false }); + assert.deepEqual(p.cleanup(), { removed: [], scanned: 0 }); +}); diff --git a/tests/plugin.test.mjs b/tests/plugin.test.mjs index 6673a0c..7c6a119 100644 --- a/tests/plugin.test.mjs +++ b/tests/plugin.test.mjs @@ -589,7 +589,18 @@ test("compaction preserves concrete live state values", async () => { test("default export registers the goal_* tools", async () => { const hooks = await plugin({ client: { app: { log: async () => undefined } } }); - assert.ok(hooks.tool); + // The default export imports tools.js (and its `@opencode-ai/plugin` dependency) + // lazily; when that import FAILS the plugin returns hooks WITHOUT a `tool` key. + // A bare `assert.ok(hooks.tool)` masked that as a vague falsy failure that read + // like a logic bug rather than a missing dependency (issue #373). Surface the + // real cause loudly: if `tool` is absent it is almost always ERR_MODULE_NOT_FOUND + // for `@opencode-ai/plugin`. + assert.ok( + hooks.tool, + "Expected the `tool` hook to be registered, but it is absent. This means tools.js " + + "failed to import — most likely ERR_MODULE_NOT_FOUND for `@opencode-ai/plugin`. " + + "Run: npm install @opencode-ai/plugin", + ); for (const name of ["goal_status", "goal_evidence_map", "goal_reviewer_memory", "goal_contract", "goal_evidence", "goal_reset"]) { assert.equal(typeof hooks.tool[name].execute, "function", `${name} missing`); } diff --git a/tests/summary.test.mjs b/tests/summary.test.mjs new file mode 100644 index 0000000..e592ab9 --- /dev/null +++ b/tests/summary.test.mjs @@ -0,0 +1,173 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { + shortGoalLabel, + summarizeState, + reviewerMemoryReport, + statusReport, + evidenceMapReport, + sidebarView, + NO_GOAL, +} from "../plugins/goal-guard/summary.js"; +import { createStore, createState } from "../plugins/goal-guard/state.js"; +import { recordVerdict } from "../plugins/goal-guard/verdicts.js"; +import { markEdit, recordEvidence } from "../plugins/goal-guard/events.js"; +import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js"; +import { BASE_GATES } from "../plugins/goal-guard/agents.js"; + +// Direct unit coverage for summary.js (issue #291). + +const CFG = { ...DEFAULT_CONFIG, contextualGates: false }; + +test("#291 shortGoalLabel: prefers contract.title, then original, then goalText", () => { + assert.equal(shortGoalLabel({ contract: { title: "Migrate auth to JWT" } }), "Migrate auth to JWT"); + assert.equal(shortGoalLabel({ contract: { original: "Add a rate limiter to login" } }), "Add a rate limiter to login"); + assert.equal(shortGoalLabel({ goalText: "just the captured text" }), "just the captured text"); + assert.equal(shortGoalLabel({}), ""); + assert.equal(shortGoalLabel(null), ""); +}); + +test("#291 shortGoalLabel: a long title is truncated with an ellipsis", () => { + const long = "x".repeat(200); + const out = shortGoalLabel({ contract: { title: long } }, 20); + assert.ok(out.length <= 20); + assert.match(out, /…$/); +}); + +test("#291 shortGoalLabel: contract with ONLY original (no title) falls back to original", () => { + // The edge case named in the issue. + const out = shortGoalLabel({ contract: { original: "Implement the export endpoint." } }); + assert.equal(out, "Implement the export endpoint."); +}); + +test("#291 shortGoalLabel: prefers a short first sentence of a long original", () => { + const out = shortGoalLabel({ contract: { original: "Add rate limiting. Then a lot more detail follows that should be dropped." } }, 80); + assert.equal(out, "Add rate limiting."); +}); + +test("#291 summarizeState: renders the key invariants as a single semicolon line", () => { + const store = createStore(); + const st = createState(); + st.active = true; + markEdit(store, st, "did work"); + recordVerdict(store, st, "goal-reviewer", "FAIL", "blocking: X is broken\nVerdict: FAIL"); + const out = summarizeState(st, CFG); + assert.match(out, /active=true/); + assert.match(out, /dirty=true/); + assert.match(out, /reviewCycles=0/); + assert.match(out, /recentVerdicts=goal-reviewer:FAIL/); + assert.match(out, /missingGates=/); +}); + +test("#291 summarizeState: 'none' sentinels when there are no verdicts/reasons", () => { + const st = createState(); + st.active = true; + const out = summarizeState(st, CFG); + assert.match(out, /recentVerdicts=none/); + assert.match(out, /dirtyReasons=none/); +}); + +test("#291 reviewerMemoryReport: partitions open vs resolved and flags freshness", () => { + const store = createStore(); + const st = createState(); + recordVerdict(store, st, "goal-doc-reviewer", "FAIL", "blocking: docs missing\nVerdict: FAIL"); + let report = reviewerMemoryReport(st); + assert.equal(report.open.length, 1); + assert.equal(report.resolved.length, 0); + assert.equal(report.total, 1); + // Resolve it. + recordVerdict(store, st, "goal-doc-reviewer", "PASS", "Verdict: PASS"); + report = reviewerMemoryReport(st); + assert.equal(report.open.length, 0); + assert.equal(report.resolved.length, 1); +}); + +test("#291 reviewerMemoryReport: tolerates a non-array reviewerMemory", () => { + const report = reviewerMemoryReport({ reviewerMemory: undefined, lastEditSeq: 0 }); + assert.deepEqual(report, { open: [], resolved: [], total: 0 }); +}); + +test("#291 statusReport: completionAllowed flips when all gates pass and reflects required/missing", () => { + const store = createStore(); + const st = createState(); + st.active = true; + let report = statusReport(st, CFG); + assert.equal(report.completionAllowed, false); + assert.ok(report.missingGates.length > 0); + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + report = statusReport(st, CFG); + assert.equal(report.completionAllowed, true); + assert.deepEqual(report.missingGates, []); + assert.equal(report.reviewCycles, 1); +}); + +test("#291 statusReport: inactive session is never completion-allowed", () => { + const st = createState(); + st.contract = { acceptanceCriteria: [] }; + const report = statusReport(st, CFG); + assert.equal(report.active, false); + assert.equal(report.completionAllowed, false); +}); + +test("#291 evidenceMapReport: a criterion with fresh matching evidence is 'covered' once gates pass", () => { + const store = createStore(); + const st = createState(); + st.active = true; + st.contract = { acceptanceCriteria: ["Rate limit the login endpoint"] }; + recordEvidence(store, st, "run load test", "ok", ["Rate limit the login endpoint"]); + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + const report = evidenceMapReport(st, CFG); + assert.equal(report.criteria.length, 1); + assert.equal(report.criteria[0].status, "covered"); +}); + +test("#291 evidenceMapReport: a criterion with no evidence is 'missing' with an actionable next step", () => { + const st = createState(); + st.active = true; + st.contract = { acceptanceCriteria: ["Tokens expire after 1h"] }; + const report = evidenceMapReport(st, CFG); + assert.equal(report.criteria[0].status, "missing"); + assert.match(report.criteria[0].nextAction, /Run verification and record it with goal_evidence/); +}); + +test("#291 evidenceMapReport: evidence that matches no criterion is reported as unmappedEvidence", () => { + const store = createStore(); + const st = createState(); + st.active = true; + st.contract = { acceptanceCriteria: ["Login works"] }; + recordEvidence(store, st, "unrelated check", "ok", ["something else entirely"]); + const report = evidenceMapReport(st, CFG); + assert.equal(report.unmappedEvidence.length, 1); + assert.equal(report.unmappedEvidence[0].command, "unrelated check"); +}); + +test("#291 sidebarView: returns NO_GOAL for inactive/no-goal sessions", () => { + assert.equal(sidebarView(null, CFG), NO_GOAL); + assert.equal(sidebarView(createState(), CFG), NO_GOAL, "inactive → NO_GOAL"); + const activeNoGoal = createState(); + activeNoGoal.active = true; // active but no goal label + assert.equal(sidebarView(activeNoGoal, CFG), NO_GOAL); +}); + +test("#291 sidebarView: a running goal exposes gate counts and review-cycle status", () => { + const st = createState(); + st.active = true; + st.contract = { title: "Ship the feature", acceptanceCriteria: ["it works"] }; + const view = sidebarView(st, CFG); + assert.equal(view.state, "running"); + assert.equal(view.goal, "Ship the feature"); + assert.match(view.gates, /\/\d+ gates/); + assert.match(view.status, /in progress/); +}); + +test("#291 sidebarView: a fully-passed, clean goal renders as done", () => { + const store = createStore(); + const st = createState(); + st.active = true; + st.contract = { title: "Done goal", acceptanceCriteria: ["it works"] }; + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + st.dirty = false; + const view = sidebarView(st, CFG); + assert.equal(view.state, "done"); + assert.match(view.status, /completed/); +}); diff --git a/tests/system.test.mjs b/tests/system.test.mjs new file mode 100644 index 0000000..d31b671 --- /dev/null +++ b/tests/system.test.mjs @@ -0,0 +1,118 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { buildSystemInjection } from "../plugins/goal-guard/system.js"; +import { createStore, createState } from "../plugins/goal-guard/state.js"; +import { recordVerdict } from "../plugins/goal-guard/verdicts.js"; +import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js"; +import { BASE_GATES } from "../plugins/goal-guard/agents.js"; + +// Direct unit coverage for system.js — the system-prompt injection (issue #291). +// `bullet`/`exampleTaskCall` are internal, so they are exercised through the public +// buildSystemInjection output (the only entry point the hook uses). + +const CFG = { ...DEFAULT_CONFIG, contextualGates: false }; + +function activeState() { + const st = createState(); + st.active = true; + st.goalText = "build a generic feature"; + return st; +} + +test("#291 buildSystemInjection returns null for an absent or inactive session", () => { + assert.equal(buildSystemInjection(null, CFG), null); + assert.equal(buildSystemInjection(createState(), CFG), null, "inactive → null"); +}); + +test("#291 buildSystemInjection emits the live enforcement header and ground-truth framing", () => { + const out = buildSystemInjection(activeState(), CFG); + assert.match(out, /## Goal Guard — live enforcement state/); + assert.match(out, /Treat it as ground truth/); + assert.match(out, /Review cycles recorded: 0/); +}); + +test("#291 with no contract: it tells the agent to record the contract FIRST", () => { + const out = buildSystemInjection(activeState(), CFG); + assert.match(out, /Goal Contract: not yet recorded/); +}); + +test("#291 an auto-derived contract (no criteria) nudges adding acceptance criteria", () => { + const st = activeState(); + st.contract = { title: "t", original: "x", acceptanceCriteria: [], auto: true }; + const out = buildSystemInjection(st, CFG); + assert.match(out, /Goal Contract: auto-derived/); +}); + +test("#291 recorded acceptance criteria are listed (capped at 8) — issue #155", () => { + const st = activeState(); + st.contract = { + title: "t", + original: "x", + acceptanceCriteria: Array.from({ length: 12 }, (_, i) => `criterion number ${i}`), + }; + const out = buildSystemInjection(st, CFG); + assert.match(out, /12 acceptance criteria recorded/); + // Only the first 8 are inlined. + assert.match(out, /criterion number 0/); + assert.match(out, /criterion number 7/); + assert.doesNotMatch(out, /criterion number 8/); +}); + +test("#291 reviewer-supplied criteria/findings are escaped (no backticks/code-fences/newlines)", () => { + const st = activeState(); + st.contract = { + title: "t", + original: "x", + acceptanceCriteria: ["use ```rm -rf``` and `eval` across\nmultiple lines"], + }; + const out = buildSystemInjection(st, CFG); + // Backticks are folded to single quotes and newlines collapsed → no triple-fence. + assert.doesNotMatch(out, /```/); +}); + +test("#291 with programmaticReview ON: it says the guard runs reviews automatically on stop", () => { + const out = buildSystemInjection(activeState(), { ...CFG, programmaticReview: true }); + assert.match(out, /runs the required reviews AUTOMATICALLY/); + // It must NOT tell the agent to invoke reviewers itself. + assert.doesNotMatch(out, /MANDATORY NEXT ACTION — run the outstanding reviews now using the task tool/); +}); + +test("#291 with programmaticReview OFF: it prescribes an exact task() call for the next gate", () => { + const out = buildSystemInjection(activeState(), { ...CFG, programmaticReview: false }); + assert.match(out, /MANDATORY NEXT ACTION/); + // A concrete, copyable task tool call for the first missing reviewer. + assert.match(out, /task\(subagent_type: "goal-/); +}); + +test("#291 required/missing gates render as comma lists, 'none' when empty", () => { + const store = createStore(); + const st = activeState(); + // Pass every base gate so the missing set is empty → 'none'. + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + const out = buildSystemInjection(st, CFG); + assert.match(out, /Gates still missing or stale: none\./); + assert.match(out, /Required review gates: goal-/); +}); + +test("#291 completion ALLOWED vs BLOCKED line reflects the gate state and marker", () => { + const store = createStore(); + const st = activeState(); + const blocked = buildSystemInjection(st, CFG); + assert.match(blocked, /Completion is currently BLOCKED/); + assert.match(blocked, /Do NOT claim "Goal Completed" yet/); + + for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS"); + const allowed = buildSystemInjection(st, CFG); + assert.match(allowed, /Completion is currently ALLOWED/); + assert.match(allowed, /answer with "Goal Completed"/); +}); + +test("#291 open reviewer-memory findings are surfaced and escaped", () => { + const store = createStore(); + const st = activeState(); + recordVerdict(store, st, "goal-doc-reviewer", "FAIL", "Blocking findings\n- README `missing` tool docs\nVerdict: FAIL"); + const out = buildSystemInjection(st, CFG); + assert.match(out, /Open Reviewer Memory/); + assert.match(out, /README/); + assert.doesNotMatch(out, /```/); +}); diff --git a/tests/tools.test.mjs b/tests/tools.test.mjs new file mode 100644 index 0000000..84cc0f2 --- /dev/null +++ b/tests/tools.test.mjs @@ -0,0 +1,127 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { createGoalTools, coerceStringList, goalSimilarity, SAME_GOAL_THRESHOLD } from "../plugins/goal-guard/tools.js"; +import { createGuard } from "../plugins/goal-guard/guard.js"; + +const noopPersistence = { load: () => null, save: () => {}, flush: () => false, file: "", isDegraded: () => false }; + +function makeGuard(options = {}) { + let t = 0; + return createGuard( + { client: { app: { log: async () => undefined }, tui: { showToast: async () => undefined } } }, + options, + { persistence: noopPersistence, clock: () => (t += 1), syncIdle: true }, + ); +} + +async function makeTools() { + const guard = makeGuard(); + const tools = createGoalTools({ store: guard.store, config: guard.config, persist: guard.persist }); + await guard.hooks["chat.params"]({ sessionID: "s", agent: "goal" }, {}); + return { guard, tools }; +} + +// --------------------------------------------------------------------------- +// #151 — coerceStringList: validate/coerce acceptanceCriteria (and friends). +// --------------------------------------------------------------------------- + +test("#151 coerceStringList keeps non-empty strings and trims/collapses whitespace", () => { + assert.deepEqual(coerceStringList(["a", " b ", "c\n\nd"]), ["a", "b", "c d"]); +}); + +test("#151 coerceStringList stringifies finite numbers and booleans", () => { + assert.deepEqual(coerceStringList([1, 2.5, true, false]), ["1", "2.5", "true", "false"]); +}); + +test("#151 coerceStringList DROPS objects/arrays/null/NaN (never '[object Object]')", () => { + const out = coerceStringList([{ foo: "bar" }, ["x"], null, undefined, NaN, "real"]); + assert.deepEqual(out, ["real"]); + assert.ok(!out.includes("[object Object]"), "an object must never become '[object Object]'"); +}); + +test("#151 coerceStringList drops empty/whitespace-only strings and tolerates non-arrays", () => { + assert.deepEqual(coerceStringList(["", " ", "ok"]), ["ok"]); + assert.deepEqual(coerceStringList(undefined), []); + assert.deepEqual(coerceStringList("not an array"), []); + assert.deepEqual(coerceStringList(null), []); +}); + +test("#151 goal_contract sanitizes object acceptanceCriteria elements before storage", async () => { + const { guard, tools } = await makeTools(); + await tools.goal_contract.execute( + { + title: "Do the thing", + original: "do the thing well", + acceptanceCriteria: ["login works", { malformed: "object" }, "", 42, ["nested"]], + }, + { sessionID: "s" }, + ); + const st = guard.store.stateFor("s"); + // The object and nested-array elements are dropped; the number is stringified; the + // empty string is removed. NOTHING becomes "[object Object]". + assert.deepEqual(st.contract.acceptanceCriteria, ["login works", "42"]); + for (const c of st.contract.acceptanceCriteria) { + assert.equal(typeof c, "string"); + assert.notEqual(c, "[object Object]"); + } +}); + +test("#151 the recorded-criteria count in the result reflects the cleaned list", async () => { + const { tools } = await makeTools(); + const res = await tools.goal_contract.execute( + { title: "T", original: "build it", acceptanceCriteria: ["only this one", { x: 1 }, null] }, + { sessionID: "s" }, + ); + assert.match(res.title, /1 acceptance criteria/); +}); + +test("#151 requirements / inferred / nonGoals are sanitized too", async () => { + const { guard, tools } = await makeTools(); + await tools.goal_contract.execute( + { + title: "T", + original: "build it", + requirements: ["must log in", { bad: 1 }], + inferred: [null, "should be fast"], + nonGoals: [["x"], "no UI changes"], + acceptanceCriteria: ["works"], + }, + { sessionID: "s" }, + ); + const c = guard.store.stateFor("s").contract; + assert.deepEqual(c.requirements, ["must log in"]); + assert.deepEqual(c.inferred, ["should be fast"]); + assert.deepEqual(c.nonGoals, ["no UI changes"]); +}); + +// --------------------------------------------------------------------------- +// goalSimilarity — exported helper used to decide same-goal vs new-goal. +// --------------------------------------------------------------------------- + +test("goalSimilarity: identical content scores 1, disjoint scores 0, undefined → -1", () => { + assert.equal(goalSimilarity("rate limiter login", "rate limiter login"), 1); + assert.equal(goalSimilarity("rate limiter login endpoint", "completely different banana topic"), 0); + assert.equal(goalSimilarity("", "anything"), -1, "no content tokens on one side → -1"); +}); + +test("goalSimilarity: a faithful restatement scores above the same-goal threshold", () => { + const a = "Add a configurable token-bucket rate limiter to the login endpoint"; + const b = "Add a rate limiter to login"; + assert.ok(goalSimilarity(a, b) >= SAME_GOAL_THRESHOLD, "restatement should be treated as the SAME goal"); +}); + +// --------------------------------------------------------------------------- +// Tool gating sanity (session + goal-mode requirements). +// --------------------------------------------------------------------------- + +test("goal_* tools refuse when the host supplies no session id", async () => { + const { tools } = await makeTools(); + const res = await tools.goal_status.execute({}, {}); + assert.equal(res.metadata.reason, "no_session"); +}); + +test("goal_reset requires confirm=true", async () => { + const { tools } = await makeTools(); + const res = await tools.goal_reset.execute({ confirm: false }, { sessionID: "s" }); + assert.match(res.title, /not confirmed/i); +}); diff --git a/tests/verdicts.test.mjs b/tests/verdicts.test.mjs index fd7e919..d27f28a 100644 --- a/tests/verdicts.test.mjs +++ b/tests/verdicts.test.mjs @@ -59,6 +59,17 @@ test("parseVerdict is case-insensitive on the keyword", () => { assert.equal(parseVerdict("verdict: pass"), "PASS"); }); +test("#375 parseVerdict is case-insensitive last-wins across mixed-case verdicts in one text", () => { + // Two verdicts in the same body, both in non-canonical case: the LAST one wins + // and the result is normalized to upper-case regardless of input casing. + assert.equal(parseVerdict("VERDICT: pass\nVERDICT: FAIL"), "FAIL"); + assert.equal(parseVerdict("verdict: FAIL\nVerdict: Pass"), "PASS"); + assert.equal(parseVerdict("Verdict: PASS\nSome text\nverdict: fail"), "FAIL"); + // A lower-case earlier PASS must not beat a later upper-case FAIL, and vice-versa. + assert.equal(parseVerdict("Verdict: pass — looks good\nVERDICT: FAIL — blocking"), "FAIL"); + assert.equal(parseVerdict("VERDICT: FAIL initial\nverdict: pass after fix"), "PASS"); +}); + test("textOf unwraps the task_result envelope", () => { const wrapped = 'Looks good. Verdict: PASS'; assert.match(textOf({ output: wrapped }), /Verdict: PASS/); From 4b68d7bdce8ccf64afcc3ca535aebcc2af1e3c46 Mon Sep 17 00:00:00 2001 From: Devin Oldenburg Date: Sun, 21 Jun 2026 13:35:11 +0200 Subject: [PATCH 2/2] test(logger): fully escape GUARD_PREFIX when building the RegExp CodeQL js/incomplete-sanitization: the assertion escaped only [ and ] before new RegExp(); escape all regex metacharacters so the anchored match is correct regardless of GUARD_PREFIX content. --- tests/logger.test.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/logger.test.mjs b/tests/logger.test.mjs index 8bdc46b..884ee43 100644 --- a/tests/logger.test.mjs +++ b/tests/logger.test.mjs @@ -69,7 +69,7 @@ test("#262 buildGuardPromptBody always pins the canonical primary agent", () => test("#262 buildGuardPromptBody stringifies a non-string directive text", () => { const body = buildGuardPromptBody(12345, null); assert.ok(body.system.includes("12345")); - assert.match(body.system, new RegExp(`^${GUARD_PREFIX.replace(/[[\]]/g, "\\$&")}`)); + assert.match(body.system, new RegExp(`^${GUARD_PREFIX.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`)); }); test("#262 isSyntheticUserTurn: mixed real+synthetic text is NOT a guard continuation", () => {