Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions plugins/goal-guard/completion.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@ import { summarizeState } from "./summary.js";

const CYCLES_RE = /Review cycles:\s*(\d+)/gi;

/**
* Unambiguous guard sentinel prefixed to every blocked-completion rewrite (issue
* #244). The blocked MARKER itself (`Goal Not Completed` by default) is textually
* identical to a normal "I am honestly not done yet" agent sentence, so a user
* skimming the transcript cannot tell a guard intervention from benign agent
* prose. This sentinel is something ONLY the guard emits — it carries the
* bracketed `[Goal Guard]` namespace plus a stop glyph and the word "blocked" — so
* `rg "Goal Guard · blocked"` (or the bracket) finds exactly the interventions
* and never an ordinary message. It is emitted on its OWN leading line, ABOVE the
* (still-flipped) marker line, so existing marker/markdown-prefix detection is
* unaffected. */
export const GUARD_BLOCK_SENTINEL = "⛔ [Goal Guard · blocked completion]";

// Invisible / formatting code points that carry no glyph and must never break up
// or hide the marker: zero-width space/joiners, bidi controls, word-joiner, soft
// hyphen, BOM.
Expand Down Expand Up @@ -136,6 +149,11 @@ export function evaluateCompletionClaim(state, config, text) {
if (replacement === text) {
replacement = `${blockedMarker}\n\n${replacement}`;
}
// Prefix the unambiguous guard sentinel on its OWN leading line so the rewrite is
// visibly a guard artifact and searchable, never conflated with a normal "I am not
// done yet" agent message (issue #244). The marker line itself is left flipped in
// place below it, so existing marker/markdown-prefix detection still holds.
replacement = `${GUARD_BLOCK_SENTINEL}\n${replacement}`;
replacement += `\n\nGoal Guard blocked completion: ${reason}. State: ${summary}`;
return { blocked: true, reason, replacement, claimedCycles };
}
127 changes: 124 additions & 3 deletions plugins/goal-guard/persistence.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,22 @@
*/

import { createHash } from "node:crypto";
import { mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync } from "node:fs";
import { mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync, readdirSync, statSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";

/**
* On-disk retention for orphaned state files (issue #218). The in-memory
* `sessionTtlMs` only governs when an in-memory record may be dropped; it does
* NOT remove the per-worktree file from disk. Without an independent on-disk TTL,
* `$XDG_STATE_HOME/opencode/goal-guard/` accumulates one `<hash>.json` file per
* worktree the user has EVER opened a goal in, holding the verbatim goal text,
* the contract, and the evidence — forever. Default is 7 days, deliberately
* longer than the 24h in-memory TTL so a long-paused goal is not pruned while a
* sibling worktree is still actively saving.
*/
export const DEFAULT_PERSIST_TTL_MS = 7 * 24 * 60 * 60 * 1000;

/** Resolve the base directory for guard state files. */
export function stateBaseDir(env = process.env) {
const xdg = env.XDG_STATE_HOME && env.XDG_STATE_HOME.trim();
Expand All @@ -41,6 +53,7 @@ export function projectKey(worktree) {
* @param {Record<string,string|undefined>} [opts.env]
* @param {(fn: () => void, ms: number) => any} [opts.setTimer] Injectable for tests.
* @param {(handle: any) => void} [opts.clearTimer]
* @param {number} [opts.persistTtlMs] On-disk TTL for orphaned files (issue #218).
*/
export function createPersistence({
worktree,
Expand All @@ -49,11 +62,13 @@ export function createPersistence({
env = process.env,
setTimer = (fn, ms) => setTimeout(fn, ms),
clearTimer = (h) => clearTimeout(h),
persistTtlMs = DEFAULT_PERSIST_TTL_MS,
fs: fsImpl,
} = {}) {
// Injectable fs (tests exercise the EXDEV path); defaults to node:fs.
// Injectable fs (tests exercise the EXDEV/cleanup paths); defaults to node:fs.
const fs = {
mkdirSync, readFileSync, writeFileSync, renameSync, rmSync, copyFileSync,
readdirSync, statSync,
...(fsImpl || {}),
};
const dir = stateBaseDir(env);
Expand All @@ -79,7 +94,13 @@ export function createPersistence({
if (!enabled || degraded) return false;
try {
fs.mkdirSync(dir, { recursive: true });
fs.writeFileSync(tmp, JSON.stringify(data), "utf8");
// mode 0600 (owner read/write only): the snapshot holds the verbatim goal
// text, the contract, the changed-file paths, the verification evidence and
// the reviewer findings — all of which would otherwise land at the default
// umask (typically 0644, world-readable) and leak to any local user on a
// shared/multi-user host (issue #72). The mode is set on the temp file BEFORE
// the rename so the target is never momentarily world-readable.
fs.writeFileSync(tmp, JSON.stringify(data), { encoding: "utf8", mode: 0o600 });
try {
fs.renameSync(tmp, file);
} catch (err) {
Expand Down Expand Up @@ -141,11 +162,111 @@ export function createPersistence({
return writeNow(fn()); // true on success, false on a real write failure
}

/**
* Prune orphaned state files (issue #218).
*
* The store is keyed one file per worktree, so the directory grows unbounded:
* one `<hash>.json` per worktree the user has ever opened a goal in, never
* removed. This sweep deletes any OTHER worktree's file (never the current
* one) when it is stale, where "stale" means EITHER:
* - its mtime is older than `persistTtlMs` (default 7 days), OR
* - its embedded `updatedAt` is older than `persistTtlMs`.
* The current worktree's file is always preserved so an in-flight goal is never
* dropped out from under a live session. Also removes the leftover `.tmp` files.
*
* All disk access is best-effort: a read-only/sandboxed FS, a missing dir, or an
* unreadable entry degrades to a no-op rather than throwing into a hook.
*
* @param {object} [opts]
* @param {number} [opts.now] Injectable wall clock (ms) for tests.
* @returns {{ removed: string[], scanned: number }}
*/
function cleanup({ now = Date.now() } = {}) {
const removed = [];
if (!enabled || degraded) return { removed, scanned: 0 };
let entries;
try {
entries = fs.readdirSync(dir);
} catch {
return { removed, scanned: 0 }; // dir does not exist yet → nothing to prune
}
const cutoff = now - persistTtlMs;
const currentName = `${projectKey(worktree)}.json`;
let scanned = 0;
for (const name of entries) {
// Only touch our own JSON snapshots (and their temp siblings); never any
// unrelated file a user may have placed in the dir.
const isTmp = name.endsWith(".json.tmp");
const isJson = name.endsWith(".json");
if (!isTmp && !isJson) continue;
// Never prune the file backing the CURRENT worktree — it may be actively saving.
if (name === currentName) continue;
scanned += 1;
const full = join(dir, name);
// A stray temp file is always removable (an interrupted write left it behind).
if (isTmp) {
try {
fs.rmSync(full, { force: true });
removed.push(full);
} catch {
/* ignore */
}
continue;
}
let stale = false;
try {
const st = fs.statSync(full);
const mtime = st.mtimeMs !== undefined ? st.mtimeMs : (st.mtime ? new Date(st.mtime).getTime() : 0);
if (mtime < cutoff) stale = true;
if (!stale) {
// mtime is recent, but the embedded updatedAt may still be ancient (the
// file was touched without a logical update). Parse and check it too.
try {
const parsed = JSON.parse(fs.readFileSync(full, "utf8"));
const ts = parsed && parsed.updatedAt ? new Date(parsed.updatedAt).getTime() : NaN;
if (Number.isFinite(ts) && ts < cutoff) stale = true;
} catch {
/* unparseable → leave the mtime decision stand */
}
}
} catch {
continue; // unreadable → skip rather than risk deleting something live
}
if (!stale) continue;
try {
fs.rmSync(full, { force: true });
removed.push(full);
} catch {
/* ignore */
}
}
return { removed, scanned };
}

/** Remove the current worktree's state file (and its temp sibling). Used on a
* full goal_reset / dispose when the caller wants the on-disk snapshot gone too. */
function remove() {
if (!enabled) return false;
let ok = false;
for (const target of [file, tmp]) {
try {
fs.rmSync(target, { force: true });
ok = true;
} catch {
/* ignore */
}
}
return ok;
}

return {
file,
dir,
load,
save,
flush,
cleanup,
remove,
isDegraded: () => degraded,
};
}
53 changes: 48 additions & 5 deletions plugins/goal-guard/tools.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,34 @@ export function goalSimilarity(a, b) {
// same goal that PRESERVES accrued review progress.
export const SAME_GOAL_THRESHOLD = 0.4;

/**
* Coerce an array of contract strings (acceptance criteria, requirements, …) into
* a clean list of non-empty strings (issue #151).
*
* The tool schema declares these as `array(string())`, but a model can still pass
* a malformed element — an object, a number, null — which previously flowed
* straight into storage. `String({foo:'bar'})` is the literal "[object Object]",
* which then rendered as a garbled todo row in the sidebar and as a meaningless
* acceptance criterion in the contract. Stringify the elements that have a sensible
* string form (string/number/boolean), and DROP the ones that do not (objects,
* arrays, null/undefined) rather than persisting "[object Object]". Each survivor
* is whitespace-collapsed and empties are removed.
*/
export function coerceStringList(value) {
if (!Array.isArray(value)) return [];
const out = [];
for (const item of value) {
let str;
if (typeof item === "string") str = item;
else if (typeof item === "number" && Number.isFinite(item)) str = String(item);
else if (typeof item === "boolean") str = String(item);
else continue; // object/array/null/undefined/NaN → has no meaningful string form
const trimmed = str.replace(/\s+/g, " ").trim();
if (trimmed) out.push(trimmed);
}
return out;
}

/**
* @param {object} deps
* @param {ReturnType<import("./state.js").createStore>} deps.store
Expand Down Expand Up @@ -193,6 +221,16 @@ export function createGoalTools({ store, config, persist, onReset }) {
const state = store.stateFor(key);
if (!requireGoalMode(state)) return goalModeOnlyResult();
state.active = true;
// Validate/coerce the string-array fields up front (issue #151): a model can
// pass a non-string element (object/number/null) that the schema does not
// catch, which otherwise stored "[object Object]" as an acceptance criterion
// and rendered a garbled sidebar todo. coerceStringList drops the unstringifiable
// ones and normalizes the rest. Use the cleaned lists for BOTH the similarity
// corpus and the stored contract so the two never disagree.
const cleanRequirements = coerceStringList(args.requirements);
const cleanInferred = coerceStringList(args.inferred);
const cleanNonGoals = coerceStringList(args.nonGoals);
const cleanAcceptanceCriteria = coerceStringList(args.acceptanceCriteria);
// A genuinely NEW goal in this session (a different verbatim request) must
// not inherit the previous goal's gates, verdicts, dirty flags, review
// cycles, or accumulated goal text — otherwise the TUI sidebar keeps showing
Expand Down Expand Up @@ -222,7 +260,12 @@ export function createGoalTools({ store, config, persist, onReset }) {
[c?.original, c?.title, ...(c?.requirements || []), ...(c?.acceptanceCriteria || [])]
.filter(Boolean)
.join(" ");
const incoming = contractCorpus(args);
const incoming = contractCorpus({
original: args.original,
title: args.title,
requirements: cleanRequirements,
acceptanceCriteria: cleanAcceptanceCriteria,
});
const existing = contractCorpus(state.contract);
const sim = goalSimilarity(incoming, existing);
const isNewGoal = sim < 0 ? true : sim < SAME_GOAL_THRESHOLD;
Expand All @@ -234,10 +277,10 @@ export function createGoalTools({ store, config, persist, onReset }) {
state.contract = {
title: String(args.title || "").replace(/\s+/g, " ").trim(),
original: String(args.original || ""),
requirements: args.requirements || [],
inferred: args.inferred || [],
nonGoals: args.nonGoals || [],
acceptanceCriteria: args.acceptanceCriteria || [],
requirements: cleanRequirements,
inferred: cleanInferred,
nonGoals: cleanNonGoals,
acceptanceCriteria: cleanAcceptanceCriteria,
at: store.nowIso(),
};
// Append the contract's original to the rolling goal text used for
Expand Down
100 changes: 100 additions & 0 deletions tests/agents-unit.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import test from "node:test";
import assert from "node:assert/strict";
import {
isReviewAgent,
isGoalAgent,
isPrimaryAgent,
goalSessionActiveForAgent,
prettyAgentName,
PRIMARY_AGENT,
REVIEW_AGENTS,
GOAL_AGENTS,
CYCLE_CLOSING_AGENT,
BASE_GATES,
} from "../plugins/goal-guard/agents.js";
import { createState } from "../plugins/goal-guard/state.js";

// Direct unit coverage for agents.js classifiers (issue #291). Named
// agents-unit.test.mjs because tests/agents.test.mjs validates the agent .md files.

test("#291 isReviewAgent: true for review gates, false for workers/unknown/empty", () => {
assert.equal(isReviewAgent("goal-reviewer"), true);
assert.equal(isReviewAgent("goal-final-auditor"), true);
assert.equal(isReviewAgent("goal-implementer"), false, "a worker is not a reviewer");
assert.equal(isReviewAgent("goal"), false, "the primary is not a reviewer");
assert.equal(isReviewAgent("totally-unknown"), false);
assert.equal(isReviewAgent(""), false);
assert.equal(isReviewAgent(null), false);
assert.equal(isReviewAgent(undefined), false);
});

test("#291 isGoalAgent: true for primary/workers/reviewers, false for build/plan/unknown", () => {
assert.equal(isGoalAgent("goal"), true);
assert.equal(isGoalAgent("goal-explorer"), true);
assert.equal(isGoalAgent("goal-security-reviewer"), true);
assert.equal(isGoalAgent("build"), false);
assert.equal(isGoalAgent("plan"), false);
assert.equal(isGoalAgent("unknown"), false);
assert.equal(isGoalAgent(null), false);
});

test("#291 isPrimaryAgent: only the exact `goal` agent is primary", () => {
assert.equal(isPrimaryAgent("goal"), true);
assert.equal(isPrimaryAgent(PRIMARY_AGENT), true);
assert.equal(isPrimaryAgent("goal-implementer"), false);
assert.equal(isPrimaryAgent("goal-reviewer"), false);
assert.equal(isPrimaryAgent(""), false);
assert.equal(isPrimaryAgent(null), false);
});

test("#291 every BASE_GATE and CYCLE_CLOSING_AGENT is a known review agent", () => {
for (const gate of BASE_GATES) assert.equal(isReviewAgent(gate), true, `${gate} must be a review agent`);
assert.equal(isReviewAgent(CYCLE_CLOSING_AGENT), true);
// Sanity: the canonical lists are internally consistent.
for (const r of REVIEW_AGENTS) assert.ok(GOAL_AGENTS.includes(r), `${r} should be in GOAL_AGENTS`);
});

test("#291 goalSessionActiveForAgent: the primary agent always activates, even with null state", () => {
assert.equal(goalSessionActiveForAgent("goal", null), true);
assert.equal(goalSessionActiveForAgent("goal", undefined), true);
});

test("#291 goalSessionActiveForAgent: non-goal agents never activate, regardless of state", () => {
const st = createState();
st.active = true;
st.contract = { title: "x" };
assert.equal(goalSessionActiveForAgent("build", st), false);
assert.equal(goalSessionActiveForAgent("plan", st), false);
assert.equal(goalSessionActiveForAgent("unknown", st), false);
});

test("#291 goalSessionActiveForAgent: a goal worker keeps an ANCHORED session active, not a blank one", () => {
const blank = createState();
assert.equal(goalSessionActiveForAgent("goal-implementer", blank), false, "no anchored work → not active");
assert.equal(goalSessionActiveForAgent("goal-implementer", null), false, "null state → not active");

const withContract = createState();
withContract.contract = { title: "t" };
assert.equal(goalSessionActiveForAgent("goal-implementer", withContract), true);

const withEdit = createState();
withEdit.lastEditSeq = 5;
assert.equal(goalSessionActiveForAgent("goal-explorer", withEdit), true);

const withEvidence = createState();
withEvidence.evidence = [{ command: "x" }];
assert.equal(goalSessionActiveForAgent("goal-verifier", withEvidence), true);

const withFiles = createState();
withFiles.changedFiles = ["a.js"];
assert.equal(goalSessionActiveForAgent("goal-architect", withFiles), true);
});

test("#291 prettyAgentName: drops the goal- prefix, title-cases, keeps acronyms upper", () => {
assert.equal(prettyAgentName("goal-security-reviewer"), "Security Reviewer");
assert.equal(prettyAgentName("goal-api-reviewer"), "API Reviewer");
assert.equal(prettyAgentName("goal-ux-reviewer"), "UX Reviewer");
assert.equal(prettyAgentName("goal-final-auditor"), "Final Auditor");
assert.equal(prettyAgentName(""), "");
assert.equal(prettyAgentName(null), "");
});
Loading
Loading