devinoldenburg · devinoldenburg · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026
@@ -93,6 +93,19 @@ export const BASE_GATES = Object.freeze([
   "goal-final-auditor",
 ]);
 
+/**
+ * Minimal base-gate set for lean/token-conscious mode. Omits the code-only
+ * diff-reviewer and verifier gates: a full review pass runs one subagent
+ * subtask per required gate, so two fewer gates means two fewer subtasks (and
+ * their tokens) per cycle. The prompt auditor, correctness reviewer, and final
+ * auditor remain as the safety floor (goal alignment + correctness + finality).
+ */
+export const LEAN_BASE_GATES = Object.freeze([
+  "goal-prompt-auditor",
+  "goal-reviewer",
+  "goal-final-auditor",
+]);
+
 /**
  * Gates that only make sense when the goal actually changed code. A research,
  * analysis, explanation, or planning goal produces a text/evidence deliverable

@@ -16,7 +16,7 @@
  * `state`, so it is fully unit-testable.
  */
 
-import { completionAllowed, missingGates } from "./gates.js";
+import { completionAllowed, missingGates, evidenceCoverageMet } from "./gates.js";
 
 /** Consecutive no-change idle ticks after which auto-continue pauses for the human. */
 export const NO_PROGRESS_LIMIT = 4;
@@ -54,13 +54,38 @@ export function progressSignature(state, config) {
  * exact reviewer gates via the task tool. */
 export function continuationMessage(state, config) {
   const missing = missingGates(state, config);
+  const criteria = Array.isArray(state?.contract?.acceptanceCriteria) ? state.contract.acceptanceCriteria : [];
+  const hasCriteria = criteria.length > 0;
+  const evidenceOk = evidenceCoverageMet(state);
   const lines = ["The goal is NOT complete — do not stop. Continue working now."];
   if (!state?.contract) {
     lines.push("First, record the Goal Contract with the `goal_contract` tool (title, the original request, and concrete acceptance criteria) so the objective is anchored.");
   }
   if (state?.dirty) {
     lines.push("There are changes that are not yet reviewed/verified after your latest edits — actually run the code/tests and record it with `goal_evidence`.");
   }
+  if (hasCriteria && !evidenceOk) {
+    const uncovered = criteria.filter((c) => {
+      const full = String(c).trim().toLowerCase();
+      if (!full) return false;
+      const entries = Array.isArray(state?.evidence) ? state.evidence : [];
+      const lastEditSeq = state?.lastEditSeq || 0;
+      return !entries.some((entry) => {
+        const ecriteria = Array.isArray(entry.criteria) ? entry.criteria : [];
+        if (!ecriteria.some((ec) => String(ec).trim().toLowerCase() === full)) return false;
+        if (!entry.seq) return lastEditSeq === 0;
+        return entry.seq > lastEditSeq;
+      });
+    });
+    if (uncovered.length) {
+      lines.push(
+        `EVIDENCE COVERAGE MISSING — every acceptance criterion must have fresh recorded ` +
+        `evidence. Uncovered criteria: ${uncovered.map((c) => `"${c}"`).join(", ")}. ` +
+        `Run verification and record with \`goal_evidence\`, passing each uncovered criterion. ` +
+        `Mere exhaustion of approaches is NOT success — the guard requires proof.`,
+      );
+    }
+  }
   if (missing.length) {
     if (config?.programmaticReview) {
       lines.push(

@@ -28,7 +28,7 @@
  * adversarial digit-runs cannot trigger polynomial backtracking (issue #367).
  */
 
-import { missingGates, completionAllowed } from "./gates.js";
+import { missingGates, completionAllowed, evidenceCoverageMet } from "./gates.js";
 import { summarizeState } from "./summary.js";
 
 const CYCLES_RE = /Review cycles:\s*(\d+)/gi;
@@ -131,7 +131,26 @@ export function evaluateCompletionClaim(state, config, text) {
     reason = `claimed review cycles (${claimedCycles}) do not match recorded review cycles (${state.reviewCycles})`;
   } else if (!completionAllowed(state, config)) {
     const missing = missingGates(state, config).join(", ");
-    reason = `required review gates are missing or stale (${missing || "goal session not active"})`;
+    if (missing) {
+      reason = `required review gates are missing or stale (${missing})`;
+    } else if (!evidenceCoverageMet(state)) {
+      const criteria = Array.isArray(state?.contract?.acceptanceCriteria) ? state.contract.acceptanceCriteria : [];
+      const uncovered = criteria.filter((c) => {
+        const full = String(c).trim().toLowerCase();
+        if (!full) return false;
+        const entries = Array.isArray(state?.evidence) ? state.evidence : [];
+        const lastEditSeq = state?.lastEditSeq || 0;
+        return !entries.some((entry) => {
+          const ecriteria = Array.isArray(entry.criteria) ? entry.criteria : [];
+          if (!ecriteria.some((ec) => String(ec).trim().toLowerCase() === full)) return false;
+          if (!entry.seq) return lastEditSeq === 0;
+          return entry.seq > lastEditSeq;
+        });
+      });
+      reason = `acceptance criteria lack evidence coverage (${uncovered.map((c) => `"${c}"`).join(", ") || "all criteria"}) — goal not achieved, mere exhaustion is not success`;
+    } else {
+      reason = "goal session not active";
+    }
   }
 
   if (!reason) return { blocked: false, claimedCycles };

@@ -56,6 +56,18 @@ export const DEFAULT_CONFIG = Object.freeze({
    * explanation goal is gated on evidence instead, never on an empty diff);
    * "always" forces them on; "never" turns them off. */
   requireCodeReview: "auto",
+  /** Require fresh evidence covering every acceptance criterion before completion
+   * is allowed. This is the programmatic exhaustion-prevention gate: a strong LLM
+   * that exhausts every approach and persuades the reviewers it "tried everything"
+   * cannot satisfy this check for criteria it never actually achieved — the guard
+   * demands verified, recorded proof, not statements of effort. Default true. */
+  requireEvidenceCoverage: true,
+  /** Lean mode — reduces the base gate set to the 3 safety-critical reviewers
+   * (prompt-auditor, reviewer, final-auditor) and disables contextual gates, so
+   * each full review round runs fewer subagent subtasks and token consumption per
+   * cycle drops sharply. Combined with the evidence-coverage check this keeps the
+   * exhaustion loophole closed. Default false. */
+  leanGates: false,
   /** Block non-Goal agents from invoking the goal-* subagents via the task tool. */
   restrictSubagents: true,
   /** Maximum tracked sessions before LRU eviction. */
@@ -134,6 +146,8 @@ export const CONFIG_DOCS = Object.freeze({
   // Gates & scope
   contextualGates: { group: "Gates", summary: "Require specialist reviewer gates derived from goal text / changed files." },
   requireCodeReview: { group: "Gates", summary: "When to require the code-only diff/verification gates: 'auto' (only once the goal edits a file), 'always', or 'never'. Lets non-code agentic goals complete on evidence." },
+  requireEvidenceCoverage: { group: "Gates", summary: "Require fresh recorded evidence for every acceptance criterion before completion (exhaustion-prevention gate — the guard demands verified proof, not statements of effort)." },
+  leanGates: { group: "Gates", summary: "Lean mode: reduce the base reviewer set and disable contextual gates for much lower token consumption per review cycle." },
   restrictSubagents: { group: "Gates", summary: "Lock the goal-* subagents to the Goal agent (other agents can't call them)." },
   // State & lifecycle
   injectSystemState: { group: "State", summary: "Inject a live Goal Guard state block into the system prompt." },
@@ -218,6 +232,8 @@ function fromEnv(env) {
     GOAL_GUARD_PERSIST: ["persist", coerceBool],
     GOAL_GUARD_CONTEXTUAL_GATES: ["contextualGates", coerceBool],
     GOAL_GUARD_REQUIRE_CODE_REVIEW: ["requireCodeReview", coerceStr],
+    GOAL_GUARD_REQUIRE_EVIDENCE_COVERAGE: ["requireEvidenceCoverage", coerceBool],
+    GOAL_GUARD_LEAN_GATES: ["leanGates", coerceBool],
     GOAL_GUARD_RESTRICT_SUBAGENTS: ["restrictSubagents", coerceBool],
     GOAL_GUARD_MAX_SESSIONS: ["maxSessions", coerceInt],
     GOAL_GUARD_SESSION_TTL_MS: ["sessionTtlMs", coerceInt],

@@ -13,7 +13,7 @@
  *    Re-running verification after a clean review no longer re-opens the gates.
  */
 
-import { BASE_GATES, CODE_GATES, CONTEXTUAL_GATES } from "./agents.js";
+import { BASE_GATES, LEAN_BASE_GATES, CODE_GATES, CONTEXTUAL_GATES } from "./agents.js";
 
 /**
  * Whether this goal has touched code. A goal is code-bearing once it is dirty or has
@@ -79,14 +79,19 @@ export function refreshStickyGates(state) {
 
 /** The reviewers that must PASS for this state, given config. */
 export function requiredGates(state, config) {
+  // Lean mode uses the minimal base-gate set (3 reviewers instead of 5) and
+  // disables contextual gates — drastically reduces token consumption per cycle.
+  const lean = !!(config?.leanGates);
   // Code-only gates (diff review, verification) are required only when the goal
   // actually changed code; a non-code goal is gated on its evidence instead, so it
   // is not blocked forever by an empty `git diff`. `requireCodeReview` overrides the
   // auto-detection: "always" forces them on, "never" off.
   const mode = config?.requireCodeReview || "auto";
   const includeCodeGates = mode === "always" ? true : mode === "never" ? false : isCodeBearing(state);
-  const gates = BASE_GATES.filter((agent) => includeCodeGates || !CODE_GATES.includes(agent));
-  if (!config || config.contextualGates) {
+  const baseSet = lean ? LEAN_BASE_GATES : BASE_GATES;
+  const gates = baseSet.filter((agent) => includeCodeGates || !CODE_GATES.includes(agent));
+  // Contextual gates are disabled in lean mode (token economy).
+  if (!lean && (!config || config.contextualGates)) {
     const contextual = new Set([...(state.stickyGates || []), ...contextualGatesFor(state)]);
     for (const agent of contextual) {
       if (!gates.includes(agent)) gates.push(agent);
@@ -106,6 +111,74 @@ export function missingGates(state, config) {
   return requiredGates(state, config).filter((agent) => !gatePassedFresh(state, agent));
 }
 
+/** Canonical form for matching a criterion against evidence: trimmed and lower-cased. */
+function normalizeCriterion(value) {
+  return String(value).trim().toLowerCase();
+}
+
+/**
+ * The acceptance criteria that no fresh evidence entry covers, in contract order.
+ *
+ * An entry covers a criterion when its `criteria` list contains the same text
+ * (compared trimmed and case-insensitively) and the entry is fresh: its `seq` is
+ * greater than `state.lastEditSeq`, or it has no `seq` and no edit has been
+ * recorded yet (`lastEditSeq` is 0). Blank criteria are never reported.
+ *
+ * Exported so message builders can name the uncovered criteria from the same
+ * rule the gate enforces instead of re-deriving it.
+ *
+ * @param {object} [state] Goal state; reads `contract.acceptanceCriteria`,
+ *   `evidence`, and `lastEditSeq`. Missing or non-array fields count as empty.
+ * @returns {Array} The uncovered criteria exactly as written in the contract.
+ */
+export function uncoveredCriteria(state) {
+  const criteria = Array.isArray(state?.contract?.acceptanceCriteria)
+    ? state.contract.acceptanceCriteria
+    : [];
+  if (!criteria.length) return []; // no criteria = nothing to cover
+  const entries = Array.isArray(state?.evidence) ? state.evidence : [];
+  const lastEditSeq = state?.lastEditSeq || 0;
+  // Index every criterion carried by a fresh entry once, so each contract
+  // criterion is a single Set lookup rather than a rescan of the whole ledger.
+  const freshlyCovered = new Set();
+  for (const entry of entries) {
+    const fresh = entry?.seq ? entry.seq > lastEditSeq : lastEditSeq === 0;
+    if (!fresh || !Array.isArray(entry?.criteria)) continue;
+    for (const covered of entry.criteria) freshlyCovered.add(normalizeCriterion(covered));
+  }
+  return criteria.filter((criterion) => {
+    const key = normalizeCriterion(criterion);
+    return key !== "" && !freshlyCovered.has(key);
+  });
+}
+
+/**
+ * Whether the evidence ledger covers every recorded acceptance criterion with
+ * at least one piece of fresh (post-last-edit) evidence. A goal with no criteria
+ * trivially passes.
+ *
+ * This is the programmatic exhaustion-prevention gate: reviewer PASS verdicts
+ * alone do not satisfy it — each criterion needs its own fresh entry in the
+ * evidence ledger.
+ *
+ * @param {object} [state] Goal state (see `uncoveredCriteria`).
+ * @returns {boolean} True when no criterion is uncovered.
+ */
+export function evidenceCoverageMet(state) {
+  return uncoveredCriteria(state).length === 0;
+}
+
+/**
+ * Whether the goal may be declared complete: the session is active, every
+ * required gate has a fresh pass, and (unless `config.requireEvidenceCoverage`
+ * is explicitly false) every acceptance criterion has fresh evidence.
+ */
 export function completionAllowed(state, config) {
-  return Boolean(state.active) && missingGates(state, config).length === 0;
+  // A missing state is simply "not active" rather than a TypeError.
+  if (!state?.active) return false;
+  if (missingGates(state, config).length !== 0) return false;
+  // Coverage is enforced unless it was switched off explicitly, so reviewer
+  // PASS verdicts alone cannot unlock completion.
+  if (config?.requireEvidenceCoverage === false) return true;
+  return evidenceCoverageMet(state);
 }
@@ -3,7 +3,7 @@
  * messages, and the `goal_status` tool. Kept pure and dependency-light.
  */
 
-import { requiredGates, missingGates, gatePassedFresh } from "./gates.js";
+import { requiredGates, missingGates, gatePassedFresh, completionAllowed } from "./gates.js";
 import { prettyAgentName } from "./agents.js";
 
 /**
@@ -99,7 +99,7 @@ export function sidebarView(state, config) {
   const cycles = Number(state.reviewCycles) || 0;
   const gates = `${passing}/${required.length} gates`;
   const todos = sidebarTodos(state, required, missing);
-  const done = required.length > 0 && missing.length === 0 && !state.dirty;
+  const done = completionAllowed(state, config) && !state.dirty;
   if (done) {
     return {
       state: "done",
@@ -190,7 +190,7 @@ export function statusReport(state, config) {
     reviewerMemory: reviewerMemoryReport(state),
     changedFiles: state.changedFiles.slice(-50),
     contract: state.contract,
-    completionAllowed: Boolean(state.active) && missing.length === 0,
+    completionAllowed: completionAllowed(state, config),
   };
 }
 

@@ -56,6 +56,7 @@ test("a COMPLETE goal does not auto-continue and resets the counters", () => {
     reviewCycles: 1,
     autoContinueCount: 7,
     autoContinueNoProgress: 3,
+    evidence: [{ command: "test", result: "pass", criteria: ["done"], seq: 2 }],
   });
   const d = evaluateAutoContinue(st, DEFAULT_CONFIG);
   assert.equal(d.continue, false);

@@ -128,3 +128,15 @@ test("REGRESSION: a degenerate maxSessions (0/negative) falls back to the defaul
   assert.equal(resolveConfig({ maxSessions: 25 }).maxSessions, 25);
   assert.equal(resolveConfig({}, { GOAL_GUARD_MAX_SESSIONS: "0" }).maxSessions, DEFAULT_CONFIG.maxSessions);
 });
+
+test("requireEvidenceCoverage defaults to true and is configurable", () => {
+  assert.equal(DEFAULT_CONFIG.requireEvidenceCoverage, true); // shipped default: gate on
+  assert.equal(resolveConfig({ requireEvidenceCoverage: false }).requireEvidenceCoverage, false); // explicit option overrides the default
+  assert.equal(resolveConfig(undefined, { GOAL_GUARD_REQUIRE_EVIDENCE_COVERAGE: "off" }).requireEvidenceCoverage, false); // env string "off" resolves to false
+});
+
+test("leanGates defaults to false and is configurable", () => {
+  assert.equal(DEFAULT_CONFIG.leanGates, false); // shipped default: full gate set
+  assert.equal(resolveConfig({ leanGates: true }).leanGates, true); // explicit option overrides the default
+  assert.equal(resolveConfig(undefined, { GOAL_GUARD_LEAN_GATES: "1" }).leanGates, true); // env string "1" resolves to true
+});
@@ -11,7 +11,7 @@ import { evaluateCompletionClaim } from "../plugins/goal-guard/completion.js";
 import { analyzeCommand } from "../plugins/goal-guard/shell.js";
 import { goalSimilarity, SAME_GOAL_THRESHOLD, createGoalTools } from "../plugins/goal-guard/tools.js";
 import { createStore, createState } from "../plugins/goal-guard/state.js";
-import { markEdit } from "../plugins/goal-guard/events.js";
+import { markEdit, recordEvidence } from "../plugins/goal-guard/events.js";
 import { runReviewCycle } from "../plugins/goal-guard/review-runner.js";
 import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js";
 import { completionAllowed } from "../plugins/goal-guard/gates.js";
@@ -59,6 +59,7 @@ async function allowedState(id) {
   state.active = true;
   state.contract = { title: "Do x", original: "do x", acceptanceCriteria: ["x works"] };
   markEdit(store, state, "edit");
+  recordEvidence(store, state, "test", "PASS", ["x works"]);
   await runReviewCycle(mockReviewClient("PASS", id), store, state, DEFAULT_CONFIG, { sessionID: id, sleep: async () => {}, pollMs: 1, timeoutMs: 500 });
   return state;
 }