From 9b2fea761a8bbba2d637a676174a15368666c55b Mon Sep 17 00:00:00 2001
From: Andreas <andreas.aastroem@gmail.com>
Date: Mon, 2 Feb 2026 12:44:44 +1300
Subject: [PATCH 1/4] Allow N/A sections in verify.md for CLI-only features

Previously, all four verify.md sections (Pre-Verification Checklist,
Smoke Test Results, Browser Verification, API Verification) required
substantive content, forcing users to --force bypass for CLI-only
features with no browser or API. Now sections containing "N/A",
"Not applicable", "Not required", or "CLI only" are accepted as valid.
Section headings must still exist.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 packages/specflow/src/commands/complete.ts    |  34 +++-
 .../tests/commands/complete-verify.test.ts    | 183 ++++++++++++++++++
 2 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 packages/specflow/tests/commands/complete-verify.test.ts

diff --git a/packages/specflow/src/commands/complete.ts b/packages/specflow/src/commands/complete.ts
index 4ee001c..d26931f 100644
--- a/packages/specflow/src/commands/complete.ts
+++ b/packages/specflow/src/commands/complete.ts
@@ -104,6 +104,25 @@ function runTests(): { pass: boolean; output: string } {
 /**
+ * Check whether a verify.md section is marked as not applicable.
+ * Scans the text between the first occurrence of `sectionHeading` and the next
+ * line starting with "## " (or the end of `content`) for "N/A",
+ * "Not applicable", "Not required", or "CLI only" (case-insensitive).
+ * Returns false when `sectionHeading` does not occur in `content`.
+ */
+function isSectionNotApplicable(content: string, sectionHeading: string): boolean {
+  const headingIndex = content.indexOf(sectionHeading);
+  if (headingIndex === -1) return false;
+  const afterHeading = content.slice(headingIndex + sectionHeading.length);
+  const nextHeadingMatch = afterHeading.match(/\n## /);
+  const sectionContent = nextHeadingMatch
+    ? afterHeading.slice(0, nextHeadingMatch.index)
+    : afterHeading;
+  const naPattern = /\b(n\/a|not applicable|not required|cli only)\b/i;
+  return naPattern.test(sectionContent);
+}
+
+/**
  * Validate verify.md has required sections
  */
 function validateVerifyFile(verifyPath: string): string[] {
   const errors: string[] = [];
 
@@ -120,8 +139,21 @@ function validateVerifyFile(verifyPath: string): string[] {
   }
 
   // Check that verification was actually completed (not just template)
+  // But skip placeholder checks for sections marked as N/A
   if (content.includes("[paste actual output]") || content.includes("[paste actual response]")) {
-    errors.push("verify.md contains unfilled placeholders - actual verification not performed");
+    // Only flag unfilled placeholders if the section containing them is not marked N/A
+    const placeholderPattern = /\[paste actual (?:output|response)\]/g;
+    let match;
+    while ((match = placeholderPattern.exec(content)) !== null) {
+      const beforeMatch = content.slice(0, match.index);
+      const lastHeadingMatch = beforeMatch.match(/## [^\n]+/g);
+      const lastHeading = lastHeadingMatch ? lastHeadingMatch[lastHeadingMatch.length - 1] : null;
+
+      if (!lastHeading || !isSectionNotApplicable(content, lastHeading)) {
+        errors.push("verify.md contains unfilled placeholders - actual verification not performed");
+        break;
+      }
+    }
   }
 
   return errors;
diff --git a/packages/specflow/tests/commands/complete-verify.test.ts b/packages/specflow/tests/commands/complete-verify.test.ts
new file mode 100644
index 0000000..9e6ceff
--- /dev/null
+++ b/packages/specflow/tests/commands/complete-verify.test.ts
@@ -0,0 +1,183 @@
+/**
+ * Tests for verify.md N/A section support in complete command validation.
+ *
+ * The validateVerifyFile function should accept sections marked as
+ * "N/A", "Not applicable", "Not required", or "CLI only" as valid,
+ * while still requiring section headings to exist and rejecting
+ * unfilled placeholders in active sections.
+ */
+
+import { describe, test, expect, beforeEach, afterEach } from "bun:test";
+import { mkdtempSync, writeFileSync, rmSync } from "fs";
+import { join } from "path";
+import { tmpdir } from "os";
+
+// validateVerifyFile is not exported, so it cannot be called directly.
+// These tests go through the exported validateFeatureCompletion instead
+// and keep only the errors that relate to verify.md (see getVerifyErrors
+// below).
+
+// validateFeatureCompletion needs more than verify.md (spec.md, plan.md,
+// etc.), so each test starts from a minimal spec directory in which those
+// files already exist. Only the contents of verify.md vary between the
+// tests.
+
+function createSpecDir(): string {
+  const root = mkdtempSync(join(tmpdir(), "specflow-verify-test-"));
+  // Pre-create every artifact except verify.md, which each test writes itself
+  for (const [stem, title] of [["spec", "Spec"], ["plan", "Plan"], ["tasks", "Tasks"], ["docs", "Docs"]]) {
+    writeFileSync(join(root, `${stem}.md`), `# ${title}\nSome ${stem} content`);
+  }
+
+  return root;
+}
+
+// No dynamic import or source inspection is needed: the module's exported
+// validateFeatureCompletion is imported normally, and getVerifyErrors
+// (below) filters its result down to the verify.md-related errors.
+//
+
+import { validateFeatureCompletion } from "../../src/commands/complete";
+
+function getVerifyErrors(specDir: string): string[] {
+  // Save and mock cwd to avoid test-related checks
+  const originalCwd = process.cwd;
+  process.cwd = () => specDir;
+  try {
+    // Filter to only verify.md related errors
+    return validateFeatureCompletion(specDir).errors.filter(
+      (e) => e.includes("verify.md") || e.includes("verification")
+    );
+  } finally {
+    // Restore even when validation throws so later tests see the real cwd
+    process.cwd = originalCwd;
+  }
+}
+
+describe("verify.md N/A section validation", () => {
+  let specDir: string;
+
+  beforeEach(() => {
+    specDir = createSpecDir();
+  });
+
+  afterEach(() => {
+    rmSync(specDir, { recursive: true, force: true });
+  });
+
+  test("all sections filled passes validation", () => {
+    writeFileSync(
+      join(specDir, "verify.md"),
+      `# Verification
+
+## Pre-Verification Checklist
+- [x] All tests pass
+- [x] Code reviewed
+
+## Smoke Test Results
+All smoke tests passed successfully.
+
+## Browser Verification
+Tested in Chrome, Firefox, Safari. All pages render correctly.
+
+## API Verification
+All API endpoints return expected responses.
+`
+    );
+
+    const errors = getVerifyErrors(specDir);
+    expect(errors).toEqual([]);
+  });
+
+  test("Browser Verification containing N/A passes", () => {
+    writeFileSync(
+      join(specDir, "verify.md"),
+      `# Verification
+
+## Pre-Verification Checklist
+- [x] All tests pass
+
+## Smoke Test Results
+All smoke tests passed.
+
+## Browser Verification
+N/A
+
+## API Verification
+All API endpoints return expected responses.
+`
+    );
+
+    const errors = getVerifyErrors(specDir);
+    expect(errors).toEqual([]);
+  });
+
+  test("API Verification containing 'Not applicable - CLI only' passes", () => {
+    writeFileSync(
+      join(specDir, "verify.md"),
+      `# Verification
+
+## Pre-Verification Checklist
+- [x] All tests pass
+
+## Smoke Test Results
+All smoke tests passed.
+
+## Browser Verification
+Not required - CLI only tool
+
+## API Verification
+Not applicable - CLI only feature, no API endpoints.
+`
+    );
+
+    const errors = getVerifyErrors(specDir);
+    expect(errors).toEqual([]);
+  });
+
+  test("missing section heading entirely still fails", () => {
+    writeFileSync(
+      join(specDir, "verify.md"),
+      `# Verification
+
+## Pre-Verification Checklist
+- [x] All tests pass
+
+## Smoke Test Results
+All smoke tests passed.
+
+## Browser Verification
+Looks good.
+`
+    );
+    // Missing "## API Verification" heading
+
+    const errors = getVerifyErrors(specDir);
+    expect(errors.length).toBeGreaterThan(0);
+    expect(errors.some((e) => e.includes("API Verification"))).toBe(true);
+  });
+
+  test("unfilled placeholders in active sections still fails", () => {
+    writeFileSync(
+      join(specDir, "verify.md"),
+      `# Verification
+
+## Pre-Verification Checklist
+- [x] All tests pass
+
+## Smoke Test Results
+[paste actual output]
+
+## Browser Verification
+Tested and working.
+
+## API Verification
+All endpoints verified.
+`
+    );
+
+    const errors = getVerifyErrors(specDir);
+    expect(errors.length).toBeGreaterThan(0);
+    expect(errors.some((e) => e.includes("placeholder"))).toBe(true);
+  });
+});

From 3d6b6c13634e0bb9c8f12a5dc7f1d69f7f66a192 Mon Sep 17 00:00:00 2001
From: Andreas <andreas.aastroem@gmail.com>
Date: Mon, 2 Feb 2026 14:20:02 +1300
Subject: [PATCH 2/4] feat: AI-powered headless Doctorow Gate for
 non-interactive environments

Adds automatic AI evaluation of Doctorow Gate checks when running in
non-TTY environments (CI/CD, agent pipelines). Uses claude -p with
Haiku for fast, cheap evaluation. Falls back to pass-by-default on
AI failure to avoid blocking pipelines.

Closes #5

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 packages/specflow/src/lib/doctorow.ts         | 227 +++++++++++++++++-
 .../tests/lib/doctorow-headless.test.ts       | 217 +++++++++++++++++
 2 files changed, 438 insertions(+), 6 deletions(-)
 create mode 100644 packages/specflow/tests/lib/doctorow-headless.test.ts

diff --git a/packages/specflow/src/lib/doctorow.ts b/packages/specflow/src/lib/doctorow.ts
index b79a764..23df9e4 100644
--- a/packages/specflow/src/lib/doctorow.ts
+++ b/packages/specflow/src/lib/doctorow.ts
@@ -8,7 +8,7 @@
  */
 
 import { createInterface } from "readline";
-import { existsSync, readFileSync, appendFileSync } from "fs";
+import { existsSync, readFileSync, appendFileSync, readdirSync } from "fs";
 import { join } from "path";
 
 // =============================================================================
@@ -146,8 +146,9 @@ export function formatCheckResult(result: DoctorowCheckResult): string {
 
 /**
  * Format verification entry for verify.md
+ * @param evaluator - Optional tag like "[AI-evaluated]" to append to confirmed entries
  */
-export function formatVerifyEntry(results: DoctorowCheckResult[]): string {
+export function formatVerifyEntry(results: DoctorowCheckResult[], evaluator?: string): string {
   const lines: string[] = [];
   const timestamp = new Date().toISOString();
 
@@ -159,7 +160,12 @@ export function formatVerifyEntry(results: DoctorowCheckResult[]): string {
     const name = check?.name ?? result.checkId;
 
     if (result.confirmed) {
-      lines.push(`- [x] **${name}**: Confirmed`);
+      const tag = evaluator ? ` ${evaluator}` : "";
+      lines.push(`- [x] **${name}**: Confirmed${tag}`);
+      if (result.skipReason) {
+        // In AI mode, skipReason holds the reasoning
+        lines.push(`  - Reasoning: ${result.skipReason}`);
+      }
     } else if (result.skipReason) {
       lines.push(`- [ ] **${name}**: Skipped`);
       lines.push(`  - Reason: ${result.skipReason}`);
@@ -172,6 +178,207 @@ export function formatVerifyEntry(results: DoctorowCheckResult[]): string {
   return lines.join("\n");
 }
 
+// =============================================================================
+// Headless (AI) Evaluation
+// =============================================================================
+
+/**
+ * Extract JSON from an LLM response.
+ * Handles:
+ * - Claude --output-format json wrapper (extracts from "result" field)
+ * - Markdown code blocks (```json ... ```)
+ * - Raw JSON strings
+ * - JSON embedded in surrounding text
+ */
+export function extractJsonFromResponse(response: string): any | null {
+  let text = response;
+
+  // Unwrap the Claude --output-format json envelope; only a string result is usable as text
+  try {
+    const wrapper = JSON.parse(response);
+    if (wrapper.type === "result" && typeof wrapper.result === "string") {
+      text = wrapper.result;
+    }
+  } catch {
+    // Not a JSON wrapper, use response as-is
+  }
+
+  // Try markdown code block first
+  const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
+  if (codeBlockMatch) {
+    try {
+      return JSON.parse(codeBlockMatch[1].trim());
+    } catch {
+      // Continue to other methods
+    }
+  }
+
+  // Try to find JSON object in response
+  const jsonMatch = text.match(/\{[\s\S]*\}/);
+  if (jsonMatch) {
+    try {
+      return JSON.parse(jsonMatch[0]);
+    } catch {
+      // Invalid JSON
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Gather feature artifacts for AI evaluation context.
+ * Reads spec.md, plan.md, tasks.md, verify.md and lists src/ filenames.
+ */
+export function gatherArtifacts(specPath: string): string {
+  // Markdown artifacts are included in full, in a fixed order, when present
+  const artifactNames = ["spec.md", "plan.md", "tasks.md", "verify.md"];
+  const sections = artifactNames.flatMap((name): string[] => {
+    const artifactPath = join(specPath, name);
+    if (!existsSync(artifactPath)) return [];
+    return [`--- ${name} ---\n${readFileSync(artifactPath, "utf-8")}`];
+  });
+
+  // Source files are listed by relative name only (contents are not read);
+  // the src/ directory is looked up three levels above the spec directory
+  const sourceRoot = join(specPath, "..", "..", "..", "src");
+  if (!existsSync(sourceRoot)) {
+    return sections.join("\n\n");
+  }
+
+  try {
+    const names = listFilesRecursive(sourceRoot);
+    if (names.length > 0) {
+      sections.push(`--- src/ files ---\n${names.join("\n")}`);
+    }
+  } catch {
+    // Ignore errors reading src directory
+  }
+
+  return sections.join("\n\n");
+}
+
+/**
+ * Recursively list files in a directory (relative paths).
+ */
+function listFilesRecursive(dir: string, prefix: string = ""): string[] {
+  const found: string[] = [];
+  const isSkipped = (name: string) => name.startsWith(".") || name === "node_modules";
+  try {
+    for (const item of readdirSync(dir, { withFileTypes: true })) {
+      if (isSkipped(item.name)) continue;
+      const relPath = prefix === "" ? item.name : `${prefix}/${item.name}`;
+      if (!item.isDirectory()) {
+        found.push(relPath);
+        continue;
+      }
+      for (const nested of listFilesRecursive(join(dir, item.name), relPath)) found.push(nested);
+    }
+  } catch {
+    // Ignore permission errors
+  }
+  return found;
+}
+
+/**
+ * Evaluate a single Doctorow check using AI (claude -p).
+ * On failure, returns confirmed=true to avoid blocking the pipeline.
+ */
+export async function evaluateCheckWithAI(
+  check: DoctorowCheck,
+  artifacts: string
+): Promise<DoctorowCheckResult> {
+  const systemPrompt =
+    "You are a code quality reviewer evaluating a feature completion check. " +
+    'Return ONLY valid JSON: {"pass": true, "reasoning": "one sentence explanation"}';
+
+  const userPrompt =
+    `Check: ${check.question}\n\nContext: ${check.prompt}\n\nFeature Artifacts:\n${artifacts}`;
+
+  try {
+    const proc = Bun.spawn(
+      ["claude", "-p", "--model", "claude-haiku-4-5-20251001", "--system-prompt", systemPrompt, userPrompt],
+      {
+        stdout: "pipe",
+        stderr: "pipe",
+        env: { ...process.env },
+      }
+    );
+
+    // 30 second timeout; the handle is kept so the timer can be cleared below
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const timeoutPromise = new Promise<null>((resolve) => {
+      timer = setTimeout(() => {
+        proc.kill();
+        resolve(null);
+      }, 30000);
+    });
+
+    const resultPromise = (async () => {
+      const output = await new Response(proc.stdout).text();
+      const exitCode = await proc.exited;
+      if (exitCode !== 0) return null;
+
+      const extracted = extractJsonFromResponse(output);
+      if (!extracted || typeof extracted.pass !== "boolean") return null;
+
+      return {
+        checkId: check.id,
+        confirmed: extracted.pass,
+        skipReason: extracted.reasoning || null,
+        timestamp: new Date(),
+      };
+    })();
+
+    // Clear the timer however the race settles; a pending timer keeps the process alive for 30s
+    const result = await Promise.race([resultPromise, timeoutPromise]).finally(() => clearTimeout(timer));
+    if (result) return result;
+  } catch {
+    // Fall through to default
+  }
+
+  // On any AI failure, pass by default
+  return {
+    checkId: check.id,
+    confirmed: true,
+    skipReason: "AI evaluation unavailable — passed by default",
+    timestamp: new Date(),
+  };
+}
+
+/**
+ * Run the Doctorow Gate in headless mode using AI evaluation.
+ * Iterates through all checks and evaluates them with claude -p.
+ */
+export async function runDoctorowGateHeadless(
+  featureId: string,
+  specPath: string
+): Promise<DoctorowResult> {
+  const context = gatherArtifacts(specPath);
+  const outcomes: DoctorowCheckResult[] = [];
+
+  for (const check of DOCTOROW_CHECKS) {
+    console.log(`  Evaluating: ${check.name}...`);
+    const outcome = await evaluateCheckWithAI(check, context);
+    const label = outcome.confirmed ? "PASS" : "FAIL";
+    const detail = outcome.skipReason || "confirmed";
+    console.log(`  ${label}: ${check.name} - ${detail}`);
+    outcomes.push(outcome);
+  }
+
+  // Append AI results to verify.md
+  appendToVerifyMd(specPath, outcomes, "[AI-evaluated]");
+
+  // The gate passes only when every check was confirmed
+  const firstFailure = outcomes.find((outcome) => !outcome.confirmed);
+  return {
+    passed: firstFailure === undefined,
+    skipped: false,
+    failedCheck: firstFailure?.checkId,
+    results: outcomes,
+  };
+}
+
 // =============================================================================
 // Gate Logic
 // =============================================================================
@@ -252,6 +459,14 @@ export async function runDoctorowGate(
     };
   }
 
+  // Detect headless mode
+  const isHeadless = !process.stdin.isTTY || process.env.SPECFLOW_HEADLESS === "true";
+
+  if (isHeadless) {
+    console.log(`\nRunning Doctorow Gate in headless mode (AI evaluation)...`);
+    return runDoctorowGateHeadless(featureId, specPath);
+  }
+
   console.log(`\n🔍 Running Doctorow Gate for ${featureId}`);
   console.log("─".repeat(50));
   console.log("The Doctorow Gate ensures you've considered failure modes,");
@@ -307,7 +522,7 @@ export async function runDoctorowGate(
 /**
  * Append verification results to verify.md
  */
-export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[]): void {
+export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[], evaluator?: string): void {
   const verifyPath = join(specPath, "verify.md");
 
   let content = "";
@@ -325,9 +540,9 @@ export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[
   }
 
   // Append new entry
-  content += formatVerifyEntry(results);
+  content += formatVerifyEntry(results, evaluator);
 
-  appendFileSync(verifyPath, formatVerifyEntry(results));
+  appendFileSync(verifyPath, formatVerifyEntry(results, evaluator));
 }
 
 /**
diff --git a/packages/specflow/tests/lib/doctorow-headless.test.ts b/packages/specflow/tests/lib/doctorow-headless.test.ts
new file mode 100644
index 0000000..0e3204e
--- /dev/null
+++ b/packages/specflow/tests/lib/doctorow-headless.test.ts
@@ -0,0 +1,217 @@
+/**
+ * Doctorow Gate Headless (AI) Mode Tests
+ *
+ * Tests for extractJsonFromResponse, gatherArtifacts, formatVerifyEntry
+ * with evaluator tag, and headless routing detection.
+ * Does NOT test actual claude -p calls (integration tests).
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from "bun:test";
+import { mkdirSync, writeFileSync, rmSync, existsSync, readFileSync } from "fs";
+import { join } from "path";
+import {
+  extractJsonFromResponse,
+  gatherArtifacts,
+  formatVerifyEntry,
+  DoctorowCheckResult,
+} from "../../src/lib/doctorow";
+
+// =============================================================================
+// Test Fixtures
+// =============================================================================
+
+const TEST_DIR = "/tmp/specflow-headless-test";
+const SPEC_PATH = join(TEST_DIR, ".specify", "specs", "f-001-test");
+
+function cleanup(): void {
+  // Nothing to remove on the first run or after a previous cleanup
+  if (!existsSync(TEST_DIR)) return;
+  rmSync(TEST_DIR, { recursive: true, force: true });
+}
+
+function setupSpecDir(): void {
+  mkdirSync(SPEC_PATH, { recursive: true });
+}
+
+// =============================================================================
+// extractJsonFromResponse
+// =============================================================================
+
+describe("extractJsonFromResponse", () => {
+  it("should parse raw JSON", () => {
+    const input = '{"pass": true, "reasoning": "tests exist"}';
+    const result = extractJsonFromResponse(input);
+    expect(result).toEqual({ pass: true, reasoning: "tests exist" });
+  });
+
+  it("should extract JSON from markdown code block", () => {
+    const input = 'Here is the result:\n```json\n{"pass": false, "reasoning": "no tests found"}\n```\nDone.';
+    const result = extractJsonFromResponse(input);
+    expect(result).toEqual({ pass: false, reasoning: "no tests found" });
+  });
+
+  it("should extract JSON from code block without json tag", () => {
+    const input = '```\n{"pass": true, "reasoning": "looks good"}\n```';
+    const result = extractJsonFromResponse(input);
+    expect(result).toEqual({ pass: true, reasoning: "looks good" });
+  });
+
+  it("should handle Claude --output-format json wrapper", () => {
+    const inner = '{"pass": true, "reasoning": "all good"}';
+    const wrapper = JSON.stringify({ type: "result", result: inner });
+    const result = extractJsonFromResponse(wrapper);
+    expect(result).toEqual({ pass: true, reasoning: "all good" });
+  });
+
+  it("should extract embedded JSON from surrounding text", () => {
+    const input = 'Based on my analysis, {"pass": true, "reasoning": "confirmed"} is the result.';
+    const result = extractJsonFromResponse(input);
+    expect(result).toEqual({ pass: true, reasoning: "confirmed" });
+  });
+
+  it("should return null for invalid input", () => {
+    expect(extractJsonFromResponse("no json here")).toBeNull();
+    expect(extractJsonFromResponse("")).toBeNull();
+    expect(extractJsonFromResponse("just some text {broken")).toBeNull();
+  });
+
+  it("should handle wrapper with embedded JSON in result string", () => {
+    const inner = 'The answer is ```json\n{"pass": true, "reasoning": "yes"}\n```';
+    const wrapper = JSON.stringify({ type: "result", result: inner });
+    const result = extractJsonFromResponse(wrapper);
+    expect(result).toEqual({ pass: true, reasoning: "yes" });
+  });
+});
+
+// =============================================================================
+// gatherArtifacts
+// =============================================================================
+
+describe("gatherArtifacts", () => {
+  beforeEach(() => {
+    cleanup();
+    setupSpecDir();
+  });
+
+  afterEach(() => {
+    cleanup();
+  });
+
+  it("should gather existing artifact files", () => {
+    writeFileSync(join(SPEC_PATH, "spec.md"), "# Spec\nFeature description");
+    writeFileSync(join(SPEC_PATH, "plan.md"), "# Plan\nImplementation plan");
+
+    const artifacts = gatherArtifacts(SPEC_PATH);
+
+    expect(artifacts).toContain("--- spec.md ---");
+    expect(artifacts).toContain("Feature description");
+    expect(artifacts).toContain("--- plan.md ---");
+    expect(artifacts).toContain("Implementation plan");
+  });
+
+  it("should skip missing artifact files gracefully", () => {
+    writeFileSync(join(SPEC_PATH, "spec.md"), "# Spec only");
+
+    const artifacts = gatherArtifacts(SPEC_PATH);
+
+    expect(artifacts).toContain("--- spec.md ---");
+    expect(artifacts).not.toContain("--- plan.md ---");
+    expect(artifacts).not.toContain("--- tasks.md ---");
+  });
+
+  it("should include src/ file listing when available", () => {
+    const srcDir = join(TEST_DIR, "src");
+    mkdirSync(srcDir, { recursive: true });
+    writeFileSync(join(srcDir, "index.ts"), "export {}");
+    writeFileSync(join(srcDir, "utils.ts"), "export {}");
+
+    const artifacts = gatherArtifacts(SPEC_PATH);
+
+    expect(artifacts).toContain("--- src/ files ---");
+    expect(artifacts).toContain("index.ts");
+    expect(artifacts).toContain("utils.ts");
+  });
+
+  it("should return empty string when no artifacts exist", () => {
+    // specPath exists but has no files
+    const emptyPath = join(TEST_DIR, "empty-spec");
+    mkdirSync(emptyPath, { recursive: true });
+
+    const artifacts = gatherArtifacts(emptyPath);
+    expect(artifacts).toBe("");
+  });
+});
+
+// =============================================================================
+// formatVerifyEntry with evaluator tag
+// =============================================================================
+
+describe("formatVerifyEntry with evaluator", () => {
+  const makeResult = (checkId: string, confirmed: boolean, skipReason: string | null): DoctorowCheckResult => ({
+    checkId,
+    confirmed,
+    skipReason,
+    timestamp: new Date(),
+  });
+
+  it("should include evaluator tag on confirmed entries", () => {
+    const results = [
+      makeResult("failure_test", true, "Error handling tests exist"),
+    ];
+
+    const entry = formatVerifyEntry(results, "[AI-evaluated]");
+
+    expect(entry).toContain("**Failure Test**: Confirmed [AI-evaluated]");
+    expect(entry).toContain("Reasoning: Error handling tests exist");
+  });
+
+  it("should not include evaluator tag when not provided", () => {
+    const results = [
+      makeResult("failure_test", true, null),
+    ];
+
+    const entry = formatVerifyEntry(results);
+
+    expect(entry).toContain("**Failure Test**: Confirmed");
+    expect(entry).not.toContain("[AI-evaluated]");
+  });
+
+  it("should handle mixed results with evaluator", () => {
+    const results = [
+      makeResult("failure_test", true, "Tests exist"),
+      makeResult("assumption_test", false, null),
+    ];
+
+    const entry = formatVerifyEntry(results, "[AI-evaluated]");
+
+    expect(entry).toContain("**Failure Test**: Confirmed [AI-evaluated]");
+    expect(entry).toContain("**Assumption Test**: Not confirmed");
+  });
+});
+
+// =============================================================================
+// Headless routing detection
+// =============================================================================
+
+describe("headless routing", () => {
+  it("should detect non-TTY environment", () => {
+    // stdin is a TTY in interactive runs; assert the rule, not the live process.stdin
+    const headlessFor = (isTTY: boolean | undefined): boolean => !isTTY;
+    expect([undefined, false, true].map(headlessFor)).toEqual([true, true, false]);
+  });
+
+  it("should detect SPECFLOW_HEADLESS env var", () => {
+    const original = process.env.SPECFLOW_HEADLESS;
+    process.env.SPECFLOW_HEADLESS = "true";
+    try {
+      const isHeadless = !process.stdin.isTTY || process.env.SPECFLOW_HEADLESS === "true";
+      expect(isHeadless).toBe(true);
+    } finally {
+      if (original !== undefined) {
+        process.env.SPECFLOW_HEADLESS = original;
+      } else {
+        delete process.env.SPECFLOW_HEADLESS;
+      }
+    }
+  });
+});

From 275782ed3bb0e588d7d8d830ead384178eccf047 Mon Sep 17 00:00:00 2001
From: Andreas <andreas.aastroem@gmail.com>
Date: Mon, 2 Feb 2026 14:21:59 +1300
Subject: [PATCH 3/4] feat: make Doctorow Gate AI model configurable

Default to Sonnet (claude-sonnet-4-20250514) for better reasoning on
quality checks. Override via SPECFLOW_DOCTOROW_MODEL env var.

Supported models:
- claude-haiku-4-5-20251001 (fast/cheap)
- claude-sonnet-4-20250514 (balanced, default)
- claude-opus-4-5-20251101 (deep reasoning)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 packages/specflow/src/lib/doctorow.ts         | 19 ++++++++++++--
 .../specflow/src/lib/migrations/embedded.ts   | 26 ++++++++++++++++++-
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/packages/specflow/src/lib/doctorow.ts b/packages/specflow/src/lib/doctorow.ts
index 23df9e4..4ee2284 100644
--- a/packages/specflow/src/lib/doctorow.ts
+++ b/packages/specflow/src/lib/doctorow.ts
@@ -284,12 +284,26 @@ function listFilesRecursive(dir: string, prefix: string = ""): string[] {
  * Evaluate a single Doctorow check using AI (claude -p).
  * On failure, returns confirmed=true to avoid blocking the pipeline.
  */
+/**
+ * Default model for headless Doctorow evaluation.
+ * Sonnet provides good reasoning for quality checks at reasonable cost.
+ * Override via SPECFLOW_DOCTOROW_MODEL env var.
+ *
+ * Recommended models:
+ * - claude-haiku-4-5-20251001: Fast/cheap, may give shallow evaluations
+ * - claude-sonnet-4-20250514: Balanced reasoning (default)
+ * - claude-opus-4-5-20251101: Deep reasoning, higher cost
+ */
+const DEFAULT_DOCTOROW_MODEL = "claude-sonnet-4-20250514";
+
 export async function evaluateCheckWithAI(
   check: DoctorowCheck,
   artifacts: string
 ): Promise<DoctorowCheckResult> {
+  const model = process.env.SPECFLOW_DOCTOROW_MODEL || DEFAULT_DOCTOROW_MODEL;
   const systemPrompt =
     "You are a code quality reviewer evaluating a feature completion check. " +
+    "Analyze the provided feature artifacts carefully. " +
     'Return ONLY valid JSON: {"pass": true, "reasoning": "one sentence explanation"}';
 
   const userPrompt =
@@ -297,7 +311,7 @@ export async function evaluateCheckWithAI(
 
   try {
     const proc = Bun.spawn(
-      ["claude", "-p", "--model", "claude-haiku-4-5-20251001", "--system-prompt", systemPrompt, userPrompt],
+      ["claude", "-p", "--model", model, "--system-prompt", systemPrompt, userPrompt],
       {
         stdout: "pipe",
         stderr: "pipe",
@@ -463,7 +477,8 @@ export async function runDoctorowGate(
   const isHeadless = !process.stdin.isTTY || process.env.SPECFLOW_HEADLESS === "true";
 
   if (isHeadless) {
-    console.log(`\nRunning Doctorow Gate in headless mode (AI evaluation)...`);
+    const model = process.env.SPECFLOW_DOCTOROW_MODEL || DEFAULT_DOCTOROW_MODEL;
+    console.log(`\n🤖 Running Doctorow Gate in headless mode (AI: ${model})...`);
     return runDoctorowGateHeadless(featureId, specPath);
   }
 
diff --git a/packages/specflow/src/lib/migrations/embedded.ts b/packages/specflow/src/lib/migrations/embedded.ts
index 48d7c8e..f47f2bc 100644
--- a/packages/specflow/src/lib/migrations/embedded.ts
+++ b/packages/specflow/src/lib/migrations/embedded.ts
@@ -4,7 +4,7 @@
  * AUTO-GENERATED by scripts/embed-migrations.ts
  * DO NOT EDIT MANUALLY
  *
- * Generated: 2026-01-28T13:34:48.655Z
+ * Generated: 2026-02-02T01:21:51.241Z
  *
  * These migrations are embedded at build time so they work
  * in the compiled binary where import.meta.dir resolves to
@@ -90,4 +90,28 @@ ALTER TABLE features ADD COLUMN skip_duplicate_of TEXT;`,
 -- This is a no-op for safety; manual intervention required for rollback
 SELECT 1;`,
   },
+  {
+    version: 6,
+    name: "add_contrib_prep",
+    upSql: `-- Add contrib prep state tracking table
+-- Tracks the contribution preparation workflow (inventory → sanitize → extract → verify)
+
+CREATE TABLE IF NOT EXISTS contrib_prep_state (
+  feature_id TEXT PRIMARY KEY,
+  gate INTEGER NOT NULL DEFAULT 0,
+  inventory_included INTEGER DEFAULT 0,
+  inventory_excluded INTEGER DEFAULT 0,
+  sanitization_pass INTEGER,
+  sanitization_findings INTEGER DEFAULT 0,
+  tag_name TEXT,
+  tag_hash TEXT,
+  contrib_branch TEXT,
+  verification_pass INTEGER,
+  base_branch TEXT DEFAULT 'main',
+  created_at TEXT NOT NULL,
+  updated_at TEXT NOT NULL,
+  FOREIGN KEY (feature_id) REFERENCES features(id)
+);`,
+    downSql: `DROP TABLE IF EXISTS contrib_prep_state;`,
+  },
 ];

From baf634962163bf863e09351370179a20b0aafece Mon Sep 17 00:00:00 2001
From: Andreas <andreas.aastroem@gmail.com>
Date: Mon, 2 Feb 2026 14:32:02 +1300
Subject: [PATCH 4/4] Fix headless Doctorow: use --output-format json, default
 to Opus

- Add --output-format json to claude -p invocation to ensure parseable
  output in environments with CLAUDE.md hooks/skills configured
- Change default model from Sonnet to Opus for deeper quality reasoning
- Model remains configurable via SPECFLOW_DOCTOROW_MODEL env var

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 packages/specflow/src/lib/doctorow.ts | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/packages/specflow/src/lib/doctorow.ts b/packages/specflow/src/lib/doctorow.ts
index 4ee2284..b9e6132 100644
--- a/packages/specflow/src/lib/doctorow.ts
+++ b/packages/specflow/src/lib/doctorow.ts
@@ -286,15 +286,15 @@ function listFilesRecursive(dir: string, prefix: string = ""): string[] {
  */
 /**
  * Default model for headless Doctorow evaluation.
- * Sonnet provides good reasoning for quality checks at reasonable cost.
+ * Opus provides deep reasoning for thorough quality checks.
  * Override via SPECFLOW_DOCTOROW_MODEL env var.
  *
  * Recommended models:
  * - claude-haiku-4-5-20251001: Fast/cheap, may give shallow evaluations
- * - claude-sonnet-4-20250514: Balanced reasoning (default)
- * - claude-opus-4-5-20251101: Deep reasoning, higher cost
+ * - claude-sonnet-4-20250514: Balanced reasoning, lower cost
+ * - claude-opus-4-5-20251101: Deep reasoning (default)
  */
-const DEFAULT_DOCTOROW_MODEL = "claude-sonnet-4-20250514";
+const DEFAULT_DOCTOROW_MODEL = "claude-opus-4-5-20251101";
 
 export async function evaluateCheckWithAI(
   check: DoctorowCheck,
@@ -311,7 +311,7 @@ export async function evaluateCheckWithAI(
 
   try {
     const proc = Bun.spawn(
-      ["claude", "-p", "--model", model, "--system-prompt", systemPrompt, userPrompt],
+      ["claude", "-p", "--output-format", "json", "--model", model, "--system-prompt", systemPrompt, userPrompt],
       {
         stdout: "pipe",
         stderr: "pipe",