From 9b2fea761a8bbba2d637a676174a15368666c55b Mon Sep 17 00:00:00 2001 From: Andreas Date: Mon, 2 Feb 2026 12:44:44 +1300 Subject: [PATCH 1/4] Allow N/A sections in verify.md for CLI-only features Previously, all four verify.md sections (Pre-Verification Checklist, Smoke Test Results, Browser Verification, API Verification) required substantive content, forcing users to --force bypass for CLI-only features with no browser or API. Now sections containing "N/A", "Not applicable", "Not required", or "CLI only" are accepted as valid. Section headings must still exist. Co-Authored-By: Claude Opus 4.5 --- packages/specflow/src/commands/complete.ts | 34 +++- .../tests/commands/complete-verify.test.ts | 183 ++++++++++++++++++ 2 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 packages/specflow/tests/commands/complete-verify.test.ts diff --git a/packages/specflow/src/commands/complete.ts b/packages/specflow/src/commands/complete.ts index 4ee001c..d26931f 100644 --- a/packages/specflow/src/commands/complete.ts +++ b/packages/specflow/src/commands/complete.ts @@ -104,6 +104,25 @@ function runTests(): { pass: boolean; output: string } { /** * Validate verify.md has required sections */ +/** + * Check if a section's content indicates it is not applicable. + * Returns true if the content between this heading and the next heading + * contains "N/A", "Not applicable", "Not required", or "CLI only" (case-insensitive). + */ +function isSectionNotApplicable(content: string, sectionHeading: string): boolean { + const headingIndex = content.indexOf(sectionHeading); + if (headingIndex === -1) return false; + + const afterHeading = content.slice(headingIndex + sectionHeading.length); + const nextHeadingMatch = afterHeading.match(/\n## /); + const sectionContent = nextHeadingMatch + ? 
afterHeading.slice(0, nextHeadingMatch.index) + : afterHeading; + + const naPattern = /\b(n\/a|not applicable|not required|cli only)\b/i; + return naPattern.test(sectionContent); +} + function validateVerifyFile(verifyPath: string): string[] { const errors: string[] = []; @@ -120,8 +139,21 @@ function validateVerifyFile(verifyPath: string): string[] { } // Check that verification was actually completed (not just template) + // But skip placeholder checks for sections marked as N/A if (content.includes("[paste actual output]") || content.includes("[paste actual response]")) { - errors.push("verify.md contains unfilled placeholders - actual verification not performed"); + // Only flag unfilled placeholders if the section containing them is not marked N/A + const placeholderPattern = /\[paste actual (?:output|response)\]/g; + let match; + while ((match = placeholderPattern.exec(content)) !== null) { + const beforeMatch = content.slice(0, match.index); + const lastHeadingMatch = beforeMatch.match(/## [^\n]+/g); + const lastHeading = lastHeadingMatch ? lastHeadingMatch[lastHeadingMatch.length - 1] : null; + + if (!lastHeading || !isSectionNotApplicable(content, lastHeading)) { + errors.push("verify.md contains unfilled placeholders - actual verification not performed"); + break; + } + } } return errors; diff --git a/packages/specflow/tests/commands/complete-verify.test.ts b/packages/specflow/tests/commands/complete-verify.test.ts new file mode 100644 index 0000000..9e6ceff --- /dev/null +++ b/packages/specflow/tests/commands/complete-verify.test.ts @@ -0,0 +1,183 @@ +/** + * Tests for verify.md N/A section support in complete command validation. + * + * The validateVerifyFile function should accept sections marked as + * "N/A", "Not applicable", "Not required", or "CLI only" as valid, + * while still requiring section headings to exist and rejecting + * unfilled placeholders in active sections. 
+ */ + +import { describe, test, expect, beforeEach, afterEach } from "bun:test"; +import { mkdtempSync, writeFileSync, rmSync } from "fs"; +import { join } from "path"; +import { tmpdir } from "os"; + +// We need to test validateVerifyFile which is not exported directly. +// We'll test through validateFeatureCompletion which is exported, +// but that requires a full setup. Instead, let's test the behavior +// by creating verify.md files and importing the module internals. + +// Since validateVerifyFile is not exported, we test via a small wrapper +// that mimics its logic using the exported validateFeatureCompletion. +// However, validateFeatureCompletion needs spec.md, plan.md, etc. +// So we'll create a minimal spec directory with all required files. + +function createSpecDir(): string { + const dir = mkdtempSync(join(tmpdir(), "specflow-verify-test-")); + // Create all required files so only verify.md validation matters + writeFileSync(join(dir, "spec.md"), "# Spec\nSome spec content"); + writeFileSync(join(dir, "plan.md"), "# Plan\nSome plan content"); + writeFileSync(join(dir, "tasks.md"), "# Tasks\nSome tasks content"); + writeFileSync(join(dir, "docs.md"), "# Docs\nSome docs content"); + return dir; +} + +// Direct test of the file validation by reading the source +// We'll use a dynamic import approach to access the module +// Actually, let's just test the exported validateFeatureCompletion +// and filter for verify-related errors. 
+ +import { validateFeatureCompletion } from "../../src/commands/complete"; + +function getVerifyErrors(specDir: string): string[] { + // Save and mock cwd to avoid test-related checks + const originalCwd = process.cwd; + process.cwd = () => specDir; + + const result = validateFeatureCompletion(specDir); + + process.cwd = originalCwd; + + // Filter to only verify.md related errors + return result.errors.filter( + (e) => e.includes("verify.md") || e.includes("verification") + ); +} + +describe("verify.md N/A section validation", () => { + let specDir: string; + + beforeEach(() => { + specDir = createSpecDir(); + }); + + afterEach(() => { + rmSync(specDir, { recursive: true, force: true }); + }); + + test("all sections filled passes validation", () => { + writeFileSync( + join(specDir, "verify.md"), + `# Verification + +## Pre-Verification Checklist +- [x] All tests pass +- [x] Code reviewed + +## Smoke Test Results +All smoke tests passed successfully. + +## Browser Verification +Tested in Chrome, Firefox, Safari. All pages render correctly. + +## API Verification +All API endpoints return expected responses. +` + ); + + const errors = getVerifyErrors(specDir); + expect(errors).toEqual([]); + }); + + test("Browser Verification containing N/A passes", () => { + writeFileSync( + join(specDir, "verify.md"), + `# Verification + +## Pre-Verification Checklist +- [x] All tests pass + +## Smoke Test Results +All smoke tests passed. + +## Browser Verification +N/A + +## API Verification +All API endpoints return expected responses. +` + ); + + const errors = getVerifyErrors(specDir); + expect(errors).toEqual([]); + }); + + test("API Verification containing 'Not applicable - CLI only' passes", () => { + writeFileSync( + join(specDir, "verify.md"), + `# Verification + +## Pre-Verification Checklist +- [x] All tests pass + +## Smoke Test Results +All smoke tests passed. 
+ +## Browser Verification +Not required - CLI only tool + +## API Verification +Not applicable - CLI only feature, no API endpoints. +` + ); + + const errors = getVerifyErrors(specDir); + expect(errors).toEqual([]); + }); + + test("missing section heading entirely still fails", () => { + writeFileSync( + join(specDir, "verify.md"), + `# Verification + +## Pre-Verification Checklist +- [x] All tests pass + +## Smoke Test Results +All smoke tests passed. + +## Browser Verification +Looks good. +` + ); + // Missing "## API Verification" heading + + const errors = getVerifyErrors(specDir); + expect(errors.length).toBeGreaterThan(0); + expect(errors.some((e) => e.includes("API Verification"))).toBe(true); + }); + + test("unfilled placeholders in active sections still fails", () => { + writeFileSync( + join(specDir, "verify.md"), + `# Verification + +## Pre-Verification Checklist +- [x] All tests pass + +## Smoke Test Results +[paste actual output] + +## Browser Verification +Tested and working. + +## API Verification +All endpoints verified. +` + ); + + const errors = getVerifyErrors(specDir); + expect(errors.length).toBeGreaterThan(0); + expect(errors.some((e) => e.includes("placeholder"))).toBe(true); + }); +}); From 3d6b6c13634e0bb9c8f12a5dc7f1d69f7f66a192 Mon Sep 17 00:00:00 2001 From: Andreas Date: Mon, 2 Feb 2026 14:20:02 +1300 Subject: [PATCH 2/4] feat: AI-powered headless Doctorow Gate for non-interactive environments Adds automatic AI evaluation of Doctorow Gate checks when running in non-TTY environments (CI/CD, agent pipelines). Uses claude -p with Haiku for fast, cheap evaluation. Falls back to pass-by-default on AI failure to avoid blocking pipelines. 
Closes #5 Co-Authored-By: Claude Opus 4.5 --- packages/specflow/src/lib/doctorow.ts | 227 +++++++++++++++++- .../tests/lib/doctorow-headless.test.ts | 217 +++++++++++++++++ 2 files changed, 438 insertions(+), 6 deletions(-) create mode 100644 packages/specflow/tests/lib/doctorow-headless.test.ts diff --git a/packages/specflow/src/lib/doctorow.ts b/packages/specflow/src/lib/doctorow.ts index b79a764..23df9e4 100644 --- a/packages/specflow/src/lib/doctorow.ts +++ b/packages/specflow/src/lib/doctorow.ts @@ -8,7 +8,7 @@ */ import { createInterface } from "readline"; -import { existsSync, readFileSync, appendFileSync } from "fs"; +import { existsSync, readFileSync, appendFileSync, readdirSync } from "fs"; import { join } from "path"; // ============================================================================= @@ -146,8 +146,9 @@ export function formatCheckResult(result: DoctorowCheckResult): string { /** * Format verification entry for verify.md + * @param evaluator - Optional tag like "[AI-evaluated]" to append to confirmed entries */ -export function formatVerifyEntry(results: DoctorowCheckResult[]): string { +export function formatVerifyEntry(results: DoctorowCheckResult[], evaluator?: string): string { const lines: string[] = []; const timestamp = new Date().toISOString(); @@ -159,7 +160,12 @@ export function formatVerifyEntry(results: DoctorowCheckResult[]): string { const name = check?.name ?? result.checkId; if (result.confirmed) { - lines.push(`- [x] **${name}**: Confirmed`); + const tag = evaluator ? 
` ${evaluator}` : ""; + lines.push(`- [x] **${name}**: Confirmed${tag}`); + if (result.skipReason) { + // In AI mode, skipReason holds the reasoning + lines.push(` - Reasoning: ${result.skipReason}`); + } } else if (result.skipReason) { lines.push(`- [ ] **${name}**: Skipped`); lines.push(` - Reason: ${result.skipReason}`); @@ -172,6 +178,207 @@ export function formatVerifyEntry(results: DoctorowCheckResult[]): string { return lines.join("\n"); } +// ============================================================================= +// Headless (AI) Evaluation +// ============================================================================= + +/** + * Extract JSON from an LLM response. + * Handles: + * - Claude --output-format json wrapper (extracts from "result" field) + * - Markdown code blocks (```json ... ```) + * - Raw JSON strings + * - JSON embedded in surrounding text + */ +export function extractJsonFromResponse(response: string): any | null { + let text = response; + + // Check if this is Claude --output-format json wrapper + try { + const wrapper = JSON.parse(response); + if (wrapper.type === "result" && wrapper.result) { + text = wrapper.result; + } + } catch { + // Not a JSON wrapper, use response as-is + } + + // Try markdown code block first + const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/); + if (codeBlockMatch) { + try { + return JSON.parse(codeBlockMatch[1].trim()); + } catch { + // Continue to other methods + } + } + + // Try to find JSON object in response + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (jsonMatch) { + try { + return JSON.parse(jsonMatch[0]); + } catch { + // Invalid JSON + } + } + + return null; +} + +/** + * Gather feature artifacts for AI evaluation context. + * Reads spec.md, plan.md, tasks.md, verify.md and lists src/ filenames. 
+ */ +export function gatherArtifacts(specPath: string): string { + const parts: string[] = []; + + const artifactFiles = ["spec.md", "plan.md", "tasks.md", "verify.md"]; + for (const file of artifactFiles) { + const filePath = join(specPath, file); + if (existsSync(filePath)) { + const content = readFileSync(filePath, "utf-8"); + parts.push(`--- ${file} ---\n${content}`); + } + } + + // List src/ files (just names, not content) + const srcDir = join(specPath, "..", "..", "..", "src"); + if (existsSync(srcDir)) { + try { + const files = listFilesRecursive(srcDir); + if (files.length > 0) { + parts.push(`--- src/ files ---\n${files.join("\n")}`); + } + } catch { + // Ignore errors reading src directory + } + } + + return parts.join("\n\n"); +} + +/** + * Recursively list files in a directory (relative paths). + */ +function listFilesRecursive(dir: string, prefix: string = ""): string[] { + const results: string[] = []; + try { + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.name.startsWith(".") || entry.name === "node_modules") continue; + const relative = prefix ? `${prefix}/${entry.name}` : entry.name; + if (entry.isDirectory()) { + results.push(...listFilesRecursive(join(dir, entry.name), relative)); + } else { + results.push(relative); + } + } + } catch { + // Ignore permission errors + } + return results; +} + +/** + * Evaluate a single Doctorow check using AI (claude -p). + * On failure, returns confirmed=true to avoid blocking the pipeline. + */ +export async function evaluateCheckWithAI( + check: DoctorowCheck, + artifacts: string +): Promise { + const systemPrompt = + "You are a code quality reviewer evaluating a feature completion check. 
" + + 'Return ONLY valid JSON: {"pass": true, "reasoning": "one sentence explanation"}'; + + const userPrompt = + `Check: ${check.question}\n\nContext: ${check.prompt}\n\nFeature Artifacts:\n${artifacts}`; + + try { + const proc = Bun.spawn( + ["claude", "-p", "--model", "claude-haiku-4-5-20251001", "--system-prompt", systemPrompt, userPrompt], + { + stdout: "pipe", + stderr: "pipe", + env: { ...process.env }, + } + ); + + // 30 second timeout + const timeoutPromise = new Promise((resolve) => { + setTimeout(() => { + proc.kill(); + resolve(null); + }, 30000); + }); + + const resultPromise = (async () => { + const output = await new Response(proc.stdout).text(); + const exitCode = await proc.exited; + + if (exitCode !== 0) return null; + + const extracted = extractJsonFromResponse(output); + if (!extracted || typeof extracted.pass !== "boolean") return null; + + return { + checkId: check.id, + confirmed: extracted.pass, + skipReason: extracted.reasoning || null, + timestamp: new Date(), + }; + })(); + + const result = await Promise.race([resultPromise, timeoutPromise]); + + if (result) return result; + } catch { + // Fall through to default + } + + // On any AI failure, pass by default + return { + checkId: check.id, + confirmed: true, + skipReason: "AI evaluation unavailable — passed by default", + timestamp: new Date(), + }; +} + +/** + * Run the Doctorow Gate in headless mode using AI evaluation. + * Iterates through all checks and evaluates them with claude -p. + */ +export async function runDoctorowGateHeadless( + featureId: string, + specPath: string +): Promise { + const artifacts = gatherArtifacts(specPath); + const results: DoctorowCheckResult[] = []; + + for (const check of DOCTOROW_CHECKS) { + console.log(` Evaluating: ${check.name}...`); + const result = await evaluateCheckWithAI(check, artifacts); + results.push(result); + const status = result.confirmed ? 
"PASS" : "FAIL"; + console.log(` ${status}: ${check.name} - ${result.skipReason || "confirmed"}`); + } + + const failedCheck = results.find(r => !r.confirmed); + const passed = !failedCheck; + + // Append AI results to verify.md + appendToVerifyMd(specPath, results, "[AI-evaluated]"); + + return { + passed, + skipped: false, + failedCheck: failedCheck?.checkId, + results, + }; +} + // ============================================================================= // Gate Logic // ============================================================================= @@ -252,6 +459,14 @@ export async function runDoctorowGate( }; } + // Detect headless mode + const isHeadless = !process.stdin.isTTY || process.env.SPECFLOW_HEADLESS === "true"; + + if (isHeadless) { + console.log(`\nRunning Doctorow Gate in headless mode (AI evaluation)...`); + return runDoctorowGateHeadless(featureId, specPath); + } + console.log(`\n🔍 Running Doctorow Gate for ${featureId}`); console.log("─".repeat(50)); console.log("The Doctorow Gate ensures you've considered failure modes,"); @@ -307,7 +522,7 @@ export async function runDoctorowGate( /** * Append verification results to verify.md */ -export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[]): void { +export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[], evaluator?: string): void { const verifyPath = join(specPath, "verify.md"); let content = ""; @@ -325,9 +540,9 @@ export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[ } // Append new entry - content += formatVerifyEntry(results); + content += formatVerifyEntry(results, evaluator); - appendFileSync(verifyPath, formatVerifyEntry(results)); + appendFileSync(verifyPath, formatVerifyEntry(results, evaluator)); } /** diff --git a/packages/specflow/tests/lib/doctorow-headless.test.ts b/packages/specflow/tests/lib/doctorow-headless.test.ts new file mode 100644 index 0000000..0e3204e --- /dev/null +++ 
b/packages/specflow/tests/lib/doctorow-headless.test.ts @@ -0,0 +1,217 @@ +/** + * Doctorow Gate Headless (AI) Mode Tests + * + * Tests for extractJsonFromResponse, gatherArtifacts, formatVerifyEntry + * with evaluator tag, and headless routing detection. + * Does NOT test actual claude -p calls (integration tests). + */ + +import { describe, it, expect, beforeEach, afterEach } from "bun:test"; +import { mkdirSync, writeFileSync, rmSync, existsSync, readFileSync } from "fs"; +import { join } from "path"; +import { + extractJsonFromResponse, + gatherArtifacts, + formatVerifyEntry, + DoctorowCheckResult, +} from "../../src/lib/doctorow"; + +// ============================================================================= +// Test Fixtures +// ============================================================================= + +const TEST_DIR = "/tmp/specflow-headless-test"; +const SPEC_PATH = join(TEST_DIR, ".specify", "specs", "f-001-test"); + +function cleanup(): void { + if (existsSync(TEST_DIR)) { + rmSync(TEST_DIR, { recursive: true, force: true }); + } +} + +function setupSpecDir(): void { + mkdirSync(SPEC_PATH, { recursive: true }); +} + +// ============================================================================= +// extractJsonFromResponse +// ============================================================================= + +describe("extractJsonFromResponse", () => { + it("should parse raw JSON", () => { + const input = '{"pass": true, "reasoning": "tests exist"}'; + const result = extractJsonFromResponse(input); + expect(result).toEqual({ pass: true, reasoning: "tests exist" }); + }); + + it("should extract JSON from markdown code block", () => { + const input = 'Here is the result:\n```json\n{"pass": false, "reasoning": "no tests found"}\n```\nDone.'; + const result = extractJsonFromResponse(input); + expect(result).toEqual({ pass: false, reasoning: "no tests found" }); + }); + + it("should extract JSON from code block without json tag", () => { + const input 
= '```\n{"pass": true, "reasoning": "looks good"}\n```'; + const result = extractJsonFromResponse(input); + expect(result).toEqual({ pass: true, reasoning: "looks good" }); + }); + + it("should handle Claude --output-format json wrapper", () => { + const inner = '{"pass": true, "reasoning": "all good"}'; + const wrapper = JSON.stringify({ type: "result", result: inner }); + const result = extractJsonFromResponse(wrapper); + expect(result).toEqual({ pass: true, reasoning: "all good" }); + }); + + it("should extract embedded JSON from surrounding text", () => { + const input = 'Based on my analysis, {"pass": true, "reasoning": "confirmed"} is the result.'; + const result = extractJsonFromResponse(input); + expect(result).toEqual({ pass: true, reasoning: "confirmed" }); + }); + + it("should return null for invalid input", () => { + expect(extractJsonFromResponse("no json here")).toBeNull(); + expect(extractJsonFromResponse("")).toBeNull(); + expect(extractJsonFromResponse("just some text {broken")).toBeNull(); + }); + + it("should handle wrapper with embedded JSON in result string", () => { + const inner = 'The answer is ```json\n{"pass": true, "reasoning": "yes"}\n```'; + const wrapper = JSON.stringify({ type: "result", result: inner }); + const result = extractJsonFromResponse(wrapper); + expect(result).toEqual({ pass: true, reasoning: "yes" }); + }); +}); + +// ============================================================================= +// gatherArtifacts +// ============================================================================= + +describe("gatherArtifacts", () => { + beforeEach(() => { + cleanup(); + setupSpecDir(); + }); + + afterEach(() => { + cleanup(); + }); + + it("should gather existing artifact files", () => { + writeFileSync(join(SPEC_PATH, "spec.md"), "# Spec\nFeature description"); + writeFileSync(join(SPEC_PATH, "plan.md"), "# Plan\nImplementation plan"); + + const artifacts = gatherArtifacts(SPEC_PATH); + + expect(artifacts).toContain("--- 
spec.md ---"); + expect(artifacts).toContain("Feature description"); + expect(artifacts).toContain("--- plan.md ---"); + expect(artifacts).toContain("Implementation plan"); + }); + + it("should skip missing artifact files gracefully", () => { + writeFileSync(join(SPEC_PATH, "spec.md"), "# Spec only"); + + const artifacts = gatherArtifacts(SPEC_PATH); + + expect(artifacts).toContain("--- spec.md ---"); + expect(artifacts).not.toContain("--- plan.md ---"); + expect(artifacts).not.toContain("--- tasks.md ---"); + }); + + it("should include src/ file listing when available", () => { + const srcDir = join(TEST_DIR, "src"); + mkdirSync(srcDir, { recursive: true }); + writeFileSync(join(srcDir, "index.ts"), "export {}"); + writeFileSync(join(srcDir, "utils.ts"), "export {}"); + + const artifacts = gatherArtifacts(SPEC_PATH); + + expect(artifacts).toContain("--- src/ files ---"); + expect(artifacts).toContain("index.ts"); + expect(artifacts).toContain("utils.ts"); + }); + + it("should return empty string when no artifacts exist", () => { + // specPath exists but has no files + const emptyPath = join(TEST_DIR, "empty-spec"); + mkdirSync(emptyPath, { recursive: true }); + + const artifacts = gatherArtifacts(emptyPath); + expect(artifacts).toBe(""); + }); +}); + +// ============================================================================= +// formatVerifyEntry with evaluator tag +// ============================================================================= + +describe("formatVerifyEntry with evaluator", () => { + const makeResult = (checkId: string, confirmed: boolean, skipReason: string | null): DoctorowCheckResult => ({ + checkId, + confirmed, + skipReason, + timestamp: new Date(), + }); + + it("should include evaluator tag on confirmed entries", () => { + const results = [ + makeResult("failure_test", true, "Error handling tests exist"), + ]; + + const entry = formatVerifyEntry(results, "[AI-evaluated]"); + + expect(entry).toContain("**Failure Test**: Confirmed 
[AI-evaluated]"); + expect(entry).toContain("Reasoning: Error handling tests exist"); + }); + + it("should not include evaluator tag when not provided", () => { + const results = [ + makeResult("failure_test", true, null), + ]; + + const entry = formatVerifyEntry(results); + + expect(entry).toContain("**Failure Test**: Confirmed"); + expect(entry).not.toContain("[AI-evaluated]"); + }); + + it("should handle mixed results with evaluator", () => { + const results = [ + makeResult("failure_test", true, "Tests exist"), + makeResult("assumption_test", false, null), + ]; + + const entry = formatVerifyEntry(results, "[AI-evaluated]"); + + expect(entry).toContain("**Failure Test**: Confirmed [AI-evaluated]"); + expect(entry).toContain("**Assumption Test**: Not confirmed"); + }); +}); + +// ============================================================================= +// Headless routing detection +// ============================================================================= + +describe("headless routing", () => { + it("should detect non-TTY environment", () => { + // In test environment, process.stdin.isTTY is typically undefined/false + const isTTY = process.stdin.isTTY; + // Bun test runs are non-TTY, so this should be falsy + expect(!isTTY).toBe(true); + }); + + it("should detect SPECFLOW_HEADLESS env var", () => { + const original = process.env.SPECFLOW_HEADLESS; + process.env.SPECFLOW_HEADLESS = "true"; + + const isHeadless = !process.stdin.isTTY || process.env.SPECFLOW_HEADLESS === "true"; + expect(isHeadless).toBe(true); + + if (original !== undefined) { + process.env.SPECFLOW_HEADLESS = original; + } else { + delete process.env.SPECFLOW_HEADLESS; + } + }); +}); From 275782ed3bb0e588d7d8d830ead384178eccf047 Mon Sep 17 00:00:00 2001 From: Andreas Date: Mon, 2 Feb 2026 14:21:59 +1300 Subject: [PATCH 3/4] feat: make Doctorow Gate AI model configurable Default to Sonnet (claude-sonnet-4-20250514) for better reasoning on quality checks. 
Override via SPECFLOW_DOCTOROW_MODEL env var. Supported models: - claude-haiku-4-5-20251001 (fast/cheap) - claude-sonnet-4-20250514 (balanced, default) - claude-opus-4-5-20251101 (deep reasoning) Co-Authored-By: Claude Opus 4.5 --- packages/specflow/src/lib/doctorow.ts | 19 ++++++++++++-- .../specflow/src/lib/migrations/embedded.ts | 26 ++++++++++++++++++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/packages/specflow/src/lib/doctorow.ts b/packages/specflow/src/lib/doctorow.ts index 23df9e4..4ee2284 100644 --- a/packages/specflow/src/lib/doctorow.ts +++ b/packages/specflow/src/lib/doctorow.ts @@ -284,12 +284,26 @@ function listFilesRecursive(dir: string, prefix: string = ""): string[] { * Evaluate a single Doctorow check using AI (claude -p). * On failure, returns confirmed=true to avoid blocking the pipeline. */ +/** + * Default model for headless Doctorow evaluation. + * Sonnet provides good reasoning for quality checks at reasonable cost. + * Override via SPECFLOW_DOCTOROW_MODEL env var. + * + * Recommended models: + * - claude-haiku-4-5-20251001: Fast/cheap, may give shallow evaluations + * - claude-sonnet-4-20250514: Balanced reasoning (default) + * - claude-opus-4-5-20251101: Deep reasoning, higher cost + */ +const DEFAULT_DOCTOROW_MODEL = "claude-sonnet-4-20250514"; + export async function evaluateCheckWithAI( check: DoctorowCheck, artifacts: string ): Promise { + const model = process.env.SPECFLOW_DOCTOROW_MODEL || DEFAULT_DOCTOROW_MODEL; const systemPrompt = "You are a code quality reviewer evaluating a feature completion check. " + + "Analyze the provided feature artifacts carefully. 
" + 'Return ONLY valid JSON: {"pass": true, "reasoning": "one sentence explanation"}'; const userPrompt = @@ -297,7 +311,7 @@ export async function evaluateCheckWithAI( try { const proc = Bun.spawn( - ["claude", "-p", "--model", "claude-haiku-4-5-20251001", "--system-prompt", systemPrompt, userPrompt], + ["claude", "-p", "--model", model, "--system-prompt", systemPrompt, userPrompt], { stdout: "pipe", stderr: "pipe", @@ -463,7 +477,8 @@ export async function runDoctorowGate( const isHeadless = !process.stdin.isTTY || process.env.SPECFLOW_HEADLESS === "true"; if (isHeadless) { - console.log(`\nRunning Doctorow Gate in headless mode (AI evaluation)...`); + const model = process.env.SPECFLOW_DOCTOROW_MODEL || DEFAULT_DOCTOROW_MODEL; + console.log(`\n🤖 Running Doctorow Gate in headless mode (AI: ${model})...`); return runDoctorowGateHeadless(featureId, specPath); } diff --git a/packages/specflow/src/lib/migrations/embedded.ts b/packages/specflow/src/lib/migrations/embedded.ts index 48d7c8e..f47f2bc 100644 --- a/packages/specflow/src/lib/migrations/embedded.ts +++ b/packages/specflow/src/lib/migrations/embedded.ts @@ -4,7 +4,7 @@ * AUTO-GENERATED by scripts/embed-migrations.ts * DO NOT EDIT MANUALLY * - * Generated: 2026-01-28T13:34:48.655Z + * Generated: 2026-02-02T01:21:51.241Z * * These migrations are embedded at build time so they work * in the compiled binary where import.meta.dir resolves to @@ -90,4 +90,28 @@ ALTER TABLE features ADD COLUMN skip_duplicate_of TEXT;`, -- This is a no-op for safety; manual intervention required for rollback SELECT 1;`, }, + { + version: 6, + name: "add_contrib_prep", + upSql: `-- Add contrib prep state tracking table +-- Tracks the contribution preparation workflow (inventory → sanitize → extract → verify) + +CREATE TABLE IF NOT EXISTS contrib_prep_state ( + feature_id TEXT PRIMARY KEY, + gate INTEGER NOT NULL DEFAULT 0, + inventory_included INTEGER DEFAULT 0, + inventory_excluded INTEGER DEFAULT 0, + sanitization_pass INTEGER, + 
sanitization_findings INTEGER DEFAULT 0, + tag_name TEXT, + tag_hash TEXT, + contrib_branch TEXT, + verification_pass INTEGER, + base_branch TEXT DEFAULT 'main', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + FOREIGN KEY (feature_id) REFERENCES features(id) +);`, + downSql: `DROP TABLE IF EXISTS contrib_prep_state;`, + }, ]; From baf634962163bf863e09351370179a20b0aafece Mon Sep 17 00:00:00 2001 From: Andreas Date: Mon, 2 Feb 2026 14:32:02 +1300 Subject: [PATCH 4/4] Fix headless Doctorow: use --output-format json, default to Opus - Add --output-format json to claude -p invocation to ensure parseable output in environments with CLAUDE.md hooks/skills configured - Change default model from Sonnet to Opus for deeper quality reasoning - Model remains configurable via SPECFLOW_DOCTOROW_MODEL env var Co-Authored-By: Claude Opus 4.5 --- packages/specflow/src/lib/doctorow.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/specflow/src/lib/doctorow.ts b/packages/specflow/src/lib/doctorow.ts index 4ee2284..b9e6132 100644 --- a/packages/specflow/src/lib/doctorow.ts +++ b/packages/specflow/src/lib/doctorow.ts @@ -286,15 +286,15 @@ function listFilesRecursive(dir: string, prefix: string = ""): string[] { */ /** * Default model for headless Doctorow evaluation. - * Sonnet provides good reasoning for quality checks at reasonable cost. + * Opus provides deep reasoning for thorough quality checks. * Override via SPECFLOW_DOCTOROW_MODEL env var. 
* * Recommended models: * - claude-haiku-4-5-20251001: Fast/cheap, may give shallow evaluations - * - claude-sonnet-4-20250514: Balanced reasoning (default) - * - claude-opus-4-5-20251101: Deep reasoning, higher cost + * - claude-sonnet-4-20250514: Balanced reasoning, lower cost + * - claude-opus-4-5-20251101: Deep reasoning (default) */ -const DEFAULT_DOCTOROW_MODEL = "claude-sonnet-4-20250514"; +const DEFAULT_DOCTOROW_MODEL = "claude-opus-4-5-20251101"; export async function evaluateCheckWithAI( check: DoctorowCheck, @@ -311,7 +311,7 @@ export async function evaluateCheckWithAI( try { const proc = Bun.spawn( - ["claude", "-p", "--model", model, "--system-prompt", systemPrompt, userPrompt], + ["claude", "-p", "--output-format", "json", "--model", model, "--system-prompt", systemPrompt, userPrompt], { stdout: "pipe", stderr: "pipe",