Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion packages/specflow/src/commands/complete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,25 @@ function runTests(): { pass: boolean; output: string } {
/**
 * Check if a section's content indicates it is not applicable.
 *
 * The section body is the text between the heading line and the next `## `
 * heading (or the end of the document). The section counts as not applicable
 * when that body contains "N/A", "Not applicable", "Not required", or
 * "CLI only" (case-insensitive, whole words).
 *
 * @param content - Full markdown text to search.
 * @param sectionHeading - Heading text as it appears in `content`, e.g. "## API Verification".
 * @returns true when the heading is found and its body matches one of the
 *          not-applicable markers; false otherwise (including when the
 *          heading is absent).
 */
function isSectionNotApplicable(content: string, sectionHeading: string): boolean {
  // A plain indexOf would stop at the first substring hit, so "## API" would
  // resolve to an earlier "## API Verification" line and the wrong section
  // would be inspected. Only accept an occurrence where nothing but
  // whitespace follows the heading on its line.
  let headingEnd = -1;
  for (
    let at = content.indexOf(sectionHeading);
    at !== -1;
    at = content.indexOf(sectionHeading, at + 1)
  ) {
    const end = at + sectionHeading.length;
    const lineBreak = content.indexOf("\n", end);
    const restOfLine = content.slice(end, lineBreak === -1 ? undefined : lineBreak);
    if (restOfLine.trim() === "") {
      headingEnd = end;
      break;
    }
  }
  if (headingEnd === -1) return false;

  // The section ends at the next level-2 heading, or at end of document.
  const afterHeading = content.slice(headingEnd);
  const nextHeadingMatch = afterHeading.match(/\n## /);
  const sectionContent = nextHeadingMatch
    ? afterHeading.slice(0, nextHeadingMatch.index)
    : afterHeading;

  const naPattern = /\b(n\/a|not applicable|not required|cli only)\b/i;
  return naPattern.test(sectionContent);
}

/**
 * Validate verify.md has required sections
 */
function validateVerifyFile(verifyPath: string): string[] {
const errors: string[] = [];

Expand All @@ -120,8 +139,21 @@ function validateVerifyFile(verifyPath: string): string[] {
}

// Check that verification was actually completed (not just template)
// But skip placeholder checks for sections marked as N/A
if (content.includes("[paste actual output]") || content.includes("[paste actual response]")) {
errors.push("verify.md contains unfilled placeholders - actual verification not performed");
// Only flag unfilled placeholders if the section containing them is not marked N/A
const placeholderPattern = /\[paste actual (?:output|response)\]/g;
let match;
while ((match = placeholderPattern.exec(content)) !== null) {
const beforeMatch = content.slice(0, match.index);
const lastHeadingMatch = beforeMatch.match(/## [^\n]+/g);
const lastHeading = lastHeadingMatch ? lastHeadingMatch[lastHeadingMatch.length - 1] : null;

if (!lastHeading || !isSectionNotApplicable(content, lastHeading)) {
errors.push("verify.md contains unfilled placeholders - actual verification not performed");
break;
}
}
}

return errors;
Expand Down
242 changes: 236 additions & 6 deletions packages/specflow/src/lib/doctorow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*/

import { createInterface } from "readline";
import { existsSync, readFileSync, appendFileSync } from "fs";
import { existsSync, readFileSync, appendFileSync, readdirSync } from "fs";
import { join } from "path";

// =============================================================================
Expand Down Expand Up @@ -146,8 +146,9 @@ export function formatCheckResult(result: DoctorowCheckResult): string {

/**
* Format verification entry for verify.md
* @param evaluator - Optional tag like "[AI-evaluated]" to append to confirmed entries
*/
export function formatVerifyEntry(results: DoctorowCheckResult[]): string {
export function formatVerifyEntry(results: DoctorowCheckResult[], evaluator?: string): string {
const lines: string[] = [];
const timestamp = new Date().toISOString();

Expand All @@ -159,7 +160,12 @@ export function formatVerifyEntry(results: DoctorowCheckResult[]): string {
const name = check?.name ?? result.checkId;

if (result.confirmed) {
lines.push(`- [x] **${name}**: Confirmed`);
const tag = evaluator ? ` ${evaluator}` : "";
lines.push(`- [x] **${name}**: Confirmed${tag}`);
if (result.skipReason) {
// In AI mode, skipReason holds the reasoning
lines.push(` - Reasoning: ${result.skipReason}`);
}
} else if (result.skipReason) {
lines.push(`- [ ] **${name}**: Skipped`);
lines.push(` - Reason: ${result.skipReason}`);
Expand All @@ -172,6 +178,221 @@ export function formatVerifyEntry(results: DoctorowCheckResult[]): string {
return lines.join("\n");
}

// =============================================================================
// Headless (AI) Evaluation
// =============================================================================

/**
 * Extract a JSON value from an LLM response.
 *
 * Strategies, tried in order:
 * 1. If the whole response parses as a Claude `--output-format json` wrapper
 *    (`type === "result"` with a non-empty string `result`), continue with
 *    the inner `result` text instead of the raw response.
 * 2. Parse the contents of the first markdown code fence (```json ... ```).
 * 3. Parse the span from the first `{` to the last `}` in the text.
 *
 * @param response - Raw text produced by the model / CLI.
 * @returns The parsed JSON value, or `null` when no strategy yields valid JSON.
 */
export function extractJsonFromResponse(response: string): any | null {
  let text = response;

  // Check if this is Claude --output-format json wrapper
  try {
    const wrapper = JSON.parse(response);
    // Only unwrap a string payload: the extraction below calls `text.match`,
    // which would throw a TypeError if `result` were an object or a number.
    if (
      wrapper !== null &&
      wrapper.type === "result" &&
      typeof wrapper.result === "string" &&
      wrapper.result.length > 0
    ) {
      text = wrapper.result;
    }
  } catch {
    // Not a JSON wrapper, use response as-is
  }

  // Try markdown code block first
  const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
  if (codeBlockMatch) {
    try {
      return JSON.parse(codeBlockMatch[1].trim());
    } catch {
      // Fence content was not valid JSON; fall through to the brace scan
    }
  }

  // Try to find JSON object in response (greedy: first "{" to last "}")
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (jsonMatch) {
    try {
      return JSON.parse(jsonMatch[0]);
    } catch {
      // Invalid JSON
    }
  }

  return null;
}

/**
 * Build the text context handed to the AI evaluator for one feature.
 *
 * Each of spec.md, plan.md, tasks.md and verify.md that exists under
 * `specPath` is included in full under a `--- <file> ---` banner. If a `src`
 * directory exists three levels above `specPath`, the relative paths of its
 * files are appended as a final section (names only, never file contents).
 *
 * @param specPath - Directory holding the feature's markdown artifacts.
 * @returns All sections joined by a blank line; an empty string when nothing was found.
 */
export function gatherArtifacts(specPath: string): string {
  const sections: string[] = ["spec.md", "plan.md", "tasks.md", "verify.md"].flatMap(
    (name) => {
      const artifactPath = join(specPath, name);
      return existsSync(artifactPath)
        ? [`--- ${name} ---\n${readFileSync(artifactPath, "utf-8")}`]
        : [];
    }
  );

  // Source listing: file names only, located relative to the spec directory.
  const sourceRoot = join(specPath, "..", "..", "..", "src");
  if (existsSync(sourceRoot)) {
    try {
      const sourceFiles = listFilesRecursive(sourceRoot);
      if (sourceFiles.length > 0) {
        sections.push(`--- src/ files ---\n${sourceFiles.join("\n")}`);
      }
    } catch {
      // Best effort: a failure while listing src/ leaves the section out
    }
  }

  return sections.join("\n\n");
}

/**
 * Collect the files beneath `dir`, depth-first, as "/"-separated paths.
 *
 * Entries whose name starts with "." and any entry named "node_modules" are
 * skipped entirely. A directory that cannot be read contributes no entries.
 *
 * @param dir - Directory to scan.
 * @param prefix - Path prepended to every returned entry; empty for paths relative to `dir`.
 * @returns Paths of all non-directory entries found.
 */
function listFilesRecursive(dir: string, prefix: string = ""): string[] {
  try {
    return readdirSync(dir, { withFileTypes: true })
      .filter((entry) => !entry.name.startsWith(".") && entry.name !== "node_modules")
      .flatMap((entry) => {
        const relativePath = prefix === "" ? entry.name : `${prefix}/${entry.name}`;
        return entry.isDirectory()
          ? listFilesRecursive(join(dir, entry.name), relativePath)
          : [relativePath];
      });
  } catch {
    // Ignore permission errors
    return [];
  }
}

/**
 * Default model for headless Doctorow evaluation.
 * Opus provides deep reasoning for thorough quality checks.
 * Override via SPECFLOW_DOCTOROW_MODEL env var.
 *
 * Recommended models:
 * - claude-haiku-4-5-20251001: Fast/cheap, may give shallow evaluations
 * - claude-sonnet-4-20250514: Balanced reasoning, lower cost
 * - claude-opus-4-5-20251101: Deep reasoning (default)
 */
const DEFAULT_DOCTOROW_MODEL = "claude-opus-4-5-20251101";

/**
 * Evaluate a single Doctorow check using AI (`claude -p`).
 *
 * Spawns the `claude` CLI with the check's question and prompt plus the
 * gathered artifacts, and expects a JSON object of the form
 * `{"pass": boolean, "reasoning": string}` on stdout.
 *
 * The gate fails open: if the process cannot be spawned, exits non-zero,
 * exceeds the 30 second timeout, or returns output without a boolean `pass`,
 * the check is reported as confirmed so the pipeline is not blocked.
 *
 * @param check - The check to evaluate; its `id`, `question` and `prompt` are used.
 * @param artifacts - Feature context text embedded in the user prompt.
 * @returns The check result. `skipReason` carries the model's reasoning, or
 *          the pass-by-default notice when evaluation was unavailable.
 */
export async function evaluateCheckWithAI(
  check: DoctorowCheck,
  artifacts: string
): Promise<DoctorowCheckResult> {
  const model = process.env.SPECFLOW_DOCTOROW_MODEL || DEFAULT_DOCTOROW_MODEL;
  const systemPrompt =
    "You are a code quality reviewer evaluating a feature completion check. " +
    "Analyze the provided feature artifacts carefully. " +
    'Return ONLY valid JSON: {"pass": true, "reasoning": "one sentence explanation"}';

  const userPrompt =
    `Check: ${check.question}\n\nContext: ${check.prompt}\n\nFeature Artifacts:\n${artifacts}`;

  const timeoutMs = 30000;
  let timer: ReturnType<typeof setTimeout> | undefined;

  try {
    // NOTE(review): the whole prompt travels as one argv entry; very large
    // artifact sets could exceed the OS argument-size limit — confirm whether
    // feeding the prompt through stdin is preferable.
    const proc = Bun.spawn(
      ["claude", "-p", "--output-format", "json", "--model", model, "--system-prompt", systemPrompt, userPrompt],
      {
        stdout: "pipe",
        // stderr is never consumed here, so do not leave it as an unread pipe.
        stderr: "ignore",
        env: { ...process.env },
      }
    );

    // 30 second timeout: kill the child and resolve with null
    const timeoutPromise = new Promise<null>((resolve) => {
      timer = setTimeout(() => {
        proc.kill();
        resolve(null);
      }, timeoutMs);
    });

    const resultPromise = (async () => {
      const output = await new Response(proc.stdout).text();
      const exitCode = await proc.exited;

      if (exitCode !== 0) return null;

      const extracted = extractJsonFromResponse(output);
      if (!extracted || typeof extracted.pass !== "boolean") return null;

      return {
        checkId: check.id,
        confirmed: extracted.pass,
        skipReason: extracted.reasoning || null,
        timestamp: new Date(),
      };
    })();

    const result = await Promise.race([resultPromise, timeoutPromise]);

    if (result) return result;
  } catch {
    // Fall through to default
  } finally {
    // Without this, every check that finishes early leaves a live 30 s timer
    // that keeps the event loop running and later calls kill() on a process
    // that has already exited.
    clearTimeout(timer);
  }

  // On any AI failure, pass by default
  return {
    checkId: check.id,
    confirmed: true,
    skipReason: "AI evaluation unavailable — passed by default",
    timestamp: new Date(),
  };
}

/**
 * Headless variant of the Doctorow Gate: every check is judged by the AI
 * evaluator instead of an interactive prompt.
 *
 * Artifacts are gathered once, then each entry of DOCTOROW_CHECKS is awaited
 * in turn and its PASS/FAIL outcome is logged. All results are appended to
 * verify.md tagged "[AI-evaluated]".
 *
 * @param featureId - Feature identifier (not read by this function).
 * @param specPath - Directory containing the feature's spec artifacts and verify.md.
 * @returns Gate result; `passed` is false when any check was not confirmed,
 *          and `failedCheck` is the id of the first such check.
 */
export async function runDoctorowGateHeadless(
  featureId: string,
  specPath: string
): Promise<DoctorowResult> {
  const context = gatherArtifacts(specPath);
  const outcomes: DoctorowCheckResult[] = [];

  // Each evaluation is awaited before the next check starts.
  for (const check of DOCTOROW_CHECKS) {
    console.log(` Evaluating: ${check.name}...`);
    const outcome = await evaluateCheckWithAI(check, context);
    outcomes.push(outcome);

    const label = outcome.confirmed ? "PASS" : "FAIL";
    const detail = outcome.skipReason || "confirmed";
    console.log(` ${label}: ${check.name} - ${detail}`);
  }

  // Append AI results to verify.md
  appendToVerifyMd(specPath, outcomes, "[AI-evaluated]");

  const firstFailure = outcomes.find((outcome) => !outcome.confirmed);
  return {
    passed: firstFailure === undefined,
    skipped: false,
    failedCheck: firstFailure?.checkId,
    results: outcomes,
  };
}

// =============================================================================
// Gate Logic
// =============================================================================
Expand Down Expand Up @@ -252,6 +473,15 @@ export async function runDoctorowGate(
};
}

// Detect headless mode
const isHeadless = !process.stdin.isTTY || process.env.SPECFLOW_HEADLESS === "true";

if (isHeadless) {
const model = process.env.SPECFLOW_DOCTOROW_MODEL || DEFAULT_DOCTOROW_MODEL;
console.log(`\n🤖 Running Doctorow Gate in headless mode (AI: ${model})...`);
return runDoctorowGateHeadless(featureId, specPath);
}

console.log(`\n🔍 Running Doctorow Gate for ${featureId}`);
console.log("─".repeat(50));
console.log("The Doctorow Gate ensures you've considered failure modes,");
Expand Down Expand Up @@ -307,7 +537,7 @@ export async function runDoctorowGate(
/**
* Append verification results to verify.md
*/
export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[]): void {
export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[], evaluator?: string): void {
const verifyPath = join(specPath, "verify.md");

let content = "";
Expand All @@ -325,9 +555,9 @@ export function appendToVerifyMd(specPath: string, results: DoctorowCheckResult[
}

// Append new entry
content += formatVerifyEntry(results);
content += formatVerifyEntry(results, evaluator);

appendFileSync(verifyPath, formatVerifyEntry(results));
appendFileSync(verifyPath, formatVerifyEntry(results, evaluator));
}

/**
Expand Down
26 changes: 25 additions & 1 deletion packages/specflow/src/lib/migrations/embedded.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* AUTO-GENERATED by scripts/embed-migrations.ts
* DO NOT EDIT MANUALLY
*
* Generated: 2026-01-28T13:34:48.655Z
* Generated: 2026-02-02T01:21:51.241Z
*
* These migrations are embedded at build time so they work
* in the compiled binary where import.meta.dir resolves to
Expand Down Expand Up @@ -90,4 +90,28 @@ ALTER TABLE features ADD COLUMN skip_duplicate_of TEXT;`,
-- This is a no-op for safety; manual intervention required for rollback
SELECT 1;`,
},
{
version: 6,
name: "add_contrib_prep",
upSql: `-- Add contrib prep state tracking table
-- Tracks the contribution preparation workflow (inventory → sanitize → extract → verify)

CREATE TABLE IF NOT EXISTS contrib_prep_state (
feature_id TEXT PRIMARY KEY,
gate INTEGER NOT NULL DEFAULT 0,
inventory_included INTEGER DEFAULT 0,
inventory_excluded INTEGER DEFAULT 0,
sanitization_pass INTEGER,
sanitization_findings INTEGER DEFAULT 0,
tag_name TEXT,
tag_hash TEXT,
contrib_branch TEXT,
verification_pass INTEGER,
base_branch TEXT DEFAULT 'main',
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
FOREIGN KEY (feature_id) REFERENCES features(id)
);`,
downSql: `DROP TABLE IF EXISTS contrib_prep_state;`,
},
];
Loading