From 0ea9883731ab55b5fb7ddd4ebf5c5c6cc75038e0 Mon Sep 17 00:00:00 2001 From: Yavor Panayotov Date: Sun, 17 May 2026 21:26:04 +0300 Subject: [PATCH 1/2] propagate: byte-deterministic pipeline (schema, scripts, orchestrator) Rewrites the propagate skill to use the same inventory + translator + consensus architecture as distill: K subagents produce structured obligation-bridge inventories, language-agnostic scripts canonicalise, merge by K-vote, and dispatch to a per-language backend (manifest + name-policy + templates) which is loaded from the skill's backends/ directory. The translator is byte-deterministic given fixed inputs. Bridge ambiguity (where K subagents cannot converge on a single witness) is surfaced as low-confidence stubs with candidate symbols, not silenced. Stage C runs the backend's runner command (e.g. pytest, jest) and emits a categorised propagation-report.md (pass / fail / error / bridge-unresolved / infrastructure-gap). Adds: - scripts/canonicalize-obligations.mjs (multiset validation against allium plan; deterministic disambiguation of duplicate obligation IDs from overloaded spec rules) - scripts/merge-obligations.mjs (K-vote consensus; per-field modal voting; bridge-ambiguity surfaced as low confidence) - scripts/obligations-to-tests.mjs (translator core + named bridge_import transforms + 4-construct template renderer) - scripts/run-suite.mjs (Stage C with pluggable per-format adapters) - skills/propagate/SKILL.md (rewritten as orchestrator; code_root and spec_path are mechanically locked from the user invocation so two runs on the same project use identical framing) - skills/propagate/references/obligation-bridge-schema.md - skills/propagate/references/backend-authoring-guide.md --- scripts/canonicalize-obligations.mjs | 427 ++++++++++++++++ scripts/merge-obligations.mjs | 307 ++++++++++++ scripts/obligations-to-tests.mjs | 454 ++++++++++++++++++ scripts/run-suite.mjs | 342 +++++++++++++ skills/propagate/SKILL.md | 439 ++++++++++------- 
.../references/backend-authoring-guide.md | 293 +++++++++++ .../references/obligation-bridge-schema.md | 252 ++++++++++ 7 files changed, 2334 insertions(+), 180 deletions(-) create mode 100644 scripts/canonicalize-obligations.mjs create mode 100644 scripts/merge-obligations.mjs create mode 100644 scripts/obligations-to-tests.mjs create mode 100644 scripts/run-suite.mjs create mode 100644 skills/propagate/references/backend-authoring-guide.md create mode 100644 skills/propagate/references/obligation-bridge-schema.md diff --git a/scripts/canonicalize-obligations.mjs b/scripts/canonicalize-obligations.mjs new file mode 100644 index 0000000..d4ce678 --- /dev/null +++ b/scripts/canonicalize-obligations.mjs @@ -0,0 +1,427 @@ +#!/usr/bin/env node +// Obligation-bridge inventory canonicalizer. +// +// Reads one LLM-produced obligation-bridge.json (from the Stage A subagent +// pass) and writes a normalized form (obligation-bridge.canonical.json). The +// normalization is deterministic and idempotent: two inventories differing +// only in field order, whitespace, or the LLM's choice of advisory +// target_file / test_name collapse to the same canonical JSON. +// +// What we normalize: +// - Top-level fields: spec_path, code_root, framework preserved verbatim. +// - obligations[]: sorted alphabetically by obligation_id; per-entry fields +// normalized. +// - bridge.candidates[]: sorted, deduplicated, primary_symbol removed if +// present. +// - preconditions[], fixtures_required[], injection_points[]: trimmed, +// deduplicated, sorted. +// - target_file and test_name: RECOMPUTED from the backend's name-policy +// (LLM's advisory values are discarded). +// - transition_graph: per-entity edge arrays sorted by (from, to, via_rule). +// - JSON output: 2-space indent, sorted keys at every level. +// +// What we DO NOT normalize: +// - The set of obligation_ids. 
If the set diverges from `allium plan`'s +// output, the canonicaliser exits non-zero rather than silently dropping +// or filling in entries. +// +// Validation errors (set membership wrong, framework unknown, low-confidence +// rules violated, paths missing) cause a non-zero exit so the orchestrator +// can decide whether to discard the sample or abort. +// +// Usage: +// node canonicalize-obligations.mjs \ +// <input.json> <output.json> \ +// --plan <plan.json> \ +// [--backends-root <dir>] +// +// --plan <plan.json> is the JSON output of `allium plan <spec>`, used to validate that +// the obligation_id multiset matches exactly. +// +// --backends-root <dir> is the directory containing per-backend subdirectories +// (defaults to ../skills/propagate/backends, resolved relative to this script). + +import { readFileSync, writeFileSync, existsSync, statSync } from "fs"; +import path from "path"; +import { fileURLToPath } from "url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const DEFAULT_BACKENDS_ROOT = path.resolve( + __dirname, + "..", + "skills", + "propagate", + "backends", +); + +const VALID_TEST_KINDS = new Set([ + "assertion", + "pbt", + "state_machine", + "temporal", + "scenario", + "contract", +]); + +const VALID_CONFIDENCE = new Set(["high", "medium", "low"]); +const VALID_INJECTION_POINTS = new Set(["clock", "random", "network"]); + +function die(msg) { + console.error(`canonicalize-obligations: ${msg}`); + process.exit(2); +} + +function parseArgs(argv) { + const args = { positional: [] }; + for (let i = 2; i < argv.length; i++) { + const a = argv[i]; + if (a === "--plan") args.plan = argv[++i]; + else if (a === "--backends-root") args.backendsRoot = argv[++i]; + else if (a.startsWith("--")) die(`unknown flag: ${a}`); + else args.positional.push(a); + } + if (args.positional.length < 2) { + die( + "usage: canonicalize-obligations.mjs --plan [--backends-root ]", + ); + } + if (!args.plan) die("missing required flag --plan "); + args.input = args.positional[0]; + args.output = args.positional[1]; + args.backendsRoot = 
args.backendsRoot ?? DEFAULT_BACKENDS_ROOT; + return args; +} + +function readJson(p) { + try { + return JSON.parse(readFileSync(p, "utf-8")); + } catch (err) { + die(`failed to read JSON from ${p}: ${err.message}`); + } +} + +function loadBackend(backendsRoot, framework) { + const dir = path.join(backendsRoot, framework); + if (!existsSync(dir) || !statSync(dir).isDirectory()) { + die(`framework "${framework}" not found under ${backendsRoot}`); + } + const manifest = readJson(path.join(dir, "manifest.json")); + const namePolicy = readJson(path.join(dir, "name-policy.json")); + if (manifest.manifest_version !== 1) { + die( + `framework "${framework}" has manifest_version=${manifest.manifest_version}; only 1 is supported`, + ); + } + return { manifest, namePolicy, dir }; +} + +function normString(s) { + if (typeof s !== "string") return s; + return s.trim().replace(/\s+/g, " "); +} + +function dedupeSort(arr) { + return [...new Set(arr.map((v) => normString(v)).filter((v) => v))] + .sort((a, b) => a.localeCompare(b)); +} + +function isPathSymbol(s) { + return typeof s === "string" && /^[^:]+::[^:].*/.test(s); +} + +function splitPathSymbol(s) { + const idx = s.indexOf("::"); + if (idx < 0) return [null, null]; + return [s.slice(0, idx), s.slice(idx + 2)]; +} + +function splitIntoWords(input) { + // Split a string into words, respecting CamelCase and PascalCase boundaries + // in addition to non-alphanumeric separators. So "AssessorDispatch" -> ["Assessor","Dispatch"], + // "IOError" -> ["IO","Error"], "entity-fields.IncidentReport" -> ["entity","fields","Incident","Report"]. 
+ return String(input) + .replace(/([a-z0-9])([A-Z])/g, "$1 $2") // camelCase boundary + .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2") // acronym followed by word + .replace(/[^a-zA-Z0-9]+/g, " ") + .trim() + .split(/\s+/) + .filter(Boolean); +} + +function caseTransform(input, caseName) { + const parts = splitIntoWords(input); + if (parts.length === 0) return ""; + switch (caseName) { + case "snake": + return parts.map((p) => p.toLowerCase()).join("_"); + case "kebab": + return parts.map((p) => p.toLowerCase()).join("-"); + case "camel": + return parts + .map((p, i) => + i === 0 + ? p.toLowerCase() + : p.charAt(0).toUpperCase() + p.slice(1).toLowerCase(), + ) + .join(""); + case "pascal": + return parts + .map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase()) + .join(""); + default: + die(`unknown case "${caseName}"`); + } +} + +function obligationSubject(obligationId) { + // The obligation_id from `allium plan` looks like "category.Subject" or + // "category.Subject.detail" — we take the second segment as the subject. + const parts = obligationId.split("."); + return parts.length >= 2 ? parts[1] : parts[0]; +} + +function renderName(pattern, vars) { + return pattern.replace(/\{(\w+)\}/g, (_m, key) => { + if (!(key in vars)) { + die(`unknown placeholder {${key}} in name-policy pattern`); + } + return vars[key]; + }); +} + +function computeTargetFile(obligation, manifest, namePolicy) { + const subjectRaw = obligationSubject(obligation.obligation_id); + const subject = caseTransform(subjectRaw, namePolicy.file_name_case); + let dir = namePolicy.directory_layout ?? 
""; + if (dir && !dir.endsWith("/")) dir = `${dir}/`; + const filename = renderName(namePolicy.file_pattern, { + obligation_subject: subject, + file_extension: manifest.file_extension, + }); + return `${dir}${filename}`; +} + +function computeTestName(obligation, namePolicy) { + const slug = caseTransform( + obligation.obligation_id, + namePolicy.test_name_case, + ); + return renderName(namePolicy.test_name_pattern, { + obligation_id_slug: slug, + }); +} + +function multisetCounts(items, key) { + const counts = new Map(); + for (const it of items) { + const k = typeof key === "function" ? key(it) : it[key]; + counts.set(k, (counts.get(k) ?? 0) + 1); + } + return counts; +} + +function validateObligationSet(inv, plan) { + // Validate multiset equality, not set equality — `allium plan` can emit + // the same obligation_id more than once when the spec has overloaded + // rules (e.g. two `rule R { ... }` blocks with the same name but + // different signatures). In that case the inventory must contain the + // same multiplicity per id. + const planCounts = multisetCounts(plan.obligations, "id"); + const invCounts = multisetCounts(inv.obligations, "obligation_id"); + const issues = []; + for (const [id, n] of planCounts) { + const got = invCounts.get(id) ?? 0; + if (got !== n) issues.push(`${id}: expected ${n}, got ${got}`); + } + for (const id of invCounts.keys()) { + if (!planCounts.has(id)) issues.push(`${id}: not in plan`); + } + if (issues.length) { + const preview = issues.slice(0, 5).join("; ") + (issues.length > 5 ? "; …" : ""); + die(`obligation_id multiset mismatch (${preview})`); + } +} + +function disambiguateDuplicates(obligations) { + // For each obligation_id that appears more than once, sort the duplicates + // by a stable canonical key (bridge primary_symbol + test_kind + preconditions) + // and append `__1`, `__2`, ... so downstream merge groups them uniquely. 
+ // The disambiguation order is deterministic for a given inventory but + // there is no cross-sample alignment guarantee — if K samples disagree + // about which witness pairs with which copy, the merger may pair + // mismatched copies. Spec-side: rename overloaded rules to avoid this. + const byId = new Map(); + for (const o of obligations) { + if (!byId.has(o.obligation_id)) byId.set(o.obligation_id, []); + byId.get(o.obligation_id).push(o); + } + const out = []; + for (const [id, group] of byId) { + if (group.length === 1) { + out.push(group[0]); + continue; + } + const sorted = [...group].sort((a, b) => { + const ka = JSON.stringify([a.bridge?.primary_symbol ?? null, a.test_kind, a.preconditions ?? []]); + const kb = JSON.stringify([b.bridge?.primary_symbol ?? null, b.test_kind, b.preconditions ?? []]); + return ka.localeCompare(kb); + }); + sorted.forEach((o, i) => { + out.push({ ...o, obligation_id: `${id}__${i + 1}` }); + }); + } + return out; +} + +function validateBridge(b, oblId) { + if (b == null || typeof b !== "object") { + die(`obligation ${oblId}: bridge is missing or not an object`); + } + if (b.primary_symbol != null && !isPathSymbol(b.primary_symbol)) { + die(`obligation ${oblId}: bridge.primary_symbol "${b.primary_symbol}" not in :: form`); + } + if (b.candidates && !Array.isArray(b.candidates)) { + die(`obligation ${oblId}: bridge.candidates must be an array`); + } + for (const c of b.candidates ?? []) { + if (!isPathSymbol(c)) { + die(`obligation ${oblId}: bridge.candidates entry "${c}" not in :: form`); + } + } + if (!VALID_CONFIDENCE.has(b.confidence)) { + die(`obligation ${oblId}: bridge.confidence "${b.confidence}" not one of high|medium|low`); + } + if (b.confidence === "low") { + const cands = (b.candidates ?? 
[]).filter((c) => c !== b.primary_symbol); + if (!(cands.length >= 2 || b.primary_symbol == null)) { + die(`obligation ${oblId}: bridge.confidence=low requires >= 2 candidates or null primary`); + } + } +} + +function validatePathsExist(inv) { + if (!inv.code_root) die("inventory missing code_root"); + const root = path.resolve(inv.code_root); + for (const o of inv.obligations) { + const symbols = [ + o.bridge?.primary_symbol, + ...(o.bridge?.candidates ?? []), + ].filter(Boolean); + for (const s of symbols) { + const [p] = splitPathSymbol(s); + if (!p) continue; + const full = path.resolve(root, p); + if (!existsSync(full)) { + die(`obligation ${o.obligation_id}: bridge path "${p}" does not exist under code_root`); + } + } + } +} + +function canonObligation(o, manifest, namePolicy) { + if (!VALID_TEST_KINDS.has(o.test_kind)) { + die(`obligation ${o.obligation_id}: test_kind "${o.test_kind}" not in the allowed set`); + } + validateBridge(o.bridge, o.obligation_id); + const candidates = dedupeSort(o.bridge.candidates ?? []) + .filter((c) => c !== o.bridge.primary_symbol); + const injection = dedupeSort(o.injection_points ?? []); + for (const ip of injection) { + if (!VALID_INJECTION_POINTS.has(ip)) { + die(`obligation ${o.obligation_id}: injection_points value "${ip}" not one of clock|random|network`); + } + } + return { + obligation_id: o.obligation_id, + test_kind: o.test_kind, + bridge: { + primary_symbol: o.bridge.primary_symbol ?? null, + candidates: candidates, + confidence: o.bridge.confidence, + }, + preconditions: dedupeSort(o.preconditions ?? []), + fixtures_required: dedupeSort(o.fixtures_required ?? 
[]), + injection_points: injection, + target_file: computeTargetFile(o, manifest, namePolicy), + test_name: computeTestName(o, namePolicy), + }; +} + +function canonTransitionGraph(g) { + if (g == null) return {}; + if (typeof g !== "object" || Array.isArray(g)) { + die("transition_graph must be an object keyed by entity name"); + } + const out = {}; + for (const entity of Object.keys(g).sort()) { + const edges = g[entity]; + if (!Array.isArray(edges)) { + die(`transition_graph["${entity}"] must be an array of edges`); + } + const normalised = edges.map((e) => ({ + from: normString(e.from ?? ""), + to: normString(e.to ?? ""), + via_rule: normString(e.via_rule ?? ""), + })); + normalised.sort((a, b) => { + const ak = `${a.from}\x00${a.to}\x00${a.via_rule}`; + const bk = `${b.from}\x00${b.to}\x00${b.via_rule}`; + return ak.localeCompare(bk); + }); + // Dedupe. + const seen = new Set(); + const deduped = []; + for (const e of normalised) { + const key = `${e.from}\x00${e.to}\x00${e.via_rule}`; + if (seen.has(key)) continue; + seen.add(key); + deduped.push(e); + } + out[entity] = deduped; + } + return out; +} + +function canonInventory(inv, plan, backend) { + validateObligationSet(inv, plan); + validatePathsExist(inv); + const disambiguated = disambiguateDuplicates(inv.obligations); + const obligations = disambiguated + .map((o) => canonObligation(o, backend.manifest, backend.namePolicy)) + .sort((a, b) => a.obligation_id.localeCompare(b.obligation_id)); + return { + spec_path: normString(inv.spec_path ?? ""), + code_root: normString(inv.code_root ?? ""), + framework: inv.framework, + obligations, + transition_graph: canonTransitionGraph(inv.transition_graph ?? 
{}), + }; +} + +function stableStringify(value) { + return JSON.stringify(value, sortReplacer, 2) + "\n"; +} + +function sortReplacer(_key, value) { + if (value && typeof value === "object" && !Array.isArray(value)) { + return Object.fromEntries( + Object.entries(value).sort(([a], [b]) => a.localeCompare(b)), + ); + } + return value; +} + +function main() { + const args = parseArgs(process.argv); + const inv = readJson(args.input); + const plan = readJson(args.plan); + if (!inv.framework) die("inventory missing framework field"); + const backend = loadBackend(args.backendsRoot, inv.framework); + const canon = canonInventory(inv, plan, backend); + writeFileSync(args.output, stableStringify(canon)); + console.error( + `canonicalize-obligations: ${args.input} -> ${args.output} (${canon.obligations.length} obligations, framework=${canon.framework})`, + ); +} + +main(); diff --git a/scripts/merge-obligations.mjs b/scripts/merge-obligations.mjs new file mode 100644 index 0000000..811a26b --- /dev/null +++ b/scripts/merge-obligations.mjs @@ -0,0 +1,307 @@ +#!/usr/bin/env node +// Consensus merger for K canonical obligation-bridge inventories. +// +// Inputs: K obligation-bridge.canonical.json files produced by +// canonicalize-obligations.mjs. +// +// Output: 1 obligation-bridge.merged.json, byte-deterministic. +// +// Merge rules (distinct from distill's merge — the obligation set is fixed +// by `allium plan`, so we vote per-field within each obligation rather than +// over set membership): +// +// - The set of obligation_ids is identical across K by construction (the +// canonicaliser rejects any sample that deviates), so no membership +// vote is needed. If we see a mismatch here, abort. +// - For each obligation: +// - test_kind: modal value across K, first-occurrence tie-break. +// - bridge.primary_symbol: modal value. 
If no strict majority +// (>= floor(K/2) + 1 of K, i.e. more than half; this means unanimous for +// K<=2), set primary_symbol = null, populate candidates with all +// observed primaries, and downgrade confidence to "low". +// - bridge.candidates: set-union of all candidates observed across K, +// minus the merged primary_symbol. +// - bridge.confidence: +// "low" if the consensus was forced (no majority on primary) +// or any sample voted low for the same primary. +// lowest of {samples voting for the merged primary}'s confidences +// otherwise. +// - preconditions, fixtures_required, injection_points: +// elements appearing in >= ceil(K/2) of the samples (set-style +// majority); sorted output. +// - target_file / test_name: must be identical across K (the +// canonicaliser computes them deterministically from name-policy); +// if they differ, abort with a misconfiguration error. +// - transition_graph: per-entity union of edges appearing in >= ceil(K/2) +// of the samples; per-edge unanimity is expected (allium model is +// deterministic). Disagreements are logged to stderr as warnings. +// - framework: must be identical across K; otherwise abort. +// +// The output JSON has sorted keys at every depth, identical to the +// canonicaliser's output. +// +// Usage: +// node merge-obligations.mjs <output.json> <input1.json> [<input2.json> ...] + +import { readFileSync, writeFileSync } from "fs"; + +function die(msg) { + console.error(`merge-obligations: ${msg}`); + process.exit(2); +} + +function warn(msg) { + console.error(`merge-obligations: warning: ${msg}`); +} + +function readJson(p) { + try { + return JSON.parse(readFileSync(p, "utf-8")); + } catch (err) { + die(`failed to read JSON from ${p}: ${err.message}`); + } +} + +function majorityThreshold(k) { + return Math.ceil(k / 2); +} + +function strictMajorityThreshold(k) { + // For "strict majority on primary_symbol" we want more than half. + // For K=3: 2 of 3 is enough; for K=5: 3 of 5; for K=2: requires 2 (unanimous). 
+ return Math.floor(k / 2) + 1; +} + +function modeOrNull(values, threshold) { + const counts = new Map(); + const firstSeen = new Map(); + for (let i = 0; i < values.length; i++) { + const v = values[i]; + const k = JSON.stringify(v ?? null); + counts.set(k, (counts.get(k) ?? 0) + 1); + if (!firstSeen.has(k)) firstSeen.set(k, i); + } + let bestKey = null; + let bestCount = 0; + let bestFirst = Infinity; + for (const [k, c] of counts.entries()) { + if (c > bestCount || (c === bestCount && firstSeen.get(k) < bestFirst)) { + bestKey = k; + bestCount = c; + bestFirst = firstSeen.get(k); + } + } + if (bestCount < threshold) return { value: null, count: bestCount, total: values.length }; + return { value: bestKey === null ? null : JSON.parse(bestKey), count: bestCount, total: values.length }; +} + +function modalValue(values) { + // No threshold — just pick most common with deterministic tie-break. + return modeOrNull(values, 1).value; +} + +function majorityElements(arrays, k) { + const threshold = majorityThreshold(k); + const counts = new Map(); + for (const arr of arrays) { + const seen = new Set(); + for (const v of arr ?? []) { + const key = JSON.stringify(v); + if (seen.has(key)) continue; + seen.add(key); + counts.set(key, (counts.get(key) ?? 0) + 1); + } + } + return [...counts.entries()] + .filter(([, c]) => c >= threshold) + .map(([key]) => JSON.parse(key)) + .sort(); +} + +function mergeBridge(samples, k) { + // samples is the array of bridge objects, one per K. + const primaries = samples.map((s) => s.primary_symbol ?? 
null); + const strict = strictMajorityThreshold(k); + const primaryResult = modeOrNull(primaries, strict); + + let mergedPrimary; + let confidence; + let forcedLow = false; + if (primaryResult.value !== null && primaryResult.count >= strict) { + mergedPrimary = primaryResult.value; + const voters = samples.filter((s) => s.primary_symbol === mergedPrimary); + const rank = { high: 2, medium: 1, low: 0 }; + let lowest = "high"; + for (const v of voters) { + if (rank[v.confidence] < rank[lowest]) lowest = v.confidence; + } + confidence = lowest; + } else { + mergedPrimary = null; + forcedLow = true; + confidence = "low"; + } + + // Candidates: union of all candidates from all samples, plus any + // primary_symbol that's not the merged primary, minus the merged primary. + const candSet = new Set(); + for (const s of samples) { + for (const c of s.candidates ?? []) candSet.add(c); + if (s.primary_symbol && s.primary_symbol !== mergedPrimary) { + candSet.add(s.primary_symbol); + } + } + if (mergedPrimary) candSet.delete(mergedPrimary); + const candidates = [...candSet].sort(); + + // Force low confidence if we had to fall back, or if there's any low + // among the samples that vote for the merged primary. 
+ if (!forcedLow) { + const voters = samples.filter((s) => s.primary_symbol === mergedPrimary); + if (voters.some((v) => v.confidence === "low")) confidence = "low"; + } + + return { + primary_symbol: mergedPrimary, + candidates, + confidence, + }; +} + +function mergeObligation(samples, k) { + const id = samples[0].obligation_id; + for (const s of samples) { + if (s.obligation_id !== id) { + die(`obligation_id mismatch within a merge group: ${id} vs ${s.obligation_id}`); + } + } + const tfs = new Set(samples.map((s) => s.target_file)); + if (tfs.size !== 1) { + die(`obligation ${id}: target_file differs across samples (${[...tfs].join(", ")}) — name-policy not applied identically`); + } + const tns = new Set(samples.map((s) => s.test_name)); + if (tns.size !== 1) { + die(`obligation ${id}: test_name differs across samples (${[...tns].join(", ")}) — name-policy not applied identically`); + } + return { + obligation_id: id, + test_kind: modalValue(samples.map((s) => s.test_kind)), + bridge: mergeBridge(samples.map((s) => s.bridge), k), + preconditions: majorityElements(samples.map((s) => s.preconditions ?? []), k), + fixtures_required: majorityElements(samples.map((s) => s.fixtures_required ?? []), k), + injection_points: majorityElements(samples.map((s) => s.injection_points ?? []), k), + target_file: [...tfs][0], + test_name: [...tns][0], + }; +} + +function mergeTransitionGraph(graphs, k) { + const allEntities = new Set(); + for (const g of graphs) for (const key of Object.keys(g ?? {})) allEntities.add(key); + const out = {}; + for (const entity of [...allEntities].sort()) { + const edgeArrays = graphs.map((g) => g?.[entity] ?? []); + // Build set of edges with their counts. + const counts = new Map(); + for (const arr of edgeArrays) { + const seen = new Set(); + for (const e of arr) { + const key = JSON.stringify({ from: e.from, to: e.to, via_rule: e.via_rule }); + if (seen.has(key)) continue; + seen.add(key); + counts.set(key, (counts.get(key) ?? 
0) + 1); + } + } + const threshold = majorityThreshold(k); + const edges = []; + for (const [key, c] of counts.entries()) { + if (c >= threshold) edges.push(JSON.parse(key)); + else warn(`transition_graph[${entity}]: edge ${key} appears in ${c}/${k} samples (below ${threshold}); dropping`); + } + edges.sort((a, b) => { + const ak = `${a.from}\x00${a.to}\x00${a.via_rule}`; + const bk = `${b.from}\x00${b.to}\x00${b.via_rule}`; + return ak.localeCompare(bk); + }); + out[entity] = edges; + } + return out; +} + +function mergeInventories(inventories) { + if (inventories.length === 0) die("no inventories given"); + const k = inventories.length; + + // Framework must match across all samples. + const frameworks = new Set(inventories.map((i) => i.framework)); + if (frameworks.size !== 1) { + die(`framework differs across samples: ${[...frameworks].join(", ")}`); + } + + // spec_path and code_root must match (they should be set by the + // orchestrator, not freely chosen by subagents). + const specPaths = new Set(inventories.map((i) => i.spec_path)); + if (specPaths.size !== 1) die(`spec_path differs across samples: ${[...specPaths].join(", ")}`); + const codeRoots = new Set(inventories.map((i) => i.code_root)); + if (codeRoots.size !== 1) die(`code_root differs across samples: ${[...codeRoots].join(", ")}`); + + // Group obligations by obligation_id; every inventory must contribute + // exactly one entry per id (canonicaliser enforced this against `plan`). 
+ const groups = new Map(); + for (const inv of inventories) { + for (const o of inv.obligations) { + if (!groups.has(o.obligation_id)) groups.set(o.obligation_id, []); + groups.get(o.obligation_id).push(o); + } + } + for (const [id, items] of groups) { + if (items.length !== k) { + die(`obligation ${id}: appears in ${items.length}/${k} samples (canonicaliser should have caught this)`); + } + } + + const mergedObligations = [...groups.entries()] + .sort(([a], [b]) => a.localeCompare(b)) + .map(([, items]) => mergeObligation(items, k)); + + return { + spec_path: [...specPaths][0], + code_root: [...codeRoots][0], + framework: [...frameworks][0], + obligations: mergedObligations, + transition_graph: mergeTransitionGraph(inventories.map((i) => i.transition_graph ?? {}), k), + consensus_metadata: { + sample_count: k, + generated_at: null, // intentionally null so the output is reproducible across time + }, + }; +} + +function stableStringify(value) { + return JSON.stringify(value, sortReplacer, 2) + "\n"; +} + +function sortReplacer(_key, value) { + if (value && typeof value === "object" && !Array.isArray(value)) { + return Object.fromEntries( + Object.entries(value).sort(([a], [b]) => a.localeCompare(b)), + ); + } + return value; +} + +function main() { + const [, , outputPath, ...inputs] = process.argv; + if (!outputPath || inputs.length === 0) { + die("usage: merge-obligations.mjs ..."); + } + const invs = inputs.map(readJson); + const merged = mergeInventories(invs); + writeFileSync(outputPath, stableStringify(merged)); + const lowCount = merged.obligations.filter((o) => o.bridge.confidence === "low").length; + console.error( + `merge-obligations: ${invs.length} inventories -> ${outputPath} (${merged.obligations.length} obligations, ${lowCount} low-confidence)`, + ); +} + +main(); diff --git a/scripts/obligations-to-tests.mjs b/scripts/obligations-to-tests.mjs new file mode 100644 index 0000000..dec5898 --- /dev/null +++ b/scripts/obligations-to-tests.mjs @@ -0,0 
+1,454 @@ +#!/usr/bin/env node +// Obligation -> tests translator (deterministic core + backend dispatch). +// +// Reads a merged obligation-bridge.merged.json (from merge-obligations.mjs) +// and renders it through the backend named in the inventory's `framework` +// field. Pure function in spirit: given the same merged input and the same +// backend, two runs produce byte-identical output files. +// +// The translator core is backend-agnostic. All language-specific knowledge +// lives in: +// - backends/<framework>/manifest.json (runner, idioms, injection) +// - backends/<framework>/name-policy.json (already applied to inventory) +// - backends/<framework>/templates/*.tmpl (one per test_kind + file + fixture) +// +// Template placeholder grammar (implemented below; do not extend without +// updating backend-authoring-guide.md): +// {{name}} substitute value (dot-paths into context) +// {{#each items}}…{{/each}} repeat body once per item (binds it and index) +// {{#if cond}}…{{else}}…{{/if}} conditional (truthy = non-null, non-empty) +// {{!comment}} stripped from output +// +// Usage: +// node obligations-to-tests.mjs <merged.json> --out <dir> [--backends-root <dir>] +// +// --out <dir> is the directory the generated test files are written to. The +// translator writes paths under this directory matching each obligation's +// target_file (which already includes the backend's directory_layout). 
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
import path from "path";
import { fileURLToPath } from "url";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Default location of the per-framework backend directories. Resolved
// relative to this script so the result does not depend on the caller's
// working directory.
const DEFAULT_BACKENDS_ROOT = path.resolve(
  __dirname,
  "..",
  "skills",
  "propagate",
  "backends",
);

/**
 * Report a fatal error on stderr (prefixed with the script name) and exit
 * with status 2. Never returns.
 *
 * @param {string} msg - Human-readable description of the failure.
 */
function die(msg) {
  console.error(`obligations-to-tests: ${msg}`);
  process.exit(2);
}

/**
 * Parse the command line.
 *
 * Recognised flags are `--out <dir>` (required) and `--backends-root <dir>`
 * (optional, defaults to DEFAULT_BACKENDS_ROOT). The first positional
 * argument is the merged inventory path. Any other `--flag`, a flag without
 * a value, a missing positional or a missing `--out` terminates via die().
 *
 * @param {string[]} argv - Full argument vector in `process.argv` layout
 *   (parsing starts at index 2).
 * @returns {{positional: string[], out: string, merged: string, backendsRoot: string}}
 */
function parseArgs(argv) {
  const args = { positional: [] };

  // A value-taking flag must be followed by an actual value. Without this
  // check `--backends-root` as the last argument silently fell back to the
  // default, and `--out --backends-root x` used a flag as the directory.
  const valueOf = (flag, index) => {
    const value = argv[index];
    if (value === undefined || value.startsWith("--")) {
      die(`flag ${flag} requires a value`);
    }
    return value;
  };

  for (let i = 2; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--out") args.out = valueOf(a, ++i);
    else if (a === "--backends-root") args.backendsRoot = valueOf(a, ++i);
    else if (a.startsWith("--")) die(`unknown flag: ${a}`);
    else args.positional.push(a);
  }
  if (args.positional.length < 1) {
    die("usage: obligations-to-tests.mjs <merged.json> --out <dir> [--backends-root <dir>]");
  }
  if (!args.out) die("missing required flag --out <dir>");
  args.merged = args.positional[0];
  args.backendsRoot = args.backendsRoot ?? DEFAULT_BACKENDS_ROOT;
  return args;
}

/**
 * Read and parse a JSON file, terminating via die() on any read or parse
 * error.
 *
 * @param {string} p - Path of the JSON file.
 * @returns {*} The parsed value.
 */
function readJson(p) {
  try {
    return JSON.parse(readFileSync(p, "utf-8"));
  } catch (err) {
    die(`failed to read JSON from ${p}: ${err.message}`);
  }
}

/**
 * Load one backend: its manifest, its name policy and the raw text of every
 * required template. Terminates via die() when the backend directory or a
 * template is missing, or when the manifest version is not 1.
 *
 * @param {string} backendsRoot - Directory holding one subdirectory per framework.
 * @param {string} framework - Backend (subdirectory) name.
 * @returns {{manifest: object, namePolicy: object, templates: Object<string, string>, dir: string}}
 */
function loadBackend(backendsRoot, framework) {
  const dir = path.join(backendsRoot, framework);
  if (!existsSync(dir)) die(`framework "${framework}" not found under ${backendsRoot}`);
  const manifest = readJson(path.join(dir, "manifest.json"));
  if (manifest.manifest_version !== 1) {
    die(`framework "${framework}" has manifest_version=${manifest.manifest_version}; only 1 is supported`);
  }
  const namePolicy = readJson(path.join(dir, "name-policy.json"));
  const templates = {};
  const required = [
    "test-file",
    "assertion",
    "pbt-property",
    "state-machine",
    "stub-unresolved",
    "fixture",
  ];
  for (const t of required) {
    const p = path.join(dir, "templates", `${t}.tmpl`);
    if (!existsSync(p)) die(`backend ${framework}: missing template ${t}.tmpl`);
    templates[t] = readFileSync(p, "utf-8");
  }
  return { manifest, namePolicy, templates, dir };
}

// --- template parser & renderer ---------------------------------------------

// Compile templates into a node tree once, then render. The renderer is
// purely deterministic; it doesn't touch the filesystem or the clock.

/**
 * Compile template source into a node tree for render().
 *
 * Node kinds: `text`, `var`, `each` (with `body`) and `if` (with `thenBody`
 * and `elseBody`). Comment tokens are dropped. An unknown, stray or
 * unterminated block tag terminates via die().
 *
 * @param {string} src - Template text.
 * @returns {object[]} Top-level node list.
 */
function compileTemplate(src) {
  const tokens = tokenize(src);
  let i = 0;
  // Recursive descent over the token stream. `stopAt` lists the block tags
  // that end the current list; the stop tag itself is left for the caller
  // to consume so it can tell `else` from `/if`.
  function parseList(stopAt) {
    const nodes = [];
    while (i < tokens.length) {
      const t = tokens[i];
      if (t.kind === "block" && stopAt && stopAt.includes(t.tag)) return nodes;
      if (t.kind === "text") {
        nodes.push({ kind: "text", value: t.value });
        i++;
      } else if (t.kind === "var") {
        nodes.push({ kind: "var", path: t.path });
        i++;
      } else if (t.kind === "comment") {
        i++;
      } else if (t.kind === "block") {
        if (t.tag.startsWith("#each ")) {
          const expr = t.tag.slice(6).trim();
          i++;
          const body = parseList(["/each"]);
          const end = tokens[i];
          if (!end || end.tag !== "/each") die(`template error: unterminated {{#each ${expr}}}`);
          i++;
          nodes.push({ kind: "each", path: expr.split("."), body });
        } else if (t.tag.startsWith("#if ")) {
          const expr = t.tag.slice(4).trim();
          i++;
          const thenBody = parseList(["else", "/if"]);
          let elseBody = [];
          if (tokens[i]?.tag === "else") {
            i++;
            elseBody = parseList(["/if"]);
          }
          const end = tokens[i];
          if (!end || end.tag !== "/if") die(`template error: unterminated {{#if ${expr}}}`);
          i++;
          nodes.push({ kind: "if", path: expr.split("."), thenBody, elseBody });
        } else {
          // Covers stray closers and `else` outside the block they belong to.
          die(`template error: unknown block "${t.tag}"`);
        }
      }
    }
    return nodes;
  }
  return parseList(null);
}

/**
 * Split template source into text, var, comment and block tokens.
 *
 * A tag is `{{…}}` with no `}` inside. After trimming, a leading `!` makes
 * it a comment; a leading `#` or `/`, or the exact word `else`, makes it a
 * block tag; anything else is a dot-separated variable path.
 *
 * @param {string} src - Template text.
 * @returns {object[]} Tokens in source order.
 */
function tokenize(src) {
  const tokens = [];
  const re = /\{\{([^}]+)\}\}/g;
  let last = 0;
  let m;
  while ((m = re.exec(src)) !== null) {
    if (m.index > last) tokens.push({ kind: "text", value: src.slice(last, m.index) });
    const inner = m[1].trim();
    if (inner.startsWith("!")) {
      tokens.push({ kind: "comment", value: inner.slice(1) });
    } else if (inner.startsWith("#") || inner.startsWith("/") || inner === "else") {
      tokens.push({ kind: "block", tag: inner });
    } else {
      tokens.push({ kind: "var", path: inner.split(".") });
    }
    last = re.lastIndex;
  }
  if (last < src.length) tokens.push({ kind: "text", value: src.slice(last) });
  return tokens;
}

/**
 * Resolve a dot-path against a context object.
 *
 * @param {*} ctx - Root value.
 * @param {string[]} segs - Path segments.
 * @returns {*} The value found, or `undefined` once an intermediate value is
 *   null or undefined.
 */
function lookup(ctx, segs) {
  let v = ctx;
  for (const s of segs) {
    if (v == null) return undefined;
    v = v[s];
  }
  return v;
}

/**
 * Template truthiness used by `{{#if}}`.
 *
 * Falsy values: null, undefined, `false`, empty array, empty string and 0.
 * Everything else, including empty objects, is truthy.
 *
 * @param {*} v - Value to test.
 * @returns {boolean}
 */
function truthy(v) {
  if (v == null) return false;
  // Booleans must be honoured as-is. Previously `false` fell through to the
  // final `return true` and selected the then-branch.
  if (typeof v === "boolean") return v;
  if (Array.isArray(v)) return v.length > 0;
  if (typeof v === "string") return v.length > 0;
  if (typeof v === "number") return v !== 0;
  return true;
}

/**
 * Render a compiled node list against a context.
 *
 * `var` nodes emit `String(value)`, or nothing for null/undefined. `each`
 * nodes render their body once per array element with `it` and `index`
 * added to a copy of the context; a non-array value renders nothing. `if`
 * nodes choose a branch with truthy().
 *
 * @param {object[]} nodes - Output of compileTemplate().
 * @param {object} ctx - Values visible to the template.
 * @returns {string} Rendered text.
 */
function render(nodes, ctx) {
  let out = "";
  for (const n of nodes) {
    if (n.kind === "text") out += n.value;
    else if (n.kind === "var") {
      const v = lookup(ctx, n.path);
      out += v == null ? "" : String(v);
    } else if (n.kind === "each") {
      const list = lookup(ctx, n.path);
      if (!Array.isArray(list)) continue;
      for (let i = 0; i < list.length; i++) {
        out += render(n.body, { ...ctx, it: list[i], index: i });
      }
    } else if (n.kind === "if") {
      const v = lookup(ctx, n.path);
      out += render(truthy(v) ? n.thenBody : n.elseBody, ctx);
    }
  }
  return out;
}

// --- import collection ------------------------------------------------------

// Each backend declares its imports in manifest.json:
//   imports.base            — always included
//   imports.pbt             — added for test_kind in PBT_KINDS
//   imports.state_machine   — added for test_kind state_machine
//   imports.temporal        — added for test_kind temporal (defaults to .pbt)
//   bridge_import.transform — named transform that turns a <path>::<symbol>
//                             bridge into an import line in the language's idiom.
//
// To add a new backend, add an entry under BRIDGE_IMPORT_TRANSFORMS below
// and declare it in the backend's manifest.json.
const PBT_KINDS = new Set(["pbt", "temporal"]);
const SM_KINDS = new Set(["state_machine"]);

/**
 * Locale-independent string comparator (UTF-16 code-unit order).
 *
 * This is the same order a bare `Array.prototype.sort()` uses, so every sort
 * in this file agrees and the output does not depend on the host's locale
 * data, which `localeCompare` does.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} -1, 0 or 1.
 */
function compareCodeUnits(a, b) {
  if (a < b) return -1;
  if (a > b) return 1;
  return 0;
}

const BRIDGE_IMPORT_TRANSFORMS = {
  // Python: "app/services.py::approve_claim" ->
  //   "from app.services import approve_claim"
  // Path is converted to a module-dotted path; symbol's top-level
  // identifier is the imported name (so "ClaimService.approve" -> "ClaimService").
  python_module({ bridgePath, topLevelSymbol }) {
    if (!bridgePath.endsWith(".py")) return null;
    const mod = bridgePath.slice(0, -3).replace(/\//g, ".");
    return `from ${mod} import ${topLevelSymbol}`;
  },

  // TypeScript: "src/services/claim.ts::approveClaim" ->
  //   "import { approveClaim } from \"../src/services/claim\";"
  // The path is rewritten relative to the target test file.
  typescript_relative({ bridgePath, topLevelSymbol, targetFile }) {
    if (!/\.(ts|tsx|js|jsx|mjs|cjs)$/.test(bridgePath)) return null;
    const noExt = bridgePath.replace(/\.(ts|tsx|js|jsx|mjs|cjs)$/, "");
    // Compute relative-from-target-file. target_file is relative to
    // code_root (e.g. "tests/test_x.test.ts"); bridge path is too.
    // Use forward slashes for TS even on Windows.
    let rel = path.relative(path.dirname(targetFile), noExt).split(path.sep).join("/");
    // Only "./" and "../" prefixes mark a relative specifier; a name such as
    // ".hidden/x" still needs the explicit "./".
    if (!rel.startsWith("./") && !rel.startsWith("../")) rel = `./${rel}`;
    return `import { ${topLevelSymbol} } from "${rel}";`;
  },

  // Fallback: no import line (e.g. languages where every symbol is in scope).
  noop() {
    return null;
  },
};

/**
 * Build the import line for an obligation's primary bridge symbol.
 *
 * The symbol has the form "<path>::<symbol>"; the backend manifest names the
 * transform (default "noop") that turns it into an import statement.
 *
 * @param {object} manifest - Backend manifest.
 * @param {object} obligation - Merged obligation entry.
 * @returns {string|null} The import line, or null when there is nothing to import.
 */
function bridgeImportLine(manifest, obligation) {
  const sym = obligation.bridge?.primary_symbol;
  if (!sym) return null;
  const idx = sym.indexOf("::");
  if (idx < 0) return null;
  const bridgePath = sym.slice(0, idx);
  const symbol = sym.slice(idx + 2);
  const topLevelSymbol = symbol.split(".")[0];
  const transformName = manifest.bridge_import?.transform ?? "noop";
  // Own-property check so names like "toString" are reported as unknown
  // instead of resolving to an Object.prototype method.
  const transform = Object.prototype.hasOwnProperty.call(BRIDGE_IMPORT_TRANSFORMS, transformName)
    ? BRIDGE_IMPORT_TRANSFORMS[transformName]
    : undefined;
  if (!transform) {
    die(`backend ${manifest.id} declares unknown bridge_import.transform "${transformName}"`);
  }
  return transform({
    bridgePath,
    symbol,
    topLevelSymbol,
    targetFile: obligation.target_file,
    manifest,
  });
}

/**
 * Collect the import lines one obligation contributes to its test file.
 *
 * @param {object} manifest - Backend manifest (reads `imports.*`).
 * @param {object} obligation - Merged obligation entry.
 * @param {boolean} isLowConfidence - True when the obligation renders as a stub.
 * @returns {string[]} Base imports, then kind-specific imports, then the bridge import.
 */
function importsForObligation(manifest, obligation, isLowConfidence) {
  const map = manifest.imports ?? {};
  const base = [...(map.base ?? [])];
  if (isLowConfidence) return base; // stub-unresolved only needs the base imports
  const kind = obligation.test_kind;
  const extra = [];
  if (PBT_KINDS.has(kind)) {
    const pbtList = kind === "temporal" && map.temporal ? map.temporal : map.pbt;
    if (pbtList) extra.push(...pbtList);
  }
  if (SM_KINDS.has(kind) && map.state_machine) extra.push(...map.state_machine);
  const bridgeLine = bridgeImportLine(manifest, obligation);
  if (bridgeLine) extra.push(bridgeLine);
  return [...base, ...extra];
}

/**
 * The bare identifier used in the test body for a "<path>::<symbol>" bridge.
 *
 * @param {string|undefined|null} pathSymbol
 * @returns {string|null} Top-level identifier, or null if there is no symbol.
 */
function localNameFor(pathSymbol) {
  if (!pathSymbol) return null;
  const idx = pathSymbol.indexOf("::");
  if (idx < 0) return null;
  const symbol = pathSymbol.slice(idx + 2);
  return symbol.split(".")[0];
}

/**
 * De-duplicate and order import lines according to the backend's style.
 *
 * @param {string[]} imports - Import lines, possibly with duplicates.
 * @param {string} [style] - "python", "typescript", or anything else for a plain sort.
 * @returns {string[]}
 */
function sortImports(imports, style) {
  const uniq = [...new Set(imports)];
  if (style === "python") {
    // Convention: "import x" lines first, "from x import y" second; alphabetic within each.
    const importLines = uniq.filter((l) => l.startsWith("import ")).sort();
    const fromLines = uniq.filter((l) => l.startsWith("from ")).sort();
    // Lines of any other shape are kept (sorted, last) rather than silently dropped.
    const otherLines = uniq
      .filter((l) => !l.startsWith("import ") && !l.startsWith("from "))
      .sort();
    return [...importLines, ...fromLines, ...otherLines];
  }
  if (style === "typescript") {
    // External-first (no relative dot), then relative; alphabetic within group.
    const ext = uniq.filter((l) => !/from ["']\.\.?\//.test(l)).sort();
    const rel = uniq.filter((l) => /from ["']\.\.?\//.test(l)).sort();
    return [...ext, ...rel];
  }
  return uniq.sort();
}

// --- rendering pipeline -----------------------------------------------------

/**
 * Choose the template for an obligation.
 *
 * @param {string} testKind - The obligation's test_kind.
 * @param {boolean} isLowConfidence - Low-confidence bridges always render as stubs.
 * @returns {string} Template name (without the .tmpl suffix).
 */
function pickTemplateName(testKind, isLowConfidence) {
  if (isLowConfidence) return "stub-unresolved";
  switch (testKind) {
    case "assertion":
    case "scenario":
    case "contract":
      return "assertion";
    case "pbt":
    case "temporal":
      return "pbt-property";
    case "state_machine":
      return "state-machine";
    default:
      return "assertion";
  }
}

/**
 * Assemble the template context for one obligation.
 *
 * @param {object} obligation - Merged obligation entry (must carry `bridge`).
 * @param {object} merged - Whole merged inventory (reads `transition_graph`).
 * @param {object} backend - Loaded backend (manifest and name policy).
 * @returns {object} Context object handed to render().
 */
function buildObligationContext(obligation, merged, backend) {
  const isLowConfidence = obligation.bridge.confidence === "low";
  const transitionGraph = inferTransitionGraphForEntity(obligation, merged.transition_graph);
  const stateMachineClassName = `${entityForObligation(obligation)}StateMachine`;
  return {
    obligation,
    test_name: obligation.test_name,
    bridge: {
      ...obligation.bridge,
      primary_symbol_local: localNameFor(obligation.bridge.primary_symbol) ?? "None",
    },
    preconditions: obligation.preconditions ?? [],
    fixtures_required: obligation.fixtures_required ?? [],
    injection_points: obligation.injection_points ?? [],
    injection: {
      clock: backend.manifest.clock_injection ?? "",
      random: backend.manifest.random_injection ?? "",
      network: backend.manifest.network_injection ?? "",
    },
    transition_graph_for_entity: transitionGraph,
    state_machine_class_name: stateMachineClassName,
    manifest: backend.manifest,
    name_policy: backend.namePolicy,
    is_low_confidence: isLowConfidence,
  };
}

/**
 * Extract the entity name from an obligation id.
 *
 * obligation_id looks like "category.Subject" or "category.Subject.detail";
 * the second segment is returned, or the first when there is only one.
 *
 * @param {object} obligation
 * @returns {string}
 */
function entityForObligation(obligation) {
  const parts = obligation.obligation_id.split(".");
  return parts.length >= 2 ? parts[1] : parts[0];
}

/**
 * Edges of the transition graph that belong to the obligation's entity.
 *
 * @param {object} obligation
 * @param {object|undefined} graph - Map of entity name to edge array.
 * @returns {object[]} The entity's edges, or [] when none are declared.
 */
function inferTransitionGraphForEntity(obligation, graph) {
  const entity = entityForObligation(obligation);
  return Array.isArray(graph?.[entity]) ? graph[entity] : [];
}

/**
 * Render one obligation to a test body plus the imports and fixtures it needs.
 *
 * @returns {{body: string, imports: string[], fixtures: string[]}}
 */
function renderObligation(obligation, merged, backend, compiledTemplates) {
  const isLowConfidence = obligation.bridge.confidence === "low";
  const tname = pickTemplateName(obligation.test_kind, isLowConfidence);
  const ctx = buildObligationContext(obligation, merged, backend);
  const body = render(compiledTemplates[tname], ctx);
  const imports = importsForObligation(backend.manifest, obligation, isLowConfidence);
  return { body, imports, fixtures: obligation.fixtures_required ?? [] };
}

/**
 * Render a single fixture block from the backend's fixture template.
 *
 * @param {string} name - Fixture name.
 * @returns {string}
 */
function renderFixture(name, backend, compiledTemplates) {
  return render(compiledTemplates.fixture, {
    fixture_name: name,
    manifest: backend.manifest,
  });
}

/**
 * Render one complete test file.
 *
 * Imports, fixtures and tests are each put into a fixed order so the output
 * depends only on the inputs.
 *
 * @param {{imports: string[], tests: {testName: string, body: string}[], fixtures: Iterable<string>}} perFile
 * @returns {string} File content.
 */
function renderTestFile(perFile, backend, compiledTemplates) {
  const sortedImports = sortImports(perFile.imports, backend.manifest.imports_style);
  const fixtureBlocks = [...perFile.fixtures]
    .sort()
    .map((n) => renderFixture(n, backend, compiledTemplates));
  // Code-unit order, not localeCompare: collation differs between hosts and
  // would make the test order (and therefore the bytes written) vary.
  const testBlocks = [...perFile.tests]
    .sort((a, b) => compareCodeUnits(a.testName, b.testName))
    .map((t) => t.body);
  return render(compiledTemplates["test-file"], {
    imports: sortedImports,
    fixtures: fixtureBlocks,
    tests: testBlocks,
    manifest: backend.manifest,
  });
}

// --- main -------------------------------------------------------------------

/** Create a directory and any missing parents. */
function ensureDir(p) {
  mkdirSync(p, { recursive: true });
}

/** Write `content` to `relPath` under `outRoot`, creating parent directories. */
function writeFile(outRoot, relPath, content) {
  const full = path.join(outRoot, relPath);
  ensureDir(path.dirname(full));
  writeFileSync(full, content);
}

/**
 * Entry point: load the merged inventory and its backend, render every
 * obligation, group the results by target file and write the files under --out.
 */
function main() {
  const args = parseArgs(process.argv);
  const merged = readJson(args.merged);
  if (!merged.framework) die("merged inventory missing framework");
  // Fail with a clear message instead of a TypeError from the loop below.
  if (!Array.isArray(merged.obligations)) die("merged inventory missing obligations[]");
  const backend = loadBackend(args.backendsRoot, merged.framework);

  const compiledTemplates = Object.fromEntries(
    Object.entries(backend.templates).map(([k, v]) => [k, compileTemplate(v)]),
  );

  // Accumulate per-file: imports, tests, fixtures.
  const perFile = new Map();
  for (const o of merged.obligations) {
    const file = o.target_file;
    if (!perFile.has(file)) perFile.set(file, { imports: [], tests: [], fixtures: new Set() });
    const acc = perFile.get(file);
    const { body, imports, fixtures } = renderObligation(o, merged, backend, compiledTemplates);
    acc.imports.push(...imports);
    acc.tests.push({ testName: o.test_name, body });
    for (const f of fixtures) acc.fixtures.add(f);
  }

  ensureDir(args.out);
  const written = [];
  const files = [...perFile.entries()].sort(([a], [b]) => compareCodeUnits(a, b));
  for (const [file, acc] of files) {
    const content = renderTestFile(
      { imports: acc.imports, tests: acc.tests, fixtures: acc.fixtures },
      backend,
      compiledTemplates,
    );
    writeFile(args.out, file, content);
    written.push(file);
  }

  console.error(
    `obligations-to-tests: framework=${merged.framework} -> ${written.length} files under ${args.out}`,
  );
}

main();

/*
 * Patch boundary: the lines below open the next file of this patch and are
 * kept verbatim.
 *
 * diff --git a/scripts/run-suite.mjs b/scripts/run-suite.mjs
 * new file mode 100644
 * index 0000000..e728171
 * --- /dev/null
 * +++ b/scripts/run-suite.mjs
 * @@ -0,0 +1,342 @@
 * +#!/usr/bin/env node
 */
// Stage C: backend-aware runner + report formatter.
//
// Reads a merged obligation-bridge.merged.json (to know the framework, the
// obligation set, and bridge confidence), executes the backend's runner
// command against the generated tests, parses the JSON/XML report via a
// per-format adapter, categorises outcomes, and emits propagation-report.md.
//
// The runner is intentionally read-only against the inventory pipeline:
// nothing here feeds back into Stage B. A future iteration could close the
// loop.
//
// Outcome categorisation (independent of backend):
//   - pass → obligation covered
//   - fail → assertion failure on a non-stub test (potential real bug)
//   - error → exception unrelated to assertion (likely wrong bridge)
//   - skipped(bridge-unresolved) → low-confidence stub
//   - skipped(other) → infrastructure gap
//
// Usage:
//   node run-suite.mjs <merged.json> --tests-root <dir> \
//     [--report <path>] [--backends-root <dir>]
//
// --tests-root is where the generated tests live (i.e. the directory the
// translator wrote to, typically the code root so the test runner can also
// resolve the implementation).

import { readFileSync, writeFileSync, existsSync, mkdtempSync, rmSync } from "fs";
import { spawnSync } from "child_process";
import path from "path";
import os from "os";
import { fileURLToPath } from "url";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DEFAULT_BACKENDS_ROOT = path.resolve(__dirname, "..", "skills", "propagate", "backends");

/**
 * Print a prefixed error message to stderr and exit the process with code 2.
 *
 * @param {string} msg
 */
function die(msg) {
  console.error(`run-suite: ${msg}`);
  process.exit(2);
}

/**
 * Parse the command line.
 *
 * Flags `--tests-root`, `--report` and `--backends-root` each take a value;
 * any other `--flag` is rejected. The first positional argument is the merged
 * inventory path.
 *
 * @param {string[]} argv - Full argv; parsing starts at index 2.
 * @returns {{positional: string[], testsRoot: string, report: string, merged: string, backendsRoot: string}}
 */
function parseArgs(argv) {
  const args = { positional: [] };
  const valueFlags = {
    "--tests-root": "testsRoot",
    "--report": "report",
    "--backends-root": "backendsRoot",
  };
  for (let i = 2; i < argv.length; i++) {
    const a = argv[i];
    if (Object.prototype.hasOwnProperty.call(valueFlags, a)) {
      const value = argv[++i];
      // A flag at the very end of argv has no value; fail loudly instead of
      // silently falling back to a default.
      if (value === undefined) die(`flag ${a} requires a value`);
      args[valueFlags[a]] = value;
    } else if (a.startsWith("--")) {
      die(`unknown flag: ${a}`);
    } else {
      args.positional.push(a);
    }
  }
  if (args.positional.length < 1) {
    die("usage: run-suite.mjs <merged.json> --tests-root <dir>");
  }
  if (!args.testsRoot) die("missing required flag --tests-root <dir>");
  args.merged = args.positional[0];
  args.backendsRoot = args.backendsRoot ?? DEFAULT_BACKENDS_ROOT;
  args.report = args.report ?? "propagation-report.md";
  return args;
}

/** Read and parse a JSON file; any failure terminates via die(). */
function readJson(p) {
  try {
    return JSON.parse(readFileSync(p, "utf-8"));
  } catch (err) {
    die(`failed to read JSON from ${p}: ${err.message}`);
  }
}

/**
 * Load a backend's manifest.json.
 *
 * @param {string} backendsRoot - Directory containing one sub-directory per backend.
 * @param {string} framework - Backend directory name.
 * @returns {{manifest: object, dir: string}}
 */
function loadBackend(backendsRoot, framework) {
  const dir = path.join(backendsRoot, framework);
  if (!existsSync(dir)) die(`framework "${framework}" not found under ${backendsRoot}`);
  return { manifest: readJson(path.join(dir, "manifest.json")), dir };
}

// --- adapters ---------------------------------------------------------------

// Maps manifest.runner.report_format to the function that parses that report.
const ADAPTERS = {
  "pytest-junitxml": adaptPytestJunitXml,
  "jest-json": adaptJestJson,
};

/**
 * Minimal XML parser tailored to JUnit-XML's <testcase> shape.
 *
 * Only <testcase> elements are read, together with their attributes and a
 * nested <skipped>, <failure> or <error> child (checked in that order).
 *
 * @param {string} text - JUnit XML document.
 * @returns {object[]} One record per testcase: its attributes plus `status`
 *   ("pass" | "skipped" | "fail" | "error") and the matching child's
 *   attributes and trimmed body.
 */
function parseJunitXml(text) {
  const out = [];
  const re = /<testcase\b([^>]*?)(\/>|>([\s\S]*?)<\/testcase>)/g;
  let m;
  while ((m = re.exec(text)) !== null) {
    const attrs = parseAttrs(m[1]);
    const body = m[3] ?? "";
    const tc = { ...attrs, status: "pass" };
    const skippedM = /<skipped\b([^>]*)(?:\/>|>([\s\S]*?)<\/skipped>)/.exec(body);
    const failureM = /<failure\b([^>]*)(?:\/>|>([\s\S]*?)<\/failure>)/.exec(body);
    const errorM = /<error\b([^>]*)(?:\/>|>([\s\S]*?)<\/error>)/.exec(body);
    if (skippedM) {
      tc.status = "skipped";
      tc.skipped = parseAttrs(skippedM[1]);
      tc.skipped.body = (skippedM[2] ?? "").trim();
    } else if (failureM) {
      tc.status = "fail";
      tc.failure = parseAttrs(failureM[1]);
      tc.failure.body = (failureM[2] ?? "").trim();
    } else if (errorM) {
      tc.status = "error";
      tc.error = parseAttrs(errorM[1]);
      tc.error.body = (errorM[2] ?? "").trim();
    }
    out.push(tc);
  }
  return out;
}

/**
 * Parse `name="value"` pairs out of a tag's attribute text.
 *
 * @param {string} s - Raw attribute text.
 * @returns {Object<string, string>} Attribute values with entities decoded.
 */
function parseAttrs(s) {
  const out = {};
  const re = /(\w+)="([^"]*)"/g;
  let m;
  while ((m = re.exec(s)) !== null) out[m[1]] = decodeEntities(m[2]);
  return out;
}

const NAMED_ENTITIES = { lt: "<", gt: ">", quot: '"', apos: "'", amp: "&" };

/**
 * Decode the five predefined XML entities and numeric character references.
 *
 * Decoding is a single pass, so "&amp;lt;" becomes "&lt;" and is not decoded
 * a second time. Unknown or out-of-range references are left untouched.
 *
 * @param {string} s
 * @returns {string}
 */
function decodeEntities(s) {
  return s.replace(/&(#x[0-9a-fA-F]+|#\d+|lt|gt|quot|apos|amp);/g, (whole, ref) => {
    if (ref[0] !== "#") return NAMED_ENTITIES[ref];
    const code = ref[1] === "x"
      ? Number.parseInt(ref.slice(2), 16)
      : Number.parseInt(ref.slice(1), 10);
    return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
  });
}

/**
 * Convert a pytest JUnit-XML report into normalised results.
 *
 * The test id is "<file>::<name>", where the file is derived from the
 * testcase's dotted `classname` by replacing dots with slashes and appending
 * ".py".
 *
 * @param {string} text - JUnit XML document.
 * @param {object} manifest - Backend manifest (reads `skip_marker`).
 * @returns {object[]} Records with `test_id`, `outcome` and optional
 *   `markers`, `kind`, `message`.
 */
function adaptPytestJunitXml(text, manifest) {
  const skipMarker = manifest.skip_marker ?? "bridge-unresolved";
  const cases = parseJunitXml(text);
  return cases.map((tc) => {
    const file = (tc.classname || "").replace(/\./g, "/") + ".py";
    const id = `${file}::${tc.name}`;
    if (tc.status === "pass") return { test_id: id, outcome: "pass" };
    if (tc.status === "skipped") {
      const msg = tc.skipped?.message ?? "";
      const isBridgeUnresolved = msg.includes(skipMarker);
      return {
        test_id: id,
        outcome: "skipped",
        markers: isBridgeUnresolved ? [skipMarker] : [],
        message: msg,
      };
    }
    if (tc.status === "fail") {
      return {
        test_id: id,
        outcome: "fail",
        kind: tc.failure?.message ?? "AssertionError",
        message: (tc.failure?.body ?? "").slice(0, 500),
      };
    }
    if (tc.status === "error") {
      return {
        test_id: id,
        outcome: "error",
        kind: tc.error?.message ?? "Error",
        message: (tc.error?.body ?? "").slice(0, 500),
      };
    }
    return { test_id: id, outcome: "error", message: "unknown status" };
  });
}

/**
 * Convert a Jest `--json` report into normalised results.
 *
 * Jest --json schema:
 *   { testResults: [ { name|testFilePath, assertionResults: [ { fullName, status, failureMessages, ... } ] } ] }
 *
 * A failed test whose messages mention TypeError, ReferenceError, ImportError
 * or MODULE_NOT_FOUND is reported as "error" instead of "fail".
 *
 * @param {string} text - JSON report.
 * @param {object} manifest - Backend manifest (reads `skip_marker`).
 * @returns {object[]}
 */
function adaptJestJson(text, manifest) {
  const skipMarker = manifest.skip_marker ?? "bridge-unresolved";
  let data;
  try {
    data = JSON.parse(text);
  } catch (err) {
    die(`jest report is not valid JSON: ${err.message}`);
  }
  const out = [];
  for (const tr of data.testResults ?? []) {
    const filePath = tr.testFilePath ?? tr.name ?? "";
    for (const t of tr.assertionResults ?? tr.testResults ?? []) {
      const id = `${filePath}::${t.fullName}`;
      if (t.status === "passed") out.push({ test_id: id, outcome: "pass" });
      else if (t.status === "pending" || t.status === "skipped") {
        const msg = (t.failureMessages ?? []).join("\n");
        const isBridgeUnresolved = msg.includes(skipMarker)
          || (t.fullName ?? "").includes(skipMarker);
        out.push({
          test_id: id,
          outcome: "skipped",
          markers: isBridgeUnresolved ? [skipMarker] : [],
          message: msg,
        });
      } else if (t.status === "failed") {
        const msg = (t.failureMessages ?? []).join("\n");
        const isError = /TypeError|ReferenceError|ImportError|MODULE_NOT_FOUND/.test(msg);
        out.push({
          test_id: id,
          outcome: isError ? "error" : "fail",
          kind: isError ? "Error" : "AssertionError",
          message: msg.slice(0, 500),
        });
      }
    }
  }
  return out;
}

// --- runner ----------------------------------------------------------------

/**
 * Substitute `{name}` placeholders in a string, or in every element of an array.
 *
 * Placeholders with no matching own property in `vars` are left as written.
 *
 * @param {string|string[]} template
 * @param {Object<string, string>} vars
 * @returns {string|string[]}
 */
function expandPlaceholders(template, vars) {
  if (Array.isArray(template)) return template.map((s) => expandPlaceholders(s, vars));
  return String(template).replace(/\{(\w+)\}/g, (_m, key) =>
    // Own-property check: `key in vars` would also match inherited names
    // such as "constructor".
    (Object.prototype.hasOwnProperty.call(vars, key) ? vars[key] : `{${key}}`));
}

/**
 * Run the backend's test runner with `testsRoot` as the working directory.
 *
 * `{report_path}` and `{test_root}` are expanded in manifest.runner.command
 * and manifest.runner.scope_args before spawning.
 *
 * @param {object} manifest - Backend manifest.
 * @param {string} testsRoot - Working directory for the runner.
 * @param {string} reportPath - Where the runner is asked to write its report.
 * @returns {{status: number|null, stdout: string, stderr: string, command: string, error: (string|undefined)}}
 */
function runRunner(manifest, testsRoot, reportPath) {
  const runner = manifest.runner;
  // Spreading a string command would split it into single characters.
  if (!Array.isArray(runner?.command) || runner.command.length === 0) {
    die(`backend ${manifest.id}: manifest.runner.command must be a non-empty array`);
  }
  const vars = { report_path: reportPath, test_root: testsRoot };
  const command = expandPlaceholders(runner.command, vars);
  const scopeArgs = expandPlaceholders(runner.scope_args ?? [], vars);
  const [cmd, ...rest] = [...command, ...scopeArgs];
  const result = spawnSync(cmd, rest, {
    cwd: testsRoot,
    stdio: ["ignore", "pipe", "pipe"],
    encoding: "utf-8",
  });
  return {
    status: result.status,
    stdout: result.stdout ?? "",
    stderr: result.stderr ?? "",
    command: [cmd, ...rest].join(" "),
    // Set when the process could not be started at all (e.g. ENOENT).
    error: result.error?.message,
  };
}

// --- report -----------------------------------------------------------------

/**
 * Find the obligation id for a test id.
 *
 * An exact match wins; otherwise the first indexed key that the test id ends
 * with is used, which covers runners that report absolute file paths.
 *
 * @param {Map<string, string>} obligationByTestId
 * @param {string} testId
 * @returns {string|undefined}
 */
function resolveObligationId(obligationByTestId, testId) {
  if (obligationByTestId.has(testId)) return obligationByTestId.get(testId);
  for (const [key, oid] of obligationByTestId.entries()) {
    if (testId.endsWith(key)) return oid;
  }
  return undefined;
}

/**
 * Build the markdown propagation report.
 *
 * @param {object} merged - Merged inventory (reads `obligations`).
 * @param {object[]} results - Normalised adapter output.
 * @param {{command: string, status: number|null}} runResult - Runner outcome.
 * @param {object} manifest - Backend manifest.
 * @returns {string} Markdown text.
 */
function buildReport(merged, results, runResult, manifest) {
  const obligationByTestId = indexObligationsByTestId(merged);
  const skipMarker = manifest.skip_marker ?? "bridge-unresolved";
  const buckets = {
    pass: [],
    fail: [],
    error: [],
    bridge_unresolved: [],
    infra_gap: [],
  };
  for (const r of results) {
    if (r.outcome === "pass") buckets.pass.push(r);
    else if (r.outcome === "fail") buckets.fail.push(r);
    else if (r.outcome === "error") buckets.error.push(r);
    else if (r.outcome === "skipped") {
      const isBridge = (r.markers ?? []).includes(skipMarker);
      if (isBridge) buckets.bridge_unresolved.push(r);
      else buckets.infra_gap.push(r);
    }
  }
  const totalObligations = merged.obligations.length;
  const covered = new Set(
    buckets.pass
      .map((r) => resolveObligationId(obligationByTestId, r.test_id))
      .filter(Boolean),
  );
  const lines = [];
  lines.push("# Propagation report");
  lines.push("");
  lines.push("## Summary");
  lines.push("");
  lines.push(`- Backend: ${manifest.id}`);
  lines.push(`- Framework language: ${manifest.language}`);
  lines.push(`- Obligations total: ${totalObligations}`);
  lines.push(`- Obligations covered: ${covered.size} (passing tests: ${buckets.pass.length})`);
  lines.push(`- Bridge unresolved: ${buckets.bridge_unresolved.length}`);
  lines.push(`- Likely real failures: ${buckets.fail.length} ← human review`);
  lines.push(`- Likely wrong bridges: ${buckets.error.length} ← re-mapping`);
  lines.push(`- Infrastructure gaps: ${buckets.infra_gap.length}`);
  lines.push("");
  lines.push(`Runner: \`${runResult.command}\``);
  lines.push(`Exit code: ${runResult.status}`);
  lines.push("");

  appendBucket(lines, "Failures (assertion / likely real)", buckets.fail, obligationByTestId);
  appendBucket(lines, "Errors (likely wrong bridges)", buckets.error, obligationByTestId);
  appendBucket(lines, "Bridge unresolved (stubs)", buckets.bridge_unresolved, obligationByTestId);
  appendBucket(lines, "Other skips (infrastructure gaps)", buckets.infra_gap, obligationByTestId);

  if (totalObligations > 0) {
    const pct = ((covered.size / totalObligations) * 100).toFixed(1);
    lines.push("---");
    lines.push("");
    lines.push(`Coverage: ${covered.size}/${totalObligations} obligations (${pct}%).`);
    lines.push("");
  }
  return lines.join("\n");
}

/**
 * Append one report section listing the given results; empty buckets add nothing.
 *
 * @param {string[]} lines - Report lines, appended to in place.
 * @param {string} title - Section heading.
 * @param {object[]} items - Results in this bucket.
 * @param {Map<string, string>} obligationByTestId - Index from indexObligationsByTestId().
 */
function appendBucket(lines, title, items, obligationByTestId) {
  if (items.length === 0) return;
  lines.push(`## ${title}`);
  lines.push("");
  for (const r of items) {
    // Same resolution as the coverage count, so a test id reported with an
    // absolute path still shows its obligation.
    const oid = resolveObligationId(obligationByTestId, r.test_id) ?? "";
    lines.push(`- \`${r.test_id}\` — obligation \`${oid}\``);
    if (r.message) {
      const trimmed = r.message.split("\n")[0].slice(0, 240);
      lines.push(`  - ${trimmed}`);
    }
  }
  lines.push("");
}

/**
 * Index obligations by the "<target_file>::<test_name>" key, built the way the
 * adapters emit test ids.
 *
 * @param {object} merged - Merged inventory.
 * @returns {Map<string, string>} Test id to obligation_id.
 */
function indexObligationsByTestId(merged) {
  const map = new Map();
  for (const o of merged.obligations) {
    map.set(`${o.target_file}::${o.test_name}`, o.obligation_id);
  }
  return map;
}

/** Create a fresh temp directory and return the report path inside it. */
function reportPathFor() {
  const tmp = mkdtempSync(path.join(os.tmpdir(), "propagate-stagec-"));
  return path.join(tmp, "report.xml");
}

/**
 * Entry point: run the backend's runner, parse its report with the adapter
 * named by manifest.runner.report_format, and write the markdown report.
 */
function main() {
  const args = parseArgs(process.argv);
  const merged = readJson(args.merged);
  if (!merged.framework) die("merged inventory missing framework");
  if (!Array.isArray(merged.obligations)) die("merged inventory missing obligations[]");
  const backend = loadBackend(args.backendsRoot, merged.framework);
  const format = backend.manifest.runner?.report_format;
  const adapter = Object.prototype.hasOwnProperty.call(ADAPTERS, format)
    ? ADAPTERS[format]
    : undefined;
  if (!adapter) die(`no adapter for report_format=${format}`);

  const reportPath = reportPathFor();
  const runResult = runRunner(backend.manifest, args.testsRoot, reportPath);

  let raw = "";
  if (existsSync(reportPath)) raw = readFileSync(reportPath, "utf-8");
  // The report is fully read by now; remove the temp directory so repeated
  // runs do not accumulate under the system temp dir.
  rmSync(path.dirname(reportPath), { recursive: true, force: true });
  if (!raw) {
    console.error(`run-suite: runner produced no report at ${reportPath}`);
    if (runResult.error) console.error(`spawn error: ${runResult.error}`);
    console.error(`stdout: ${runResult.stdout.slice(0, 500)}`);
    console.error(`stderr: ${runResult.stderr.slice(0, 500)}`);
    process.exit(2);
  }
  const results = adapter(raw, backend.manifest);
  const report = buildReport(merged, results, runResult, backend.manifest);
  writeFileSync(args.report, report);
  console.error(
    `run-suite: ${results.length} test results -> ${args.report} (exit=${runResult.status})`,
  );
}

main();

/*
 * Patch boundary: the lines below open the next file of this patch and are
 * kept verbatim.
 *
 * diff --git a/skills/propagate/SKILL.md b/skills/propagate/SKILL.md
 * index 44ca8ee..8f27748 100644
 * --- a/skills/propagate/SKILL.md
 * +++ b/skills/propagate/SKILL.md
 * @@ -3,214 +3,293 @@ name: propagate
 *  description: "Generate tests from Allium specifications. Use when the user wants to propagate tests, generate test files from a spec, write tests for a specification, create property-based tests, produce state machine tests, check test coverage against spec obligations, or understand what tests a specification requires."
 *  ---
 *
 * -# Propagation
 * +# Propagation (consensus pipeline)
 *
 * -This skill generates tests from Allium specifications.
 */
Propagation is how plants reproduce from cuttings of the parent: the spec is the parent, the tests are the offspring. +This skill generates **byte-deterministic** test files from an Allium spec +and a target codebase. It does so by orchestrating K independent +inventory-extraction passes in parallel, canonicalising each, merging into +a single consensus inventory, and translating that to test files through +a per-language **backend**. -Deterministic tools guarantee completeness (every spec construct maps to a test obligation). You handle the implementation bridge: correlating spec constructs with code, generating tests in the project's conventions. +When invoked, you are the **orchestrator**. You do not write the tests +yourself — that's the translator's job. You drive the procedure below. -## Prerequisites +## Pipeline -Before propagating tests, you need: - -1. **An Allium spec** — the `.allium` file describing the system's behaviour -2. **A target codebase** — the implementation to test -3. **Test obligations** — from `allium plan ` (JSON listing every required test) -4. **Domain model** — from `allium model ` (JSON describing entity shapes, constraints, state machines) - -If the CLI tools are not available, derive test obligations manually from the spec using the test-generation taxonomy in [`references/test-generation.md`](../allium/references/test-generation.md). - -## Modes - -### Surface mode - -Generates boundary tests from surface declarations. Use when the user wants to test an API, UI contract or integration boundary. - -For each surface in the spec: - -1. **Exposure tests** — verify each item in `exposes` is accessible to the specified actor, including `for` iteration over collections -2. **Provides tests** — verify operations appear when their `when` conditions are true and are hidden otherwise, including when the corresponding rule's `requires` clauses are not met -3. 
**Actor restriction tests** — verify the surface is not accessible to other actor types -4. **Actor identification tests** — verify only entities matching the actor's `identified_by` predicate can interact; for actors with `within`, verify interaction is scoped to the declared context -5. **Context scoping tests** — verify the surface instance is absent when no entity matches the `context` predicate -6. **Contract obligation tests** — verify `demands` are satisfied by the counterpart, `fulfils` are supplied by this surface, including all typed signatures -7. **Guarantee tests** — verify `@guarantee` annotations hold across the boundary -8. **Timeout tests** — verify referenced temporal rules fire within the surface's context -9. **Related navigation tests** — verify navigation to related surfaces resolves to the correct context entity - -### Spec mode - -Walks the full test obligations document. Use when the user wants comprehensive test coverage for the entire specification. - -Categories from the test-generation taxonomy: - -- **Entity and value type tests** — fields, types, optional (`?`) null handling, `when`-clause state-dependent presence, relationships, join lookups, equality -- **Enum tests** — comparability across named enums, membership tests, inline enum isolation -- **Sum type tests** — variant fields, type guards, exhaustiveness, creation via variant name, base `.created` trigger narrowing -- **Derived value and projection tests** — computation, filtering, `-> field` extraction, parameterised derived values, `now` volatility, collection operations -- **Default instance tests** — unconditional existence, field values, cross-references between defaults -- **Config tests** — defaults, overrides, mandatory parameters, expression-form defaults, qualified references, config chains -- **Invariant tests** — post-rule verification, edge cases, implication logic, entity-level invariants -- **Rule tests** — success/failure/edge cases, conditionals (ensuring `if` 
guards read resulting state), entity creation, removal, bulk updates, rule-level `for` iteration, `let` bindings, chained triggers -- **State transition tests** — valid/invalid transitions, terminal states, `transitions_to` vs `becomes` semantics -- **Temporal tests** — deadline boundaries, re-firing prevention, optional field null behaviour -- **Surface tests** — exposure, availability, actor identification with `within` scoping, context scoping, related navigation -- **Contract tests** — signature satisfaction, `@invariant` honouring, `demands`/`fulfils` direction -- **Cross-module tests** — qualified entity references, external trigger responses, type placeholder substitution -- **Cross-rule interaction tests** — duplicate creation guards, provides availability -- **Transition graph tests** — every declared edge is reachable via its witnessing rule, undeclared transitions are rejected, terminal states have no outbound rules, non-terminal states have at least one exit, exact correspondence between enum values and graph edges -- **State-dependent field tests** — presence when in qualifying state, absence when outside, presence obligations on entering the `when` set, absence obligations on leaving, no obligation when moving within or outside, convergent transitions all set the field, guard required to access `when`-qualified fields, derived value `when` inference via input intersection -- **Scenario tests** — happy path, edge cases, order independence -- **Data flow chain tests** — exercise full chains from surface capture through rules to downstream rule preconditions. For each chain (surface provides trigger → rule ensures field → downstream rule requires field), generate an integration test that submits data through the surface and verifies it reaches the downstream precondition. -- **Reachability tests** — walk from each initial state (via `.created()`) to each terminal state, following a valid path through the transition graph. 
Each test exercises a complete lifecycle. -- **Deadlock scenario tests** — for states where `allium analyse` identifies potential deadlocks, generate tests that put the entity in the stuck state and verify whether it can progress. -- **Cross-entity process tests** — for processes spanning multiple entities, generate integration tests that exercise the full process from start to terminal state across all participating entities. - -If `allium analyse` is available, use its findings to prioritise test generation. A `missing_producer` or `dead_transition` finding indicates a gap worth exercising with a test. A `deadlock` finding should generate a test documenting that the entity cannot escape the stuck state. Consult [actioning findings](../allium/references/actioning-findings.md) for the finding type taxonomy. - -## Test output kinds - -### 1. Assertion-based tests - -For deterministic obligations: field presence, enum membership, transition validity, surface exposure, state-dependent field presence and absence. These are standard unit/integration tests. - -### 2. Property-based tests - -For invariants and rule properties. Each expression-bearing invariant becomes a PBT property: -- Generate a valid entity state using the generator spec -- Apply a sequence of rules (following the transition graph when declared, or deriving valid sequences from rules alone) -- Check the invariant holds at every step - -Use the project's PBT framework: - -| Language | Framework | Discovery | -|----------|-----------|-----------| -| TypeScript | fast-check | `package.json` | -| Python | Hypothesis | `pyproject.toml` | -| Rust | proptest | `Cargo.toml` | -| Go | rapid | `go.mod` | -| Elixir | StreamData | `mix.exs` | - -Fall back to assertion-based tests if no PBT framework is present. - -### 3. State machine tests - -For entities with status enums. When a transition graph is declared, walk every path through the graph. When no graph is declared, derive valid transitions from rules. 
-- Verify transitions succeed via witnessing rules -- Verify rejected transitions fail -- Verify state-dependent fields are present or absent at each state per their `when` clauses -- Verify invariants hold at each state - -State machine tests require an **action map**: a function per transition edge that takes the entity in the source state and produces it in the target state by calling the actual implementation code. Without this map, the test framework can describe valid paths through the graph but cannot execute them. - -To build the action map: -1. For each edge in the transition graph, find the witnessing rule in the spec -2. Find the code implementing that rule (the implementation bridge) -3. Write a test action that sets up the preconditions (`requires` clauses), invokes the code, and returns the entity in the target state -4. Register the action under the `(from_state, to_state)` key - -Once the map is built, the PBT framework can walk random valid paths: start at any non-terminal state, pick a random outbound edge, apply its action, check all entity-level invariants, repeat. The path length and starting state are generated randomly. This is the fullest expression of the spec's transition graph as a test. - -## The implementation bridge - -You correlate spec constructs with implementation code, the same way the weed skill correlates for divergence checking. - -### For surface tests - -Map surfaces to their implementation: -- API surfaces map to endpoints (REST routes, GraphQL resolvers, gRPC services) -- UI surfaces map to components or pages -- Integration surfaces map to message handlers or SDK methods - -Discover the mapping by reading the codebase. Look for naming patterns, route definitions and handler registrations. 
- -### For internal tests +``` +allium plan / allium model (deterministic external inputs) + ↓ +K subagents (Agent tool) + ↓ each produces obligation-bridge-i.json +scripts/canonicalize-obligations.mjs (per inventory) + ↓ +scripts/merge-obligations.mjs (one-shot K-vote consensus) + ↓ +scripts/obligations-to-tests.mjs (translator core + backend dispatch) + ↓ N test files +scripts/run-suite.mjs (Stage C: runner + report) + ↓ +propagation-report.md +``` -For each rule in the spec: -1. Find the code implementing the rule (service method, event handler, state machine transition) -2. Determine how to instantiate the entities involved (factories, builders, fixtures) -3. Determine how to invoke the rule (API call, method call, event dispatch) -4. Determine how to assert postconditions (database queries, return values, event assertions) +Scripts live at `${CLAUDE_PLUGIN_ROOT}/scripts/`. References, backends, and +the schema subagents follow live at +`${CLAUDE_PLUGIN_ROOT}/skills/propagate/{references,backends}`. + +## Procedure (mandatory, in order) + +### Step 1 — Decide K, backend, and output paths + +- **K**: default 3. Use 5 if the user wants higher determinism confidence + at higher cost; use 2 only if cost is the primary constraint. +- **Backend**: pick from `backends/` based on the target codebase. If the + user does not specify, infer from project files: + - `pyproject.toml` or `setup.py` → `pytest+hypothesis` + - `package.json` with `jest` in devDependencies → `jest+fastcheck` + (v2 — not yet shipped in this plugin version; abort if requested) + - Otherwise, ask the user. +- **Spec path** and **code root**: pick these MECHANICALLY, not by LLM + judgement, so two runs on the same project produce byte-identical + inventories. + - **`code_root`**: always `"."` (the current working directory). 
Do not + pick `"./app"` or any subdirectory even if the implementation lives + there — the convention is "the project root is the code root", and + paths in bridges (`::`) carry the `app/` prefix when + needed. The whole pipeline is designed around this; varying + `code_root` breaks byte-determinism across orchestrations. + - **`spec_path`**: the path the user named, made relative to `code_root`. + If they did not name one, look for `./allium-distilled/spec.allium` + first, then `./spec.allium`, then ask. Always express it with a + leading `./` (so `"./allium-distilled/spec.allium"`, never + `"allium-distilled/spec.allium"` and never an absolute path). + - Pass these EXACT strings to every subagent's prompt and use them + verbatim in the inventory's top-level fields. Subagents must not + reinterpret them. +- **Output directory**: `./allium-propagated/` by default, relative to the + current working directory. Test files are written into `/tests/` + (or whatever the backend's `name-policy.json` directs); intermediate + artefacts (inventories, plan, model, merged) land in `./allium-propagated/`. + +Create the layout: -### For temporal tests +``` +./allium-propagated/ +├── inventories/ # subagent outputs land here +├── plan.json # cached `allium plan` output +├── model.json # cached `allium model` output +├── merged.json # consensus inventory (after Stage B) +└── propagation-report.md # Stage C output +``` -Temporal triggers (deadline-based rules) need a controllable time source in the test. If the implementation uses wall-clock time (`Instant.now()`, `System.currentTimeMillis()`), the test cannot reliably position itself before, at or after a deadline. +### Step 2 — Pre-compute deterministic external inputs -Before attempting temporal tests, check whether the component accepts an injected clock or time parameter. Common patterns: a `Clock` parameter on the constructor, an epoch-millisecond argument on the method, a `TimeProvider` interface. 
If the seam exists, inject a controllable time source. If it does not, flag this as a test infrastructure gap: the temporal tests cannot be generated until the component supports time injection. Do not attempt to test temporal behaviour by sleeping or racing against wall-clock time. +Run via Bash, capturing output to disk: -### For cross-module trigger chains +``` +allium plan > ./allium-propagated/plan.json +allium model > ./allium-propagated/model.json +``` -When a rule emits a trigger that another spec's rule receives (e.g. the Arbiter emits `ClerkReceivesEvent`, the Clerk handles it), testing the chain requires multiple components wired together. +If either command fails, abort and tell the user — propagation requires +both files. The plan output is used by `canonicalize-obligations.mjs` to +validate that every subagent inventory has exactly the right obligation +set. -Before generating cross-module tests: -1. Trace the trigger emission graph from the plan output: which rules emit triggers, and which rules in other specs receive them -2. Check whether the codebase has an existing integration test fixture that wires the participating components (a pipeline test, an end-to-end test helper, a test harness class) -3. If a fixture exists, reuse it. Cross-module tests should compose existing wiring, not rebuild it -4. If no fixture exists but the codebase structure is clear enough to understand the wiring (service constructors, dependency injection, event bus configuration), generate the fixture and the test -5. If the wiring is too complex or opaque to generate confidently, generate a test skeleton with TODOs marking where component wiring is needed +### Step 3 — Spawn K subagents in parallel -Cross-module tests are integration tests by nature. They verify that the spec's trigger chains are faithfully implemented across component boundaries. Prioritise them after single-component tests are passing. +Use the **Agent tool** with `subagent_type: "general-purpose"`. 
Send all +K Agent tool calls **in a single message** so they execute concurrently. +Each subagent receives the prompt template from Step 4 with placeholders +substituted. The i-th subagent's output path is +`./allium-propagated/inventories/inventory-<i>.json` (1-indexed). -### Reusing existing tests +### Step 4 — Subagent prompt template -When exploring the codebase, note which spec obligations are already covered by existing tests. An existing integration test that exercises the happy path from event submission through to acknowledged output already covers multiple `rule_success` obligations and the end-to-end scenario. +Use this prompt verbatim per subagent, with the `<…>` placeholders replaced: -When an existing test covers a spec obligation, reference it rather than generating a duplicate. The propagate skill's value at the integration level is verifying that coverage is complete against the spec's obligation list, identifying gaps, and generating tests to fill them. Replacing working hand-written tests with generated equivalents adds no value. +``` +You are producing one obligation-bridge inventory of a codebase as part +of a consensus pipeline. Other subagents are doing the same job in +parallel; your output will be merged with theirs. + +Step 1: Read these inputs: + - The Allium spec: <spec_path> + - The test plan (obligations): ./allium-propagated/plan.json + - The domain model: ./allium-propagated/model.json + - The target codebase under: <code_root> + +Skip generated / vendored / dependency directories. + +Step 2: Read the obligation-bridge schema and the chosen backend's +conventions: + - ${CLAUDE_PLUGIN_ROOT}/skills/propagate/references/obligation-bridge-schema.md + - ${CLAUDE_PLUGIN_ROOT}/skills/propagate/backends/<backend>/conventions.md + +The schema defines the JSON shape, the bridge symbol notation +(<path>::<symbol>), test_kind values, bridge confidence semantics, and +the self-check list to run before emitting. + +Step 3: For every obligation in plan.json, produce one entry in your +inventory.
The set of obligation_ids in your output MUST equal the set +in plan.json — no additions, no omissions. The canonicaliser rejects +deviations. + +Step 4: Write the inventory to: + <inventory_path> + +Use these top-level field values verbatim (do not invent your own): + "spec_path": "<spec_path>" + "code_root": "<code_root>" + "framework": "<backend>" + +Step 5: Stop. Do NOT: + - write a test file (the orchestrator's translator handles that) + - run `allium plan` / `allium model` yourself (the orchestrator did) + - write any other file + - invoke any other skill (in particular, do not invoke `propagate` — + that would recurse) + - read or follow the orchestrator's SKILL.md + - print anything other than a one-line confirmation that the file was written + +The inventory is your only deliverable. +``` -### For deferred specs +### Step 5 — Canonicalize each inventory -Deferred specifications are fully specified in separate files. When the target codebase doesn't include the deferred spec's module, generate a test stub with a placeholder: +For each inventory the subagents produced, run via Bash: -```typescript -// TODO: deferred spec — InterviewerMatching.suggest -// This behaviour is specified as deferred. Provide a mock or skip. +``` +node ${CLAUDE_PLUGIN_ROOT}/scripts/canonicalize-obligations.mjs \ + ./allium-propagated/inventories/inventory-<i>.json \ + ./allium-propagated/inventories/inventory-<i>.canonical.json \ + --plan ./allium-propagated/plan.json ``` -## Process +If a subagent failed to write its inventory, or the canonicaliser rejects +it (obligation set mismatch, malformed bridge, unknown framework), skip +it and continue with the survivors. Note any failures in the final +report. If fewer than ⌈K/2⌉ survive, abort and ask the user to re-run. -1. **Read the spec** — understand entities, rules, surfaces, invariants, transition graphs, state-dependent fields, contracts, config, defaults. Read [assessing specs](../allium/references/assessing-specs.md) to gauge the spec's maturity.
A coarse spec (entities and transition graphs but no rules) will produce limited test obligations — mostly structural tests. If the spec is too coarse for meaningful test generation, suggest using the `elicit` or `distill` skill to develop it further before propagating tests. A spec with rules and surfaces enables the full test taxonomy including data flow chain tests and reachability tests. -2. **Read test obligations** — from `allium plan` output or manual derivation -3. **Read domain model** — from `allium model` output or manual derivation -4. **Explore the codebase** — find existing tests, test framework, entity implementations, rule implementations -5. **Map constructs to code** — correlate spec entities/rules/surfaces with implementation classes/functions/endpoints -6. **Generate tests** — produce test files following the project's conventions -7. **Verify tests compile/run** — ensure generated tests are syntactically valid +### Step 6 — Merge into a consensus inventory -### Discovery checklist +Run via Bash, passing every canonical inventory: -Before generating tests, establish: +``` +node ${CLAUDE_PLUGIN_ROOT}/scripts/merge-obligations.mjs \ + ./allium-propagated/merged.json \ + ./allium-propagated/inventories/inventory-1.canonical.json \ + ./allium-propagated/inventories/inventory-2.canonical.json \ + ... +``` -- [ ] Test framework and runner (Jest, pytest, cargo test, etc.) -- [ ] PBT framework if present (fast-check, Hypothesis, proptest, etc.) -- [ ] Test file location conventions (co-located, `__tests__/`, `tests/`, etc.) 
-- [ ] Entity/model location and patterns (classes, interfaces, structs) -- [ ] Factory/fixture patterns for test data -- [ ] How state transitions are implemented (methods, events, state machines) -- [ ] How surfaces are implemented (routes, controllers, resolvers) -- [ ] Existing test helpers or utilities -- [ ] Whether components accept injected time sources for temporal tests -- [ ] Whether an integration test fixture exists for cross-module trigger chains -- [ ] Which spec obligations are already covered by existing tests +The merger does modal voting on `test_kind` and `bridge.primary_symbol`, +set-style majority on `preconditions`, `fixtures_required`, and +`injection_points`. Obligations where K subagents cannot converge on a +single primary symbol are demoted to `bridge.confidence: "low"` with +the candidates preserved — the translator will emit those as +backend-idiomatic skipped stubs. -### Generator awareness +Same K canonical inventories always produce byte-identical merged bytes. -When generator specs are available, use them to produce valid test data: +### Step 7 — Translate to test files -- Respect field types and constraints -- For entities with transition graphs, generate entities at specific lifecycle states with correct field presence per `when` clauses (e.g. a `shipped` Order has `tracking_number` and `shipped_at` populated; a `pending` Order does not) -- For invariants, generate states that exercise boundary conditions -- For config parameters, use declared defaults unless testing overrides +``` +node ${CLAUDE_PLUGIN_ROOT}/scripts/obligations-to-tests.mjs \ + ./allium-propagated/merged.json \ + --out +``` -## Interaction with other tools +The translator dispatches on the inventory's `framework` field, loads +that backend's manifest, name-policy, and templates, and writes test +files under `//`. Same merged +inventory always produces byte-identical files. -- **distill** produces specs from code. Those specs feed propagate. 
-- **weed** checks alignment. After propagating tests, weed verifies spec-code match. -- **tend** evolves specs. After spec changes, run propagate again to update tests. -- **elicit** builds specs through conversation. Once a spec is ready, propagate generates tests. +### Step 8 — Verify with Stage C runner -## Limitations +``` +node ${CLAUDE_PLUGIN_ROOT}/scripts/run-suite.mjs \ + ./allium-propagated/merged.json \ + --tests-root \ + --report ./allium-propagated/propagation-report.md +``` -- Generated tests are a starting point. They may need adjustment for project-specific patterns. -- The implementation bridge is LLM-mediated. Complex or unusual codebases may need manual guidance on the mapping. -- Cross-module tests require understanding component wiring across service boundaries. When the codebase structure is clear, full tests can be generated. When wiring is opaque, tests are generated as skeletons with TODOs for manual setup. -- Runtime trace validation and model checking are separate workstreams. +Stage C runs the backend's runner command (e.g. `python3 -m pytest +--junit-xml=…`), parses the runner's JSON/XML report via a per-format +adapter, categorises outcomes into pass / fail / error / skipped +(bridge-unresolved) / skipped (other), and emits a Markdown report. + +Stage C is intentionally read-only against the inventory pipeline: it +does not feed back into Stage B. + +### Step 9 — Report + +State to the user: + +- Backend used and number of subagents (K) +- Total obligations vs obligations covered +- Number of `bridge-unresolved` stubs (these need human follow-up) +- Number of likely real failures and likely wrong bridges +- Path to the generated test files and to `propagation-report.md` +- Any subagent failures and survivor count + +Do not embed test file contents in your reply — point the user at the +files. 
+ +## Defaults + +- K = 3 +- Backend = inferred from project files (`pytest+hypothesis` for Python, + `jest+fastcheck` for TypeScript/JavaScript projects with Jest) +- Output directory = `./allium-propagated/` +- Subagents run in parallel (always) +- One propagation per invocation + +## What this skill does NOT do + +- It does not invoke `allium analyse` beyond reading `allium plan` and + `allium model`. If the user wants completeness checks against analyse + findings, they run those tools manually against the spec first. +- It does not modify implementation source code — only the test tree under + `//`. +- It does not delete pre-existing tests. The generated tree may overlap + with hand-written tests; the user is responsible for reconciling. +- It does not implement new test bodies — it emits *stubs* with a wired + bridge (import + skeleton + TODO). Engineers fill in the assertion body. + Stubs are still useful: they prove the obligation is structurally + covered and the bridge is importable. +- It does not invoke other skills (no recursion through the subagent path). + +## How backends are dispatched + +The backend is selected from the inventory's `framework` field, set by +the orchestrator and propagated through canonicalize / merge / translate +unchanged. To add a new language (Rust + proptest, Go + rapid, …), +contribute a new backend under `backends//`: + +- `manifest.json` — runner, file extension, imports style, + injection idioms (see + [`references/backend-authoring-guide.md`](./references/backend-authoring-guide.md)) +- `name-policy.json` — casing, directory layout, file pattern +- `conventions.md` — symbol form for Stage A subagents +- `templates/` — six templates (test-file, assertion, + pbt-property, state-machine, + stub-unresolved, fixture) + +The translator does not need changes when a backend is added. +Stage C's runner-output adapter (in `run-suite.mjs`) may need a new +entry if the backend's runner emits a JSON/XML format not yet supported. 
+ +## What subagents extract — guidance + +The `references/obligation-bridge-schema.md` document carries the full +contract. Subagents read that. The orchestrator (you) does not need to +know what to extract — only how to drive the pipeline. + +If subagents produce visibly poor inventories (most bridges low-confidence +when the implementation is obvious; bogus `` shapes; obligation +IDs that don't match the plan), the right interventions are: + +1. Extend `references/obligation-bridge-schema.md` (covers all backends). +2. Extend the specific backend's `conventions.md` (per-language). + +Keep this SKILL.md focused on orchestration. diff --git a/skills/propagate/references/backend-authoring-guide.md b/skills/propagate/references/backend-authoring-guide.md new file mode 100644 index 0000000..f4b77ca --- /dev/null +++ b/skills/propagate/references/backend-authoring-guide.md @@ -0,0 +1,293 @@ +# Backend authoring guide + +The deterministic `propagate` pipeline is **backend-dispatched**. The +language-agnostic translator (`obligations-to-tests.mjs`) reads a merged +obligation-bridge inventory and renders it through whichever backend the +inventory's `framework` field names. This document describes the contract a +new backend must fulfil. + +> Adding a new backend should be a matter of writing a manifest, a name-policy, +> a conventions doc, and six templates — **not** patching the translator. + +## Directory layout + +A backend lives at: + +``` +plugins/experimental/skills/propagate/backends// +├── manifest.json +├── name-policy.json +├── conventions.md +└── templates/ + ├── test-file.tmpl + ├── assertion.tmpl + ├── pbt-property.tmpl + ├── state-machine.tmpl + ├── stub-unresolved.tmpl + └── fixture.tmpl +``` + +`` is the value subagents put in the inventory's `framework` field +(e.g. `pytest+hypothesis`, `jest+fastcheck`, `proptest+cargo-test`). The id +must match the directory name byte-for-byte; the canonicaliser uses it as +the dispatch key. 
+ +## `manifest.json` + +Declares the backend's high-level behaviour. Versioned via `manifest_version` +so the translator can reject mismatched backends rather than silently +misrender. + +```jsonc +{ + "manifest_version": 1, + "id": "pytest+hypothesis", + "language": "python", + "file_extension": ".py", + "test_file_prefix": "test_", + "runner": { + "command": ["python3", "-m", "pytest", "--junit-xml={report_path}"], + "report_format": "pytest-junitxml", + "scope_args": ["{test_root}"] + }, + "imports_style": "python", + "imports": { + "base": ["import pytest"], + "pbt": ["from hypothesis import HealthCheck, assume, given, settings, strategies as st"], + "state_machine": [ + "from hypothesis import strategies as st", + "from hypothesis.stateful import RuleBasedStateMachine, rule" + ] + }, + "bridge_import": { + "transform": "python_module" + }, + "fixture_style": "conftest", + "stub_idiom": "pytest.skip", + "skip_marker": "bridge-unresolved", + "clock_injection": "monkeypatch", + "random_injection": "monkeypatch", + "network_injection": "monkeypatch" +} +``` + +### Required fields + +- `manifest_version` (integer): currently `1`. The translator refuses any + other value. +- `id` (string): matches the directory name. +- `language` (string): the source language. Informational; used in reports. +- `file_extension` (string): includes the leading dot. The translator uses + this when computing target paths from `name-policy.json`. +- `test_file_prefix` (string): convention prefix on test filenames + (`test_` for pytest, `""` for Jest, …). +- `runner.command` (array of strings): the command Stage C runs. Supported + placeholders: `{report_path}` (absolute path the runner should write its + JSON report to), `{test_root}` (path Stage C scopes the run to). +- `runner.report_format` (string): the parser key Stage C looks up. Must + match one of the format adapters implemented in `run-suite.mjs`. 
+- `runner.scope_args` (array of strings, optional): extra arguments to + scope the run to the generated tests. Same placeholder set as + `runner.command`. +- `imports_style` (string): one of `"python"` (alphabetical), `"typescript"` + (external-first, then relative; alphabetical within group), `"go"`, + `"rust"`. The translator's import-deduper looks this up. +- `imports` (object): per-test_kind import lists, keyed by `base`, `pbt`, + `state_machine`, `temporal`. `base` is always included; the others are + added when an obligation's `test_kind` matches. If `temporal` is + omitted, the translator falls back to `pbt` for temporal obligations. + Adding a new framework means writing this list in the manifest, not + editing the translator. +- `bridge_import.transform` (string): name of the translator's + bridge-import transform. Currently supported: + - `"python_module"` — `app/services.py::approve_claim` + becomes `from app.services import approve_claim`. + - `"typescript_relative"` — `src/services/claim.ts::approveClaim` + becomes `import { approveClaim } from "../src/services/claim";`, + with the path rewritten relative to the target test file. + - `"noop"` — emit no bridge import line (use for languages where + every symbol is in scope without an explicit import). + Adding a new transform means adding one entry to + `BRIDGE_IMPORT_TRANSFORMS` in `obligations-to-tests.mjs` and + declaring it in the manifest. +- `fixture_style` (string): one of `"conftest"` (pytest), `"in-file"` + (TypeScript: fixtures live alongside tests), `"shared-module"` (one + fixtures module imported by all tests). +- `stub_idiom` (string): a free-form label used in the + `stub-unresolved.tmpl`; not parsed by the translator. +- `skip_marker` (string): the label engineers grep for to find unresolved + stubs. Surfaced in the Stage C report. Recommend `bridge-unresolved` + for consistency across backends. 
+- `clock_injection`, `random_injection`, `network_injection` (strings, + optional): how each `injection_points[]` value renders in test bodies. + The values are passed verbatim to templates as + `{{injection.clock}}`, `{{injection.random}}`, `{{injection.network}}`. + +## `name-policy.json` + +Declares the casing and layout rules the canonicaliser applies to recompute +`target_file` and `test_name`, discarding the LLM-supplied advisory values. + +```jsonc +{ + "test_name_case": "snake", + "file_name_case": "snake", + "directory_layout": "tests/", + "file_pattern": "test_{obligation_subject}{file_extension}", + "test_name_pattern": "test_{obligation_id_slug}" +} +``` + +### Required fields + +- `test_name_case`: one of `"snake"`, `"camel"`, `"pascal"`, `"kebab"`. + Applied to the slug derived from `obligation_id`. +- `file_name_case`: same set. +- `directory_layout` (string): a relative directory under `code_root` where + test files are written. Trailing slash optional. `""` means co-located + with the source. +- `file_pattern` (string): template for the file name. Placeholders: + - `{obligation_subject}` — the first segment of `obligation_id` after the + category prefix (e.g. `Claim` for `rule_success.Claim.Approve`), cased per + `file_name_case`. + - `{file_extension}` — from `manifest.json`. +- `test_name_pattern` (string): template for the test function name. + Placeholders: + - `{obligation_id_slug}` — full `obligation_id` lowercased with `.` and + other non-alphanumerics turned into `_` or the requested case's separator. + +The canonicaliser owns this transformation; backends do not need to mirror +it elsewhere. + +## `conventions.md` + +A free-form Markdown document explaining the backend's *symbol* convention, +so Stage A subagents can produce well-formed `<path>::<symbol>` bridges. +At minimum, document: + +- What a valid `<symbol>` looks like in this language (e.g. `module.function`, + `Class.method`, `function`, `mod::function::generic`).
+- The directory layout the templates assume (where source files live, where + tests should be written). +- Any test-infrastructure assumptions (fixtures location, plugin requirements). +- How each `injection_points[]` value is realised in the backend's idiom. + +The translator never reads this file. It exists for the Stage A subagent +prompt and for human contributors. + +## Templates + +Templates use a small placeholder syntax shared across backends. The renderer +lives in `obligations-to-tests.mjs`; do not duplicate it per backend. + +### Placeholder syntax + +- `{{name}}` — substitute the value of `name` in the current context. + Dots traverse objects: `{{bridge.primary_symbol}}`, + `{{manifest.skip_marker}}`. +- `{{#each items}}…{{/each}}` — repeat the body once per element in + `items`, with each element bound as `it` (and `index` as the 0-based + index). Inside, use `{{it}}` for primitives and `{{it.field}}` for + records. +- `{{#if cond}}…{{else}}…{{/if}}` — conditionally render. `cond` is + truthy if non-null, non-empty array, non-empty string. +- `{{!comment}}` — comment, stripped from output. + +No filters, no expressions, no inheritance. If a template needs richer +logic, surface that as a new field on the rendering context — do not +extend the placeholder syntax. + +### Rendering context (passed to every template) + +```jsonc +{ + "obligation": { /* the full obligation entry */ }, + "bridge": { /* obligation.bridge, hoisted for convenience */ }, + "preconditions": ["…"], + "fixtures_required": ["…"], + "injection_points": ["clock", ...], + "injection": { "clock": "monkeypatch", ... 
}, // from manifest + "transition_graph_for_entity": [ /* edges for the obligation's entity, may be [] */ ], + "manifest": { /* the backend's manifest.json */ }, + "name_policy": { /* the backend's name-policy.json */ } +} +``` + +For `test-file.tmpl` only, an additional field is bound: + +```jsonc +{ + "imports": ["…"], // deduped, ordered per imports_style + "fixtures": ["…"], // rendered fixture blocks + "tests": ["…"] // rendered test-body strings, alphabetical by test_name +} +``` + +### Required templates + +1. **`test-file.tmpl`** — file skeleton. Emits imports + fixtures + tests. +2. **`assertion.tmpl`** — one assertion test. Must include the + `obligation_id` somewhere in a comment for traceability. +3. **`pbt-property.tmpl`** — one property-based test. +4. **`state-machine.tmpl`** — a state-machine test (RuleBasedStateMachine + in Hypothesis, `fc.modelRun` / `commands` in fast-check, etc.). +5. **`stub-unresolved.tmpl`** — a skipped test with a TODO block listing + `bridge.candidates` and `preconditions`. Must use the backend's + `skip_marker` so Stage C can identify it in the runner report. +6. **`fixture.tmpl`** — one fixture/factory entry. Used when + `fixtures_required` references a name not already present in the + backend's fixture location. + +## Runner format adapter + +Stage C (`run-suite.mjs`) needs a parser that maps the runner's JSON +output to the same internal `{pass, fail, error, skipped}` categorisation. +Each adapter is keyed by `manifest.runner.report_format`. + +Adapter contract: + +```js +// run-suite.mjs adapter +function adapt(rawReport) { + return { + results: [ + { test_id: "tests/test_x.py::test_foo", outcome: "pass" }, + { test_id: "tests/test_y.py::test_bar", outcome: "skipped", + markers: ["bridge-unresolved"] }, + { test_id: "tests/test_z.py::test_baz", outcome: "error", + kind: "ImportError", message: "no module …" } + ] + }; +} +``` + +Outcome values: `pass | fail | error | skipped`. 
+ +Add a new adapter when you add a new backend whose runner emits a +report format Stage C doesn't yet understand. The v1 baseline ships +`pytest-junitxml` only; `jest-json` is a planned addition. + +## Versioning and breaking changes + +- `manifest_version` is bumped when the translator changes the contract + in a non-additive way. +- Adding new optional fields to `manifest.json` is **not** breaking. +- Adding new template placeholders is **not** breaking provided existing + templates render the same output. +- Removing or renaming a placeholder, changing the rendering context + shape, or changing the meaning of an existing field **is** breaking + and requires a bump. + +## Adding a backend: checklist + +- [ ] Create `backends/<id>/` with the four artefacts. +- [ ] Pick a `runner.report_format`; if Stage C has no adapter for it yet, add one in + `scripts/run-suite.mjs` and unit-test it against a captured + runner-output sample. +- [ ] Author the six templates against a hand-written 4-obligation + merged inventory; render and confirm the output parses with the + runner's `--collect-only` (or equivalent dry-parse). +- [ ] Add a `conventions.md` section telling Stage A subagents what a + valid `<symbol>` looks like. +- [ ] Run the translator on a real inventory twice and confirm + byte-identical output. diff --git a/skills/propagate/references/obligation-bridge-schema.md b/skills/propagate/references/obligation-bridge-schema.md new file mode 100644 index 0000000..75a81c3 --- /dev/null +++ b/skills/propagate/references/obligation-bridge-schema.md @@ -0,0 +1,252 @@ +# Obligation-bridge inventory schema (language-agnostic) + +This document is the contract between **Stage A subagents** and the **Stage B +canonicaliser** for the deterministic `propagate` pipeline.
A Stage A subagent +reads: + +- the Allium spec +- `allium plan ` output (the authoritative obligation list) +- `allium model ` output (entity shapes, transition graphs) +- the target codebase + +…and emits exactly one JSON file matching the schema below. K such files are +merged by `merge-obligations.mjs` into a consensus inventory; the translator +(`obligations-to-tests.mjs`) then renders that inventory through a backend. + +The schema is **language-agnostic**. Per-language idioms live in +`backends//{manifest.json, name-policy.json, conventions.md, +templates/}` — see [`backend-authoring-guide.md`](./backend-authoring-guide.md). +This document tells you *what* to extract; the chosen backend's +`conventions.md` tells you *how* the symbols and identifiers should look. + +## Top-level shape + +```jsonc +{ + "spec_path": "fixtures/insurance-claims/allium-distilled/spec.allium", + "code_root": "fixtures/insurance-claims", + "framework": "pytest+hypothesis", + "obligations": [ /* one entry per obligation_id from `allium plan` */ ], + "transition_graph": { /* verbatim subset of `allium model` */ } +} +``` + +### Top-level fields + +| Field | Type | Required | Notes | +|--------------------|----------|----------|---------------------------------------------------------------------------------------| +| `spec_path` | string | yes | Relative path to the `.allium` spec, from the working directory. | +| `code_root` | string | yes | Relative path to the implementation root (paths in `bridge.primary_symbol` are relative to this). | +| `framework` | string | yes | Backend id, matches a directory under `backends/`. The orchestrator passes this in. | +| `obligations` | array | yes | One object per obligation, see below. The set of `obligation_id`s must exactly match `allium plan`. | +| `transition_graph` | object | yes | Map of entity name → list of `{from, to, via_rule}` edges. Copy verbatim from `allium model`'s state-machine section. May be `{}` for specs with no transitions. 
| + +## Per-obligation shape + +```jsonc +{ + "obligation_id": "rule_success.ApproveClaim", + "test_kind": "state_machine", + "bridge": { + "primary_symbol": "app/services.py::approve_claim", + "candidates": ["app/routes.py::adjuster_approve"], + "confidence": "high" + }, + "preconditions": [ + "Claim.status = assessing", + "Assessment.status = completed" + ], + "fixtures_required": [ + "a_claim_in_assessing_state", + "a_completed_assessment" + ], + "injection_points": ["clock"], + "target_file": "tests/test_claim_approval.py", + "test_name": "test_approve_claim_succeeds_when_assessment_completed" +} +``` + +### Field reference + +#### `obligation_id` — string, required + +The exact obligation id from `allium plan`'s `obligations[].id`. Stage B will +**reject** the inventory if the set of `obligation_id` values does not match +the plan output exactly. No additions; no omissions. + +#### `test_kind` — enum string, required + +Picks the template the translator uses. One of: + +| Value | Use when | +|-----------------|----------------------------------------------------------------------------------------------------------| +| `assertion` | Deterministic, one-shot fact: field shape, enum membership, surface exposure, projection result. | +| `pbt` | Invariant or property holding over generated inputs. The bridge identifies the rule (or chain) to drive. | +| `state_machine` | Obligation derived from a transition graph: walk the graph with generated paths. | +| `temporal` | Deadline-/clock-driven trigger: requires controllable time. `injection_points` must include `clock`. | +| `scenario` | Cross-entity / multi-rule happy-path or edge case scripted as ordered steps. | +| `contract` | `demands`/`fulfils` between two surfaces, or a contract's `@invariant`. 
| + +The mapping from `allium plan` `category` to `test_kind` is **not** mechanical +(some categories produce different kinds depending on whether the construct +has a transition graph or expression-bearing invariant) — that's why this +field is part of the inventory, voted on by K subagents. + +Suggested defaults (the LLM may override when the obligation warrants it): + +| `category` from `allium plan` | Default `test_kind` | +|-------------------------------|---------------------| +| `entity_fields` | `assertion` | +| `entity_optional` | `assertion` | +| `entity_relationship` | `assertion` | +| `value_equality` | `assertion` | +| `enum_comparable` | `assertion` | +| `derived` | `assertion` | +| `projection` | `assertion` | +| `config_default` | `assertion` | +| `rule_success` | `state_machine` if the rule sits on a graph edge, else `scenario`. | +| `rule_failure` | `assertion` | +| `rule_entity_creation` | `scenario` | +| `invariant` | `pbt` if expression-bearing, else `assertion`. | +| `temporal` | `temporal` | +| `surface_actor` | `assertion` | +| `surface_provides` | `assertion` | +| `contract_signature` | `contract` | + +#### `bridge` — object, required + +Tells the translator which implementation symbol witnesses the obligation. + +```jsonc +{ + "primary_symbol": "app/services.py::approve_claim", + "candidates": ["app/routes.py::adjuster_approve"], + "confidence": "high" +} +``` + +- `primary_symbol`: a string in the universal `<path>::<symbol>` form, or + `null` if no confident bridge can be identified. + - `<path>` is relative to `code_root`. + - `<symbol>` is the implementation symbol's name in the project's language. + The backend's `conventions.md` defines what a valid symbol looks like + (Python: bare function name or `ClassName.method`; TypeScript: named + export or `ClassName.method`; future languages: language-appropriate). + - The `::` separator is universal — it is **always** two colons, never `.`, + `#`, or a single `:`. The canonicaliser parses on this exact token.
+- `candidates`: array of additional `<path>::<symbol>` strings observed in + the codebase that *could* be the witness. May be empty. **Must not include + the value of `primary_symbol`.** +- `confidence`: + - `"high"` — exactly one obvious witness was found; `candidates` is empty + or contains only deprecated/old forms. + - `"medium"` — a primary is clear, but plausible alternatives exist + (e.g. both a route handler and an underlying service method). + - `"low"` — the witness is genuinely ambiguous. **Only valid when** + `candidates.length >= 2` **or** `primary_symbol` is `null`. The merged + inventory will downgrade to a stub test if K subagents fail to converge + on a single primary. + +#### `preconditions` — array of strings, required + +Prose-form predicates that must hold for the obligation to be tested. Copy +the `requires` clause(s) of the rule witnessing the obligation, in the +form `<Entity>.<field> = <value>` or `<Entity>.<field>?` etc. +The order does not matter (the canonicaliser sorts). + +Example: `["Claim.status = assessing", "Assessment.status = completed"]`. + +For obligations that have no preconditions (e.g. `entity_fields`), use `[]`. + +#### `fixtures_required` — array of strings, required + +Abstract fixture names the test will need. These are *names*, not language- +specific function or builder identifiers — backend templates know how to +turn `"a_claim_in_assessing_state"` into a pytest fixture or a TypeScript +factory call. Naming convention: `a_<entity>_in_<state>_state`, +`a_<entity>_with_<feature>`, `a_<entity>` (default). + +Order does not matter. Use `[]` for obligations needing no fixture (most +`entity_fields` / `enum_comparable` obligations). + +#### `injection_points` — array of strings, required + +Abstract test-infrastructure seams the test depends on. Currently defined: + +| Value | Meaning | +|-----------|----------------------------------------------------------------------------------| +| `clock` | Test must control time (deadline tests, temporal triggers).
| +| `random` | Test must control randomness (generators with non-determinism upstream). | +| `network` | Test exercises an integration boundary needing a stub/recorded response. | + +The backend's `manifest.json` records how each seam is realised +(`monkeypatch` in Python, `jest.useFakeTimers()` in Jest, etc.). The +translator does not require all subagents to agree exhaustively here — +the merge takes set-union for `injection_points`. + +#### `target_file` and `test_name` — strings, required (but recomputed) + +The LLM fills these in for sanity (so the inventory is reviewable on its +own), but the canonicaliser **overwrites both** using the backend's +`name-policy.json`. They exist in the schema for two reasons: + +1. They make a single inventory readable without consulting the backend. +2. They give Stage B a fallback when `name-policy.json` is missing a rule. + +Treat the LLM-supplied values as advisory. Naming is determined by the +backend, not by the model. + +## `transition_graph` — object, required + +Verbatim copy of the relevant section of `allium model`'s output. Keys are +entity names; values are arrays of edges: + +```jsonc +{ + "Claim": [ + { "from": "submitted", "to": "triaged", "via_rule": "TriageClaim" }, + { "from": "triaged", "to": "assessing", "via_rule": "AssignAssessor" }, + { "from": "assessing", "to": "approved", "via_rule": "ApproveClaim" } + ], + "Assessment": [ /* ... */ ] +} +``` + +If `allium model` reports no transition graph for any entity, use `{}`. +The merge step expects unanimous agreement here (since `allium model` is +deterministic); any divergence is logged as a warning. + +## Language-agnostic invariants + +The Stage B canonicaliser **rejects** any inventory that violates these: + +1. The set of `obligations[].obligation_id` values is exactly the set of + `obligations[].id` values from `allium plan`. No additions; no omissions. +2.
`bridge.confidence: "low"` is only valid when `candidates.length >= 2` + or `primary_symbol` is `null`. +3. `bridge.candidates` does not contain `bridge.primary_symbol`. +4. `framework` refers to an existing directory under `backends/`. +5. Every `<path>` in a `<path>::<symbol>` exists relative to `code_root`. + (The canonicaliser does **not** verify that `<symbol>` exists in the + file — that's the runner's job in Stage C.) + +## Self-check before emitting + +Run this checklist mentally before writing the file: + +- [ ] `obligations[]` has exactly one entry per `obligation_id` in `allium plan`. +- [ ] Every `bridge.primary_symbol` uses `<path>::<symbol>` with exactly two + colons as the separator. +- [ ] Every `bridge.primary_symbol` path is relative to `code_root`. +- [ ] `bridge.confidence: "low"` is paired with multiple candidates or a + null primary. +- [ ] `preconditions[]` use `<Entity>.<field>` form, not implementation + paths or `<symbol>` names. +- [ ] `fixtures_required[]` and `injection_points[]` use the abstract names + from this document, not backend-specific identifiers. +- [ ] `transition_graph` matches `allium model` for every entity referenced + by a `state_machine` obligation. +- [ ] `framework` matches a real backend directory. + +If any check fails, the canonicaliser will reject the inventory and Stage B +will discard this sample from the consensus. From 2893ff3302a9a63195622d0c8118000d2f444b0d Mon Sep 17 00:00:00 2001 From: Yavor Panayotov Date: Sun, 17 May 2026 21:26:16 +0300 Subject: [PATCH 2/2] propagate: pytest+hypothesis and jest+fastcheck reference backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two reference backends prove the dispatcher works across (language × test framework × PBT framework) combinations. Each backend is a self-contained directory under skills/propagate/backends/<id>/ with no translator-side code changes required to add it.
Each backend consists of: - manifest.json declares language, file extension, runner command, report format, imports lists per test_kind, and the named bridge_import transform that turns :: into an idiomatic import line. - name-policy.json declares casing rules and the file/test name patterns the canonicaliser applies. - conventions.md human-readable guidance for Stage A subagents on how to populate the bridge field for this language. - templates/ six placeholder-driven templates (test-file, assertion, pbt-property, state-machine, stub-unresolved, fixture). pytest+hypothesis: - python_module bridge import (app/services.py::approve_claim -> "from app.services import approve_claim") - conftest fixture style - pytest-junitxml runner adapter jest+fastcheck: - typescript_relative bridge import (rewrites relative to the test file's location, e.g. "../src/services/claim") - in-file fixture style (factories declared next to tests) - jest-json runner adapter A third backend is the documented exercise in references/backend-authoring-guide.md (no translator changes required). 
--- .../backends/jest+fastcheck/conventions.md | 122 ++++++++++++++++++ .../backends/jest+fastcheck/manifest.json | 28 ++++ .../backends/jest+fastcheck/name-policy.json | 7 + .../jest+fastcheck/templates/assertion.tmpl | 13 ++ .../jest+fastcheck/templates/fixture.tmpl | 6 + .../templates/pbt-property.tmpl | 18 +++ .../templates/state-machine.tmpl | 17 +++ .../templates/stub-unresolved.tmpl | 14 ++ .../jest+fastcheck/templates/test-file.tmpl | 9 ++ .../backends/pytest+hypothesis/conventions.md | 117 +++++++++++++++++ .../backends/pytest+hypothesis/manifest.json | 31 +++++ .../pytest+hypothesis/name-policy.json | 7 + .../templates/assertion.tmpl | 15 +++ .../pytest+hypothesis/templates/fixture.tmpl | 8 ++ .../templates/pbt-property.tmpl | 17 +++ .../templates/state-machine.tmpl | 24 ++++ .../templates/stub-unresolved.tmpl | 15 +++ .../templates/test-file.tmpl | 10 ++ 18 files changed, 478 insertions(+) create mode 100644 skills/propagate/backends/jest+fastcheck/conventions.md create mode 100644 skills/propagate/backends/jest+fastcheck/manifest.json create mode 100644 skills/propagate/backends/jest+fastcheck/name-policy.json create mode 100644 skills/propagate/backends/jest+fastcheck/templates/assertion.tmpl create mode 100644 skills/propagate/backends/jest+fastcheck/templates/fixture.tmpl create mode 100644 skills/propagate/backends/jest+fastcheck/templates/pbt-property.tmpl create mode 100644 skills/propagate/backends/jest+fastcheck/templates/state-machine.tmpl create mode 100644 skills/propagate/backends/jest+fastcheck/templates/stub-unresolved.tmpl create mode 100644 skills/propagate/backends/jest+fastcheck/templates/test-file.tmpl create mode 100644 skills/propagate/backends/pytest+hypothesis/conventions.md create mode 100644 skills/propagate/backends/pytest+hypothesis/manifest.json create mode 100644 skills/propagate/backends/pytest+hypothesis/name-policy.json create mode 100644 skills/propagate/backends/pytest+hypothesis/templates/assertion.tmpl create mode 
100644 skills/propagate/backends/pytest+hypothesis/templates/fixture.tmpl create mode 100644 skills/propagate/backends/pytest+hypothesis/templates/pbt-property.tmpl create mode 100644 skills/propagate/backends/pytest+hypothesis/templates/state-machine.tmpl create mode 100644 skills/propagate/backends/pytest+hypothesis/templates/stub-unresolved.tmpl create mode 100644 skills/propagate/backends/pytest+hypothesis/templates/test-file.tmpl diff --git a/skills/propagate/backends/jest+fastcheck/conventions.md b/skills/propagate/backends/jest+fastcheck/conventions.md new file mode 100644 index 0000000..67e147f --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/conventions.md @@ -0,0 +1,122 @@ +# jest+fastcheck backend conventions + +This document tells Stage A subagents how to populate the +obligation-bridge inventory's `bridge` fields when targeting a TypeScript +codebase tested with Jest + fast-check. The schema is defined in +[`../../references/obligation-bridge-schema.md`](../../references/obligation-bridge-schema.md); +this file covers the *TypeScript-specific* parts. + +## Symbol form + +A bridge uses the universal `<path>::<symbol>` form. For TypeScript: + +- `<path>` is a relative path from the inventory's `code_root` to a + `.ts` (or `.tsx` / `.js` / `.jsx`) file. Example: `src/services/builds.ts`. +- `<symbol>` is the named export that witnesses the obligation: + - **Named function export**: bare name. Example: `startBuild`. + - **Method on an exported class**: `ClassName.method`. Example: + `BuildService.start`. + - **Exported const / enum / type**: bare identifier name. Example: + `ARTIFACT_TTL_MS`, `BuildStatus`. + - **Default export**: use the bare identifier the file exports; + the import line will adjust accordingly. + +No leading `module/`, no `function ` or `class ` keyword, no +parentheses, no type annotations. Pure exported identifier.
+ +### Examples + +| Construct | `bridge.primary_symbol` | +|----------------------------------------------------------------------|-----------------------------------------------| +| `export function startBuild(p: Pipeline): Build { ... }` | `src/services/builds.ts::startBuild` | +| `export class BuildService { start(p: Pipeline): Build { ... } }` | `src/services/builds.ts::BuildService.start` | +| Route handler `app.post("/builds/:id/cancel", cancelBuild)` | `src/routes.ts::cancelBuild` | +| `export const ARTIFACT_TTL_MS = 7 * 24 * 60 * 60 * 1000;` | `src/models.ts::ARTIFACT_TTL_MS` | +| `export enum BuildStatus { ... }` | `src/models.ts::BuildStatus` | +| Scheduled job `export function expireOldArtifacts(): void { ... }` | `src/jobs.ts::expireOldArtifacts` | + +## Directory layout assumed by templates + +``` +<code_root>/ +├── src/ ← implementation +│ ├── models.ts +│ ├── routes.ts +│ ├── webhooks.ts +│ ├── jobs.ts +│ ├── services/ +│ │ ├── builds.ts +│ │ └── artifacts.ts +│ └── integrations/ +│ └── storage.ts +├── tests/ ← propagate writes here +│ ├── pipeline.test.ts +│ ├── build.test.ts +│ └── ... +├── package.json +└── tsconfig.json +``` + +If the target project deviates (e.g. `__tests__/` directories +co-located with the source rather than a top-level `tests/`), v1 will +still write under `tests/`. + +## Test-infrastructure assumptions + +- **Test framework**: Jest. The runner command is `npx jest`. +- **PBT framework**: fast-check. State-machine tests use `fc.commands` + + `fc.modelRun`. +- **Fixtures**: in-file factory functions. The translator writes a + `function <fixture_name>(): unknown { return null; }` stub into each test file that + references a fixture; engineers replace these with real factories. + (There is no auto-injection like pytest's `conftest.py`.) +- **Module resolution**: imports use relative paths. The translator + rewrites `<path>` (relative to `code_root`) into a path relative to + the test file's location.
+ +## Injection points + +| `injection_points[]` value | Idiom in generated tests | +|----------------------------|------------------------------------| +| `clock` | `jest.useFakeTimers()` | +| `random` | `jest.spyOn(Math, "random")` | +| `network` | `jest.spyOn(<module>, "<method>")` | + +These are passed to templates as `{{injection.clock}}` etc., resolved +from `manifest.json`'s `*_injection` fields. A project using `vitest` +or `sinon` instead of jest's built-ins can fork this backend with +adjusted manifest values. + +## Stub form + +When `bridge.confidence` resolves to `"low"` in the merged inventory, +the translator emits a skipped test using +`test.skip("<name> [bridge-unresolved]", () => {...})`. The string +`bridge-unresolved` is the `skip_marker` from `manifest.json` and is +what Stage C greps for in the runner report. + +Example: + +```typescript +test.skip("startBuild succeeds when pipeline active [bridge-unresolved]", () => { + // TODO: bridge unresolved + // candidates: + // - src/services/builds.ts::startBuild + // - src/routes.ts::startBuildRoute + // preconditions: + // - Pipeline.status = active +}); +``` + +## Self-check for Stage A subagents + +- [ ] Every `bridge.primary_symbol` parses as `<path>::<symbol>` with + exactly two colons. +- [ ] Every `<path>` exists relative to `code_root` and ends in one + of `.ts`, `.tsx`, `.js`, `.jsx`. +- [ ] Every `<symbol>` is a valid TypeScript identifier or + `ClassName.method` chain — no parentheses, no `function`/`class` + keyword, no decorators. +- [ ] `fixtures_required[]` uses abstract names; no + `() => {}`-shaped strings. +- [ ] `injection_points[]` uses one of `clock`, `random`, `network`.
diff --git a/skills/propagate/backends/jest+fastcheck/manifest.json b/skills/propagate/backends/jest+fastcheck/manifest.json new file mode 100644 index 0000000..e279949 --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/manifest.json @@ -0,0 +1,28 @@ +{ + "manifest_version": 1, + "id": "jest+fastcheck", + "language": "typescript", + "file_extension": ".test.ts", + "test_file_prefix": "", + "runner": { + "command": ["npx", "jest", "--json", "--outputFile={report_path}"], + "report_format": "jest-json", + "scope_args": ["{test_root}"] + }, + "imports_style": "typescript", + "imports": { + "base": ["import fc from \"fast-check\";"], + "pbt": [], + "state_machine": [], + "temporal": [] + }, + "bridge_import": { + "transform": "typescript_relative" + }, + "fixture_style": "in-file", + "stub_idiom": "test.skip", + "skip_marker": "bridge-unresolved", + "clock_injection": "jest.useFakeTimers()", + "random_injection": "jest.spyOn(Math, \"random\")", + "network_injection": "jest.spyOn" +} diff --git a/skills/propagate/backends/jest+fastcheck/name-policy.json b/skills/propagate/backends/jest+fastcheck/name-policy.json new file mode 100644 index 0000000..2d1761b --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/name-policy.json @@ -0,0 +1,7 @@ +{ + "test_name_case": "snake", + "file_name_case": "kebab", + "directory_layout": "tests/", + "file_pattern": "{obligation_subject}{file_extension}", + "test_name_pattern": "{obligation_id_slug}" +} diff --git a/skills/propagate/backends/jest+fastcheck/templates/assertion.tmpl b/skills/propagate/backends/jest+fastcheck/templates/assertion.tmpl new file mode 100644 index 0000000..f7f7bc7 --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/templates/assertion.tmpl @@ -0,0 +1,13 @@ +test("{{test_name}}", () => { + // obligation: {{obligation.obligation_id}} + // bridge: {{bridge.primary_symbol}}{{#if preconditions}} + // preconditions: +{{#each preconditions}} // - {{it}} +{{/each}}{{/if}} +{{#each 
fixtures_required}} const {{it}}_value = {{it}}(); +{{/each}} + // TODO: invoke {{bridge.primary_symbol}} and assert the obligation holds. + // The import above validates the bridge symbol exists (compile-time); + // replace the body below with a real runtime assertion. + expect(true).toBe(true); +}); \ No newline at end of file diff --git a/skills/propagate/backends/jest+fastcheck/templates/fixture.tmpl b/skills/propagate/backends/jest+fastcheck/templates/fixture.tmpl new file mode 100644 index 0000000..4c2b215 --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/templates/fixture.tmpl @@ -0,0 +1,6 @@ +// Auto-generated fixture factory for '{{fixture_name}}'. +// TODO: replace this stub with a real factory matching the project's +// existing test conventions. +function {{fixture_name}}(): unknown { + return null; +} \ No newline at end of file diff --git a/skills/propagate/backends/jest+fastcheck/templates/pbt-property.tmpl b/skills/propagate/backends/jest+fastcheck/templates/pbt-property.tmpl new file mode 100644 index 0000000..47acb82 --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/templates/pbt-property.tmpl @@ -0,0 +1,18 @@ +test("{{test_name}}", () => { + // obligation: {{obligation.obligation_id}} + // property test — invariant must hold across generated states. + // bridge: {{bridge.primary_symbol}}{{#if preconditions}} + // preconditions: +{{#each preconditions}} // - {{it}} +{{/each}}{{/if}} +{{#each fixtures_required}} const {{it}}_value = {{it}}(); +{{/each}} + // TODO: replace fc.anything() with a generator that builds inputs + // satisfying the preconditions, then call {{bridge.primary_symbol}} + // and assert the invariant. 
+ fc.assert( + fc.property(fc.anything(), (state: unknown) => { + return state !== undefined || state === undefined; + }), + ); +}); \ No newline at end of file diff --git a/skills/propagate/backends/jest+fastcheck/templates/state-machine.tmpl b/skills/propagate/backends/jest+fastcheck/templates/state-machine.tmpl new file mode 100644 index 0000000..69b7c6f --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/templates/state-machine.tmpl @@ -0,0 +1,17 @@ +describe("{{state_machine_class_name}}", () => { + // obligation: {{obligation.obligation_id}} + // + // Walks the declared transition graph using fc.commands; each edge is a + // command that calls the witnessing function and asserts the entity + // reaches the target state. + // + // bridge: {{bridge.primary_symbol}} + +{{#each transition_graph_for_entity}} test("{{test_name}}__{{it.from}}_to_{{it.to}}", () => { + // via_rule: {{it.via_rule}} + // TODO: drive {{bridge.primary_symbol}} (or the specific witness + // for {{it.via_rule}}) from state '{{it.from}}' to '{{it.to}}'. 
+ expect(true).toBe(true); + }); + +{{/each}}}); \ No newline at end of file diff --git a/skills/propagate/backends/jest+fastcheck/templates/stub-unresolved.tmpl b/skills/propagate/backends/jest+fastcheck/templates/stub-unresolved.tmpl new file mode 100644 index 0000000..b0e508e --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/templates/stub-unresolved.tmpl @@ -0,0 +1,14 @@ +test.skip("{{test_name}} [{{manifest.skip_marker}}]", () => { + // TODO: bridge unresolved + // + // obligation: {{obligation.obligation_id}} + // test_kind: {{obligation.test_kind}} +{{#if bridge.candidates}} // + // candidates: +{{#each bridge.candidates}} // - {{it}} +{{/each}}{{/if}}{{#if bridge.primary_symbol}} // + // primary (low confidence): {{bridge.primary_symbol}} +{{/if}}{{#if preconditions}} // + // preconditions: +{{#each preconditions}} // - {{it}} +{{/each}}{{/if}}}); \ No newline at end of file diff --git a/skills/propagate/backends/jest+fastcheck/templates/test-file.tmpl b/skills/propagate/backends/jest+fastcheck/templates/test-file.tmpl new file mode 100644 index 0000000..d155a70 --- /dev/null +++ b/skills/propagate/backends/jest+fastcheck/templates/test-file.tmpl @@ -0,0 +1,9 @@ +{{!Generated by propagate (deterministic pipeline). Do not edit by hand.}} +{{#each imports}}{{it}} +{{/each}} +{{#each fixtures}}{{it}} + +{{/each}} +{{#each tests}}{{it}} + +{{/each}} \ No newline at end of file diff --git a/skills/propagate/backends/pytest+hypothesis/conventions.md b/skills/propagate/backends/pytest+hypothesis/conventions.md new file mode 100644 index 0000000..a02806a --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/conventions.md @@ -0,0 +1,117 @@ +# pytest+hypothesis backend conventions + +This document tells Stage A subagents how to populate the +obligation-bridge inventory's `bridge` fields when targeting a Python +codebase tested with pytest + Hypothesis. 
The schema is defined in +[`../../references/obligation-bridge-schema.md`](../../references/obligation-bridge-schema.md); +this file covers the *Python-specific* parts. + +## Symbol form + +A bridge uses the universal `<path>::<symbol>` form. For Python: + +- `<path>` is a relative path from the inventory's `code_root` to a `.py` + file. Example: `app/services.py`. +- `<symbol>` is the function or method that witnesses the obligation: + - **Free function**: bare function name. Example: `approve_claim`. + - **Method on a class**: `ClassName.method_name`. Example: + `ClaimService.approve`. + - **Nested classes**: `Outer.Inner.method`. + - **Module-level constant / dataclass / enum**: bare identifier name. + Example: `MAX_CLAIM_AMOUNT_PENCE`, `ClaimStatus`. + +No leading `module.`, no `def `, no parentheses, no type annotations. +Pure identifier. + +### Examples + +| Construct | `bridge.primary_symbol` | +|----------------------------------------------------|----------------------------------------| +| `def approve_claim(claim, assessor): ...` | `app/services.py::approve_claim` | +| `class ClaimService: def approve(self, c): ...` | `app/services.py::ClaimService.approve`| +| Route handler `@app.post("/claims/{id}/approve")` | `app/routes.py::approve_route` (function name) | +| Hypothesis-style generator | `tests/builders.py::build_claim` | +| Scheduled job `def stalled_claim_sweep(): ...` | `app/jobs.py::stalled_claim_sweep` | + +## Directory layout assumed by templates + +``` +<code_root>/ +├── app/ ← implementation +│ ├── models.py +│ ├── routes.py +│ ├── services.py +│ ├── jobs.py +│ └── webhooks.py +├── tests/ ← propagate writes here +│ ├── conftest.py ← fixtures land here (see fixture_style: conftest) +│ ├── test_claim.py +│ └── ... +└── pyproject.toml +``` + +If the target project deviates significantly (e.g.
tests live in +`<code_root>/test/` instead of `<code_root>/tests/`, or sources live +directly at `code_root` without an `app/` directory), v1 will still +write under `tests/` — see plan's "Non-goals (v1)" entry on convention +overrides. + +## Test-infrastructure assumptions + +- **Test framework**: pytest. The runner command is `python3 -m pytest`. +- **PBT framework**: Hypothesis. State-machine tests use + `hypothesis.stateful.RuleBasedStateMachine`. +- **Fixtures**: pytest fixtures in `tests/conftest.py`. The translator + writes new fixtures to `conftest.py` only when `fixtures_required[]` + names a fixture not already declared. + +## Injection points + +| `injection_points[]` value | Idiom in generated tests | +|----------------------------|-----------------------------------------------------------------| +| `clock` | `monkeypatch.setattr("app.services.now", lambda: <fixed_time>)` | +| `random` | `monkeypatch.setattr("random.random", lambda: <fixed_value>)` | +| `network` | `monkeypatch.setattr` against the integration module's helpers | + +These are passed to templates as `{{injection.clock}}` etc., resolved +from `manifest.json`'s `*_injection` fields. The default for all three +is `monkeypatch`. A project using a more specialised library (`freezegun`, +`respx`, …) can fork this backend with adjusted manifest values. + +## Stub form + +When `bridge.confidence` resolves to `"low"` in the merged inventory, +the translator emits a skipped test using +`pytest.skip("bridge-unresolved")`. The string `bridge-unresolved` is the +`skip_marker` from `manifest.json` and is what Stage C greps for in the +runner report. Engineers reading the test see a TODO block with the +candidate symbols and preconditions.
+ +Example: + +```python +def test_approve_claim_succeeds_when_assessment_completed(): + """TODO: bridge unresolved + + candidates: + - app/services.py::approve_claim + - app/routes.py::adjuster_approve_route + + preconditions: + - Claim.status = assessing + - Assessment.status = completed + """ + pytest.skip("bridge-unresolved") +``` + +## Self-check for Stage A subagents + +- [ ] Every `bridge.primary_symbol` parses as `::` with + exactly two colons. +- [ ] Every `` exists relative to `code_root` and ends in `.py`. +- [ ] Every `` is a valid Python identifier or + `ClassName.method_name` chain — no parentheses, no `def`, no + decorators. +- [ ] `fixtures_required[]` uses abstract names; no `fixture_` prefix, + no `@pytest.fixture`-shaped strings. +- [ ] `injection_points[]` uses one of `clock`, `random`, `network`. diff --git a/skills/propagate/backends/pytest+hypothesis/manifest.json b/skills/propagate/backends/pytest+hypothesis/manifest.json new file mode 100644 index 0000000..ab0fd0c --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/manifest.json @@ -0,0 +1,31 @@ +{ + "manifest_version": 1, + "id": "pytest+hypothesis", + "language": "python", + "file_extension": ".py", + "test_file_prefix": "test_", + "runner": { + "command": ["python3", "-m", "pytest", "--junit-xml={report_path}"], + "report_format": "pytest-junitxml", + "scope_args": ["{test_root}"] + }, + "imports_style": "python", + "imports": { + "base": ["import pytest"], + "pbt": ["from hypothesis import HealthCheck, assume, given, settings, strategies as st"], + "state_machine": [ + "from hypothesis import strategies as st", + "from hypothesis.stateful import RuleBasedStateMachine, rule" + ], + "temporal": ["from hypothesis import HealthCheck, assume, given, settings, strategies as st"] + }, + "bridge_import": { + "transform": "python_module" + }, + "fixture_style": "conftest", + "stub_idiom": "pytest.skip", + "skip_marker": "bridge-unresolved", + "clock_injection": "monkeypatch", + 
"random_injection": "monkeypatch", + "network_injection": "monkeypatch" +} diff --git a/skills/propagate/backends/pytest+hypothesis/name-policy.json b/skills/propagate/backends/pytest+hypothesis/name-policy.json new file mode 100644 index 0000000..f1f5322 --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/name-policy.json @@ -0,0 +1,7 @@ +{ + "test_name_case": "snake", + "file_name_case": "snake", + "directory_layout": "tests/", + "file_pattern": "test_{obligation_subject}{file_extension}", + "test_name_pattern": "test_{obligation_id_slug}" +} diff --git a/skills/propagate/backends/pytest+hypothesis/templates/assertion.tmpl b/skills/propagate/backends/pytest+hypothesis/templates/assertion.tmpl new file mode 100644 index 0000000..2395d79 --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/templates/assertion.tmpl @@ -0,0 +1,15 @@ +def {{test_name}}({{#each fixtures_required}}{{#if index}}, {{/if}}{{it}}{{/each}}): + """obligation: {{obligation.obligation_id}} + + bridge: {{bridge.primary_symbol}}{{#if preconditions}} + + preconditions: +{{#each preconditions}} - {{it}} +{{/each}}{{/if}} + """ + # TODO: invoke {{bridge.primary_symbol}} and assert the obligation holds. + # This stub is structurally complete: fixtures, bridge, and preconditions + # are wired in. Fill in the assertion body against the implementation. + assert {{bridge.primary_symbol_local}} is not None, ( + "obligation {{obligation.obligation_id}} witness {{bridge.primary_symbol}} not importable" + ) \ No newline at end of file diff --git a/skills/propagate/backends/pytest+hypothesis/templates/fixture.tmpl b/skills/propagate/backends/pytest+hypothesis/templates/fixture.tmpl new file mode 100644 index 0000000..d28924a --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/templates/fixture.tmpl @@ -0,0 +1,8 @@ +@pytest.fixture +def {{fixture_name}}(): + """Auto-generated fixture for obligation references to '{{fixture_name}}'. 
+ + TODO: replace this stub with a real factory once the project's + existing fixture conventions are known. + """ + return None \ No newline at end of file diff --git a/skills/propagate/backends/pytest+hypothesis/templates/pbt-property.tmpl b/skills/propagate/backends/pytest+hypothesis/templates/pbt-property.tmpl new file mode 100644 index 0000000..324807b --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/templates/pbt-property.tmpl @@ -0,0 +1,17 @@ +@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) +@given(state=st.builds(dict)) +def {{test_name}}(state{{#each fixtures_required}}, {{it}}{{/each}}): + """obligation: {{obligation.obligation_id}} + + property test — invariant must hold across generated states. + + bridge: {{bridge.primary_symbol}} + {{#if preconditions}}preconditions: + {{#each preconditions}} - {{it}} + {{/each}}{{/if}} + """ + # TODO: replace the placeholder state strategy above with a real + # generator that builds inputs satisfying preconditions, then call + # {{bridge.primary_symbol_local}} and assert the invariant. + assume(state is not None) + assert {{bridge.primary_symbol_local}} is not None \ No newline at end of file diff --git a/skills/propagate/backends/pytest+hypothesis/templates/state-machine.tmpl b/skills/propagate/backends/pytest+hypothesis/templates/state-machine.tmpl new file mode 100644 index 0000000..35d76e6 --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/templates/state-machine.tmpl @@ -0,0 +1,24 @@ +class {{state_machine_class_name}}(RuleBasedStateMachine): + """obligation: {{obligation.obligation_id}} + + Walks the declared transition graph; each edge is a Hypothesis rule + that calls the witnessing function and asserts the entity reaches + the target state. 
+ + bridge: {{bridge.primary_symbol}} + """ + + def __init__(self): + super().__init__() + self.entity = None + + {{#each transition_graph_for_entity}}@rule() + def transition_{{it.from}}_to_{{it.to}}(self): + # via_rule: {{it.via_rule}} + # TODO: drive {{bridge.primary_symbol_local}} (or the specific witness + # for {{it.via_rule}}) from state '{{it.from}}' to '{{it.to}}'. + pass + + {{/each}} + +{{test_name}} = {{state_machine_class_name}}.TestCase \ No newline at end of file diff --git a/skills/propagate/backends/pytest+hypothesis/templates/stub-unresolved.tmpl b/skills/propagate/backends/pytest+hypothesis/templates/stub-unresolved.tmpl new file mode 100644 index 0000000..d2d35c4 --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/templates/stub-unresolved.tmpl @@ -0,0 +1,15 @@ +def {{test_name}}({{#each fixtures_required}}{{#if index}}, {{/if}}{{it}}{{/each}}): + """TODO: bridge unresolved + + obligation: {{obligation.obligation_id}} + test_kind: {{obligation.test_kind}} +{{#if bridge.candidates}} + candidates: +{{#each bridge.candidates}} - {{it}} +{{/each}}{{/if}}{{#if bridge.primary_symbol}} + primary (low confidence): {{bridge.primary_symbol}} +{{/if}}{{#if preconditions}} + preconditions: +{{#each preconditions}} - {{it}} +{{/each}}{{/if}} """ + pytest.skip("{{manifest.skip_marker}}") \ No newline at end of file diff --git a/skills/propagate/backends/pytest+hypothesis/templates/test-file.tmpl b/skills/propagate/backends/pytest+hypothesis/templates/test-file.tmpl new file mode 100644 index 0000000..4688dc9 --- /dev/null +++ b/skills/propagate/backends/pytest+hypothesis/templates/test-file.tmpl @@ -0,0 +1,10 @@ +{{!Generated by propagate (deterministic pipeline). Do not edit by hand.}} +{{#each imports}}{{it}} +{{/each}} + +{{#each fixtures}}{{it}} + +{{/each}} +{{#each tests}}{{it}} + +{{/each}} \ No newline at end of file