// repoRoot returns the project root relative to this package's source directory
// (two levels up). The path is relative, so it resolves against the working
// directory of the running test.
func repoRoot() string {
	return filepath.Join("..", "..")
}

// parseDispatchSourceFile parses <repoRoot>/internal/dispatch/<base> and fails the
// calling test immediately if the file cannot be read or parsed.
func parseDispatchSourceFile(t *testing.T, base string) *ast.File {
	t.Helper()
	src := filepath.Join(repoRoot(), "internal", "dispatch", base)
	f, err := parser.ParseFile(token.NewFileSet(), src, nil, 0)
	if err != nil {
		t.Fatalf("parse %s: %v", base, err)
	}
	return f
}

// hostEnvVar reports whether internal/dispatch/build.go mentions the env-var name
// as a string literal. It returns name when some string literal in that file has
// exactly that content, else "".
//
// This is the independent, code-side source for "the skill branches on the same
// env var the binary reads": if build.go drops the literal, a check bound to this
// helper goes red.
//
// NOTE(review): the match is on ANY string literal in build.go equal to name, not
// only on arguments of a getenv-style call — a literal used for another purpose
// would satisfy it too. Tighten to call arguments if build.go's shape allows.
func hostEnvVar(t *testing.T, name string) string {
	t.Helper()
	f := parseDispatchSourceFile(t, "build.go")
	found := ""
	ast.Inspect(f, func(n ast.Node) bool {
		if found != "" {
			// Already matched: prune every remaining subtree instead of
			// walking the rest of the file.
			return false
		}
		lit, ok := n.(*ast.BasicLit)
		if ok && lit.Kind == token.STRING && trimLit(lit.Value) == name {
			found = name
		}
		return found == ""
	})
	return found
}

// dispatchSubcommands collects the string-literal case labels of every
// `switch args[...]` statement inside a function or method named Run in
// internal/dispatch/dispatch.go. It is the independent source for the
// claude-helper / relocated-command checks: a renamed subcommand shifts this set
// rather than letting a test self-match a frozen literal.
//
// The test fails if no labels are found, so an AST-shape drift in dispatch.go
// surfaces as a failure instead of an empty (vacuously satisfied) set.
func dispatchSubcommands(t *testing.T) map[string]bool {
	t.Helper()
	f := parseDispatchSourceFile(t, "dispatch.go")
	subs := map[string]bool{}
	for _, decl := range f.Decls {
		fn, ok := decl.(*ast.FuncDecl)
		if !ok || fn.Name.Name != "Run" {
			continue
		}
		ast.Inspect(fn, func(n ast.Node) bool {
			sw, ok := n.(*ast.SwitchStmt)
			if !ok {
				return true
			}
			// Only switches whose tag indexes the identifier `args` count.
			tag, ok := sw.Tag.(*ast.IndexExpr)
			if !ok {
				return true
			}
			if id, ok := tag.X.(*ast.Ident); !ok || id.Name != "args" {
				return true
			}
			for _, stmt := range sw.Body.List {
				cc, ok := stmt.(*ast.CaseClause)
				if !ok {
					continue
				}
				for _, e := range cc.List {
					if lit, ok := e.(*ast.BasicLit); ok && lit.Kind == token.STRING {
						subs[trimLit(lit.Value)] = true
					}
				}
			}
			// The labels of this switch are recorded; do not descend into it.
			return false
		})
	}
	if len(subs) == 0 {
		t.Fatal("extracted zero dispatch subcommands from dispatch.go")
	}
	return subs
}

// trimLit strips the surrounding quotes from a Go string-literal token, for both
// interpreted ("...") and raw (`...`) forms. Input that is not wrapped in a
// matching pair of quotes is returned unchanged rather than truncated. Escape
// sequences inside an interpreted literal are NOT decoded.
func trimLit(s string) string {
	if len(s) < 2 {
		return s
	}
	if q := s[0]; (q == '"' || q == '`') && s[len(s)-1] == q {
		return s[1 : len(s)-1]
	}
	return s
}
The env-var expectation comes from the binary's +// host-derivation code, not a literal frozen against the skill — if the binary +// stops reading CODEX_THREAD_ID, or the skill stops branching on it, the two +// diverge and this reds. The adapter-content tokens are the remaining +// text-consistency portion. func TestCodexRuntimeAdaptersAreLoadable(t *testing.T) { + markCodeBoundInvariant(t, "hostEnvVar CODEX_THREAD_ID (internal/dispatch/build.go host-derivation)") + envVar := hostEnvVar(t, "CODEX_THREAD_ID") + if envVar == "" { + t.Fatal("the binary no longer reads CODEX_THREAD_ID for host derivation — the env var the skill must branch on is gone") + } root := filepath.Join("..", "..") cases := []struct { name string @@ -32,8 +45,8 @@ func TestCodexRuntimeAdaptersAreLoadable(t *testing.T) { t.Run(tc.name, func(t *testing.T) { skill := readText(t, tc.skillPath) adapterBase := filepath.Base(tc.adapter) - if !strings.Contains(skill, "CODEX_THREAD_ID") || !strings.Contains(skill, adapterBase) { - t.Fatalf("%s SKILL.md must branch on CODEX_THREAD_ID and load %s:\n%s", tc.name, adapterBase, skill) + if !strings.Contains(skill, envVar) || !strings.Contains(skill, adapterBase) { + t.Fatalf("%s SKILL.md must branch on %s (the binary's host-derivation env var) and load %s:\n%s", tc.name, envVar, adapterBase, skill) } body := readText(t, tc.adapter) @@ -46,7 +59,15 @@ func TestCodexRuntimeAdaptersAreLoadable(t *testing.T) { } } +// TestCodexAwaitingCompletionPinsMailboxSemantics is a non-AC text-consistency +// lint: the Codex FO adapter carries the mailbox-wait clauses (async final-status +// notification, wait_agent-timeout-is-normal, do-not-poll). Per the proof policy +// this presence check does NOT prove the FO obeys the mailbox semantics; the +// behavior is exercised by the Codex live runner's awaiting-completion path +// (codex_live_runner_test.go / codex_idle_notification_test.go). This lint guards +// the clauses being dropped from the adapter. 
func TestCodexAwaitingCompletionPinsMailboxSemantics(t *testing.T) { + markNonAC(t, "Codex live runner awaiting-completion path (internal/ensigncycle codex_live_runner + codex_idle_notification)") body := readText(t, filepath.Join("..", "..", "skills", "first-officer", "references", "codex-first-officer-runtime.md")) for _, want := range []string{ "async final-status notification in the FO mailbox", diff --git a/internal/hostneutrality/ensign_dev_leakage_locks_test.go b/internal/hostneutrality/ensign_dev_leakage_locks_test.go index 50e38e18..039a03a8 100644 --- a/internal/hostneutrality/ensign_dev_leakage_locks_test.go +++ b/internal/hostneutrality/ensign_dev_leakage_locks_test.go @@ -40,6 +40,7 @@ var devLeakageCorePaths = []string{ // code substrate in the worktree-isolation clause. A re-introduction of the // banned literal fails the test (negative proof of lock-in). func TestNoDevLeakageInUniversalCore(t *testing.T) { + markNonAC(t, "text-hygiene lint, NOT a behavioral claim — a property of the text (the universal core stays free of dev-discipline prose). No behavioral oracle: there is nothing for the FO/ensign to DO; the value is catching accidental dev-leakage back into the universal contract.") for _, path := range devLeakageCorePaths { t.Run(filepath.Base(path), func(t *testing.T) { body, err := os.ReadFile(path) @@ -71,6 +72,7 @@ func TestNoDevLeakageInUniversalCore(t *testing.T) { // "CODE only" noun must NOT delete the worktree-isolation boundary — both cores // must still carry an isolation clause naming the worktree. func TestWorktreeIsolationClauseSurvives(t *testing.T) { + markNonAC(t, "text-hygiene lint, NOT a behavioral claim — a property of the text (an isolation clause survives in the cores). 
No behavioral oracle: the worktree-isolation BEHAVIOR is enforced by the dispatch worktree machinery, not this clause; the lint only guards the clause from being deleted when the substrate noun is neutralized.") for _, path := range devLeakageCorePaths { t.Run(filepath.Base(path), func(t *testing.T) { body, err := os.ReadFile(path) @@ -107,6 +109,7 @@ var runtimeAdapterFieldPaths = []string{ // "worktree path". Scoped to the field-enumeration sentence — a file-wide ban // would false-fail on the legitimate conditional usage elsewhere. func TestRuntimeAdaptersUseNeutralLocationVocabulary(t *testing.T) { + markNonAC(t, "text-hygiene lint, NOT a behavioral claim — a property of the text (the field-enumeration sentence uses neutral location vocabulary). No behavioral oracle and no independent code source: the vocabulary choice is prose hygiene; the lint guards against the banned 'worktree path' wording creeping back.") for _, path := range runtimeAdapterFieldPaths { t.Run(filepath.Base(path), func(t *testing.T) { body, err := os.ReadFile(path) @@ -157,6 +160,7 @@ var devHomePresence = []struct { // checkable change" deliverable-proof policy. Fails if a future edit strips a // dev home's guidance. func TestDevDisciplinesSurviveInDevHomes(t *testing.T) { + markNonAC(t, "text-hygiene lint, NOT a behavioral claim — a property of the text (the re-homed dev guidance survives in its dev home). 
No behavioral oracle and no independent code source: it is a relocate-not-delete prose consistency check, valued for catching a dev home's guidance being stripped.") for _, h := range devHomePresence { t.Run(filepath.Base(h.path), func(t *testing.T) { body, err := os.ReadFile(h.path) diff --git a/internal/hostneutrality/live_scenario_practice_test.go b/internal/hostneutrality/live_scenario_practice_test.go index 9fdf1413..3a1303cb 100644 --- a/internal/hostneutrality/live_scenario_practice_test.go +++ b/internal/hostneutrality/live_scenario_practice_test.go @@ -26,6 +26,7 @@ var recommendedPracticesSectionRe = regexp.MustCompile(`(?is)## Recommended prac // of presence check that guards the existing recommended-practice blocks; the // claim is about the text itself, so proof at the claim's own level is legit. func TestLiveScenarioRecommendedPracticePresent(t *testing.T) { + markNonAC(t, "n/a — the claim is about the dev-template text itself (the live-scenario practice is documented); proof at the claim's own level") path := filepath.Join("..", "..", "skills", "commission", "references", "templates", "development.md") body, err := os.ReadFile(path) if err != nil { diff --git a/internal/hostneutrality/nonac_marker_test.go b/internal/hostneutrality/nonac_marker_test.go new file mode 100644 index 00000000..4b271e21 --- /dev/null +++ b/internal/hostneutrality/nonac_marker_test.go @@ -0,0 +1,731 @@ +// ABOUTME: The non-AC text-consistency marker + the AC-3 sweep meta-test for the +// ABOUTME: hostneutrality suite — a presence/absence check over an instruction file proves nothing unless it self-classifies. 
// markNonAC declares a test a non-AC text-consistency lint (the prose carries a
// required clause / stays free of a banned token), NOT a behavioral proof, and
// names the behavioral oracle that does prove the behavior (a live drive, a
// code-side test, or "n/a — the claim is about the text"). The proof policy
// (f8b257cf) bans a string match over an instruction file the model reads as proof
// of any behavioral acceptance criterion.
//
// The AC-3 sweep (TestNoUndeclaredHostneutralityTautology) keys on the presence of
// this call in the test's source. At runtime the call only rejects a blank oracle
// (empty or whitespace-only), since a blank reference declares nothing.
func markNonAC(t *testing.T, oracle string) {
	t.Helper()
	if blankDeclaration(oracle) {
		t.Fatal("markNonAC requires a non-empty behavioral oracle reference")
	}
}

// markCodeBoundInvariant declares that a test's expectation comes from an
// independent code-side source (a Go const, an env-var token the binary defines, a
// dispatch subcommand, a DIFFERENT file's n-gram) that can DIVERGE from the file
// under test — a legitimate invariant, not a tautology. The sweep treats a test
// that calls it as declared. A blank source (empty or whitespace-only) fails the
// test.
func markCodeBoundInvariant(t *testing.T, source string) {
	t.Helper()
	if blankDeclaration(source) {
		t.Fatal("markCodeBoundInvariant requires a non-empty independent-source reference")
	}
}

// blankDeclaration reports whether a marker reference carries no content once
// surrounding whitespace is removed.
func blankDeclaration(ref string) bool {
	return strings.TrimSpace(ref) == ""
}
+var instructionFileReaders = map[string]bool{ + "readSkill": true, + "readText": true, + // The markdown-span parsers read an instruction file internally; a test that + // drives one is reading an instruction file even though the os.ReadFile lives + // one frame down. + "parseSpans": true, + "parseProseSpansForOverlap": true, +} + +// instructionPathIdents are the package-level path variables that resolve to a +// markdown instruction file. A test that reads one of these via a read sink +// (os.ReadFile/os.Open/io.ReadAll/bufio) is reading an ingested file — the read +// triggers the must-declare rule regardless of how the bytes are then inspected. +// (Code-scanning tests reference ../dispatch, ../status package dirs, never these.) +// Path vars defined in ANOTHER file of this package are an out-of-scope flow (M-C, +// tracked in sweep-guard-reader-axis-invert) only insofar as the var name is not in +// this list; the listed names are recognized wherever read. +var instructionPathIdents = map[string]bool{ + "foCorePath": true, + "ensignCorePath": true, + "commissionSkillPath": true, + "sharedCorePath": true, + "claudeRuntimePath": true, + "contractProseFiles": true, + "sharedCorePaths": true, + "runtimeAdapterPaths": true, + "devLeakageCorePaths": true, + "runtimeAdapterFieldPaths": true, + "devHomePresence": true, +} + +// TestNoUndeclaredHostneutralityTautology is the AC-3 sweep for this package, +// re-runnable offline. It parses every *_test.go and flags any test function that +// READS a recognized markdown INSTRUCTION file's content — via a named reader +// helper, a tainted os.ReadFile/os.Open/io read, or a WalkDir-collected `.md`, +// through the reader-axis flow shapes the taint covers (below) — UNLESS it +// self-classifies via markNonAC or markCodeBoundInvariant. 
The go/parser code-scan +// invariants (host_neutrality_test.go's scanFile over .go source via parser.ParseFile, +// NOT a content read sink) and the spanHostQualified unit test are NOT flagged: they +// read no instruction file. The undeclared-offender count is the AC-3 metric; it +// must be zero. +// +// What the guard actually guarantees (two axes — one closed, one bounded): +// +// - MATCH axis (closed, universal, load-bearing): the sweep keys on the READ, not +// on how the bytes are inspected. ONCE a read of a recognized instruction file +// is detected, the test MUST declare regardless of the inspection idiom +// (strings.Contains/Index/EqualFold, bytes.*, regexp.Regexp.Match, a bare ==) — +// the trigger is the ingest, not the match, so the whole match class is closed. +// +// - READER axis (covered flow shapes, NOT exhaustive): a read is detected for an +// in-package read of a RECOGNIZED instruction path (a skill-tree/contract +// segment or an instructionPathIdent package var) reaching a read sink through a +// bare-`string` param, a `:=`/`=` local, a struct field, a method receiver, or a +// closure capture; path built by `+` / strings.Join / filepath.Join / +// fmt.Sprintf; transitive helper chains followed to a fixpoint. +// +// KNOWN OUT-OF-SCOPE (tracked in sweep-guard-reader-axis-invert, id +// 4qnn7dbzkyh9qv65t618vtxy, backstopped by the detached adversarial audit before +// merge — NOT silently dropped): `[]string`/`...string`-param + range/slice-element +// flow (M-D), cross-package reads (M-B), a path in a package var defined in another +// file (M-C), and unrecognized surfaces such as AGENTS.md / mods/*.md (M-A). These +// are the recurring enumerated-shape reader-flow class cycles 1-3 closed instances +// of; the follow-up weighs an invert/positive predicate and a go/types+SSA taint +// that closes the class definitionally. 
+func TestNoUndeclaredHostneutralityTautology(t *testing.T) { + offenders := sweepHostneutralityTautologies(t, ".") + for _, o := range offenders { + t.Errorf("%s reads a markdown instruction file's content without self-classifying — call markNonAC (with its behavioral oracle) or markCodeBoundInvariant (with its independent source); how the bytes are inspected does not matter", o) + } + if len(offenders) > 0 { + t.Fatalf("AC-3 sweep: %d undeclared tautological-behavioral-proof test(s) in hostneutrality; the count must be zero", len(offenders)) + } +} + +func sweepHostneutralityTautologies(t *testing.T, dir string) []string { + t.Helper() + fset := token.NewFileSet() + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("read package dir %s: %v", dir, err) + } + var files []*ast.File + for _, ent := range entries { + name := ent.Name() + if ent.IsDir() || !strings.HasSuffix(name, "_test.go") { + continue + } + f, err := parser.ParseFile(fset, dir+"/"+name, nil, 0) + if err != nil { + t.Fatalf("parse %s: %v", name, err) + } + files = append(files, f) + } + + // First pass: discover this package's instruction-file reader helpers, then grow + // the set to a fixpoint so a read cannot hide behind a helper chain. Seeded with + // the named readers; a func is ALSO a reader if it ingests instruction content + // directly (readsInstructionContent — a tainted ReadFile/Open/io read, or a + // WalkDir-collected `.md`) OR (transitive) it calls a known reader. Methods are + // NOT skipped: a reader can be a method on a fixture struct (the s.path / + // method-receiver flow). The code-scan helper scanFile is NOT a reader: it uses + // parser.ParseFile (not a content read sink) over a `../dispatch` path (no + // instruction taint). 
+ taintedFields := instructionTaintedFields(files) + readers := map[string]bool{} + for r := range instructionFileReaders { + readers[r] = true + } + helperCalls := map[string]map[string]bool{} + for _, f := range files { + for _, decl := range f.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || strings.HasPrefix(fn.Name.Name, "Test") { + continue + } + helperCalls[fn.Name.Name] = collectCalls(fn) + if readsInstructionContent(fn, taintedFields) { + readers[fn.Name.Name] = true + } + } + } + for grew := true; grew; { + grew = false + for name, calls := range helperCalls { + if readers[name] { + continue + } + for r := range readers { + if calls[r] { + readers[name] = true + grew = true + break + } + } + } + } + + // Second pass: a test is an offender if it ingests instruction-file content — + // directly (readsInstructionContent) or via a discovered reader helper — and does + // NOT declare its proof standing. The sweep keys on the READ, not a match-func + // allowlist. + var offenders []string + for _, f := range files { + for _, decl := range f.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || !strings.HasPrefix(fn.Name.Name, "Test") { + continue + } + calls := collectCalls(fn) + readsInstruction := readsInstructionContent(fn, taintedFields) + for r := range readers { + if calls[r] { + readsInstruction = true + break + } + } + declared := calls["markNonAC"] || calls["markCodeBoundInvariant"] + if readsInstruction && !declared { + offenders = append(offenders, fn.Name.Name) + } + } + } + return sortedUniqueHN(offenders) +} + +// collectCalls walks a function body and returns the set of called function names +// (bare and selector trailing name), used to detect reader-helper calls and the +// markNonAC / markCodeBoundInvariant declarations. 
// collectCalls returns the set of names fn calls: the identifier of a bare call
// (`f(x)`) and the trailing selector name of a qualified or method call
// (`pkg.F(x)`, `v.m(x)` — receiver and package are dropped). The whole
// declaration is walked, so calls inside nested function literals count too.
// Calls whose callee is neither an identifier nor a selector are ignored.
//
// The sweep uses the result to spot reader-helper calls and the markNonAC /
// markCodeBoundInvariant declarations.
func collectCalls(fn *ast.FuncDecl) map[string]bool {
	names := make(map[string]bool)
	ast.Inspect(fn, func(node ast.Node) bool {
		call, isCall := node.(*ast.CallExpr)
		if !isCall {
			return true
		}
		if id, ok := call.Fun.(*ast.Ident); ok {
			names[id.Name] = true
		} else if sel, ok := call.Fun.(*ast.SelectorExpr); ok {
			names[sel.Sel.Name] = true
		}
		return true
	})
	return names
}

// readSinks are the selector names of calls that ingest a file's content given a
// path. Matching is by trailing name only, so any call spelled `x.ReadFile(...)`
// counts regardless of the package or receiver.
var readSinks = map[string]bool{
	"ReadFile":   true, // os.ReadFile
	"Open":       true, // os.Open
	"ReadAll":    true, // io.ReadAll
	"NewScanner": true, // bufio.NewScanner
	"NewReader":  true, // bufio.NewReader
}
+func readsInstructionContent(fn *ast.FuncDecl, taintedFields map[string]bool) bool { + tainted := instructionTaintedNames(fn, taintedFields) + found := false + ast.Inspect(fn, func(n ast.Node) bool { + call, ok := n.(*ast.CallExpr) + if !ok { + return true + } + sel, ok := call.Fun.(*ast.SelectorExpr) + if !ok { + return true + } + if readSinks[sel.Sel.Name] { + for _, arg := range call.Args { + if exprInstructionTainted(arg, tainted) || readsTaintedField(arg, taintedFields) { + found = true + } + } + } + if (sel.Sel.Name == "WalkDir" || sel.Sel.Name == "Walk") && fnFiltersInstructionMarkdown(fn) { + found = true + } + return true + }) + return found +} + +// readsTaintedField reports whether expr reads a struct field whose name is in the +// package-wide instruction-tainted-field set — the s.path / method-receiver flow. +func readsTaintedField(expr ast.Expr, taintedFields map[string]bool) bool { + hit := false + ast.Inspect(expr, func(n ast.Node) bool { + if sel, ok := n.(*ast.SelectorExpr); ok && taintedFields[sel.Sel.Name] { + hit = true + } + return true + }) + return hit +} + +// instructionTaintedFields scans every struct composite literal and field +// assignment across the package, returning the set of FIELD NAMES ever assigned an +// instruction-file path. Keyed by field name (no type info at parse time) — an +// over-approximation that errs toward flagging, which the proof policy wants. 
+func instructionTaintedFields(files []*ast.File) map[string]bool { + fields := map[string]bool{} + for _, f := range files { + ast.Inspect(f, func(n ast.Node) bool { + switch node := n.(type) { + case *ast.KeyValueExpr: + if key, ok := node.Key.(*ast.Ident); ok && exprInstructionTainted(node.Value, nil) { + fields[key.Name] = true + } + case *ast.AssignStmt: + for i, rhs := range node.Rhs { + if i >= len(node.Lhs) { + break + } + if sel, ok := node.Lhs[i].(*ast.SelectorExpr); ok && exprInstructionTainted(rhs, nil) { + fields[sel.Sel.Name] = true + } + } + } + return true + }) + } + return fields +} + +// instructionTaintedNames computes the set of names (params, locals, recv.field +// selectors) in fn holding a string derived from an instruction-file path. Every +// string parameter is tainted (a helper that reads a string param is a path-arg +// reader — the caller supplies the .md path). It propagates through := / = to a +// fixpoint, including a local assigned from a package-wide tainted field. +func instructionTaintedNames(fn *ast.FuncDecl, taintedFields map[string]bool) map[string]bool { + tainted := map[string]bool{} + if fn.Type.Params != nil { + for _, field := range fn.Type.Params.List { + if id, ok := field.Type.(*ast.Ident); ok && id.Name == "string" { + for _, name := range field.Names { + tainted[name.Name] = true + } + } + } + } + for grew := true; grew; { + grew = false + ast.Inspect(fn, func(n ast.Node) bool { + assign, ok := n.(*ast.AssignStmt) + if !ok { + return true + } + for i, rhs := range assign.Rhs { + if i >= len(assign.Lhs) { + break + } + if !exprInstructionTainted(rhs, tainted) && !readsTaintedField(rhs, taintedFields) { + continue + } + if name := lvalueName(assign.Lhs[i]); name != "" && !tainted[name] { + tainted[name] = true + grew = true + } + } + return true + }) + } + return tainted +} + +// lvalueName renders an assignable target as a taint key: a bare ident or a +// selector `recv.field`. 
+func lvalueName(e ast.Expr) string { + switch x := e.(type) { + case *ast.Ident: + return x.Name + case *ast.SelectorExpr: + if inner, ok := x.X.(*ast.Ident); ok { + return inner.Name + "." + x.Sel.Name + } + return x.Sel.Name + } + return "" +} + +// exprInstructionTainted reports whether expr carries an instruction-file path +// taint anywhere in its subtree: a tainted name, an instruction-path literal/segment, +// or a known instructionPathIdent package var — so the +/strings.Join/filepath.Join/ +// fmt.Sprintf/string(...) path-build idioms (whose tainted operand is a subtree node) +// are covered. It does NOT cover a taint carried in a slice element or recovered via +// a range variable (M-D) — see readsInstructionContent's NOT-covered note and the +// follow-up sweep-guard-reader-axis-invert. +func exprInstructionTainted(expr ast.Expr, tainted map[string]bool) bool { + hit := false + ast.Inspect(expr, func(n ast.Node) bool { + switch x := n.(type) { + case *ast.BasicLit: + if x.Kind == token.STRING && isInstructionPathLiteral(strings.Trim(x.Value, "`\"")) { + hit = true + } + case *ast.Ident: + if tainted[x.Name] || instructionPathIdents[x.Name] { + hit = true + } + case *ast.SelectorExpr: + if inner, ok := x.X.(*ast.Ident); ok && tainted[inner.Name+"."+x.Sel.Name] { + hit = true + } + } + return true + }) + return hit +} + +// fnFiltersInstructionMarkdown reports whether fn's body filters paths by a `.md` +// suffix — the WalkDir-collector signal. +func fnFiltersInstructionMarkdown(fn *ast.FuncDecl) bool { + hit := false + ast.Inspect(fn, func(n ast.Node) bool { + if lit, ok := n.(*ast.BasicLit); ok && lit.Kind == token.STRING { + if strings.HasSuffix(strings.Trim(lit.Value, "`\""), ".md") { + hit = true + } + } + return true + }) + return hit +} + +// instructionPathSegments are the skill-tree / contract path segments that mark a +// path literal as an instruction file. 
// instructionPathSegments are the skill-tree / contract path segments that mark a
// path literal as an instruction file. This is the RECOGNIZED-instruction-surface
// predicate — a deliberate bound, not a universal one: a literal carrying one of
// these segments is tainted even before a `.md` suffix is appended.
//
// Known out-of-scope (M-A, tracked in sweep-guard-reader-axis-invert, id
// 4qnn7dbzkyh9qv65t618vtxy): an instruction surface whose path carries none of
// these segments (e.g. AGENTS.md, mods/*.md) is not recognized.
var instructionPathSegments = map[string]bool{
	"skills":        true,
	"references":    true,
	"agents":        true,
	"first-officer": true,
	"ensign":        true,
	"commission":    true,
	"present-gate":  true,
	"SKILL.md":      true,
}

// isInstructionPathLiteral reports whether s is (a fragment of) an instruction-file
// path, i.e. it contains one of the instructionPathSegments as a substring. A
// string ending in ".json" is never an instruction path.
func isInstructionPathLiteral(s string) bool {
	if strings.HasSuffix(s, ".json") {
		return false
	}
	for segment := range instructionPathSegments {
		// Contains also covers the exact-match case.
		if strings.Contains(s, segment) {
			return true
		}
	}
	return false
}

// sortedUniqueHN returns the distinct strings of in, in ascending order. The
// input is not modified; an input with no elements yields a nil slice.
func sortedUniqueHN(in []string) []string {
	var out []string
	seen := make(map[string]bool, len(in))
	for _, s := range in {
		if seen[s] {
			continue
		}
		seen[s] = true
		out = append(out, s)
	}
	// Insertion sort by shifting: the offender lists are tiny and this file does
	// not import a sorting package.
	for i := 1; i < len(out); i++ {
		cur := out[i]
		j := i
		for ; j > 0 && out[j-1] > cur; j-- {
			out[j] = out[j-1]
		}
		out[j] = cur
	}
	return out
}

// containsStrHN reports whether want is an element of in.
func containsStrHN(in []string, want string) bool {
	for i := range in {
		if in[i] == want {
			return true
		}
	}
	return false
}
containsStrHN(off, "TestDeclaredHN") { + t.Fatalf("sweep wrongly flagged a declared lint; offenders=%v", off) + } + if containsStrHN(off, "TestCodeScanHN") { + t.Fatalf("sweep wrongly flagged a code-scanning invariant (reads no instruction file); offenders=%v", off) + } + if !containsStrHN(off, "TestUndeclaredHN") { + t.Fatalf("adding declared/codescan fixtures must not stop the sweep flagging the undeclared one; offenders=%v", off) + } + + // A multi-hop-helper tautology — the read hidden one frame down behind a wrapper + // that calls the named reader — must also be flagged: the transitive reader + // fixpoint propagates reader-ness up the call chain. Before the fixpoint the HN + // sweep left this GREEN (the validation Cycle-1 finding 1). + multiHop := `package fixture +import "strings" +func wrapHop(t *T) string { return readSkill(t, foCorePath) } +func TestMultiHopUndeclaredHN(t *T) { + text := wrapHop(t) + if strings.Contains(text, "x") { _ = text } +} +` + dir2 := t.TempDir() + writeFixture(t, dir2+"/multihop_test.go", multiHop) + off = sweepHostneutralityTautologies(t, dir2) + if !containsStrHN(off, "TestMultiHopUndeclaredHN") { + t.Fatalf("sweep failed to flag a multi-hop-helper tautology (transitive reader fixpoint not working); offenders=%v", off) + } +} + +// TestHostneutralitySweepDetectsEvasionShapes is the planted-control mutation test +// for the reader-discovery evasion shapes the validation audit proved the HN sweep +// missed (the integration sweep already guarded these; this ports the guard). Each +// case plants a synthetic offender reaching an instruction file through a shape the +// naive named-reader/`.md`-literal detection cannot see, runs the sweep, asserts it +// REDs, then plants the declared form and asserts it GREENs. A regression removing a +// discovery mechanism leaves the matching case un-flagged, failing this control. 
+func TestHostneutralitySweepDetectsEvasionShapes(t *testing.T) { + // Shape 1 — multi-hop transitive helper: the tautology hides one hop down behind + // a wrapper that calls the named reader readSkill. The fixpoint must propagate + // reader-ness up the chain. This is the finding-1 control: before the fixpoint, + // the HN sweep left this GREEN. + multiHop := `package fixture +import "strings" +func wrapHop(t *T) string { + return readSkill(t, foCorePath) +} +func TestMultiHopHN(t *T) { + text := wrapHop(t) + if strings.Contains(text, "x") { _ = text } +} +` + assertRedThenGreenHN(t, "multi-hop transitive helper", "TestMultiHopHN", multiHop) + + // Shape 2 — path-arg reader: a NEW helper (not in the named map) os.ReadFile's a + // value built from its own path parameter; the `.md` literal lives in the caller. + pathArg := `package fixture +import ( + "os" + "strings" +) +func readArg(t *T, path string) string { + b, _ := os.ReadFile(path) + return string(b) +} +func TestPathArgHN(t *T) { + text := readArg(t, "../../skills/first-officer/references/first-officer-shared-core.md") + if strings.Contains(text, "x") { _ = text } +} +` + assertRedThenGreenHN(t, "path-arg reader", "TestPathArgHN", pathArg) + + // Shape 3 — WalkDir collector: a helper WalkDirs a tree returning `.md` paths the + // caller reads+matches. + walkDir := `package fixture +import ( + "os" + "path/filepath" + "strings" +) +func walkSkills(t *T, base string) []string { + var out []string + filepath.WalkDir(base, func(p string, d os.DirEntry, err error) error { + if !d.IsDir() && strings.HasSuffix(p, ".md") { out = append(out, p) } + return nil + }) + return out +} +func TestWalkDirHN(t *T) { + for _, p := range walkSkills(t, "../../skills") { + b, _ := os.ReadFile(p) + if strings.Contains(string(b), "x") { _ = b } + } +} +` + assertRedThenGreenHN(t, "WalkDir collector", "TestWalkDirHN", walkDir) + + // Shape 4 — split-".md" suffix: the read path is built as base + "." 
+ "md", so + // no single literal carries the `.md` suffix; constStringConcat must rejoin it. + splitMD := `package fixture +import ( + "os" + "path/filepath" + "strings" +) +func TestSplitSuffixHN(t *T) { + p := filepath.Join("..", "..", "skills", "first-officer", "references", "first-officer-shared-core" + "." + "md") + b, _ := os.ReadFile(p) + if strings.Contains(string(b), "x") { _ = b } +} +` + assertRedThenGreenHN(t, "split-.md suffix", "TestSplitSuffixHN", splitMD) + + // Shape 5 (Cycle-3 M1, match axis) — the ingested bytes are inspected with a + // match idiom OUTSIDE the old matchFuncs allowlist. The positive rule keys on the + // READ (readSkill), so HOW the bytes are inspected is irrelevant. + matchIndex := `package fixture +import "strings" +func TestMatchIndexHN(t *T) { + text := readSkill(t, foCorePath) + if strings.Index(text, "HALT") < 0 { t.Error("x") } +} +` + assertRedThenGreenHN(t, "match via strings.Index (no Contains)", "TestMatchIndexHN", matchIndex) + + matchBytesRegexp := `package fixture +import "regexp" +func TestMatchBytesRegexpHN(t *T) { + text := readSkill(t, foCorePath) + re := regexp.MustCompile("HALT") + if !re.Match([]byte(text)) { t.Error("x") } +} +` + assertRedThenGreenHN(t, "match via regexp.Regexp.Match([]byte)", "TestMatchBytesRegexpHN", matchBytesRegexp) + + // Shape 6 (Cycle-3 M2, reader axis) — the `.md` path is built with strings.Join, + // not `+`. The base fragment carries a skill-tree segment so it taints before the + // suffix is appended. 
+ joinPath := `package fixture +import ( + "os" + "strings" +) +func TestJoinPathHN(t *T) { + base := "../../skills/first-officer/references/first-officer-shared-core" + p := strings.Join([]string{base, "md"}, ".") + b, _ := os.ReadFile(p) + if strings.Index(string(b), "x") < 0 { t.Error("y") } +} +` + assertRedThenGreenHN(t, "strings.Join-built .md path", "TestJoinPathHN", joinPath) + + // Shape 7 (Cycle-3 M3, reader axis) — the `.md` path flows through a struct field + // and the read happens via a METHOD on that struct; discovery must include methods + // and taint the package-wide field. + structMethod := `package fixture +import ( + "os" + "strings" +) +type fixt struct { path string } +func (f *fixt) read(t *T) string { + b, _ := os.ReadFile(f.path) + return string(b) +} +func TestStructMethodHN(t *T) { + f := &fixt{path: "../../skills/first-officer/references/first-officer-shared-core.md"} + s := f.read(t) + if strings.Contains(s, "x") { _ = s } +} +` + assertRedThenGreenHN(t, "struct-field + method-receiver path flow", "TestStructMethodHN", structMethod) +} + +// assertRedThenGreenHN plants fixtureSrc, runs the HN sweep, requires offenderName +// flagged (RED on the evasion shape), then rewrites the test with a markNonAC +// declaration and requires it cleared (GREEN once declared). 
+func assertRedThenGreenHN(t *testing.T, shape, offenderName, fixtureSrc string) { + t.Helper() + dir := t.TempDir() + writeFixture(t, dir+"/evasion_test.go", fixtureSrc) + off := sweepHostneutralityTautologies(t, dir) + if !containsStrHN(off, offenderName) { + t.Fatalf("%s: HN sweep failed to flag the evasion offender %s; offenders=%v", shape, offenderName, off) + } + + declaredSrc := strings.Replace( + fixtureSrc, + "func "+offenderName+"(t *T) {", + "func "+offenderName+`(t *T) { + markNonAC(t, "declared evasion-shape fixture")`, + 1, + ) + if declaredSrc == fixtureSrc { + t.Fatalf("%s: could not inject markNonAC into the fixture for %s (signature not found)", shape, offenderName) + } + dir2 := t.TempDir() + writeFixture(t, dir2+"/evasion_test.go", declaredSrc) + off = sweepHostneutralityTautologies(t, dir2) + if containsStrHN(off, offenderName) { + t.Fatalf("%s: HN sweep still flagged %s after it declared markNonAC; offenders=%v", shape, offenderName, off) + } +} + +func writeFixture(t *testing.T, path, content string) { + t.Helper() + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatal(err) + } +} diff --git a/internal/hostneutrality/prose_inflator_locks_test.go b/internal/hostneutrality/prose_inflator_locks_test.go index d522fdfa..bb4c42f0 100644 --- a/internal/hostneutrality/prose_inflator_locks_test.go +++ b/internal/hostneutrality/prose_inflator_locks_test.go @@ -69,6 +69,7 @@ var auditTrailRegexes = []*regexp.Regexp{ // the swept HEAD is clean; a deliberately-inserted regression of any banned // phrase fails the test (positive proof of lock-in). func TestNoAuditTrailExposition(t *testing.T) { + markNonAC(t, "text-hygiene lint, NOT a behavioral claim — a property of the text (the contract files stay free of audit-trail exposition). 
No behavioral oracle and no independent code source: it is prose-inflation hygiene, valued for catching history-as-meta-comment re-inflating the dispatch load.") for _, path := range contractProseFiles { t.Run(filepath.Base(path), func(t *testing.T) { body, err := os.ReadFile(path) @@ -109,6 +110,7 @@ func TestNoAuditTrailExposition(t *testing.T) { // // Threshold: 12 contiguous words. func TestNoCrossFileRestatement(t *testing.T) { + markCodeBoundInvariant(t, "12-word n-grams sourced from a DIFFERENT file (the runtime adapter cores) than the shared cores under test") // Build the n-gram set from the runtime adapter cores, excluding // exception spans. adapterNGrams := map[string]string{} diff --git a/internal/hostneutrality/prose_neutrality_test.go b/internal/hostneutrality/prose_neutrality_test.go index 973a1aa5..843f3a23 100644 --- a/internal/hostneutrality/prose_neutrality_test.go +++ b/internal/hostneutrality/prose_neutrality_test.go @@ -21,23 +21,17 @@ var sharedCorePath = filepath.Join("..", "..", "skills", "first-officer", "refer var claudeRuntimePath = filepath.Join("..", "..", "skills", "first-officer", "references", "claude-first-officer-runtime.md") -// claudeHelperTokens are the named Claude-runtime helper commands / functions that -// must not appear UNQUALIFIED in the generic core. Each is a Claude-only mechanism -// (a binary subcommand or a ~/.claude-reading helper function), not a cross-runtime -// capability. A bare mention inside a host-qualified span is allowed; an unqualified -// algorithm step that names one fails. -var claudeHelperTokens = []string{ +// claudeHelperProseTokens are the Claude-runtime helper names that have NO code +// subcommand source — ~/.claude-reading helper functions and the named drift +// classes of the reconcile helper. 
They are authored prose tokens; the +// code-sourced subcommands (context-budget, *-standing, reconcile) are added at +// test time from the dispatch router via claudeHelperTokens(t), so the banned set +// has an independent code source that shifts when the router changes. +var claudeHelperProseTokens = []string{ "claude-team", - "context-budget", - "spawn-standing", - "list-standing", - "show-standing", "member_exists", "lookup_model", - // Claude-bound by virtue of LoadReconcileTeam's ~/.claude/teams roster read; - // the helper hard-fails at setup on a runtime without that on-disk source. - "spacedock dispatch reconcile", - // Class A/B/C/D/E are the named drift classes of that helper — same binding. + // Class A/B/C/D/E are the named drift classes of the reconcile helper. "Class A", "Class B", "Class C", @@ -45,6 +39,29 @@ var claudeHelperTokens = []string{ "Class E", } +// claudeHelperTokens combines the code-sourced Claude-coupled dispatch subcommands +// (derived from the router, qualified `spacedock dispatch reconcile` for the +// roster-reading helper) with the authored prose tokens. The subcommand half binds +// to dispatch.go, so the banned set diverges from the generic core when a +// subcommand is renamed in code — that divergence is what makes the unqualified- +// helper check a real invariant rather than a self-match. +func claudeHelperTokens(t *testing.T) []string { + t.Helper() + subs := dispatchSubcommands(t) + tokens := append([]string{}, claudeHelperProseTokens...) 
+ for _, sub := range []string{"context-budget", "spawn-standing", "list-standing", "show-standing"} { + if !subs[sub] { + t.Fatalf("dispatch router no longer exposes Claude-coupled helper %q", sub) + } + tokens = append(tokens, sub) + } + if !subs["reconcile"] { + t.Fatal("dispatch router no longer exposes the reconcile subcommand") + } + tokens = append(tokens, "spacedock dispatch reconcile") + return tokens +} + // A span counts as host-qualified only when it names BOTH runtimes by their // capitalized product names — the `X on Codex, Y on Claude` contrast shape, where // the Claude token is a qualified realization presented alongside the Codex @@ -66,10 +83,12 @@ const ( // `claude-team context-budget` reuse step makes it fail; the line-207 // `send_input on Codex, SendMessage on Claude teams` span passes (host-qualified). func TestSharedCoreHasNoUnqualifiedClaudeHelpers(t *testing.T) { + markCodeBoundInvariant(t, "dispatchSubcommands (dispatch.go) supplies the Claude-coupled helper subcommand tokens") spans := parseSpans(t, sharedCorePath) + helperTokens := claudeHelperTokens(t) var violations []string for _, sp := range spans { - for _, tok := range claudeHelperTokens { + for _, tok := range helperTokens { if !strings.Contains(sp.text, tok) { continue } @@ -91,17 +110,34 @@ func TestSharedCoreHasNoUnqualifiedClaudeHelpers(t *testing.T) { } } -// TestClaudeAdapterOwnsRelocatedCommands confirms the relocation landed: the -// concrete Claude invocation of each relocated capability lives in the Claude -// adapter. This is the other half of the invariant — the commands did not vanish, -// they moved. Asserts the adapter names the four subcommand surfaces. +// TestClaudeAdapterOwnsRelocatedCommands is a code-bound invariant confirming the +// relocation landed: the Claude adapter names each relocated dispatch-helper +// subcommand. 
The required set is DERIVED from the dispatch router's actual +// subcommands (dispatchSubcommands over dispatch.go), not literals frozen against +// the adapter — so a subcommand renamed in the router shifts the expectation and +// reds here if the adapter still names the old one (or drops the new one). The +// commands did not vanish, they moved; this is the presence half of the +// relocation invariant. func TestClaudeAdapterOwnsRelocatedCommands(t *testing.T) { + markCodeBoundInvariant(t, "dispatchSubcommands (internal/dispatch/dispatch.go router)") data, err := os.ReadFile(claudeRuntimePath) if err != nil { t.Fatalf("read %s: %v", claudeRuntimePath, err) } body := string(data) - for _, want := range []string{"context-budget", "list-standing", "spawn-standing", "show-standing", "spacedock dispatch reconcile"} { + subs := dispatchSubcommands(t) + // The relocated Claude-coupled helpers — each must be a real router subcommand + // (so the anchor cannot name a command the binary does not route) and must + // appear in the adapter. 
+ relocated := []string{"context-budget", "list-standing", "spawn-standing", "show-standing", "reconcile"} + for _, sub := range relocated { + if !subs[sub] { + t.Fatalf("the dispatch router no longer exposes %q — the relocated-command set diverged from the binary", sub) + } + want := sub + if sub == "reconcile" { + want = "spacedock dispatch reconcile" // the qualified form the adapter documents + } if !strings.Contains(body, want) { t.Errorf("Claude adapter %s does not name the relocated command %q", claudeRuntimePath, want) } diff --git a/internal/hostneutrality/split_root_sync_contract_test.go b/internal/hostneutrality/split_root_sync_contract_test.go index 25f182e8..be3e0d1f 100644 --- a/internal/hostneutrality/split_root_sync_contract_test.go +++ b/internal/hostneutrality/split_root_sync_contract_test.go @@ -40,10 +40,20 @@ func assertAll(t *testing.T, name, text string, tokens []string) { } } -// TestFOHaltGateProse pins B5: the FO core carries the boot halt-gate keyed on -// the Phase-A boot fields (split-root && entity_dir_present false → halt dispatch, -// point at `spacedock state init`). +// TestFOHaltGateProse is a non-AC text-consistency lint: it asserts the FO core +// carries the boot halt-gate prose keyed on the boot fields (split-root && +// entity_dir_present false → HALT, point at `spacedock state init`). Per the proof +// policy this presence check does NOT prove the FO halts; an inverted clause keeps +// every token. The MECHANISM is proven by command-level tests — the binary EMITS +// the halt signal (internal/status TestBootJSONStateBackendEntityDirAbsent observes +// `entity_dir_present: false` + `state_backend: split-root`) and the `spacedock +// state init` recovery WORKS (internal/cli TestStateInitResumesFreshClone) — but +// the OWED behavioral proof that the FO actually HALTs on that signal (rather than +// running state init silently and proceeding) is a live drive, tracked as task +// ev3e (fo-halt-sync-journey-live-drives). 
This lint guards the prose tokens until +// ev3e lands the live oracle. func TestFOHaltGateProse(t *testing.T) { + markNonAC(t, "OWED live drive: task ev3e (fo-halt-sync-journey-live-drives). Mechanism today: internal/status TestBootJSONStateBackendEntityDirAbsent (binary emits the halt signal) + internal/cli TestStateInitResumesFreshClone (recovery works)") text := readSkill(t, foCorePath) assertAll(t, "FO core (B5 halt-gate)", text, []string{ "state_backend", @@ -54,10 +64,20 @@ func TestFOHaltGateProse(t *testing.T) { }) } -// TestFOSyncProse pins the FO half of B6: pull --rebase on boot, push after a -// state commit, and the M-3 rebase-conflict halt (abort + surface + no -// force-push, no auto-resolve). +// TestFOSyncProse is a non-AC text-consistency lint: it asserts the FO core +// carries the B6 sync prose (pull --rebase, push origin, the M-3 rebase-conflict +// halt: abort + no force-push + no auto-resolve). Per the proof policy this +// presence check does NOT prove the FO performs the sync. The git MECHANICS are +// already oracle-covered by real two-writer e2e — internal/cli state_sync_test.go +// (TestTwoWriterSyncHappyPath: push → non-ff rejection → pull --rebase → re-push; +// TestTwoWriterSameEntityConflictHalts: CONFLICT → rebase --abort, no force-push) +// and internal/dispatch build_statecommit_test.go. The remaining behavioral +// proof — that the FO actually ISSUES this sync at the contract points — rides +// task ev3e's halt drive (fo-halt-sync-journey-live-drives), where ev3e's ideation +// folded the sync/journey residual into the halt scenario. This lint guards the +// prose tokens. func TestFOSyncProse(t *testing.T) { + markNonAC(t, "behavioral-issuance rides task ev3e's halt drive (fo-halt-sync-journey-live-drives). 
Sync MECHANICS already oracle-covered: internal/cli state_sync_test.go (TestTwoWriterSyncHappyPath + TestTwoWriterSameEntityConflictHalts) + internal/dispatch build_statecommit_test.go (TestStateCommitGuidanceResolvesPaths)") text := readSkill(t, foCorePath) assertAll(t, "FO core (B6 sync)", text, []string{ "pull --rebase", @@ -69,10 +89,15 @@ func TestFOSyncProse(t *testing.T) { }) } -// TestEnsignSyncProse pins the ensign half of B6: push after committing, pull -// --rebase on a push rejection, and the M-3 rebase-conflict halt (abort + -// surface + no force-push, no auto-resolve), alongside the path-scoped rule. +// TestEnsignSyncProse is a non-AC text-consistency lint: it asserts the ensign +// core carries the B6 sync prose (push origin, pull --rebase, the M-3 +// rebase-conflict halt) alongside the path-scoped rule. Same disposition as the FO +// half: the git MECHANICS are oracle-covered by the real two-writer e2e in +// internal/cli + build_statecommit_test.go; the remaining behavioral proof that +// the ensign ISSUES this sync after its state commits rides task ev3e's halt drive +// (fo-halt-sync-journey-live-drives). This lint guards the tokens. func TestEnsignSyncProse(t *testing.T) { + markNonAC(t, "behavioral-issuance rides task ev3e's halt drive (fo-halt-sync-journey-live-drives). Sync MECHANICS already oracle-covered: internal/cli state_sync_test.go (TestTwoWriterSyncHappyPath + TestTwoWriterSameEntityConflictHalts) + internal/dispatch build_statecommit_test.go (TestStateCommitGuidanceResolvesPaths)") text := readSkill(t, ensignCorePath) assertAll(t, "ensign core (B6 sync)", text, []string{ "push origin", @@ -83,10 +108,19 @@ func TestEnsignSyncProse(t *testing.T) { }) } -// TestCommissionJourneyProse pins B3: the commission SKILL.md carries the -// journey-1 orphan-branch mechanics (clear inherited tree, linked worktree, -// state init pointer) and the journey-2 $inline prose. 
+// TestCommissionJourneyProse is a non-AC text-consistency lint: it asserts the +// commission SKILL.md carries the journey-1 orphan-branch mechanics (clear +// inherited tree, linked worktree, state-init pointer) and the journey-2 $inline +// prose. Per the proof policy this presence check does NOT prove the mechanics +// work. The orphan-birth/resume MECHANICS are oracle-covered by command-level +// tests — internal/cli state_new_test.go (TestStateNewBirthsSplitRoot) + +// state_init_test.go (TestCommissionOrphanBranchScaffolding: orphan branch with a +// cleared tree as a linked worktree; TestStateInitInlineNoOp: the $inline branch). +// The remaining behavioral proof that the commission FLOW drives these journeys +// rides task ev3e's halt drive (fo-halt-sync-journey-live-drives). This lint guards +// the prose tokens. func TestCommissionJourneyProse(t *testing.T) { + markNonAC(t, "behavioral-issuance rides task ev3e's halt drive (fo-halt-sync-journey-live-drives). Journey MECHANICS already oracle-covered: internal/cli state_init_test.go (TestStateInitResumesFreshClone + TestCommissionOrphanBranchScaffolding + TestStateInitInlineNoOp) + state_new_test.go (TestStateNewBirthsSplitRoot)") text := readSkill(t, commissionSkillPath) assertAll(t, "commission SKILL.md (journeys)", text, []string{ "checkout --orphan", diff --git a/skills/integration/codex_idle_notification_test.go b/skills/integration/codex_idle_notification_test.go index faa529ab..3c77269f 100644 --- a/skills/integration/codex_idle_notification_test.go +++ b/skills/integration/codex_idle_notification_test.go @@ -21,7 +21,16 @@ var codexIdleNotificationClassifications = map[string]bool{ "no_notification_observed": true, } +// TestCodexIdleNotificationRuntimeContract is a non-AC text-consistency lint: it +// asserts the Codex runtime adapter's `## Awaiting Completion` section carries the +// three outcome headings + scheduling-priority clauses and stays free of +// blanket-foreground-wait wording. 
Per the proof policy this presence check does +// NOT prove the FO observes the idle-notification semantics; the behavior is +// proven by the captured idle-wake evidence (TestCodexIdleNotificationEvidenceSchema +// validates real recorded probe runs) and the Codex live runner's +// awaiting-completion path. This lint guards the adapter clauses. func TestCodexIdleNotificationRuntimeContract(t *testing.T) { + markNonAC(t, "TestCodexIdleNotificationEvidenceSchema (captured idle-wake evidence) + Codex live runner awaiting-completion path") root := skillsRoot(t) path := filepath.Join(root, "first-officer", "references", "codex-first-officer-runtime.md") data, err := os.ReadFile(path) diff --git a/skills/integration/contract_gate_test.go b/skills/integration/contract_gate_test.go index d4a55088..396e3543 100644 --- a/skills/integration/contract_gate_test.go +++ b/skills/integration/contract_gate_test.go @@ -41,6 +41,7 @@ var embeddedRangeRe = regexp.MustCompile(`>=\s*\d+\s*,\s*<\s*\d+`) // behaviorally by internal/contract/gate_test.go, which drives a real spacedock // stub --version and observes discover invoked 0×/1×. func TestStartupEmbeddedRangeBracketsContractVersion(t *testing.T) { + markCodeBoundInvariant(t, "contract.CONTRACT_VERSION (the binary's contract version) — the embedded range must bracket the independent code value") startup := sectionAfter(foSharedCore(t), "## Startup") raw := embeddedRangeRe.FindString(startup) if raw == "" { @@ -103,6 +104,7 @@ func startupStep1(t *testing.T) string { // which is proof at the claim's own level, not a behavioral claim a code gate // could enforce here. 
func TestStartupAbortSplitsByBinaryPresence(t *testing.T) { + markNonAC(t, "n/a — doc-as-deliverable: the binary is absent by definition of this failure mode, so the contract prose IS the only artifact present; proof at the claim's own level") step1 := startupStep1(t) const ( @@ -179,6 +181,7 @@ func TestStartupAbortSplitsByBinaryPresence(t *testing.T) { // that the gate guidance is NOT duplicated across prose files — which a code // change can violate and this test would catch. func TestStartupGateGuidanceHasSingleProseSource(t *testing.T) { + markNonAC(t, "n/a — structural single-source/dedup invariant over the .md surface; the contract-version gate behavior is proven by TestStartupEmbeddedRangeBracketsContractVersion + TestStartupAbortSplitsByBinaryPresence") root := repoRoot(t) // Markers unique to the startup-gate abort prose. A second .md file carrying // any of these would be a drift-prone mirror of the single source of truth. diff --git a/skills/integration/contract_status_path_test.go b/skills/integration/contract_status_path_test.go index 0ffa7de8..a162670f 100644 --- a/skills/integration/contract_status_path_test.go +++ b/skills/integration/contract_status_path_test.go @@ -32,6 +32,7 @@ var pluginPrivateStatusRefs = []string{ // and only this structural scan over the contract bytes catches it. This is NOT // bare prose-grep — it asserts a structural negative the system depends on. 
func TestNoPluginPrivateStatusPathInContracts(t *testing.T) { + markNonAC(t, "behavioral coverage: the launcher smoke seam (TestLauncherListSetArchive drives the real `spacedock status` binary for list/set/archive) + internal/status/* prove the positive `spacedock status` path; this is the structural-absence complement no positive seam can prove") root := skillsRoot(t) fo := readSkill(t, root, "first-officer/references/first-officer-shared-core.md") ensign := readSkill(t, root, "ensign/references/ensign-shared-core.md") diff --git a/skills/integration/feedback_rejection_flow_test.go b/skills/integration/feedback_rejection_flow_test.go index 06d842d9..293fd9ab 100644 --- a/skills/integration/feedback_rejection_flow_test.go +++ b/skills/integration/feedback_rejection_flow_test.go @@ -42,9 +42,17 @@ func feedbackRejectionFlowSkill(t *testing.T) string { return string(b) } -// TestFeedbackProcedurePresentInSkill locks AC-1(a): the moved procedure -// fingerprints are present in skills/feedback-rejection-flow/SKILL.md. +// TestFeedbackProcedurePresentInSkill is a non-AC text-consistency lint: the +// moved procedure fingerprints are present in +// skills/feedback-rejection-flow/SKILL.md — the prose MOVED here. Per the proof +// policy this presence check does NOT prove the FO follows the procedure; an +// inverted skill body keeps every fingerprint. The behavior — the FO observes a +// REJECTED report and routes the concrete finding back through implementation — is +// proven by the live rejection-flow scenario (runClaudeRejectionFlowScenario / +// runCodexRejectionFlowScenario, asserted by assertRejectionFlow) and its offline +// mutation control TestRejectionFlowNegativeMissingRoute. 
func TestFeedbackProcedurePresentInSkill(t *testing.T) { + markNonAC(t, "live rejection-flow scenario (assertRejectionFlow) + TestRejectionFlowNegativeMissingRoute") skill := feedbackRejectionFlowSkill(t) for name, fp := range feedbackProcedureFingerprints { if !strings.Contains(skill, fp) { @@ -53,10 +61,15 @@ func TestFeedbackProcedurePresentInSkill(t *testing.T) { } } -// TestFeedbackFaithfulnessClausesPresentInSkill locks AC-2 (faithfulness): the -// two mis-route-on-loss clauses — the Codex `send_input` non-completion caveat -// and the `feedback-to` target-read clause — are present in the skill body. +// TestFeedbackFaithfulnessClausesPresentInSkill is a non-AC text-consistency +// lint: the two mis-route-on-loss clauses — the Codex `send_input` non-completion +// caveat and the `feedback-to` target-read clause — are present in the skill body. +// This is text authoring, not behavioral proof; the behavior that the FO routes +// the fix to the feedback-to target (not the reviewer) is proven by the live +// rejection-flow scenario, which asserts the entity returns to status: +// implementation with the fix applied. func TestFeedbackFaithfulnessClausesPresentInSkill(t *testing.T) { + markNonAC(t, "live rejection-flow scenario (assertRejectionFlow) + TestRejectionFlowNegativeMissingRoute") skill := feedbackRejectionFlowSkill(t) for name, fp := range feedbackFaithfulnessFingerprints { if !strings.Contains(skill, fp) { @@ -65,13 +78,16 @@ func TestFeedbackFaithfulnessClausesPresentInSkill(t *testing.T) { } } -// TestFeedbackProcedureAbsentFromFOCore locks AC-1(b): the moved procedure -// fingerprints (and the faithfulness clauses, which moved with the body) are NO -// LONGER present in first-officer-shared-core.md — moved, not duplicated. Whole- -// file (NOT region-scoped): region-scoping an absence check would false-pass -// content that moved elsewhere in the file. Negative-proof: re-inlining the -// procedure re-introduces a fingerprint and flips this RED. 
+// TestFeedbackProcedureAbsentFromFOCore is a non-AC text-consistency lint (dedup): +// the moved procedure fingerprints (and the faithfulness clauses, which moved with +// the body) are NO LONGER present in first-officer-shared-core.md — moved, not +// duplicated. Whole-file (NOT region-scoped): region-scoping an absence check +// would false-pass content that moved elsewhere. This is a structural dedup +// property, not a behavioral claim; the FO's rejection behavior is proven by the +// live rejection-flow scenario. Re-inlining the procedure re-introduces a +// fingerprint and flips this RED. func TestFeedbackProcedureAbsentFromFOCore(t *testing.T) { + markNonAC(t, "dedup lint; behavior via live rejection-flow scenario (assertRejectionFlow)") fo := foCore(t) for name, fp := range feedbackProcedureFingerprints { if strings.Contains(fo, fp) { @@ -98,16 +114,18 @@ func TestFeedbackProcedureAbsentFromFOCore(t *testing.T) { // only fires on the feedback-rejection-flow include family. var feedbackAtInclude = regexp.MustCompile(`@(?:\.{1,2}/)*feedback-rejection-flow\b`) -// TestFOCoreInvokesFeedbackRejectionSkill locks AC-1(c): the FO core invokes the -// skill via Skill(...) at the rejection-detection point and uses NO cross-skill -// @-include toward feedback-rejection-flow ANYWHERE in the file. The positive -// Skill(...) check is region-scoped to `## Completion and Gates` (the seam lives -// at the detection point); the @-include ban is WHOLE-FILE so a stale include -// re-introduced in any other section (e.g. `## Merge and Cleanup`) is caught, not -// just one in the detection region. The Skill(...) literal is the integration -// seam; any `@`-token resolving toward feedback-rejection-flow is the disproven -// mechanism. 
+// TestFOCoreInvokesFeedbackRejectionSkill is a non-AC text-consistency lint: it +// asserts the FO core carries the Skill(skill="spacedock:feedback-rejection-flow") +// invocation literal at the rejection-detection point and no disproven +// cross-skill @-include. Per the proof policy this presence check does NOT prove +// the FO invokes the skill on a rejection: an inverted clause ("NEVER invoke +// feedback-rejection-flow; just wait") keeps the Skill(...) substring and passes +// (verified in ideation). The behavior — the FO routes a REJECTED finding back +// through implementation — is proven only by the live rejection-flow scenario +// (assertRejectionFlow). This lint guards the seam STRING and bans the @-include +// mechanism; it is the text half, not the behavioral proof. func TestFOCoreInvokesFeedbackRejectionSkill(t *testing.T) { + markNonAC(t, "live rejection-flow scenario (assertRejectionFlow) + TestRejectionFlowNegativeMissingRoute") fo := foCore(t) region := sectionAfter(fo, "## Completion and Gates") if region == "" { @@ -121,11 +139,15 @@ func TestFOCoreInvokesFeedbackRejectionSkill(t *testing.T) { } } -// TestAlwaysOnMachineryRetainedInFOCore locks AC-1(d): the referenced always-on -// machinery did NOT move with the procedure. The FO Write Scope `### Feedback -// Cycles` write-scope bullet and the reuse-conditions/budget-probe block stay in -// the FO core. Negative-proof: deleting either anchor reds this. +// TestAlwaysOnMachineryRetainedInFOCore is a non-AC text-consistency lint (the +// retention sibling of the dedup checks): the referenced always-on machinery did +// NOT move with the procedure — the FO Write Scope `### Feedback Cycles` bullet +// and the reuse-conditions block stay in the FO core. This is a structural +// retention property, not a behavioral claim; that the FO actually tracks feedback +// cycles is proven by the live rejection-flow scenario. Deleting either anchor +// reds this. 
func TestAlwaysOnMachineryRetainedInFOCore(t *testing.T) { + markNonAC(t, "retention lint; behavior via live rejection-flow scenario (assertRejectionFlow)") fo := foCore(t) for name, anchor := range map[string]string{ "feedback-cycles-write-scope": "**`### Feedback Cycles` section**", @@ -137,12 +159,15 @@ func TestAlwaysOnMachineryRetainedInFOCore(t *testing.T) { } } -// TestClaudeBareModeSeamStaysConsistent locks AC-2 (seam): the Claude adapter's -// `## Feedback Rejection Flow (bare mode)` seam stays — still present, still the -// sequential-dispatch sentence and the keep-reviewer-alive sentence. The seam is -// a Claude-runtime execution mode, NOT moved into the runtime-neutral skill. -// Negative-proof: removing the seam (or either sentence) reds this. +// TestClaudeBareModeSeamStaysConsistent is a non-AC text-consistency lint: the +// Claude adapter's `## Feedback Rejection Flow (bare mode)` seam stays present +// with its sequential-dispatch and keep-reviewer-alive sentences — the seam is a +// Claude-runtime execution mode, not moved into the runtime-neutral skill. This +// is text authoring, not behavioral proof; the live rejection-flow scenario +// (Claude runner) exercises the bare-mode path for real. Removing the seam reds +// this. func TestClaudeBareModeSeamStaysConsistent(t *testing.T) { + markNonAC(t, "live rejection-flow scenario, Claude runner (assertRejectionFlow)") claude := vendoredSkillFiles(t)["first-officer/references/claude-first-officer-runtime.md"] for name, fp := range map[string]string{ "bare-mode-heading": "## Feedback Rejection Flow (bare mode)", @@ -175,32 +200,48 @@ func feedbackRejectionFrontmatterValue(t *testing.T, key string) (string, bool) return "", false } -// TestFeedbackRejectionSkillNameMatchesSeam locks AC-2 (hardened seam): the -// frontmatter `name:` VALUE equals `feedback-rejection-flow` — the directory name -// AND the `Skill(skill="spacedock:feedback-rejection-flow")` invocation seam. 
-// Token-presence alone (skill_surface_test.go) would pass a renamed skill the -// seam no longer reaches; binding the value to the seam target catches that -// drift. Negative-proof: a bogus name value reds this. +// feedbackRejectionSeamLiteral is the seam name the FO contract invokes; the +// re-bound checks read the expected value from the contract, not from the skill +// file under test. +const feedbackRejectionSeamLiteral = "feedback-rejection-flow" + +// TestFeedbackRejectionSkillNameMatchesSeam is a code-bound invariant: the skill's +// frontmatter `name:` equals the seam name the FO CONTRACT invokes +// (Skill(skill="spacedock:NAME") in first-officer-shared-core.md). The expected +// value comes from the contract, not the skill file under test — renaming either +// side makes the two diverge and reds this, catching a renamed skill the FO +// invocation no longer reaches. func TestFeedbackRejectionSkillNameMatchesSeam(t *testing.T) { + markCodeBoundInvariant(t, "FO contract Skill(skill=\"spacedock:feedback-rejection-flow\") invocation (first-officer-shared-core.md)") name, ok := feedbackRejectionFrontmatterValue(t, "name") if !ok { t.Fatal("feedback-rejection-flow SKILL.md frontmatter has no name field") } - if name != "feedback-rejection-flow" { - t.Errorf("feedback-rejection-flow SKILL.md frontmatter name is %q, want %q (the directory name and the Skill(skill=\"spacedock:feedback-rejection-flow\") seam)", name, "feedback-rejection-flow") + seam := invokedSeamName(foCore(t), feedbackRejectionSeamLiteral) + if seam == "" { + t.Fatalf("FO contract does not invoke Skill(skill=\"spacedock:%s\") — the seam the skill name must match is gone", feedbackRejectionSeamLiteral) + } + if name != seam { + t.Errorf("feedback-rejection-flow SKILL.md frontmatter name is %q, but the FO contract invokes the seam %q — a renamed skill the FO invocation no longer reaches", name, seam) } } -// TestFeedbackRejectionSkillIsFOInternal locks AC-2 (hardened seam): the -// 
frontmatter carries `user-invocable: false` — the skill is FO-internal (loaded -// mid-run via Skill()), not a captain-facing user skill. Negative-proof: flipping -// to `true` reds this. +// TestFeedbackRejectionSkillIsFOInternal is a code-bound invariant binding the +// skill's `user-invocable` frontmatter to its ROLE: a skill the FO invokes mid-run +// via Skill(skill="spacedock:NAME") is FO-internal and MUST be +// `user-invocable: false`. The expected value is REQUIRED by the contract invoking +// the seam (an independent source), not a free literal; flipping the frontmatter +// to `true` while the FO still invokes the seam reds this. func TestFeedbackRejectionSkillIsFOInternal(t *testing.T) { + markCodeBoundInvariant(t, "FO contract Skill(skill=\"spacedock:feedback-rejection-flow\") invocation implies FO-internal") + if invokedSeamName(foCore(t), feedbackRejectionSeamLiteral) == "" { + t.Fatalf("FO contract does not invoke the feedback-rejection-flow seam — the FO-internal premise no longer holds") + } v, ok := feedbackRejectionFrontmatterValue(t, "user-invocable") if !ok { t.Fatal("feedback-rejection-flow SKILL.md frontmatter has no user-invocable field") } if v != "false" { - t.Errorf("feedback-rejection-flow SKILL.md frontmatter user-invocable is %q, want \"false\" (the skill is FO-internal)", v) + t.Errorf("feedback-rejection-flow SKILL.md frontmatter user-invocable is %q, but the FO contract invokes it as a mid-run seam — an FO-internal skill must be user-invocable: false", v) } } diff --git a/skills/integration/nonac_marker_test.go b/skills/integration/nonac_marker_test.go new file mode 100644 index 00000000..5ae7e19c --- /dev/null +++ b/skills/integration/nonac_marker_test.go @@ -0,0 +1,794 @@ +// ABOUTME: The non-AC text-consistency marker + the AC-3 sweep meta-test — a +// ABOUTME: presence/absence check over an LLM-ingested instruction file is proof only if it declares its behavioral oracle. 
+package integration + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "strings" + "testing" +) + +// markNonAC is the explicit demotion seam required by the proof policy (f8b257cf): +// a string/substring/regex match over an instruction file the model reads NEVER +// satisfies a behavioral acceptance criterion. A test that matches such a file is +// legitimate ONLY as a text-consistency sanity check (the prose moved, a clause is +// present, a token is absent) — never as the proof of behavior. Calling +// markNonAC(t, oracle) declares that: +// - this test is a non-AC text-consistency lint, NOT a behavioral proof, and +// - `oracle` names where the behavior it touches is ACTUALLY proven (a live +// drive, a code-side invariant, or "n/a — pure text property" when the claim +// is itself about the text and has no behavior to drive). +// +// The AC-3 sweep meta-test (TestNoUndeclaredTautologicalProof) keys on this call: +// any test in this package that matches an ingested instruction file but does NOT +// call markNonAC is flagged as an undeclared tautology standing in for a +// behavioral claim. The call itself does nothing at runtime — its value is the +// declaration the sweep reads from the source. +func markNonAC(t *testing.T, oracle string) { + t.Helper() + if oracle == "" { + t.Fatal("markNonAC requires a non-empty behavioral oracle reference") + } +} + +// markCodeBoundInvariant is the second of the two explicit classifications the +// proof policy's litmus demands ("does the expected value come from a source OTHER +// than the file under test?"). A test calls markCodeBoundInvariant(t, source) to +// declare that its expectation is NOT a literal hardcoded against the file under +// test but is read from `source` — an independent code-side value (a shared Go +// const, the seam target the Skill() invocation uses, a manifest the binary +// parses) that can DIVERGE from the file. 
That divergence is exactly what makes +// the check able to fail as an invariant, so it is a legitimate AC-2 invariant, +// not a tautology. The AC-3 sweep treats a markCodeBoundInvariant test as +// declared (not an offender), the same as a markNonAC text-consistency lint — +// every text-matching test must self-classify as one or the other. The call does +// nothing at runtime; its value is the source-level declaration the sweep reads. +func markCodeBoundInvariant(t *testing.T, source string) { + t.Helper() + if source == "" { + t.Fatal("markCodeBoundInvariant requires a non-empty independent-source reference") + } +} + +// ingestedFileReaders are the helper functions in this package that read an +// instruction file the model ingests (a contract, a workflow README, a skill +// body) — the seed of the reader set the sweep grows to a fixpoint. A test that +// calls one of these (the READ — how it then inspects the bytes is irrelevant under +// the match-axis positive rule) is a presence/absence check over an ingested file, +// exactly the shape the proof policy bans as standalone behavioral proof. The AC-3 +// sweep treats such a test as tautological unless it declares markNonAC, or unless +// it binds the expected value to a code-side source (a re-bound Bucket-B invariant — +// markCodeBoundInvariant). The sweep distinguishes the two by the declaration: a +// re-bound invariant whose expectation diverges from the file declares +// markCodeBoundInvariant; a pure text-consistency lint declares markNonAC. +var ingestedFileReaders = map[string]bool{ + "foCore": true, + "foRuntime": true, + "presentGateSkill": true, + "feedbackRejectionFlowSkill": true, + "usingClaudeTeamSkill": true, + "vendoredSkillFiles": true, + "presentGateFrontmatterValue": true, + "feedbackRejectionFrontmatterValue": true, +} + +// TestNoUndeclaredTautologicalProof is the AC-3 sweep, re-runnable offline. 
It +// parses every *_test.go in this package and flags any test function that READS a +// recognized instruction file's content — via a reader helper, a tainted +// os.ReadFile/os.Open, or a WalkDir-collected `.md`, through the flow shapes the +// reader-axis taint covers (below) — unless it self-classifies via markNonAC or +// markCodeBoundInvariant. The count of undeclared offenders is the AC-3 metric: it +// must be zero. +// +// What the guard actually guarantees (two axes, with one closed and one bounded): +// +// - MATCH axis (closed, universal, load-bearing): the sweep keys on the READ, not +// on how the bytes are then inspected. ONCE a read of a recognized instruction +// file is detected, the test MUST declare regardless of the inspection idiom — +// strings.Contains/Index/EqualFold, bytes.*, regexp.Regexp.Match, len(Split)>1, +// a bare `==`, anything. Enumerating "match functions" was whack-a-mole; this +// rule closes the whole class because the trigger is the ingest, not the match. +// +// - READER axis (covered flow shapes, NOT exhaustive): a read is detected for an +// in-package read of a RECOGNIZED instruction path (a skill-tree/contract +// segment, isInstructionPathLiteral) reaching a read sink through these flows: a +// bare-`string` parameter, a `:=`/`=` local, a struct field, a method receiver, +// a closure capture; with the path built by `+` / strings.Join / filepath.Join / +// fmt.Sprintf. A transitive helper chain is followed to a fixpoint. +// +// KNOWN OUT-OF-SCOPE (tracked in the follow-up task sweep-guard-reader-axis-invert, +// id 4qnn7dbzkyh9qv65t618vtxy, backstopped by the detached adversarial audit before +// merge — NOT silently dropped): +// - M-A: unrecognized instruction surfaces (AGENTS.md, mods/*.md) — not in the +// instructionPathSegments predicate. +// - M-B: cross-package reads (a read whose reader helper lives in another package). +// - M-C: a path held in a package var defined in another file of this package. 
+// - M-D: `[]string`/`...string`-param + range/slice-element flow. +// These are the same recurring enumerated-shape reader-flow class cycles 1-3 each +// closed instances of; the follow-up weighs an invert/positive predicate and a +// go/types+SSA taint that closes the class definitionally. +// +// This sweep is itself a code-side invariant over real parsed test source, not a +// text match over an instruction file — its expected value (which reads reach an +// instruction file) is independent of any contract prose, so it can fail when a +// future edit adds an undeclared ingest through a covered flow. +func TestNoUndeclaredTautologicalProof(t *testing.T) { + offenders := sweepUndeclaredTautologies(t, ".") + for _, o := range offenders { + t.Errorf("%s reads an ingested instruction file's content without calling markNonAC or markCodeBoundInvariant — declare it a non-AC text-consistency lint (with its behavioral oracle) or re-bind its expectation to a code-side source; how the bytes are inspected does not matter", o) + } + if len(offenders) > 0 { + t.Fatalf("AC-3 sweep: %d undeclared tautological-behavioral-proof test(s); the count must be zero", len(offenders)) + } +} + +// sweepUndeclaredTautologies returns the sorted names of test functions in the +// package at dir that read an ingested instruction file (however the bytes are +// then inspected) and declare neither markNonAC nor markCodeBoundInvariant. +// Factored out as a helper so the sweep's own mutation control +// (TestSweepDetectsAnUndeclaredTautology) can run it against a synthetic fixture. 
+func sweepUndeclaredTautologies(t *testing.T, dir string) []string { + t.Helper() + fset := token.NewFileSet() + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("read package dir %s: %v", dir, err) + } + var files []*ast.File + for _, ent := range entries { + name := ent.Name() + if ent.IsDir() || !strings.HasSuffix(name, "_test.go") { + continue + } + f, err := parser.ParseFile(fset, dir+"/"+name, nil, 0) + if err != nil { + t.Fatalf("parse %s: %v", name, err) + } + files = append(files, f) + } + + // First pass: discover the package's instruction-file reader helpers, then grow + // the set to a fixpoint so a read cannot hide behind a helper chain. A func is a + // reader if it ingests instruction-file content directly (readsInstructionContent + // — a tainted os.ReadFile/io read, or a WalkDir-collected `.md`) OR (transitive) + // it calls a known reader. Methods are NOT skipped: a reader can be a method on a + // fixture struct (the s.path / method-receiver flow shape). The seeded named + // helpers cover readers that return a non-`.md`-literal handle (vendoredSkillFiles + // returns a map). 
+ taintedFields := instructionTaintedFields(files) + readers := map[string]bool{} + for r := range ingestedFileReaders { + readers[r] = true + } + helperCalls := map[string]map[string]bool{} + for _, f := range files { + for _, decl := range f.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || strings.HasPrefix(fn.Name.Name, "Test") { + continue + } + helperCalls[fn.Name.Name] = collectCalls(fn) + if readsInstructionContent(fn, taintedFields) { + readers[fn.Name.Name] = true + } + } + } + for grew := true; grew; { + grew = false + for name, calls := range helperCalls { + if readers[name] { + continue + } + for r := range readers { + if calls[r] { + readers[name] = true + grew = true + break + } + } + } + } + + // Second pass: a test is an offender if it ingests instruction-file content — + // directly (readsInstructionContent) or via a discovered reader helper — and does + // NOT declare its proof standing. The sweep keys on the READ, not on a match-func + // allowlist: any inspection of ingested bytes (Contains/Index/EqualFold, bytes.*, + // regexp.Match, a bare ==, …) is covered because the trigger is the ingest itself. 
+ var offenders []string + for _, f := range files { + for _, decl := range f.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || !strings.HasPrefix(fn.Name.Name, "Test") { + continue + } + calls := collectCalls(fn) + readsIngested := readsInstructionContent(fn, taintedFields) + for r := range readers { + if calls[r] { + readsIngested = true + break + } + } + declared := calls["markNonAC"] || calls["markCodeBoundInvariant"] + if readsIngested && !declared { + offenders = append(offenders, fn.Name.Name) + } + } + } + return sortedUnique(offenders) +} + +// readsInstructionContent reports whether fn ingests a recognized instruction +// file's content through the reader-axis flow shapes the taint COVERS — it is the +// positive/taint replacement for the Cycle-1/2 allow-lists (readsParamPath + +// walksForMarkdown + constStringConcat-only `+` concat), but it covers a bounded +// set of flows, not an exhaustive one. It taints a string derived from a recognized +// instruction-file path (a skill-tree/contract segment, isInstructionPathLiteral) +// built by `+` / strings.Join / filepath.Join / fmt.Sprintf and flowed through a +// bare-`string` param, a `:=`/`=` local, a struct field, or a method receiver, and +// reports a read when a tainted path flows into a read sink (os.ReadFile/os.Open/ +// io.ReadAll/bufio scanner-reader), or when fn WalkDir/Walks a tree collecting +// instruction `.md` files (the reader-of-many shape its callers then read). +// +// NOT covered (tracked in sweep-guard-reader-axis-invert, id +// 4qnn7dbzkyh9qv65t618vtxy, audit-backstopped): `[]string`/`...string`-param + +// range/slice-element flow (M-D), cross-package reader helpers (M-B), a package var +// defined in another file of this package (M-C), and unrecognized surfaces like +// AGENTS.md / mods/*.md (M-A). See TestNoUndeclaredTautologicalProof's doc for the +// full honest bound. 
+func readsInstructionContent(fn *ast.FuncDecl, taintedFields map[string]bool) bool { + tainted := instructionTaintedNames(fn, taintedFields) + found := false + ast.Inspect(fn, func(n ast.Node) bool { + call, ok := n.(*ast.CallExpr) + if !ok { + return true + } + sel, ok := call.Fun.(*ast.SelectorExpr) + if !ok { + return true + } + if readSinks[sel.Sel.Name] { + for _, arg := range call.Args { + if exprInstructionTainted(arg, tainted) || readsTaintedField(arg, taintedFields) { + found = true + } + } + } + // A WalkDir/Walk that filters on an instruction `.md` path is a reader-of-many: + // it collects the paths its callers read+inspect. + if (sel.Sel.Name == "WalkDir" || sel.Sel.Name == "Walk") && fnFiltersInstructionMarkdown(fn) { + found = true + } + return true + }) + return found +} + +// readsTaintedField reports whether expr reads a struct field whose name is in the +// package-wide instruction-tainted-field set — the s.path / method-receiver flow +// (Cycle-3 M3): the `.md` literal is assigned to the field in a constructor in one +// function and the read happens via a field selector in another (a method). Field +// taint is computed package-wide (instructionTaintedFields), so this catches the +// read even though the assigning literal is not in fn's own body. A generated-path +// field (runBuild's res.DispatchFilePath) is never instruction-assigned, so it is +// not in the set and not flagged. +func readsTaintedField(expr ast.Expr, taintedFields map[string]bool) bool { + hit := false + ast.Inspect(expr, func(n ast.Node) bool { + if sel, ok := n.(*ast.SelectorExpr); ok && taintedFields[sel.Sel.Name] { + hit = true + } + return true + }) + return hit +} + +// instructionTaintedFields scans every struct composite literal and every +// assignment to a field selector across the package, returning the set of FIELD +// NAMES ever assigned an instruction-file path. 
Keyed by field name (no type info +// at parse time) — a deliberate over-approximation that errs toward flagging, which +// the proof policy wants. A field only ever assigned a generated/temp path (a +// dispatch artifact) never enters the set. +func instructionTaintedFields(files []*ast.File) map[string]bool { + fields := map[string]bool{} + for _, f := range files { + ast.Inspect(f, func(n ast.Node) bool { + switch node := n.(type) { + case *ast.KeyValueExpr: + if key, ok := node.Key.(*ast.Ident); ok { + if exprInstructionTainted(node.Value, nil) { + fields[key.Name] = true + } + } + case *ast.AssignStmt: + for i, rhs := range node.Rhs { + if i >= len(node.Lhs) { + break + } + if sel, ok := node.Lhs[i].(*ast.SelectorExpr); ok && exprInstructionTainted(rhs, nil) { + fields[sel.Sel.Name] = true + } + } + } + return true + }) + } + return fields +} + +// readSinks are the call selectors that ingest a file's content given a path: the +// os reads, io.ReadAll over an opened handle, and the bufio scanner/reader +// constructors. A tainted instruction path flowing into any of these is an ingest. +var readSinks = map[string]bool{ + "ReadFile": true, // os.ReadFile + "Open": true, // os.Open + "ReadAll": true, // io.ReadAll + "NewScanner": true, // bufio.NewScanner + "NewReader": true, // bufio.NewReader +} + +// instructionTaintedNames computes the set of identifier names (params, locals, +// struct-field selectors rendered as `recv.field`) in fn that may hold a string +// derived from an instruction-file path. It seeds every bare-`string` parameter, +// then propagates through := / = assignments whose right-hand side carries taint +// (an instruction-path literal/segment, a tainted name, or a tainted-field read) +// to a fixpoint, so a path built and then read in separate statements is still +// tainted at the read. Range variables and slice elements are NOT tracked (M-D). 
+func instructionTaintedNames(fn *ast.FuncDecl, taintedFields map[string]bool) map[string]bool { + tainted := map[string]bool{} + // Seed: any parameter is a candidate taint carrier only if the CALLER supplies an + // instruction path; within fn we cannot see the caller, so a reader-helper whose + // path arg is a parameter is caught by the param-flow rule below (the parameter is + // tainted when fn itself also references an instruction literal, OR unconditionally + // for a single-string-param helper that reads it — the readSkill(t, path) shape). + // We treat every string parameter as tainted: a helper that ReadFiles a string + // param is, by construction, a path-arg reader (the caller supplies the .md path). + if fn.Type.Params != nil { + for _, field := range fn.Type.Params.List { + if isStringyType(field.Type) { + for _, name := range field.Names { + tainted[name.Name] = true + } + } + } + } + for grew := true; grew; { + grew = false + ast.Inspect(fn, func(n ast.Node) bool { + assign, ok := n.(*ast.AssignStmt) + if !ok { + return true + } + for i, rhs := range assign.Rhs { + if i >= len(assign.Lhs) { + break + } + // A local assigned from an instruction-tainted expr, OR from a read of a + // package-wide instruction-tainted field, carries the taint forward. + if !exprInstructionTainted(rhs, tainted) && !readsTaintedField(rhs, taintedFields) { + continue + } + if name := lvalueName(assign.Lhs[i]); name != "" && !tainted[name] { + tainted[name] = true + grew = true + } + } + return true + }) + } + return tainted +} + +// lvalueName renders an assignable target as a taint-tracking key: a bare ident, or +// a selector `recv.field` (the struct-field path-flow shape). +func lvalueName(e ast.Expr) string { + switch x := e.(type) { + case *ast.Ident: + return x.Name + case *ast.SelectorExpr: + if inner, ok := x.X.(*ast.Ident); ok { + return inner.Name + "." 
+ x.Sel.Name + } + return x.Sel.Name + } + return "" +} + +// exprInstructionTainted reports whether an expression carries an instruction-file +// path taint: it references a tainted name (ident or `recv.field` selector), an +// instruction-path string literal/segment, or a known instruction path ident, +// anywhere in its subtree — so the `+` / strings.Join / filepath.Join / fmt.Sprintf +// path-build idioms (whose tainted operand is a node in the subtree) are covered. +// The over-approximation toward flagging is deliberate. It does NOT cover a taint +// carried in a slice element or recovered via a range variable (M-D) — see +// readsInstructionContent's NOT-covered note and the follow-up +// sweep-guard-reader-axis-invert. +func exprInstructionTainted(expr ast.Expr, tainted map[string]bool) bool { + hit := false + ast.Inspect(expr, func(n ast.Node) bool { + switch x := n.(type) { + case *ast.BasicLit: + if x.Kind == token.STRING && isInstructionPathLiteral(strings.Trim(x.Value, "`\"")) { + hit = true + } + case *ast.Ident: + if tainted[x.Name] { + hit = true + } + case *ast.SelectorExpr: + if inner, ok := x.X.(*ast.Ident); ok && tainted[inner.Name+"."+x.Sel.Name] { + hit = true + } + } + return true + }) + return hit +} + +// fnFiltersInstructionMarkdown reports whether fn's body filters paths by an +// instruction `.md` suffix — the WalkDir-collector signal. A `.md` HasSuffix check +// (or an instruction-`.md` literal) anywhere in a WalkDir helper marks it a +// reader-of-many over the instruction surface. 
+func fnFiltersInstructionMarkdown(fn *ast.FuncDecl) bool { + hit := false + ast.Inspect(fn, func(n ast.Node) bool { + if lit, ok := n.(*ast.BasicLit); ok && lit.Kind == token.STRING { + if strings.HasSuffix(strings.Trim(lit.Value, "`\""), ".md") { + hit = true + } + } + return true + }) + return hit +} + +// collectCalls walks a function body and returns the set of called function names +// (bare `foo(...)` and selector `pkg.Foo(...)`/`recv.Method(...)` trailing name). +// Used to detect calls to discovered reader helpers and the markNonAC / +// markCodeBoundInvariant declarations. +func collectCalls(fn *ast.FuncDecl) map[string]bool { + calls := map[string]bool{} + ast.Inspect(fn, func(n ast.Node) bool { + if call, ok := n.(*ast.CallExpr); ok { + switch f := call.Fun.(type) { + case *ast.Ident: + calls[f.Name] = true + case *ast.SelectorExpr: + calls[f.Sel.Name] = true + } + } + return true + }) + return calls +} + +// isStringyType reports whether a parameter type node carries a path string: a bare +// `string` (the readSkill(t, path) shape) — the kind a path-arg reader takes. +func isStringyType(t ast.Expr) bool { + id, ok := t.(*ast.Ident) + return ok && id.Name == "string" +} + +// instructionPathSegments are the skill-tree / contract path segments that mark a +// path literal as targeting an instruction file the model ingests (a skill, +// contract, agent, or runtime adapter) rather than a binary-parsed artifact (a +// manifest .json) or a dev-only doc (docs/dev/*.md recipes are NOT an LLM +// instruction surface and are intentionally out of scope — Cycle-2 P1 divergence: +// the sweep scopes to the shipped skill/contract surface, not every `.md` in the +// repo). +// +// This is the RECOGNIZED-instruction-surface predicate (a deliberate bound, not a +// universal one): a path carrying one of these listed segments is an instruction +// path. 
A path fragment carrying a segment is instruction-tainted even before a +// `.md` suffix is appended, so strings.Join([]string{"…/first-officer-shared-core", +// "md"}, ".") taints on the segment in the base (closing the Cycle-1 +// `.md`-suffix-AND-segment pair a split/Join-built suffix evaded). +// +// KNOWN OUT-OF-SCOPE surfaces (M-A, tracked in sweep-guard-reader-axis-invert, id +// 4qnn7dbzkyh9qv65t618vtxy): a real instruction surface whose path carries NONE of +// these segments — e.g. AGENTS.md or mods/*.md — is not recognized and a read of it +// is not flagged. The follow-up weighs an invert/positive predicate that recognizes +// the instruction surface definitionally rather than by this enumerated list. +var instructionPathSegments = map[string]bool{ + "skills": true, + "references": true, + "agents": true, + "first-officer": true, + "ensign": true, + "commission": true, + "present-gate": true, + "SKILL.md": true, +} + +// isInstructionPathLiteral reports whether a string literal is (a fragment of) an +// instruction-file path: it carries a skill-tree/contract segment. A `.json` +// manifest path or a docs/dev recipe path carries none and is not instruction. +func isInstructionPathLiteral(s string) bool { + if strings.HasSuffix(s, ".json") { + return false + } + for seg := range instructionPathSegments { + if s == seg || strings.Contains(s, seg) { + return true + } + } + return false +} + +// TestSweepDetectsAnUndeclaredTautology is the mutation control for the AC-3 +// sweep itself: the sweep is the AC-3 oracle, so it must be demonstrated to RED on +// the exact shape it polices and GREEN once that shape declares its demotion. +// Without this, the sweep could silently degrade to a no-op (e.g. an ingested-file +// reader renamed out of the map) and pass vacuously. 
It writes three synthetic test +// files to a temp dir and runs the sweep against it after each: +// - undeclared: a test that reads an ingested file (foCore) and matches it +// (strings.Contains) but never declares its standing -> MUST be flagged. +// - declared: the same shape plus a markNonAC (or markCodeBoundInvariant) call -> MUST NOT be flagged. +func TestSweepDetectsAnUndeclaredTautology(t *testing.T) { + dir := t.TempDir() + undeclared := `package fixture +import "strings" +func TestUndeclaredFixture(t *T) { + fo := foCore(t) + if strings.Contains(fo, "x") { _ = fo } +} +` + declared := `package fixture +func TestDeclaredFixture(t *T) { + markNonAC(t, "behavioral oracle: live gate-guardrail scenario") + fo := foCore(t) + if strings.Contains(fo, "x") { _ = fo } +} +` + writeFile(t, dir+"/undeclared_test.go", undeclared) + offenders := sweepUndeclaredTautologies(t, dir) + if !containsStr(offenders, "TestUndeclaredFixture") { + t.Fatalf("sweep failed to flag an undeclared presence-check over an ingested file; offenders=%v", offenders) + } + + writeFile(t, dir+"/declared_test.go", declared) + offenders = sweepUndeclaredTautologies(t, dir) + if containsStr(offenders, "TestDeclaredFixture") { + t.Fatalf("sweep wrongly flagged a declared (markNonAC) text-consistency lint; offenders=%v", offenders) + } + if !containsStr(offenders, "TestUndeclaredFixture") { + t.Fatalf("adding a declared fixture must not stop the sweep flagging the undeclared one; offenders=%v", offenders) + } + + // A code-bound invariant (expectation from an independent source) is the other + // valid self-classification and must clear the sweep too. 
+ codeBound := `package fixture +func TestCodeBoundFixture(t *T) { + markCodeBoundInvariant(t, "shared const presentGateSeamName") + fo := foCore(t) + if strings.Contains(fo, presentGateSeamName) { _ = fo } +} +` + writeFile(t, dir+"/codebound_test.go", codeBound) + offenders = sweepUndeclaredTautologies(t, dir) + if containsStr(offenders, "TestCodeBoundFixture") { + t.Fatalf("sweep wrongly flagged a code-bound invariant; offenders=%v", offenders) + } +} + +// TestSweepDetectsEvasionShapes is the planted-control mutation test for the +// reader-discovery evasion shapes the validation audit proved the sweep missed. +// Each case plants a synthetic offender that reaches an instruction file through a +// shape the naive `.md`-literal-in-the-reader detection cannot see, runs the sweep, +// and asserts it REDs (flags the offender); then it plants the declared form and +// asserts it GREENs. A regression that removed a discovery mechanism would let the +// matching case go un-flagged, failing this control. +func TestSweepDetectsEvasionShapes(t *testing.T) { + // Shape 1 — path-arg reader: the helper os.ReadFile's a value built from its own + // path parameter; the `.md` literal lives in the CALLER (the readSkill(t,root,rel) + // shape). readsInstructionPath over the helper body sees no literal, so only + // parameter-flow detection catches it. + pathArg := `package fixture +import ( + "os" + "path/filepath" + "strings" +) +func readArg(t *T, root, rel string) string { + b, _ := os.ReadFile(filepath.Join(root, rel)) + return string(b) +} +func TestPathArgOffender(t *T) { + s := readArg(t, root, "first-officer/references/first-officer-shared-core.md") + if strings.Contains(s, "x") { _ = s } +} +` + assertRedThenGreen(t, "path-arg reader", "TestPathArgOffender", pathArg) + + // Shape 2 — WalkDir collector: the helper WalkDirs a tree collecting `.md` + // paths and RETURNS them; it never os.ReadFile's the `.md` itself. 
The caller + // reads+matches each returned path (the shippedSkillText shape). + walkDir := `package fixture +import ( + "os" + "path/filepath" + "strings" +) +func walkSkills(t *T, base string) []string { + var out []string + filepath.WalkDir(base, func(p string, d os.DirEntry, err error) error { + if !d.IsDir() && strings.HasSuffix(p, ".md") { out = append(out, p) } + return nil + }) + return out +} +func TestWalkDirOffender(t *T) { + for _, p := range walkSkills(t, root) { + b, _ := os.ReadFile(p) + if strings.Contains(string(b), "x") { _ = b } + } +} +` + assertRedThenGreen(t, "WalkDir collector", "TestWalkDirOffender", walkDir) + + // Shape 3 — split-".md" suffix: the read path is constructed as + // base + "." + "md", so no single literal carries the `.md` suffix. The + // constant-concatenation reconstruction must rejoin it before .md detection. + splitMD := `package fixture +import ( + "os" + "path/filepath" + "strings" +) +func TestSplitSuffixOffender(t *T) { + p := filepath.Join(root, "first-officer", "references", "first-officer-shared-core" + "." + "md") + b, _ := os.ReadFile(p) + if strings.Contains(string(b), "x") { _ = b } +} +` + assertRedThenGreen(t, "split-.md suffix", "TestSplitSuffixOffender", splitMD) + + // Shape 4 — multi-hop transitive helper: a tautology hidden two frames down + // (the test calls wrapHop, which calls readArg, which reads a param path). The + // reader fixpoint must propagate reader-ness up the call chain. This is the + // integration-side guard that the transitive fixpoint stays load-bearing. 
+ multiHop := `package fixture +import ( + "os" + "path/filepath" + "strings" +) +func readArg2(t *T, root, rel string) string { + b, _ := os.ReadFile(filepath.Join(root, rel)) + return string(b) +} +func wrapHop(t *T, root string) string { + return readArg2(t, root, "ensign/references/ensign-shared-core.md") +} +func TestMultiHopOffender(t *T) { + s := wrapHop(t, root) + if strings.Contains(s, "x") { _ = s } +} +` + assertRedThenGreen(t, "multi-hop transitive helper", "TestMultiHopOffender", multiHop) + + // Shape 5 (Cycle-3 M1, match axis) — the ingested bytes are inspected with a + // match idiom OUTSIDE the old matchFuncs allowlist (strings.Index, regexp.Match + // over []byte). The positive rule keys on the READ, so HOW the bytes are inspected + // is irrelevant — the read of foCore alone must flag it regardless of the idiom. + matchIndex := `package fixture +import "strings" +func TestMatchIndexOffender(t *T) { + fo := foCore(t) + if strings.Index(fo, "Skill(skill=\"spacedock:present-gate\")") < 0 { t.Error("x") } +} +` + assertRedThenGreen(t, "match via strings.Index (no Contains)", "TestMatchIndexOffender", matchIndex) + + matchBytesRegexp := `package fixture +import "regexp" +func TestMatchBytesRegexpOffender(t *T) { + fo := foCore(t) + re := regexp.MustCompile("present-gate") + if !re.Match([]byte(fo)) { t.Error("x") } +} +` + assertRedThenGreen(t, "match via regexp.Regexp.Match([]byte)", "TestMatchBytesRegexpOffender", matchBytesRegexp) + + // Shape 6 (Cycle-3 M2, reader axis) — the `.md` path is built with strings.Join, + // not `+`. The base fragment carries an instruction segment so it taints before + // the suffix is appended; the constStringConcat-only-`+` design missed this. 
+ joinPath := `package fixture +import ( + "os" + "strings" +) +func TestJoinPathOffender(t *T) { + base := "../../skills/first-officer/references/first-officer-shared-core" + p := strings.Join([]string{base, "md"}, ".") + b, _ := os.ReadFile(p) + if strings.Index(string(b), "x") < 0 { t.Error("y") } +} +` + assertRedThenGreen(t, "strings.Join-built .md path", "TestJoinPathOffender", joinPath) + + // Shape 7 (Cycle-3 M3, reader axis) — the `.md` path flows through a struct field + // and the read happens via a METHOD on that struct. readsParamPath tracked only + // string params and discovery skipped methods (fn.Recv != nil); taint over the + // field + method discovery must catch it. + structMethod := `package fixture +import ( + "os" + "strings" +) +type fixt struct { path string } +func (f *fixt) read(t *T) string { + b, _ := os.ReadFile(f.path) + return string(b) +} +func TestStructMethodOffender(t *T) { + f := &fixt{path: "skills/first-officer/references/first-officer-shared-core.md"} + s := f.read(t) + if strings.Contains(s, "x") { _ = s } +} +` + assertRedThenGreen(t, "struct-field + method-receiver path flow", "TestStructMethodOffender", structMethod) +} + +// assertRedThenGreen plants fixtureSrc in a fresh temp dir, runs the sweep, and +// requires offenderName to be flagged (RED on the evasion shape). It then rewrites +// the offending test with a markNonAC declaration and requires the offender to +// clear (GREEN once declared). The declared rewrite reuses the fixture verbatim +// with the marker inserted as the test body's first statement. 
+func assertRedThenGreen(t *testing.T, shape, offenderName, fixtureSrc string) { + t.Helper() + dir := t.TempDir() + writeFile(t, dir+"/evasion_test.go", fixtureSrc) + offenders := sweepUndeclaredTautologies(t, dir) + if !containsStr(offenders, offenderName) { + t.Fatalf("%s: sweep failed to flag the evasion offender %s; offenders=%v", shape, offenderName, offenders) + } + + declaredSrc := strings.Replace( + fixtureSrc, + "func "+offenderName+"(t *T) {", + "func "+offenderName+`(t *T) { + markNonAC(t, "declared evasion-shape fixture")`, + 1, + ) + if declaredSrc == fixtureSrc { + t.Fatalf("%s: could not inject markNonAC into the fixture for %s (signature not found)", shape, offenderName) + } + dir2 := t.TempDir() + writeFile(t, dir2+"/evasion_test.go", declaredSrc) + offenders = sweepUndeclaredTautologies(t, dir2) + if containsStr(offenders, offenderName) { + t.Fatalf("%s: sweep still flagged %s after it declared markNonAC; offenders=%v", shape, offenderName, offenders) + } +} + +func containsStr(in []string, want string) bool { + for _, s := range in { + if s == want { + return true + } + } + return false +} + +func sortedUnique(in []string) []string { + seen := map[string]bool{} + var out []string + for _, s := range in { + if !seen[s] { + seen[s] = true + out = append(out, s) + } + } + // simple insertion sort — the lists are tiny + for i := 1; i < len(out); i++ { + for j := i; j > 0 && out[j-1] > out[j]; j-- { + out[j-1], out[j] = out[j], out[j-1] + } + } + return out +} diff --git a/skills/integration/portability_test.go b/skills/integration/portability_test.go index 6d30eac3..6f88b090 100644 --- a/skills/integration/portability_test.go +++ b/skills/integration/portability_test.go @@ -76,6 +76,7 @@ func isClaudeAdapter(path string) bool { // future scope bug that empties shippedSkillText fails loudly rather than // passing vacuously. 
func TestShippedSurfaceHasNoHiddenMachineDependency(t *testing.T) { + markNonAC(t, "n/a — pure portability property of the shipped instruction surface (a clean install names no HOME-config/interpreter/plugin-private path). No positive behavioral seam can prove this absence; the empty-walk guard below keeps it from passing vacuously") root := skillsRoot(t) repo := repoRoot(t) files := shippedSkillText(t, root, repo) @@ -128,6 +129,7 @@ func TestShippedSurfaceHasNoHiddenMachineDependency(t *testing.T) { // distinguishes, which would otherwise let TestShippedSurfaceHasNoHiddenMachineDependency // pass for the wrong reason. func TestPortabilityCheckDiscriminatesHostSpecific(t *testing.T) { + markNonAC(t, "n/a — pure portability property: the positive controls for the discriminator in TestShippedSurfaceHasNoHiddenMachineDependency (the Claude-adapter ~/.claude exclusion is load-bearing, the HOME-rooted regex does not false-positive on project-relative .claude/ paths). A property of the shipped surface, not a behavioral claim") root := skillsRoot(t) repo := repoRoot(t) files := shippedSkillText(t, root, repo) diff --git a/skills/integration/present_gate_test.go b/skills/integration/present_gate_test.go index c7b3eb95..51607807 100644 --- a/skills/integration/present_gate_test.go +++ b/skills/integration/present_gate_test.go @@ -55,10 +55,19 @@ func foCore(t *testing.T) string { return vendoredSkillFiles(t)["first-officer/references/first-officer-shared-core.md"] } -// TestGatePresentationPresentInSkill locks AC-1(a): the moved Gate-Presentation -// fingerprints (template + assembly rules) are present in -// skills/present-gate/SKILL.md. +// TestGatePresentationPresentInSkill is a non-AC text-consistency lint: it +// asserts the moved Gate-Presentation fingerprints (template + assembly rules) are +// present in skills/present-gate/SKILL.md — that the prose MOVED here, real +// authoring work. 
Per the proof policy (f8b257cf) this presence check does NOT +// prove the FO actually renders the gate from the skill: a meaning-inverted skill +// body keeps every fingerprint. The behavior — the FO loads present-gate via +// Skill() and presents the gate without self-approving — is proven by the live +// gate-guardrail scenario (internal/ensigncycle, runClaudeGateGuardrailScenario / +// runCodexGateGuardrailScenario, asserted by assertGateHeld) and its offline +// mutation control TestGateGuardrailNegativeBrokenStateTransition. This lint only +// guards against the fingerprints being dropped or the prose being deleted. func TestGatePresentationPresentInSkill(t *testing.T) { + markNonAC(t, "live gate-guardrail scenario (assertGateHeld) + TestGateGuardrailNegativeBrokenStateTransition") skill := presentGateSkill(t) for name, fp := range gatePresentationFingerprints { if !strings.Contains(skill, fp) { @@ -67,10 +76,15 @@ func TestGatePresentationPresentInSkill(t *testing.T) { } } -// TestAllNineAssemblyRulesPresentInSkill locks AC-2(a): the skill carries all -// nine captain-facing assembly-rule fingerprints — the count is the teeth, a -// dropped rule reds the absence of its fingerprint. +// TestAllNineAssemblyRulesPresentInSkill is a non-AC text-consistency lint: it +// asserts the skill carries all nine captain-facing assembly-rule fingerprints +// (the count is the teeth — a dropped rule reds the absence of its fingerprint). +// Per the proof policy this is text authoring, not behavioral proof: an inverted +// rule body keeps the fingerprint. The behavior that the FO actually FOLLOWS the +// assembly rules when rendering a gate is proven by the live gate-guardrail +// scenario, not this presence check. 
func TestAllNineAssemblyRulesPresentInSkill(t *testing.T) { + markNonAC(t, "live gate-guardrail scenario (assertGateHeld) + TestGateGuardrailNegativeBrokenStateTransition") skill := presentGateSkill(t) if len(assemblyRuleFingerprints) != 9 { t.Fatalf("expected 9 assembly-rule fingerprints, have %d", len(assemblyRuleFingerprints)) @@ -82,12 +96,16 @@ func TestAllNineAssemblyRulesPresentInSkill(t *testing.T) { } } -// TestGatePresentationAbsentFromFOCore locks AC-1(b): the moved fingerprints are -// NO LONGER present in first-officer-shared-core.md — moved, not duplicated. -// Whole-file (NOT region-scoped): region-scoping an absence check would -// false-pass content that moved elsewhere in the file. Negative-proof: -// re-inlining the block re-introduces a fingerprint and flips this RED. +// TestGatePresentationAbsentFromFOCore is a non-AC text-consistency lint (dedup): +// it asserts the moved fingerprints are NO LONGER present in +// first-officer-shared-core.md — moved, not duplicated. Whole-file (NOT +// region-scoped): region-scoping an absence check would false-pass content that +// moved elsewhere in the file. This is a structural dedup property, not a +// behavioral claim; the FO's gate behavior is proven by the live gate-guardrail +// scenario. The lint guards against the block being re-inlined (which would +// re-introduce a fingerprint and flip this RED). func TestGatePresentationAbsentFromFOCore(t *testing.T) { + markNonAC(t, "dedup lint; behavior via live gate-guardrail scenario (assertGateHeld)") fo := foCore(t) for name, fp := range gatePresentationFingerprints { if strings.Contains(fo, fp) { @@ -105,13 +123,20 @@ func TestGatePresentationAbsentFromFOCore(t *testing.T) { // enum missed. var presentGateAtInclude = regexp.MustCompile(`@(?:\.{1,2}/)*present-gate\b`) -// TestFOCoreInvokesPresentGateSkill locks AC-1(c): the FO core's `## Completion -// and Gates` section invokes the skill via Skill(...) 
at the gate point and does -// NOT use the spike-disproven cross-skill @-include. Region-scoped to -// `## Completion and Gates` (the positive Skill()-present / @-absent assertions -// only). The Skill(...) literal is the integration seam; any `@`-token resolving -// toward present-gate is the disproven mechanism. +// TestFOCoreInvokesPresentGateSkill is a non-AC text-consistency lint: it asserts +// the FO core's `## Completion and Gates` section carries the +// Skill(skill="spacedock:present-gate") invocation literal and no disproven +// cross-skill @-include. Per the proof policy this presence check does NOT prove +// the FO invokes the skill: a meaning-inverted clause ("NEVER invoke present-gate; +// self-approve silently") keeps the Skill(...) substring and passes (verified in +// ideation — the mutation harness left this GREEN under inversion). The behavior — +// the FO actually loads present-gate and presents the gate without self-approving +// — is proven only by the live gate-guardrail scenario (assertGateHeld) and its +// offline mutation control. This lint guards the seam STRING (so the skill name in +// the contract and the skill's own frontmatter cannot silently drift apart) and +// bans the @-include mechanism; it is the text half, not the behavioral proof. func TestFOCoreInvokesPresentGateSkill(t *testing.T) { + markNonAC(t, "live gate-guardrail scenario (assertGateHeld) + TestGateGuardrailNegativeBrokenStateTransition") fo := foCore(t) region := sectionAfter(fo, "## Completion and Gates") if region == "" { @@ -125,23 +150,40 @@ func TestFOCoreInvokesPresentGateSkill(t *testing.T) { } } -// presentGateLeakageLiterals are spacedock dispatch-helper tokens the -// gate-presentation skill must NOT name — the prose is FO judgment/format, not -// shell wiring. Mirrors the sibling using-claude-team leakage table. 
-var presentGateLeakageLiterals = []string{ - "spacedock dispatch", - "spacedock status", +// presentGateBannedHelperPrefixes selects, from the code-derived spacedock +// vocabulary, the dispatch/status command PREFIXES the gate-presentation skill +// must not name — its prose is FO judgment/format, not shell wiring. It +// deliberately omits the stage-option keys (the skill legitimately references +// `{feedback-to target}` when describing a bounce-back decision), so it bans only +// the qualified command invocations. +func presentGateBannedHelperPrefixes(t *testing.T) []string { + t.Helper() + var out []string + for _, tok := range spacedockLeakageTokens(t) { + if tok == "spacedock dispatch" || tok == "spacedock status" { + out = append(out, tok) + } + } + return out } -// TestPresentGateSkillFreeOfDispatchHelperLeak locks AC-2 (absence half): the -// gate-presentation skill is free of any spacedock-dispatch-helper token. -// Negative-proof: a `spacedock dispatch`/`spacedock status` token leaking into -// the skill reds this. +// TestPresentGateSkillFreeOfDispatchHelperLeak is a code-bound invariant: the +// gate-presentation skill is free of the binary's `spacedock dispatch` / +// `spacedock status` command prefixes. The expected token set is DERIVED from the +// binary's registered command verbs (spacedockTopLevelCommands), not a literal +// frozen against the skill — so it diverges when a command verb is renamed in +// cli.go, which is what lets this fail as an invariant. A `spacedock dispatch` / +// `spacedock status` token leaking into the skill reds it. 
func TestPresentGateSkillFreeOfDispatchHelperLeak(t *testing.T) { + markCodeBoundInvariant(t, "spacedockTopLevelCommands (cli.go Use: verbs)") skill := presentGateSkill(t) - for _, banned := range presentGateLeakageLiterals { - if strings.Contains(skill, banned) { - t.Errorf("present-gate SKILL.md leaks spacedock dispatch-helper token %q (gate-presentation prose is FO judgment, not shell wiring)", banned) + banned := presentGateBannedHelperPrefixes(t) + if len(banned) == 0 { + t.Fatal("derived zero command prefixes — the cli.go command surface diverged") + } + for _, b := range banned { + if strings.Contains(skill, b) { + t.Errorf("present-gate SKILL.md leaks spacedock command prefix %q (gate-presentation prose is FO judgment, not shell wiring)", b) } } } @@ -166,32 +208,53 @@ func presentGateFrontmatterValue(t *testing.T, key string) (string, bool) { return "", false } -// TestPresentGateSkillNameMatchesSeam locks AC-2: the frontmatter `name:` VALUE -// equals `present-gate` — the directory name AND the -// `Skill(skill="spacedock:present-gate")` invocation seam. Token-presence alone -// (skill_surface_test.go) would pass a renamed skill that the seam no longer -// reaches; binding the value to the seam target catches that drift. Negative- -// proof: a bogus name value reds this. +// presentGateSeamLiteral is the seam target name the tests look up in the FO contract. +// The invocation is read from a DIFFERENT file than the skill under test — the FO shared core, +// the file that drives the FO — so the skill's frontmatter `name:` and the +// contract's `Skill(skill="spacedock:NAME")` invocation have independent sources +// that can diverge. +const presentGateSeamLiteral = "present-gate" + +// TestPresentGateSkillNameMatchesSeam is a code-bound invariant: the skill's +// frontmatter `name:` equals the seam name the FO CONTRACT invokes +// (Skill(skill="spacedock:NAME") in first-officer-shared-core.md).
The expected +// value comes from the contract, not from the skill file under test — so renaming +// the skill's frontmatter, or renaming the contract's invocation, makes the two +// diverge and reds this. That is the seam-drift the check exists to catch: a +// renamed skill the FO's invocation no longer reaches. func TestPresentGateSkillNameMatchesSeam(t *testing.T) { + markCodeBoundInvariant(t, "FO contract Skill(skill=\"spacedock:present-gate\") invocation (first-officer-shared-core.md)") name, ok := presentGateFrontmatterValue(t, "name") if !ok { t.Fatal("present-gate SKILL.md frontmatter has no name field") } - if name != "present-gate" { - t.Errorf("present-gate SKILL.md frontmatter name is %q, want %q (the directory name and the Skill(skill=\"spacedock:present-gate\") seam)", name, "present-gate") + seam := invokedSeamName(foCore(t), presentGateSeamLiteral) + if seam == "" { + t.Fatalf("FO contract does not invoke Skill(skill=\"spacedock:%s\") — the seam the skill name must match is gone", presentGateSeamLiteral) + } + if name != seam { + t.Errorf("present-gate SKILL.md frontmatter name is %q, but the FO contract invokes the seam %q — a renamed skill the FO invocation no longer reaches", name, seam) } } -// TestPresentGateSkillIsFOInternal locks AC-2: the frontmatter carries -// `user-invocable: false` — the skill is FO-internal (loaded mid-run via -// Skill()), not a captain-facing user skill. Negative-proof: flipping to `true` -// reds this. +// TestPresentGateSkillIsFOInternal is a code-bound invariant binding the skill's +// `user-invocable` frontmatter to its ROLE in the FO contract: a skill the FO +// invokes mid-run via Skill(skill="spacedock:NAME") is FO-internal and MUST be +// `user-invocable: false`, never a captain-facing user skill. The expected value +// is not a free literal — it is REQUIRED by the contract invoking the seam: the +// presence of the invocation (an independent source) is what makes +// `user-invocable: true` wrong. 
Flipping the frontmatter to `true` while the FO +// still invokes the seam reds this. func TestPresentGateSkillIsFOInternal(t *testing.T) { + markCodeBoundInvariant(t, "FO contract Skill(skill=\"spacedock:present-gate\") invocation implies FO-internal") + if invokedSeamName(foCore(t), presentGateSeamLiteral) == "" { + t.Fatalf("FO contract does not invoke the present-gate seam — the FO-internal premise no longer holds") + } v, ok := presentGateFrontmatterValue(t, "user-invocable") if !ok { t.Fatal("present-gate SKILL.md frontmatter has no user-invocable field") } if v != "false" { - t.Errorf("present-gate SKILL.md frontmatter user-invocable is %q, want \"false\" (the skill is FO-internal)", v) + t.Errorf("present-gate SKILL.md frontmatter user-invocable is %q, but the FO contract invokes it as a mid-run seam — an FO-internal skill must be user-invocable: false", v) } } diff --git a/skills/integration/reconcile_session_contract_test.go b/skills/integration/reconcile_session_contract_test.go index acdb17ee..3f46129d 100644 --- a/skills/integration/reconcile_session_contract_test.go +++ b/skills/integration/reconcile_session_contract_test.go @@ -61,6 +61,7 @@ func reconcileStep0Region(t *testing.T, text string) string { // anywhere in the file. A paraphrase that drops the precondition (re-inviting the // unsafe bare-fallback) fails this. func TestReconcileStep0RequiresTeamIdentityForRoster(t *testing.T) { + markNonAC(t, "internal/dispatch reconcile_session_test.go (TestReconcileSessionMatchedDiscovery, TestReconcileExplicitTeamNameIgnoresSession, TestReconcileDegradeEmitsGitClasses) — the code gates enforcing the team-identity->roster-class behavior") region := reconcileStep0Region(t, claudeFORuntime(t)) // The region must tie roster reconciliation to a required team identity. @@ -90,6 +91,7 @@ func TestReconcileStep0RequiresTeamIdentityForRoster(t *testing.T) { // to an unsafe heuristic. 
The bracketed-optional form is exactly the wording the // entity flagged as inviting the bare unsafe invocation. func TestReconcileStep0DropsOptionalTeamNameFraming(t *testing.T) { + markNonAC(t, "internal/dispatch reconcile_session_test.go (TestReconcileExplicitTeamNameIgnoresSession + TestReconcileGateSuppressesEvenWithPopulatedRoster) — the code gates that make bare reconcile git-only") region := reconcileStep0Region(t, claudeFORuntime(t)) if strings.Contains(region, "[--team-name {team_name}]") { t.Errorf("step-0 region still frames --team-name as a bracketed-optional flag, re-inviting the unsafe bare fallback:\n%s", region) diff --git a/skills/integration/ship_local_ceremony_test.go b/skills/integration/ship_local_ceremony_test.go index 496eb5f9..147cbe50 100644 --- a/skills/integration/ship_local_ceremony_test.go +++ b/skills/integration/ship_local_ceremony_test.go @@ -43,6 +43,7 @@ func subsectionAfter(text, heading string) string { // TestMergeLocalNoSentinelTerminalSetSucceeds and siblings), so this lint does // not grep the ceremony prose for force-related wording. func TestShipLocalCeremonyBlockExists(t *testing.T) { + markNonAC(t, "internal/status merge_policy_guard_test.go (TestMergeLocalNoSentinelTerminalSetSucceeds and siblings)") fo := vendoredSkillFiles(t)["first-officer/references/first-officer-shared-core.md"] region := subsectionAfter(fo, "### Ship-Local Ceremony") if region == "" { diff --git a/skills/integration/skill_surface_test.go b/skills/integration/skill_surface_test.go index e8763455..93b8e673 100644 --- a/skills/integration/skill_surface_test.go +++ b/skills/integration/skill_surface_test.go @@ -18,6 +18,7 @@ var userSkills = []string{"commission", "debrief", "refit", "ensign", "first-off // TestUserSkillsPresentWithFrontmatter locks AC-1: each of the five user skills // ships a SKILL.md whose YAML frontmatter declares a `name` and a `description`. 
func TestUserSkillsPresentWithFrontmatter(t *testing.T) { + markNonAC(t, "n/a — structural config lint (each SKILL.md frontmatter declares name+description); the skill-discovery behavior is exercised by the host loading the surface") root := skillsRoot(t) for _, skill := range userSkills { path := filepath.Join(root, skill, "SKILL.md") @@ -81,6 +82,7 @@ var referenceRe = regexp.MustCompile(`@?(references/[A-Za-z0-9_./-]+\.md)`) // Brace-placeholder template paths (e.g. references/templates/{name}.md) are // resolved against their concrete siblings rather than the literal `{name}`. func TestPiRuntimeAdaptersAreLoadable(t *testing.T) { + markCodeBoundInvariant(t, "os.Stat against the real skill tree (the adapter file must resolve on disk) — an independent filesystem source, not the SKILL.md text") root := skillsRoot(t) cases := []struct { skill string @@ -105,6 +107,7 @@ func TestPiRuntimeAdaptersAreLoadable(t *testing.T) { } func TestPiFirstOfficerRuntimeRequiresFreshSubagentContextForStages(t *testing.T) { + markNonAC(t, "Pi live runner (internal/ensigncycle TestLivePiSubagentEnsignSmoke exercises the Pi subagent dispatch path with fresh context)") root := skillsRoot(t) path := filepath.Join(root, "first-officer", "references", "pi-first-officer-runtime.md") data, err := os.ReadFile(path) @@ -136,6 +139,7 @@ func TestPiFirstOfficerRuntimeRequiresFreshSubagentContextForStages(t *testing.T } func TestPiFirstOfficerRuntimeForbidsSubagentAcceptanceForStages(t *testing.T) { + markNonAC(t, "Pi live runner (internal/ensigncycle TestLivePiSubagentEnsignSmoke exercises the Pi subagent dispatch path)") root := skillsRoot(t) path := filepath.Join(root, "first-officer", "references", "pi-first-officer-runtime.md") data, err := os.ReadFile(path) @@ -167,6 +171,7 @@ func TestPiFirstOfficerRuntimeForbidsSubagentAcceptanceForStages(t *testing.T) { } func TestPiFirstOfficerRuntimeFollowupsAreFreshByDefault(t *testing.T) { + markNonAC(t, "Pi live runner (internal/ensigncycle 
TestLivePiSubagentEnsignSmoke exercises the Pi subagent dispatch path; fresh-redispatch is the default it drives)") root := skillsRoot(t) path := filepath.Join(root, "first-officer", "references", "pi-first-officer-runtime.md") data, err := os.ReadFile(path) @@ -202,6 +207,7 @@ func TestPiFirstOfficerRuntimeFollowupsAreFreshByDefault(t *testing.T) { } func TestUserSkillReferenceClosureResolves(t *testing.T) { + markCodeBoundInvariant(t, "os.Stat against the real skill tree (every @references/ path must resolve on disk) — an independent filesystem source") root := skillsRoot(t) for _, skill := range userSkills { skillDir := filepath.Join(root, skill) @@ -235,6 +241,7 @@ func TestUserSkillReferenceClosureResolves(t *testing.T) { // The reconciled surface calls `spacedock status`; a blind-copied python-era // path fails here. func TestNoPluginPrivateStatusPathInUserSkills(t *testing.T) { + markNonAC(t, "behavioral coverage: the launcher smoke seam (TestLauncherListSetArchive drives the real `spacedock status` binary) + internal/status/* prove the positive `spacedock status` path; this is the structural-absence complement over the shipped skill surface no positive seam can prove") root := skillsRoot(t) repo := repoRoot(t) banned := []string{ diff --git a/skills/integration/skill_text_test.go b/skills/integration/skill_text_test.go index f7ff15c3..05f59951 100644 --- a/skills/integration/skill_text_test.go +++ b/skills/integration/skill_text_test.go @@ -45,9 +45,14 @@ func vendoredSkillFiles(t *testing.T) map[string]string { return out } -// TestNoPluginStatusPathInVendoredSkills locks AC-1: no file in the vendored -// skill instruction surface references the plugin-private status path. +// TestNoPluginStatusPathInVendoredSkills is a non-AC structural absence lint: no +// file in the vendored skill instruction surface references the plugin-private +// status path or the {spacedock_plugin_dir} token. 
This is a structural negative +// over the on-disk surface, not a behavioral claim; the real status invocation +// path is exercised by internal/status. The lint catches a blind-copied python-era +// path being re-introduced into the instruction text. func TestNoPluginStatusPathInVendoredSkills(t *testing.T) { + markNonAC(t, "n/a — structural absence over the instruction surface; status behavior via internal/status") for name, content := range vendoredSkillFiles(t) { if strings.Contains(content, "skills/commission/bin/status") { t.Errorf("%s references plugin-private status path 'skills/commission/bin/status'", name) @@ -75,6 +80,7 @@ func TestNoPluginStatusPathInVendoredSkills(t *testing.T) { // NOT bare prose-grep — it asserts a structural negative over the amendments, // not the presence of an instruction clause. func TestNoPRMergeOrModBehaviorIntroduced(t *testing.T) { + markNonAC(t, "n/a — structural absence scope-fence over the amendment regions; merge/dispatch lifecycle behavior via internal/status guards") files := vendoredSkillFiles(t) // The only `## Hook:` text legitimately present is the pre-existing Mod Hook @@ -106,10 +112,14 @@ func TestNoPRMergeOrModBehaviorIntroduced(t *testing.T) { } } -// TestFirstOfficerDispatchDocsUseFlagFileMode locks the dispatch-build -// ergonomics contract: the FO runtime docs must teach file-backed dispatch input -// and runtime-derived host selection, not inline shell JSON. +// TestSkillSurfaceDocumentsSpacedockBinInvariant is a non-AC text-consistency +// lint: the FO/ensign/debrief surface documents the env-aware +// `${SPACEDOCK_BIN:-spacedock}` launcher token. The actual launcher-resolution +// behavior is exercised by the front-door tests in internal/cli (the binary path +// propagation), not by this presence check; this only guards that the documented +// invocation token is not silently dropped from the instruction surface. 
func TestSkillSurfaceDocumentsSpacedockBinInvariant(t *testing.T) { + markNonAC(t, "internal/cli front-door tests (spacedock binary path propagation)") files := vendoredSkillFiles(t) for _, name := range []string{ "first-officer/references/first-officer-shared-core.md", @@ -123,16 +133,32 @@ func TestSkillSurfaceDocumentsSpacedockBinInvariant(t *testing.T) { } } +// TestFirstOfficerDispatchDocsUseFlagFileMode is a code-bound invariant on the +// dispatch-build ergonomics contract: the FO runtime docs must teach the +// file-backed dispatch-build flags the BINARY actually parses (derived from +// dispatch.go's isBuildRequestFlag via spacedockBuildRequestFlags), not inline +// shell JSON. The required-flag set comes from code, not a literal frozen against +// the docs, so renaming a build flag in the router makes the docs check diverge +// and red. The banned inline-JSON absence and the host-derivation tokens +// (CLAUDECODE / CODEX_THREAD_ID env vars) are documented alongside. func TestFirstOfficerDispatchDocsUseFlagFileMode(t *testing.T) { + markCodeBoundInvariant(t, "spacedockBuildRequestFlags (dispatch.go isBuildRequestFlag)") files := vendoredSkillFiles(t) claude := files["first-officer/references/claude-first-officer-runtime.md"] codex := files["first-officer/references/codex-first-officer-runtime.md"] + // The primary file-backed flags the docs must teach are the intersection of the + // binary's build-request flags with the load-bearing trio the dispatch contract + // names; binding to the code source means a renamed flag reds here. 
+ wantFlags := intersect(spacedockBuildRequestFlags(t), "--entity-path", "--stage", "--checklist-file") + if len(wantFlags) != 3 { + t.Fatalf("the binary no longer defines all of --entity-path/--stage/--checklist-file as build-request flags; derived %v", wantFlags) + } for name, content := range map[string]string{ "claude-first-officer-runtime.md": claude, "codex-first-officer-runtime.md": codex, } { - for _, want := range []string{"--entity-path", "--stage", "--checklist-file"} { + for _, want := range wantFlags { if !strings.Contains(content, want) { t.Errorf("%s primary dispatch docs do not mention %s", name, want) } @@ -219,6 +245,7 @@ func commissionStateBackendDecisionRows(t *testing.T) map[string]string { // which pins the two-row shape and each row's bound outcome rather than prose // wording, and FAILS if a branch is dropped, merged, or rebound to the wrong path. func TestCommissionStateBackendDecisionRule(t *testing.T) { + markNonAC(t, "n/a — structural two-row decision-table self-consistency (no Go scaffolder takes standalone-vs-code-repo and emits frontmatter); the split-root behavior is proven by internal/cli TestStateNewBirthsSplitRoot") rows := commissionStateBackendDecisionRows(t) splitRoot, hasSplit := rows["Split-root"] inline, hasInline := rows["Inline"] diff --git a/skills/integration/spacedock_vocabulary_test.go b/skills/integration/spacedock_vocabulary_test.go new file mode 100644 index 00000000..1984906c --- /dev/null +++ b/skills/integration/spacedock_vocabulary_test.go @@ -0,0 +1,307 @@ +// ABOUTME: Code-derived spacedock CLI vocabulary — the independent source the +// ABOUTME: leakage checks bind to, AST-extracted from the dispatch router + the status stage-option keys. 
+package integration + +import ( + "go/ast" + "go/parser" + "go/token" + "path/filepath" + "regexp" + "testing" +) + +// skillSeamRe captures the skill name from a `Skill(skill="spacedock:NAME")` +// invocation in an FO/ensign contract — the integration seam the FO actually +// invokes mid-run. The captured NAME is the independent source a skill's +// frontmatter `name:` binds to: if the contract's invocation and the skill's +// declared name drift apart, the seam breaks, and a check comparing the two REDs. +var skillSeamRe = regexp.MustCompile(`Skill\(skill="spacedock:([a-z0-9-]+)"\)`) + +// invokedSeamName returns the skill name the given contract text invokes for the +// expected target. It scans every Skill(skill="spacedock:NAME") invocation and +// returns the matching NAME, or "" if the contract never invokes that seam. The +// expected value is read from the CONTRACT (the file that drives the FO), not from +// the skill file under test, so the two have independent sources that can diverge. +func invokedSeamName(contract, want string) string { + for _, m := range skillSeamRe.FindAllStringSubmatch(contract, -1) { + if m[1] == want { + return m[1] + } + } + return "" +} + +// spacedockDispatchSubcommands AST-extracts the dispatch subcommand names the +// binary actually routes, from the `switch args[0] { case "..." }` in +// internal/dispatch/dispatch.go's Run. This is an independent code-side source: +// the binary parses these as commands, not from any instruction file the model +// reads, and a rename in the router shifts the set — which is exactly what lets a +// leakage check that binds to it diverge from a stale token frozen in a skill. 
+func spacedockDispatchSubcommands(t *testing.T) []string { + t.Helper() + src := filepath.Join(repoRoot(t), "internal", "dispatch", "dispatch.go") + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, src, nil, 0) + if err != nil { + t.Fatalf("parse dispatch.go: %v", err) + } + var subs []string + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok || fn.Name.Name != "Run" { + return true + } + ast.Inspect(fn, func(m ast.Node) bool { + sw, ok := m.(*ast.SwitchStmt) + if !ok { + return true + } + // Only the subcommand switch (`switch args[0]`) yields command names. + tag, ok := sw.Tag.(*ast.IndexExpr) + if !ok { + return true + } + if id, ok := tag.X.(*ast.Ident); !ok || id.Name != "args" { + return true + } + for _, stmt := range sw.Body.List { + cc, ok := stmt.(*ast.CaseClause) + if !ok { + continue + } + for _, expr := range cc.List { + if lit, ok := expr.(*ast.BasicLit); ok && lit.Kind == token.STRING { + subs = append(subs, trimQuotes(lit.Value)) + } + } + } + return false + }) + return false + }) + if len(subs) == 0 { + t.Fatal("extracted zero dispatch subcommands from dispatch.go — the AST source diverged from the router shape") + } + return subs +} + +// spacedockBuildRequestFlags AST-extracts the dispatch-build request flag names +// the binary accepts, from the `case "--entity-path", ...` in dispatch.go's +// isBuildRequestFlag. These are the real flags the build path parses — an +// independent code-side source for the "docs teach file-backed dispatch input" +// invariant: a flag renamed in code shifts the set, so a docs check binding to it +// tracks the binary's actual flag surface rather than a frozen literal. 
+func spacedockBuildRequestFlags(t *testing.T) []string { + t.Helper() + src := filepath.Join(repoRoot(t), "internal", "dispatch", "dispatch.go") + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, src, nil, 0) + if err != nil { + t.Fatalf("parse dispatch.go: %v", err) + } + var flags []string + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok || fn.Name.Name != "isBuildRequestFlag" { + return true + } + ast.Inspect(fn, func(m ast.Node) bool { + cc, ok := m.(*ast.CaseClause) + if !ok { + return true + } + for _, e := range cc.List { + if lit, ok := e.(*ast.BasicLit); ok && lit.Kind == token.STRING { + flags = append(flags, trimQuotes(lit.Value)) + } + } + return true + }) + return false + }) + if len(flags) == 0 { + t.Fatal("extracted zero build-request flags from isBuildRequestFlag in dispatch.go") + } + return flags +} + +// spacedockTopLevelCommands AST-extracts the binary's top-level command names +// from the `Use: " ..."` fields of the cobra commands in internal/cli/cli.go. +// The first word of each Use string is the command verb (`status`, `dispatch`, +// `claude`, ...). This is the independent source for the `spacedock dispatch` / +// `spacedock status` leakage prefixes: the binary registers these verbs, and a +// rename in cli.go shifts the set, so a leakage check that binds to it tracks the +// real command surface rather than a frozen literal. 
+func spacedockTopLevelCommands(t *testing.T) []string { + t.Helper() + src := filepath.Join(repoRoot(t), "internal", "cli", "cli.go") + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, src, nil, 0) + if err != nil { + t.Fatalf("parse cli.go: %v", err) + } + var cmds []string + ast.Inspect(f, func(n ast.Node) bool { + kv, ok := n.(*ast.KeyValueExpr) + if !ok { + return true + } + key, ok := kv.Key.(*ast.Ident) + if !ok || key.Name != "Use" { + return true + } + lit, ok := kv.Value.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return true + } + use := trimQuotes(lit.Value) + if i := indexSpace(use); i >= 0 { + use = use[:i] + } + if use != "" && use != "spacedock" { + cmds = append(cmds, use) + } + return true + }) + if len(cmds) == 0 { + t.Fatal("extracted zero top-level commands from cli.go Use: fields") + } + return cmds +} + +func indexSpace(s string) int { + for i := 0; i < len(s); i++ { + if s[i] == ' ' { + return i + } + } + return -1 +} + +// spacedockStageOptionKeys AST-extracts the stage-option keys the binary parses, +// from the `[]string{"feedback-to", ...}` literal in internal/status/stages.go. +// These are real frontmatter keys the binary reads, not file prose — an +// independent source that shifts when a key is renamed in code. 
+func spacedockStageOptionKeys(t *testing.T) []string { + t.Helper() + src := filepath.Join(repoRoot(t), "internal", "status", "stages.go") + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, src, nil, 0) + if err != nil { + t.Fatalf("parse stages.go: %v", err) + } + var keys []string + ast.Inspect(f, func(n ast.Node) bool { + cl, ok := n.(*ast.CompositeLit) + if !ok { + return true + } + arr, ok := cl.Type.(*ast.ArrayType) + if !ok { + return true + } + if id, ok := arr.Elt.(*ast.Ident); !ok || id.Name != "string" { + return true + } + var lits []string + for _, e := range cl.Elts { + lit, ok := e.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return true + } + lits = append(lits, trimQuotes(lit.Value)) + } + // The stage-option list is the one containing "feedback-to" — pin to that + // literal so an unrelated []string{...} does not contribute. + for _, l := range lits { + if l == "feedback-to" { + keys = lits + return false + } + } + return true + }) + if len(keys) == 0 { + t.Fatal("did not find the stage-option-keys []string{...} (feedback-to) in stages.go") + } + return keys +} + +// spacedockLeakageTokens is the canonical spacedock-specific token set the +// host-neutral / generic-skill bodies must NOT name. It is DERIVED from code (the +// dispatch router's subcommand surface + the status stage-option keys), not a +// literal frozen in a test, so the set and any skill body can diverge — that +// divergence is what makes a leakage check able to fail as an invariant rather +// than as a self-match. +// +// Only spacedock-SPECIFIC forms are included, so a generic-prose word never +// false-fires: the bare `spacedock dispatch` / `spacedock status` prefixes; the +// dispatch helper subcommands QUALIFIED with their `spacedock dispatch ` prefix +// (so a bare English `reconcile`/`build` in event-loop prose is fine, only the +// qualified invocation leaks); and the hyphenated stage-option keys (e.g. 
+// `feedback-to`), which are spacedock frontmatter vocabulary, not generic words — +// the single-word stage keys (agent/fresh/model) are excluded because they appear +// in legitimate generic prose. +func spacedockLeakageTokens(t *testing.T) []string { + t.Helper() + var tokens []string + // The leak-prone top-level prefixes: dispatch + status, derived from the + // binary's registered command verbs (not a frozen literal). + for _, cmd := range spacedockTopLevelCommands(t) { + if cmd == "dispatch" || cmd == "status" { + tokens = append(tokens, "spacedock "+cmd) + } + } + for _, sub := range spacedockDispatchSubcommands(t) { + // build/show-stage-def are the host-neutral surface a skill may name; the + // Claude-coupled helper subcommands are the leak-prone ones — banned only in + // their qualified `spacedock dispatch ` form so a generic English word + // (a bare `reconcile`) does not false-fire. + switch sub { + case "build", "show-stage-def": + default: + tokens = append(tokens, "spacedock dispatch "+sub) + } + } + for _, k := range spacedockStageOptionKeys(t) { + // Only the hyphenated, spacedock-specific stage keys (feedback-to) are + // leak-prone as a bare token; single-word keys appear in generic prose. + if isHyphenated(k) { + tokens = append(tokens, k) + } + } + return tokens +} + +func isHyphenated(s string) bool { + for i := 0; i < len(s); i++ { + if s[i] == '-' { + return true + } + } + return false +} + +// intersect returns the want values that appear in have — used to derive the +// load-bearing subset of a code-extracted set without hardcoding the full set. 
+func intersect(have []string, want ...string) []string { + present := map[string]bool{} + for _, h := range have { + present[h] = true + } + var out []string + for _, w := range want { + if present[w] { + out = append(out, w) + } + } + return out +} + +func trimQuotes(s string) string { + if len(s) >= 2 && (s[0] == '"' || s[0] == '`') { + return s[1 : len(s)-1] + } + return s +} diff --git a/skills/integration/terminal_teardown_retry_test.go b/skills/integration/terminal_teardown_retry_test.go index 6c9ea832..b632e48a 100644 --- a/skills/integration/terminal_teardown_retry_test.go +++ b/skills/integration/terminal_teardown_retry_test.go @@ -49,6 +49,7 @@ const terminalTeardownMarker = "TERMINAL_TEARDOWN_BOUNDED: best-effort teardown // is the live-e2e CI run (AC-1). The lint guards against the clause being dropped // or re-inverted in a future edit. func TestTerminalTeardownIsBoundedBestEffort(t *testing.T) { + markNonAC(t, "internal/ensigncycle teardown-grade drive (#285): TestTerminalTeardownGradePassesOnMarkerEmission + TestTerminalTeardownGradeFailsWhenMarkerNeverEmitted (expectTerminalTeardownGrade over real/synthetic streams) + the live-e2e CI run (AC-1)") files := vendoredSkillFiles(t) // negatingPhrases are the inversion fingerprints. Two groups: @@ -286,9 +287,19 @@ func numberedStep(region string, n int) string { // HONEST CEILING: prose lints are inherently reword-evadable — a sufficiently // novel paraphrase of "you may tear down early" that uses none of the permission // cues above will still pass. This lint raises the bar to "the common affirmative -// re-intros red AND the ban's positive framing is pinned"; the BEHAVIORAL oracle -// for the ban surviving is the live-e2e run (AC-1), not this structural lint. +// re-intros red AND the ban's positive framing is pinned"; it is NOT the +// behavioral proof. 
+// +// Behavioral coverage: every live team scenario (gate-guardrail, rejection-flow) +// dispatches a real ensign and awaits its completion, so a premature +// pre-completion TeamDelete would break those runs — the ban is exercised +// implicitly. There is NO dedicated mutation-controlled drive that ASSERTS the +// pre-completion-TeamDelete ban specifically (distinct from the terminal-teardown +// HANG covered by the #285 teardown-grade and TestSonnetTeamDeleteHangReplay). +// That dedicated drive is OWED and flagged to team-lead for a follow-up task; it +// is NOT silently capped here. func TestAwaitingCompletionStillBansPreCompletionTeamDelete(t *testing.T) { + markNonAC(t, "OWED dedicated drive (flagged to team-lead, follow-up TBD): the pre-completion-TeamDelete ban. Implicit today: every live team scenario (gate-guardrail/rejection-flow) awaits a real ensign completion, so a premature teardown breaks the run") skill := usingClaudeTeamSkill(t) region := sectionAfter(skill, "## Awaiting Completion") if region == "" { diff --git a/skills/integration/using_claude_team_test.go b/skills/integration/using_claude_team_test.go index 40b71646..b9ffbdf8 100644 --- a/skills/integration/using_claude_team_test.go +++ b/skills/integration/using_claude_team_test.go @@ -48,10 +48,16 @@ func foRuntime(t *testing.T) string { return vendoredSkillFiles(t)["first-officer/references/claude-first-officer-runtime.md"] } -// TestGenericBlocksPresentInSkill locks AC-1(a): each of the four generic -// team-lifecycle blocks is present in skills/using-claude-team/SKILL.md, keyed -// by its unique fingerprint literal. +// TestGenericBlocksPresentInSkill is a non-AC text-consistency lint: each of the +// four generic team-lifecycle blocks is present in +// skills/using-claude-team/SKILL.md (the blocks MOVED here). 
Per the proof policy +// this is text authoring, not behavioral proof; the behavior these blocks govern +// (the FO creates a team, dispatches workers, awaits completion, tears down) is +// exercised for real by every team-using live scenario (the gate-guardrail and +// rejection-flow runs both launch a real team via the FO runtime). This lint +// guards against a block fingerprint being dropped. func TestGenericBlocksPresentInSkill(t *testing.T) { + markNonAC(t, "live team-using scenarios (gate-guardrail/rejection-flow launch a real team)") skill := usingClaudeTeamSkill(t) for block, fp := range genericBlockFingerprints { if !strings.Contains(skill, fp) { @@ -60,13 +66,15 @@ func TestGenericBlocksPresentInSkill(t *testing.T) { } } -// TestGenericBlocksAbsentFromFORuntime locks AC-1(b): the four generic -// fingerprints are NO LONGER present in claude-first-officer-runtime.md — the -// blocks moved, not duplicated. Whole-file (NOT region-scoped): region-scoping a -// generic-absence check would false-pass content that moved elsewhere in the -// file. Negative-proof: re-inlining any block re-introduces its fingerprint and -// flips this RED. +// TestGenericBlocksAbsentFromFORuntime is a non-AC text-consistency lint (dedup): +// the four generic fingerprints are NO LONGER present in +// claude-first-officer-runtime.md — the blocks moved, not duplicated. Whole-file +// (NOT region-scoped) so content that moved elsewhere does not false-pass. This is +// a structural dedup property, not a behavioral claim; the team behavior is proven +// by the live team-using scenarios. Re-inlining any block re-introduces its +// fingerprint and flips this RED. 
func TestGenericBlocksAbsentFromFORuntime(t *testing.T) { + markNonAC(t, "dedup lint; behavior via live team-using scenarios") fo := foRuntime(t) for block, fp := range genericBlockFingerprints { if strings.Contains(fo, fp) { @@ -75,13 +83,16 @@ func TestGenericBlocksAbsentFromFORuntime(t *testing.T) { } } -// TestFORuntimeInvokesSkill locks AC-1(c): the FO runtime's `## Team Creation` -// section invokes the skill via Skill(...) and does NOT use the disproven -// cross-skill @-include. Region-scoped to `## Team Creation` (the positive -// Skill()-present / @-absent assertions only — the region legitimately retains -// the standing-teammate subsections). The Skill(...) literal is the integration -// seam; the @-form is the spike-disproven mechanism. +// TestFORuntimeInvokesSkill is a non-AC text-consistency lint: it asserts the FO +// runtime's `## Team Creation` section carries the +// Skill(skill="spacedock:using-claude-team") invocation literal and no disproven +// cross-skill @-include. Per the proof policy this presence check does NOT prove +// the FO invokes the skill: an inverted clause keeps the substring. The behavior — +// the FO loads the team-harness discipline and runs a real team — is exercised by +// every team-using live scenario. This lint guards the seam STRING and bans the +// @-include mechanism; it is the text half, not the behavioral proof. func TestFORuntimeInvokesSkill(t *testing.T) { + markNonAC(t, "live team-using scenarios (gate-guardrail/rejection-flow launch a real team)") fo := foRuntime(t) region := sectionAfter(fo, "## Team Creation") if region == "" { @@ -97,11 +108,14 @@ func TestFORuntimeInvokesSkill(t *testing.T) { } } -// TestFORuntimeDroppedMaterially locks AC-1(d): claude-first-officer-runtime.md -// line count dropped by at least foRuntimeLineDropFloor vs the pre-change -// baseline. Secondary signal to the fingerprint-absence teeth above — the floor -// catches a no-op edit, the fingerprints catch a duplicate-not-moved edit. 
+// TestFORuntimeDroppedMaterially is a non-AC structural lint: it asserts +// claude-first-officer-runtime.md dropped at least foRuntimeLineDropFloor lines vs +// a hardcoded pre-change baseline. This is a drift-prone numeric floor (a +// secondary signal to the fingerprint-absence dedup lints), not a behavioral +// claim; no behavior depends on the exact line count. Kept as a sanity check that +// the extraction actually removed bulk, not as proof of any AC. func TestFORuntimeDroppedMaterially(t *testing.T) { + markNonAC(t, "n/a — structural line-count floor, no behavior to drive") fo := foRuntime(t) lines := strings.Count(fo, "\n") if fo != "" && !strings.HasSuffix(fo, "\n") { @@ -114,47 +128,66 @@ func TestFORuntimeDroppedMaterially(t *testing.T) { } } -// skillLeakageLiterals are spacedock-specific tokens the generic team-harness -// skill must NOT name. The qualified `spacedock dispatch` covers every -// dispatch-helper leak (build / reconcile / context-budget) in one. Mirrors the -// sibling devLeakageLiterals table. Bare `reconcile` is deliberately NOT here — -// it appears in legitimate generic event-loop/backstop prose and would -// false-fire. -var skillLeakageLiterals = []string{ - "spacedock dispatch", - "spacedock status", - "feedback-to", - "context-budget", -} - -// TestSkillFreeOfSpacedockTokens locks AC-2 (absence half): the generic skill is -// free of spacedock-specific tokens. Negative-proof: a `spacedock dispatch` -// token leaking into the skill reds this. +// TestSkillFreeOfSpacedockTokens is a code-bound invariant: the generic +// team-harness skill is free of the spacedock-specific vocabulary. 
The banned set +// is DERIVED from code (spacedockLeakageTokens: the dispatch router's helper +// subcommands qualified with their `spacedock dispatch ` prefix, the +// `spacedock dispatch`/`spacedock status` command prefixes from cli.go, and the +// hyphenated stage-option keys like `feedback-to`), not a literal frozen against +// the skill — so the set shifts when the binary's command surface changes, which +// is what lets this fail as an invariant. A spacedock token leaking into the +// skill reds it. The qualified `spacedock dispatch ` forms mean a bare +// English word (a generic `reconcile` in event-loop prose) never false-fires. func TestSkillFreeOfSpacedockTokens(t *testing.T) { + markCodeBoundInvariant(t, "spacedockLeakageTokens (dispatch router + cli.go verbs + status stage keys)") skill := usingClaudeTeamSkill(t) - for _, banned := range skillLeakageLiterals { - if strings.Contains(skill, banned) { - t.Errorf("using-claude-team SKILL.md leaks spacedock-specific token %q (must stay team-harness-generic)", banned) + banned := spacedockLeakageTokens(t) + if len(banned) == 0 { + t.Fatal("derived zero leakage tokens — the code-side vocabulary source diverged") + } + for _, b := range banned { + if strings.Contains(skill, b) { + t.Errorf("using-claude-team SKILL.md leaks spacedock-specific token %q (must stay team-harness-generic)", b) } } } -// TestSpacedockDecisionsStayInFORuntime locks AC-2 (presence half): the -// spacedock decision points REMAIN in the FO contract. The positive anchors are -// the QUALIFIED dispatch-helper calls — `spacedock dispatch build`, -// `spacedock dispatch context-budget`, and the fully-qualified -// `spacedock dispatch reconcile` (NOT bare `reconcile`, which is ×4 incl. -// generic prose). Negative-proof: the qualified build/context-budget/reconcile -// call wrongly moved out of the FO contract reds this. 
+// TestSpacedockDecisionsStayInFORuntime is a code-bound invariant: the spacedock +// decision points REMAIN in the FO contract. The required anchors are the +// QUALIFIED dispatch-helper invocations DERIVED from the dispatch router +// (`spacedock dispatch build`, `spacedock dispatch context-budget`, +// `spacedock dispatch reconcile`) rather than literals frozen against the file — +// the binary defines these subcommands, so the anchor set tracks the real command +// surface and a subcommand renamed in code shifts the expectation. A qualified +// decision call wrongly moved out of the FO contract reds this. func TestSpacedockDecisionsStayInFORuntime(t *testing.T) { + markCodeBoundInvariant(t, "spacedockDispatchSubcommands (dispatch.go router)") fo := foRuntime(t) - for _, anchor := range []string{ - "spacedock dispatch build", - "spacedock dispatch context-budget", - "spacedock dispatch reconcile", - } { + subs := spacedockDispatchSubcommands(t) + required := spacedockQualified(subs, "build", "context-budget", "reconcile") + if len(required) != 3 { + t.Fatalf("expected build/context-budget/reconcile in the dispatch router, derived %v from %v", required, subs) + } + for _, anchor := range required { if !strings.Contains(fo, anchor) { t.Errorf("claude-first-officer-runtime.md no longer contains spacedock decision anchor %q (must stay in the FO contract)", anchor) } } } + +// spacedockQualified returns `spacedock dispatch ` for each `want` that the +// router actually exposes in subs — so a decision anchor cannot name a subcommand +// the binary does not route, and a renamed subcommand drops out of the set. 
+func spacedockQualified(subs []string, want ...string) []string { + have := map[string]bool{} + for _, s := range subs { + have[s] = true + } + var out []string + for _, w := range want { + if have[w] { + out = append(out, "spacedock dispatch "+w) + } + } + return out +} diff --git a/skills/integration/working_principles_test.go b/skills/integration/working_principles_test.go index 8f82295c..222a5f2a 100644 --- a/skills/integration/working_principles_test.go +++ b/skills/integration/working_principles_test.go @@ -40,6 +40,7 @@ func shippedInstructionFiles(t *testing.T) map[string]string { // instructions a clean-room contributor reads. The check is case-insensitive so // "Oracle"/"ORACLE" cannot sneak through. func TestShippedInstructionsCarryNoInsiderJargon(t *testing.T) { + markNonAC(t, "n/a — the claim is about the shipped instruction text (plain language, no insider jargon); banned-token absence lint with no behavior to drive") bannedJargon := []string{"oracle"} for label, content := range shippedInstructionFiles(t) { lower := strings.ToLower(content) @@ -59,6 +60,7 @@ func TestShippedInstructionsCarryNoInsiderJargon(t *testing.T) { // wording is doc review, not a Go assertion, and a paraphrase that keeps the // heading is not something this lint should pass or fail on. func TestFOContractCarriesWorkingPrinciplesSection(t *testing.T) { + markNonAC(t, "n/a — the claim is about the contract text (a structural section-heading anchor); no behavior to drive") fo := shippedInstructionFiles(t)["FO contract (first-officer-shared-core.md)"] if !strings.Contains(fo, "## Working Principles") { t.Errorf("FO contract missing the `## Working Principles` section heading")