From 74245d7df2b60d31a9749e8a865f6e850e18c6d1 Mon Sep 17 00:00:00 2001 From: CL Kao Date: Thu, 4 Jun 2026 08:05:22 -0700 Subject: [PATCH 1/2] Lift Gate Presentation into a lazy spacedock:present-gate skill Move the entire `## Gate Presentation` block (the captain-facing gate-review template + nine assembly rules) out of first-officer-shared-core.md into a new FO-internal `skills/present-gate/SKILL.md`, loaded lazily via `Skill(skill="spacedock:present-gate")` at the gate point instead of riding the eager FO boot read. The always-on decide-to-gate / AC-cross-check / self-approve logic stays in `## Completion and Gates`; only the rendering moves. The moved block is byte-identical to origin/next lines 158-191. Add skills/integration/present_gate_test.go (AC-1/AC-2): present-in-skill + absent-from-core fingerprints, all nine assembly-rule fingerprints, Skill()-seam present / @-include absent region-scoped to `## Completion and Gates`, and no dispatch-helper leak in the skill. Each negative half demonstrated by real mutation. Add "present-gate" to userSkills so the frontmatter + reference-closure oracles cover it. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../references/first-officer-shared-core.md | 37 +---- skills/integration/present_gate_test.go | 139 ++++++++++++++++++ skills/integration/skill_surface_test.go | 2 +- skills/present-gate/SKILL.md | 44 ++++++ 4 files changed, 185 insertions(+), 37 deletions(-) create mode 100644 skills/integration/present_gate_test.go create mode 100644 skills/present-gate/SKILL.md diff --git a/skills/first-officer/references/first-officer-shared-core.md b/skills/first-officer/references/first-officer-shared-core.md index 389c4a2b..1d2b32a3 100644 --- a/skills/first-officer/references/first-officer-shared-core.md +++ b/skills/first-officer/references/first-officer-shared-core.md @@ -149,47 +149,12 @@ SendMessage(to="{agent}-{slug}-{completed_stage}", message="Advancing to next st If the stage is gated: - never self-approve -- present the stage report per `## Gate Presentation` below +- present the stage report by invoking `Skill(skill="spacedock:present-gate")` and following its template + assembly rules - keep the worker alive while waiting at the gate - on a feedback gate recommending `REJECTED`, auto-bounce into the feedback rejection flow instead of waiting for manual review - on captain reject at a `feedback-to` stage, enter the Feedback Rejection Flow (priority over generic rejection) - on captain approve to a non-terminal next stage, apply the reuse conditions. On reuse: keep the agent and SendMessage the next stage. On fresh: shut down the agent and any kept-alive `feedback-to` target the next stage does not need. -## Gate Presentation - -Present gate reviews in this format: - -``` -Gate review: {entity title} — {stage} -Chosen direction: {one-line summary of the ensign's chosen approach, or `n/a` for stages without a chosen-direction concept (e.g., simple work stages, merge)} -Recommend {approve | reject: {one-line reason}}. - -Checklist (from ## Stage Report in {entity_file_path} lines {start}-{end}): -- DONE: {≤10-word gist of item} -- SKIPPED: {gist} — {one-line reason} -- FAILED: {gist} — {one-line reason} - -{If reviewer findings exist, render them under a `Reviewer findings` heading in two tiers — `Material:` (fact-corrections, contract violations, missing AC evidence, broken claims) and `Polish:` (wording, format drift, non-blocking suggestions). Drop the tier entirely if it has no items. If no reviewer ran, omit this whole block.} - -Assessment: {N} done, {N} skipped, {N} failed. - -Decision: {one-line decision prompt naming what approval/rejection does in concrete terms — e.g., "approve to enter implementation in worktree `.worktrees/...`" or "reject to bounce back to {feedback-to target} with the material findings above"}. -``` - -### Captain-facing assembly rules - -The template is the floor, not the ceiling. The FO MUST hold to the following discipline when filling it: - -1. **Lede first, decision last, nothing between them buried.** The first three lines (title, chosen direction, recommend) and the final line (decision) are the spine. Everything else is supporting evidence; if the captain stops reading after line three, they can still vote. -2. **Chosen direction is required as FO prose.** When the stage selected among options (ideation picks an approach, validation picks PASS/REJECTED), name it on the `Chosen direction:` line; don't make the captain infer from the Checklist gist or open the entity file. For stages without a chosen direction, use `n/a`. -3. **Cite the Stage Report; render a one-line gist roll-up.** Do not paste it into the gate message. Under `Checklist:`, render one bullet per DONE/SKIPPED/FAILED item as a verb-noun gist (≤10 words, FO paraphrase, no new facts). For SKIPPED/FAILED, append `— {one-line reason}`. Cite the full report by file path and line range. If a reviewer Material finding directly questions a checklist item's evidence, inline that item's evidence paragraph under the finding so the captain can decide without opening the file. Otherwise no Stage Report content appears. -4. **Reviewer findings render in priority tiers.** Group into `Material:` (fact-corrections, contract violations, missing AC evidence, claims contradicted by the codebase) and `Polish:` (wording, format drift, non-blocking suggestions). Drop empty tiers. Do not flat-bullet material next to polish. -5. **Recommendation appears exactly once.** The `Recommend {approve | reject: {reason}}` line is the only place the FO states its verdict. Do not duplicate it elsewhere or re-explain it in an enumerated list. -6. **Bounce-back recommendations name the concrete asks.** If recommending reject, the reason line names the specific concerns by content, not by reference. Bad: "address the reviewer's five concrete notes." Good: "tighten AC-2 substring assertion; correct the file X claim; cut the format-pedantry aside." -7. **No format-pedantry asides.** Format drift (`1./2./3./4.` instead of `**AC-N**`, missing trailing period) is not load-bearing for a gate decision. Surface only if it blocks the gate; if it does, it is a Material finding, not a separate paragraph. -8. **One sentence of worktree heads-up when approval changes worktree state.** When approving opens or closes a worktree, the Decision line names it: "approve to enter implementation in worktree `.worktrees/{worker_key}-{slug}`". One sentence, not a section. -9. **Target length: 15-25 lines of FO-authored prose.** The full gate message should fit in 15-25 lines. If it exceeds 25, the FO is over-narrating; cut. - ## Feedback Rejection Flow When a feedback stage recommends REJECTED: diff --git a/skills/integration/present_gate_test.go b/skills/integration/present_gate_test.go new file mode 100644 index 00000000..784ac00f --- /dev/null +++ b/skills/integration/present_gate_test.go @@ -0,0 +1,139 @@ +// ABOUTME: AC-1/AC-2 oracles for the present-gate extraction — the Gate +// ABOUTME: Presentation template + nine assembly rules live in the skill, not the FO core, with a Skill() seam. +package integration + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// assemblyRuleFingerprints uniquely identifies each of the nine captain-facing +// assembly rules moved into the present-gate skill. Each literal is unique-1 in +// the pre-change first-officer-shared-core.md (verified during ideation). The +// count is the teeth of AC-2: a dropped rule reds the absence of its +// fingerprint in the skill. +var assemblyRuleFingerprints = map[string]string{ + "lede-first/decision-last": "Lede first, decision last", + "chosen-direction-required": "Chosen direction is required as FO prose", + "cite-the-report": "Cite the Stage Report; render a one-line gist roll-up", + "reviewer-findings-in-tiers": "Reviewer findings render in priority tiers", + "recommendation-appears-once": "Recommendation appears exactly once", + "bounce-back-names-asks": "Bounce-back recommendations name the concrete asks", + "no-format-pedantry-asides": "No format-pedantry asides", + "one-sentence-worktree-heads-up": "One sentence of worktree heads-up when approval changes worktree state", + "target-15-25-lines": "Target length: 15-25 lines", +} + +// gatePresentationFingerprints identifies the moved Gate-Presentation content: +// the format template plus a representative subset of the assembly rules. AC-1(a) +// asserts presence in the skill; AC-1(b) asserts absence from the FO core. +var gatePresentationFingerprints = map[string]string{ + "template": "Gate review: {entity title}", + "lede-first/decision-last": "Lede first, decision last", + "chosen-direction-required": "Chosen direction is required as FO prose", + "no-format-pedantry-asides": "No format-pedantry asides", + "target-15-25-lines": "Target length: 15-25 lines", +} + +// presentGateSkill reads the new skill body under test. +func presentGateSkill(t *testing.T) string { + t.Helper() + p := filepath.Join(skillsRoot(t), "present-gate", "SKILL.md") + b, err := os.ReadFile(p) + if err != nil { + t.Fatalf("read present-gate SKILL.md: %v", err) + } + return string(b) +} + +// foCore reads the FO shared-core contract under test. +func foCore(t *testing.T) string { + t.Helper() + return vendoredSkillFiles(t)["first-officer/references/first-officer-shared-core.md"] +} + +// TestGatePresentationPresentInSkill locks AC-1(a): the moved Gate-Presentation +// fingerprints (template + assembly rules) are present in +// skills/present-gate/SKILL.md. +func TestGatePresentationPresentInSkill(t *testing.T) { + skill := presentGateSkill(t) + for name, fp := range gatePresentationFingerprints { + if !strings.Contains(skill, fp) { + t.Errorf("present-gate SKILL.md missing %s fingerprint %q", name, fp) + } + } +} + +// TestAllNineAssemblyRulesPresentInSkill locks AC-2(a): the skill carries all +// nine captain-facing assembly-rule fingerprints — the count is the teeth, a +// dropped rule reds the absence of its fingerprint. +func TestAllNineAssemblyRulesPresentInSkill(t *testing.T) { + skill := presentGateSkill(t) + if len(assemblyRuleFingerprints) != 9 { + t.Fatalf("expected 9 assembly-rule fingerprints, have %d", len(assemblyRuleFingerprints)) + } + for name, fp := range assemblyRuleFingerprints { + if !strings.Contains(skill, fp) { + t.Errorf("present-gate SKILL.md missing assembly rule %s fingerprint %q", name, fp) + } + } +} + +// TestGatePresentationAbsentFromFOCore locks AC-1(b): the moved fingerprints are +// NO LONGER present in first-officer-shared-core.md — moved, not duplicated. +// Whole-file (NOT region-scoped): region-scoping an absence check would +// false-pass content that moved elsewhere in the file. Negative-proof: +// re-inlining the block re-introduces a fingerprint and flips this RED. +func TestGatePresentationAbsentFromFOCore(t *testing.T) { + fo := foCore(t) + for name, fp := range gatePresentationFingerprints { + if strings.Contains(fo, fp) { + t.Errorf("first-officer-shared-core.md still inlines %s fingerprint %q (moved, not duplicated)", name, fp) + } + } +} + +// TestFOCoreInvokesPresentGateSkill locks AC-1(c): the FO core's `## Completion +// and Gates` section invokes the skill via Skill(...) at the gate point and does +// NOT use the spike-disproven cross-skill @-include. Region-scoped to +// `## Completion and Gates` (the positive Skill()-present / @-absent assertions +// only). The Skill(...) literal is the integration seam; the @-form is the +// disproven mechanism. +func TestFOCoreInvokesPresentGateSkill(t *testing.T) { + fo := foCore(t) + region := sectionAfter(fo, "## Completion and Gates") + if region == "" { + t.Fatal("first-officer-shared-core.md has no `## Completion and Gates` section") + } + if !strings.Contains(region, `Skill(skill="spacedock:present-gate")`) { + t.Errorf("`## Completion and Gates` section does not invoke Skill(skill=\"spacedock:present-gate\")") + } + for _, banned := range []string{"@../present-gate", "@present-gate"} { + if strings.Contains(region, banned) { + t.Errorf("`## Completion and Gates` section uses the disproven cross-skill @-include %q", banned) + } + } +} + +// presentGateLeakageLiterals are spacedock dispatch-helper tokens the +// gate-presentation skill must NOT name — the prose is FO judgment/format, not +// shell wiring. Mirrors the sibling using-claude-team leakage table. +var presentGateLeakageLiterals = []string{ + "spacedock dispatch", + "spacedock status", +} + +// TestPresentGateSkillFreeOfDispatchHelperLeak locks AC-2 (absence half): the +// gate-presentation skill is free of any spacedock-dispatch-helper token. +// Negative-proof: a `spacedock dispatch`/`spacedock status` token leaking into +// the skill reds this. +func TestPresentGateSkillFreeOfDispatchHelperLeak(t *testing.T) { + skill := presentGateSkill(t) + for _, banned := range presentGateLeakageLiterals { + if strings.Contains(skill, banned) { + t.Errorf("present-gate SKILL.md leaks spacedock dispatch-helper token %q (gate-presentation prose is FO judgment, not shell wiring)", banned) + } + } +} diff --git a/skills/integration/skill_surface_test.go b/skills/integration/skill_surface_test.go index 0024a2b8..1896c4e1 100644 --- a/skills/integration/skill_surface_test.go +++ b/skills/integration/skill_surface_test.go @@ -13,7 +13,7 @@ import ( // userSkills is the published user skill surface: the six skills the host // discovers (each owns a SKILL.md). `integration` is deliberately absent — it // holds only *_test.go and must not publish. -var userSkills = []string{"commission", "debrief", "refit", "ensign", "first-officer", "using-claude-team"} +var userSkills = []string{"commission", "debrief", "refit", "ensign", "first-officer", "using-claude-team", "present-gate"} // TestUserSkillsPresentWithFrontmatter locks AC-1: each of the five user skills // ships a SKILL.md whose YAML frontmatter declares a `name` and a `description`. diff --git a/skills/present-gate/SKILL.md b/skills/present-gate/SKILL.md new file mode 100644 index 00000000..a1e0e30c --- /dev/null +++ b/skills/present-gate/SKILL.md @@ -0,0 +1,44 @@ +--- +name: present-gate +description: "First-officer gate-presentation rendering — the captain-facing gate-review template plus the nine assembly rules for filling it (lede-first/decision-last spine, chosen-direction prose, report citation, reviewer-finding tiers, single recommendation, concrete bounce-back asks, length budget). Invoke at the gate point after the FO has decided a stage must be presented." +user-invocable: false +--- + +# Present Gate + +This skill carries the first-officer's captain-facing gate-presentation rendering: the gate-review format template and the assembly rules for filling it. The decide-to-gate and AC-cross-check policy stays always-on in the FO contract; this skill loads at the gate point to render the decision the FO has already made. + +## Gate Presentation + +Present gate reviews in this format: + +``` +Gate review: {entity title} — {stage} +Chosen direction: {one-line summary of the ensign's chosen approach, or `n/a` for stages without a chosen-direction concept (e.g., simple work stages, merge)} +Recommend {approve | reject: {one-line reason}}. + +Checklist (from ## Stage Report in {entity_file_path} lines {start}-{end}): +- DONE: {≤10-word gist of item} +- SKIPPED: {gist} — {one-line reason} +- FAILED: {gist} — {one-line reason} + +{If reviewer findings exist, render them under a `Reviewer findings` heading in two tiers — `Material:` (fact-corrections, contract violations, missing AC evidence, broken claims) and `Polish:` (wording, format drift, non-blocking suggestions). Drop the tier entirely if it has no items. If no reviewer ran, omit this whole block.} + +Assessment: {N} done, {N} skipped, {N} failed. + +Decision: {one-line decision prompt naming what approval/rejection does in concrete terms — e.g., "approve to enter implementation in worktree `.worktrees/...`" or "reject to bounce back to {feedback-to target} with the material findings above"}. +``` + +### Captain-facing assembly rules + +The template is the floor, not the ceiling. The FO MUST hold to the following discipline when filling it: + +1. **Lede first, decision last, nothing between them buried.** The first three lines (title, chosen direction, recommend) and the final line (decision) are the spine. Everything else is supporting evidence; if the captain stops reading after line three, they can still vote. +2. **Chosen direction is required as FO prose.** When the stage selected among options (ideation picks an approach, validation picks PASS/REJECTED), name it on the `Chosen direction:` line; don't make the captain infer from the Checklist gist or open the entity file. For stages without a chosen direction, use `n/a`. +3. **Cite the Stage Report; render a one-line gist roll-up.** Do not paste it into the gate message. Under `Checklist:`, render one bullet per DONE/SKIPPED/FAILED item as a verb-noun gist (≤10 words, FO paraphrase, no new facts). For SKIPPED/FAILED, append `— {one-line reason}`. Cite the full report by file path and line range. If a reviewer Material finding directly questions a checklist item's evidence, inline that item's evidence paragraph under the finding so the captain can decide without opening the file. Otherwise no Stage Report content appears. +4. **Reviewer findings render in priority tiers.** Group into `Material:` (fact-corrections, contract violations, missing AC evidence, claims contradicted by the codebase) and `Polish:` (wording, format drift, non-blocking suggestions). Drop empty tiers. Do not flat-bullet material next to polish. +5. **Recommendation appears exactly once.** The `Recommend {approve | reject: {reason}}` line is the only place the FO states its verdict. Do not duplicate it elsewhere or re-explain it in an enumerated list. +6. **Bounce-back recommendations name the concrete asks.** If recommending reject, the reason line names the specific concerns by content, not by reference. Bad: "address the reviewer's five concrete notes." Good: "tighten AC-2 substring assertion; correct the file X claim; cut the format-pedantry aside." +7. **No format-pedantry asides.** Format drift (`1./2./3./4.` instead of `**AC-N**`, missing trailing period) is not load-bearing for a gate decision. Surface only if it blocks the gate; if it does, it is a Material finding, not a separate paragraph. +8. **One sentence of worktree heads-up when approval changes worktree state.** When approving opens or closes a worktree, the Decision line names it: "approve to enter implementation in worktree `.worktrees/{worker_key}-{slug}`". One sentence, not a section. +9. **Target length: 15-25 lines of FO-authored prose.** The full gate message should fit in 15-25 lines. If it exceeds 25, the FO is over-narrating; cut. From 6b6737c1f1b1e93f93e92340b873015fb38a50fc Mon Sep 17 00:00:00 2001 From: CL Kao Date: Thu, 4 Jun 2026 11:09:23 -0700 Subject: [PATCH 2/2] Harden present-gate oracles: name==seam, structural @-scan, user-invocable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three test-strength fixes from the detached audit (tests only; the deliverable behavior already passed validation including a live AC-3 drive): 1. TestPresentGateSkillNameMatchesSeam — assert the frontmatter name VALUE equals `present-gate` (the directory name and the Skill(skill="spacedock:present-gate") seam target), not just token presence. 2. TestFOCoreInvokesPresentGateSkill — replace the enumerated @-include ban with a structural regex scan (@(?:\.{1,2}/)*present-gate\b) over the `## Completion and Gates` region, catching the @./present-gate/SKILL.md family the old two-literal enum missed. 3. TestPresentGateSkillIsFOInternal — assert the frontmatter carries user-invocable: false. Each negative demonstrated by real mutation then restored green. Co-Authored-By: Claude Opus 4.8 (1M context) --- skills/integration/present_gate_test.go | 70 ++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/skills/integration/present_gate_test.go b/skills/integration/present_gate_test.go index 784ac00f..c7b3eb95 100644 --- a/skills/integration/present_gate_test.go +++ b/skills/integration/present_gate_test.go @@ -5,6 +5,7 @@ package integration import ( "os" "path/filepath" + "regexp" "strings" "testing" ) @@ -95,12 +96,21 @@ func TestGatePresentationAbsentFromFOCore(t *testing.T) { } } +// presentGateAtInclude matches an `@`-prefixed path token that resolves toward +// the present-gate skill — `@present-gate`, `@./present-gate`, +// `@../present-gate/SKILL.md`, etc. The leading `@` plus any run of relative-path +// segments (`./`, `../`, bare) ending in a `present-gate` path component is the +// disproven cross-skill include the seam must NOT use. A structural scan, not an +// enumerated literal table — it catches the `@./present-gate/...` family the old +// enum missed. +var presentGateAtInclude = regexp.MustCompile(`@(?:\.{1,2}/)*present-gate\b`) + // TestFOCoreInvokesPresentGateSkill locks AC-1(c): the FO core's `## Completion // and Gates` section invokes the skill via Skill(...) at the gate point and does // NOT use the spike-disproven cross-skill @-include. Region-scoped to // `## Completion and Gates` (the positive Skill()-present / @-absent assertions -// only). The Skill(...) literal is the integration seam; the @-form is the -// disproven mechanism. +// only). The Skill(...) literal is the integration seam; any `@`-token resolving +// toward present-gate is the disproven mechanism. func TestFOCoreInvokesPresentGateSkill(t *testing.T) { fo := foCore(t) region := sectionAfter(fo, "## Completion and Gates") @@ -110,10 +120,8 @@ func TestFOCoreInvokesPresentGateSkill(t *testing.T) { if !strings.Contains(region, `Skill(skill="spacedock:present-gate")`) { t.Errorf("`## Completion and Gates` section does not invoke Skill(skill=\"spacedock:present-gate\")") } - for _, banned := range []string{"@../present-gate", "@present-gate"} { - if strings.Contains(region, banned) { - t.Errorf("`## Completion and Gates` section uses the disproven cross-skill @-include %q", banned) - } + if m := presentGateAtInclude.FindString(region); m != "" { + t.Errorf("`## Completion and Gates` section uses the disproven cross-skill @-include %q", m) } } @@ -137,3 +145,53 @@ func TestPresentGateSkillFreeOfDispatchHelperLeak(t *testing.T) { } } } + +// presentGateFrontmatterValue returns the trimmed scalar value of a top-level +// `key:` line in skills/present-gate/SKILL.md's YAML frontmatter, with any +// surrounding quotes stripped. The bool reports whether the key was found. +func presentGateFrontmatterValue(t *testing.T, key string) (string, bool) { + t.Helper() + fm, ok := frontmatter(presentGateSkill(t)) + if !ok { + t.Fatal("present-gate SKILL.md has no YAML frontmatter block") + } + prefix := key + ":" + for _, line := range strings.Split(fm, "\n") { + if strings.HasPrefix(line, prefix) { + v := strings.TrimSpace(strings.TrimPrefix(line, prefix)) + v = strings.Trim(v, `"'`) + return v, true + } + } + return "", false +} + +// TestPresentGateSkillNameMatchesSeam locks AC-2: the frontmatter `name:` VALUE +// equals `present-gate` — the directory name AND the +// `Skill(skill="spacedock:present-gate")` invocation seam. Token-presence alone +// (skill_surface_test.go) would pass a renamed skill that the seam no longer +// reaches; binding the value to the seam target catches that drift. Negative- +// proof: a bogus name value reds this. +func TestPresentGateSkillNameMatchesSeam(t *testing.T) { + name, ok := presentGateFrontmatterValue(t, "name") + if !ok { + t.Fatal("present-gate SKILL.md frontmatter has no name field") + } + if name != "present-gate" { + t.Errorf("present-gate SKILL.md frontmatter name is %q, want %q (the directory name and the Skill(skill=\"spacedock:present-gate\") seam)", name, "present-gate") + } +} + +// TestPresentGateSkillIsFOInternal locks AC-2: the frontmatter carries +// `user-invocable: false` — the skill is FO-internal (loaded mid-run via +// Skill()), not a captain-facing user skill. Negative-proof: flipping to `true` +// reds this. +func TestPresentGateSkillIsFOInternal(t *testing.T) { + v, ok := presentGateFrontmatterValue(t, "user-invocable") + if !ok { + t.Fatal("present-gate SKILL.md frontmatter has no user-invocable field") + } + if v != "false" { + t.Errorf("present-gate SKILL.md frontmatter user-invocable is %q, want \"false\" (the skill is FO-internal)", v) + } +}