Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/dev/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ A task moves to implementation once its design is approved. The work here is to

- **Inputs:** The fleshed-out task body from ideation with approach and acceptance criteria
- **Outputs:** The deliverable committed to the relevant repo or state checkout, with a summary of what was produced and where
- Implementation completion is not a stopping point: once the deliverable is committed and the stage report filed, the entity routes immediately to independent `validation` dispatch — a fresh validator, since `validation` is `fresh: true` — unless a gate, blocker, terminal ceremony, or captain decision intervenes. The FO does not park a completed implementation and wait.
- **Good:** Minimal changes that satisfy acceptance criteria, clean Go packages, stable CLI output, tests where appropriate, and a self-contained deliverable
- **Bad:** Over-engineering, unrelated refactoring, skipping tests, ignoring edge cases identified in ideation, or leaving the deliverable incomplete for validation to finish

Expand Down
162 changes: 162 additions & 0 deletions internal/ensigncycle/auto_continue_fixtures_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package ensigncycle

import (
"fmt"
"os"
"path/filepath"
"regexp"
"testing"
)

// Auto-continue fixtures and assertion for AC-5: a dev-shaped workflow parked at
// an implementation-ready state, exercising whether the FO continues the
// lifecycle (advance to validation + dispatch a fresh validator / present the
// gate) instead of stopping after the implementation report is filed. Like the
// other shared fixtures these live under the DEFAULT build tag so the offline
// negative case (auto_continue_negative_test.go) grades the assertion with no
// model spend, while the //go:build live half (auto_continue_live_test.go) drives
// the same fixture against a real agent.

// validationStatusOrBeyond matches an entity whose status line reads validation
// (or the terminal done), case-insensitively — i.e. the FO did NOT leave it
// parked at implementation.
//
// Only spaces and tabs may separate `status:` from its value. `\s` would also
// match newlines, letting an empty `status:` line pair up with a bare
// `validation`/`done` on a later line. A trailing `\r` is tolerated so
// CRLF-terminated entities still match.
var validationStatusOrBeyond = regexp.MustCompile(`(?im)^status:[ \t]*(validation|done)[ \t\r]*$`)

// implementationStatusAC5 matches an entity still parked at implementation — the
// failure mode this regression guards against. Matching is case-insensitive and
// line-anchored.
//
// Only spaces and tabs may separate `status:` from its value. `\s` would also
// match newlines, letting an empty `status:` line pair up with a bare
// `implementation` on a later line. A trailing `\r` is tolerated so
// CRLF-terminated entities still match.
var implementationStatusAC5 = regexp.MustCompile(`(?im)^status:[ \t]*implementation[ \t\r]*$`)

// writeAutoContinueWorkflowNoGit stages the auto-continue fixture into dir: the
// workflow README.md first, then the entity file auto-continue-task.md, both
// with mode 0o644. It performs no git setup. On success it returns the entity
// file's path; if either write fails it returns an empty path and that error,
// and the entity is not written when the README write fails.
func writeAutoContinueWorkflowNoGit(dir string) (string, error) {
	readmePath := filepath.Join(dir, "README.md")
	entityPath := filepath.Join(dir, "auto-continue-task.md")

	err := os.WriteFile(readmePath, []byte(autoContinueReadme()), 0o644)
	if err == nil {
		err = os.WriteFile(entityPath, []byte(autoContinueEntity()), 0o644)
	}
	if err != nil {
		return "", err
	}
	return entityPath, nil
}

// writeAutoContinueWorkflow is the test-helper form of
// writeAutoContinueWorkflowNoGit: it stages the fixture files under root, fails
// the test immediately if staging errors, then calls gitInit on root and
// returns the staged entity's path.
func writeAutoContinueWorkflow(t *testing.T, root string) string {
	t.Helper()

	staged, stageErr := writeAutoContinueWorkflowNoGit(root)
	if stageErr != nil {
		t.Fatal(stageErr)
	}

	gitInit(t, root)
	return staged
}

// autoContinueReadme returns the README for the dev-shaped fixture workflow from
// AC-4: a non-split-root workflow backlog → implementation →
// validation(worktree, fresh, gate) → done. It is YAML frontmatter declaring
// the stages, followed by one markdown section per stage.
//
// validation is `fresh: true` so the FO must dispatch a FRESH validator, and
// `gate: true` so the FO presents the validation gate after the validator
// reports. The validation section carries a concrete instruction (append a
// validation stage report) so a dispatched validator has real work that leaves
// a durable report.
//
// The frontmatter indentation is significant: worktree/concurrency nest under
// `defaults`, and each stage's properties nest under its `- name:` item inside
// `states`. Every level is therefore indented two spaces deeper than its
// parent; flattening them to one column loses the hierarchy.
func autoContinueReadme() string {
	return "---\n" +
		"entity-type: task\n" +
		"id-style: slug\n" +
		"stages:\n" +
		"  defaults:\n" +
		"    worktree: false\n" +
		"    concurrency: 1\n" +
		"  states:\n" +
		"    - name: backlog\n" +
		"      initial: true\n" +
		"    - name: implementation\n" +
		"      worktree: true\n" +
		"    - name: validation\n" +
		"      worktree: true\n" +
		"      fresh: true\n" +
		"      feedback-to: implementation\n" +
		"      gate: true\n" +
		"    - name: done\n" +
		"      terminal: true\n" +
		"---\n" +
		"# Auto-Continue Fixture\n\n" +
		"### backlog\n\nSeed the task.\n\n- **Outputs:** A seed task.\n\n" +
		"### implementation\n\nProduce the deliverable.\n\n- **Outputs:** The deliverable plus an implementation stage report.\n\n" +
		"### validation\n\n" +
		"Verify the implementation against the acceptance criteria. Append a `## Stage Report: validation` " +
		"section to the entity with one `- DONE:` item and a PASSED or REJECTED recommendation.\n\n" +
		"- **Outputs:** A PASSED or REJECTED validation stage report.\n\n" +
		"### done\n\nTerminal state.\n"
}

// autoContinueEntity returns the fixture entity in the implementation-ready
// state from AC-4: frontmatter with `status: implementation` and empty
// completed/verdict/worktree fields, followed by a body that already contains a
// filed `## Stage Report: implementation`. A correct FO verifies this report and
// immediately advances to validation + dispatches a fresh validator (then
// presents the validation gate); a broken FO stops here with the report filed
// and the status unchanged.
func autoContinueEntity() string {
	const frontmatter = `---
id: auto-continue-task
title: Auto Continue Task
status: implementation
completed:
verdict:
worktree:
---
`
	const body = `# Auto Continue Task

The implementation is complete and its stage report is filed below. The next lifecycle step is independent validation.

## Acceptance criteria

**AC-1** The deliverable exists and is committed.
Verified by: the implementation stage report below plus a validation pass.

## Stage Report: implementation

- DONE: Produce the deliverable
 The deliverable is committed and ready for independent verification.

### Summary

Implementation is complete. The first officer must advance to validation and dispatch a fresh validator.
`
	return frontmatter + body
}

// autoContinuePrompt returns the NEUTRAL runbook from AC-4 — `Use
// $spacedock:first-officer` with no "drive to done" coaching. It names the
// workflow directory and the one parked entity and asks the FO to proceed as
// its contract directs. It deliberately does NOT tell the FO to advance,
// dispatch, or validate — whether it does so is exactly the behavior under
// test.
//
// The text is four sentences: the first is followed by a blank line, the rest
// are newline-separated, and there is no trailing newline.
func autoContinuePrompt() string {
	const (
		skill     = "Use $spacedock:first-officer for this whole run."
		location  = "Workflow directory: ."
		situation = "Process the entity `auto-continue-task`. Its implementation worker has just completed and filed its stage report."
		request   = "Proceed with the workflow as the first-officer contract directs, then give your final response."
	)
	// Every operand is a string, so Sprint concatenates without inserting spaces.
	return fmt.Sprint(skill, "\n\n", location, "\n", situation, "\n", request)
}

// assertAutoContinue grades whether the lifecycle continued past
// implementation. It is host-neutral: it takes the entity text before and after
// the run plus the FO's observed output, and judges only the DURABLE entity
// state — `observed` is accepted but never consulted, so a transcript that
// merely narrates "advancing to validation" cannot pass it.
//
// The checks run in this order and the first failure is returned:
//  1. after must not still carry `status: implementation`;
//  2. after must carry `status: validation` or `status: done`;
//  3. after must contain a `## Stage Report: validation` heading;
//  4. before must contain the implementation stage report (fixture invariant);
//  5. after must still contain the implementation stage report, which guards
//     against an after-state that replaced the body instead of appending.
//
// It returns nil only when all five hold.
func assertAutoContinue(before, after, observed string) error {
	switch {
	case implementationStatusAC5.MatchString(after):
		return fmt.Errorf("FO left the entity parked at status: implementation — it stopped instead of advancing")
	case !validationStatusOrBeyond.MatchString(after):
		return fmt.Errorf("FO did not advance the entity to status: validation (or beyond)")
	case !regexpValidationReport.MatchString(after):
		return fmt.Errorf("no `## Stage Report: validation` appeared — the FO did not dispatch/run a validator")
	case !regexpImplementationReport.MatchString(before):
		return fmt.Errorf("fixture invariant broken: before-state lacks the implementation stage report")
	case !regexpImplementationReport.MatchString(after):
		return fmt.Errorf("the implementation stage report was lost from the entity after the run")
	default:
		return nil
	}
}

// regexpValidationReport matches a line that begins with the markdown heading
// `## Stage Report: validation`, ending at a word boundary.
var regexpValidationReport = regexp.MustCompile(`(?m)^## Stage Report: validation\b`)

// regexpImplementationReport matches a line that begins with the markdown
// heading `## Stage Report: implementation`, ending at a word boundary.
var regexpImplementationReport = regexp.MustCompile(`(?m)^## Stage Report: implementation\b`)
89 changes: 89 additions & 0 deletions internal/ensigncycle/auto_continue_live_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
//go:build live

// ABOUTME: AC-5 live regression — a real FO, given an implementation-ready dev
// ABOUTME: entity, must advance to validation and dispatch a fresh validator, not stop.
package ensigncycle

import (
"context"
"os"
"path/filepath"
"testing"
"time"

"github.com/spacedock-dev/spacedock/internal/livescenario"
)

// TestLiveAutoContinueAfterImplementation is AC-5's live half: a real FO is
// pointed at a dev-shaped workflow whose one entity is parked at an
// implementation-ready state (status: implementation with a filed implementation
// stage report) under the NEUTRAL `Use $spacedock:first-officer` runbook — no
// "drive to done" coaching. The scenario grades on the DURABLE outcome via the
// shared, state-oriented assertAutoContinue: the FO must advance the entity past
// implementation AND leave a `## Stage Report: validation` behind — the durable
// footprint of a fresh validator it dispatched. A run that stops after the
// implementation report, leaving the entity at status: implementation, REDS the
// grade. Running this (`go test -tags live -run TestLiveAutoContinueAfterImplementation`)
// against a real credential produces the session artifact AC-5's `Verified by:
// live …` citation requires.
//
// Non-interactive `claude -p` puts the FO in single-entity mode, where it drives
// the parked entity all the way to terminal `done` and ARCHIVES it to
// `_archive/auto-continue-task.md`. That over-runs the minimum (it more than
// proves the FO did not stop after implementation), so the grade reads the entity
// from wherever it lands — the original path or the archive — via the captured
// workflow dir. The primitive's tolerant post-run read hands the Assert an empty
// after-body when the original path was archived; the Assert then resolves the
// real end-state. It reuses claudeRunnerAdapter + errGraded from
// livescenario_adapter_live_test.go and the assertAutoContinue grade shared with
// the offline negative case.
func TestLiveAutoContinueAfterImplementation(t *testing.T) {
runner := newClaudeLiveRunner(t)
// Implementation completion → validator dispatch → (single-entity) gate
// auto-resolve → merge/terminalize runs TWO full agent runs serially (the FO
// and the fresh validator), so the budget is generous.
adapter := claudeRunnerAdapter{t: t, runner: runner, timeout: 15 * time.Minute}

var workflowDir string
sc := livescenario.Scenario{
Name: "auto-continue-after-implementation",
Runbook: autoContinuePrompt(),
Setup: func(dir string) (string, error) {
// Capture the staged workflow dir so the Assert can find the entity even
// after the FO archives it. Stage WITHOUT git-init; the adapter git-inits
// once the primitive has captured the pre-run state (matching the live order).
workflowDir = dir
return writeAutoContinueWorkflowNoGit(dir)
},
Assert: func(before, after livescenario.EntityState, observed string) error {
afterBody := resolveAutoContinueEndState(workflowDir, after.Body)
if err := assertAutoContinue(before.Body, afterBody, observed); err != nil {
return errGraded(err.Error())
}
return nil
},
}

if err := livescenario.Run(context.Background(), t.TempDir(), sc, adapter); err != nil {
t.Fatalf("live auto-continue scenario graded FAIL: %v", err)
}
}

// resolveAutoContinueEndState returns the entity's durable end-state body.
//
// A non-empty afterBody is returned unchanged: the entity stayed at its
// original path (e.g. held at the validation gate). An empty afterBody means
// the original path is gone, which happens when the FO archives a terminalized
// entity; in that case the archived copy at
// `<workflowDir>/_archive/auto-continue-task.md` is read and returned.
//
// It NEVER fabricates state: if the archived copy cannot be read, the empty
// afterBody is returned as-is and the state-oriented assertAutoContinue reds.
func resolveAutoContinueEndState(workflowDir, afterBody string) string {
	if afterBody == "" {
		archivedPath := filepath.Join(workflowDir, "_archive", "auto-continue-task.md")
		if raw, readErr := os.ReadFile(archivedPath); readErr == nil {
			return string(raw)
		}
	}
	return afterBody
}
58 changes: 58 additions & 0 deletions internal/ensigncycle/auto_continue_negative_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package ensigncycle

import (
"strings"
"testing"
)

// AC-5 offline negative case: assertAutoContinue is behavior/state oriented, not
// a transcript-shape tautology. These cases build the SPECIFIC broken end-state
// the regression guards against — an FO that filed the implementation report and
// STOPPED, leaving the entity at status: implementation with no validation report
// — and prove the assertion reds even when the transcript narrates intent to
// continue. They are offline (default tag): the assertion is a pure function over
// entity-state + observed strings, so they incur no model spend.

// TestAutoContinueNegativeStoppedAfterImplementation checks that
// assertAutoContinue grades durable entity state rather than transcript
// wording. It first confirms a genuinely advanced end-state passes, then
// confirms three broken end-states fail: unchanged state with a transcript that
// claims advancement, a status bump with no validation report, and a
// validation report with the status left at implementation.
func TestAutoContinueNegativeStoppedAfterImplementation(t *testing.T) {
	before := autoContinueEntity()

	// Bump only the frontmatter status, and guard the substitution itself. The
	// guard has to run before any report is appended: a string with extra text
	// appended can never equal `before`, so checking later could not detect a
	// fixture that lacks `status: implementation`.
	statusBumped := strings.Replace(before, "status: implementation", "status: validation", 1)
	if statusBumped == before {
		t.Fatal("fixture must contain `status: implementation` to advance")
	}

	// Baseline: an FO that truly advanced — status moved to validation and a
	// validation stage report was appended — GRADES PASS. Without this the negative
	// cases could pass against an assertion that always errors.
	advanced := statusBumped +
		"\n## Stage Report: validation\n\n- DONE: Verified the deliverable\n PASSED.\n"
	if err := assertAutoContinue(before, advanced, "Advanced to validation and dispatched a fresh validator; gate presented."); err != nil {
		t.Fatalf("a truly-advanced end-state must grade PASS, got: %v", err)
	}

	// Broken: the FO stopped after filing the implementation report. The durable
	// state is byte-identical to the staged fixture (still status: implementation,
	// no validation report). Even WITH a transcript that narrates advancing to
	// validation, the state-oriented grade must catch the stop.
	stoppedObserved := "Implementation complete. Advancing to validation and dispatching a fresh validator."
	if err := assertAutoContinue(before, before, stoppedObserved); err == nil {
		t.Fatal("an FO that left the entity at status: implementation with no validation report must RED the grade even with a transcript narrating advancement")
	}

	// Broken: status advanced to validation in the frontmatter but NO validation
	// stage report was produced — a partial move (the FO bumped status but never
	// dispatched/ran the validator). This must still fail on the missing validation
	// report, not pass on the status bump alone.
	if err := assertAutoContinue(before, statusBumped, "advancing to validation"); err == nil {
		t.Fatal("a status bump with no validation stage report must RED the grade (the validator never ran)")
	}

	// Broken: a validation report was appended but the status was left at
	// implementation — the inverse partial move. Must fail on the status check.
	reportNoStatus := before + "\n## Stage Report: validation\n\n- DONE: Verified\n PASSED.\n"
	if err := assertAutoContinue(before, reportNoStatus, "validated the deliverable"); err == nil {
		t.Fatal("a validation report with status left at implementation must RED the grade")
	}
}
Loading
Loading