From 05ba04af130fb217dd9c18ae550b2ef3980e8570 Mon Sep 17 00:00:00 2001 From: CL Kao Date: Fri, 5 Jun 2026 00:17:12 -0700 Subject: [PATCH 1/2] ci: add pi runtime live lane --- .github/workflows/runtime-live-e2e.yml | 114 ++++++++++++++++-- docs/dev/README.md | 19 ++- internal/cli/pi.go | 4 +- internal/cli/pi_frontdoor_test.go | 14 +++ internal/ensigncycle/pi_live_runner_test.go | 39 +++--- .../ensigncycle/pi_shared_coverage_test.go | 53 ++++++++ .../ensigncycle/shared_coverage_meta_test.go | 9 ++ .../ensigncycle/shared_scenarios_docs_test.go | 4 +- internal/release/workflow_exec_guard_test.go | 34 +++++- 9 files changed, 257 insertions(+), 33 deletions(-) create mode 100644 internal/ensigncycle/pi_shared_coverage_test.go diff --git a/.github/workflows/runtime-live-e2e.yml b/.github/workflows/runtime-live-e2e.yml index fc08a7083..1c7b22421 100644 --- a/.github/workflows/runtime-live-e2e.yml +++ b/.github/workflows/runtime-live-e2e.yml @@ -8,15 +8,15 @@ # # The offline job carries NO environment and NO secret: it builds + runs the # default Go suite (the live-tagged test compiles out) as the secret-free gate, -# and runs unconditionally on every PR. Only the live job declares an -# `environment:` (a required-reviewer approval gate, reviewer = clkao) and reads -# only the host-specific secret it needs: ANTHROPIC_API_KEY for Claude, -# OPENAI_API_KEY for Codex. The Claude live job is a matrix over two variants: -# sonnet (the Python-land floor) on CI-E2E and claude-opus-4-8 on CI-E2E-OPUS — each its own -# separately-approved deployment. So a live PR run needs same-repo-or-no-secrets -# + the per-variant environment approval; each variant pauses in `waiting` until -# a maintainer approves its environment, so the API-spending dispatch cannot -# start unapproved. +# and runs unconditionally on every PR. 
Only live jobs declare an `environment:` +# (a required-reviewer approval gate, reviewer = clkao) and read only the +# host-specific secret they need: ANTHROPIC_API_KEY for Claude, OPENAI_API_KEY +# for Codex and Pi. The Claude live job is a matrix over two variants: sonnet +# (the Python-land floor) on CI-E2E and claude-opus-4-8 on CI-E2E-OPUS — each its +# own separately-approved deployment. So a live PR run needs +# same-repo-or-no-secrets + the per-variant environment approval; each variant +# pauses in `waiting` until a maintainer approves its environment, so the +# API-spending dispatch cannot start unapproved. name: Runtime Live E2E @@ -33,6 +33,11 @@ on: required: false type: string default: "" + pi_version: + description: "Pin pi-coding-agent to a specific version (e.g. 2.0.5). Empty = npm latest." + required: false + type: string + default: "" effort: description: "Effort hint for the live run (low, high, xhigh). Default low." required: false @@ -314,3 +319,94 @@ jobs: live-artifacts/codex/** live-artifacts/journey-metrics/** if-no-files-found: warn + + pi-live: + needs: offline + runs-on: ubuntu-latest + environment: + name: CI-E2E-PI + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SPACEDOCK_PI_LIVE_REQUIRED: "1" + SPACEDOCK_LIVE_ARTIFACT_DIR: ${{ github.workspace }}/live-artifacts/pi + SPACEDOCK_JOURNEY_METRICS_DIR: ${{ github.workspace }}/live-artifacts/journey-metrics/pi + PI_OFFLINE: "1" + steps: + - name: Check required secret + run: | + if [ -z "${OPENAI_API_KEY}" ]; then + echo "OPENAI_API_KEY is required for pi-live after CI-E2E-PI approval." 
>&2 + exit 1 + fi + + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version: "1.22" + + - uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install Pi CLI and substrates + env: + PI_VERSION: ${{ inputs.pi_version }} + run: | + if [ -n "$PI_VERSION" ]; then + npm install -g "pi-coding-agent@$PI_VERSION" + else + npm install -g pi-coding-agent + fi + pi --version + pi install npm:pi-subagents + pi install npm:pi-intercom + echo "PI_SUBAGENTS_PACKAGE_ROOT=$HOME/.pi/agent/npm/node_modules/pi-subagents" >> "$GITHUB_ENV" + + - name: Build spacedock binary + run: | + go build -o ./spacedock ./cmd/spacedock + echo "SPACEDOCK_BIN=$(pwd)/spacedock" >> "$GITHUB_ENV" + echo "SPACEDOCK_REPO_ROOT=$GITHUB_WORKSPACE" >> "$GITHUB_ENV" + echo "$(pwd)" >> "$GITHUB_PATH" + + - name: Configure git identity + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --global init.defaultBranch main + + - name: Verify Pi current-checkout setup + run: | + mkdir -p "$SPACEDOCK_LIVE_ARTIFACT_DIR" + spacedock doctor --host pi --plugin-dir "$GITHUB_WORKSPACE" | tee "$SPACEDOCK_LIVE_ARTIFACT_DIR/pi-doctor.txt" + test -f "$GITHUB_WORKSPACE/skills/first-officer/references/pi-first-officer-runtime.md" + test -f "$GITHUB_WORKSPACE/skills/ensign/references/pi-ensign-runtime.md" + test -f "$PI_SUBAGENTS_PACKAGE_ROOT/src/extension/index.ts" + test -f "$PI_SUBAGENTS_PACKAGE_ROOT/skills/pi-subagents/SKILL.md" + + - name: Show tool versions + run: | + pi --version + go version + echo "### Pi live tool versions" >> "$GITHUB_STEP_SUMMARY" + echo "- \`pi --version\`: \`$(pi --version)\`" >> "$GITHUB_STEP_SUMMARY" + echo "- \`go version\`: \`$(go version)\`" >> "$GITHUB_STEP_SUMMARY" + echo "- Effort: \`${{ inputs.effort }}\`" >> "$GITHUB_STEP_SUMMARY" + + - name: Run Pi shared scenario coverage guard + run: go test -tags live -count=1 -run 
'TestSharedScenarioRunnerCoverage|TestPiSharedScenarioCoverage' ./internal/ensigncycle -v + + - name: Run live Pi front-door smoke + run: go test -tags live -count=1 -run TestLivePiFrontDoorSmoke ./internal/ensigncycle -v + + - name: Upload live artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: runtime-live-e2e-pi-live + path: | + ./spacedock + live-artifacts/pi/** + live-artifacts/journey-metrics/** + if-no-files-found: warn diff --git a/docs/dev/README.md b/docs/dev/README.md index 7d17b464a..33a968133 100644 --- a/docs/dev/README.md +++ b/docs/dev/README.md @@ -148,7 +148,7 @@ spacedock status --workflow-dir docs/dev --next The live lanes prove runtime behavior, not text shape. Static grep checks over workflow YAML or skill prose are not a substitute for launching the real host front door, observing its output, and checking the resulting workflow state. -A runtime regression should be caught once per user journey and then exercised by EACH supported host. The shared runtime scenarios make that real: one host-neutral scenario table, two per-host runner adapters (Claude and Codex) implementing the same scenario IDs, and a parity guard that fails if a scenario exists for one host only. +A runtime regression should be caught once per user journey and then exercised by EACH supported host. The shared runtime scenarios make that real: one host-neutral scenario table, per-host runner adapters (Claude and Codex today, with Pi tracked through an explicit live/codified/gap coverage map until its shared runners are live-safe) implementing or accounting for the same scenario IDs, and a parity guard that fails if a scenario exists for one host only. 
### Shared runtime scenarios @@ -159,7 +159,7 @@ The scenario surface lives in `internal/ensigncycle` and splits into four host-n | Scenario table | `shared_scenarios_test.go` (`sharedRuntimeScenarios()`) | Yes | | Fixtures + prompts | `shared_fixtures_test.go` | Yes | | Assertions | `gate_assert_impl_test.go`, `shared_assertions_impl_test.go` | Yes | -| Runner adapter | `codex_live_runner_test.go`, `claude_live_runner_test.go` | No — one per host | +| Runner adapter | `codex_live_runner_test.go`, `claude_live_runner_test.go`, `pi_shared_coverage_test.go` | No — one per host; Pi currently records explicit live/codified/gap status for each shared scenario | The shared table (`sharedRuntimeScenario`) carries ONLY runtime-neutral facts: scenario `name` (ID), `oldPythonTest` provenance, behavior `intent`, and a live `timeout`. It encodes NO launch, auth, plugin, artifact, or transcript field — `TestSharedRuntimeScenarioDefinitions` reflects over the type and fails if any field names a single host. @@ -188,7 +188,7 @@ Assertions prefer durable workflow state over transcript phrasing: entity frontm 3. Add a host-neutral assertion over `(before, after, observed)` strings (or reuse an existing one) and at least one offline negative case in `shared_scenarios_negative_test.go` that builds the broken end-state and proves the assertion goes red. 4. Add a runner entry for the new `name` to BOTH `codexScenarioRunners()` and `claudeScenarioRunners()`. `TestSharedScenarioRunnerCoverage` fails until both hosts cover it. -The shared coverage meta-test enforces parity in both directions: every shared scenario must have a runner for each host, and every runner must map to a defined scenario. +The shared coverage meta-test enforces parity in both directions: every shared scenario must have a Claude and Codex runner plus a Pi live/codified/gap coverage entry, and every runner or Pi coverage entry must map to a defined scenario. 
### Local live execution @@ -212,13 +212,19 @@ Run the Codex shared suite locally (`npm install -g @openai/codex` then `codex l go test -tags live -count=1 -run TestLiveCodexSharedScenarios ./internal/ensigncycle -v ``` +Run the Pi front-door smoke locally (`npm install -g @earendil-works/pi-coding-agent`, `pi install npm:pi-subagents`, and either `pi login` or `OPENAI_API_KEY`). The smoke loads the current checkout's Spacedock first-officer and ensign skills plus the local pi-subagents extension/skill explicitly; it verifies durable state in the split-root state checkout rather than transcript wording alone. + +```bash +go test -tags live -count=1 -run TestLivePiFrontDoorSmoke ./internal/ensigncycle -v +``` + The parity and definition guards run with no model spend — useful before paying for a live run: ```bash -go test -tags live -run 'TestSharedScenarioRunnerCoverage|TestSharedRuntimeScenarioDefinitions' ./internal/ensigncycle -v +go test -tags live -run 'TestSharedScenarioRunnerCoverage|TestSharedRuntimeScenarioDefinitions|TestPiSharedScenarioCoverage' ./internal/ensigncycle -v ``` -Without auth, the respective live suite skips locally (Claude/Codex), except in CI where the lane requires it. +Without auth, the respective live suite skips locally (Claude/Codex/Pi), except in CI where the lane requires it. ### GitHub setup @@ -226,8 +232,9 @@ Workflow: `.github/workflows/runtime-live-e2e.yml`. The offline gate job (`go te - `claude-live` (matrix: `sonnet` on `CI-E2E`, `claude-opus-4-8` on `CI-E2E-OPUS`): secret `ANTHROPIC_API_KEY`. Runs `TestLiveEnsignCycle` (the full-cycle smoke) AND `TestLiveClaudeSharedScenarios` (the shared suite). Artifacts under `live-artifacts/claude/<model>/` plus the session jsonl under `$CLAUDE_CONFIG_DIR`. - `codex-live` (environment `CI-E2E-CODEX`): secret `OPENAI_API_KEY`, `SPACEDOCK_CODEX_LIVE_REQUIRED=1` so a missing key fails clearly after approval. Runs `TestLiveCodexSharedScenarios`. Artifacts under `live-artifacts/codex/`. 
+- `pi-live` (environment `CI-E2E-PI`): secret `OPENAI_API_KEY`, `SPACEDOCK_PI_LIVE_REQUIRED=1` so missing Pi/OpenAI prerequisites fail clearly after approval. Installs `pi-coding-agent`, `pi-subagents`, and `pi-intercom`, runs the Pi shared coverage guard plus `TestLivePiFrontDoorSmoke`, and uploads artifacts under `live-artifacts/pi/`. -Both live lanes must test the current checkout, not a remote `--ref next` install. The Codex lane generates a local marketplace under `$RUNNER_TEMP`: +All live lanes must test the current checkout, not a remote `--ref next` install. The Codex lane generates a local marketplace under `$RUNNER_TEMP`: ```text .agents/plugins/marketplace.json diff --git a/internal/cli/pi.go b/internal/cli/pi.go index 84d7e9d54..5b59d8a72 100644 --- a/internal/cli/pi.go +++ b/internal/cli/pi.go @@ -34,6 +34,7 @@ type piRuntimeConfig struct { firstOfficer string ensign string authPath string + openAIAPIKey string pluginDirSource string } @@ -232,6 +233,7 @@ func piRuntimeConfigFromEnv(env []string, dir, pluginDir string) piRuntimeConfig firstOfficer: filepath.Join(repo, "skills", "first-officer", "SKILL.md"), ensign: filepath.Join(repo, "skills", "ensign", "SKILL.md"), authPath: authPath, + openAIAPIKey: envMap["OPENAI_API_KEY"], pluginDirSource: pluginDirSource, } } @@ -239,7 +241,7 @@ func piRuntimeConfigFromEnv(env []string, dir, pluginDir string) piRuntimeConfig func checkPiRuntime(ops piRuntimeOps, cfg piRuntimeConfig) piCheckResult { bin, err := ops.LookPath("pi") res := piCheckResult{piBinOK: err == nil, piBin: bin, packageRoot: cfg.packageRoot, repoRoot: cfg.repoRoot, authPath: cfg.authPath} - res.authOK = ops.Stat(cfg.authPath) == nil + res.authOK = ops.Stat(cfg.authPath) == nil || strings.TrimSpace(cfg.openAIAPIKey) != "" res.extensionOK = ops.Stat(cfg.extensionPath) == nil res.subagentsSkillOK = ops.Stat(filepath.Join(cfg.subagentsSkill, "SKILL.md")) == nil res.firstOfficerOK = ops.Stat(cfg.firstOfficer) == nil diff --git 
a/internal/cli/pi_frontdoor_test.go b/internal/cli/pi_frontdoor_test.go index 9b1ffbc7a..aa8fb756a 100644 --- a/internal/cli/pi_frontdoor_test.go +++ b/internal/cli/pi_frontdoor_test.go @@ -240,6 +240,20 @@ func TestPiDoctorReportsMissingAndHealthyRuntime(t *testing.T) { } }) + t.Run("openai-api-key-auth", func(t *testing.T) { + var stdout, stderr bytes.Buffer + code := runDoctorWithPi(context.Background(), []string{"--host", "pi", "--plugin-dir", repo}, &fakeHost{}, &fakePiRuntimeOps{ + lookPath: map[string]string{"pi": "/bin/pi"}, + statOK: statOKForPiResources(repo, pkg), + }, append(piTestEnv(pkg, home), "OPENAI_API_KEY=test-key"), &stdout, &stderr) + if code != 0 { + t.Fatalf("exit=%d stderr=%q stdout=%q", code, stderr.String(), stdout.String()) + } + if !strings.Contains(stdout.String(), "OK Pi auth") { + t.Fatalf("OpenAI-key doctor output should accept env auth:\n%s", stdout.String()) + } + }) + t.Run("healthy", func(t *testing.T) { var stdout, stderr bytes.Buffer statOK := statOKForPiResources(repo, pkg) diff --git a/internal/ensigncycle/pi_live_runner_test.go b/internal/ensigncycle/pi_live_runner_test.go index 98f2c7a63..f063fc086 100644 --- a/internal/ensigncycle/pi_live_runner_test.go +++ b/internal/ensigncycle/pi_live_runner_test.go @@ -61,7 +61,7 @@ func newPiLiveSmokeFixture(t *testing.T, name, repo, piSubagentsRoot, binary str piHome := t.TempDir() sessionDir := t.TempDir() cleanHome := t.TempDir() - seedPiLocalAuth(t, piHome, os.Getenv("HOME")) + seedPiLiveAuth(t, piHome, os.Getenv("HOME"), os.Getenv("OPENAI_API_KEY"), os.Getenv("SPACEDOCK_PI_LIVE_REQUIRED")) workflowRoot, stateRoot, entityPath = writePiSplitRootSmokeWorkflow(t) artifactDir = filepath.Join(piLiveArtifactDir(t, name), "run") if err := os.MkdirAll(filepath.Join(artifactDir, "sessions"), 0o755); err != nil { @@ -214,23 +214,32 @@ func piLiveSmokeEntity() string { func seedPiLocalAuth(t *testing.T, piHome, realHome string) { t.Helper() - if realHome == "" { - t.Skip("no HOME set; cannot 
locate ~/.pi/agent/auth.json for Pi live smoke") - } - authPath := filepath.Join(realHome, ".pi", "agent", "auth.json") - b, err := os.ReadFile(authPath) - if err != nil { - t.Skipf("no live Pi auth available: expected %s; run pi login or provide the auth file", authPath) - } - if strings.TrimSpace(string(b)) == "" { - t.Skipf("live Pi auth file is empty: %s", authPath) + seedPiLiveAuth(t, piHome, realHome, os.Getenv("OPENAI_API_KEY"), os.Getenv("SPACEDOCK_PI_LIVE_REQUIRED")) +} + +func seedPiLiveAuth(t *testing.T, piHome, realHome, openAIAPIKey, required string) { + t.Helper() + if realHome != "" { + authPath := filepath.Join(realHome, ".pi", "agent", "auth.json") + b, err := os.ReadFile(authPath) + if err == nil && strings.TrimSpace(string(b)) != "" { + if err := os.MkdirAll(piHome, 0o700); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(piHome, "auth.json"), b, 0o600); err != nil { + t.Fatal(err) + } + return + } } - if err := os.MkdirAll(piHome, 0o700); err != nil { - t.Fatal(err) + if strings.TrimSpace(openAIAPIKey) != "" { + return } - if err := os.WriteFile(filepath.Join(piHome, "auth.json"), b, 0o600); err != nil { - t.Fatal(err) + message := "no live Pi auth available: expected ~/.pi/agent/auth.json or OPENAI_API_KEY" + if required != "" { + t.Fatal(message + " for the approval-gated pi-live lane") } + t.Skip(message + "; run pi login or set OPENAI_API_KEY to run the live Pi suite") } func piLiveEnv(piHome, sessionDir, cleanHome, binaryDir, piSubagentsRoot string) []string { diff --git a/internal/ensigncycle/pi_shared_coverage_test.go b/internal/ensigncycle/pi_shared_coverage_test.go new file mode 100644 index 000000000..c566c3b28 --- /dev/null +++ b/internal/ensigncycle/pi_shared_coverage_test.go @@ -0,0 +1,53 @@ +//go:build live + +package ensigncycle + +import "testing" + +type piSharedScenarioCoverage struct { + mode string + reason string +} + +func piSharedScenarioCoverageMap() map[string]piSharedScenarioCoverage { + return 
map[string]piSharedScenarioCoverage{ + "gate-guardrail": { + mode: "gap", + reason: "Pi currently has durable live coverage for subagent dispatch/front-door setup, but not a live-safe shared first-officer gate runner.", + }, + "rejection-flow": { + mode: "gap", + reason: "Pi currently has durable live coverage for subagent dispatch/front-door setup, but not a live-safe shared first-officer rejection-flow runner.", + }, + "merge-hook-guardrail": { + mode: "gap", + reason: "Pi currently has durable live coverage for subagent dispatch/front-door setup, but not a live-safe shared first-officer merge-hook runner.", + }, + } +} + +func TestPiSharedScenarioCoverage(t *testing.T) { + coverage := piSharedScenarioCoverageMap() + defined := map[string]bool{} + for _, scenario := range sharedRuntimeScenarios() { + defined[scenario.name] = true + entry, ok := coverage[scenario.name] + if !ok { + t.Errorf("shared scenario %q has no Pi coverage entry", scenario.name) + continue + } + switch entry.mode { + case "live", "codified", "gap": + default: + t.Errorf("shared scenario %q has invalid Pi coverage mode %q", scenario.name, entry.mode) + } + if entry.reason == "" { + t.Errorf("shared scenario %q Pi coverage entry needs an honest reason", scenario.name) + } + } + for name := range coverage { + if !defined[name] { + t.Errorf("Pi coverage entry %q has no shared scenario definition", name) + } + } +} diff --git a/internal/ensigncycle/shared_coverage_meta_test.go b/internal/ensigncycle/shared_coverage_meta_test.go index b03c27c4a..756754aa1 100644 --- a/internal/ensigncycle/shared_coverage_meta_test.go +++ b/internal/ensigncycle/shared_coverage_meta_test.go @@ -14,6 +14,7 @@ import "testing" func TestSharedScenarioRunnerCoverage(t *testing.T) { codexRunners := codexScenarioRunners() claudeRunners := claudeScenarioRunners() + piCoverage := piSharedScenarioCoverageMap() scenarios := sharedRuntimeScenarios() if len(scenarios) == 0 { @@ -27,6 +28,9 @@ func 
TestSharedScenarioRunnerCoverage(t *testing.T) { if claudeRunners[scenario.name] == nil { t.Errorf("shared scenario %q has no Claude runner", scenario.name) } + if _, ok := piCoverage[scenario.name]; !ok { + t.Errorf("shared scenario %q has no Pi live/codified/gap coverage entry", scenario.name) + } } // A runner with no matching shared scenario is also drift: a host scenario the @@ -46,4 +50,9 @@ func TestSharedScenarioRunnerCoverage(t *testing.T) { t.Errorf("Claude runner %q has no shared scenario definition", name) } } + for name := range piCoverage { + if !defined[name] { + t.Errorf("Pi coverage entry %q has no shared scenario definition", name) + } + } } diff --git a/internal/ensigncycle/shared_scenarios_docs_test.go b/internal/ensigncycle/shared_scenarios_docs_test.go index 5504c71fb..e477302cd 100644 --- a/internal/ensigncycle/shared_scenarios_docs_test.go +++ b/internal/ensigncycle/shared_scenarios_docs_test.go @@ -9,7 +9,7 @@ import ( // TestSharedScenarioDocsContract is the AC-6 guard: docs/dev/README.md documents // the shared-scenario contract — how to add a scenario, what belongs in the -// host-neutral definition, what belongs in each runner, and the local Claude/Codex +// host-neutral definition, what belongs in each runner, and the local Claude/Codex/Pi // live commands. The README IS the claim here (the contract is the evergreen doc), // so a presence check over its real text is proof at the claim's own level: it // fails if a future edit drops a required clause. The required-clause set is @@ -39,6 +39,7 @@ func TestSharedScenarioDocsContract(t *testing.T) { "runner adapter", "codexScenarioRunners()", "claudeScenarioRunners()", + "pi_shared_coverage_test.go", // How to add a shared scenario. "To add a shared runtime scenario", // The parity guard the contract leans on. @@ -48,6 +49,7 @@ func TestSharedScenarioDocsContract(t *testing.T) { // that bit the live gate). 
"go test -tags live -count=1 -run TestLiveClaudeSharedScenarios ./internal/ensigncycle -v", "go test -tags live -count=1 -run TestLiveCodexSharedScenarios ./internal/ensigncycle -v", + "go test -tags live -count=1 -run TestLivePiFrontDoorSmoke ./internal/ensigncycle -v", } for _, clause := range mustContain { if !strings.Contains(doc, clause) { diff --git a/internal/release/workflow_exec_guard_test.go b/internal/release/workflow_exec_guard_test.go index 1d5e34619..3b941b65f 100644 --- a/internal/release/workflow_exec_guard_test.go +++ b/internal/release/workflow_exec_guard_test.go @@ -16,6 +16,7 @@ func assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(workflow string) error { for _, want := range []string{ `SPACEDOCK_JOURNEY_METRICS_DIR: ${{ github.workspace }}/live-artifacts/journey-metrics/claude/${{ matrix.model }}`, `SPACEDOCK_JOURNEY_METRICS_DIR: ${{ github.workspace }}/live-artifacts/journey-metrics/codex`, + `SPACEDOCK_JOURNEY_METRICS_DIR: ${{ github.workspace }}/live-artifacts/journey-metrics/pi`, } { if !hasExecutableYAMLLine(workflow, want) { return fmt.Errorf("runtime-live-e2e.yml missing active metrics env line %q", want) @@ -31,12 +32,32 @@ func assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(workflow string) error { if codexRun < 0 { return fmt.Errorf("runtime-live-e2e.yml has no executable Codex shared scenario run") } + piCoverageRun := findExecutableStep(steps, "Run Pi shared scenario coverage guard", "TestPiSharedScenarioCoverage") + if piCoverageRun < 0 { + return fmt.Errorf("runtime-live-e2e.yml has no executable Pi shared scenario coverage guard") + } + piSmokeRun := findExecutableStep(steps, "Run live Pi front-door smoke", "TestLivePiFrontDoorSmoke") + if piSmokeRun < 0 { + return fmt.Errorf("runtime-live-e2e.yml has no executable Pi front-door smoke") + } + if !hasExecutableYAMLLine(workflow, `OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}`) || !hasExecutableYAMLLine(workflow, `SPACEDOCK_PI_LIVE_REQUIRED: "1"`) || 
!hasExecutableYAMLLine(workflow, `name: CI-E2E-PI`) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job is missing its OpenAI secret, required flag, or CI-E2E-PI environment") + } + if !workflowHasExecutableCommandContaining(workflow, "pi install npm:pi-subagents") || !workflowHasExecutableCommandContaining(workflow, "pi install npm:pi-intercom") { + return fmt.Errorf("runtime-live-e2e.yml Pi live job does not install required Pi substrates") + } + if !workflowHasExecutableCommandContaining(workflow, `spacedock doctor --host pi --plugin-dir "$GITHUB_WORKSPACE"`) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job does not verify current-checkout Spacedock skills") + } if !hasJourneyMetricsUploadAfter(steps, claudeRun, codexRun) { return fmt.Errorf("runtime-live-e2e.yml Claude shared scenario job does not upload raw journey metrics") } - if !hasJourneyMetricsUploadAfter(steps, codexRun, len(steps)) { + if !hasJourneyMetricsUploadAfter(steps, codexRun, piCoverageRun) { return fmt.Errorf("runtime-live-e2e.yml Codex shared scenario job does not upload raw journey metrics") } + if !hasJourneyMetricsUploadAfter(steps, piSmokeRun, len(steps)) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job does not upload raw journey metrics") + } return nil } @@ -157,6 +178,17 @@ func findExecutableStep(steps []workflowStep, name, commandFragment string) int return -1 } +func workflowHasExecutableCommandContaining(workflow, want string) bool { + for _, step := range parseWorkflowSteps(workflow) { + for _, command := range executableShellCommands(step.run) { + if strings.Contains(command, want) { + return true + } + } + } + return false +} + func hasJourneyMetricsUploadAfter(steps []workflowStep, start, stop int) bool { for i := start + 1; i < stop && i < len(steps); i++ { step := steps[i] From 011cd1d79af2705efb253577bed6a87c17f7451e Mon Sep 17 00:00:00 2001 From: CL Kao Date: Fri, 5 Jun 2026 00:41:32 -0700 Subject: [PATCH 2/2] ci: fix pi live cli install --- 
.github/workflows/runtime-live-e2e.yml | 36 +++++++----- internal/release/journey_workflow_test.go | 59 ++++++++++++++++++++ internal/release/workflow_exec_guard_test.go | 19 ++++++- 3 files changed, 97 insertions(+), 17 deletions(-) diff --git a/.github/workflows/runtime-live-e2e.yml b/.github/workflows/runtime-live-e2e.yml index 1c7b22421..d33185947 100644 --- a/.github/workflows/runtime-live-e2e.yml +++ b/.github/workflows/runtime-live-e2e.yml @@ -33,11 +33,6 @@ on: required: false type: string default: "" - pi_version: - description: "Pin pi-coding-agent to a specific version (e.g. 2.0.5). Empty = npm latest." - required: false - type: string - default: "" effort: description: "Effort hint for the live run (low, high, xhigh). Default low." required: false @@ -350,18 +345,29 @@ jobs: node-version: "20" - name: Install Pi CLI and substrates - env: - PI_VERSION: ${{ inputs.pi_version }} run: | - if [ -n "$PI_VERSION" ]; then - npm install -g "pi-coding-agent@$PI_VERSION" - else - npm install -g pi-coding-agent - fi + npm --version + NPM_BEFORE="$(node -e 'console.log(new Date(Date.now() - 24*60*60*1000).toISOString())')" + echo "Using npm --before age gate for pi-live installs: $NPM_BEFORE" + + npm install -g @earendil-works/pi-coding-agent --before="$NPM_BEFORE" --ignore-scripts --no-audit --no-fund --omit=dev + command -v pi + test -x "$(command -v pi)" pi --version - pi install npm:pi-subagents - pi install npm:pi-intercom - echo "PI_SUBAGENTS_PACKAGE_ROOT=$HOME/.pi/agent/npm/node_modules/pi-subagents" >> "$GITHUB_ENV" + global_npm_root="$(npm root -g)" + node -e "const p=require('$global_npm_root/@earendil-works/pi-coding-agent/package.json'); if (p.name !== '@earendil-works/pi-coding-agent') throw new Error('unexpected Pi package name '+p.name); if (!p.bin || p.bin.pi !== 'dist/cli.js') throw new Error('unexpected Pi bin '+JSON.stringify(p.bin)); console.log('verified '+p.name+'@'+p.version+' bin pi='+p.bin.pi)" + + pi_npm_root="$HOME/.pi/agent/npm" + mkdir 
-p "$pi_npm_root" + npm install --prefix "$pi_npm_root" \ + pi-subagents \ + pi-intercom \ + --before="$NPM_BEFORE" --ignore-scripts --no-audit --no-fund --omit=dev + node -e "const p=require('$pi_npm_root/node_modules/pi-subagents/package.json'); if (p.name !== 'pi-subagents') throw new Error('unexpected pi-subagents package name '+p.name); console.log('verified '+p.name+'@'+p.version)" + node -e "const p=require('$pi_npm_root/node_modules/pi-intercom/package.json'); if (p.name !== 'pi-intercom') throw new Error('unexpected pi-intercom package name '+p.name); console.log('verified '+p.name+'@'+p.version)" + test -f "$pi_npm_root/node_modules/pi-subagents/src/extension/index.ts" + test -f "$pi_npm_root/node_modules/pi-subagents/skills/pi-subagents/SKILL.md" + echo "PI_SUBAGENTS_PACKAGE_ROOT=$pi_npm_root/node_modules/pi-subagents" >> "$GITHUB_ENV" - name: Build spacedock binary run: | diff --git a/internal/release/journey_workflow_test.go b/internal/release/journey_workflow_test.go index 774fdf1d1..12533cc32 100644 --- a/internal/release/journey_workflow_test.go +++ b/internal/release/journey_workflow_test.go @@ -64,6 +64,65 @@ func TestRuntimeLiveWorkflowGuardRejectsMissingSharedScenarioRun(t *testing.T) { } } +func TestRuntimeLiveWorkflowGuardRejectsUnscopedPiPackage(t *testing.T) { + live := readWorkflow(t, "runtime-live-e2e.yml") + adversarial := strings.Replace(live, + `npm install -g @earendil-works/pi-coding-agent --before="$NPM_BEFORE" --ignore-scripts --no-audit --no-fund --omit=dev`, + `npm install -g pi-coding-agent --before="$NPM_BEFORE" --ignore-scripts --no-audit --no-fund --omit=dev`, + 1) + if adversarial == live { + t.Fatal("fixture workflow missing scoped Pi CLI install command") + } + + if err := assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(adversarial); err == nil { + t.Fatal("runtime live workflow guard accepted the wrong unscoped Pi CLI package") + } +} + +func TestRuntimeLiveWorkflowGuardRejectsMissingPiBeforeAgeGate(t *testing.T) { + 
live := readWorkflow(t, "runtime-live-e2e.yml") + adversarial := strings.ReplaceAll(live, ` --before="$NPM_BEFORE"`, ``) + if adversarial == live { + t.Fatal("fixture workflow missing npm --before install flags") + } + + if err := assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(adversarial); err == nil { + t.Fatal("runtime live workflow guard accepted Pi npm installs without --before") + } +} + +func TestRuntimeLiveWorkflowGuardRejectsObsoletePiMinReleaseAgeProbe(t *testing.T) { + live := readWorkflow(t, "runtime-live-e2e.yml") + adversarial := strings.Replace(live, + `NPM_BEFORE="$(node -e 'console.log(new Date(Date.now() - 24*60*60*1000).toISOString())')" + echo "Using npm --before age gate for pi-live installs: $NPM_BEFORE"`, + `npm config get min-release-age + npm config set min-release-age 1440`, + 1) + if adversarial == live { + t.Fatal("fixture workflow missing npm --before age-gate timestamp") + } + + if err := assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(adversarial); err == nil { + t.Fatal("runtime live workflow guard accepted obsolete min-release-age probing") + } +} + +func TestRuntimeLiveWorkflowGuardRejectsUnverifiedPiPackageInstall(t *testing.T) { + live := readWorkflow(t, "runtime-live-e2e.yml") + adversarial := strings.Replace(live, + `node -e "const p=require('$global_npm_root/@earendil-works/pi-coding-agent/package.json'); if (p.name !== '@earendil-works/pi-coding-agent') throw new Error('unexpected Pi package name '+p.name); if (!p.bin || p.bin.pi !== 'dist/cli.js') throw new Error('unexpected Pi bin '+JSON.stringify(p.bin)); console.log('verified '+p.name+'@'+p.version+' bin pi='+p.bin.pi)"`, + `echo "skipping Pi package verification"`, + 1) + if adversarial == live { + t.Fatal("fixture workflow missing Pi CLI package verification command") + } + + if err := assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(adversarial); err == nil { + t.Fatal("runtime live workflow guard accepted an unverified Pi CLI package install") + } +} + func 
TestReleaseWorkflowGuardRejectsCommentOnlyJourneyCostBuilder(t *testing.T) { release := readWorkflow(t, "release.yml") adversarial := strings.Replace(release, diff --git a/internal/release/workflow_exec_guard_test.go b/internal/release/workflow_exec_guard_test.go index 3b941b65f..c10e5c0c7 100644 --- a/internal/release/workflow_exec_guard_test.go +++ b/internal/release/workflow_exec_guard_test.go @@ -43,8 +43,23 @@ func assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(workflow string) error { if !hasExecutableYAMLLine(workflow, `OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}`) || !hasExecutableYAMLLine(workflow, `SPACEDOCK_PI_LIVE_REQUIRED: "1"`) || !hasExecutableYAMLLine(workflow, `name: CI-E2E-PI`) { return fmt.Errorf("runtime-live-e2e.yml Pi live job is missing its OpenAI secret, required flag, or CI-E2E-PI environment") } - if !workflowHasExecutableCommandContaining(workflow, "pi install npm:pi-subagents") || !workflowHasExecutableCommandContaining(workflow, "pi install npm:pi-intercom") { - return fmt.Errorf("runtime-live-e2e.yml Pi live job does not install required Pi substrates") + if workflowHasExecutableCommandContaining(workflow, `npm config get min-release-age`) || workflowHasExecutableCommandContaining(workflow, `npm config set min-release-age`) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job uses obsolete min-release-age probing instead of npm --before") + } + if !workflowHasExecutableCommandContaining(workflow, `NPM_BEFORE="$(node -e 'console.log(new Date(Date.now() - 24*60*60*1000).toISOString())')"`) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job does not compute an npm --before age-gate timestamp") + } + if !workflowHasExecutableCommandContaining(workflow, `npm install -g @earendil-works/pi-coding-agent --before="$NPM_BEFORE" --ignore-scripts --no-audit --no-fund --omit=dev`) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job does not install the scoped Pi CLI with npm --before and safer npm flags") + } + if 
workflowHasExecutableCommandContaining(workflow, "npm install -g pi-coding-agent") || workflowHasExecutableCommandContaining(workflow, "npm install -g \"pi-coding-agent@") { + return fmt.Errorf("runtime-live-e2e.yml Pi live job installs the wrong unscoped Pi CLI package") + } + if !workflowHasExecutableCommandContaining(workflow, `npm install --prefix "$pi_npm_root" pi-subagents pi-intercom --before="$NPM_BEFORE" --ignore-scripts --no-audit --no-fund --omit=dev`) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job does not directly install required Pi substrates with npm --before and safer npm flags") + } + if !workflowHasExecutableCommandContaining(workflow, `test -x "$(command -v pi)"`) || !workflowHasExecutableCommandContaining(workflow, `p.name !== '@earendil-works/pi-coding-agent'`) || !workflowHasExecutableCommandContaining(workflow, `p.bin.pi !== 'dist/cli.js'`) || !workflowHasExecutableCommandContaining(workflow, `p.name !== 'pi-subagents'`) || !workflowHasExecutableCommandContaining(workflow, `p.name !== 'pi-intercom'`) || !workflowHasExecutableCommandContaining(workflow, `test -f "$pi_npm_root/node_modules/pi-subagents/src/extension/index.ts"`) || !workflowHasExecutableCommandContaining(workflow, `test -f "$pi_npm_root/node_modules/pi-subagents/skills/pi-subagents/SKILL.md"`) { + return fmt.Errorf("runtime-live-e2e.yml Pi live job does not verify installed Pi package names, versions, bin, and resource paths") } if !workflowHasExecutableCommandContaining(workflow, `spacedock doctor --host pi --plugin-dir "$GITHUB_WORKSPACE"`) { return fmt.Errorf("runtime-live-e2e.yml Pi live job does not verify current-checkout Spacedock skills")