From f1d2d871facc1aa0770ab88fe8732f103fe3921e Mon Sep 17 00:00:00 2001 From: CL Kao Date: Fri, 5 Jun 2026 13:08:41 -0700 Subject: [PATCH 1/5] ci: split release.yml into journey-ledger + goreleaser siblings (POLICY 1) Splits the single goreleaser job into two siblings so a never-fired Runtime-Live-E2E run on next can no longer block the cut: - journey-ledger (authored before goreleaser so the document-order guard stays green): Download / Build / Publish the cost ledger. The download step degrades to a non-fatal skip (exit 0, found=false) when no producer run exists, and the Build/Publish steps are gated on that output so a producer-less / empty-dir cut SKIPS them (job green) instead of running journey-costs over an empty dir and REDding the job (POLICY 1). Carries the one-way needs: goreleaser edge required so gh release upload runs after the Release exists. - goreleaser: cuts the release regardless of journey-ledger. Hardens assertReleaseWorkflowPublishesJourneyCosts to bind needs: to the OWNING job: a goreleaser->journey-ledger edge now REDs the guard, while the safe journey-ledger->goreleaser edge does not false-trip it. Adds the job-level skip-consequence guard plus adversarial tests for the unsafe edge and an ungated builder, and a shell-level exercise of the download skip branch. Anchors the empty-body guard extractor on release-notes.txt so it selects the right if-condition now that the download step also carries one. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/release.yml | 105 ++++++---- internal/release/journey_workflow_test.go | 152 ++++++++++++++ internal/release/notes_extract_test.go | 11 +- internal/release/workflow_exec_guard_test.go | 206 +++++++++++++++++++ 4 files changed, 435 insertions(+), 39 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 61569ce18..67c391606 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,6 +13,76 @@ permissions: contents: write jobs: + # The journey-cost ledger is a SIBLING of the goreleaser job, not a + # pre-goreleaser gate, so a missing producer run can never block the cut. It is + # authored before goreleaser in this file because the document-order guard + # (internal/release/workflow_exec_guard_test.go) parses steps job-unaware and + # requires the builder to precede the goreleaser action in text. It carries the + # ONE-WAY edge `needs: goreleaser`: `gh release upload` needs the Release to + # exist, and only goreleaser creates it, so this job waits for goreleaser — + # goreleaser does NOT wait for this job, so the cut proceeds regardless. + journey-ledger: + needs: goreleaser + runs-on: macos-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.22" + + # Find the latest successful Runtime Live E2E run on next and pull its + # journey-metrics artifacts. When no such run exists this is a NON-FATAL + # skip (`exit 0`, found=false) — runtime-live-e2e.yml has no push:next + # trigger, so a fresh cut routinely has no producer run, and blocking the + # ledger here must not fail anything. The emitted `found` output gates the + # downstream Build/Publish steps so they SKIP (job green) rather than run + # `journey-costs` over an empty dir and RED the job. + - name: Download latest journey metrics artifacts + id: download_metrics + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + mkdir -p "$RUNNER_TEMP/runtime-live-artifacts" "$RUNNER_TEMP/journey-metrics" + run_id="$(gh run list --workflow "Runtime Live E2E" --branch next --status success --limit 1 --json databaseId --jq '.[0].databaseId')" + if [ -z "$run_id" ] || [ "$run_id" = "null" ]; then + echo "::warning::no successful Runtime Live E2E run found on next; skipping journey ledger" >&2 + echo "found=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + gh run download "$run_id" --dir "$RUNNER_TEMP/runtime-live-artifacts" + find "$RUNNER_TEMP/runtime-live-artifacts" -path '*/journey-metrics/*.json' -type f -exec cp {} "$RUNNER_TEMP/journey-metrics/" \; + if [ -z "$(find "$RUNNER_TEMP/journey-metrics" -name '*.json' -type f -print -quit)" ]; then + echo "::warning::downloaded Runtime Live E2E artifacts contained no journey metrics JSON; skipping journey ledger" >&2 + echo "found=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "found=true" >> "$GITHUB_OUTPUT" + + - name: Build journey cost ledger + if: steps.download_metrics.outputs.found == 'true' + run: | + set -euo pipefail + RELEASE_VERSION="${GITHUB_REF_NAME#v}" + go run ./cmd/spacedock-release journey-costs "$RELEASE_VERSION" \ + --metrics-dir "$RUNNER_TEMP/journey-metrics" \ + --out "$RUNNER_TEMP/journey-costs-v${RELEASE_VERSION}.json" + test -s "$RUNNER_TEMP/journey-costs-v${RELEASE_VERSION}.json" + + - name: Publish journey cost ledger + if: steps.download_metrics.outputs.found == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + RELEASE_VERSION="${GITHUB_REF_NAME#v}" + gh release upload "$GITHUB_REF_NAME" "$RUNNER_TEMP/journey-costs-v${RELEASE_VERSION}.json" --clobber + goreleaser: runs-on: macos-latest steps: @@ -59,33 +129,6 @@ jobs: exit 1 fi - - name: Download latest journey metrics artifacts - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - set -euo pipefail - mkdir -p "$RUNNER_TEMP/runtime-live-artifacts" "$RUNNER_TEMP/journey-metrics" - run_id="$(gh run list --workflow "Runtime Live E2E" --branch next --status success --limit 1 --json databaseId --jq '.[0].databaseId')" - if [ -z "$run_id" ] || [ "$run_id" = "null" ]; then - echo "::error::no successful Runtime Live E2E run found on next for journey metrics" >&2 - exit 1 - fi - gh run download "$run_id" --dir "$RUNNER_TEMP/runtime-live-artifacts" - find "$RUNNER_TEMP/runtime-live-artifacts" -path '*/journey-metrics/*.json' -type f -exec cp {} "$RUNNER_TEMP/journey-metrics/" \; - if [ -z "$(find "$RUNNER_TEMP/journey-metrics" -name '*.json' -type f -print -quit)" ]; then - echo "::error::downloaded Runtime Live E2E artifacts contained no journey metrics JSON" >&2 - exit 1 - fi - - - name: Build journey cost ledger - run: | - set -euo pipefail - RELEASE_VERSION="${GITHUB_REF_NAME#v}" - go run ./cmd/spacedock-release journey-costs "$RELEASE_VERSION" \ - --metrics-dir "$RUNNER_TEMP/journey-metrics" \ - --out "$RUNNER_TEMP/journey-costs-v${RELEASE_VERSION}.json" - test -s "$RUNNER_TEMP/journey-costs-v${RELEASE_VERSION}.json" - - name: Run goreleaser uses: goreleaser/goreleaser-action@v6 with: @@ -99,14 +142,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} HOMEBREW_TAP_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }} - - name: Publish journey cost ledger - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - set -euo pipefail - RELEASE_VERSION="${GITHUB_REF_NAME#v}" - gh release upload "$GITHUB_REF_NAME" "$RUNNER_TEMP/journey-costs-v${RELEASE_VERSION}.json" --clobber - # AC-4: stamp the plugin manifests' `version` to the release so the host # plugin panel (`claude plugin list --json`) displays a version that tracks # the binary instead of the frozen placeholder. The displayed version is diff --git a/internal/release/journey_workflow_test.go b/internal/release/journey_workflow_test.go index 8fc5b908f..ab0884301 100644 --- a/internal/release/journey_workflow_test.go +++ b/internal/release/journey_workflow_test.go @@ -2,6 +2,7 @@ package release import ( "os" + "os/exec" "path/filepath" "strings" "testing" @@ -153,6 +154,157 @@ func TestReleaseWorkflowGuardRejectsCommentOnlyJourneyCostPublish(t *testing.T) } } +// TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger injects the exact +// re-coupling bug this separation closes: a `needs: journey-ledger` edge on the +// goreleaser job re-blocks the cut on the never-fired Runtime-Live-E2E run. The +// hardened guard binds `needs:` to the OWNING job (the one carrying the +// goreleaser action) and must RED on this edge — while the SAFE one-way +// `journey-ledger: needs: goreleaser` edge in the real file does NOT trip it. +func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger(t *testing.T) { + release := readWorkflow(t, "release.yml") + if err := assertReleaseWorkflowPublishesJourneyCosts(release); err != nil { + t.Fatalf("real release.yml unexpectedly fails the guard before mutation: %v", err) + } + adversarial := strings.Replace(release, + " goreleaser:\n runs-on: macos-latest", + " goreleaser:\n needs: journey-ledger\n runs-on: macos-latest", + 1) + if adversarial == release { + t.Fatal("fixture workflow missing the goreleaser job header to mutate") + } + + if err := assertReleaseWorkflowPublishesJourneyCosts(adversarial); err == nil { + t.Fatal("release workflow guard accepted a goreleaser job that needs the journey-ledger job (re-coupling the cut to the never-fired run)") + } +} + +// TestReleaseWorkflowSkipsLedgerWhenNoProducerRun proves the JOB-LEVEL skip +// consequence of POLICY 1: when the download step finds no producer run it must +// not merely exit 0 — the downstream Build/Publish-ledger steps must be GATED on +// the download step's producer-found output, so a producer-less / empty-dir cut +// SKIPS those steps (journey-ledger job green) instead of hard-failing the +// `journey-costs` builder over an empty dir (journey-ledger job RED). Parsed +// from the real release.yml, not matched against prose. +func TestReleaseWorkflowSkipsLedgerWhenNoProducerRun(t *testing.T) { + release := readWorkflow(t, "release.yml") + if err := assertReleaseLedgerStepsSkipWhenNoProducerRun(release); err != nil { + t.Fatal(err) + } +} + +// TestReleaseWorkflowGuardRejectsUngatedLedgerBuild is the adversarial twin of +// the skip-consequence proof: strip the producer-found `if:` gate off the Build +// step and the guard must RED, because an ungated builder runs `journey-costs` +// over an empty dir on a producer-less cut and REDs the journey-ledger job — +// the exact failure POLICY 1 closes. +func TestReleaseWorkflowGuardRejectsUngatedLedgerBuild(t *testing.T) { + release := readWorkflow(t, "release.yml") + if err := assertReleaseLedgerStepsSkipWhenNoProducerRun(release); err != nil { + t.Fatalf("real release.yml unexpectedly fails the skip-consequence guard before mutation: %v", err) + } + adversarial := strings.Replace(release, + " - name: Build journey cost ledger\n if: steps.download_metrics.outputs.found == 'true'\n", + " - name: Build journey cost ledger\n", + 1) + if adversarial == release { + t.Fatal("fixture workflow missing the gated Build journey cost ledger step to mutate") + } + + if err := assertReleaseLedgerStepsSkipWhenNoProducerRun(adversarial); err == nil { + t.Fatal("release workflow guard accepted an ungated Build step that would RED the journey-ledger job on a producer-less cut") + } +} + +// TestReleaseDownloadSkipBranchExitsZeroOnEmptyRunList EXERCISES the download +// step's missing-run branch: it runs the REAL download script extracted from +// release.yml against a stubbed `gh` that returns an empty run list, and asserts +// the script exits 0 (non-fatal skip, not the old exit 1) and emits +// `found=false` to $GITHUB_OUTPUT so the downstream gate skips. This is the +// behavior-level proof of the skip branch, not a substring check. +func TestReleaseDownloadSkipBranchExitsZeroOnEmptyRunList(t *testing.T) { + release := readWorkflow(t, "release.yml") + script := extractStepRun(t, release, "Download latest journey metrics artifacts") + + dir := t.TempDir() + // Stub `gh` on PATH so the download step's `gh run list ... --jq '.[0].databaseId'` + // yields the empty-result string the real CLI returns when no run matches. + binDir := filepath.Join(dir, "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatalf("mkdir bin: %v", err) + } + ghStub := "#!/bin/sh\nexit 0\n" + if err := os.WriteFile(filepath.Join(binDir, "gh"), []byte(ghStub), 0o755); err != nil { + t.Fatalf("write gh stub: %v", err) + } + outPath := filepath.Join(dir, "github_output") + if err := os.WriteFile(outPath, nil, 0o644); err != nil { + t.Fatalf("seed github_output: %v", err) + } + + cmd := exec.Command("bash", "-c", script) + cmd.Dir = dir + cmd.Env = append(os.Environ(), + "PATH="+binDir+string(os.PathListSeparator)+os.Getenv("PATH"), + "RUNNER_TEMP="+dir, + "GITHUB_OUTPUT="+outPath, + "GH_TOKEN=stub", + ) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("download skip branch exited non-zero (want exit 0 on empty run list): %v\n%s", err, out) + } + + gotOutput, err := os.ReadFile(outPath) + if err != nil { + t.Fatalf("read github_output: %v", err) + } + if !strings.Contains(string(gotOutput), "found=false") { + t.Errorf("download skip branch did not emit found=false to $GITHUB_OUTPUT; got:\n%s", gotOutput) + } +} + +// extractStepRun pulls a named step's `run: |` block out of a workflow document, +// dedented to a runnable shell script, so tests exercise the EXACT script CI +// runs rather than a hand-copied duplicate. +func extractStepRun(t *testing.T, workflow, stepName string) string { + t.Helper() + for _, step := range parseWorkflowSteps(workflow) { + if step.name == stepName { + if step.run == "" { + t.Fatalf("step %q has no run block", stepName) + } + return dedent(step.run) + } + } + t.Fatalf("workflow has no step named %q", stepName) + return "" +} + +// dedent strips the common leading-space indent off every non-blank line of a +// `run:` block so the extracted YAML script runs under a shell. +func dedent(block string) string { + lines := strings.Split(block, "\n") + min := -1 + for _, line := range lines { + if strings.TrimSpace(line) == "" { + continue + } + n := len(line) - len(strings.TrimLeft(line, " ")) + if min < 0 || n < min { + min = n + } + } + if min <= 0 { + return block + } + for i, line := range lines { + if len(line) >= min { + lines[i] = line[min:] + } + } + return strings.Join(lines, "\n") +} + func readWorkflow(t *testing.T, name string) string { t.Helper() data, err := os.ReadFile(filepath.Join("..", "..", ".github", "workflows", name)) diff --git a/internal/release/notes_extract_test.go b/internal/release/notes_extract_test.go index ed0c4da94..ee8625c09 100644 --- a/internal/release/notes_extract_test.go +++ b/internal/release/notes_extract_test.go @@ -55,10 +55,13 @@ func TestAnnotatedTagBodyRoundTrips(t *testing.T) { // guardConditionRe pulls the empty-body guard condition out of release.yml's // "Extract release notes from the tag body" step, e.g. the `[ -z "..." ]` test -// inside `if ; then`. The test runs the REAL guard string from the -// workflow, so a regression (e.g. back to the dead `[ ! -s release-notes.txt ]` -// form) breaks this test instead of silently shipping a blank-body Release. -var guardConditionRe = regexp.MustCompile(`(?m)^\s*if (\[.*\]); then\s*$`) +// inside `if ; then`. The condition is anchored on `release-notes.txt` so +// it selects this guard specifically rather than any other `if [ ... ]; then` +// line in the file (the journey-ledger download step also carries one). The test +// runs the REAL guard string from the workflow, so a regression (e.g. back to +// the dead `[ ! -s release-notes.txt ]` form) breaks this test instead of +// silently shipping a blank-body Release. +var guardConditionRe = regexp.MustCompile(`(?m)^\s*if (\[.*release-notes\.txt.*\]); then\s*$`) // TestReleaseYAMLGuardRejectsEmptyBody locks the release.yml empty-body guard: // `git tag -l --format='%(contents:body)'` always appends a trailing newline, so diff --git a/internal/release/workflow_exec_guard_test.go b/internal/release/workflow_exec_guard_test.go index 42747c82e..e0b859acf 100644 --- a/internal/release/workflow_exec_guard_test.go +++ b/internal/release/workflow_exec_guard_test.go @@ -7,11 +7,24 @@ import ( type workflowStep struct { name string + id string + ifCond string uses string run string withPath string } +// workflowJob is a top-level entry under `jobs:` — its name, its declared +// `needs:` edges, and the steps it owns. The job graph is parsed separately from +// the flat step list so the separation guard can bind a `needs:` edge to the +// OWNING job (the job carrying the goreleaser action) rather than matching the +// edge anywhere in the document. +type workflowJob struct { + name string + needs []string + steps []workflowStep +} + func assertRuntimeLiveWorkflowUploadsRawJourneyMetrics(workflow string) error { for _, want := range []string{ `SPACEDOCK_JOURNEY_METRICS_DIR: ${{ github.workspace }}/live-artifacts/journey-metrics/claude/${{ matrix.model }}`, @@ -122,6 +135,109 @@ func assertReleaseWorkflowPublishesJourneyCosts(workflow string) error { if publishStep <= builderStep { return fmt.Errorf("release.yml publishes journey costs before building them") } + if err := assertGoreleaserDoesNotNeedJourneyLedger(workflow); err != nil { + return err + } + return nil +} + +// assertGoreleaserDoesNotNeedJourneyLedger binds the separation invariant to the +// job DAG: the job that carries the goreleaser action must NOT declare `needs:` +// on the job that builds the journey-cost ledger. That edge would re-block the +// cut on the never-fired Runtime-Live-E2E run (the exact bug this separation +// closes). The edge is bound to the OWNING job — found by which job holds the +// goreleaser action vs. which holds the journey-cost builder — so the SAFE +// reverse edge (journey-ledger needs: goreleaser, required so `gh release upload` +// runs after the Release exists) does not false-trip the check. +func assertGoreleaserDoesNotNeedJourneyLedger(workflow string) error { + jobs := parseWorkflowJobs(workflow) + goreleaserJob, ledgerJob := "", "" + for _, job := range jobs { + for _, step := range job.steps { + if strings.HasPrefix(step.uses, "goreleaser/goreleaser-action@") { + goreleaserJob = job.name + } + for _, command := range executableShellCommands(step.run) { + if isJourneyCostBuilder(command) { + ledgerJob = job.name + } + } + } + } + if goreleaserJob == "" { + return fmt.Errorf("release.yml has no job carrying the goreleaser action") + } + if ledgerJob == "" { + return fmt.Errorf("release.yml has no job carrying the journey-cost builder") + } + if goreleaserJob == ledgerJob { + // Single-job layout: no cross-job needs edge to re-block the cut. + return nil + } + for _, job := range jobs { + if job.name != goreleaserJob { + continue + } + for _, need := range job.needs { + if need == ledgerJob { + return fmt.Errorf("release.yml goreleaser job %q declares needs: %q — re-blocking the cut on the journey-ledger job", goreleaserJob, ledgerJob) + } + } + } + return nil +} + +// assertReleaseLedgerStepsSkipWhenNoProducerRun proves POLICY 1's job-level +// skip: the journey-cost builder and its release-upload step must be GATED on +// the download step's producer-found output, so a producer-less / empty-dir cut +// SKIPS them (journey-ledger job green) instead of running `journey-costs` over +// an empty dir and REDding the job. It locates the download step by its emitted +// `found=` output and requires both the builder step and the upload step to +// carry an `if:` referencing that step's `found` output. +func assertReleaseLedgerStepsSkipWhenNoProducerRun(workflow string) error { + steps := parseWorkflowSteps(workflow) + + downloadID := "" + for _, step := range steps { + if step.id == "" { + continue + } + for _, command := range executableShellCommands(step.run) { + if strings.Contains(command, `found=false`) && strings.Contains(command, `"$GITHUB_OUTPUT"`) { + downloadID = step.id + } + } + } + if downloadID == "" { + return fmt.Errorf("release.yml has no download step with an id that emits found=false to $GITHUB_OUTPUT (the non-fatal skip flag)") + } + + gate := fmt.Sprintf("steps.%s.outputs.found == 'true'", downloadID) + builderGated, publishGated := false, false + for _, step := range steps { + isBuilder, isPublish := false, false + for _, command := range executableShellCommands(step.run) { + if isJourneyCostBuilder(command) { + isBuilder = true + } + if strings.HasPrefix(command, `gh release upload `) && + strings.Contains(command, `"$RUNNER_TEMP/journey-costs-v${RELEASE_VERSION}.json"`) { + isPublish = true + } + } + if isBuilder && step.ifCond == gate { + builderGated = true + } + if isPublish && step.ifCond == gate { + publishGated = true + } + } + if !builderGated { + return fmt.Errorf("release.yml journey-cost builder is not gated on %q; a producer-less cut would run journey-costs over an empty dir and RED the journey-ledger job", gate) + } + if !publishGated { + return fmt.Errorf("release.yml journey-cost publish step is not gated on %q; a producer-less cut would attempt an upload of a missing ledger", gate) + } return nil } @@ -146,6 +262,10 @@ func parseWorkflowSteps(workflow string) []workflowStep { } step := &steps[len(steps)-1] switch { + case strings.HasPrefix(trimmed, "id: "): + step.id = strings.Trim(strings.TrimPrefix(trimmed, "id: "), `"`) + case strings.HasPrefix(trimmed, "if: "): + step.ifCond = strings.TrimSpace(strings.TrimPrefix(trimmed, "if: ")) case strings.HasPrefix(trimmed, "uses: "): step.uses = strings.Trim(strings.TrimPrefix(trimmed, "uses: "), `"`) case strings.HasPrefix(trimmed, "run: |"): @@ -179,6 +299,92 @@ func parseWorkflowSteps(workflow string) []workflowStep { return steps } +// parseWorkflowJobs partitions a workflow into its top-level jobs (the 2-space +// indented keys under `jobs:`), capturing each job's declared `needs:` edges and +// the steps it owns. This is the job-aware view the separation guard needs: the +// flat parseWorkflowSteps cannot tell which job a `needs:` edge or step belongs +// to, so a `goreleaser → journey-ledger` edge would otherwise be invisible. +func parseWorkflowJobs(workflow string) []workflowJob { + lines := strings.Split(workflow, "\n") + jobsStart := -1 + for i, line := range lines { + if strings.TrimSpace(line) == "jobs:" && leadingSpaces(line) == 0 { + jobsStart = i + 1 + break + } + } + if jobsStart < 0 { + return nil + } + + var jobs []workflowJob + for i := jobsStart; i < len(lines); i++ { + line := lines[i] + trimmed := strings.TrimSpace(line) + if trimmed == "" || strings.HasPrefix(trimmed, "#") { + continue + } + // A top-level key at column 0 ends the jobs block (e.g. a trailing + // document key). Job keys sit at exactly 2-space indent. + if leadingSpaces(line) == 0 { + break + } + if leadingSpaces(line) == 2 && strings.HasSuffix(trimmed, ":") { + jobs = append(jobs, workflowJob{name: strings.TrimSuffix(trimmed, ":")}) + continue + } + if len(jobs) == 0 { + continue + } + job := &jobs[len(jobs)-1] + if leadingSpaces(line) == 4 && strings.HasPrefix(trimmed, "needs:") { + job.needs = parseNeeds(trimmed) + } + } + + // Attach steps by slicing each job's text span and reusing the flat parser. + jobHeaderAt := func(name string) int { + for i := jobsStart; i < len(lines); i++ { + if leadingSpaces(lines[i]) == 2 && strings.TrimSpace(lines[i]) == name+":" { + return i + } + } + return -1 + } + for j := range jobs { + start := jobHeaderAt(jobs[j].name) + end := len(lines) + if j+1 < len(jobs) { + end = jobHeaderAt(jobs[j+1].name) + } + if start >= 0 && end > start { + jobs[j].steps = parseWorkflowSteps(strings.Join(lines[start:end], "\n")) + } + } + return jobs +} + +// parseNeeds reads a job's `needs:` declaration in any of YAML's three shapes — +// scalar (`needs: goreleaser`), flow sequence (`needs: [a, b]`), or (when the +// line is just `needs:`) an empty set the caller treats as no edge. Block-list +// form is not used in this repo's workflows and is not parsed here. +func parseNeeds(trimmed string) []string { + rest := strings.TrimSpace(strings.TrimPrefix(trimmed, "needs:")) + if rest == "" { + return nil + } + rest = strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(rest, "["), "]")) + var out []string + for _, part := range strings.Split(rest, ",") { + if name := strings.Trim(strings.TrimSpace(part), `"'`); name != "" { + out = append(out, name) + } + } + return out +} + +// findExecutableStep returns the index of the step named name whose run block +// contains commandFragment, or -1. func findExecutableStep(steps []workflowStep, name, commandFragment string) int { for i, step := range steps { if step.name != name { From bd1fb72b8e8378d4231396d4fd196e7602320633 Mon Sep 17 00:00:00 2001 From: CL Kao Date: Fri, 5 Jun 2026 14:14:05 -0700 Subject: [PATCH 2/5] test: close the block-list needs evasion in the goreleaser separation guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M1 (route-back from validation cycle 1): parseNeeds only read scalar/flow needs:, so a future goreleaser re-coupling written as a YAML block-list (needs: then - journey-ledger) reported goreleaser needs=[] and evaded assertGoreleaserDoesNotNeedJourneyLedger — the guard would greenlight a real re-coupling edge. parseWorkflowJobs now consumes the block-list entries that follow a bare needs: line, and TestReleaseWorkflowGuardRejectsGoreleaserNeeds- JourneyLedger injects the edge in all three forms (scalar/flow/block-list); the safe reverse edge on journey-ledger stays tolerated. P1: the download skip-branch now tolerates a gh error / missing gh as found=false (|| true on the run_id query) instead of aborting under set -e; TestReleaseDownloadSkipBranchToleratesGhError exercises both the gh-error and missing-gh paths. go test ./internal/release/ 34/34; full offline go test ./... 1176 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/release.yml | 6 +- internal/release/journey_workflow_test.go | 93 ++++++++++++++++++-- internal/release/workflow_exec_guard_test.go | 22 ++++- 3 files changed, 107 insertions(+), 14 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 67c391606..233abf17e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,7 +49,11 @@ jobs: run: | set -euo pipefail mkdir -p "$RUNNER_TEMP/runtime-live-artifacts" "$RUNNER_TEMP/journey-metrics" - run_id="$(gh run list --workflow "Runtime Live E2E" --branch next --status success --limit 1 --json databaseId --jq '.[0].databaseId')" + # `|| true` degrades a gh error (or a missing gh) to an empty run_id so + # the no-run skip branch below fires (found=false, exit 0) instead of + # aborting under set -e. The journey ledger is best-effort; a query + # failure must never RED the cut. + run_id="$(gh run list --workflow "Runtime Live E2E" --branch next --status success --limit 1 --json databaseId --jq '.[0].databaseId' || true)" if [ -z "$run_id" ] || [ "$run_id" = "null" ]; then echo "::warning::no successful Runtime Live E2E run found on next; skipping journey ledger" >&2 echo "found=false" >> "$GITHUB_OUTPUT" diff --git a/internal/release/journey_workflow_test.go b/internal/release/journey_workflow_test.go index ab0884301..ef32f9b74 100644 --- a/internal/release/journey_workflow_test.go +++ b/internal/release/journey_workflow_test.go @@ -160,21 +160,35 @@ func TestReleaseWorkflowGuardRejectsCommentOnlyJourneyCostPublish(t *testing.T) // hardened guard binds `needs:` to the OWNING job (the one carrying the // goreleaser action) and must RED on this edge — while the SAFE one-way // `journey-ledger: needs: goreleaser` edge in the real file does NOT trip it. +// The edge is injected in every YAML shape `needs:` can take (scalar, flow +// sequence, and block list) so no single re-coupling syntax evades the guard. func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger(t *testing.T) { release := readWorkflow(t, "release.yml") if err := assertReleaseWorkflowPublishesJourneyCosts(release); err != nil { t.Fatalf("real release.yml unexpectedly fails the guard before mutation: %v", err) } - adversarial := strings.Replace(release, - " goreleaser:\n runs-on: macos-latest", - " goreleaser:\n needs: journey-ledger\n runs-on: macos-latest", - 1) - if adversarial == release { - t.Fatal("fixture workflow missing the goreleaser job header to mutate") - } - if err := assertReleaseWorkflowPublishesJourneyCosts(adversarial); err == nil { - t.Fatal("release workflow guard accepted a goreleaser job that needs the journey-ledger job (re-coupling the cut to the never-fired run)") + for _, tc := range []struct { + name string + needsForm string + }{ + {"scalar", " needs: journey-ledger\n"}, + {"flow sequence", " needs: [journey-ledger]\n"}, + {"block list", " needs:\n - journey-ledger\n"}, + } { + t.Run(tc.name, func(t *testing.T) { + adversarial := strings.Replace(release, + " goreleaser:\n runs-on: macos-latest", + " goreleaser:\n"+tc.needsForm+" runs-on: macos-latest", + 1) + if adversarial == release { + t.Fatal("fixture workflow missing the goreleaser job header to mutate") + } + + if err := assertReleaseWorkflowPublishesJourneyCosts(adversarial); err == nil { + t.Fatalf("release workflow guard accepted a goreleaser job that needs the journey-ledger job via %s form (re-coupling the cut to the never-fired run)", tc.name) + } + }) } } @@ -263,6 +277,67 @@ func TestReleaseDownloadSkipBranchExitsZeroOnEmptyRunList(t *testing.T) { } } +// TestReleaseDownloadSkipBranchToleratesGhError EXERCISES the download step's +// degradation when the `gh run list` query itself fails — a gh error or a +// missing gh on PATH. Under `set -euo pipefail` an un-guarded command +// substitution would abort the step (RED the journey-ledger job); the `|| true` +// degrades it to an empty run_id so the no-run skip branch fires (exit 0, +// found=false) and the cut is never blocked by a best-effort ledger query. +func TestReleaseDownloadSkipBranchToleratesGhError(t *testing.T) { + release := readWorkflow(t, "release.yml") + script := extractStepRun(t, release, "Download latest journey metrics artifacts") + + for _, tc := range []struct { + name string + ghStub string // empty ghStub means: do not put gh on PATH at all + }{ + {"gh exits non-zero", "#!/bin/sh\necho 'gh: API rate limit exceeded' >&2\nexit 1\n"}, + {"gh missing from PATH", ""}, + } { + t.Run(tc.name, func(t *testing.T) { + dir := t.TempDir() + binDir := filepath.Join(dir, "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatalf("mkdir bin: %v", err) + } + // PATH carries system utilities (mkdir/find/awk) plus binDir, but NOT + // the directories a real gh lives in — so the gh seen by the script is + // exactly the stub we write here, or nothing when we write none. + path := binDir + string(os.PathListSeparator) + "/usr/bin" + string(os.PathListSeparator) + "/bin" + if tc.ghStub != "" { + if err := os.WriteFile(filepath.Join(binDir, "gh"), []byte(tc.ghStub), 0o755); err != nil { + t.Fatalf("write gh stub: %v", err) + } + } + outPath := filepath.Join(dir, "github_output") + if err := os.WriteFile(outPath, nil, 0o644); err != nil { + t.Fatalf("seed github_output: %v", err) + } + + cmd := exec.Command("bash", "-c", script) + cmd.Dir = dir + cmd.Env = append(os.Environ(), + "PATH="+path, + "RUNNER_TEMP="+dir, + "GITHUB_OUTPUT="+outPath, + "GH_TOKEN=stub", + ) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("download step aborted on a gh query failure (want exit 0, found=false): %v\n%s", err, out) + } + + gotOutput, err := os.ReadFile(outPath) + if err != nil { + t.Fatalf("read github_output: %v", err) + } + if !strings.Contains(string(gotOutput), "found=false") { + t.Errorf("download step did not emit found=false to $GITHUB_OUTPUT on a gh query failure; got:\n%s", gotOutput) + } + }) + } +} + // extractStepRun pulls a named step's `run: |` block out of a workflow document, // dedented to a runnable shell script, so tests exercise the EXACT script CI // runs rather than a hand-copied duplicate. diff --git a/internal/release/workflow_exec_guard_test.go b/internal/release/workflow_exec_guard_test.go index e0b859acf..3212be4c7 100644 --- a/internal/release/workflow_exec_guard_test.go +++ b/internal/release/workflow_exec_guard_test.go @@ -339,6 +339,19 @@ func parseWorkflowJobs(workflow string) []workflowJob { job := &jobs[len(jobs)-1] if leadingSpaces(line) == 4 && strings.HasPrefix(trimmed, "needs:") { job.needs = parseNeeds(trimmed) + // Block-list form: a bare `needs:` followed by deeper-indented + // `- name` entries. Consume those entries here so the edge is not + // invisible to the separation guard. + for i+1 < len(lines) { + next := strings.TrimSpace(lines[i+1]) + if !strings.HasPrefix(next, "- ") { + break + } + if name := strings.Trim(strings.TrimSpace(strings.TrimPrefix(next, "- ")), `"'`); name != "" { + job.needs = append(job.needs, name) + } + i++ + } } } @@ -364,10 +377,11 @@ func parseWorkflowJobs(workflow string) []workflowJob { return jobs } -// parseNeeds reads a job's `needs:` declaration in any of YAML's three shapes — -// scalar (`needs: goreleaser`), flow sequence (`needs: [a, b]`), or (when the -// line is just `needs:`) an empty set the caller treats as no edge. Block-list -// form is not used in this repo's workflows and is not parsed here. +// parseNeeds reads the `needs:` LINE itself: scalar (`needs: goreleaser`) or +// flow sequence (`needs: [a, b]`). A bare `needs:` line yields no edges here — +// the block-list entries that follow it on deeper-indented `- name` lines are +// consumed by parseWorkflowJobs, which has the surrounding lines parseNeeds +// cannot see. func parseNeeds(trimmed string) []string { rest := strings.TrimSpace(strings.TrimPrefix(trimmed, "needs:")) if rest == "" { From d371b5b61acf0064260c0f63b91c2866a5b0fb5c Mon Sep 17 00:00:00 2001 From: CL Kao Date: Fri, 5 Jun 2026 14:42:14 -0700 Subject: [PATCH 3/5] test: parse goreleaser needs: with yaml.v3 to close all separation-guard shape evasions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M1a/M1b (cycle-2 re-audit): the hand-rolled needs-parser still let real goreleaser->journey-ledger re-couplings stay GREEN — an inline trailing `# comment` evaded all three shapes (the file's own safe reverse edge is comment-documented), and a blank line between block-list entries dropped the later entries. Patching more shapes is whack-a-mole; replace the scanner with a gopkg.in/yaml.v3 parse (already a direct dep) that reads each job's needs as a normalized list — comments, blank lines, scalar/flow/block, quoting, and anchors handled for free. parseJobNeeds replaces parseNeeds entirely; parseWorkflowJobs now sources needs from it and keeps line-based step attribution. assertGoreleaserDoesNotNeedJourneyLedger keeps its contract: REJECT goreleaser->journey-ledger, TOLERATE the reverse. The rejection test now covers scalar/flow/block plus all three inline-comment variants plus a blank-line-split block list (all RED); a new TestReleaseWorkflowGuardToleratesSafeReverseEdge- InEveryShape proves the safe reverse edge stays GREEN in every shape. Mutation-confirm: neutering parseJobNeeds reds all 7 rejection forms while tolerance stays green. No regression: skip-gating, AC-1 job-level, the AC-2 release-notes.txt anchor, and the P1 || true gh-error tolerance are untouched. go test ./internal/release/ 45/45; full offline go test ./... 1187 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/release/journey_workflow_test.go | 50 +++++++++++- internal/release/workflow_exec_guard_test.go | 83 +++++++++++--------- 2 files changed, 93 insertions(+), 40 deletions(-) diff --git a/internal/release/journey_workflow_test.go b/internal/release/journey_workflow_test.go index ef32f9b74..981bae881 100644 --- a/internal/release/journey_workflow_test.go +++ b/internal/release/journey_workflow_test.go @@ -160,8 +160,9 @@ func TestReleaseWorkflowGuardRejectsCommentOnlyJourneyCostPublish(t *testing.T) // hardened guard binds `needs:` to the OWNING job (the one carrying the // goreleaser action) and must RED on this edge — while the SAFE one-way // `journey-ledger: needs: goreleaser` edge in the real file does NOT trip it. -// The edge is injected in every YAML shape `needs:` can take (scalar, flow -// sequence, and block list) so no single re-coupling syntax evades the guard. +// The edge is injected in every YAML shape `needs:` can take — scalar, flow +// sequence, block list, the same three with a trailing inline `# comment`, and a +// block list split by a blank line — so no syntactic variation evades the guard. func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger(t *testing.T) { release := readWorkflow(t, "release.yml") if err := assertReleaseWorkflowPublishesJourneyCosts(release); err != nil { @@ -175,6 +176,10 @@ func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger(t *testing.T) { {"scalar", " needs: journey-ledger\n"}, {"flow sequence", " needs: [journey-ledger]\n"}, {"block list", " needs:\n - journey-ledger\n"}, + {"scalar with inline comment", " needs: journey-ledger # required for the upload\n"}, + {"flow sequence with inline comment", " needs: [journey-ledger] # required for the upload\n"}, + {"block list with inline comment", " needs:\n - journey-ledger # required for the upload\n"}, + {"block list split by a blank line", " needs:\n - some-other-gate\n\n - journey-ledger\n"}, } { t.Run(tc.name, func(t *testing.T) { adversarial := strings.Replace(release, @@ -192,6 +197,47 @@ func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger(t *testing.T) { } } +// TestReleaseWorkflowGuardToleratesSafeReverseEdgeInEveryShape is the direction +// twin of the rejection test: the SAFE one-way edge `journey-ledger: needs: +// goreleaser` (the upload waits for the Release goreleaser creates) must stay +// GREEN no matter which YAML shape it is written in. The guard binds `needs:` to +// the OWNING job, so rewriting the real file's reverse edge into scalar, flow, +// block-list, their inline-comment variants, or a blank-line-split block list +// must NOT trip the goreleaser→journey-ledger check. +func TestReleaseWorkflowGuardToleratesSafeReverseEdgeInEveryShape(t *testing.T) { + release := readWorkflow(t, "release.yml") + const realEdge = " journey-ledger:\n needs: goreleaser\n" + if !strings.Contains(release, realEdge) { + t.Fatalf("fixture workflow missing the journey-ledger needs: goreleaser edge to rewrite") + } + + for _, tc := range []struct { + name string + needsForm string + }{ + // The flow/block/comment/blank-line shapes differ from the real file's + // scalar edge, so the rewrite is exercised; the scalar baseline itself is + // the real file the parent guard already passes. + {"flow sequence", " needs: [goreleaser]\n"}, + {"block list", " needs:\n - goreleaser\n"}, + {"scalar with inline comment", " needs: goreleaser # upload waits for the Release\n"}, + {"flow sequence with inline comment", " needs: [goreleaser] # upload waits for the Release\n"}, + {"block list with inline comment", " needs:\n - goreleaser # upload waits for the Release\n"}, + {"block list split by a blank line", " needs:\n - goreleaser\n\n - some-other-gate\n"}, + } { + t.Run(tc.name, func(t *testing.T) { + rewritten := strings.Replace(release, realEdge, " journey-ledger:\n"+tc.needsForm, 1) + if rewritten == release { + t.Fatal("rewrite of the journey-ledger needs edge did not apply") + } + + if err := assertReleaseWorkflowPublishesJourneyCosts(rewritten); err != nil { + t.Fatalf("release workflow guard wrongly rejected the SAFE reverse edge via %s form: %v", tc.name, err) + } + }) + } +} + // TestReleaseWorkflowSkipsLedgerWhenNoProducerRun proves the JOB-LEVEL skip // consequence of POLICY 1: when the download step finds no producer run it must // not merely exit 0 — the downstream Build/Publish-ledger steps must be GATED on diff --git a/internal/release/workflow_exec_guard_test.go b/internal/release/workflow_exec_guard_test.go index 3212be4c7..06524593b 100644 --- a/internal/release/workflow_exec_guard_test.go +++ b/internal/release/workflow_exec_guard_test.go @@ -3,6 +3,8 @@ package release import ( "fmt" "strings" + + "gopkg.in/yaml.v3" ) type workflowStep struct { @@ -300,10 +302,12 @@ func parseWorkflowSteps(workflow string) []workflowStep { } // parseWorkflowJobs partitions a workflow into its top-level jobs (the 2-space -// indented keys under `jobs:`), capturing each job's declared `needs:` edges and -// the steps it owns. This is the job-aware view the separation guard needs: the -// flat parseWorkflowSteps cannot tell which job a `needs:` edge or step belongs -// to, so a `goreleaser → journey-ledger` edge would otherwise be invisible. +// indented keys under `jobs:`), pairing each job's `needs:` edges (from a real +// YAML parse, parseJobNeeds) with the steps it owns (sliced from the job's text +// span and run through the flat step parser). This is the job-aware view the +// separation guard needs: the flat parseWorkflowSteps cannot tell which job a +// step belongs to, so a `goreleaser → journey-ledger` edge would otherwise be +// invisible. func parseWorkflowJobs(workflow string) []workflowJob { lines := strings.Split(workflow, "\n") jobsStart := -1 @@ -317,6 +321,12 @@ func parseWorkflowJobs(workflow string) []workflowJob { return nil } + // `needs:` edges come from a real YAML parse (parseJobNeeds), not the + // line-walk: needs is the security-relevant field for the separation guard, + // and a hand-rolled scan misses comment-trailed entries, blank-line-split + // block lists, anchors, and quoting that a parser handles for free. + needsByJob := parseJobNeeds(workflow) + var jobs []workflowJob for i := jobsStart; i < len(lines); i++ { line := lines[i] @@ -330,29 +340,10 @@ func parseWorkflowJobs(workflow string) []workflowJob { break } if leadingSpaces(line) == 2 && strings.HasSuffix(trimmed, ":") { - jobs = append(jobs, workflowJob{name: strings.TrimSuffix(trimmed, ":")}) - continue - } - if len(jobs) == 0 { + name := strings.TrimSuffix(trimmed, ":") + jobs = append(jobs, workflowJob{name: name, needs: needsByJob[name]}) continue } - job := &jobs[len(jobs)-1] - if leadingSpaces(line) == 4 && strings.HasPrefix(trimmed, "needs:") { - job.needs = parseNeeds(trimmed) - // Block-list form: a bare `needs:` followed by deeper-indented - // `- name` entries. Consume those entries here so the edge is not - // invisible to the separation guard. - for i+1 < len(lines) { - next := strings.TrimSpace(lines[i+1]) - if !strings.HasPrefix(next, "- ") { - break - } - if name := strings.Trim(strings.TrimSpace(strings.TrimPrefix(next, "- ")), `"'`); name != "" { - job.needs = append(job.needs, name) - } - i++ - } - } } // Attach steps by slicing each job's text span and reusing the flat parser. @@ -377,21 +368,37 @@ func parseWorkflowJobs(workflow string) []workflowJob { return jobs } -// parseNeeds reads the `needs:` LINE itself: scalar (`needs: goreleaser`) or -// flow sequence (`needs: [a, b]`). A bare `needs:` line yields no edges here — -// the block-list entries that follow it on deeper-indented `- name` lines are -// consumed by parseWorkflowJobs, which has the surrounding lines parseNeeds -// cannot see. -func parseNeeds(trimmed string) []string { - rest := strings.TrimSpace(strings.TrimPrefix(trimmed, "needs:")) - if rest == "" { +// parseJobNeeds reads every job's `needs:` edges via a real YAML parse, keyed by +// job name. GitHub Actions allows `needs:` as a scalar (`needs: a`) or a +// sequence (flow `[a, b]` or block list); yaml.v3 normalizes all of them — and +// strips inline `# comments`, tolerates blank lines between block-list entries, +// and resolves quoting and anchors — so no syntactic shape of a re-coupling +// edge can hide from the separation guard. A workflow that does not parse as +// YAML yields no edges (the guard's other checks still run against the text). +func parseJobNeeds(workflow string) map[string][]string { + var doc struct { + Jobs map[string]struct { + Needs yaml.Node `yaml:"needs"` + } `yaml:"jobs"` + } + if err := yaml.Unmarshal([]byte(workflow), &doc); err != nil { return nil } - rest = strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(rest, "["), "]")) - var out []string - for _, part := range strings.Split(rest, ",") { - if name := strings.Trim(strings.TrimSpace(part), `"'`); name != "" { - out = append(out, name) + out := make(map[string][]string, len(doc.Jobs)) + for name, job := range doc.Jobs { + switch job.Needs.Kind { + case yaml.ScalarNode: + if job.Needs.Value != "" { + out[name] = []string{job.Needs.Value} + } + case yaml.SequenceNode: + var needs []string + for _, item := range job.Needs.Content { + if item.Kind == yaml.ScalarNode && item.Value != "" { + needs = append(needs, item.Value) + } + } + out[name] = needs } } return out From 435b97a754e4a17a413206edd44501003fec09e3 Mon Sep 17 00:00:00 2001 From: CL Kao Date: Fri, 5 Jun 2026 18:55:19 -0700 Subject: [PATCH 4/5] test: parse the whole release.yml job graph with yaml.v3 so job-identity shapes can't evade the separation guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M1/M2 (cycle-3 re-audit): the cycle-3 fix closed the needs-VALUE shapes, but JOB IDENTITY was still a hand-rolled line-walk and two shapes beat it — an `&anchor`/`*alias` on needs reached parseJobNeeds as an unhandled node so the edge vanished, and a quoted job key ("goreleaser":) missed the literal needsByJob lookup. Patching either is more whack-a-mole. Finish the conversion: parse the entire job graph — names, alias-resolved needs, and per-job steps — from ONE yaml.v3 pass and attribute goreleaser/builder from that structure. The last line-walk for job identity (parseJobNeeds, jobsStart, jobHeaderAt) is gone. yaml.v3 resolves aliases before decode and normalizes quoted keys natively, so M1, M2, and any future job-identity shape are handled by the parser, not a growing line scan. needs decodes through a needsList type that accepts scalar or sequence. Adversarial coverage: TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger- ViaJobIdentityShapes REDs both the anchor/alias and quoted-key re-couplings; the tolerance twin keeps the safe reverse edge green in those shapes; a new TestReleaseWorkflowJobGraphMatchesGitHubActions pins the real-file parse to GHA (goreleaser needs empty, journey-ledger needs only goreleaser). Mutation-confirm: dropping sequence-edge decode reds the alias form; dropping scalar-edge decode reds the quoted-key form. No regression: the 7-form value table + its tolerance twin, skip-gating, AC-1 job-level, the AC-2 release-notes.txt anchor, and P1 gh-error tolerance all green. go test ./internal/release/ 52/52; full offline go test ./... 1194 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/release/journey_workflow_test.go | 129 +++++++++++++++++ internal/release/workflow_exec_guard_test.go | 142 +++++++------------ 2 files changed, 182 insertions(+), 89 deletions(-) diff --git a/internal/release/journey_workflow_test.go b/internal/release/journey_workflow_test.go index 981bae881..847ec8dca 100644 --- a/internal/release/journey_workflow_test.go +++ b/internal/release/journey_workflow_test.go @@ -197,6 +197,61 @@ func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedger(t *testing.T) { } } +// TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedgerViaJobIdentityShapes +// attacks the OTHER half of the edge — the job-identity end — with YAML shapes a +// line-walk over the raw text cannot see but the real GHA YAML resolver does: +// - alias: an `&anchor` on journey-ledger's needs and a `*anchor` on +// goreleaser's needs — GHA resolves the alias to `[goreleaser, journey-ledger]` +// so goreleaser really does need journey-ledger. +// - quoted job key: `"goreleaser":` is the same job key as `goreleaser:` to a +// YAML parser, so a `needs: journey-ledger` under it is a real re-coupling. +// +// Both must RED. Because the whole job graph (names + alias-resolved needs + +// step attribution) comes from one yaml.v3 pass, no job-identity shape evades. +func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedgerViaJobIdentityShapes(t *testing.T) { + release := readWorkflow(t, "release.yml") + if err := assertReleaseWorkflowPublishesJourneyCosts(release); err != nil { + t.Fatalf("real release.yml unexpectedly fails the guard before mutation: %v", err) + } + + for _, tc := range []struct { + name string + mutate func(string) string + }{ + { + "anchor/alias needs", func(s string) string { + // Anchor a list containing journey-ledger on the reverse edge, then + // alias it onto goreleaser — GHA resolves *grx so goreleaser needs + // journey-ledger. + s = strings.Replace(s, + " journey-ledger:\n needs: goreleaser\n", + " journey-ledger:\n needs: &grx [goreleaser, journey-ledger]\n", 1) + return strings.Replace(s, + " goreleaser:\n runs-on: macos-latest", + " goreleaser:\n needs: *grx\n runs-on: macos-latest", 1) + }, + }, + { + "quoted job key", func(s string) string { + return strings.Replace(s, + " goreleaser:\n runs-on: macos-latest", + " \"goreleaser\":\n needs: journey-ledger\n runs-on: macos-latest", 1) + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + adversarial := tc.mutate(release) + if adversarial == release { + t.Fatal("fixture workflow shape to mutate not found") + } + + if err := assertReleaseWorkflowPublishesJourneyCosts(adversarial); err == nil { + t.Fatalf("release workflow guard accepted a goreleaser→journey-ledger re-coupling via %s (the job-identity end evaded the guard)", tc.name) + } + }) + } +} + // TestReleaseWorkflowGuardToleratesSafeReverseEdgeInEveryShape is the direction // twin of the rejection test: the SAFE one-way edge `journey-ledger: needs: // goreleaser` (the upload waits for the Release goreleaser creates) must stay @@ -238,6 +293,80 @@ func TestReleaseWorkflowGuardToleratesSafeReverseEdgeInEveryShape(t *testing.T) } } +// TestReleaseWorkflowGuardToleratesSafeReverseEdgeViaJobIdentityShapes is the +// direction twin of the job-identity rejection test: an anchor/alias on the SAFE +// reverse edge, or a quoted journey-ledger job key, must NOT trip the guard — +// the edge still points journey-ledger → goreleaser, the required upload order, +// not the cut-blocking reverse. +func TestReleaseWorkflowGuardToleratesSafeReverseEdgeViaJobIdentityShapes(t *testing.T) { + release := readWorkflow(t, "release.yml") + + for _, tc := range []struct { + name string + mutate func(string) string + }{ + { + "anchor/alias needs", func(s string) string { + // Anchor goreleaser on a one-off carrier job, alias it onto the + // journey-ledger reverse edge — still journey-ledger → goreleaser. + return strings.Replace(s, + " journey-ledger:\n needs: goreleaser\n", + " journey-ledger:\n needs: &grx [goreleaser]\n", 1) + }, + }, + { + "quoted job key", func(s string) string { + return strings.Replace(s, + " journey-ledger:\n needs: goreleaser\n", + " \"journey-ledger\":\n needs: goreleaser\n", 1) + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + rewritten := tc.mutate(release) + if rewritten == release { + t.Fatal("rewrite of the safe reverse edge did not apply") + } + + if err := assertReleaseWorkflowPublishesJourneyCosts(rewritten); err != nil { + t.Fatalf("release workflow guard wrongly rejected the SAFE reverse edge via %s: %v", tc.name, err) + } + }) + } +} + +// TestReleaseWorkflowJobGraphMatchesGitHubActions asserts the parsed job graph of +// the real release.yml matches what GitHub Actions resolves: goreleaser has NO +// `needs:` (it cuts regardless), and journey-ledger needs ONLY goreleaser (the +// one-way upload-ordering edge). This pins the parser to GHA semantics so a +// future parse regression that drops or invents an edge is caught directly, not +// only through the guard. +func TestReleaseWorkflowJobGraphMatchesGitHubActions(t *testing.T) { + release := readWorkflow(t, "release.yml") + needs := map[string][]string{} + for _, job := range parseWorkflowJobs(release) { + needs[job.name] = []string(job.needs) + } + + if _, ok := needs["goreleaser"]; !ok { + t.Fatalf("parsed graph missing the goreleaser job; got jobs %v", keysOf(needs)) + } + if len(needs["goreleaser"]) != 0 { + t.Errorf("goreleaser must have no needs (cuts regardless); got %v", needs["goreleaser"]) + } + if got := needs["journey-ledger"]; len(got) != 1 || got[0] != "goreleaser" { + t.Errorf("journey-ledger must need only goreleaser; got %v", got) + } +} + +func keysOf(m map[string][]string) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} + // TestReleaseWorkflowSkipsLedgerWhenNoProducerRun proves the JOB-LEVEL skip // consequence of POLICY 1: when the download step finds no producer run it must // not merely exit 0 — the downstream Build/Publish-ledger steps must be GATED on diff --git a/internal/release/workflow_exec_guard_test.go b/internal/release/workflow_exec_guard_test.go index 06524593b..47bceea57 100644 --- a/internal/release/workflow_exec_guard_test.go +++ b/internal/release/workflow_exec_guard_test.go @@ -301,107 +301,71 @@ func parseWorkflowSteps(workflow string) []workflowStep { return steps } -// parseWorkflowJobs partitions a workflow into its top-level jobs (the 2-space -// indented keys under `jobs:`), pairing each job's `needs:` edges (from a real -// YAML parse, parseJobNeeds) with the steps it owns (sliced from the job's text -// span and run through the flat step parser). This is the job-aware view the -// separation guard needs: the flat parseWorkflowSteps cannot tell which job a -// step belongs to, so a `goreleaser → journey-ledger` edge would otherwise be -// invisible. +// parseWorkflowJobs reads the workflow's whole job graph — job names, their +// `needs:` edges, and the steps each owns — from a single gopkg.in/yaml.v3 pass. +// Job identity is the security-relevant axis for the separation guard, and a +// real YAML parse resolves what a line-walk cannot: anchored/aliased needs +// (`needs: *anchor`), quoted job keys (`"goreleaser":`), comment-trailed and +// blank-line-split sequences, and flow/block/scalar forms all normalize the same +// way GitHub Actions resolves them. A workflow that does not parse as YAML +// yields no jobs (the guard's text-level checks still run against the source). func parseWorkflowJobs(workflow string) []workflowJob { - lines := strings.Split(workflow, "\n") - jobsStart := -1 - for i, line := range lines { - if strings.TrimSpace(line) == "jobs:" && leadingSpaces(line) == 0 { - jobsStart = i + 1 - break - } + var doc struct { + Jobs map[string]struct { + Needs needsList `yaml:"needs"` + Steps []struct { + Name string `yaml:"name"` + ID string `yaml:"id"` + If string `yaml:"if"` + Uses string `yaml:"uses"` + Run string `yaml:"run"` + } `yaml:"steps"` + } `yaml:"jobs"` } - if jobsStart < 0 { + if err := yaml.Unmarshal([]byte(workflow), &doc); err != nil { return nil } - - // `needs:` edges come from a real YAML parse (parseJobNeeds), not the - // line-walk: needs is the security-relevant field for the separation guard, - // and a hand-rolled scan misses comment-trailed entries, blank-line-split - // block lists, anchors, and quoting that a parser handles for free. - needsByJob := parseJobNeeds(workflow) - var jobs []workflowJob - for i := jobsStart; i < len(lines); i++ { - line := lines[i] - trimmed := strings.TrimSpace(line) - if trimmed == "" || strings.HasPrefix(trimmed, "#") { - continue - } - // A top-level key at column 0 ends the jobs block (e.g. a trailing - // document key). Job keys sit at exactly 2-space indent. - if leadingSpaces(line) == 0 { - break - } - if leadingSpaces(line) == 2 && strings.HasSuffix(trimmed, ":") { - name := strings.TrimSuffix(trimmed, ":") - jobs = append(jobs, workflowJob{name: name, needs: needsByJob[name]}) - continue - } - } - - // Attach steps by slicing each job's text span and reusing the flat parser. - jobHeaderAt := func(name string) int { - for i := jobsStart; i < len(lines); i++ { - if leadingSpaces(lines[i]) == 2 && strings.TrimSpace(lines[i]) == name+":" { - return i - } - } - return -1 - } - for j := range jobs { - start := jobHeaderAt(jobs[j].name) - end := len(lines) - if j+1 < len(jobs) { - end = jobHeaderAt(jobs[j+1].name) - } - if start >= 0 && end > start { - jobs[j].steps = parseWorkflowSteps(strings.Join(lines[start:end], "\n")) - } + for name, job := range doc.Jobs { + wj := workflowJob{name: name, needs: job.Needs} + for _, step := range job.Steps { + wj.steps = append(wj.steps, workflowStep{ + name: step.Name, + id: step.ID, + ifCond: step.If, + uses: step.Uses, + run: step.Run, + }) + } + jobs = append(jobs, wj) } return jobs } -// parseJobNeeds reads every job's `needs:` edges via a real YAML parse, keyed by -// job name. GitHub Actions allows `needs:` as a scalar (`needs: a`) or a -// sequence (flow `[a, b]` or block list); yaml.v3 normalizes all of them — and -// strips inline `# comments`, tolerates blank lines between block-list entries, -// and resolves quoting and anchors — so no syntactic shape of a re-coupling -// edge can hide from the separation guard. A workflow that does not parse as -// YAML yields no edges (the guard's other checks still run against the text). -func parseJobNeeds(workflow string) map[string][]string { - var doc struct { - Jobs map[string]struct { - Needs yaml.Node `yaml:"needs"` - } `yaml:"jobs"` - } - if err := yaml.Unmarshal([]byte(workflow), &doc); err != nil { +// needsList decodes a job's `needs:` — GitHub Actions allows a scalar +// (`needs: a`) or a sequence (flow `[a, b]` or block list), and either may be an +// anchor/alias. Decoding through this custom type lets yaml.v3 resolve aliases +// to their anchored value and normalize quoting before we read the names, so no +// `needs:` shape can hide a re-coupling edge from the separation guard. +type needsList []string + +func (n *needsList) UnmarshalYAML(value *yaml.Node) error { + switch value.Kind { + case yaml.ScalarNode: + if value.Value != "" { + *n = needsList{value.Value} + } return nil - } - out := make(map[string][]string, len(doc.Jobs)) - for name, job := range doc.Jobs { - switch job.Needs.Kind { - case yaml.ScalarNode: - if job.Needs.Value != "" { - out[name] = []string{job.Needs.Value} - } - case yaml.SequenceNode: - var needs []string - for _, item := range job.Needs.Content { - if item.Kind == yaml.ScalarNode && item.Value != "" { - needs = append(needs, item.Value) - } - } - out[name] = needs + case yaml.SequenceNode: + var names []string + if err := value.Decode(&names); err != nil { + return err } + *n = names + return nil + default: + return nil } - return out } // findExecutableStep returns the index of the step named name whose run block From b845c9a4e0d7f6c38ce10fa9f1d6e12b0b70059b Mon Sep 17 00:00:00 2001 From: CL Kao Date: Fri, 5 Jun 2026 19:14:30 -0700 Subject: [PATCH 5/5] test: make the goreleaser separation guard deterministic across multiple action carriers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cycle-5 (4th audit): assertGoreleaserDoesNotNeedJourneyLedger overwrote a single goreleaserJob/ledgerJob string on each map-iteration match and checked needs on only the LAST-iterated carrier. Go map order is random, so a workflow with 2+ jobs running the goreleaser action — one declaring needs: journey-ledger — greenlit on the runs that happened to iterate the safe carrier last (~flaky), leaving the guard latently unsound on any multi-carrier shape. Collect ALL goreleaser-action carriers AND ALL ledger-builder carriers in one pass, then reject if ANY goreleaser carrier needs ANY ledger carrier. No last-wins overwrite, no map-order dependence — deterministic. A job that is itself both carriers cannot need itself, so the self-pair is skipped (preserves the old single-job-layout semantics). Adversarial coverage: TestReleaseWorkflowGuardRejectsMultiCarrierGoreleaserNeeds- JourneyLedger inserts a second goreleaser-action job that needs journey-ledger and asserts the guard REDs on every one of 200 runs (a last-wins regression greenlights within that loop); TestReleaseWorkflowGuardToleratesMultiCarrier- SafeShape keeps two carriers that need nothing GREEN over 200 runs. Mutation-confirm: reverting to a last-wins single-carrier check reds the rejection test. No regression: the job-identity rejections + tolerance twins + parse-matches-GHA, the 7-form value table, skip-gating, AC-1, AC-2 release-notes.txt anchor, and P1 all green; release.yml untouched this cycle. go test ./internal/release/ 54/54 (deterministic across -count=10); full offline go test ./... 1196 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/release/journey_workflow_test.go | 69 ++++++++++++++++++++ internal/release/workflow_exec_guard_test.go | 50 +++++++------- 2 files changed, 96 insertions(+), 23 deletions(-) diff --git a/internal/release/journey_workflow_test.go b/internal/release/journey_workflow_test.go index 847ec8dca..ab2718479 100644 --- a/internal/release/journey_workflow_test.go +++ b/internal/release/journey_workflow_test.go @@ -1,6 +1,7 @@ package release import ( + "fmt" "os" "os/exec" "path/filepath" @@ -252,6 +253,74 @@ func TestReleaseWorkflowGuardRejectsGoreleaserNeedsJourneyLedgerViaJobIdentitySh } } +// goreleaserCarrierJob is a second job that ALSO runs the goreleaser action, +// used to build multi-carrier fixtures. The `%s` is its `needs:` block (may be +// empty). It must be authored before the real goreleaser job's text so the +// flat document-order builder<=goreleaser guard stays satisfied. +const goreleaserCarrierJob = ` goreleaser-extra: +%s runs-on: macos-latest + steps: + - name: Run goreleaser (extra carrier) + uses: goreleaser/goreleaser-action@v6 + with: + version: "~> v2" + args: release --clean +` + +// insertGoreleaserCarrier authors a second goreleaser-action job (with the given +// needs block) just before the real goreleaser job in the workflow text. +func insertGoreleaserCarrier(t *testing.T, workflow, needsBlock string) string { + t.Helper() + const anchor = " goreleaser:\n runs-on: macos-latest" + if !strings.Contains(workflow, anchor) { + t.Fatal("fixture workflow missing the goreleaser job header to anchor a second carrier before") + } + carrier := fmt.Sprintf(goreleaserCarrierJob, needsBlock) + return strings.Replace(workflow, anchor, carrier+anchor, 1) +} + +// TestReleaseWorkflowGuardRejectsMultiCarrierGoreleaserNeedsJourneyLedger proves +// the guard is DETERMINISTIC across multiple goreleaser-action carriers. With two +// jobs running the goreleaser action — one declaring `needs: journey-ledger` — +// a guard that inspected only the LAST map-iterated carrier would greenlight on +// ~the fraction of runs that iterate the safe carrier last (Go map order is +// random). The collect-all guard rejects on ANY goreleaser carrier → ledger +// edge, so it must RED on EVERY run. Asserted over many iterations so a +// last-wins regression cannot hide behind a lucky map order. +func TestReleaseWorkflowGuardRejectsMultiCarrierGoreleaserNeedsJourneyLedger(t *testing.T) { + release := readWorkflow(t, "release.yml") + adversarial := insertGoreleaserCarrier(t, release, " needs: journey-ledger\n") + if adversarial == release { + t.Fatal("multi-carrier mutation did not apply") + } + + const runs = 200 + for i := 0; i < runs; i++ { + if err := assertReleaseWorkflowPublishesJourneyCosts(adversarial); err == nil { + t.Fatalf("run %d/%d: guard accepted a multi-carrier workflow where a goreleaser-action job needs journey-ledger (last-wins map-order flakiness)", i+1, runs) + } + } +} + +// TestReleaseWorkflowGuardToleratesMultiCarrierSafeShape is the safe twin: two +// goreleaser-action carriers, NEITHER needing the journey-ledger job (the extra +// carrier needs nothing). The collect-all guard must stay GREEN on every run — +// multiple carriers are not themselves a re-coupling. +func TestReleaseWorkflowGuardToleratesMultiCarrierSafeShape(t *testing.T) { + release := readWorkflow(t, "release.yml") + safe := insertGoreleaserCarrier(t, release, "") + if safe == release { + t.Fatal("multi-carrier safe mutation did not apply") + } + + const runs = 200 + for i := 0; i < runs; i++ { + if err := assertReleaseWorkflowPublishesJourneyCosts(safe); err != nil { + t.Fatalf("run %d/%d: guard wrongly rejected a safe multi-carrier workflow (extra goreleaser carrier needs nothing): %v", i+1, runs, err) + } + } +} + // TestReleaseWorkflowGuardToleratesSafeReverseEdgeInEveryShape is the direction // twin of the rejection test: the SAFE one-way edge `journey-ledger: needs: // goreleaser` (the upload waits for the Release goreleaser creates) must stay diff --git a/internal/release/workflow_exec_guard_test.go b/internal/release/workflow_exec_guard_test.go index 47bceea57..db6f45f3b 100644 --- a/internal/release/workflow_exec_guard_test.go +++ b/internal/release/workflow_exec_guard_test.go @@ -144,45 +144,49 @@ func assertReleaseWorkflowPublishesJourneyCosts(workflow string) error { } // assertGoreleaserDoesNotNeedJourneyLedger binds the separation invariant to the -// job DAG: the job that carries the goreleaser action must NOT declare `needs:` -// on the job that builds the journey-cost ledger. That edge would re-block the -// cut on the never-fired Runtime-Live-E2E run (the exact bug this separation -// closes). The edge is bound to the OWNING job — found by which job holds the -// goreleaser action vs. which holds the journey-cost builder — so the SAFE -// reverse edge (journey-ledger needs: goreleaser, required so `gh release upload` -// runs after the Release exists) does not false-trip the check. +// job DAG: no job carrying the goreleaser action may declare `needs:` on a job +// that builds the journey-cost ledger. That edge would re-block the cut on the +// never-fired Runtime-Live-E2E run (the exact bug this separation closes). It +// collects ALL goreleaser-action carriers and ALL ledger-builder carriers and +// rejects if ANY carrier→ledger edge exists — so the result does not depend on +// Go's randomized map-iteration order (a last-wins scan over a multi-carrier +// workflow would be latently flaky). The SAFE reverse edge (journey-ledger +// needs: goreleaser, required so `gh release upload` runs after the Release +// exists) points the other way and does not false-trip the check; a job that is +// itself both carriers cannot need itself, so the self-pair is skipped. func assertGoreleaserDoesNotNeedJourneyLedger(workflow string) error { jobs := parseWorkflowJobs(workflow) - goreleaserJob, ledgerJob := "", "" + ledgerJobs := map[string]bool{} + var goreleaserCarriers []workflowJob for _, job := range jobs { + isGoreleaser, isLedger := false, false for _, step := range job.steps { if strings.HasPrefix(step.uses, "goreleaser/goreleaser-action@") { - goreleaserJob = job.name + isGoreleaser = true } for _, command := range executableShellCommands(step.run) { if isJourneyCostBuilder(command) { - ledgerJob = job.name + isLedger = true } } } + if isLedger { + ledgerJobs[job.name] = true + } + if isGoreleaser { + goreleaserCarriers = append(goreleaserCarriers, job) + } } - if goreleaserJob == "" { + if len(goreleaserCarriers) == 0 { return fmt.Errorf("release.yml has no job carrying the goreleaser action") } - if ledgerJob == "" { + if len(ledgerJobs) == 0 { return fmt.Errorf("release.yml has no job carrying the journey-cost builder") } - if goreleaserJob == ledgerJob { - // Single-job layout: no cross-job needs edge to re-block the cut. - return nil - } - for _, job := range jobs { - if job.name != goreleaserJob { - continue - } - for _, need := range job.needs { - if need == ledgerJob { - return fmt.Errorf("release.yml goreleaser job %q declares needs: %q — re-blocking the cut on the journey-ledger job", goreleaserJob, ledgerJob) + for _, carrier := range goreleaserCarriers { + for _, need := range carrier.needs { + if need != carrier.name && ledgerJobs[need] { + return fmt.Errorf("release.yml goreleaser job %q declares needs: %q — re-blocking the cut on the journey-ledger job", carrier.name, need) } } }