Semantic-Org · jlukic · May 4, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/.github/workflows/benchmarks-report.yml b/.github/workflows/benchmarks-report.yml
@@ -51,15 +51,6 @@ jobs:
           git fetch origin main --depth=1
           git checkout origin/main -- tools/ci/bench/reporter/ 2>/dev/null || true
 
-      # Overlay main's bench-history.json onto the PR checkout so the
-      # reporter's peak-attribution runs against the freshest history,
-      # not whatever version was on the PR's branch point. `|| true` keeps
-      # the step non-fatal if main has no file yet (first run after D3a).
-      - name: Fetch latest bench-history.json from main
-        run: |
-          git fetch origin main --depth=1
-          git show origin/main:tools/ci/bench/reporter/bench-history.json > tools/ci/bench/reporter/bench-history.json 2>/dev/null || true
-
       - name: Download bench artifacts
         uses: dawidd6/action-download-artifact@v21
         with:
@@ -106,6 +97,9 @@ jobs:
           ENDED: ${{ github.event.workflow_run.updated_at }}
         run: |
           WALL_CLOCK=$(( $(date -d "$ENDED" +%s) - $(date -d "$STARTED" +%s) ))
+          # --scope pr: peak attribution uses PR-iteration history only.
+          # main-history is still loaded for drift quantification but excluded
+          # from the comparison set.
           node tools/ci/bench/reporter/reporter.js \
             --results results \
             --sha '${{ github.event.workflow_run.head_sha }}' \
@@ -115,6 +109,7 @@ jobs:
             --base-ref 'main' \
             --repo '${{ github.repository }}' \
             --pr-history pr-history.json \
+            --scope pr \
             --wall-clock "$WALL_CLOCK" \
             --out bench-report
 
@@ -191,18 +186,27 @@ jobs:
 
       - name: Append history entry
+        # display_title is contributor-controlled text (commit subject / PR
+        # title). Pass it through the environment instead of expanding it
+        # inside the script: an apostrophe in the title would otherwise end
+        # the shell quoting and allow arbitrary command injection.
+        env:
+          DISPLAY_TITLE: ${{ github.event.workflow_run.display_title }}
         run: |
-          # The benched commit is the workflow_run's head_sha (the merge
-          # commit on main). Parent comes from the git history we just
-          # fetched. Timestamp is the bench run's completion time so the
-          # history is ordered by when the measurement was taken, not
-          # when the commit landed.
+          # Benched commit is the workflow_run's head_sha; parent comes from
+          # the depth-2 fetch above. Timestamp is the bench run's completion
+          # so history is ordered by measurement, not commit-land time.
+          # baseline-sha.txt is the sidecar uploaded next to each matrix
+          # cell's tachometer JSON. Any cell's value works — all cells in
+          # one workflow_run benched against the same baseline.
           BENCHED_SHA='${{ github.event.workflow_run.head_sha }}'
           PARENT_SHA=$(git rev-parse "$BENCHED_SHA^" 2>/dev/null || echo '')
+          BASELINE_SHA=$(find results -name baseline-sha.txt -type f -exec cat {} \; -quit)
           node tools/ci/bench/reporter/append-history.js \
             --results results \
             --sha "$BENCHED_SHA" \
-            --msg '${{ github.event.workflow_run.display_title }}' \
+            --msg "$DISPLAY_TITLE" \
             --parent-sha "$PARENT_SHA" \
+            --baseline-sha "$BASELINE_SHA" \
             --timestamp '${{ github.event.workflow_run.updated_at }}' \
             --history tools/ci/bench/reporter/bench-history.json
 

diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -127,19 +127,28 @@ jobs:
       # PR: baseline = base branch tip.
       # Push to main: baseline = this commit's parent (so the delta captures
       #   the merged commit's effect; bench-history indexes the current
-      #   commit's absolute CI).
+      #   commit's absolute CI alongside the within-session percent-delta).
+      #
+      # Resolve baseline SHA inline so it can be written to the artifact as
+      # a sidecar (baseline-sha.txt). The reporter pins each metric's
+      # percent_delta_ci to that SHA — required for cross-iteration drift
+      # detection (see tools/ci/bench/reporter/reporter.js:computeBaselineDrift).
       - name: Build baseline
         run: |
           if [ '${{ github.event_name }}' = 'push' ]; then
             # Fetch enough history to reach the parent commit locally.
             git fetch origin main --depth=2
+            BASELINE_SHA=$(git rev-parse HEAD~1)
             git checkout HEAD~1 -- packages/*/src/
           else
             git fetch origin ${{ github.event.pull_request.base.ref }} --depth=1
+            BASELINE_SHA=$(git rev-parse FETCH_HEAD)
             git checkout FETCH_HEAD -- packages/*/src/
           fi
           node packages/${{ matrix.entry.package }}/bench/tachometer/build-ci.js baseline
           git checkout HEAD -- packages/*/src/
+          mkdir -p results
+          echo "$BASELINE_SHA" > results/baseline-sha.txt
 
       # Run just this matrix cell's single config.
       # Per-cell auto-sample tail is governed by the config's own `timeout`
@@ -156,4 +165,9 @@ jobs:
         uses: actions/upload-artifact@v7
         with:
           name: results-${{ matrix.entry.name }}
-          path: results/*.json
+          # Include baseline-sha.txt sidecar — the reporter and history
+          # archiver read it to pin percent_delta_ci entries to their
+          # baseline SHA.
+          path: |
+            results/*.json
+            results/baseline-sha.txt
diff --git a/ai/plans/ROADMAP.md b/ai/plans/ROADMAP.md
@@ -83,6 +83,7 @@ Plans with an open PR or live pair work. Updated as ceremony when a PR opens; en
 
 - [Release 0.18.0](active/release-0-18-0.md) — [PR #122](https://github.com/Semantic-Org/Semantic-Next/pull/122) `docs/shippable` (menu trimming + audit pass pending). Ships the next tagged release; last was 0.17.0 in November.
 - [Signal Performance](active/signal-performance.md) — [PR #150](https://github.com/Semantic-Org/Semantic-Next/pull/150) freeze-by-default. Perf story unresolved (see plan's Bench Results); release inclusion is the open call.
+- [Bench Peak Attribution](active/bench-peak-attribution.md) — [PR #178](https://github.com/Semantic-Org/Semantic-Next/pull/178) methodology fix for cross-session absolute-ms comparisons. Eliminates phantom "Regressions from peak" on PRs.
 
 ---
 
@@ -200,7 +201,7 @@ Slot in wherever there's a gap; not phase-gated.
 | P12 | [Template Spread Syntax](template-spread-syntax.md) | 4-8h | pair | scoped | `{>card ...friend}` — object spread in data passing. Ship when component templates demonstrate need. |
 | P13 | [Template Content Projection](template-wrapper-snippets.md) | 12-16h (1.5-2d) | pair | scoped | `{>content}` — content projection for snippets + subtemplates. Ship when component templates demonstrate need. |
 | P14 | [Template Let Bindings](template-let-bindings.md) | 10-14h (1-2d) | pair | scoped | `{#let}...{/let}` — snippet-for-vars. Ship when component templates demonstrate need. |
-| P15 | [Bench Reporter Overhaul](bench-reporter-overhaul.md) | 16-24h (2-3d) | pair | initial | Two coordinated tracks. **A — peak attribution correctness**: schema_v2 stores within-session percent-delta + tip-of-tree SHA; reporter peak compares same-session deltas; `--scope pr` drops main-history from PR comments. Fixes PR #174's 23 phantom regressions. **B — suite rationalization remainder** (from `icebox/tachometer-overhaul.md`): story-driven config reorg, triplet collapses, `wake-count-single-key` + `nested-mutation` micros, `timeout` final pass. Four PRs under `workflow_run` constraint. Supersedes the icebox plan. |
+| P15 | [Bench Peak Attribution](active/bench-peak-attribution.md) | 9-11h (1.5d) | pair | scoped | Fix the live peak-attribution bug. PR #174 (test-only, no perf changes) currently surfaces 25 phantom "Regressions from peak"; active perf PRs carry partial false-flagging too. Schema_v2 persists `percent_delta_ci` + `baseline_sha` per metric; reporter switches peak compare to same-session percent-delta; `--scope pr` drops main-history overlay on PR comments; drift flag with chain-of-percent-deltas when baselines differ. `bench-history.json` wiped to empty v2 (v1 entries fed the bug). Two PRs: methodology fix + suite cleanup (`toggle-{first,last}-10` + conditional `timeout` 3→2). |
 
 ---
 
@@ -214,6 +215,6 @@ Plans drafted but not on the active roadmap. See `ai/plans/icebox/` for files.
 - [Signals TC39 Integration](icebox/signals-tc39-integration.md) — adopt native `Signal.State`/`Signal.Computed` as backing primitives when TC39 ships. Blocked on TC39 Stage 3+.
 - [Add Icon Stroke Width](icebox/add-icon-stroke-width.md) — power-user feature, post-1.0.
 - [Audit Fix Continuation](icebox/audit-fix-continuation.md) — process work for follow-up audits.
-- [Tachometer Overhaul — PR B remainder](icebox/tachometer-overhaul.md) — suite rationalization + knob tuning + new benches. PR A (CI parallelization) and PR C (in-house Node reporter) shipped; PR B is the only outstanding piece.
+- [Bench Suite Expansion](icebox/bench-suite-expansion.md) — file-scoped hot-path micros (`micro-expression-evaluator`, `micro-signal`, etc.) + new end-to-end benches (`wake-count-single-key`, `nested-mutation`, `hydrate-1000-card`). Surgical adds; lands when underlying perf work needs them.
 - [Contributing Surface](icebox/contributing-surface.md) — pre-1.0 stance + 1.0 graduation pass + post-1.0 triage flow (size + scope, GH-shaped vs md-shaped). Most icebox graduates at 1.0; the rest stays internal.
 - [Registry](icebox/registry.md) — community registry for components and behaviors, runtime + compile-time consumption from one source, author-namespaced publishing under `@sui-hub` with editorial canonical aliases above. Post-Phase 4.