zincware · PythonFZ · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026
diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_baseline.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_baseline.json
diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0002_perf_analysis.json b/.benchmarks/Darwin-CPython-3.11-64bit/0002_perf_analysis.json
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -0,0 +1,108 @@
+# Benchmark CI Pipeline
+#
+# Runs after the "Tests" workflow succeeds on main. Executes the full benchmark
+# suite on Python 3.13 and pushes results to gh-pages at /dev/bench/ via
+# github-action-benchmark.
+#
+# PRs receive a benchmark comparison table in Job Summary and fail on
+# regressions beyond 150% (PR-01, PR-02, PR-03).
+#
+# To enforce the merge gate, enable branch protection requiring the
+# 'Benchmarks' check to pass: Settings > Branches > Branch protection rules.
+#
+# CI-04: Release/tag events do NOT get a separate benchmark run. Every push to
+# main updates the gh-pages dashboard, so releases inherit the latest baseline.
+#
+# CI-01: github-action-benchmark with auto-push: true auto-creates the gh-pages
+# branch on first run. GitHub Pages must be manually enabled once:
+# Settings > Pages > Source: Deploy from a branch > gh-pages / root.
+
+name: Benchmarks
+
+on:
+  workflow_run:  # chained trigger: reacts to runs of the workflow named below
+    workflows: ["Tests"]
+    types: [completed]  # fires when the run finishes; success is checked in the job-level `if`
+    branches: [main]
+  pull_request:
+    types: [opened, synchronize]  # new PRs and new commits pushed to an open PR
+
+permissions:
+  contents: write  # lets the main-branch step push results to the gh-pages branch
+  deployments: write
+
+concurrency:  # one group per PR number, or per commit SHA when the event carries no PR
+  group: benchmark-${{ github.event.pull_request.number || github.sha }}
+  cancel-in-progress: true  # a newer run in the same group replaces the one still running
+
+jobs:
+  benchmark:  # runs the suite once; later steps either publish (main) or compare (PR)
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request' || github.event.workflow_run.conclusion == 'success'
+
+    services:  # service containers started for this job
+      redis:
+        image: redis:7
+        ports:
+          - 6379:6379  # host:container
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+      mongodb:
+        image: mongo:7
+        env:
+          MONGO_INITDB_ROOT_USERNAME: root  # literal credentials for the CI service container
+          MONGO_INITDB_ROOT_PASSWORD: example
+        ports:
+          - 27017:27017  # host:container
+        options: >-
+          --health-cmd "mongosh --eval 'db.runCommand(\"ping\").ok' --quiet"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv and set the python version
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: "3.13"  # quoted so YAML keeps it a string rather than a float
+
+      - name: Install package
+        run: |
+          uv sync --all-extras --dev
+
+      - name: Run benchmarks
+        run: |
+          uv run pytest -m benchmark --benchmark-only --benchmark-json=benchmark_results.json
+
+      - name: Store benchmark results (main)
+        if: github.event_name == 'workflow_run'  # main-branch path only
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          tool: "pytest"
+          output-file-path: benchmark_results.json  # file written by the "Run benchmarks" step
+          gh-pages-branch: gh-pages
+          benchmark-data-dir-path: dev/bench
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          auto-push: true  # publish the new data point to gh-pages
+
+      - name: Compare benchmark results (PR)
+        if: github.event_name == 'pull_request'  # PR path only
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          tool: "pytest"
+          output-file-path: benchmark_results.json
+          gh-pages-branch: gh-pages
+          benchmark-data-dir-path: dev/bench  # same directory the main-branch step writes to
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          auto-push: false  # PR runs must not publish to gh-pages
+          save-data-file: false  # compare only; presumably keeps this run out of the stored history
+          summary-always: true  # emit the comparison table in the Job Summary on every run
+          comment-on-alert: true
+          fail-on-alert: true  # fail the job when a result exceeds the alert threshold
+          alert-threshold: "150%"
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -55,21 +55,3 @@ jobs:
           uv run python --version
           uv run pytest
 
-      - name: Run benchmarks
-        run: |
-          uv run python --version
-          uv run pytest -m benchmark --benchmark-only --benchmark-json=benchmark_results.json
-
-      - name: Visualize benchmarks
-        run: |
-          uv run docs/visualize_benchmarks.py benchmark_results.json
-        if: always()
-
-      - name: Upload benchmark results
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: benchmark-results-${{ matrix.python-version }}
-          path: |
-            benchmark_results.json
-            *.png
diff --git a/.gitignore b/.gitignore
@@ -15,6 +15,7 @@ tests/data/
 
 # Benchmark results (machine-specific)
 benchmark_results.json
+.benchmarks/
 
 # Git worktrees
 .worktrees/
diff --git a/.planning/PROJECT.md b/.planning/PROJECT.md
@@ -26,6 +26,13 @@ Every storage backend must be fast, correct, and tested through a single paramet
 
 ### Active
 
+- [ ] PR benchmark comments showing perf diff vs base branch (BENCH-01)
+- [ ] Benchmark JSON committed to repo, overwritten per merge/tag (BENCH-02)
+- [ ] GitHub Pages dashboard tracking performance over releases (BENCH-03)
+- [ ] Evaluate and select CI benchmark tooling (CML, github-action-benchmark, etc.) (BENCH-04)
+
+### Backlog
+
 - [ ] Store schema in backend metadata at write time for O(1) introspection (OPT-01)
 - [ ] Improve cache-to secondary backend pattern in ASEIO (OPT-02)
 - [ ] Investigate pytest-codspeed for CI-stable benchmarks (OPT-03)
@@ -83,4 +90,4 @@ Known performance characteristics:
 | Facade bounds-check elimination | Delegate IndexError to backend instead of pre-checking len() | ✓ Good — saves round-trip for positive indices |
 
 ---
-*Last updated: 2026-03-06 after v1.0 milestone*
+*Last updated: 2026-03-09 after v0.3.1 milestone start*
diff --git a/.planning/REQUIREMENTS.md b/.planning/REQUIREMENTS.md
@@ -0,0 +1,87 @@
+# Requirements: asebytes
+
+**Defined:** 2026-03-09
+**Core Value:** Every storage backend must be fast, correct, and tested through a single parametrized test suite
+
+## v0.3.1 Requirements
+
+Requirements for CI benchmark infrastructure milestone. Each maps to roadmap phases.
+
+### CI Infrastructure
+
+- [x] **CI-01**: gh-pages branch exists with GitHub Pages enabled serving benchmark dashboard
+- [x] **CI-02**: Post-matrix benchmark job runs github-action-benchmark for a single Python version (latest)
+- [x] **CI-03**: Auto-push to gh-pages only on main branch pushes, not PRs
+- [x] **CI-04**: Release/tag events are covered by the latest main-branch benchmark snapshot on gh-pages (no separate tag-triggered run)
+
+### PR Feedback
+
+- [ ] **PR-01**: PRs receive a full benchmark comparison summary (tables with deltas for all benchmarks) vs main -- showing both regressions and improvements
+- [ ] **PR-02**: Alert threshold is configurable (starting at 150%)
+- [ ] **PR-03**: Fail-on-regression gate blocks PR merge on benchmark regression
+
+### Dashboard
+
+- [ ] **DASH-01**: GitHub Pages serves auto-generated Chart.js time-series dashboard with minimal project docs (description, usage, links)
+- [ ] **DASH-02**: README embeds live benchmark figures from GitHub Pages, replacing static visualization PNGs
+- [ ] **DASH-03**: max-items-in-chart limits data growth on gh-pages
+
+## Maintenance Requirements
+
+### Test Isolation (Phase 8)
+
+- [x] **ISO-01**: MongoDB contract tests pass without data leaking between tests
+- [x] **ISO-02**: Redis contract tests pass without data leaking between tests
+- [x] **ISO-03**: All other backend contract tests remain green after isolation changes (no regressions)
+
+## Future Requirements
+
+### Enhanced PR Comments
+
+- **PR-04**: Per-backend grouping in PR comparison tables
+- **PR-05**: Visualization PNGs embedded in PR comments
+
+### Dashboard Enhancements
+
+- **DASH-04**: Release-tagged benchmark snapshots with comparison view
+- **DASH-05**: Memory profiling pipeline integrated into dashboard
+
+## Out of Scope
+
+| Feature | Reason |
+|---------|--------|
+| Per-Python-version benchmark tracking | Adds complexity without proportional regression detection benefit |
+| Hosted SaaS dashboard (codspeed, bencher) | External dependency; Chart.js on gh-pages is sufficient |
+| Fork PR benchmark comments | GitHub token scoping prevents it; low fork contribution volume |
+| Custom React dashboard | Maintenance overhead; Chart.js auto-generation covers needs |
+| pytest-codspeed integration | Orthogonal to CI tracking; codspeed measures CPU not I/O |
+
+## Traceability
+
+Which phases cover which requirements. Updated during roadmap creation.
+
+| Requirement | Phase | Status |
+|-------------|-------|--------|
+| CI-01 | Phase 5 | Complete |
+| CI-02 | Phase 5 | Complete |
+| CI-03 | Phase 5 | Complete |
+| CI-04 | Phase 5 | Complete |
+| PR-01 | Phase 6 | Pending |
+| PR-02 | Phase 6 | Pending |
+| PR-03 | Phase 6 | Pending |
+| DASH-01 | Phase 7 | Pending |
+| DASH-02 | Phase 7 | Pending |
+| DASH-03 | Phase 7 | Pending |
+| ISO-01 | Phase 8 | Complete |
+| ISO-02 | Phase 8 | Complete |
+| ISO-03 | Phase 8 | Complete |
+
+**Coverage:**
+- v0.3.1 requirements: 10 total
+- Maintenance requirements: 3 total
+- Mapped to phases: 13
+- Unmapped: 0
+
+---
+*Requirements defined: 2026-03-09*
+*Last updated: 2026-03-09 after phase 8 planning*
diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md
@@ -2,27 +2,102 @@
 
 ## Milestones
 
-- ✅ **v1.0 Maintenance & Performance Overhaul** — Phases 1-4 (shipped 2026-03-06)
+- v1.0 Maintenance & Performance Overhaul -- Phases 1-4 (shipped 2026-03-06)
+- v0.3.1 CI Benchmark Infrastructure -- Phases 5-7 (in progress)
 
 ## Phases
 
+**Phase Numbering:**
+- Integer phases (1, 2, 3): Planned milestone work
+- Decimal phases (2.1, 2.2): Urgent insertions (marked with INSERTED)
+
 <details>
-<summary>✅ v1.0 Maintenance & Performance Overhaul (Phases 1-4) — SHIPPED 2026-03-06</summary>
+<summary>v1.0 Maintenance & Performance Overhaul (Phases 1-4) -- SHIPPED 2026-03-06</summary>
 
-- [x] Phase 1: Backend Architecture (3/3 plans) — completed 2026-03-06
-- [x] Phase 2: H5MD Compliance (4/4 plans) — completed 2026-03-06
-- [x] Phase 3: Contract Test Suite (4/4 plans) — completed 2026-03-06
-- [x] Phase 4: Benchmarks & Performance (2/2 plans) — completed 2026-03-06
+- [x] Phase 1: Backend Architecture (3/3 plans) -- completed 2026-03-06
+- [x] Phase 2: H5MD Compliance (4/4 plans) -- completed 2026-03-06
+- [x] Phase 3: Contract Test Suite (4/4 plans) -- completed 2026-03-06
+- [x] Phase 4: Benchmarks & Performance (2/2 plans) -- completed 2026-03-06
 
 Full details: `.planning/milestones/v1.0-ROADMAP.md`
 
 </details>
 
+### v0.3.1 CI Benchmark Infrastructure (In Progress)
+
+**Milestone Goal:** Automated benchmark tracking in CI with PR regression feedback and a public GitHub Pages dashboard.
+
+- [x] **Phase 5: Benchmark Pipeline** - gh-pages branch, benchmark workflow job, auto-push on main, release snapshots (completed 2026-03-09)
+- [ ] **Phase 6: PR Feedback** - PR comparison comments, configurable alert threshold, fail-on-regression gate
+- [ ] **Phase 7: Dashboard and README** - Chart.js dashboard with project docs, README live figures, data growth limits
+
+## Phase Details
+
+### Phase 5: Benchmark Pipeline
+**Goal**: Every push to main and every release tag produces benchmark results stored on gh-pages, building a historical baseline
+**Depends on**: Nothing (first phase of v0.3.1)
+**Requirements**: CI-01, CI-02, CI-03, CI-04
+**Success Criteria** (what must be TRUE):
+  1. gh-pages branch exists and GitHub Pages serves content from it
+  2. Pushing a commit to main triggers a post-matrix benchmark job that stores results on gh-pages
+  3. Opening or updating a PR does NOT push benchmark data to gh-pages
+  4. Tagging a release triggers a benchmark snapshot committed to gh-pages
+**Plans**: 1 plan
+
+Plans:
+- [x] 05-01-PLAN.md — Create benchmark.yml workflow, clean up tests.yml and legacy files
+
+### Phase 6: PR Feedback
+**Goal**: PR authors see benchmark comparison results and regressions block merge
+**Depends on**: Phase 5 (baseline data must exist on gh-pages)
+**Requirements**: PR-01, PR-02, PR-03
+**Success Criteria** (what must be TRUE):
+  1. PRs receive a comment with a full benchmark comparison table showing deltas (regressions and improvements) vs main
+  2. The alert threshold percentage is configurable in the workflow YAML (default 150%)
+  3. A PR with a benchmark regression beyond the threshold is blocked from merging
+**Plans**: 1 plan
+
+Plans:
+- [ ] 06-01-PLAN.md — Add PR trigger, comparison step, and fail-on-regression gate to benchmark.yml
+
+### Phase 7: Dashboard and README
+**Goal**: Users can view benchmark trends over time on a public dashboard and see live figures in the README
+**Depends on**: Phase 5 (dashboard auto-generated by github-action-benchmark)
+**Requirements**: DASH-01, DASH-02, DASH-03
+**Success Criteria** (what must be TRUE):
+  1. GitHub Pages serves a Chart.js time-series dashboard with project description, usage, and links
+  2. README displays live benchmark figures sourced from GitHub Pages, replacing any static visualization PNGs
+  3. max-items-in-chart is configured to limit data growth on gh-pages
+**Plans**: TBD
+
+Plans:
+- [ ] 07-01: TBD
+
+### Phase 8: Fix failing tests in Redis/Mongo backends (test isolation)
+**Goal**: MongoDB and Redis contract tests pass reliably with per-test data isolation via unique group names
+**Depends on**: Nothing (independent bugfix)
+**Requirements**: ISO-01, ISO-02, ISO-03
+**Success Criteria** (what must be TRUE):
+  1. MongoDB tests pass without data leaking between tests
+  2. Redis tests pass without data leaking between tests
+  3. All other backend tests remain green (no regressions)
+**Plans**: 1 plan
+
+Plans:
+- [x] 08-01-PLAN.md — Add unique group= to all facade fixtures for per-test isolation
+
 ## Progress
 
+**Execution Order:**
+Phases execute in numeric order: 5 -> 6 -> 7
+
 | Phase | Milestone | Plans Complete | Status | Completed |
 |-------|-----------|----------------|--------|-----------|
 | 1. Backend Architecture | v1.0 | 3/3 | Complete | 2026-03-06 |
 | 2. H5MD Compliance | v1.0 | 4/4 | Complete | 2026-03-06 |
 | 3. Contract Test Suite | v1.0 | 4/4 | Complete | 2026-03-06 |
 | 4. Benchmarks & Performance | v1.0 | 2/2 | Complete | 2026-03-06 |
+| 5. Benchmark Pipeline | v0.3.1 | 1/1 | Complete | 2026-03-09 |
+| 6. PR Feedback | v0.3.1 | 0/1 | Not started | - |
+| 7. Dashboard and README | v0.3.1 | 0/? | Not started | - |
+| 8. Test Isolation Fix | Maintenance | 1/1 | Complete | 2026-03-09 |