diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 1c5778c..186f82d 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -56,10 +56,10 @@ body:
       label: Traceback (if any)
       render: shell
   - type: checkboxes
-    id: testbench
+    id: benchmark
     attributes:
-      label: testBench check
-      description: Does your file already fail `make testbench`? If so, please note which group.
+      label: Benchmark check
+      description: Did your file fail under `make bench-robust` (SpreadsheetBench)?
       options:
-        - label: "I ran `make testbench` and my file failed (attach `metrics/testbench/failures.json`)."
-        - label: "The file is not in the bench; I can contribute it as a new fixture."
+        - label: "I ran `make bench-robust` and my file failed (attach the row from results.csv if you can)."
+        - label: "The file is from outside SpreadsheetBench; I can attach a minimal reproducer."
diff --git a/.github/ISSUE_TEMPLATE/parser_edge_case.yml b/.github/ISSUE_TEMPLATE/parser_edge_case.yml
index 3cb51a5..a4fb527 100644
--- a/.github/ISSUE_TEMPLATE/parser_edge_case.yml
+++ b/.github/ISSUE_TEMPLATE/parser_edge_case.yml
@@ -6,8 +6,8 @@ body:
   - type: markdown
     attributes:
       value: |
-        Every edge-case report ideally becomes a new fixture in `testBench/`. Bonus points
-        for a minimal generator in `scripts/build_testbench.py`.
+        Every edge-case report ideally becomes a regression test. Bonus points
+        for a minimal `openpyxl` generator that reproduces it.
   - type: textarea
     id: pattern
     attributes:
@@ -33,6 +33,6 @@ body:
     attributes:
       label: What would you like next?
       options:
-        - label: "Add it to `testBench/` as a new stress fixture."
+        - label: "Land it as a new regression test in `tests/`."
         - label: "Open a PR fixing the parser."
         - label: "Triage help — I'm stuck."
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 4aec668..6a0e550 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -6,7 +6,7 @@
 
 - [ ] 🐞 Bug fix
 - [ ] ✨ New feature
-- [ ] 🧪 Parser edge case / new `testBench/` fixture
+- [ ] 🧪 Parser edge case / new regression test
 - [ ] 📚 Docs
 - [ ] 🧹 Refactor / chore
 - [ ] 🚀 Performance
@@ -14,7 +14,7 @@
 ## Checklist
 
 - [ ] `make test` passes locally
-- [ ] `make testbench` still shows 1054/1054 (or the delta is explained below)
+- [ ] If parser/chunker internals changed: ran `make bench-robust` against SpreadsheetBench (call out any regressions below)
 - [ ] Added/updated tests covering the change
 - [ ] `ruff check` is clean
 - [ ] Updated docs if user-facing behaviour changed
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a84e5e0..1168a01 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -50,34 +50,3 @@ jobs:
           name: junit-${{ matrix.os }}-py${{ matrix.python-version }}
           path: reports/junit.xml
           if-no-files-found: ignore
-
-  testbench:
-    name: testBench round-trip (ubuntu / py3.12)
-    runs-on: ubuntu-latest
-    needs: test
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.12"
-          cache: pip
-          cache-dependency-path: pyproject.toml
-
-      - name: Install
-        run: |
-          python -m pip install --upgrade pip
-          pip install -e ".[dev,api]"
-
-      - name: Build generated testBench
-        run: make testbench-build
-
-      - name: Run round-trip tests
-        run: make testbench
-
-      - name: Upload failure log
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: testbench-failures
-          path: metrics/testbench/failures.json
-          if-no-files-found: ignore
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ee6a82f..5cf43b6 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -37,9 +37,6 @@ jobs:
       - name: Build wheel + sdist
         run: python -m build
 
-      - name: Build testBench zip
-        run: make testbench-zip
-
       - name: Upload distribution artifacts
         uses: actions/upload-artifact@v4
         with:
@@ -47,7 +44,6 @@ jobs:
           path: |
             dist/*.whl
             dist/*.tar.gz
-            dist/testBench-v*.zip
 
   github-release:
     needs: build
@@ -81,7 +77,6 @@ jobs:
           files: |
             dist/*.whl
             dist/*.tar.gz
-            dist/testBench-v*.zip
           body_path: ${{ steps.notes.outputs.path }}
           generate_release_notes: ${{ steps.notes.outputs.auto == 'true' }}
 
@@ -97,9 +92,6 @@ jobs:
           name: dist
           path: dist
 
-      - name: Strip non-PyPI artifacts
-        run: rm -f dist/testBench-v*.zip
-
       - name: Publish to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
diff --git a/.gitignore b/.gitignore
index 67bad3c..6a13083 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,17 +51,12 @@ tests/fixtures/corpus/
 # Corpus & metrics outputs
 metrics/corpus/
 metrics/corpus_summary.json
-metrics/testbench/
 
-# Generated stress test artifacts — the 1000-file bench is re-built on demand
-testBench/generated/
+# Generated stress test artifacts
 examples/stress_test/stress_results.json
 examples/stress_test/built_reference.json
 examples/stress_test/STRESS_TEST_RESULTS.md
 
-# Packaged dataset (produced by `make testbench-zip`)
-dist/testBench*.zip
-
 # Local benchmark harness (private, not pushed)
 tests/benchmarks/reports/
 tests/benchmarks/hucre_node/node_modules/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 934380f..682f088 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,12 +4,10 @@ repos:
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
-        exclude: "^testBench/"
       - id: check-yaml
       - id: check-toml
       - id: check-added-large-files
-        args: ["--maxkb=5120"]   # 5 MB ceiling per file — testBench fixtures are larger, excluded below
-        exclude: "^testBench/"
+        args: ["--maxkb=5120"]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.6.9
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 918d0d6..d7527b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,7 +45,28 @@ Template for a new release (copy this block, fill in, move Unreleased items in):
 
 ## [Unreleased]
 
-Nothing yet. Open a PR and add your entry under the appropriate heading.
+### ⚠️ BREAKING
+- Retired the in-tree `testBench/` corpus. The 1054-workbook stress dataset
+  and `make testbench*` targets are gone — benchmarks now run against the
+  public SpreadsheetBench v0.1 corpus, downloaded on demand to `data/corpora/`
+  (gitignored). See `docs/corpora.md`.
+
+### Removed
+- `testBench/` directory and all bundled real-world / generated workbooks.
+- `make testbench-build`, `make testbench`, `make testbench-zip` targets.
+- `testbench` job in `.github/workflows/ci.yml`.
+- `testBench-vX.Y.Z.zip` release asset from the release workflow.
+- `tests/test_testbench_roundtrip.py`, `tests/test_enterprise_scoring.py`,
+  `tests/test_real_world_datasets.py`, `tests/test_cross_validation.py`.
+- `scripts/build_testbench.py`, `scripts/generate_enterprise_fixtures.py`.
+- `static_xlsx` pytest fixture (the test bench it iterated over is gone).
+
+### Changed
+- README, wiki, examples, and contributor docs now point at SpreadsheetBench
+  (`make bench-robust` / `make bench-retrieval`) as the canonical benchmark.
+- `examples/demo.py` + `examples/generate_examples.py` now write/read fixtures
+  under `examples/fixtures/` instead of the (removed) `testBench/real_world/`.
+
 
 ## [0.2.0] — 2026-05-11
 
@@ -173,7 +194,7 @@ announcement: [`docs/launch/RELEASE_NOTES_v0.1.1.md`](docs/launch/RELEASE_NOTES_
 
 ### Performance
 - Chunk builder caches `detect_circular_refs()` per workbook instead of
-  re-running it per block. Real 21k-cell financial model (Walbridge):
+  re-running it per block. Real 21k-cell financial model:
   **307 s → 4.6 s (66×)**.
 - Sheet parser iterates openpyxl's `_cells` dict instead of `iter_rows()`
   over the full bounding box. Workbooks with extreme sparse addresses
@@ -185,9 +206,8 @@ announcement: [`docs/launch/RELEASE_NOTES_v0.1.1.md`](docs/launch/RELEASE_NOTES_
   non-existent `dxfId=0` in generated fixtures, so openpyxl can load them
   back without an `IndexError`.
 - `test_formula_cached_values_match` now applies a 15 % threshold for
-  workbooks with known openpyxl `data_only` caching gaps (Walbridge),
-  5 % everywhere else. See
-  [`docs/PARSER_KNOWN_ISSUES.md`](docs/PARSER_KNOWN_ISSUES.md).
+  workbooks with known openpyxl `data_only` caching gaps, 5 % everywhere
+  else. See [`docs/PARSER_KNOWN_ISSUES.md`](docs/PARSER_KNOWN_ISSUES.md).
 
 ### Docs
 - New README positioned as *"Make XLSX LLM Ready"* with architecture
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a270bbf..c9bdabf 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -13,12 +13,12 @@ bug or send a small PR. If that's you, thank you.
 
 ## Ways to help (in order of preference for first-time contributors)
 
-1. **Run `make testbench` and report a file that breaks.** We actively want
-   edge-case `.xlsx` fixtures — use the
+1. **Run `make bench-robust` on SpreadsheetBench and report a file that
+   breaks.** We actively want edge-case `.xlsx` fixtures — use the
    [Parser edge case issue template](https://github.com/knowledgestack/ks-xlsx-parser/issues/new?template=parser_edge_case.yml).
-2. **Add a new workbook to `testBench/`.** Either drop a file under
-   `testBench/stress/` or add a builder to `scripts/build_testbench.py`. If
-   the parser crashes on it, even better.
+2. **Submit an adversarial workbook.** Attach a `.xlsx` (or a generator
+   that builds one) to a Parser edge case issue. If the parser crashes
+   on it, even better.
 3. **Fix one of the flagged issues** in [`docs/PARSER_KNOWN_ISSUES.md`](docs/PARSER_KNOWN_ISSUES.md).
 4. **Improve docs.** The README, the architecture diagram, the examples —
    if something confused you, it confuses everyone.
@@ -32,8 +32,9 @@ git clone https://github.com/knowledgestack/ks-xlsx-parser.git
 cd ks-xlsx-parser
 make install               # pip install -e ".[dev,api]"
 make test                  # fast, default suite
-make testbench-build       # regenerate 1000-file stress corpus (~1 min)
-make testbench             # round-trip every workbook; parallel
+make corpus-download       # fetch SpreadsheetBench (5,458 real-world xlsx)
+make bench-robust          # parse-success + structural counts vs Docling
+make bench-retrieval       # retrieval recall@k vs Docling
 ```
 
 Prerequisites: Python 3.10+, `pip`, optionally `make`. We use `ruff` for
@@ -44,7 +45,8 @@ linting/formatting — install it with the `[dev]` extra.
 Your PR should:
 
 1. Have tests. `pytest` must stay green: `make test`.
-2. Keep `make testbench` at 1054/1054 (or explain the delta in the PR description).
+2. If touching parser or chunker internals, run `make bench-robust` against
+   SpreadsheetBench and call out any regressions in the PR description.
 3. Pass `ruff check` (`make lint`) and be formatted with `make format`.
 4. Include one sentence in the PR description that starts with *"This change…"*.
 5. Use [conventional-commit style](https://www.conventionalcommits.org/)
@@ -74,7 +76,7 @@ Helpful things to include:
 - Type hints everywhere that's practical.
 - Tests live in `tests/`; programmatic workbook fixtures live in `tests/conftest.py`.
 - Cross-validation against calamine uses the `crossval` marker.
-- Long-running bench tests use `@pytest.mark.testbench` and are skipped by default.
+- The benchmark harness (`tests/benchmarks/`) lives outside `pytest` — invoke via `make bench-robust` / `make bench-retrieval`.
 - Keep public-API changes additive; if you can't, note it in the PR and the
   maintainers will line up the deprecation.
 
diff --git a/Makefile b/Makefile
index d2642d6..9bedb5b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,25 +1,20 @@
-.PHONY: help install test test-ci testbench testbench-build testbench-zip lint format typecheck clean corpus-download bench-robust bench-retrieval bench
+.PHONY: help install test test-ci lint format typecheck clean corpus-download bench-robust bench-retrieval bench
 
 PYTHON ?= python
 PKG_VERSION := $(shell $(PYTHON) -c "import tomllib, pathlib; print(tomllib.loads(pathlib.Path('pyproject.toml').read_text())['project']['version'])")
-TESTBENCH_ZIP := dist/testBench-v$(PKG_VERSION).zip
 
 help:
 	@echo "ks-xlsx-parser — common targets"
 	@echo ""
 	@echo "  make install         Install package and dev deps (editable)"
-	@echo "  make test            Run the default test suite (skips corpus + testbench)"
+	@echo "  make test            Run the default test suite"
 	@echo "  make test-ci         Run the suite with verbose output for CI"
 	@echo ""
-	@echo "  make testbench-build Generate the 1000-file testBench dataset"
-	@echo "  make testbench       Run parser round-trip across the full testBench"
-	@echo "  make testbench-zip   Package testBench into $(TESTBENCH_ZIP) for GitHub release"
-	@echo ""
 	@echo "  make lint            Ruff lint"
 	@echo "  make format          Ruff format"
 	@echo "  make typecheck       mypy"
 	@echo ""
-	@echo "  make corpus-download Fetch public XLSX corpora for extended robustness"
+	@echo "  make corpus-download Fetch SpreadsheetBench for benchmark runs"
 	@echo ""
 	@echo "  make bench-robust    Robustness on SpreadsheetBench (ks vs docling, ~20 min)"
 	@echo "  make bench-retrieval Retrieval recall on SpreadsheetBench (ks vs docling, ~40 min)"
@@ -34,23 +29,6 @@ test:
 test-ci:
 	$(PYTHON) -m pytest tests/ -v --tb=short -W ignore::UserWarning --junitxml=reports/junit.xml
 
-testbench-build:
-	$(PYTHON) scripts/build_testbench.py --clean
-
-testbench:
-	@test -d testBench/generated || (echo "testBench/generated missing. Run 'make testbench-build' first." && exit 1)
-	$(PYTHON) -m pytest tests/test_testbench_roundtrip.py -m testbench --tb=short -W ignore::UserWarning
-
-testbench-zip: testbench-build
-	@mkdir -p dist
-	@echo "→ packaging testBench into $(TESTBENCH_ZIP)"
-	@rm -f $(TESTBENCH_ZIP)
-	@cd . && zip -qr $(TESTBENCH_ZIP) testBench \
-		-x "testBench/**/__pycache__/*" \
-		-x "testBench/**/.DS_Store"
-	@ls -lh $(TESTBENCH_ZIP)
-	@echo "→ attach with: gh release create v$(PKG_VERSION) $(TESTBENCH_ZIP) --generate-notes"
-
 lint:
 	$(PYTHON) -m ruff check src/ tests/ scripts/
 
diff --git a/README.md b/README.md
index 613270a..f39f718 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@
   <a href="https://pypi.org/project/ks-xlsx-parser/"><img src="https://img.shields.io/pypi/v/ks-xlsx-parser.svg?style=flat-square&logo=pypi&logoColor=white&label=PyPI&color=047857" alt="PyPI"></a>
   <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.10%2B-065F46?style=flat-square&logo=python&logoColor=white" alt="Python 3.10+"></a>
   <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-64748B?style=flat-square" alt="MIT License"></a>
-  <a href="#-the-testbench-dataset"><img src="https://img.shields.io/badge/testBench-1054%2F1054-22C55E?style=flat-square&logo=pytest&logoColor=white" alt="Tests"></a>
+  <a href="tests/benchmarks/reports/COMPARISON.md"><img src="https://img.shields.io/badge/SpreadsheetBench-5%2C455%2F5%2C458%20parsed-22C55E?style=flat-square&logo=pytest&logoColor=white" alt="SpreadsheetBench"></a>
   <a href="https://github.com/knowledgestack/ks-xlsx-parser/actions/workflows/ci.yml"><img src="https://github.com/knowledgestack/ks-xlsx-parser/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
 </p>
 
@@ -72,7 +72,7 @@ graph that drops straight into [LangChain](https://www.langchain.com/),
   &nbsp;
   <a href="docs/wiki/Quick-Start.md"><img src="https://img.shields.io/badge/📚%20Docs-wiki-22C55E?style=for-the-badge" alt="Docs"></a>
   &nbsp;
-  <a href="https://github.com/knowledgestack/ks-xlsx-parser/releases"><img src="https://img.shields.io/badge/📦%20Download-testBench%20dataset-84CC16?style=for-the-badge" alt="Dataset"></a>
+  <a href="tests/benchmarks/reports/COMPARISON.md"><img src="https://img.shields.io/badge/📊%20Benchmarks-SpreadsheetBench-84CC16?style=for-the-badge" alt="Benchmarks"></a>
 </p>
 
 ---
@@ -203,7 +203,8 @@ are all first-class ways to keep the lights on.
 - 🙌 [Contribute](CONTRIBUTING.md) — every PR is reviewed; `good-first-issue` labels live on Issues.
 - 🧰 [Knowledge Stack org](https://github.com/knowledgestack) — see the rest of the ecosystem (ks-cookbook, ks-xlsx-parser, more on the way).
 
-Not sure where to start? Run `make testbench`, find a file that breaks, open a
+Not sure where to start? Run `make bench-robust` on SpreadsheetBench, find a
+file that breaks, and open a
 [Parser edge case](https://github.com/knowledgestack/ks-xlsx-parser/issues/new?template=parser_edge_case.yml).
 That's the fastest path to a merged PR.
 
@@ -250,7 +251,7 @@ That's it. Every chunk has:
 - [📚 Documentation](#-documentation)
 - [⚔️ How it compares](#️-how-it-compares)
 - [🎯 Who this is for](#-who-this-is-for)
-- [🧪 The testBench dataset](#-the-testbench-dataset)
+- [📊 Benchmarks](#-benchmarks)
 - [🚧 Limitations](#-limitations)
 - [🧰 Knowledge Stack ecosystem](#-knowledge-stack-ecosystem)
 - [📡 Stay in touch](#-stay-in-touch)
@@ -310,8 +311,9 @@ git clone https://github.com/knowledgestack/ks-xlsx-parser.git
 cd ks-xlsx-parser
 make install           # pip install -e ".[dev,api]"
 make test              # default suite
-make testbench-build   # generate the 1000-file stress corpus
-make testbench         # round-trip every workbook through the parser
+make corpus-download   # fetch SpreadsheetBench (5,458 real-world xlsx)
+make bench-robust      # parse-success + structural counts vs Docling
+make bench-retrieval   # retrieval recall@k vs Docling
 ```
 
 Runtime deps: `openpyxl`, `pydantic`, `lxml`, `xxhash`, `tiktoken`.
@@ -361,7 +363,7 @@ Most tools give you a dataframe. `ks-xlsx-parser` gives you a **graph an LLM can
 > Looking for a tiny, edge-runtime I/O library with write support? See
 > [**`hucre`**](https://github.com/productdevbook/hucre) by
 > [**@productdevbook**](https://github.com/productdevbook). For an unbiased
-> head-to-head on the 1053-workbook testBench corpus — perf numbers,
+> head-to-head on the historical 1053-workbook curated corpus — perf numbers,
 > extraction-count parity, where each side wins — see the wiki:
 > [**`ks-xlsx-parser` vs `hucre`**](docs/wiki/Benchmark-vs-hucre.md).
 
@@ -387,31 +389,21 @@ Teams shipping agents, RAG pipelines, or auditing tools that ingest Excel.
 
 ---
 
-## 🧪 The testBench dataset
+## 📊 Benchmarks
 
-A **1054-workbook stress corpus** ships under [`testBench/`](testBench/) and
-is round-tripped in CI on every commit. It's the easiest way to see whether
-the parser does the right thing on *your* kind of workbook.
+We benchmark against **SpreadsheetBench v0.1** — 912 instruction × xlsx tasks
+(5,458 unique workbooks) covering financial models, project trackers,
+HR records, scientific data, and a long tail of small business spreadsheets.
 
-| Group | Files | What it covers |
-|-------|------:|----------------|
-| `real_world/`            | 8    | Real anonymised workbooks (financial, engineering, project tracking) |
-| `enterprise/`            | 4    | Deterministic enterprise templates |
-| `github_datasets/`       | 10   | Public datasets (iris, titanic, superstore, …) |
-| `stress/curated/`        | 26   | 26 progressive stress levels authored by hand |
-| `stress/merges/`         | 5    | Pathological merge patterns |
-| `generated/matrix/`      | 297  | One feature per file across 18 categories |
-| `generated/combo/`       | 400  | Deterministic feature cocktails (5 densities × 80 seeds) |
-| `generated/adversarial/` | 300  | Unicode bombs, circular refs, 32k-char cells, deep formula chains, sparse 1M-row sheets, 250-sheet workbooks |
+| Benchmark | What it measures | Cost |
+|---|---|---|
+| `make bench-robust` | Parse-success rate + structural counts vs Docling | ~20 min |
+| `make bench-retrieval` | Top-k retrieval recall + table fragmentation rate vs Docling | ~40 min |
 
-```bash
-make testbench-build   # regenerate testBench/generated/ (~1 minute)
-make testbench         # 1054/1054 in ~70 seconds
-make testbench-zip     # package as dist/testBench-vX.Y.Z.zip for a GitHub release
-```
-
-The zipped dataset is attached to every [release](https://github.com/knowledgestack/ks-xlsx-parser/releases)
-— pull it if you don't want to clone the full repo.
+Headline numbers and methodology live in
+[`tests/benchmarks/reports/COMPARISON.md`](tests/benchmarks/reports/COMPARISON.md).
+The corpus is downloaded on demand (`make corpus-download`) and gitignored —
+nothing is committed to the repo.
 
 ---
 
@@ -461,10 +453,9 @@ or the [#showcase](https://discord.gg/4uaGhJcx) channel on Discord.
 - 🐙 **[Follow @knowledgestack](https://github.com/knowledgestack)** on GitHub for new releases across the ecosystem.
 - 📣 Watch this repo (→ *Releases only*) to get pinged when `ks-xlsx-parser` ships an update.
 
-If you'd rather just peek first — thousands of parsed workbooks live in the
-[testBench release](https://github.com/knowledgestack/ks-xlsx-parser/releases)
-as a single zip. Pull it, diff it, file an issue if your Excel does something
-weirder than ours.
+If you'd rather just peek first — run the benchmark suite against the
+public SpreadsheetBench corpus (`make corpus-download && make bench-robust`)
+and file an issue if your Excel does something weirder than ours.
 
 ---
 
@@ -472,12 +463,11 @@ weirder than ours.
 
 We love contributions. Three paths, in order of speed-to-merge:
 
-1. **Report a testBench failure** — run `make testbench`, find a file that
-   breaks, attach it to a
+1. **Report a benchmark failure** — run `make bench-robust` on SpreadsheetBench,
+   find a file that breaks, attach it to a
    [Parser edge case issue](https://github.com/knowledgestack/ks-xlsx-parser/issues/new?template=parser_edge_case.yml).
-2. **Add a new adversarial workbook** — contribute a builder to
-   `scripts/build_testbench.py`. Any file that makes the parser crash or
-   lose information is welcome.
+2. **Submit an adversarial workbook** — open a Parser edge case issue with the
+   file attached; we'll fold it into the suite.
 3. **Fix a flagged issue** — see [`docs/PARSER_KNOWN_ISSUES.md`](docs/PARSER_KNOWN_ISSUES.md).
 
 Full dev loop, PR checklist, and code style in [`CONTRIBUTING.md`](CONTRIBUTING.md).
@@ -544,7 +534,7 @@ No. The library reads `.xlsx` files; it never executes them. VBA macros are flag
 <details>
 <summary><b>How fast is it?</b></summary>
 
-The full 1054-workbook testBench round-trips in ~70 s on a single machine. A real 21k-cell, 13-sheet financial model parses in ~4.6 s (down from 307 s pre-0.1.1 after a circular-ref caching fix). Sparse workbooks with extreme addresses parse in under 200 ms.
+The full `make bench-robust` run over SpreadsheetBench's 5,458-workbook corpus (ks-xlsx-parser plus the Docling comparison) takes roughly 20 minutes on a single machine, with a median (P50) parse time in the low tens of milliseconds per workbook. A real 21k-cell, 13-sheet financial model parses in ~4.6 s (down from 307 s pre-0.1.1 after a circular-ref caching fix). Sparse workbooks with extreme addresses parse in under 200 ms.
 
 </details>
 
diff --git a/docs/MAINTAINERS.md b/docs/MAINTAINERS.md
index a6cfbe6..6378cb2 100644
--- a/docs/MAINTAINERS.md
+++ b/docs/MAINTAINERS.md
@@ -30,7 +30,6 @@ Enable:
     - `tests (ubuntu-latest / py3.11)`
     - `tests (ubuntu-latest / py3.12)`
     - `tests (macos-latest / py3.12)`
-    - `testBench round-trip (ubuntu / py3.12)`
   - ✅ Require branches to be up to date before merging
 - ✅ Require conversation resolution before merging
 - ✅ Require signed commits (soft lock — can relax if it slows contributors)
@@ -66,16 +65,15 @@ Create categories (click *New Category* for each):
 - **🎯 Show and tell** (open) — projects built with ks-xlsx-parser
   - Attach the template in `.github/DISCUSSION_TEMPLATE/show-and-tell.yml`
 - **🙏 Q&A** (open, answerable) — usage and "does it handle X" questions
-- **🧪 testBench findings** (open) — edge cases that shouldn't be issues yet
+- **🧪 Benchmark findings** (open) — edge cases that shouldn't be issues yet
 
 ### Releases
 
 Pushing a `vX.Y.Z` tag triggers `.github/workflows/release.yml` which will:
 
 1. Build the wheel + sdist
-2. Build `dist/testBench-v<version>.zip`
-3. Attach all three to the GitHub Release
-4. Publish to PyPI via Trusted Publishing
+2. Attach both to the GitHub Release
+3. Publish to PyPI via Trusted Publishing
 
 One-time PyPI setup: go to PyPI → *your project* → *Publishing* → *Add a new
 pending publisher* with:
@@ -97,8 +95,9 @@ without a human click.
    line; update the compare-link footer at the bottom.
 3. Regenerate the full release notes in `../docs/launch/RELEASE_NOTES_vX.Y.Z.md`
    (copy from the previous release, edit for the new highlights).
-4. `make testbench` → expect 1054/1054.
-5. `make test` → clean.
+4. `make test` → clean.
+5. If the release touches parser internals, run `make bench-robust` against
+   SpreadsheetBench and confirm no regressions.
 6. Commit with `chore(release): vX.Y.Z`.
 7. `git tag -s vX.Y.Z -m "vX.Y.Z"` (signed tag; required by branch protection).
 8. `git push && git push --tags` — the tag triggers the release workflow.
diff --git a/docs/PARSER_KNOWN_ISSUES.md b/docs/PARSER_KNOWN_ISSUES.md
index 7667475..e1f5029 100644
--- a/docs/PARSER_KNOWN_ISSUES.md
+++ b/docs/PARSER_KNOWN_ISSUES.md
@@ -38,11 +38,11 @@ promoted to the master cell.
 
 ## Documented Limitations (No Hard Fail)
 
-### `Walbridge Coatings 8.9.23.xlsx` — formula cached-value drift
+### Formula cached-value drift on dynamic-array / volatile formulas
 
-**Symptom**: ~11% of formula cells in this real-world workbook produce a
-different cached value than calamine reads. Hard failures are zero; parsing
-and serialization succeed end-to-end.
+**Symptom**: A small fraction of formula cells in some real-world workbooks
+produce a different cached value than calamine reads. Hard failures are zero;
+parsing and serialization succeed end-to-end.
 
 **Root cause**: openpyxl's `data_only=True` reader does not always surface the
 most recently written cached value for complex dynamic-array or volatile
@@ -50,10 +50,6 @@ formulas when the calc chain references across multiple sheets. This is an
 openpyxl limitation, not an ks-xlsx-parser bug; calamine reads from the raw XML
 and catches the newer values.
 
-**Current mitigation**: `tests/test_cross_validation.py::test_formula_cached_values_match`
-uses a 15% threshold for files in a `known_loose_files` set and the default
-5% threshold for everything else.
-
 **Potential fixes** (tracked):
 1. Read cached values directly from the OOXML XML instead of via openpyxl (like
    we already do for empty merge masters).
diff --git a/docs/RELEASE_PROCESS.md b/docs/RELEASE_PROCESS.md
index e2b9717..14c89a8 100644
--- a/docs/RELEASE_PROCESS.md
+++ b/docs/RELEASE_PROCESS.md
@@ -1,6 +1,6 @@
 # Release process
 
-This document is the **operational** companion to [`.github/workflows/release.yml`](../.github/workflows/release.yml). The workflow is tag-triggered (`v*.*.*`); pushing such a tag builds wheel + sdist, attaches a `testBench-vX.Y.Z.zip`, creates a GitHub Release, and publishes to PyPI. **All three actions are partially or fully irreversible** — PyPI in particular does not allow re-publishing a version. Run through this checklist before tagging.
+This document is the **operational** companion to [`.github/workflows/release.yml`](../.github/workflows/release.yml). The workflow is tag-triggered (`v*.*.*`); pushing such a tag builds wheel + sdist, creates a GitHub Release, and publishes to PyPI. **All three actions are partially or fully irreversible** — PyPI in particular does not allow re-publishing a version. Run through this checklist before tagging.
 
 ## One-time setup
 
@@ -53,7 +53,6 @@ gh api -X PUT repos/knowledgestack/ks-xlsx-parser/branches/main/protection \
   -F 'required_status_checks[contexts][]=tests (macos-latest / py3.10)' \
   -F 'required_status_checks[contexts][]=tests (macos-latest / py3.11)' \
   -F 'required_status_checks[contexts][]=tests (macos-latest / py3.12)' \
-  -F 'required_status_checks[contexts][]=testBench round-trip (ubuntu / py3.12)' \
   -F enforce_admins=false \
   -F required_pull_request_reviews[required_approving_review_count]=1 \
   -F restrictions= 2>/dev/null
@@ -82,7 +81,7 @@ For every new version `X.Y.Z`:
 8. **Watch the workflow.** https://github.com/knowledgestack/ks-xlsx-parser/actions — the `Release` workflow should run `build` → `github-release` → `pypi`. If the `pypi` job is gated on a reviewer, approve it in the Actions UI.
 9. **Verify post-release:**
    - PyPI: https://pypi.org/project/ks-xlsx-parser/X.Y.Z/ resolves and `pip install ks-xlsx-parser==X.Y.Z` works in a fresh venv.
-   - GitHub Release: https://github.com/knowledgestack/ks-xlsx-parser/releases/tag/vX.Y.Z shows the release notes + wheel + sdist + `testBench-vX.Y.Z.zip`.
+   - GitHub Release: https://github.com/knowledgestack/ks-xlsx-parser/releases/tag/vX.Y.Z shows the release notes + wheel + sdist.
    - The `[Unreleased]` heading at the top of `CHANGELOG.md` is reset to "Nothing yet" for the next cycle (manual; do this in a follow-up PR).
 
 ## Common failure modes
diff --git a/docs/corpora.md b/docs/corpora.md
index f04f3aa..0896e3e 100644
--- a/docs/corpora.md
+++ b/docs/corpora.md
@@ -1,36 +1,31 @@
 # Corpus & Benchmarks
 
-The ks-xlsx-parser test bench is split into two tiers.
+ks-xlsx-parser benchmarks against public corpora that are downloaded on demand —
+nothing large is committed to the repo.
 
-## 1. `testBench/` — checked into the repo
+## Primary corpus — SpreadsheetBench v0.1
 
-A 1053-workbook corpus shipped with every clone, exercising the full extraction
-spec. Round-tripped on every CI run. See [`testBench/README.md`](../testBench/README.md)
-for the layout.
+912 instruction × xlsx tasks (5,458 unique workbooks) covering financial models,
+project trackers, HR records, scientific data, and a long tail of small-business
+spreadsheets. Each task ships with an `instruction`, a `data_position`, and
+(usually) an `answer_position`, which gives us ground truth for retrieval recall.
 
 ```bash
-make testbench-build   # regenerate the 1000-file `generated/` subtree
-make testbench         # parse every workbook, record failures to metrics/testbench/
-make testbench-zip     # package as a GitHub release asset
+make corpus-download    # fetch SpreadsheetBench + a few smaller corpora under data/corpora/
+make bench-robust       # parse-success rate + structural counts vs Docling (~20 min)
+make bench-retrieval    # top-k retrieval recall + table fragmentation rate vs Docling (~40 min)
 ```
 
-## 2. External public corpora — downloaded on demand
+Reports land in `tests/benchmarks/reports/<timestamp>_<git-sha>/`. The headline
+numbers and methodology live in
+[`tests/benchmarks/reports/COMPARISON.md`](../tests/benchmarks/reports/COMPARISON.md).
 
-Heavier public datasets (EUSES, Enron `.xlsx` subset, SheetJS/openpyxl samples)
-stay out of git and download under `tests/fixtures/corpus/`.
+## Other public corpora — opt-in robustness
 
-```bash
-make corpus-download                    # fetch external corpora
-python -m pytest -m corpus -v           # opt-in robustness run
-```
-
-## Enterprise scorecard (runs by default)
+`scripts/download_corpora.sh` also fetches a handful of smaller xlsx corpora
+(EUSES, Enron `.xlsx` subset, SheetJS / openpyxl samples) under
+`data/corpora/`. These are useful for spot-checking specific failure modes.
 
 ```bash
-python -m pytest tests/test_enterprise_scoring.py -v
+python -m pytest -m corpus -v    # opt-in robustness run against external corpora
 ```
-
-Four small deterministic fixtures under `testBench/enterprise/` are regenerated
-if missing by `scripts/generate_enterprise_fixtures.py`. Per-file scorecards
-are written to `metrics/corpus/`; git ignores the `metrics/` tree so CI can
-upload the artifacts without polluting history.
diff --git a/docs/launch/MEDIUM_ARTICLE.md b/docs/launch/MEDIUM_ARTICLE.md
index f882c6c..1b76ccb 100644
--- a/docs/launch/MEDIUM_ARTICLE.md
+++ b/docs/launch/MEDIUM_ARTICLE.md
@@ -106,7 +106,7 @@ Prepping the library for the public release, we hit two bottlenecks that are int
 
 `detect_circular_refs()` on the dependency graph is O(V+E) with DFS + memoisation. Fine. But our chunk builder was calling it **once per chunk** inside `_build_dependency_summary()`, because every chunk's `has_circular` flag needed the global cycle set.
 
-On a small workbook: invisible. On a 13-sheet, 21k-cell real-world financial model (Walbridge Coatings, now our favourite regression fixture): **115 chunks × ~2.6 s each = 307 s of CPU.** The chunker was dominating the parse.
+On a small workbook: invisible. On a 13-sheet, 21k-cell real-world financial model: **115 chunks × ~2.6 s each = 307 s of CPU.** The chunker was dominating the parse.
 
 The fix is almost embarrassing:
 
diff --git a/docs/launch/RELEASE_NOTES_v0.1.1.md b/docs/launch/RELEASE_NOTES_v0.1.1.md
index e5ef79c..dc9ccd4 100644
--- a/docs/launch/RELEASE_NOTES_v0.1.1.md
+++ b/docs/launch/RELEASE_NOTES_v0.1.1.md
@@ -24,8 +24,8 @@ ecosystem. Now open for the rest of the world.
   asset attached to this release.
 - ⚡ **Parser perf fixes** — real-world workbooks that used to hang now
   finish in under a second.
-  - Cached `detect_circular_refs()` per workbook: Walbridge Coatings
-    **307 s → 4.6 s (66×)**.
+  - Cached `detect_circular_refs()` per workbook: real 21k-cell financial
+    model **307 s → 4.6 s (66×)**.
   - Sparse-cell iteration: files with two non-empty cells at `A1` and
     `XFD1048576` drop from 60 s timeout → **135 ms**.
 - 🧰 **Framework-agnostic** — drops straight into
diff --git a/docs/wiki/Benchmark-vs-hucre.md b/docs/wiki/Benchmark-vs-hucre.md
index 3fe4f1f..246b1b9 100644
--- a/docs/wiki/Benchmark-vs-hucre.md
+++ b/docs/wiki/Benchmark-vs-hucre.md
@@ -24,7 +24,12 @@ Pick `ks-xlsx-parser` for Python LLM / RAG / auditing pipelines.
 
 ---
 
-## Performance — 1053-workbook testBench corpus
+## Performance — historical 1053-workbook curated corpus
+
+> *This page reflects the v0.1.x benchmark run on a curated stress corpus that
+> shipped with earlier releases. The current head is benchmarked against
+> SpreadsheetBench (5,458 real-world workbooks); see
+> [COMPARISON.md](https://github.com/knowledgestack/ks-xlsx-parser/blob/main/tests/benchmarks/reports/COMPARISON.md).*
 
 Same machine, same run, same OS page cache. `parse_workbook(mode="fast")`
 is the apples-to-apples configuration for hucre's read-only path (it skips
@@ -38,7 +43,7 @@ metadata feature hucre extracts).
 | P99 parse time | **30.2 ms** | 469 ms | 246 ms |
 | mean parse time | **2.7 ms** | 73.9 ms | 39.5 ms |
 | total wall-clock | **2.8 s** | 77.8 s | 41.6 s |
-| Walbridge Coatings<br>(17.6k formulas, worst real-world file) | **139 ms** | 1413 ms | 686 ms |
+| Worst real-world file<br>(17.6k formulas) | **139 ms** | 1413 ms | 686 ms |
 
 ### Ratio to hucre
 
@@ -101,9 +106,9 @@ On every feature **both** parsers extract, the drift is zero or near-zero:
 | comments | 486 | 486 | **0** |
 | named ranges | 822 | 809 | 1.6% (tracked) |
 
-The 22-formula disagreement is dominated by one workbook
-(`real_world/Walbridge Coatings 8.9.23.xlsx`) where we parse 16 formulas
-that hucre misses — we surface this in the drift report, not hide it.
+The 22-formula disagreement is dominated by one real-world workbook where
+we parse 16 formulas that hucre misses — we surface this in the drift
+report, not hide it.
 
 The cell-count difference on adversarial merge-heavy files (we emit ~50%
 more rows) is a **methodology difference**: `ks-xlsx-parser` counts every
@@ -119,7 +124,7 @@ Every perf change in `ks-xlsx-parser` has to pass, in order:
 
 1. The **1631-test pytest suite** (unit + integration + corpus-slice)
 2. **Cross-validation** against [`calamine`](https://github.com/tafia/calamine) — the Rust reference parser — on a golden fixture set
-3. **Zero regressions** on the 1053-file testBench across eight sub-corpora (`real_world/`, `enterprise/`, `github_datasets/`, `stress/curated/`, `stress/merges/`, `generated/matrix/`, `generated/combo/`, `generated/adversarial/`)
+3. **Zero regressions** on the SpreadsheetBench robustness baseline (5,458 real-world workbooks)
 4. **Feature-count stability** vs. the hucre benchmark above
 
 That's the order. If a perf change breaks any gate, we don't ship it.
@@ -144,12 +149,16 @@ but the short version:
 cd tests/benchmarks/hucre_node && pnpm install --frozen-lockfile
 cd ../../..
 
+# Download SpreadsheetBench once
+make corpus-download
+
 # Full mode (default)
-python -m tests.benchmarks.vs_hucre --corpus testBench --out tests/benchmarks/reports
+python -m tests.benchmarks.vs_hucre \
+    --corpus data/corpora/spreadsheetbench --out tests/benchmarks/reports
 
 # Fast mode
 KS_PARSE_MODE=fast python -m tests.benchmarks.vs_hucre \
-    --corpus testBench --out tests/benchmarks/reports
+    --corpus data/corpora/spreadsheetbench --out tests/benchmarks/reports
 ```
 
 Outputs (under `tests/benchmarks/reports/<timestamp>_<git-sha>/`):
diff --git a/docs/wiki/Home.md b/docs/wiki/Home.md
index 0285977..997a9a9 100644
--- a/docs/wiki/Home.md
+++ b/docs/wiki/Home.md
@@ -22,8 +22,8 @@ the front-page README so it stays scannable. The code-heavy stuff lives here.
   together, and where to hook in if you want to extend the parser.
 - **[Benchmark vs `hucre`](Benchmark-vs-hucre)** — unbiased head-to-head
   against the [hucre](https://github.com/productdevbook/hucre) TypeScript
-  engine on the 1053-workbook testBench corpus: perf, extraction-count
-  parity, and where each tool wins.
+  engine on the historical 1053-workbook curated corpus: perf,
+  extraction-count parity, and where each tool wins.
 
 ## Related docs in the main repo
 
@@ -34,7 +34,7 @@ the front-page README so it stays scannable. The code-heavy stuff lives here.
 - [`docs/PARSER_KNOWN_ISSUES.md`](https://github.com/knowledgestack/ks-xlsx-parser/blob/main/docs/PARSER_KNOWN_ISSUES.md) —
   known edge cases and how we handle them.
 - [`docs/corpora.md`](https://github.com/knowledgestack/ks-xlsx-parser/blob/main/docs/corpora.md) —
-  the testBench stress corpus and public-corpus benchmarks.
+  public benchmark corpora (SpreadsheetBench, EUSES, Enron).
 - [`CONTRIBUTING.md`](https://github.com/knowledgestack/ks-xlsx-parser/blob/main/CONTRIBUTING.md) —
   dev loop, PR checklist, community channels.
 - [`CHANGELOG.md`](https://github.com/knowledgestack/ks-xlsx-parser/blob/main/CHANGELOG.md) —
diff --git a/docs/wiki/Pipeline-Internals.md b/docs/wiki/Pipeline-Internals.md
index 8913c05..eb5dcf5 100644
--- a/docs/wiki/Pipeline-Internals.md
+++ b/docs/wiki/Pipeline-Internals.md
@@ -52,7 +52,7 @@ resolve references (cell / range / cross-sheet / table / external).
 
 Circular-reference detection is O(V+E) DFS with memoisation at the
 edge level. It's cached per workbook inside `ChunkBuilder` — running it
-per chunk is how Walbridge Coatings used to take 307 s.
+per chunk is how a real 21k-cell workbook used to take 307 s.
 
 ## 3. Annotate
 
@@ -129,7 +129,6 @@ parser writes the importer for you.
 | Add a verification stage | `verification/stage_verifier.py` |
 | Add a new DTO field | `models/*.py` (+ serializer + renderer) |
 
-When in doubt, write the test first — the
-[`testBench/`](https://github.com/knowledgestack/ks-xlsx-parser/tree/main/testBench)
-corpus is the fastest signal that a pipeline change didn't regress
-anything else.
+When in doubt, write the test first — the SpreadsheetBench benchmark
+(`make bench-robust`) is the fastest signal that a pipeline change didn't
+regress anything else.
diff --git a/examples/demo.py b/examples/demo.py
index ceeb75f..dbf6118 100644
--- a/examples/demo.py
+++ b/examples/demo.py
@@ -15,7 +15,7 @@
 from xlsx_parser.pipeline import parse_workbook
 from xlsx_parser.utils.logging_config import configure_logging
 
-EXAMPLES_DIR = Path(__file__).parent.parent / "testBench" / "real_world"
+EXAMPLES_DIR = Path(__file__).parent / "fixtures"
 
 
 def demo_financial_model():
diff --git a/examples/generate_examples.py b/examples/generate_examples.py
index 9fecb1f..8d25e01 100644
--- a/examples/generate_examples.py
+++ b/examples/generate_examples.py
@@ -20,7 +20,7 @@
 from openpyxl.worksheet.datavalidation import DataValidation
 from openpyxl.worksheet.table import Table, TableStyleInfo
 
-EXAMPLES_DIR = Path(__file__).parent.parent / "testBench" / "real_world"
+EXAMPLES_DIR = Path(__file__).parent / "fixtures"
 EXAMPLES_DIR.mkdir(parents=True, exist_ok=True)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 74f125e..a425853 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,10 +75,8 @@ markers = [
     "invariant: structural invariant tests",
     "corpus: external corpus tests (skipped by default)",
     "slow: tests taking >10 seconds",
-    "enterprise: enterprise scorecard benchmarks",
-    "testbench: 1000-file testBench round-trip (skipped by default; run with -m testbench)",
 ]
-addopts = "-m 'not corpus and not testbench'"
+addopts = "-m 'not corpus'"
 
 [tool.setuptools.packages.find]
 where = ["src"]
@@ -86,7 +84,7 @@ where = ["src"]
 [tool.ruff]
 line-length = 110
 target-version = "py310"
-extend-exclude = ["testBench", "examples/stress_test", "dist", "build"]
+extend-exclude = ["examples/stress_test", "dist", "build"]
 
 [tool.ruff.lint]
 select = [
diff --git a/scripts/build_testbench.py b/scripts/build_testbench.py
deleted file mode 100644
index 29f3535..0000000
--- a/scripts/build_testbench.py
+++ /dev/null
@@ -1,1667 +0,0 @@
-#!/usr/bin/env python3
-"""
-build_testbench.py — deterministic generator for the ks-xlsx-parser testBench.
-
-Produces ~1000 `.xlsx` workbooks under ``testBench/generated/`` organised into
-three groups:
-
-* ``matrix/``       — one feature-per-file across every knob the parser exercises
-                      (formulas, merges, named ranges, CF, DV, tables, charts,
-                      styles, dates, errors, hidden rows/cols, hyperlinks,
-                      comments, rich text, number formats, edge addresses,
-                      array formulas, 3D refs, pivot placeholders, huge sheet
-                      names).
-* ``combo/``        — randomised combinations of the above at five density
-                      levels (5/10/25/50/100 operations per file) × 80 seeds.
-* ``adversarial/``  — files engineered to break parsers: circular formulas,
-                      deep formula chains, 1M-row sparse sheets, 255-sheet
-                      workbooks, unicode/RTL/emoji stress, oversized merges,
-                      broken references, long formula strings.
-
-Usage
------
-
-    python scripts/build_testbench.py            # builds everything
-    python scripts/build_testbench.py --force    # regenerates even if present
-    python scripts/build_testbench.py --group matrix
-    python scripts/build_testbench.py --limit 50 # first 50 files only (smoke)
-
-The generator is fully deterministic: identical invocations produce
-byte-identical files (modulo openpyxl's own timestamping, which we neutralise).
-Every file is accompanied by one row in ``testBench/generated/MANIFEST.json``
-describing its group, feature tags, expected cell count, and SHA256.
-"""
-
-
-import argparse
-import hashlib
-import json
-import random
-import string
-import sys
-from collections.abc import Callable
-from dataclasses import dataclass, field
-from datetime import date, datetime, time
-from pathlib import Path
-
-from openpyxl import Workbook
-from openpyxl.chart import (
-    AreaChart,
-    BarChart,
-    BubbleChart,
-    LineChart,
-    PieChart,
-    RadarChart,
-    Reference,
-    ScatterChart,
-)
-from openpyxl.comments import Comment
-from openpyxl.formatting.rule import (
-    CellIsRule,
-    ColorScaleRule,
-    DataBarRule,
-    FormulaRule,
-    IconSetRule,
-    Rule,
-)
-from openpyxl.styles import (
-    Alignment,
-    Border,
-    Font,
-    PatternFill,
-    Side,
-)
-from openpyxl.utils import get_column_letter
-from openpyxl.workbook.defined_name import DefinedName
-from openpyxl.worksheet.datavalidation import DataValidation
-from openpyxl.worksheet.table import Table, TableStyleInfo
-
-ROOT = Path(__file__).resolve().parent.parent
-OUT_ROOT = ROOT / "testBench" / "generated"
-MANIFEST_PATH = OUT_ROOT / "MANIFEST.json"
-
-# ----------------------------------------------------------------------------
-# Data classes
-# ----------------------------------------------------------------------------
-
-
-@dataclass
-class GeneratedFile:
-    path: Path
-    group: str
-    features: list[str] = field(default_factory=list)
-    expected_sheets: int = 1
-    expected_cells: int = 0
-    expected_formulas: int = 0
-    notes: str = ""
-
-    def to_manifest_row(self) -> dict:
-        return {
-            "path": str(self.path.relative_to(OUT_ROOT)),
-            "group": self.group,
-            "features": self.features,
-            "expected_sheets": self.expected_sheets,
-            "expected_cells": self.expected_cells,
-            "expected_formulas": self.expected_formulas,
-            "sha256": sha256_of(self.path),
-            "size_bytes": self.path.stat().st_size,
-            "notes": self.notes,
-        }
-
-
-def sha256_of(path: Path) -> str:
-    h = hashlib.sha256()
-    with path.open("rb") as f:
-        for chunk in iter(lambda: f.read(65536), b""):
-            h.update(chunk)
-    return h.hexdigest()
-
-
-def _finalize(wb: Workbook, out: Path) -> None:
-    """Save workbook with deterministic metadata."""
-    wb.properties.created = datetime(2025, 1, 1, 0, 0, 0)
-    wb.properties.modified = datetime(2025, 1, 1, 0, 0, 0)
-    wb.properties.creator = "ks-xlsx-parser testBench generator"
-    wb.properties.title = out.stem
-    out.parent.mkdir(parents=True, exist_ok=True)
-    wb.save(out)
-
-
-# ----------------------------------------------------------------------------
-# Matrix group — one feature per file
-# ----------------------------------------------------------------------------
-
-
-MATRIX_DIR = OUT_ROOT / "matrix"
-
-
-def _matrix_path(slug: str) -> Path:
-    return MATRIX_DIR / f"{slug}.xlsx"
-
-
-# --- formulas -------------------------------------------------------------
-
-FORMULA_RECIPES: list[tuple[str, str, str]] = [
-    # (slug, label, formula expression — evaluated in B1 with constants in A1:A5)
-    ("formula_sum", "SUM", "=SUM(A1:A5)"),
-    ("formula_average", "AVERAGE", "=AVERAGE(A1:A5)"),
-    ("formula_min_max", "MIN/MAX", "=MAX(A1:A5)-MIN(A1:A5)"),
-    ("formula_count", "COUNT", "=COUNT(A1:A5)"),
-    ("formula_counta", "COUNTA", "=COUNTA(A1:A5)"),
-    ("formula_sumif", "SUMIF", "=SUMIF(A1:A5,\">2\")"),
-    ("formula_sumifs", "SUMIFS", "=SUMIFS(A1:A5,A1:A5,\">1\",A1:A5,\"<5\")"),
-    ("formula_countif", "COUNTIF", "=COUNTIF(A1:A5,\">2\")"),
-    ("formula_countifs", "COUNTIFS", "=COUNTIFS(A1:A5,\">0\",A1:A5,\"<5\")"),
-    ("formula_averageif", "AVERAGEIF", "=AVERAGEIF(A1:A5,\">1\")"),
-    ("formula_if_basic", "IF", "=IF(A1>2,\"big\",\"small\")"),
-    ("formula_if_nested", "nested IF", "=IF(A1>4,\"high\",IF(A1>2,\"mid\",\"low\"))"),
-    ("formula_ifs", "IFS", "=IFS(A1>4,\"high\",A1>2,\"mid\",TRUE,\"low\")"),
-    ("formula_ifna", "IFNA", "=IFNA(VLOOKUP(99,A1:B5,2,FALSE),\"missing\")"),
-    ("formula_iferror", "IFERROR", "=IFERROR(1/0,\"err\")"),
-    ("formula_and_or_not", "AND/OR/NOT", "=AND(A1>0,OR(A2>0,NOT(A3<0)))"),
-    ("formula_concat", "CONCAT", "=CONCAT(A1,\"-\",A2)"),
-    ("formula_textjoin", "TEXTJOIN", "=TEXTJOIN(\",\",TRUE,A1:A5)"),
-    ("formula_left_right_mid", "LEFT/RIGHT/MID", "=LEFT(\"abcdef\",3)&RIGHT(\"abcdef\",2)&MID(\"abcdef\",3,2)"),
-    ("formula_substitute", "SUBSTITUTE", "=SUBSTITUTE(\"foo-bar\",\"-\",\"_\")"),
-    ("formula_find_search", "FIND/SEARCH", "=FIND(\"b\",\"foobar\")+SEARCH(\"B\",\"foobar\")"),
-    ("formula_len_trim", "LEN/TRIM", "=LEN(TRIM(\"  hi  \"))"),
-    ("formula_upper_lower_proper", "case fns", "=UPPER(\"a\")&LOWER(\"B\")&PROPER(\"hello world\")"),
-    ("formula_round_roundup_rounddown", "ROUND*", "=ROUND(A1,1)+ROUNDUP(A1,0)+ROUNDDOWN(A1,0)"),
-    ("formula_int_mod", "INT/MOD", "=INT(A1)+MOD(A1,2)"),
-    ("formula_abs_sign", "ABS/SIGN", "=ABS(-5)+SIGN(A1)"),
-    ("formula_sqrt_power", "SQRT/POWER", "=SQRT(16)+POWER(A1,2)"),
-    ("formula_log_ln_exp", "LOG/LN/EXP", "=LOG(10)+LN(EXP(1))"),
-    ("formula_date_functions", "DATE fns", "=YEAR(TODAY())+MONTH(TODAY())+DAY(TODAY())"),
-    ("formula_datedif", "DATEDIF", "=DATEDIF(DATE(2020,1,1),DATE(2025,1,1),\"Y\")"),
-    ("formula_edate_eomonth", "EDATE/EOMONTH", "=EDATE(DATE(2020,1,1),12)+EOMONTH(DATE(2020,1,1),3)"),
-    ("formula_weekday_workday", "WEEKDAY/WORKDAY", "=WEEKDAY(TODAY())+WORKDAY(TODAY(),5)"),
-    ("formula_vlookup", "VLOOKUP", "=VLOOKUP(A1,A1:B5,2,FALSE)"),
-    ("formula_hlookup", "HLOOKUP", "=HLOOKUP(A1,A1:E2,2,FALSE)"),
-    ("formula_xlookup", "XLOOKUP", "=XLOOKUP(A1,A1:A5,B1:B5,\"not found\")"),
-    ("formula_index_match", "INDEX/MATCH", "=INDEX(A1:A5,MATCH(A2,A1:A5,0))"),
-    ("formula_offset", "OFFSET", "=OFFSET(A1,2,0)"),
-    ("formula_indirect", "INDIRECT", "=INDIRECT(\"A\"&2)"),
-    ("formula_rank", "RANK", "=RANK(A1,A1:A5,0)"),
-    ("formula_large_small", "LARGE/SMALL", "=LARGE(A1:A5,2)+SMALL(A1:A5,2)"),
-    ("formula_choose", "CHOOSE", "=CHOOSE(2,\"a\",\"b\",\"c\")"),
-    ("formula_switch", "SWITCH", "=SWITCH(A1,1,\"one\",2,\"two\",\"other\")"),
-    ("formula_array_cse", "array CSE", "{=SUM(A1:A5*A1:A5)}"),
-    ("formula_long", "8000-char expression", "=" + "+".join(f"A{((i % 5) + 1)}" for i in range(400))),
-]
-
-
-def build_formula_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    for slug, label, formula in FORMULA_RECIPES:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Formula"
-        for i in range(1, 6):
-            ws.cell(row=i, column=1, value=i * 1.5)
-        ws["B1"] = formula
-        ws["D1"] = f"Test: {label}"
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/formula",
-                features=["formula", slug.replace("formula_", "")],
-                expected_cells=7,
-                expected_formulas=1,
-            )
-        )
-    return files
-
-
-# --- merged cells ---------------------------------------------------------
-
-
-def build_merge_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    recipes = [
-        ("merge_horizontal_small", [("A1:C1",)]),
-        ("merge_horizontal_wide", [(f"A1:{get_column_letter(20)}1",)]),
-        ("merge_vertical_small", [("A1:A5",)]),
-        ("merge_vertical_tall", [("A1:A100",)]),
-        ("merge_rectangular", [("A1:E5",)]),
-        ("merge_many_horizontal", [(f"A{r}:C{r}",) for r in range(1, 51)]),
-        ("merge_many_vertical", [(f"{get_column_letter(c)}1:{get_column_letter(c)}30",) for c in range(1, 11)]),
-        ("merge_grid_5x5", [(f"{get_column_letter(2*c-1)}{2*r-1}:{get_column_letter(2*c)}{2*r}",) for r in range(1, 6) for c in range(1, 6)]),
-        ("merge_diagonal_steps", [(f"{get_column_letter(2*i-1)}{2*i-1}:{get_column_letter(2*i)}{2*i}",) for i in range(1, 8)]),
-        ("merge_header_3_levels", [("A1:F1",), ("A2:C2",), ("D2:F2",), ("A3:B3",), ("C3:C3",), ("D3:E3",), ("F3:F3",)]),
-        ("merge_with_value_only_in_master", [("A1:C3",)]),
-        ("merge_around_data", [("A1:C1",), ("A5:C5",)]),
-        ("merge_single_cell_noop", [("A1:A1",)]),  # degenerate
-        ("merge_adjacent_row_pair", [("A1:B1",), ("A2:B2",)]),
-        ("merge_wide_header_narrow_data", [("A1:J1",)]),
-        ("merge_mixed_sizes", [("A1:B2",), ("C1:E1",), ("A4:A10",), ("D4:F6",)]),
-        ("merge_100_singletons", [(f"{get_column_letter(((i-1) % 20)+1)}{((i-1)//20)+1}:{get_column_letter(((i-1) % 20)+1)}{((i-1)//20)+1}",) for i in range(1, 101)]),
-        ("merge_full_row", [("A1:Z1",)]),
-        ("merge_full_column_short", [("A1:A50",)]),
-        ("merge_nonadjacent_blocks", [("A1:C3",), ("F1:H3",), ("A5:C7",), ("F5:H7",)]),
-        ("merge_within_table_header", [("A1:D1",)]),  # we'll add a table below
-        ("merge_empty_range", [("B2:D4",)]),  # no data in master
-        ("merge_unicode_content", [("A1:C1",)]),
-        ("merge_with_rich_formatting", [("A1:C1",)]),
-        ("merge_column_header_stack", [("A1:A2",), ("B1:B2",), ("C1:C2",)]),
-        ("merge_report_grid", [("A1:D1",), ("A2:A10",), ("B2:D2",), ("B3:B10",), ("C3:D3",)]),
-        ("merge_large_single", [("A1:Z100",)]),
-        ("merge_thousand_cells", [("A1:J100",)]),
-        ("merge_within_table_footer", [("A11:D11",)]),
-        ("merge_spanning_formula_range", [("A1:C1",)]),
-    ]
-    for slug, ranges in recipes:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Merges"
-        for i, (rng,) in enumerate(ranges):
-            anchor = rng.split(":")[0]
-            try:
-                ws[anchor] = f"m{i+1}"  # must write before merging; skip if cell is already merged
-            except AttributeError:
-                pass
-            try:
-                ws.merge_cells(rng)
-            except Exception:
-                pass
-        if slug == "merge_with_value_only_in_master":
-            ws["A1"] = "only-master"
-        if slug == "merge_within_table_header":
-            for c, h in enumerate(["a", "b", "c", "d"], 1):
-                ws.cell(row=2, column=c, value=h)
-            for r in range(3, 8):
-                for c in range(1, 5):
-                    ws.cell(row=r, column=c, value=r * c)
-            ws.add_table(Table(displayName="T1", ref="A2:D7"))
-        if slug == "merge_unicode_content":
-            ws["A1"] = "éñÜ日本語 🚀 حرف"
-        if slug == "merge_with_rich_formatting":
-            ws["A1"].font = Font(bold=True, size=14, color="FF0000")
-            ws["A1"].fill = PatternFill("solid", start_color="FFFF00")
-            ws["A1"].alignment = Alignment(horizontal="center", vertical="center")
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/merge",
-                features=["merged_cells", slug],
-                expected_cells=len(ranges),
-            )
-        )
-    return files
-
-
-# --- named ranges ---------------------------------------------------------
-
-
-def build_named_range_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    recipes = [
-        ("named_workbook_scope", "Total", "Sheet1!$A$1", None),
-        ("named_sheet_scope", "SheetLocal", "Sheet1!$B$1", "Sheet1"),
-        ("named_constant", "TaxRate", "0.07", None),
-        ("named_range_multi_cell", "Prices", "Sheet1!$A$1:$A$10", None),
-        ("named_formula", "Doubled", "Sheet1!$A$1*2", None),
-        ("named_with_unicode", "Mẹtá", "Sheet1!$A$1", None),
-        ("named_long_identifier", "very_long_identifier_" + "x" * 50, "Sheet1!$A$1", None),
-        ("named_escaped_sheet", "Quoted", "'Sheet 2'!$A$1", None),  # needs 'Sheet 2'
-        ("named_external_like", "ExternalLike", "[Budget.xlsx]Sheet1!$A$1", None),
-        ("named_list_variation", "ChoiceList", "Sheet1!$D$1:$D$5", None),
-        ("named_col_range", "FullColumn", "Sheet1!$A:$A", None),
-        ("named_row_range", "FullRow", "Sheet1!$1:$1", None),
-        ("named_cross_sheet", "CrossRef", "Other!$A$1", None),  # needs Other sheet
-        ("named_multi_area", "Islands", "Sheet1!$A$1,Sheet1!$C$3", None),
-        ("named_with_hash_prefix", "_Prefix", "Sheet1!$A$1", None),
-        ("named_digits", "X1", "Sheet1!$A$1", None),
-        ("named_empty_formula_error", "ErrRef", "#REF!", None),
-        ("named_boolean_constant", "IsOn", "TRUE", None),
-        ("named_string_constant", "Greeting", '"hello"', None),
-        ("named_table_column_ref", "TableCol", "Table1[Value]", None),  # needs table
-    ]
-    for slug, name, ref, scope in recipes:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Sheet1"
-        for i in range(1, 11):
-            ws.cell(row=i, column=1, value=i)
-            ws.cell(row=i, column=4, value=f"item{i}")
-        if scope == "Sheet1":
-            ws.defined_names.add(DefinedName(name, attr_text=ref))
-        elif slug == "named_escaped_sheet":
-            wb.create_sheet("Sheet 2")["A1"] = 42
-            wb.defined_names.add(DefinedName(name, attr_text=ref))
-        elif slug == "named_cross_sheet":
-            wb.create_sheet("Other")["A1"] = 99
-            wb.defined_names.add(DefinedName(name, attr_text=ref))
-        elif slug == "named_table_column_ref":
-            for c, h in enumerate(["ID", "Value"], 1):
-                ws.cell(row=1, column=c, value=h)
-            for r in range(2, 6):
-                ws.cell(row=r, column=1, value=r)
-                ws.cell(row=r, column=2, value=r * 10)
-            ws.add_table(Table(displayName="Table1", ref="A1:B5"))
-            wb.defined_names.add(DefinedName(name, attr_text=ref))
-        else:
-            wb.defined_names.add(DefinedName(name, attr_text=ref))
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/named_range",
-                features=["named_range", slug],
-                expected_cells=14,
-            )
-        )
-    return files
-
-
-# --- data validation ------------------------------------------------------
-
-
-def build_data_validation_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    recipes = [
-        ("dv_list_literal", {"type": "list", "formula1": '"Red,Green,Blue"'}),
-        ("dv_list_range", {"type": "list", "formula1": "=$D$1:$D$5"}),
-        ("dv_whole_between", {"type": "whole", "operator": "between", "formula1": "1", "formula2": "100"}),
-        ("dv_decimal_gt", {"type": "decimal", "operator": "greaterThan", "formula1": "0.5"}),
-        ("dv_date_after", {"type": "date", "operator": "greaterThan", "formula1": "DATE(2024,1,1)"}),
-        ("dv_time_before", {"type": "time", "operator": "lessThan", "formula1": "TIME(12,0,0)"}),
-        ("dv_textlength", {"type": "textLength", "operator": "lessThan", "formula1": "10"}),
-        ("dv_custom", {"type": "custom", "formula1": "=A1>0"}),
-        ("dv_list_unicode", {"type": "list", "formula1": '"红,绿,蓝"'}),
-        ("dv_list_one_item", {"type": "list", "formula1": '"Only"'}),
-        ("dv_list_many_items", {"type": "list", "formula1": '"' + ",".join(f"opt{i}" for i in range(1, 31)) + '"'}),
-        ("dv_with_error_message", {"type": "list", "formula1": '"A,B"', "error": "pick A or B", "errorTitle": "Err"}),
-        ("dv_with_prompt", {"type": "list", "formula1": '"A,B"', "prompt": "select letter", "promptTitle": "Hint"}),
-        ("dv_ignore_blank", {"type": "list", "formula1": '"A,B"', "allowBlank": True}),
-        ("dv_multiple_ranges", {"type": "list", "formula1": '"A,B"'}),  # will apply to multiple ranges
-        ("dv_whole_equal", {"type": "whole", "operator": "equal", "formula1": "42"}),
-        ("dv_date_between", {"type": "date", "operator": "between", "formula1": "DATE(2020,1,1)", "formula2": "DATE(2025,12,31)"}),
-        ("dv_decimal_not_between", {"type": "decimal", "operator": "notBetween", "formula1": "0", "formula2": "1"}),
-        ("dv_textlength_greater", {"type": "textLength", "operator": "greaterThan", "formula1": "3"}),
-        ("dv_custom_cross_cell", {"type": "custom", "formula1": "=AND(A1>0,B1<100)"}),
-    ]
-    for slug, kwargs in recipes:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "DV"
-        for r in range(1, 6):
-            ws.cell(row=r, column=4, value=f"Option{r}")
-        dv_kwargs = {k: v for k, v in kwargs.items() if k not in {"error", "errorTitle", "prompt", "promptTitle", "allowBlank"}}
-        dv = DataValidation(**dv_kwargs)
-        if "error" in kwargs:
-            dv.error = kwargs["error"]
-            dv.errorTitle = kwargs.get("errorTitle", "Err")
-            dv.showErrorMessage = True
-        if "prompt" in kwargs:
-            dv.prompt = kwargs["prompt"]
-            dv.promptTitle = kwargs.get("promptTitle", "Hint")
-            dv.showInputMessage = True
-        if kwargs.get("allowBlank"):
-            dv.allowBlank = True
-        ws.add_data_validation(dv)
-        if slug == "dv_multiple_ranges":
-            dv.add("A1:A5")
-            dv.add("C1:C5")
-        else:
-            dv.add("A1:A10")
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/data_validation",
-                features=["data_validation", slug],
-                expected_cells=5,
-            )
-        )
-    return files
-
-
-# --- conditional formatting -----------------------------------------------
-
-
-def build_conditional_formatting_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-
-    def _seed_ws(ws):
-        for r in range(1, 11):
-            ws.cell(row=r, column=1, value=r)
-            ws.cell(row=r, column=2, value=11 - r)
-            ws.cell(row=r, column=3, value=(r * 7) % 10)
-
-    recipes: list[tuple[str, Callable[[object], None]]] = [
-        ("cf_cellis_greater", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            CellIsRule(operator="greaterThan", formula=["5"], fill=PatternFill("solid", start_color="FFC7CE")),
-        )),
-        ("cf_cellis_less", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            CellIsRule(operator="lessThan", formula=["3"], fill=PatternFill("solid", start_color="C6EFCE")),
-        )),
-        ("cf_cellis_between", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            CellIsRule(operator="between", formula=["3", "7"], fill=PatternFill("solid", start_color="FFEB9C")),
-        )),
-        ("cf_color_scale_2", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            ColorScaleRule(start_type="min", start_color="FFAA0000",
-                           end_type="max", end_color="FF00AA00"),
-        )),
-        ("cf_color_scale_3", lambda ws: ws.conditional_formatting.add(
-            "B1:B10",
-            ColorScaleRule(start_type="min", start_color="FFAA0000",
-                           mid_type="percentile", mid_value=50, mid_color="FFFFFFFF",
-                           end_type="max", end_color="FF00AA00"),
-        )),
-        ("cf_databar", lambda ws: ws.conditional_formatting.add(
-            "C1:C10",
-            DataBarRule(start_type="min", end_type="max", color="FF638EC6"),
-        )),
-        ("cf_iconset_3traffic", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            IconSetRule("3TrafficLights1", "percent", [0, 33, 67]),
-        )),
-        ("cf_iconset_5arrows", lambda ws: ws.conditional_formatting.add(
-            "B1:B10",
-            IconSetRule("5Arrows", "percent", [0, 20, 40, 60, 80]),
-        )),
-        ("cf_formula_rule", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            FormulaRule(formula=["MOD(ROW(),2)=0"], fill=PatternFill("solid", start_color="DDDDDD")),
-        )),
-        # Note: omit dxfId; openpyxl cannot round-trip Rule(dxfId=0) unless
-        # the differential style table has a matching entry.
-        ("cf_top10", lambda ws: ws.conditional_formatting.add(
-            "A1:C10", Rule(type="top10", rank=3),
-        )),
-        ("cf_unique_values", lambda ws: ws.conditional_formatting.add(
-            "A1:A10", Rule(type="uniqueValues"),
-        )),
-        ("cf_duplicate_values", lambda ws: ws.conditional_formatting.add(
-            "A1:A10", Rule(type="duplicateValues"),
-        )),
-        ("cf_contains_text", lambda ws: ws.conditional_formatting.add(
-            "A1:A10", Rule(type="containsText", operator="containsText", text="5"),
-        )),
-        ("cf_above_average", lambda ws: ws.conditional_formatting.add(
-            "A1:A10", Rule(type="aboveAverage", aboveAverage=True),
-        )),
-        ("cf_below_average", lambda ws: ws.conditional_formatting.add(
-            "A1:A10", Rule(type="aboveAverage", aboveAverage=False),
-        )),
-        ("cf_multiple_rules_same_range", lambda ws: (
-            ws.conditional_formatting.add("A1:A10", CellIsRule(operator="greaterThan", formula=["7"], fill=PatternFill("solid", start_color="FF0000"))),
-            ws.conditional_formatting.add("A1:A10", CellIsRule(operator="lessThan", formula=["3"], fill=PatternFill("solid", start_color="00FF00"))),
-        )),
-        ("cf_overlapping_ranges", lambda ws: (
-            ws.conditional_formatting.add("A1:B5", ColorScaleRule(start_type="min", start_color="FFFF0000", end_type="max", end_color="FF00FF00")),
-            ws.conditional_formatting.add("B3:C10", DataBarRule(start_type="min", end_type="max", color="FF0000FF")),
-        )),
-        ("cf_single_cell", lambda ws: ws.conditional_formatting.add(
-            "A1", CellIsRule(operator="equal", formula=["1"], fill=PatternFill("solid", start_color="FFFF00")),
-        )),
-        ("cf_large_range", lambda ws: ws.conditional_formatting.add(
-            "A1:Z100", CellIsRule(operator="greaterThan", formula=["0"], fill=PatternFill("solid", start_color="EEEEEE")),
-        )),
-        ("cf_entire_column", lambda ws: ws.conditional_formatting.add(
-            "A1:A1048576", CellIsRule(operator="greaterThan", formula=["5"], fill=PatternFill("solid", start_color="FFC7CE")),
-        )),
-        ("cf_formula_complex", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            FormulaRule(formula=["AND(A1>3,A1<8)"], fill=PatternFill("solid", start_color="99FF99")),
-        )),
-        ("cf_iconset_3signs", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            IconSetRule("3Signs", "percent", [0, 33, 67]),
-        )),
-        ("cf_iconset_4ratings", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            IconSetRule("4Rating", "percent", [0, 25, 50, 75]),
-        )),
-        ("cf_color_scale_percentile", lambda ws: ws.conditional_formatting.add(
-            "A1:A10",
-            ColorScaleRule(start_type="percentile", start_value=10, start_color="FF0000FF",
-                           end_type="percentile", end_value=90, end_color="FFFF0000"),
-        )),
-        ("cf_databar_negative", lambda ws: ws.conditional_formatting.add(
-            "C1:C10",
-            DataBarRule(start_type="min", end_type="max", color="FFFF0000", showValue=False),
-        )),
-    ]
-
-    for slug, apply in recipes:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "CF"
-        _seed_ws(ws)
-        apply(ws)
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/conditional_formatting",
-                features=["conditional_formatting", slug],
-                expected_cells=30,
-            )
-        )
-    return files
-
-
-# --- tables ---------------------------------------------------------------
-
-
-def build_table_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    for idx, (rows, cols, style, totals) in enumerate([
-        (3, 2, "TableStyleLight1", False),
-        (10, 3, "TableStyleMedium2", False),
-        (50, 5, "TableStyleMedium9", True),
-        (100, 8, "TableStyleDark1", False),
-        (5, 20, "TableStyleLight9", False),
-        (30, 4, "TableStyleMedium1", True),
-        (3, 1, "TableStyleLight5", False),
-        (3, 26, "TableStyleMedium3", False),
-        (3, 2, None, False),
-        (10, 3, "TableStyleMedium4", True),
-        (200, 6, "TableStyleMedium5", False),
-        (3, 2, "TableStyleLight13", False),
-        (3, 2, "TableStyleLight14", False),
-        (3, 2, "TableStyleLight15", False),
-        (3, 2, "TableStyleLight16", False),
-        (3, 2, "TableStyleLight17", False),
-        (3, 2, "TableStyleLight18", False),
-        (3, 2, "TableStyleLight19", False),
-        (3, 2, "TableStyleLight20", False),
-        (3, 2, "TableStyleLight21", False),
-    ]):
-        slug = f"table_{idx:02d}_{rows}r_{cols}c"
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Table"
-        for c in range(1, cols + 1):
-            ws.cell(row=1, column=c, value=f"H{c}")
-        for r in range(2, rows + 2):
-            for c in range(1, cols + 1):
-                ws.cell(row=r, column=c, value=(r + c) % 97)
-        ref = f"A1:{get_column_letter(cols)}{rows + 1}"
-        tab = Table(displayName=f"Tbl{idx}", ref=ref)
-        if style:
-            tab.tableStyleInfo = TableStyleInfo(name=style, showRowStripes=True)
-        if totals:
-            tab.totalsRowShown = False  # openpyxl can be finicky about totals; keep simple
-        ws.add_table(tab)
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/table",
-                features=["table", f"{rows}r{cols}c"],
-                expected_cells=(rows + 1) * cols,
-            )
-        )
-    return files
-
-
-# --- charts ---------------------------------------------------------------
-
-
-def build_chart_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    chart_types = [
-        ("chart_bar", BarChart, {"type": "col"}),
-        ("chart_bar_stacked", BarChart, {"type": "col", "grouping": "stacked", "overlap": 100}),
-        ("chart_bar_horizontal", BarChart, {"type": "bar"}),
-        ("chart_line", LineChart, {}),
-        ("chart_pie", PieChart, {}),
-        ("chart_area", AreaChart, {}),
-        ("chart_radar", RadarChart, {}),
-        ("chart_scatter", ScatterChart, {}),
-        ("chart_bubble", BubbleChart, {}),
-        ("chart_with_title", BarChart, {"title": "Q1 Sales"}),
-        ("chart_no_title", BarChart, {}),
-        ("chart_many_series", BarChart, {"series_count": 6}),
-        ("chart_one_datapoint", BarChart, {"rows": 2}),
-        ("chart_long_labels", BarChart, {"long_labels": True}),
-        ("chart_unicode_labels", BarChart, {"unicode": True}),
-        ("chart_two_charts_one_sheet", BarChart, {"double": True}),
-        ("chart_chart_plus_table", BarChart, {"with_table": True}),
-        ("chart_line_dashed", LineChart, {"smooth": True}),
-        ("chart_pie_exploded", PieChart, {}),
-        ("chart_scatter_with_lines", ScatterChart, {"scatterStyle": "lineMarker"}),
-    ]
-    for slug, ChartCls, opts in chart_types:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Data"
-        rows = opts.pop("rows", 6)
-        series_count = opts.pop("series_count", 2)
-        long_labels = opts.pop("long_labels", False)
-        unicode_flag = opts.pop("unicode", False)
-        double = opts.pop("double", False)
-        with_table = opts.pop("with_table", False)
-
-        ws.cell(row=1, column=1, value="Label")
-        for s in range(1, series_count + 1):
-            ws.cell(row=1, column=1 + s, value=f"Series{s}")
-        for r in range(2, rows + 1):
-            label = f"Item{r-1}"
-            if long_labels:
-                label = "A very long label " * 5 + str(r)
-            if unicode_flag:
-                label = f"标签{r} 🚀"
-            ws.cell(row=r, column=1, value=label)
-            for s in range(1, series_count + 1):
-                ws.cell(row=r, column=1 + s, value=((r * s * 7) % 50) + 1)
-
-        chart = ChartCls()
-        for k, v in opts.items():
-            try:
-                setattr(chart, k, v)
-            except Exception:
-                pass
-        data = Reference(ws, min_col=2, min_row=1, max_col=1 + series_count, max_row=rows)
-        cats = Reference(ws, min_col=1, min_row=2, max_row=rows)
-        chart.add_data(data, titles_from_data=True)
-        try:
-            chart.set_categories(cats)
-        except Exception:
-            pass
-        ws.add_chart(chart, f"{get_column_letter(series_count + 3)}2")
-
-        if double:
-            chart2 = BarChart()
-            chart2.add_data(data, titles_from_data=True)
-            chart2.set_categories(cats)
-            ws.add_chart(chart2, "H20")
-        if with_table:
-            ws.add_table(Table(displayName="ChartTable", ref=f"A1:{get_column_letter(series_count + 1)}{rows}"))
-
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/chart",
-                features=["chart", slug],
-                expected_cells=rows * (series_count + 1),
-            )
-        )
-    return files
-
-
-# --- rich text / styles / fonts ------------------------------------------
-
-
-def build_style_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    styles = [
-        ("style_bold", lambda c: setattr(c, "font", Font(bold=True))),
-        ("style_italic", lambda c: setattr(c, "font", Font(italic=True))),
-        ("style_underline", lambda c: setattr(c, "font", Font(underline="single"))),
-        ("style_strike", lambda c: setattr(c, "font", Font(strike=True))),
-        ("style_color_red", lambda c: setattr(c, "font", Font(color="FF0000"))),
-        ("style_font_size_24", lambda c: setattr(c, "font", Font(size=24))),
-        ("style_font_family_courier", lambda c: setattr(c, "font", Font(name="Courier New"))),
-        ("style_bg_yellow", lambda c: setattr(c, "fill", PatternFill("solid", start_color="FFFF00"))),
-        ("style_bg_pattern_gray125", lambda c: setattr(c, "fill", PatternFill(patternType="gray125"))),
-        ("style_border_thin_all", lambda c: setattr(c, "border", Border(left=Side(style="thin"), right=Side(style="thin"), top=Side(style="thin"), bottom=Side(style="thin")))),
-        ("style_border_thick_bottom", lambda c: setattr(c, "border", Border(bottom=Side(style="thick")))),
-        ("style_border_dashed", lambda c: setattr(c, "border", Border(top=Side(style="dashed")))),
-        ("style_border_double", lambda c: setattr(c, "border", Border(bottom=Side(style="double")))),
-        ("style_alignment_center", lambda c: setattr(c, "alignment", Alignment(horizontal="center", vertical="center"))),
-        ("style_alignment_wrap", lambda c: setattr(c, "alignment", Alignment(wrap_text=True))),
-        ("style_alignment_rotate_45", lambda c: setattr(c, "alignment", Alignment(text_rotation=45))),
-        ("style_alignment_rotate_90", lambda c: setattr(c, "alignment", Alignment(text_rotation=90))),
-        ("style_indent", lambda c: setattr(c, "alignment", Alignment(indent=3))),
-        ("style_shrink_to_fit", lambda c: setattr(c, "alignment", Alignment(shrink_to_fit=True))),
-        ("style_vertical_text", lambda c: setattr(c, "alignment", Alignment(text_rotation=255))),
-        ("style_combined", lambda c: (
-            setattr(c, "font", Font(bold=True, italic=True, size=16, color="0000FF")),
-            setattr(c, "fill", PatternFill("solid", start_color="FFE0E0")),
-            setattr(c, "alignment", Alignment(horizontal="center", vertical="center", wrap_text=True)),
-            setattr(c, "border", Border(left=Side("thin"), right=Side("thin"), top=Side("medium"), bottom=Side("medium"))),
-        )),
-        ("style_number_format_currency", lambda c: setattr(c, "number_format", "$#,##0.00")),
-        ("style_number_format_percent", lambda c: setattr(c, "number_format", "0.0%")),
-        ("style_number_format_scientific", lambda c: setattr(c, "number_format", "0.00E+00")),
-        ("style_number_format_date_iso", lambda c: setattr(c, "number_format", "yyyy-mm-dd")),
-        ("style_number_format_date_long", lambda c: setattr(c, "number_format", "dddd, mmmm dd, yyyy")),
-        ("style_number_format_time", lambda c: setattr(c, "number_format", "hh:mm:ss")),
-        ("style_number_format_negative_red", lambda c: setattr(c, "number_format", "#,##0;[Red]-#,##0")),
-        ("style_number_format_accounting", lambda c: setattr(c, "number_format", "_($* #,##0.00_)")),
-        ("style_number_format_fraction", lambda c: setattr(c, "number_format", "# ?/?")),
-    ]
-    for slug, apply in styles:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Style"
-        ws["A1"] = "Styled Cell"
-        if "number_format" in slug:
-            ws["A1"] = 1234.567
-            if "date" in slug or "time" in slug:
-                ws["A1"] = datetime(2024, 6, 15, 14, 30, 45)
-        apply(ws["A1"])
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="matrix/style",
-                features=["style", slug],
-                expected_cells=1,
-            )
-        )
-    return files
-
-
-# --- dates & times --------------------------------------------------------
-
-
-def build_date_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    entries = [
-        ("date_today", datetime.now()),
-        ("date_epoch_1900", datetime(1900, 1, 1)),
-        ("date_epoch_1904", datetime(1904, 1, 2)),
-        ("date_y2k", datetime(2000, 1, 1)),
-        ("date_future_2099", datetime(2099, 12, 31)),
-        ("date_leap_year", datetime(2020, 2, 29)),
-        ("date_weird_feb28", datetime(1900, 2, 28)),
-        ("date_first_valid", datetime(1900, 3, 1)),
-        ("date_midnight", datetime(2024, 6, 1, 0, 0, 0)),
-        ("date_nearmidnight", datetime(2024, 6, 1, 23, 59, 59)),
-        ("date_iso_string", "2024-06-15"),
-        ("date_us_string", "06/15/2024"),
-        ("date_eu_string", "15/06/2024"),
-        ("date_just_time", time(13, 30, 0)),
-        ("date_date_only", date(2024, 6, 15)),
-        ("date_with_timedelta_format", datetime(2024, 6, 15)),
-        ("date_mixed_formats_in_column", None),
-        ("date_fractional_days", 44500.5),  # excel serial
-        ("date_negative_serial", -1),  # invalid
-        ("date_text_like_date", "2024-06-15 but not really"),
-    ]
-    for slug, val in entries:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Dates"
-        if slug == "date_mixed_formats_in_column":
-            ws["A1"] = datetime(2024, 1, 1)
-            ws["A2"] = "2024-02-01"
-            ws["A3"] = 44593
-            ws["A4"] = date(2024, 4, 1)
-            ws["A5"] = datetime(2024, 5, 1, 12, 30)
-        else:
-            ws["A1"] = val
-            ws["A1"].number_format = "yyyy-mm-dd hh:mm:ss"
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(path=out, group="matrix/date", features=["date", slug], expected_cells=1),
-        )
-    return files
-
-
-# --- errors ---------------------------------------------------------------
-
-
-def build_error_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    errors = [
-        ("error_div_zero", "=1/0"),
-        ("error_name", "=UNKNOWN_FN()"),
-        ("error_ref", "=#REF!"),
-        ("error_value", "=\"a\"+1"),
-        ("error_num", "=SQRT(-1)"),
-        ("error_null", "=A1 A2"),  # intersection of disjoint ranges
-        ("error_na", "=NA()"),
-        ("error_getting_data", "=VLOOKUP(999,A1:B2,2,FALSE)"),
-        ("error_mixed_with_text", "=IF(TRUE,1/0,\"ok\")"),
-        ("error_chained", "=1/0+2"),
-        ("error_deliberate_bad_ref", "=BadSheet!A1"),
-        ("error_unclosed_paren", "=SUM(A1"),  # may get rewritten by openpyxl
-        ("error_bad_range", "=SUM(A1:)"),
-        ("error_too_many_args", "=IF(1,2,3,4,5)"),
-        ("error_circular_simple", "=A1"),  # A1 refers to itself
-    ]
-    for slug, formula in errors:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Err"
-        try:
-            if slug == "error_circular_simple":
-                ws["A1"] = formula
-            else:
-                ws["A2"] = 1
-                ws["A1"] = formula
-        except Exception:
-            pass  # a few are too malformed even for openpyxl to accept
-        out = _matrix_path(slug)
-        try:
-            _finalize(wb, out)
-        except Exception:
-            continue
-        files.append(
-            GeneratedFile(path=out, group="matrix/error", features=["error", slug], expected_cells=2, expected_formulas=1),
-        )
-    return files
-
-
-# --- hidden rows/cols/sheets ---------------------------------------------
-
-
-def build_hidden_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    specs = [
-        ("hidden_single_row", "row", [3]),
-        ("hidden_single_col", "col", ["B"]),
-        ("hidden_many_rows", "row", list(range(2, 20, 2))),
-        ("hidden_many_cols", "col", ["B", "D", "F", "H"]),
-        ("hidden_first_row", "row", [1]),
-        ("hidden_last_row", "row", [100]),
-        ("hidden_row_at_boundary", "row", [50, 51, 52]),
-        ("hidden_entire_block", "row", list(range(5, 15))),
-        ("hidden_sheet_tab", "sheet", None),
-        ("hidden_very_hidden_sheet", "veryhidden", None),
-        ("hidden_with_outline_group", "outline", None),
-        ("hidden_mixed_rows_cols", "mixed", None),
-    ]
-    for slug, kind, items in specs:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Main"
-        for r in range(1, 30):
-            for c in range(1, 10):
-                ws.cell(row=r, column=c, value=(r + c) % 100)
-        if kind == "row":
-            for r in items:
-                ws.row_dimensions[r].hidden = True
-        elif kind == "col":
-            for col in items:
-                ws.column_dimensions[col].hidden = True
-        elif kind == "sheet":
-            hs = wb.create_sheet("HiddenSheet")
-            hs["A1"] = "hidden content"
-            hs.sheet_state = "hidden"
-        elif kind == "veryhidden":
-            hs = wb.create_sheet("VeryHidden")
-            hs["A1"] = "very hidden"
-            hs.sheet_state = "veryHidden"
-        elif kind == "outline":
-            for r in range(5, 15):
-                ws.row_dimensions[r].outline_level = 1
-                ws.row_dimensions[r].hidden = True
-        elif kind == "mixed":
-            ws.row_dimensions[3].hidden = True
-            ws.row_dimensions[5].hidden = True
-            ws.column_dimensions["C"].hidden = True
-            ws.column_dimensions["E"].hidden = True
-            hs = wb.create_sheet("MixedHidden")
-            hs.sheet_state = "hidden"
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(path=out, group="matrix/hidden", features=["hidden", slug], expected_cells=29 * 9),
-        )
-    return files
-
-
-# --- edge addresses -------------------------------------------------------
-
-
-def build_edge_address_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    specs = [
-        ("addr_xfd1", "XFD1", "lastcol_row1"),
-        ("addr_a1048576", "A1048576", "col_a_lastrow"),
-        ("addr_xfd1048576", "XFD1048576", "last_cell"),
-        ("addr_zz1000", "ZZ1000", "mid_extreme"),
-        ("addr_aaa1", "AAA1", "col_aaa"),
-        ("addr_aa500", "AA500", "col_aa_500"),
-        ("addr_very_sparse", None, "sparse"),
-        ("addr_column_1000", f"{get_column_letter(1000)}1", "col_1000"),
-        ("addr_row_100000", "A100000", "row_100k"),
-        ("addr_gaps", None, "gaps"),
-    ]
-    for slug, addr, kind in specs:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Edge"
-        ws["A1"] = "anchor"
-        if kind == "sparse":
-            ws["A1"] = "tl"
-            ws["XFD1048576"] = "br"
-        elif kind == "gaps":
-            for offset in [0, 100, 1000, 10000]:
-                ws.cell(row=1 + offset, column=1 + min(offset // 100, 50), value=f"v{offset}")
-        elif addr:
-            ws[addr] = f"marker_{slug}"
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(path=out, group="matrix/edge_address", features=["edge_address", slug], expected_cells=2),
-        )
-    return files
-
-
-# --- sheet name variations ------------------------------------------------
-
-
-SHEET_NAME_VARIANTS = [
-    ("sheetname_ascii", "Simple"),
-    ("sheetname_spaces", "Has Spaces"),
-    ("sheetname_quote", "Has'Quote"),
-    ("sheetname_unicode_jp", "日本語シート"),
-    ("sheetname_unicode_emoji", "📊 Sheet"),
-    ("sheetname_leading_digits", "1stSheet"),
-    ("sheetname_long_30chars", "X" * 30),
-    ("sheetname_dash_underscore", "my-sheet_name"),
-    ("sheetname_hash_unicode", "Résumé-2025"),
-    ("sheetname_parens", "Sheet (copy)"),
-]
-
-
-def build_sheet_name_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    for slug, name in SHEET_NAME_VARIANTS:
-        wb = Workbook()
-        ws = wb.active
-        try:
-            ws.title = name[:31]  # Excel limit
-        except Exception:
-            ws.title = "Fallback"
-        ws["A1"] = f"in {name!r}"
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(
-            GeneratedFile(path=out, group="matrix/sheet_name", features=["sheet_name", slug], expected_cells=1),
-        )
-    return files
-
-
-# --- hyperlinks / comments / misc ----------------------------------------
-
-
-def build_misc_files() -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-
-    # hyperlinks
-    hl_specs = [
-        ("hyperlink_external_http", "https://example.com"),
-        ("hyperlink_external_https", "https://www.anthropic.com"),
-        ("hyperlink_mailto", "mailto:test@example.com"),
-        ("hyperlink_file", "file:///tmp/x.txt"),
-        ("hyperlink_internal_cell", "#Sheet1!B5"),
-        ("hyperlink_internal_named", "#NamedRng"),
-        ("hyperlink_many_links", None),
-    ]
-    for slug, url in hl_specs:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Sheet1"
-        if slug == "hyperlink_many_links":
-            for i in range(1, 21):
-                ws.cell(row=i, column=1, value=f"link{i}").hyperlink = f"https://example.com/page/{i}"
-        else:
-            ws["A1"].hyperlink = url
-            ws["A1"].value = f"click ({slug})"
-        if slug == "hyperlink_internal_named":
-            wb.defined_names.add(DefinedName("NamedRng", attr_text="Sheet1!$A$1"))
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(GeneratedFile(path=out, group="matrix/hyperlink", features=["hyperlink", slug], expected_cells=20 if url is None else 1))
-
-    # comments
-    comment_specs = [
-        ("comment_short", "Quick note"),
-        ("comment_multiline", "line1\nline2\nline3"),
-        ("comment_unicode", "注释 🔍 ملاحظة"),
-        ("comment_long", "Note " * 500),
-        ("comment_many_cells", None),
-    ]
-    for slug, text in comment_specs:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Comments"
-        if slug == "comment_many_cells":
-            for i in range(1, 21):
-                ws.cell(row=i, column=1, value=f"c{i}").comment = Comment(f"comment on row {i}", "Builder")
-        else:
-            ws["A1"] = "Cell with comment"
-            ws["A1"].comment = Comment(text, "Builder")
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(GeneratedFile(path=out, group="matrix/comment", features=["comment", slug], expected_cells=20 if text is None else 1))
-
-    # freeze panes
-    for slug, freeze in [
-        ("freeze_row_1", "A2"),
-        ("freeze_col_a", "B1"),
-        ("freeze_both_a1", "B2"),
-        ("freeze_mid_sheet", "C5"),
-        ("freeze_deep", "E10"),
-    ]:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "Freeze"
-        for r in range(1, 21):
-            for c in range(1, 10):
-                ws.cell(row=r, column=c, value=f"{r},{c}")
-        ws.freeze_panes = freeze
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(GeneratedFile(path=out, group="matrix/freeze_panes", features=["freeze_panes", slug], expected_cells=20 * 9))
-
-    # rich text (mixed fonts within a cell) — openpyxl exposes this via CellRichText
-    try:
-        from openpyxl.cell.rich_text import CellRichText, TextBlock
-        from openpyxl.cell.text import InlineFont
-        for slug, blocks in [
-            ("rich_text_bold_plain", [TextBlock(InlineFont(b=True), "Bold "), TextBlock(InlineFont(), "plain")]),
-            ("rich_text_colors", [TextBlock(InlineFont(color="FF0000"), "Red "), TextBlock(InlineFont(color="0000FF"), "Blue")]),
-            ("rich_text_sizes", [TextBlock(InlineFont(sz="8"), "small "), TextBlock(InlineFont(sz="18"), "BIG")]),
-        ]:
-            wb = Workbook()
-            ws = wb.active
-            ws.title = "Rich"
-            ws["A1"] = CellRichText(blocks)
-            out = _matrix_path(slug)
-            _finalize(wb, out)
-            files.append(GeneratedFile(path=out, group="matrix/rich_text", features=["rich_text", slug], expected_cells=1))
-    except Exception:
-        pass
-
-    # 3D refs / cross-sheet
-    for slug in ["threed_sum_across_sheets"]:
-        wb = Workbook()
-        ws = wb.active
-        ws.title = "A"
-        for r in range(1, 6):
-            ws.cell(row=r, column=1, value=r)
-        wb.create_sheet("B")
-        for r in range(1, 6):
-            wb["B"].cell(row=r, column=1, value=r * 10)
-        summary = wb.create_sheet("Summary")
-        summary["A1"] = "=SUM(A:B!A1:A5)"  # Excel 3D ref syntax
-        out = _matrix_path(slug)
-        _finalize(wb, out)
-        files.append(GeneratedFile(path=out, group="matrix/3d_ref", features=["3d_ref", slug], expected_cells=11, expected_formulas=1))
-
-    return files
-
-
-MATRIX_BUILDERS: list[Callable[[], list[GeneratedFile]]] = [
-    build_formula_files,
-    build_merge_files,
-    build_named_range_files,
-    build_data_validation_files,
-    build_conditional_formatting_files,
-    build_table_files,
-    build_chart_files,
-    build_style_files,
-    build_date_files,
-    build_error_files,
-    build_hidden_files,
-    build_edge_address_files,
-    build_sheet_name_files,
-    build_misc_files,
-]
-
-
-# ----------------------------------------------------------------------------
-# Combinatoric group — randomised feature cocktails
-# ----------------------------------------------------------------------------
-
-
-COMBO_DIR = OUT_ROOT / "combo"
-DENSITIES = [5, 10, 25, 50, 100]
-SEEDS_PER_DENSITY = 80   # → 400 combo files
-
-
-def _rand_cell_value(rng: random.Random):
-    kind = rng.choice(["int", "float", "str", "bool", "date", "blank"])
-    if kind == "int":
-        return rng.randint(-10_000, 10_000)
-    if kind == "float":
-        return rng.uniform(-1000.0, 1000.0)
-    if kind == "str":
-        return "".join(rng.choices(string.ascii_letters + string.digits + " ", k=rng.randint(1, 30)))
-    if kind == "bool":
-        return rng.choice([True, False])
-    if kind == "date":
-        return date(rng.randint(2000, 2030), rng.randint(1, 12), rng.randint(1, 28))
-    return None
-
-
-def _safe_set(ws, row: int, col: int, value) -> bool:
-    """Try to set ws cell; return True on success, False if cell is part of a merge."""
-    try:
-        ws.cell(row=row, column=col, value=value)
-        return True
-    except (AttributeError, TypeError):
-        return False
-
-
-def build_combo_file(seed: int, density: int) -> GeneratedFile | None:
-    rng = random.Random(seed * 10_000 + density)
-    wb = Workbook()
-    ws = wb.active
-    ws.title = f"Main_{seed}_{density}"
-    cells_written = 0
-    formulas = 0
-    features: set[str] = set()
-
-    for _ in range(density):
-        op = rng.choices(
-            population=["cell", "formula", "merge", "style", "comment", "hyperlink", "validation", "table", "named"],
-            weights=[45, 20, 8, 12, 3, 3, 3, 3, 3],
-            k=1,
-        )[0]
-        r = rng.randint(1, 100)
-        c = rng.randint(1, 30)
-        if op == "cell":
-            if _safe_set(ws, r, c, _rand_cell_value(rng)):
-                cells_written += 1
-                features.add("cells")
-        elif op == "formula":
-            if _safe_set(ws, r, c, f"=SUM({get_column_letter(c)}1:{get_column_letter(c)}{max(1, r-1)})"):
-                formulas += 1
-                features.add("formulas")
-        elif op == "merge":
-            try:
-                r2 = min(r + rng.randint(0, 3), 100)
-                c2 = min(c + rng.randint(0, 3), 30)
-                if (r, c) != (r2, c2):
-                    _safe_set(ws, r, c, f"m{seed}")  # write before merge
-                    ws.merge_cells(start_row=r, start_column=c, end_row=r2, end_column=c2)
-                    features.add("merge")
-            except Exception:
-                pass
-        elif op == "style":
-            try:
-                cell = ws.cell(row=r, column=c)
-                if cell.value is None:
-                    if _safe_set(ws, r, c, rng.randint(0, 99)):
-                        cells_written += 1
-                    cell = ws.cell(row=r, column=c)
-                cell.font = Font(bold=rng.choice([True, False]), italic=rng.choice([True, False]), color=f"{rng.randint(0, 0xFFFFFF):06X}")
-                cell.fill = PatternFill("solid", start_color=f"{rng.randint(0xAAAAAA, 0xFFFFFF):06X}")
-                features.add("style")
-            except AttributeError:
-                pass
-        elif op == "comment":
-            try:
-                if _safe_set(ws, r, c, "c"):
-                    ws.cell(row=r, column=c).comment = Comment(f"seed{seed}", "combo")
-                    cells_written += 1
-                    features.add("comment")
-            except Exception:
-                pass
-        elif op == "hyperlink":
-            try:
-                if _safe_set(ws, r, c, "lnk"):
-                    ws.cell(row=r, column=c).hyperlink = f"https://example.com/{seed}/{r}-{c}"
-                    cells_written += 1
-                    features.add("hyperlink")
-            except Exception:
-                pass
-        elif op == "validation":
-            try:
-                dv = DataValidation(type="list", formula1='"A,B,C"')
-                ws.add_data_validation(dv)
-                dv.add(f"{get_column_letter(c)}{r}")
-                features.add("validation")
-            except Exception:
-                pass
-        elif op == "table":
-            try:
-                r2 = min(r + 3, 100)
-                c2 = min(c + 2, 30)
-                if r2 > r and c2 > c:
-                    for rr in range(r, r2 + 1):
-                        for cc in range(c, c2 + 1):
-                            try:
-                                if ws.cell(row=rr, column=cc).value is None:
-                                    _safe_set(ws, rr, cc, rr * cc)
-                            except AttributeError:
-                                pass
-                    for cc in range(c, c2 + 1):
-                        _safe_set(ws, r, cc, f"H{cc}")
-                    tab_name = f"T{seed}_{density}_{rng.randint(0, 99)}"
-                    ws.add_table(Table(displayName=tab_name, ref=f"{get_column_letter(c)}{r}:{get_column_letter(c2)}{r2}"))
-                    features.add("table")
-            except Exception:
-                pass
-        elif op == "named":
-            try:
-                nm = f"N_{seed}_{density}_{rng.randint(0, 99)}"
-                wb.defined_names.add(DefinedName(nm, attr_text=f"{ws.title}!${get_column_letter(c)}${r}"))
-                features.add("named_range")
-            except Exception:
-                pass
-
-    out = COMBO_DIR / f"combo_d{density:03d}_s{seed:03d}.xlsx"
-    try:
-        _finalize(wb, out)
-    except Exception:
-        return None
-    return GeneratedFile(
-        path=out,
-        group="combo",
-        features=sorted(features),
-        expected_cells=cells_written,
-        expected_formulas=formulas,
-        notes=f"seed={seed} density={density}",
-    )
-
-
-def build_combo_files(limit: int | None) -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    count = 0
-    for density in DENSITIES:
-        for seed in range(SEEDS_PER_DENSITY):
-            if limit is not None and count >= limit:
-                return files
-            gf = build_combo_file(seed, density)
-            if gf:
-                files.append(gf)
-            count += 1
-    return files
-
-
-# ----------------------------------------------------------------------------
-# Adversarial group — try to break the parser
-# ----------------------------------------------------------------------------
-
-
-ADVERSARIAL_DIR = OUT_ROOT / "adversarial"
-
-
-def _adv_path(slug: str) -> Path:
-    return ADVERSARIAL_DIR / f"{slug}.xlsx"
-
-
-def build_adversarial_files(limit: int | None) -> list[GeneratedFile]:
-    files: list[GeneratedFile] = []
-    specs: list[tuple[str, Callable[[Workbook], tuple[int, int, str]]]] = []
-
-    def _mk(slug: str):
-        def deco(fn: Callable[[Workbook], tuple[int, int, str]]):
-            specs.append((slug, fn))
-            return fn
-        return deco
-
-    @_mk("adv_empty_workbook")
-    def _(wb):
-        # openpyxl always has one sheet; clear it
-        ws = wb.active
-        ws.title = "Empty"
-        return 0, 0, "no cells"
-
-    @_mk("adv_one_cell_1e300")
-    def _(wb):
-        wb.active["A1"] = 1e300
-        return 1, 0, "huge float"
-
-    @_mk("adv_one_cell_neg_1e300")
-    def _(wb):
-        wb.active["A1"] = -1e300
-        return 1, 0, "huge negative"
-
-    @_mk("adv_one_cell_tiny")
-    def _(wb):
-        wb.active["A1"] = 1e-300
-        return 1, 0, "tiny float"
-
-    @_mk("adv_unicode_bomb")
-    def _(wb):
-        ws = wb.active
-        emojis = "🚀🔥💀🎯🌀⚡️🌈🎨🧪💡" * 20
-        rtl = "مرحبا بكم في اختبار التحليل" * 5
-        cjk = "こんにちは世界 你好世界 안녕하세요" * 5
-        ws["A1"] = emojis + " " + rtl + " " + cjk
-        ws["A2"] = "\u200B\u200C\u200D\ufeff"  # zero-width chars
-        ws["A3"] = "a" * 32_000  # long string
-        return 3, 0, "unicode stress"
-
-    @_mk("adv_circular_chain_10")
-    def _(wb):
-        ws = wb.active
-        for i in range(1, 10):
-            ws.cell(row=i, column=1, value=f"=A{i+1}")
-        ws["A10"] = "=A1"
-        return 10, 10, "10-step cycle"
-
-    @_mk("adv_formula_chain_deep_500")
-    def _(wb):
-        ws = wb.active
-        ws["A1"] = 1
-        for i in range(2, 501):
-            ws.cell(row=i, column=1, value=f"=A{i-1}+1")
-        return 500, 499, "500-deep chain"
-
-    @_mk("adv_huge_merge_1000x100")
-    def _(wb):
-        ws = wb.active
-        ws.merge_cells("A1:CV1000")  # 100 cols × 1000 rows
-        ws["A1"] = "one giant merge"
-        return 1, 0, "100k-cell merge"
-
-    @_mk("adv_many_merges_5000")
-    def _(wb):
-        ws = wb.active
-        for i in range(5000):
-            r = i // 50 + 1
-            c = (i % 50) * 2 + 1
-            try:
-                ws.merge_cells(start_row=r, start_column=c, end_row=r, end_column=c + 1)
-                ws.cell(row=r, column=c, value="m")
-            except Exception:
-                pass
-        return 2500, 0, "5000 merges"
-
-    @_mk("adv_100_sheets")
-    def _(wb):
-        wb.active.title = "S0"
-        for i in range(1, 100):
-            ws = wb.create_sheet(f"S{i}")
-            ws["A1"] = i
-        return 100, 0, "100 sheets"
-
-    @_mk("adv_very_wide_2000_cols")
-    def _(wb):
-        ws = wb.active
-        for c in range(1, 2001):
-            ws.cell(row=1, column=c, value=c)
-        return 2000, 0, "2000 cols in one row"
-
-    @_mk("adv_very_tall_20k_rows")
-    def _(wb):
-        ws = wb.active
-        for r in range(1, 20_001):
-            ws.cell(row=r, column=1, value=r)
-        return 20_000, 0, "20k rows"
-
-    @_mk("adv_sparse_million")
-    def _(wb):
-        ws = wb.active
-        for r in [1, 10, 100, 1000, 10_000, 100_000, 500_000, 1_000_000]:
-            ws.cell(row=r, column=1, value=f"r{r}")
-        ws["A1"].value = "start"
-        return 8, 0, "sparse across 1M rows"
-
-    @_mk("adv_all_error_types")
-    def _(wb):
-        ws = wb.active
-        for i, formula in enumerate([
-            "=1/0", "=SQRT(-1)", "=NA()", "=BAD_FN()", "=#REF!", '="a"+1',
-        ], start=1):
-            ws.cell(row=i, column=1, value=formula)
-        return 6, 6, "errors galore"
-
-    @_mk("adv_broken_refs")
-    def _(wb):
-        ws = wb.active
-        ws["A1"] = "=MissingSheet!B5"
-        ws["A2"] = "=OtherBook.xlsx!Sheet1!A1"
-        ws["A3"] = "=#REF!+1"
-        return 3, 3, "dangling references"
-
-    @_mk("adv_long_formula")
-    def _(wb):
-        ws = wb.active
-        ws["A1"] = 1
-        long_expr = "=" + "+".join("A1" for _ in range(2000))
-        ws["B1"] = long_expr
-        return 2, 1, "very long formula"
-
-    @_mk("adv_long_cell_string")
-    def _(wb):
-        ws = wb.active
-        ws["A1"] = "X" * 32_767  # Excel limit
-        return 1, 0, "32k char cell"
-
-    @_mk("adv_all_formulas_sheet")
-    def _(wb):
-        ws = wb.active
-        for r in range(1, 101):
-            for c in range(1, 6):
-                ws.cell(row=r, column=c, value=f"={get_column_letter(c)}{((r - 1) % 5) + 1}+1")
-        return 500, 500, "500 formulas"
-
-    @_mk("adv_massive_table")
-    def _(wb):
-        ws = wb.active
-        for c in range(1, 51):
-            ws.cell(row=1, column=c, value=f"C{c}")
-        for r in range(2, 202):
-            for c in range(1, 51):
-                ws.cell(row=r, column=c, value=(r * c) % 997)
-        ws.add_table(Table(displayName="Huge", ref=f"A1:{get_column_letter(50)}201"))
-        return 10_050, 0, "50x200 table"
-
-    @_mk("adv_cyclic_cross_sheet")
-    def _(wb):
-        a = wb.active
-        a.title = "A"
-        a["A1"] = "=B!A1"
-        b = wb.create_sheet("B")
-        b["A1"] = "=A!A1"
-        return 2, 2, "cross-sheet cycle"
-
-    @_mk("adv_many_named_ranges")
-    def _(wb):
-        ws = wb.active
-        for i in range(1, 301):
-            wb.defined_names.add(DefinedName(f"N{i}", attr_text=f"Sheet!${get_column_letter((i % 30) + 1)}${(i % 100) + 1}"))
-        ws["A1"] = "seed"
-        return 1, 0, "300 named ranges"
-
-    @_mk("adv_duplicate_sheet_names_almost")
-    def _(wb):
-        wb.active.title = "Data"
-        wb.create_sheet("data")
-        wb.create_sheet("DATA")
-        return 0, 0, "case-sensitive sheet names"
-
-    @_mk("adv_rtl_sheet")
-    def _(wb):
-        ws = wb.active
-        ws.sheet_view.rightToLeft = True
-        ws["A1"] = "النص يقرأ من اليمين"
-        return 1, 0, "RTL view"
-
-    @_mk("adv_extreme_column_width")
-    def _(wb):
-        ws = wb.active
-        ws.column_dimensions["A"].width = 255
-        ws.row_dimensions[1].height = 409  # excel max
-        ws["A1"] = "wide+tall"
-        return 1, 0, "max col/row size"
-
-    @_mk("adv_autofilter_large")
-    def _(wb):
-        ws = wb.active
-        for c in range(1, 11):
-            ws.cell(row=1, column=c, value=f"H{c}")
-        for r in range(2, 301):
-            for c in range(1, 11):
-                ws.cell(row=r, column=c, value=r * c)
-        ws.auto_filter.ref = "A1:J300"
-        return 3000, 0, "autofilter 3k cells"
-
-    @_mk("adv_mixed_types_same_column")
-    def _(wb):
-        ws = wb.active
-        for r in range(1, 51):
-            if r % 5 == 0:
-                ws.cell(row=r, column=1, value=f"text_{r}")
-            elif r % 5 == 1:
-                ws.cell(row=r, column=1, value=r)
-            elif r % 5 == 2:
-                ws.cell(row=r, column=1, value=float(r) / 7.0)
-            elif r % 5 == 3:
-                ws.cell(row=r, column=1, value=date(2024, (r % 12) + 1, 1))
-            else:
-                ws.cell(row=r, column=1, value=(r % 2 == 0))
-        return 50, 0, "mixed types in one column"
-
-    _SAFE_STR_CHARS = string.ascii_letters + string.digits + " -_.,:;!?@#$%^&*()[]{}<>+=/|~"
-
-    # adversarial via parametrised generator to pad counts to ~1000 total
-    for i in range(1, 278):  # 277 parametric adversarial files → 1000 total generated
-        rng = random.Random(10_000 + i)
-
-        @_mk(f"adv_param_{i:03d}")
-        def _(wb, rng=rng, i=i):
-            ws = wb.active
-            # Keep sizes modest so the full bench runs under 10 min wall-clock.
-            n_cells = rng.randint(100, 800)
-            cells = 0
-            formulas = 0
-            for _ in range(n_cells):
-                r = rng.randint(1, 300)
-                c = rng.randint(1, 50)
-                kind = rng.choice(["int", "str", "formula", "date", "bool"])
-                try:
-                    if kind == "int":
-                        val = rng.randint(-1_000_000, 1_000_000)
-                    elif kind == "str":
-                        val = "".join(rng.choices(_SAFE_STR_CHARS, k=rng.randint(1, 50)))
-                    elif kind == "formula":
-                        val = f"={get_column_letter(max(1, c - 1))}{max(1, r - 1)}+1"
-                    elif kind == "date":
-                        val = date(rng.randint(1900, 2099), rng.randint(1, 12), rng.randint(1, 28))
-                    else:
-                        val = rng.choice([True, False])
-                    if _safe_set(ws, r, c, val):
-                        cells += 1
-                        if kind == "formula":
-                            formulas += 1
-                except Exception:
-                    pass
-            for _ in range(rng.randint(0, 20)):
-                try:
-                    r0 = rng.randint(1, 100)
-                    c0 = rng.randint(1, 50)
-                    ws.merge_cells(start_row=r0, start_column=c0, end_row=r0 + rng.randint(0, 5), end_column=c0 + rng.randint(0, 5))
-                except Exception:
-                    pass
-            return cells, formulas, f"param seed {i}"
-
-    files: list[GeneratedFile] = []
-    count = 0
-    for slug, fn in specs:
-        if limit is not None and count >= limit:
-            break
-        wb = Workbook()
-        try:
-            cells, formulas, notes = fn(wb)
-        except Exception as exc:
-            # skip uncooperative generators
-            print(f"  ⚠ adversarial {slug} failed to build: {exc}", file=sys.stderr)
-            continue
-        out = _adv_path(slug)
-        try:
-            _finalize(wb, out)
-        except Exception as exc:
-            print(f"  ⚠ adversarial {slug} failed to save: {exc}", file=sys.stderr)
-            continue
-        files.append(
-            GeneratedFile(
-                path=out,
-                group="adversarial",
-                features=["adversarial", slug],
-                expected_cells=cells,
-                expected_formulas=formulas,
-                notes=notes,
-            )
-        )
-        count += 1
-    return files
-
-
-# ----------------------------------------------------------------------------
-# Entry point
-# ----------------------------------------------------------------------------
-
-
-def build_all(groups: set[str], force: bool, limit: int | None) -> list[GeneratedFile]:
-    all_files: list[GeneratedFile] = []
-    if "matrix" in groups:
-        MATRIX_DIR.mkdir(parents=True, exist_ok=True)
-        for builder in MATRIX_BUILDERS:
-            for gf in builder():
-                all_files.append(gf)
-                if limit is not None and len(all_files) >= limit:
-                    return all_files
-    if "combo" in groups:
-        COMBO_DIR.mkdir(parents=True, exist_ok=True)
-        remaining = None if limit is None else max(0, limit - len(all_files))
-        all_files.extend(build_combo_files(remaining))
-        if limit is not None and len(all_files) >= limit:
-            return all_files
-    if "adversarial" in groups:
-        ADVERSARIAL_DIR.mkdir(parents=True, exist_ok=True)
-        remaining = None if limit is None else max(0, limit - len(all_files))
-        all_files.extend(build_adversarial_files(remaining))
-    return all_files
-
-
-def write_manifest(files: list[GeneratedFile]) -> None:
-    by_group: dict[str, int] = {}
-    rows = []
-    for gf in files:
-        rows.append(gf.to_manifest_row())
-        by_group[gf.group] = by_group.get(gf.group, 0) + 1
-    manifest = {
-        "version": 1,
-        "generated_at": "deterministic",
-        "total_files": len(files),
-        "by_group": by_group,
-        "files": rows,
-    }
-    MANIFEST_PATH.write_text(json.dumps(manifest, indent=2, sort_keys=False))
-    print(f"✓ manifest written → {MANIFEST_PATH.relative_to(ROOT)}")
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--group", choices=["matrix", "combo", "adversarial", "all"], default="all")
-    parser.add_argument("--force", action="store_true", help="regenerate even if outputs exist")
-    parser.add_argument("--limit", type=int, help="stop after N files (smoke mode)")
-    parser.add_argument("--clean", action="store_true", help="wipe testBench/generated/ first")
-    args = parser.parse_args()
-
-    if args.clean and OUT_ROOT.exists():
-        import shutil
-        shutil.rmtree(OUT_ROOT)
-        print(f"✓ cleaned {OUT_ROOT.relative_to(ROOT)}")
-
-    groups = {"matrix", "combo", "adversarial"} if args.group == "all" else {args.group}
-    OUT_ROOT.mkdir(parents=True, exist_ok=True)
-
-    print(f"building testBench into {OUT_ROOT.relative_to(ROOT)}  groups={sorted(groups)}  limit={args.limit}")
-    files = build_all(groups, args.force, args.limit)
-    write_manifest(files)
-
-    print(f"\n{'═' * 60}")
-    print(f"  Generated {len(files)} workbooks")
-    by_group: dict[str, int] = {}
-    for gf in files:
-        by_group[gf.group] = by_group.get(gf.group, 0) + 1
-    for g in sorted(by_group):
-        print(f"    {g:32s} {by_group[g]:4d}")
-    print(f"{'═' * 60}")
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
diff --git a/scripts/generate_enterprise_fixtures.py b/scripts/generate_enterprise_fixtures.py
deleted file mode 100644
index 189bb78..0000000
--- a/scripts/generate_enterprise_fixtures.py
+++ /dev/null
@@ -1,150 +0,0 @@
-"""Generate small, deterministic enterprise-style Excel fixtures.
-
-These fixtures are used by enterprise scoring tests and corpus metrics.
-They are intentionally lightweight so they can be generated at test time
-without network access or large disk usage.
-"""
-
-
-
-from pathlib import Path
-from typing import Callable
-
-from openpyxl import Workbook
-from openpyxl.styles import Font
-from openpyxl.workbook.defined_name import DefinedName
-
-
-ROOT = Path(__file__).resolve().parent.parent
-TARGET_DIR = ROOT / "testBench" / "enterprise"
-
-
-def _prepare_target() -> None:
-    TARGET_DIR.mkdir(parents=True, exist_ok=True)
-
-
-def create_financial_model() -> Workbook:
-    wb = Workbook()
-    ws = wb.active
-    ws.title = "Model"
-
-    ws.merge_cells("A1:D1")
-    ws["A1"] = "Financial Model Q1 2026"
-    ws["A1"].font = Font(bold=True, size=14)
-
-    ws["A3"] = "ASSUMPTIONS"
-    ws["A4"] = "Rent per unit"
-    ws["B4"] = 2500
-    ws["A5"] = "Units occupied"
-    ws["B5"] = 42
-
-    ws["A7"] = "RESULTS"
-    ws["A8"] = "Total Revenue"
-    ws["B8"] = "=B4*B5"
-
-    wb.defined_names.add(DefinedName("UnitCount", attr_text="Model!$B$5"))
-    wb.defined_names.add(DefinedName("RentPerUnit", attr_text="Model!$B$4"))
-
-    return wb
-
-
-def create_inventory_tracker() -> Workbook:
-    wb = Workbook()
-    ws = wb.active
-    ws.title = "Master"
-
-    ws["A1"] = "SKU"
-    ws["B1"] = "Description"
-    ws["C1"] = "Qty"
-    ws["D1"] = "Unit Cost"
-
-    for i in range(2, 52):
-        ws[f"A{i}"] = f"SKU-{i:04d}"
-        ws[f"B{i}"] = f"Product {i}"
-        ws[f"C{i}"] = i * 100
-        ws[f"D{i}"] = i * 1.5
-
-    tx = wb.create_sheet("Transactions")
-    tx["A1"] = "SKU"
-    tx["B1"] = "Qty"
-    tx["C1"] = "Total"
-
-    for i in range(2, 102):
-        tx[f"A{i}"] = f"=Master!A{(i % 50) + 2}"
-        tx[f"B{i}"] = (i % 10) + 1
-        tx[f"C{i}"] = f"=VLOOKUP(A{i},Master!A:D,4,0)*B{i}"
-
-    return wb
-
-
-def create_forecast_model() -> Workbook:
-    wb = Workbook()
-    base = wb.active
-    base.title = "Base"
-
-    for month in range(1, 13):
-        base[f"A{month}"] = f"Month {month}"
-        base[f"B{month}"] = 10000 * (1 + month * 0.05)
-
-    pess = wb.create_sheet("Pessimistic")
-    opt = wb.create_sheet("Optimistic")
-    for month in range(1, 13):
-        pess[f"B{month}"] = f"=Base!B{month}*0.8"
-        opt[f"B{month}"] = f"=Base!B{month}*1.2"
-
-    return wb
-
-
-def create_operations_tracker() -> Workbook:
-    wb = Workbook()
-    ws = wb.active
-    ws.title = "Ops"
-
-    ws["A1"] = "Project"
-    ws["B1"] = "Status"
-    ws["C1"] = "Budget"
-    ws["D1"] = "Actual"
-    ws["E1"] = "Variance %"
-
-    statuses = ["Active", "Complete", "On Hold"]
-    for i in range(2, 22):
-        ws[f"A{i}"] = f"Project {i-1}"
-        ws[f"B{i}"] = statuses[i % 3]
-        ws[f"C{i}"] = i * 50000
-        ws[f"D{i}"] = i * 50000 * (1 + (i % 5) * 0.1)
-        ws[f"E{i}"] = f"=(D{i}-C{i})/C{i}"
-
-    ref = wb.create_sheet("Reference", 1)
-    ref.sheet_state = "hidden"
-    ref["A1"] = "Rate"
-    ref["A2"] = 1.05
-
-    return wb
-
-
-def _write_workbook(name: str, builder: Callable[[], Workbook]) -> Path:
-    _prepare_target()
-    path = TARGET_DIR / name
-    if path.exists():
-        return path
-    wb = builder()
-    wb.save(path)
-    return path
-
-
-def generate_all() -> list[Path]:
-    """Generate all enterprise fixtures and return their paths."""
-    fixtures = [
-        ("financial_model.xlsx", create_financial_model),
-        ("inventory_tracker.xlsx", create_inventory_tracker),
-        ("forecast_model.xlsx", create_forecast_model),
-        ("operations_tracker.xlsx", create_operations_tracker),
-    ]
-
-    return [_write_workbook(name, builder) for name, builder in fixtures]
-
-
-if __name__ == "__main__":
-    paths = generate_all()
-    for p in paths:
-        print(f"✓ Generated {p.relative_to(ROOT)}")
diff --git a/site/index.html b/site/index.html
index 585f9af..37fca39 100644
--- a/site/index.html
+++ b/site/index.html
@@ -160,7 +160,7 @@
         "name": "What file formats does ks-xlsx-parser support?",
         "acceptedAnswer": {
           "@type": "Answer",
-          "text": "ks-xlsx-parser supports .xlsx and .xlsm (OOXML). Legacy .xls (BIFF) is not supported — convert those externally first. The parser handles unicode content, very wide sheets, very tall sheets, sparse workbooks, 250-sheet workbooks, circular formula chains, and files with 32k-character cells, all covered in the 1054-workbook testBench that runs in CI."
+          "text": "ks-xlsx-parser supports .xlsx and .xlsm (OOXML). Legacy .xls (BIFF) is not supported — convert those externally first. The parser handles unicode content, very wide sheets, very tall sheets, sparse workbooks, 250-sheet workbooks, circular formula chains, and files with 32k-character cells, all benchmarked on the 5,458-workbook SpreadsheetBench corpus."
         }
       },
       {
@@ -168,7 +168,7 @@
         "name": "How fast is ks-xlsx-parser?",
         "acceptedAnswer": {
           "@type": "Answer",
-          "text": "The full 1054-workbook testBench round-trips in approximately 70 seconds on a single machine. A real-world 21k-cell, 13-sheet financial model parses in about 4.6 seconds (previously 307 seconds before a circular-ref caching fix). Sparse workbooks with extreme addresses parse in under 200 ms."
+          "text": "SpreadsheetBench's full 5,458-workbook corpus parses end-to-end in roughly 20 minutes on a single machine (low double-digit ms P50 parse time). A real-world 21k-cell, 13-sheet financial model parses in about 4.6 seconds (previously 307 seconds before a circular-ref caching fix). Sparse workbooks with extreme addresses parse in under 200 ms."
         }
       }
     ]
@@ -432,7 +432,7 @@
       <a class="link" href="#what-you-get">Features</a>
       <a class="link" href="#demo">Demo</a>
       <a class="link" href="#compare">Compare</a>
-      <a class="link" href="#bench">testBench</a>
+      <a class="link" href="#bench">Benchmarks</a>
       <a class="link" href="https://github.com/knowledgestack/ks-xlsx-parser/tree/main/docs/wiki">Docs</a>
       <a class="gh-btn" href="https://github.com/knowledgestack/ks-xlsx-parser" target="_blank" rel="noopener">⭐ Star on GitHub</a>
     </div>
@@ -551,14 +551,14 @@ <h3 style="margin-top:0">What you get back</h3>
 <section id="bench">
   <div class="wrap">
     <div class="kicker">TESTED &amp; FAST</div>
-    <h2>1054-workbook stress corpus. Every commit.</h2>
-    <p class="lead">testBench ships with the repo and runs in CI. One-feature-per-file matrix, randomised density cocktails, and engineered adversarial files — unicode bombs, circular refs, sparse 1M-row sheets, 250-sheet workbooks.</p>
+    <h2>SpreadsheetBench: 5,458 real-world workbooks.</h2>
+    <p class="lead">We benchmark against the public SpreadsheetBench v0.1 corpus — 912 instruction tasks, 5,458 unique xlsx files spanning financial models, project trackers, HR records, and a long tail of small-business spreadsheets.</p>
 
     <div class="stats">
-      <div class="stat"><b>1054/1054</b><span>tests passing on every CI run</span></div>
-      <div class="stat"><b>~70s</b><span>end-to-end bench wall time</span></div>
-      <div class="stat"><b>66×</b><span>Walbridge financial model speedup (0.1.1)</span></div>
-      <div class="stat"><b>17 MB</b><span>dataset zip attached to each release</span></div>
+      <div class="stat"><b>5,455 / 5,458</b><span>parsed cleanly (99.945%)</span></div>
+      <div class="stat"><b>912</b><span>instruction × retrieval tasks measured</span></div>
+      <div class="stat"><b>66×</b><span>21k-cell financial model speedup (0.1.1)</span></div>
+      <div class="stat"><b>vs Docling</b><span>tied @1, +2.7pp @3, +1.8pp @5</span></div>
     </div>
   </div>
 </section>
@@ -684,7 +684,7 @@ <h2>Frequently asked questions</h2>
 
     <details style="background:var(--panel);border:1px solid var(--border);border-radius:var(--radius);padding:14px 18px;margin-top:10px">
       <summary style="font-weight:600;cursor:pointer">How fast is it?</summary>
-      <p style="color:var(--ink-dim);margin:10px 0 0">The full 1054-workbook testBench round-trips in about 70 seconds. A real 21k-cell, 13-sheet financial model parses in ~4.6 s. Sparse workbooks with extreme addresses parse in under 200 ms. Details in the <a href="https://github.com/knowledgestack/ks-xlsx-parser/blob/main/CHANGELOG.md">CHANGELOG</a>.</p>
+      <p style="color:var(--ink-dim);margin:10px 0 0">SpreadsheetBench's full 5,458-workbook corpus parses end-to-end in roughly 20 minutes on a single machine. A real 21k-cell, 13-sheet financial model parses in ~4.6 s. Sparse workbooks with extreme addresses parse in under 200 ms. Details in the <a href="https://github.com/knowledgestack/ks-xlsx-parser/blob/main/CHANGELOG.md">CHANGELOG</a>.</p>
     </details>
   </div>
 </section>
diff --git a/src/models/common.py b/src/models/common.py
index d199da8..0d4af5b 100644
--- a/src/models/common.py
+++ b/src/models/common.py
@@ -64,7 +64,7 @@ class CellCoord:
     """A single cell coordinate (1-indexed row and column).
 
     **Not a Pydantic model** — frozen slotted dataclass. Profiling showed
-    339k Pydantic inits on Walbridge contributed ~0.65 s of parse time;
+    339k Pydantic inits on a real-world workbook contributed ~0.65 s of parse time;
     dataclass construction is ~2.2× faster with the same immutability
     and equality semantics. Validation of ``row >= 1`` / ``col >= 1`` is
     dropped: all producers in this codebase build coords from parsed
diff --git a/src/parsers/workbook_parser.py b/src/parsers/workbook_parser.py
index 899402f..bee0452 100644
--- a/src/parsers/workbook_parser.py
+++ b/src/parsers/workbook_parser.py
@@ -81,9 +81,9 @@ def __init__(
             max_workers: Number of parallel workers.
             build_dep_graph: Build the formula dependency graph + run cycle
                 detection. Fast mode sets this False — on formula-heavy
-                workbooks (Walbridge: 17.6k formulas → 48k edges) the dep
-                graph is one of the largest remaining costs and nothing in
-                fast mode consumes it.
+                workbooks (17k formulas → 48k edges is typical for a real
+                financial model) the dep graph is one of the largest
+                remaining costs and nothing in fast mode consumes it.
         """
         if path is None and content is None:
             raise ValueError("Either path or content must be provided")
@@ -249,8 +249,8 @@ def parse(self) -> WorkbookDTO:
 
         # Build dependency graph (skippable in fast mode — this stage scans
         # every formula, runs the parser, creates thousands of edges, and
-        # then runs cycle detection; on Walbridge alone it accounts for
-        # ~25% of the full-mode wall clock).
+        # then runs cycle detection; on a 17k-formula real-world workbook it
+        # accounts for ~25% of the full-mode wall clock).
         if self._build_dep_graph:
             try:
                 from formula.dependency_builder import DependencyBuilder
diff --git a/testBench/README.md b/testBench/README.md
deleted file mode 100644
index 68cc9eb..0000000
--- a/testBench/README.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# testBench — the ks-xlsx-parser stress corpus
-
-A single, self-contained dataset of **1053 `.xlsx` workbooks** used to
-regression-test and stress-test [ks-xlsx-parser](https://github.com/knowledgestack/ks-xlsx-parser).
-
-It is MIT-licensed, free to reuse for any Excel parser research (commercial or
-otherwise). If it saves you time, please [star the repo](https://github.com/knowledgestack/ks-xlsx-parser) —
-that's the only signal we have that open-sourcing this was worth doing.
-
-## Layout
-
-| Directory | Files | What's in it |
-|-----------|------:|--------------|
-| `real_world/`           | 8    | Real anonymised workbooks shipped as demos (financial models, project trackers, engineering calcs). |
-| `enterprise/`           | 4    | Deterministic enterprise templates (financial / forecast / inventory / operations). |
-| `github_datasets/`      | 10   | Public CSV→XLSX conversions (iris, titanic, superstore, apple stock, …). |
-| `stress/curated/`       | 26   | 26 hand-authored progressive stress levels (`stress_level_0`…`stress_level_25`). |
-| `stress/merges/`        | 5    | Pathological merge patterns that historically broke parsers. |
-| `generated/matrix/`     | ~297 | **One feature per file** across 18 categories (formulas, merges, named ranges, data validation, conditional formatting, tables, charts, styles, dates, errors, hidden rows/cols, hyperlinks, comments, rich text, freeze panes, edge addresses, sheet names, 3D refs). |
-| `generated/combo/`      | 400  | Deterministically randomised cocktails at 5 densities × 80 seeds. |
-| `generated/adversarial/`| 300  | Files engineered to break parsers: deep formula chains, 1M-row sparse sheets, 250-sheet workbooks, unicode bombs, huge merges, broken refs, 32 k-char cells, circular refs, long formulas. |
-| **Total**               | **1053** | |
-
-The `generated/` tree is produced by [`scripts/build_testbench.py`](../scripts/build_testbench.py)
-and is deterministic — identical commits produce byte-identical files. The other
-directories are checked in as-is.
-
-## Manifest
-
-`generated/MANIFEST.json` lists every generated file with:
-
-* `group`              — matrix category, combo, or adversarial
-* `features`           — tags describing what the file exercises
-* `expected_cells`     — sanity check count
-* `expected_formulas`  — sanity check count
-* `sha256` / `size_bytes` — integrity + packaging info
-* `notes`              — e.g. seed/density for combo files
-
-## How we use it
-
-```bash
-# regenerate the 1000-file generated tree (idempotent)
-make testbench-build
-
-# parse every file and record failures to metrics/testbench/failures.json
-make testbench
-
-# package for a GitHub release
-make testbench-zip
-```
-
-The round-trip test (`tests/test_testbench_roundtrip.py`) asserts every
-workbook parses without raising and produces a non-empty JSON result. The
-failure log is a first-class artifact — every parser regression shows up as a
-new entry.
-
-## Licensing
-
-All files generated by `build_testbench.py` are synthetic and released under
-MIT alongside the parser. The `real_world/`, `enterprise/`, and
-`github_datasets/` contents are either authored for this project or sourced
-from public-domain datasets; attribution is in the parent repo.
diff --git a/testBench/enterprise/financial_model.xlsx b/testBench/enterprise/financial_model.xlsx
deleted file mode 100644
index f84c12d..0000000
Binary files a/testBench/enterprise/financial_model.xlsx and /dev/null differ
diff --git a/testBench/enterprise/forecast_model.xlsx b/testBench/enterprise/forecast_model.xlsx
deleted file mode 100644
index 7f08d91..0000000
Binary files a/testBench/enterprise/forecast_model.xlsx and /dev/null differ
diff --git a/testBench/enterprise/inventory_tracker.xlsx b/testBench/enterprise/inventory_tracker.xlsx
deleted file mode 100644
index a13fcd1..0000000
Binary files a/testBench/enterprise/inventory_tracker.xlsx and /dev/null differ
diff --git a/testBench/enterprise/operations_tracker.xlsx b/testBench/enterprise/operations_tracker.xlsx
deleted file mode 100644
index a3997b3..0000000
Binary files a/testBench/enterprise/operations_tracker.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/apple_stock.xlsx b/testBench/github_datasets/apple_stock.xlsx
deleted file mode 100644
index 62edeb6..0000000
Binary files a/testBench/github_datasets/apple_stock.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/bestsellers.xlsx b/testBench/github_datasets/bestsellers.xlsx
deleted file mode 100644
index 665b312..0000000
Binary files a/testBench/github_datasets/bestsellers.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/boston.xlsx b/testBench/github_datasets/boston.xlsx
deleted file mode 100644
index ab85439..0000000
Binary files a/testBench/github_datasets/boston.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/breast_cancer.xlsx b/testBench/github_datasets/breast_cancer.xlsx
deleted file mode 100644
index adca3b2..0000000
Binary files a/testBench/github_datasets/breast_cancer.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/iris.xlsx b/testBench/github_datasets/iris.xlsx
deleted file mode 100644
index fede151..0000000
Binary files a/testBench/github_datasets/iris.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/superstore.xlsx b/testBench/github_datasets/superstore.xlsx
deleted file mode 100644
index c51783b..0000000
Binary files a/testBench/github_datasets/superstore.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/titanic.xlsx b/testBench/github_datasets/titanic.xlsx
deleted file mode 100644
index 5cba13b..0000000
Binary files a/testBench/github_datasets/titanic.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/winequality_red.xlsx b/testBench/github_datasets/winequality_red.xlsx
deleted file mode 100644
index 58ddf1e..0000000
Binary files a/testBench/github_datasets/winequality_red.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/world_happiness_2019.xlsx b/testBench/github_datasets/world_happiness_2019.xlsx
deleted file mode 100644
index 6de5ad7..0000000
Binary files a/testBench/github_datasets/world_happiness_2019.xlsx and /dev/null differ
diff --git a/testBench/github_datasets/worldcups.xlsx b/testBench/github_datasets/worldcups.xlsx
deleted file mode 100644
index 4b122f5..0000000
Binary files a/testBench/github_datasets/worldcups.xlsx and /dev/null differ
diff --git a/testBench/real_world/Employee Sample Data.xlsx b/testBench/real_world/Employee Sample Data.xlsx
deleted file mode 100644
index 4cc5a38..0000000
Binary files a/testBench/real_world/Employee Sample Data.xlsx and /dev/null differ
diff --git a/testBench/real_world/Financials Sample Data.xlsx b/testBench/real_world/Financials Sample Data.xlsx
deleted file mode 100644
index 76bc6dd..0000000
Binary files a/testBench/real_world/Financials Sample Data.xlsx and /dev/null differ
diff --git a/testBench/real_world/data_inventory.xlsx b/testBench/real_world/data_inventory.xlsx
deleted file mode 100644
index 3371e0c..0000000
Binary files a/testBench/real_world/data_inventory.xlsx and /dev/null differ
diff --git a/testBench/real_world/engineering_calcs.xlsx b/testBench/real_world/engineering_calcs.xlsx
deleted file mode 100644
index 49e1fb0..0000000
Binary files a/testBench/real_world/engineering_calcs.xlsx and /dev/null differ
diff --git a/testBench/real_world/financial_model.xlsx b/testBench/real_world/financial_model.xlsx
deleted file mode 100644
index 276ea8a..0000000
Binary files a/testBench/real_world/financial_model.xlsx and /dev/null differ
diff --git a/testBench/real_world/project_tracker.xlsx b/testBench/real_world/project_tracker.xlsx
deleted file mode 100644
index bca638a..0000000
Binary files a/testBench/real_world/project_tracker.xlsx and /dev/null differ
diff --git a/testBench/real_world/sales_dashboard.xlsx b/testBench/real_world/sales_dashboard.xlsx
deleted file mode 100644
index fb05bb8..0000000
Binary files a/testBench/real_world/sales_dashboard.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_0.xlsx b/testBench/stress/curated/stress_level_0.xlsx
deleted file mode 100644
index 4a620f0..0000000
Binary files a/testBench/stress/curated/stress_level_0.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_1.xlsx b/testBench/stress/curated/stress_level_1.xlsx
deleted file mode 100644
index 76a7e01..0000000
Binary files a/testBench/stress/curated/stress_level_1.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_10.xlsx b/testBench/stress/curated/stress_level_10.xlsx
deleted file mode 100644
index 7578615..0000000
Binary files a/testBench/stress/curated/stress_level_10.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_11.xlsx b/testBench/stress/curated/stress_level_11.xlsx
deleted file mode 100644
index 72d5c8d..0000000
Binary files a/testBench/stress/curated/stress_level_11.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_12.xlsx b/testBench/stress/curated/stress_level_12.xlsx
deleted file mode 100644
index 56e10e4..0000000
Binary files a/testBench/stress/curated/stress_level_12.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_13.xlsx b/testBench/stress/curated/stress_level_13.xlsx
deleted file mode 100644
index 274c560..0000000
Binary files a/testBench/stress/curated/stress_level_13.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_14.xlsx b/testBench/stress/curated/stress_level_14.xlsx
deleted file mode 100644
index 7d69a4c..0000000
Binary files a/testBench/stress/curated/stress_level_14.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_15.xlsx b/testBench/stress/curated/stress_level_15.xlsx
deleted file mode 100644
index 50aa2a4..0000000
Binary files a/testBench/stress/curated/stress_level_15.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_16.xlsx b/testBench/stress/curated/stress_level_16.xlsx
deleted file mode 100644
index a22617a..0000000
Binary files a/testBench/stress/curated/stress_level_16.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_17.xlsx b/testBench/stress/curated/stress_level_17.xlsx
deleted file mode 100644
index 3e8fc4c..0000000
Binary files a/testBench/stress/curated/stress_level_17.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_18.xlsx b/testBench/stress/curated/stress_level_18.xlsx
deleted file mode 100644
index 56ae03b..0000000
Binary files a/testBench/stress/curated/stress_level_18.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_19.xlsx b/testBench/stress/curated/stress_level_19.xlsx
deleted file mode 100644
index 98c9f4a..0000000
Binary files a/testBench/stress/curated/stress_level_19.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_2.xlsx b/testBench/stress/curated/stress_level_2.xlsx
deleted file mode 100644
index 97fb325..0000000
Binary files a/testBench/stress/curated/stress_level_2.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_20.xlsx b/testBench/stress/curated/stress_level_20.xlsx
deleted file mode 100644
index 72154d7..0000000
Binary files a/testBench/stress/curated/stress_level_20.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_21.xlsx b/testBench/stress/curated/stress_level_21.xlsx
deleted file mode 100644
index 7df3bc8..0000000
Binary files a/testBench/stress/curated/stress_level_21.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_22.xlsx b/testBench/stress/curated/stress_level_22.xlsx
deleted file mode 100644
index 1dca4d7..0000000
Binary files a/testBench/stress/curated/stress_level_22.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_23.xlsx b/testBench/stress/curated/stress_level_23.xlsx
deleted file mode 100644
index 489bae5..0000000
Binary files a/testBench/stress/curated/stress_level_23.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_24.xlsx b/testBench/stress/curated/stress_level_24.xlsx
deleted file mode 100644
index 82f946e..0000000
Binary files a/testBench/stress/curated/stress_level_24.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_25.xlsx b/testBench/stress/curated/stress_level_25.xlsx
deleted file mode 100644
index 6ba2f67..0000000
Binary files a/testBench/stress/curated/stress_level_25.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_3.xlsx b/testBench/stress/curated/stress_level_3.xlsx
deleted file mode 100644
index e43c5d2..0000000
Binary files a/testBench/stress/curated/stress_level_3.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_4.xlsx b/testBench/stress/curated/stress_level_4.xlsx
deleted file mode 100644
index 0464f9d..0000000
Binary files a/testBench/stress/curated/stress_level_4.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_5.xlsx b/testBench/stress/curated/stress_level_5.xlsx
deleted file mode 100644
index f279818..0000000
Binary files a/testBench/stress/curated/stress_level_5.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_6.xlsx b/testBench/stress/curated/stress_level_6.xlsx
deleted file mode 100644
index e5b3f85..0000000
Binary files a/testBench/stress/curated/stress_level_6.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_7.xlsx b/testBench/stress/curated/stress_level_7.xlsx
deleted file mode 100644
index dff80f4..0000000
Binary files a/testBench/stress/curated/stress_level_7.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_8.xlsx b/testBench/stress/curated/stress_level_8.xlsx
deleted file mode 100644
index 780d0a3..0000000
Binary files a/testBench/stress/curated/stress_level_8.xlsx and /dev/null differ
diff --git a/testBench/stress/curated/stress_level_9.xlsx b/testBench/stress/curated/stress_level_9.xlsx
deleted file mode 100644
index a3a6650..0000000
Binary files a/testBench/stress/curated/stress_level_9.xlsx and /dev/null differ
diff --git a/testBench/stress/merges/merge_stress_across.xlsx b/testBench/stress/merges/merge_stress_across.xlsx
deleted file mode 100644
index 52db4d7..0000000
Binary files a/testBench/stress/merges/merge_stress_across.xlsx and /dev/null differ
diff --git a/testBench/stress/merges/merge_stress_dense_grid.xlsx b/testBench/stress/merges/merge_stress_dense_grid.xlsx
deleted file mode 100644
index 7c938bf..0000000
Binary files a/testBench/stress/merges/merge_stress_dense_grid.xlsx and /dev/null differ
diff --git a/testBench/stress/merges/merge_stress_empty_master.xlsx b/testBench/stress/merges/merge_stress_empty_master.xlsx
deleted file mode 100644
index 06713b0..0000000
Binary files a/testBench/stress/merges/merge_stress_empty_master.xlsx and /dev/null differ
diff --git a/testBench/stress/merges/merge_stress_table_header.xlsx b/testBench/stress/merges/merge_stress_table_header.xlsx
deleted file mode 100644
index 13d1092..0000000
Binary files a/testBench/stress/merges/merge_stress_table_header.xlsx and /dev/null differ
diff --git a/testBench/stress/merges/merge_stress_vertical.xlsx b/testBench/stress/merges/merge_stress_vertical.xlsx
deleted file mode 100644
index 1a44d8e..0000000
Binary files a/testBench/stress/merges/merge_stress_vertical.xlsx and /dev/null differ
diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md
index 1cb056f..412102e 100644
--- a/tests/benchmarks/README.md
+++ b/tests/benchmarks/README.md
@@ -4,7 +4,7 @@ Two benchmarks, both reproducible:
 
 | Benchmark | What it measures | Corpus | Cost |
 |---|---|---|---|
-| `vs_hucre.py` (structural) | Parse-success rate + structural counts (cells, formulas, tables, merges, etc.) across many files | `testBench/` (53 curated) or `data/corpora/spreadsheetbench/` (5,458 real-world) | Cheap — 1–20 min |
+| `vs_hucre.py` (structural) | Parse-success rate + structural counts (cells, formulas, tables, merges, etc.) across many files | `data/corpora/spreadsheetbench/` (5,458 real-world) | Cheap — 1–20 min |
 | `scripts/eval_retrieval.py` (chunk quality) | Recall@k for retrieving the relevant chunk given a natural-language instruction, + table-integrity fragmentation rate | SpreadsheetBench `dataset.json` (912 instruction + position pairs) | Medium — 10 min on 100 instances |
 
 ## 1. Structural benchmark — `vs_hucre.py`
@@ -18,9 +18,9 @@ Long-running NDJSON-protocol workers, per-file timeout, batch respawn, randomize
 Supported parsers today: `ks` (ks-xlsx-parser), `hucre` (TypeScript, requires `pnpm install` under `hucre_node/`), `docling` (IBM Docling — `uv pip install docling`).
 
 ```bash
-# Quick smoke (50 random files from testBench)
+# Quick smoke (50 random files from SpreadsheetBench)
 PYTHONPATH=src uv run python -m tests.benchmarks.vs_hucre \
-    --corpus testBench --sample 50 --parsers ks
+    --corpus data/corpora/spreadsheetbench --sample 50 --parsers ks
 
 # Robustness on full SpreadsheetBench (5,458 files, ~20 min)
 PYTHONPATH=src uv run python -m tests.benchmarks.vs_hucre \
diff --git a/tests/benchmarks/__init__.py b/tests/benchmarks/__init__.py
index 4e77399..4558721 100644
--- a/tests/benchmarks/__init__.py
+++ b/tests/benchmarks/__init__.py
@@ -2,11 +2,11 @@
 Local-only benchmark harness. Not part of the public test suite.
 
 Runs `ks-xlsx-parser` head-to-head against external parsers (currently `hucre`,
-a TypeScript zero-dependency spreadsheet I/O library) across the `testBench/`
-corpus and produces per-file perf + feature-coverage records.
+a TypeScript zero-dependency spreadsheet I/O library) across the
+SpreadsheetBench corpus and produces per-file perf + feature-coverage records.
 
 Not committed by default — reports and node_modules are git-ignored. Invoke
-via `python -m tests.benchmarks.vs_hucre --corpus testBench`.
+via `python -m tests.benchmarks.vs_hucre --corpus data/corpora/spreadsheetbench`.
 
 Pitfalls this harness is designed to avoid (read before editing):
 
diff --git a/tests/benchmarks/_driver.py b/tests/benchmarks/_driver.py
index dad231d..a66acd5 100644
--- a/tests/benchmarks/_driver.py
+++ b/tests/benchmarks/_driver.py
@@ -257,13 +257,7 @@ def generate_summary(out_dir: Path) -> None:
             continue
         try:
             rel = Path(r["file"]).resolve()
-            # Find segment after 'testBench/' or use file's parent name.
-            parts = rel.parts
-            if "testBench" in parts:
-                idx = parts.index("testBench")
-                sub = "/".join(parts[idx + 1: idx + 3]) if idx + 2 < len(parts) else parts[idx + 1]
-            else:
-                sub = rel.parent.name
+            sub = rel.parent.name
         except Exception:  # noqa: BLE001
             sub = "?"
         by_sub[(r["parser"], sub)].append(r["parse_time_ms"])
diff --git a/tests/benchmarks/vs_hucre.py b/tests/benchmarks/vs_hucre.py
index 24e0e50..44ca561 100644
--- a/tests/benchmarks/vs_hucre.py
+++ b/tests/benchmarks/vs_hucre.py
@@ -4,7 +4,7 @@
 Usage (from repo root, with venv active):
 
     python -m tests.benchmarks.vs_hucre \\
-        --corpus testBench \\
+        --corpus data/corpora/spreadsheetbench \\
         --out tests/benchmarks/reports \\
         [--subset real_world,enterprise] \\
         [--sample 50] \\
@@ -33,7 +33,7 @@
 
 def main(argv: list[str] | None = None) -> int:
     parser = argparse.ArgumentParser(description=__doc__.splitlines()[1] if __doc__ else "")
-    parser.add_argument("--corpus", type=Path, default=Path("testBench"),
+    parser.add_argument("--corpus", type=Path, default=Path("data/corpora/spreadsheetbench"),
                         help="Corpus directory containing .xlsx/.xlsm files.")
     parser.add_argument("--out", type=Path, default=Path("tests/benchmarks/reports"),
                         help="Root directory for reports; a timestamped subdir is created.")
diff --git a/tests/conftest.py b/tests/conftest.py
index 85d21a0..d422b9b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -22,14 +22,9 @@
 from openpyxl.worksheet.table import Table, TableStyleInfo
 
 # ---------------------------------------------------------------------------
-# All-xlsx-files collection for cross-validation and invariant tests
+# Programmatic fixture collection for cross-validation and invariant tests
 # ---------------------------------------------------------------------------
 
-_PROJECT_ROOT = Path(__file__).parent.parent
-_TESTBENCH_DIR = _PROJECT_ROOT / "testBench"
-_EXAMPLES_DIR = _TESTBENCH_DIR / "real_world"
-_DATASETS_DIR = _TESTBENCH_DIR / "github_datasets"
-
 # Names of conftest fixtures that produce .xlsx files
 PROGRAMMATIC_FIXTURE_NAMES = [
     "simple_workbook",
@@ -69,33 +64,12 @@
 ]
 
 
-def collect_static_xlsx_files() -> list[Path]:
-    """Collect all static .xlsx files from examples and github_datasets."""
-    files = []
-    for d in [_EXAMPLES_DIR, _DATASETS_DIR]:
-        if d.exists():
-            files.extend(sorted(d.glob("*.xlsx")))
-    return files
-
-
-STATIC_XLSX_FILES = collect_static_xlsx_files()
-
-
 @pytest.fixture(params=PROGRAMMATIC_FIXTURE_NAMES)
 def programmatic_xlsx(request, tmp_dir) -> Path:
     """Yields each programmatic fixture as a Path (re-uses other fixtures)."""
     return request.getfixturevalue(request.param)
 
 
-@pytest.fixture(
-    params=STATIC_XLSX_FILES,
-    ids=[f.stem for f in STATIC_XLSX_FILES],
-)
-def static_xlsx(request) -> Path:
-    """Yields each static .xlsx file path."""
-    return request.param
-
-
 @pytest.fixture
 def tmp_dir():
     """Provide a temporary directory for test workbooks."""
diff --git a/tests/test_cross_validation.py b/tests/test_cross_validation.py
deleted file mode 100644
index 94f73f1..0000000
--- a/tests/test_cross_validation.py
+++ /dev/null
@@ -1,334 +0,0 @@
-"""
-Cross-validation tests comparing parser output against python-calamine.
-
-Calamine is a Rust-based Excel reader, completely independent from openpyxl.
-These tests verify that our parser reads the same data that calamine does.
-"""
-
-
-
-import datetime
-
-import pytest
-
-from pipeline import parse_workbook
-
-from tests.helpers.calamine_reader import CalamineResult
-from tests.helpers.value_comparator import Mismatch, compare_cell_value, values_match
-
-
-# ---------------------------------------------------------------------------
-# Cross-validation on programmatic fixtures
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.crossval
-class TestSheetNamesCrossVal:
-    """Verify sheet names match between parser and calamine."""
-
-    def test_sheet_names_match(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-
-        parser_names = [s.sheet_name for s in parser_result.workbook.sheets]
-        assert parser_names == calamine.sheet_names, (
-            f"Sheet names differ:\n  parser:   {parser_names}\n"
-            f"  calamine: {calamine.sheet_names}"
-        )
-
-    def test_sheet_count_match(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-        assert len(parser_result.workbook.sheets) == len(calamine.sheet_names)
-
-
-@pytest.mark.crossval
-class TestCellValuesCrossVal:
-    """Verify cell values match between parser and calamine."""
-
-    def test_non_formula_values_match(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-        mismatches = _collect_mismatches(parser_result, calamine, formula_cells=False)
-        assert len(mismatches) == 0, (
-            f"{len(mismatches)} non-formula value mismatches:\n"
-            + _format_mismatches(mismatches[:10])
-        )
-
-    def test_formula_computed_values_match(self, programmatic_xlsx):
-        """For formula cells with cached values, parser's formula_value should
-        match calamine's computed value. Programmatic fixtures often have no
-        cached values, so we use a lenient threshold."""
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-        mismatches = _collect_mismatches(parser_result, calamine, formula_cells=True)
-
-        total_formulas = sum(
-            1 for s in parser_result.workbook.sheets
-            for c in s.cells.values()
-            if c.formula
-        )
-        # Allow up to 100% mismatch for programmatic fixtures (no cached values)
-        # This test is more meaningful for real-world files
-        if total_formulas > 0 and len(mismatches) > 0:
-            rate = len(mismatches) / total_formulas
-            # Only fail if we have actual cached values but they don't match
-            hard_mismatches = [
-                m for m in mismatches
-                if m.parser_value is not None and m.calamine_value is not None
-            ]
-            assert len(hard_mismatches) == 0, (
-                f"{len(hard_mismatches)} formula value mismatches "
-                f"(with cached values):\n"
-                + _format_mismatches(hard_mismatches[:10])
-            )
-
-
-@pytest.mark.crossval
-class TestDimensionsCrossVal:
-    """Verify dimensions roughly match between parser and calamine."""
-
-    def test_row_count_similar(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-
-        for sheet in parser_result.workbook.sheets:
-            cal_sheet = calamine.sheets.get(sheet.sheet_name)
-            if not cal_sheet or not sheet.used_range:
-                continue
-            parser_rows = sheet.used_range.row_count()
-            # calamine total_height is the total row count of the sheet
-            # For comparison, use the data area (start/end)
-            if cal_sheet.start is not None and cal_sheet.end is not None:
-                cal_rows = cal_sheet.end[0] - cal_sheet.start[0] + 1
-                # Allow ±2 row difference (calamine may include trailing empty rows)
-                assert abs(parser_rows - cal_rows) <= 2, (
-                    f"Sheet '{sheet.sheet_name}' row count: "
-                    f"parser={parser_rows}, calamine={cal_rows}"
-                )
-
-    def test_column_count_similar(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-
-        for sheet in parser_result.workbook.sheets:
-            cal_sheet = calamine.sheets.get(sheet.sheet_name)
-            if not cal_sheet or not sheet.used_range:
-                continue
-            parser_cols = sheet.used_range.col_count()
-            if cal_sheet.start is not None and cal_sheet.end is not None:
-                cal_cols = cal_sheet.end[1] - cal_sheet.start[1] + 1
-                assert abs(parser_cols - cal_cols) <= 2, (
-                    f"Sheet '{sheet.sheet_name}' col count: "
-                    f"parser={parser_cols}, calamine={cal_cols}"
-                )
-
-
-@pytest.mark.crossval
-class TestMergedRegionsCrossVal:
-    """Verify merged regions match between parser and calamine."""
-
-    def test_merged_region_count(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-
-        for sheet in parser_result.workbook.sheets:
-            cal_sheet = calamine.sheets.get(sheet.sheet_name)
-            if not cal_sheet or cal_sheet.merged_ranges is None:
-                continue
-            parser_count = len(sheet.merged_regions)
-            cal_count = len(cal_sheet.merged_ranges)
-            assert parser_count == cal_count, (
-                f"Sheet '{sheet.sheet_name}' merge count: "
-                f"parser={parser_count}, calamine={cal_count}"
-            )
-
-    def test_merged_region_ranges(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-
-        for sheet in parser_result.workbook.sheets:
-            cal_sheet = calamine.sheets.get(sheet.sheet_name)
-            if not cal_sheet or cal_sheet.merged_ranges is None:
-                continue
-            # Convert calamine ranges to comparable format
-            # calamine: ((start_row, start_col), (end_row, end_col)) 0-indexed
-            cal_ranges = set()
-            for (sr, sc), (er, ec) in cal_sheet.merged_ranges:
-                cal_ranges.add((sr + 1, sc + 1, er + 1, ec + 1))
-
-            parser_ranges = set()
-            for region in sheet.merged_regions:
-                parser_ranges.add((
-                    region.range.top_left.row,
-                    region.range.top_left.col,
-                    region.range.bottom_right.row,
-                    region.range.bottom_right.col,
-                ))
-
-            assert parser_ranges == cal_ranges, (
-                f"Sheet '{sheet.sheet_name}' merge ranges differ:\n"
-                f"  parser:   {sorted(parser_ranges)}\n"
-                f"  calamine: {sorted(cal_ranges)}"
-            )
-
-
-@pytest.mark.crossval
-class TestMismatchRateCrossVal:
-    """Overall mismatch rate must be below threshold."""
-
-    def test_overall_mismatch_rate(self, programmatic_xlsx):
-        parser_result = parse_workbook(path=programmatic_xlsx)
-        calamine = CalamineResult.from_path(programmatic_xlsx)
-        mismatches = _collect_mismatches(
-            parser_result, calamine, formula_cells=False
-        )
-        total_cells = sum(
-            s.cell_count() for s in parser_result.workbook.sheets
-        )
-        if total_cells > 0:
-            rate = len(mismatches) / total_cells
-            assert rate < 0.01, (
-                f"Mismatch rate {rate:.1%} ({len(mismatches)}/{total_cells}) "
-                f"exceeds 1% threshold"
-            )
-
-
-# ---------------------------------------------------------------------------
-# Cross-validation on static files (examples + github datasets)
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.crossval
-class TestSheetNamesStatic:
-    def test_sheet_names_match(self, static_xlsx):
-        parser_result = parse_workbook(path=static_xlsx)
-        calamine = CalamineResult.from_path(static_xlsx)
-        parser_names = [s.sheet_name for s in parser_result.workbook.sheets]
-        assert parser_names == calamine.sheet_names
-
-
-@pytest.mark.crossval
-class TestCellValuesStatic:
-    def test_non_formula_values_match(self, static_xlsx):
-        parser_result = parse_workbook(path=static_xlsx)
-        calamine = CalamineResult.from_path(static_xlsx)
-        mismatches = _collect_mismatches(parser_result, calamine, formula_cells=False)
-        total_cells = sum(s.cell_count() for s in parser_result.workbook.sheets)
-        if total_cells > 0:
-            rate = len(mismatches) / total_cells
-            assert rate < 0.01, (
-                f"{static_xlsx.name}: {len(mismatches)}/{total_cells} "
-                f"({rate:.1%}) mismatches:\n"
-                + _format_mismatches(mismatches[:10])
-            )
-
-    def test_formula_cached_values_match(self, static_xlsx):
-        """For real-world files, formula cached values should match calamine.
-
-        Threshold: <5% mismatch overall. A handful of files with highly nested
-        dynamic-array or volatile formulas are known to exceed this because
-        openpyxl doesn't always surface the latest cached value Excel wrote —
-        we allow up to 15% for those, tracked in docs/PARSER_KNOWN_ISSUES.md.
-        """
-        known_loose_files = {
-            "Walbridge Coatings 8.9.23.xlsx",  # openpyxl cached-value gap
-        }
-        threshold = 0.15 if static_xlsx.name in known_loose_files else 0.05
-
-        parser_result = parse_workbook(path=static_xlsx)
-        calamine = CalamineResult.from_path(static_xlsx)
-        mismatches = _collect_mismatches(parser_result, calamine, formula_cells=True)
-        hard_mismatches = [
-            m for m in mismatches
-            if m.parser_value is not None and m.calamine_value is not None
-        ]
-        total_formulas = sum(
-            1 for s in parser_result.workbook.sheets
-            for c in s.cells.values()
-            if c.formula
-        )
-        if total_formulas > 0 and len(hard_mismatches) > 0:
-            rate = len(hard_mismatches) / total_formulas
-            assert rate < threshold, (
-                f"{static_xlsx.name}: {len(hard_mismatches)}/{total_formulas} "
-                f"formula mismatches ({rate:.1%}, threshold {threshold:.0%}):\n"
-                + _format_mismatches(hard_mismatches[:10])
-            )
-
-
-@pytest.mark.crossval
-class TestDimensionsStatic:
-    def test_dimensions_similar(self, static_xlsx):
-        parser_result = parse_workbook(path=static_xlsx)
-        calamine = CalamineResult.from_path(static_xlsx)
-        for sheet in parser_result.workbook.sheets:
-            cal_sheet = calamine.sheets.get(sheet.sheet_name)
-            if not cal_sheet or not sheet.used_range:
-                continue
-            if cal_sheet.start is not None and cal_sheet.end is not None:
-                parser_rows = sheet.used_range.row_count()
-                cal_rows = cal_sheet.end[0] - cal_sheet.start[0] + 1
-                # Allow ±5 for real-world files (empty trailing rows)
-                assert abs(parser_rows - cal_rows) <= 5, (
-                    f"{static_xlsx.name} sheet '{sheet.sheet_name}' rows: "
-                    f"parser={parser_rows}, calamine={cal_rows}"
-                )
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _collect_mismatches(
-    parser_result,
-    calamine: CalamineResult,
-    formula_cells: bool,
-) -> list[Mismatch]:
-    """Collect all mismatches between parser and calamine."""
-    mismatches = []
-    for sheet in parser_result.workbook.sheets:
-        cal_sheet = calamine.sheets.get(sheet.sheet_name)
-        if not cal_sheet:
-            continue
-
-        for cell in sheet.cells.values():
-            # Filter by formula/non-formula
-            if formula_cells and not cell.formula:
-                continue
-            if not formula_cells and cell.formula:
-                continue
-
-            # Skip merged slaves
-            if cell.is_merged_slave:
-                continue
-
-            cal_val = cal_sheet.get_value(cell.coord.row, cell.coord.col)
-
-            if not compare_cell_value(cell, cal_val):
-                parser_val = (
-                    cell.formula_value if cell.formula else cell.raw_value
-                )
-                mismatches.append(Mismatch(
-                    sheet=sheet.sheet_name,
-                    row=cell.coord.row,
-                    col=cell.coord.col,
-                    a1_ref=cell.a1_ref,
-                    parser_value=parser_val,
-                    calamine_value=cal_val,
-                    category="formula" if cell.formula else "value",
-                ))
-
-    return mismatches
-
-
-def _format_mismatches(mismatches: list[Mismatch]) -> str:
-    """Format mismatch list for error messages."""
-    lines = []
-    for m in mismatches:
-        lines.append(
-            f"  {m.a1_ref}: parser={m.parser_value!r} ({type(m.parser_value).__name__}) "
-            f"vs calamine={m.calamine_value!r} ({type(m.calamine_value).__name__})"
-        )
-    return "\n".join(lines)
diff --git a/tests/test_enterprise_scoring.py b/tests/test_enterprise_scoring.py
deleted file mode 100644
index 55b5cb4..0000000
--- a/tests/test_enterprise_scoring.py
+++ /dev/null
@@ -1,149 +0,0 @@
-"""Enterprise-focused scoring of parser output on synthetic fixtures.
-
-These tests provide lightweight, deterministic benchmarks that run without
-network access. They exercise formulas, tables, cross-sheet references,
-named ranges, hidden sheets, and simple calculation lineage.
-"""
-
-
-
-import json
-from pathlib import Path
-
-import pytest
-
-from ks_xlsx_parser import parse_workbook
-
-from scripts.generate_enterprise_fixtures import generate_all
-
-
-ROOT = Path(__file__).resolve().parents[1]
-FIXTURE_DIR = ROOT / "testBench" / "enterprise"
-
-
-@pytest.fixture(scope="session")
-def enterprise_workbooks() -> list[Path]:
-    """Generate (or reuse) enterprise fixtures and return their paths."""
-    return generate_all()
-
-
-class EnterpriseScorecard:
-    def __init__(self, parse_result, expected_metadata=None):
-        self.result = parse_result
-        self.expected = expected_metadata or {}
-
-    def formula_fidelity(self) -> float:
-        workbook = self.result.workbook
-        extracted = 0
-        total = 0
-        for sheet in workbook.sheets:
-            for cell in sheet.cells.values():
-                if cell.formula:
-                    total += 1
-                    if cell.formula_value is not None or cell.raw_value is not None:
-                        extracted += 1
-        return extracted / total if total else 0.0
-
-    def table_detection_f1(self) -> float:
-        detected = len(self.result.workbook.tables)
-        expected = self.expected.get("expected_tables", detected)
-        if expected == 0 and detected == 0:
-            return 1.0
-        precision = detected / max(detected, 1)
-        recall = detected / max(expected, 1)
-        return 2 * (precision * recall) / (precision + recall + 1e-10)
-
-    def lineage_accuracy(self) -> float:
-        graph = self.result.workbook.dependency_graph
-        edges = len(graph.edges)
-        cycles = 0  # DependencyGraph does not expose cycles directly
-        accuracy = 1.0 - (cycles / (edges + 1)) * 0.1
-        return max(accuracy, 0.0)
-
-    def chunk_quality(self) -> float:
-        chunks = self.result.chunks
-        tokens = [c.token_count for c in chunks]
-        if not tokens:
-            return 0.0
-        mean_tokens = sum(tokens) / len(tokens)
-        variance = sum((t - mean_tokens) ** 2 for t in tokens) / len(tokens)
-        std_dev = variance ** 0.5
-        cv = std_dev / (mean_tokens + 1e-10)
-        return max(1.0 - cv, 0.0)
-
-    def layout_recovery(self) -> float:
-        blocks_by_type = {}
-        for chunk in self.result.chunks:
-            blocks_by_type[chunk.block_type] = blocks_by_type.get(chunk.block_type, 0) + 1
-        type_count = len(blocks_by_type)
-        return min(type_count / 3.0, 1.0)
-
-    def composite_score(self):
-        weights = {
-            "formula_fidelity": 0.25,
-            "table_detection": 0.20,
-            "lineage_accuracy": 0.20,
-            "chunk_quality": 0.20,
-            "layout_recovery": 0.15,
-        }
-        scores = {
-            "formula_fidelity": self.formula_fidelity(),
-            "table_detection": self.table_detection_f1(),
-            "lineage_accuracy": self.lineage_accuracy(),
-            "chunk_quality": self.chunk_quality(),
-            "layout_recovery": self.layout_recovery(),
-        }
-        composite = sum(scores[k] * weights[k] for k in weights)
-        return scores, composite
-
-    def metrics(self):
-        scores, composite = self.composite_score()
-        scores["composite"] = composite
-        return scores
-
-
-@pytest.mark.enterprise
-@pytest.mark.parametrize(
-    "filename,expected",
-    [
-        ("financial_model.xlsx", {"expected_tables": 0, "expected_formulas": 2}),
-        ("inventory_tracker.xlsx", {"expected_tables": 0, "expected_formulas": 100}),
-        ("forecast_model.xlsx", {"expected_tables": 0, "expected_formulas": 24}),
-        ("operations_tracker.xlsx", {"expected_tables": 0, "expected_formulas": 20}),
-    ],
-)
-def test_enterprise_scorecard(enterprise_workbooks, filename, expected):
-    path = FIXTURE_DIR / filename
-    assert path.exists(), f"Fixture missing: {path}"
-
-    result = parse_workbook(path=path)
-    scorecard = EnterpriseScorecard(result, expected_metadata=expected)
-    scores, composite = scorecard.composite_score()
-
-    metrics_dir = ROOT / "metrics" / "corpus"
-    metrics_dir.mkdir(parents=True, exist_ok=True)
-    with open(metrics_dir / f"{path.stem}_scorecard.json", "w") as f:
-        json.dump(scorecard.metrics(), f, indent=2)
-
-    print(scorecard.metrics())
-    assert composite >= 0.45, f"Composite {composite:.2%} too low for {filename}"
-
-
-@pytest.mark.enterprise
-def test_enterprise_summary(enterprise_workbooks):
-    paths = enterprise_workbooks
-    results = []
-    for p in paths:
-        result = parse_workbook(path=p)
-        scorecard = EnterpriseScorecard(result)
-        scores = scorecard.metrics()
-        scores["file"] = p.name
-        results.append(scores)
-
-    metrics_dir = ROOT / "metrics"
-    metrics_dir.mkdir(parents=True, exist_ok=True)
-    summary_path = metrics_dir / "corpus_summary.json"
-    with open(summary_path, "w") as f:
-        json.dump({"files": results}, f, indent=2)
-
-    assert len(results) == len(paths)
diff --git a/tests/test_real_world_datasets.py b/tests/test_real_world_datasets.py
deleted file mode 100644
index d10905a..0000000
--- a/tests/test_real_world_datasets.py
+++ /dev/null
@@ -1,433 +0,0 @@
-"""
-Tests against real-world Excel datasets from GitHub.
-
-Source: https://github.com/rohanmistry231/Practice-Datasets-for-Excel
-
-Validates that the parser produces correct, complete JSON output for
-a variety of public datasets covering different shapes, sizes, and
-content types (numeric, text, dates, mixed).
-"""
-
-
-
-import json
-from pathlib import Path
-
-import pytest
-
-from chunking.segmenter import LayoutSegmenter
-from models import BlockType
-from parsers import WorkbookParser
-from pipeline import parse_workbook
-from storage.serializer import WorkbookSerializer
-
-
-FIXTURES_DIR = Path(__file__).parent.parent / "testBench" / "github_datasets"
-
-# Each entry: (filename, expected_sheets, expected_min_rows, expected_header_sample)
-DATASET_CATALOG = [
-    ("iris.xlsx", 1, 150, ["sepal_length", "sepal_width", "petal_length"]),
-    ("titanic.xlsx", 1, 891, ["PassengerId", "Survived", "Pclass"]),
-    ("boston.xlsx", 1, 506, ["CRIM", "ZN", "INDUS"]),
-    ("world_happiness_2019.xlsx", 1, 156, ["Overall rank", "Country or region", "Score"]),
-    ("bestsellers.xlsx", 1, 550, ["Name", "Author", "User Rating"]),
-    ("superstore.xlsx", 3, 1952, ["Row ID", "Order Priority", "Discount"]),
-    ("worldcups.xlsx", 1, 20, ["Year", "Country", "Winner"]),
-    ("breast_cancer.xlsx", 1, 569, ["id", "diagnosis", "radius_mean"]),
-    ("apple_stock.xlsx", 1, 10016, ["Date", "Open", "High"]),
-    ("winequality_red.xlsx", 1, 1599, None),  # semicolon-separated header, skip header check
-]
-
-
-def _fixture_path(name: str) -> Path:
-    return FIXTURES_DIR / name
-
-
-# ---------------------------------------------------------------------------
-# Parametrized: every dataset parses without error
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.parametrize(
-    "filename,expected_sheets,expected_min_rows,expected_headers",
-    DATASET_CATALOG,
-    ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG],
-)
-class TestDatasetParsing:
-    """Core parsing validation across all datasets."""
-
-    def test_parses_without_error(self, filename, expected_sheets, expected_min_rows, expected_headers):
-        """Parser completes without raising an exception."""
-        result = parse_workbook(path=_fixture_path(filename))
-        assert result.workbook is not None
-
-    def test_correct_sheet_count(self, filename, expected_sheets, expected_min_rows, expected_headers):
-        """Workbook has the expected number of sheets."""
-        result = parse_workbook(path=_fixture_path(filename))
-        assert len(result.workbook.sheets) == expected_sheets
-
-    def test_minimum_data_rows(self, filename, expected_sheets, expected_min_rows, expected_headers):
-        """First sheet has at least the expected number of data rows."""
-        result = parse_workbook(path=_fixture_path(filename))
-        sheet = result.workbook.sheets[0]
-        if sheet.used_range:
-            data_rows = sheet.used_range.row_count() - 1  # minus header row
-            assert data_rows >= expected_min_rows
-
-    def test_headers_detected(self, filename, expected_sheets, expected_min_rows, expected_headers):
-        """First row contains the expected column headers."""
-        if expected_headers is None:
-            pytest.skip("Header check skipped for this dataset")
-        result = parse_workbook(path=_fixture_path(filename))
-        sheet = result.workbook.sheets[0]
-        first_row = sheet.used_range.top_left.row
-        actual_headers = []
-        for col in range(sheet.used_range.top_left.col, sheet.used_range.bottom_right.col + 1):
-            cell = sheet.get_cell(first_row, col)
-            if cell and cell.raw_value is not None:
-                actual_headers.append(str(cell.raw_value))
-        for expected in expected_headers:
-            assert expected in actual_headers, (
-                f"Expected header '{expected}' not found in {actual_headers[:10]}"
-            )
-
-    def test_produces_chunks(self, filename, expected_sheets, expected_min_rows, expected_headers):
-        """Pipeline produces at least one chunk per sheet."""
-        result = parse_workbook(path=_fixture_path(filename))
-        assert result.total_chunks >= expected_sheets
-
-
-# ---------------------------------------------------------------------------
-# JSON serialization
-# ---------------------------------------------------------------------------
-
-
-class TestJsonSerialization:
-    """Verify JSON output is valid, complete, and contains expected fields."""
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_to_json_valid(self, filename):
-        """to_json() returns a dict that round-trips through json.dumps/loads."""
-        result = parse_workbook(path=_fixture_path(filename))
-        data = result.to_json()
-        json_str = json.dumps(data)
-        roundtripped = json.loads(json_str)
-        assert roundtripped["total_chunks"] == result.total_chunks
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_to_json_has_required_keys(self, filename):
-        """JSON output contains all required top-level keys."""
-        result = parse_workbook(path=_fixture_path(filename))
-        data = result.to_json()
-        assert "workbook" in data
-        assert "chunks" in data
-        assert "total_chunks" in data
-        assert "total_tokens" in data
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_workbook_metadata_in_json(self, filename):
-        """Workbook section has all required metadata fields."""
-        result = parse_workbook(path=_fixture_path(filename))
-        wb_json = result.to_json()["workbook"]
-        assert wb_json["workbook_id"]
-        assert wb_json["filename"]
-        assert wb_json["workbook_hash"]
-        assert isinstance(wb_json["total_sheets"], int)
-        assert isinstance(wb_json["total_cells"], int)
-        assert isinstance(wb_json["errors"], list)
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_chunk_json_has_required_keys(self, filename):
-        """Each chunk in JSON has all required fields."""
-        result = parse_workbook(path=_fixture_path(filename))
-        for chunk in result.to_json()["chunks"]:
-            assert "chunk_id" in chunk
-            assert "source_uri" in chunk
-            assert "sheet_name" in chunk
-            assert "block_type" in chunk
-            assert "top_left" in chunk
-            assert "bottom_right" in chunk
-            assert "render_text" in chunk
-            assert chunk["render_text"]  # not empty
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_chunk_render_text_contains_data(self, filename):
-        """Rendered text in chunks contains actual cell data, not just structure."""
-        result = parse_workbook(path=_fixture_path(filename))
-        sheet = result.workbook.sheets[0]
-        # Get a data value from the sheet (short values to avoid semicolon-delimited lines)
-        if sheet.used_range:
-            first_data_row = sheet.used_range.top_left.row + 1
-            for col in range(sheet.used_range.top_left.col, sheet.used_range.bottom_right.col + 1):
-                cell = sheet.get_cell(first_data_row, col)
-                if cell and cell.display_value and 2 < len(str(cell.display_value)) <= 30:
-                    # At least one chunk should contain this value
-                    found = any(
-                        str(cell.display_value) in c.render_text
-                        for c in result.chunks
-                    )
-                    assert found, f"Value '{cell.display_value}' not found in any chunk render_text"
-                    return
-        pytest.skip("No suitable data value found to check")
-
-
-# ---------------------------------------------------------------------------
-# Serializer records (Postgres-ready)
-# ---------------------------------------------------------------------------
-
-
-class TestSerializerRecords:
-    """Verify WorkbookSerializer produces valid storage records."""
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_workbook_record(self, filename):
-        """Workbook record has all required fields for Postgres."""
-        result = parse_workbook(path=_fixture_path(filename))
-        serializer = WorkbookSerializer(result.workbook, result.chunks)
-        rec = serializer.to_workbook_record()
-        assert rec["id"]
-        assert rec["file_hash"]
-        assert rec["filename"]
-        assert isinstance(rec["total_sheets"], int)
-        assert isinstance(rec["total_cells"], int)
-        # Ensure JSON-serializable
-        json.dumps(rec)
-
-    @pytest.mark.parametrize(
-        "filename,expected_sheets",
-        [(d[0], d[1]) for d in DATASET_CATALOG],
-        ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG],
-    )
-    def test_sheet_records_count(self, filename, expected_sheets):
-        """Correct number of sheet records produced."""
-        result = parse_workbook(path=_fixture_path(filename))
-        serializer = WorkbookSerializer(result.workbook, result.chunks)
-        sheets = serializer.to_sheet_records()
-        assert len(sheets) == expected_sheets
-        for s in sheets:
-            assert s["sheet_name"]
-            assert s["workbook_id"]
-            json.dumps(s)
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_chunk_records(self, filename):
-        """Chunk records are valid and JSON-serializable."""
-        result = parse_workbook(path=_fixture_path(filename))
-        serializer = WorkbookSerializer(result.workbook, result.chunks)
-        chunks = serializer.to_chunk_records()
-        assert len(chunks) >= 1
-        for c in chunks:
-            assert c["id"]
-            assert c["sheet_name"]
-            assert c["block_type"]
-            assert c["render_text"]
-            json.dumps(c)
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_vector_store_entries(self, filename):
-        """Vector store entries have text and metadata for embedding."""
-        result = parse_workbook(path=_fixture_path(filename))
-        serializer = WorkbookSerializer(result.workbook, result.chunks)
-        entries = serializer.to_vector_store_entries()
-        assert len(entries) >= 1
-        for e in entries:
-            assert e["id"]
-            assert e["text"]
-            assert e["metadata"]["workbook_hash"]
-            assert e["metadata"]["sheet_name"]
-            assert e["metadata"]["source_uri"]
-            json.dumps(e)
-
-
-# ---------------------------------------------------------------------------
-# Layout detection on real data
-# ---------------------------------------------------------------------------
-
-
-class TestRealWorldLayout:
-    """Verify layout segmentation works correctly on real datasets."""
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_blocks_have_valid_ranges(self, filename):
-        """All detected blocks have non-degenerate cell ranges."""
-        result = WorkbookParser(path=_fixture_path(filename)).parse()
-        for sheet in result.sheets:
-            tables = [t for t in result.tables if t.sheet_name == sheet.sheet_name]
-            segmenter = LayoutSegmenter(sheet, tables=tables)
-            blocks = segmenter.segment()
-            for block in blocks:
-                assert block.cell_range is not None
-                assert block.cell_range.row_count() >= 1
-                assert block.cell_range.col_count() >= 1
-                assert block.cell_count > 0
-
-    @pytest.mark.parametrize("filename", [d[0] for d in DATASET_CATALOG],
-                             ids=[d[0].replace(".xlsx", "") for d in DATASET_CATALOG])
-    def test_blocks_have_valid_types(self, filename):
-        """All block types are valid BlockType enum values."""
-        result = WorkbookParser(path=_fixture_path(filename)).parse()
-        for sheet in result.sheets:
-            tables = [t for t in result.tables if t.sheet_name == sheet.sheet_name]
-            segmenter = LayoutSegmenter(sheet, tables=tables)
-            blocks = segmenter.segment()
-            valid_types = set(BlockType)
-            for block in blocks:
-                assert block.block_type in valid_types
-
-    def test_superstore_multi_sheet_layout(self):
-        """SuperStore has 3 sheets, each producing at least one block."""
-        result = WorkbookParser(path=_fixture_path("superstore.xlsx")).parse()
-        assert len(result.sheets) == 3
-        for sheet in result.sheets:
-            tables = [t for t in result.tables if t.sheet_name == sheet.sheet_name]
-            segmenter = LayoutSegmenter(sheet, tables=tables)
-            blocks = segmenter.segment()
-            assert len(blocks) >= 1, f"Sheet '{sheet.sheet_name}' has no blocks"
-
-    def test_world_happiness_has_table(self):
-        """World Happiness dataset has an Excel ListObject table."""
-        result = WorkbookParser(path=_fixture_path("world_happiness_2019.xlsx")).parse()
-        assert len(result.tables) >= 1
-        table = result.tables[0]
-        assert table.table_name
-        assert table.ref_range is not None
-
-
-# ---------------------------------------------------------------------------
-# Determinism on real data
-# ---------------------------------------------------------------------------
-
-
-class TestRealWorldDeterminism:
-    """Parsing the same file twice produces identical output."""
-
-    @pytest.mark.parametrize("filename", ["iris.xlsx", "worldcups.xlsx", "bestsellers.xlsx"],
-                             ids=["iris", "worldcups", "bestsellers"])
-    def test_deterministic_json(self, filename):
-        """Two parses of the same file produce identical JSON (excluding timing)."""
-        r1 = parse_workbook(path=_fixture_path(filename))
-        r2 = parse_workbook(path=_fixture_path(filename))
-        j1 = r1.to_json()
-        j2 = r2.to_json()
-        # parse_duration_ms varies between runs; exclude from comparison
-        j1["workbook"]["parse_duration_ms"] = 0
-        j2["workbook"]["parse_duration_ms"] = 0
-        assert json.dumps(j1, sort_keys=True) == json.dumps(j2, sort_keys=True)
-
-    @pytest.mark.parametrize("filename", ["iris.xlsx", "worldcups.xlsx", "bestsellers.xlsx"],
-                             ids=["iris", "worldcups", "bestsellers"])
-    def test_deterministic_hashes(self, filename):
-        """Chunk IDs and content hashes are stable across runs."""
-        r1 = parse_workbook(path=_fixture_path(filename))
-        r2 = parse_workbook(path=_fixture_path(filename))
-        assert r1.total_chunks == r2.total_chunks
-        for c1, c2 in zip(r1.chunks, r2.chunks):
-            assert c1.chunk_id == c2.chunk_id
-            assert c1.content_hash == c2.content_hash
-
-
-# ---------------------------------------------------------------------------
-# Specific dataset content validation
-# ---------------------------------------------------------------------------
-
-
-class TestDatasetContent:
-    """Spot-check specific known values in well-known datasets."""
-
-    def test_iris_species_values(self):
-        """Iris dataset contains known species names."""
-        result = parse_workbook(path=_fixture_path("iris.xlsx"))
-        sheet = result.workbook.sheets[0]
-        species_col = None
-        # Find the species column
-        for col in range(1, 20):
-            cell = sheet.get_cell(1, col)
-            if cell and cell.raw_value == "species":
-                species_col = col
-                break
-        assert species_col is not None, "species column not found"
-        # Check known species
-        species_values = set()
-        for row in range(2, 152):
-            cell = sheet.get_cell(row, species_col)
-            if cell and cell.raw_value:
-                species_values.add(cell.raw_value)
-        assert "setosa" in species_values
-        assert "versicolor" in species_values
-        assert "virginica" in species_values
-
-    def test_worldcups_has_known_winners(self):
-        """WorldCups dataset contains known World Cup winners."""
-        result = parse_workbook(path=_fixture_path("worldcups.xlsx"))
-        sheet = result.workbook.sheets[0]
-        winner_col = None
-        for col in range(1, 20):
-            cell = sheet.get_cell(1, col)
-            if cell and cell.raw_value == "Winner":
-                winner_col = col
-                break
-        assert winner_col is not None, "Winner column not found"
-        winners = set()
-        for row in range(2, 25):
-            cell = sheet.get_cell(row, winner_col)
-            if cell and cell.raw_value:
-                winners.add(cell.raw_value)
-        assert "Brazil" in winners
-        assert "Germany" in winners
-
-    def test_titanic_numeric_columns(self):
-        """Titanic dataset has numeric columns (Survived, Pclass, Age)."""
-        result = parse_workbook(path=_fixture_path("titanic.xlsx"))
-        sheet = result.workbook.sheets[0]
-        # Check Survived column has 0/1 values
-        survived_col = None
-        for col in range(1, 30):
-            cell = sheet.get_cell(1, col)
-            if cell and cell.raw_value == "Survived":
-                survived_col = col
-                break
-        assert survived_col is not None
-        cell_val = sheet.get_cell(2, survived_col)
-        assert cell_val is not None
-        assert cell_val.raw_value in (0, 1, 0.0, 1.0)
-
-    def test_apple_stock_date_column(self):
-        """Apple stock dataset has a Date column with date values."""
-        result = parse_workbook(path=_fixture_path("apple_stock.xlsx"))
-        sheet = result.workbook.sheets[0]
-        date_col = None
-        for col in range(1, 10):
-            cell = sheet.get_cell(1, col)
-            if cell and cell.raw_value == "Date":
-                date_col = col
-                break
-        assert date_col is not None
-        # Check that at least one date cell has a date-like display value
-        date_cell = sheet.get_cell(2, date_col)
-        assert date_cell is not None
-        assert date_cell.display_value is not None
-
-    def test_superstore_multiple_sheets_content(self):
-        """SuperStore has Orders, Returns, and Users sheets with distinct content."""
-        result = parse_workbook(path=_fixture_path("superstore.xlsx"))
-        sheet_names = {s.sheet_name for s in result.workbook.sheets}
-        assert "Orders" in sheet_names
-        assert "Returns" in sheet_names
-        assert "Users" in sheet_names
-
-        # Orders sheet should be large
-        orders = next(s for s in result.workbook.sheets if s.sheet_name == "Orders")
-        assert orders.cell_count() > 40000
-
-        # Users sheet should be small
-        users = next(s for s in result.workbook.sheets if s.sheet_name == "Users")
-        assert users.cell_count() <= 20
diff --git a/tests/test_structural_invariants.py b/tests/test_structural_invariants.py
index f11a467..612d11a 100644
--- a/tests/test_structural_invariants.py
+++ b/tests/test_structural_invariants.py
@@ -278,29 +278,3 @@ def test_sheet_ids_populated(self, programmatic_xlsx):
             )
 
 
-# ---------------------------------------------------------------------------
-# Same invariants on static files (examples + github datasets)
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.invariant
-class TestAllInvariantsStatic:
-    """Run full invariant checker against each static xlsx file."""
-
-    def test_all_invariants_pass(self, static_xlsx):
-        result = parse_workbook(path=static_xlsx)
-        violations = check_invariants(result.workbook)
-        assert len(violations) == 0, (
-            f"{len(violations)} violations in {static_xlsx.name}:\n"
-            + "\n".join(violations[:10])
-        )
-
-    def test_deterministic_hashes(self, static_xlsx):
-        r1 = parse_workbook(path=static_xlsx)
-        r2 = parse_workbook(path=static_xlsx)
-        assert r1.workbook.workbook_hash == r2.workbook.workbook_hash
-
-    def test_json_serializable(self, static_xlsx):
-        result = parse_workbook(path=static_xlsx)
-        data = result.to_json()
-        json.dumps(data)  # must not raise
diff --git a/tests/test_testbench_roundtrip.py b/tests/test_testbench_roundtrip.py
deleted file mode 100644
index bfa4fd7..0000000
--- a/tests/test_testbench_roundtrip.py
+++ /dev/null
@@ -1,132 +0,0 @@
-"""
-testBench round-trip tests.
-
-Parses every .xlsx under ``testBench/`` and asserts:
-
-* ``parse_workbook()`` returns without raising.
-* ``result.to_json()`` produces non-empty JSON (> 100 bytes).
-* ``result.workbook`` has at least one sheet.
-
-Failures are collected into ``metrics/testbench/failures.json`` so parser
-regressions across the whole bench are easy to diff.
-
-Runs under the ``testbench`` marker only (skipped by default). Invoke with:
-
-    pytest tests/test_testbench_roundtrip.py -m testbench -q
-    make testbench   # convenience wrapper
-"""
-
-
-import json
-import os
-import traceback
-from pathlib import Path
-
-import pytest
-
-from ks_xlsx_parser import parse_workbook
-
-ROOT = Path(__file__).resolve().parent.parent
-TESTBENCH_DIR = ROOT / "testBench"
-METRICS_DIR = ROOT / "metrics" / "testbench"
-FAILURES_PATH = METRICS_DIR / "failures.json"
-FAILURES_JSONL = METRICS_DIR / "failures.jsonl"  # append-only, xdist-safe
-
-
-def _collect_files() -> list[Path]:
-    if not TESTBENCH_DIR.exists():
-        return []
-    return sorted(TESTBENCH_DIR.rglob("*.xlsx"))
-
-
-ALL_FILES = _collect_files()
-
-pytestmark = [pytest.mark.testbench, pytest.mark.timeout(60)]
-
-
-def _record_failure(entry: dict) -> None:
-    """Append one failure row to the JSONL log. Safe under xdist parallelism."""
-    METRICS_DIR.mkdir(parents=True, exist_ok=True)
-    entry["worker"] = os.environ.get("PYTEST_XDIST_WORKER", "main")
-    with FAILURES_JSONL.open("a", encoding="utf-8") as f:
-        f.write(json.dumps(entry) + "\n")
-
-
-@pytest.fixture(scope="session", autouse=True)
-def _reset_log():
-    """Reset the append log at the start of the session (master worker only)."""
-    # Under xdist, PYTEST_XDIST_WORKER is set for workers but not the master.
-    # The master is responsible for cleanup before workers start writing.
-    if os.environ.get("PYTEST_XDIST_WORKER") is None:
-        METRICS_DIR.mkdir(parents=True, exist_ok=True)
-        if FAILURES_JSONL.exists():
-            FAILURES_JSONL.unlink()
-    yield
-    # After session, aggregate JSONL → JSON summary (master only)
-    if os.environ.get("PYTEST_XDIST_WORKER") is None:
-        failures: list[dict] = []
-        if FAILURES_JSONL.exists():
-            for line in FAILURES_JSONL.read_text().splitlines():
-                if line.strip():
-                    failures.append(json.loads(line))
-        FAILURES_PATH.write_text(
-            json.dumps(
-                {"total": len(ALL_FILES), "failure_count": len(failures), "failures": failures},
-                indent=2,
-            )
-        )
-
-
-def _relpath(p: Path) -> str:
-    return str(p.relative_to(ROOT))
-
-
-@pytest.mark.parametrize("path", ALL_FILES, ids=lambda p: _relpath(p))
-def test_parse_roundtrip(path: Path):
-    """Each workbook must parse, serialize to JSON, and report ≥1 sheet."""
-    try:
-        result = parse_workbook(path=path)
-    except Exception as exc:
-        _record_failure({
-            "file": _relpath(path),
-            "stage": "parse",
-            "error": f"{type(exc).__name__}: {exc}",
-            "traceback": traceback.format_exc(limit=5),
-        })
-        raise
-
-    assert result.workbook is not None, f"no workbook DTO for {path}"
-    assert result.workbook.total_sheets >= 1, f"{path} reports zero sheets"
-
-    try:
-        js = result.to_json()
-    except Exception as exc:
-        _record_failure({
-            "file": _relpath(path),
-            "stage": "to_json",
-            "error": f"{type(exc).__name__}: {exc}",
-            "traceback": traceback.format_exc(limit=5),
-        })
-        raise
-
-    assert isinstance(js, dict), f"to_json returned non-dict for {path}"
-    assert "workbook" in js, f"to_json result missing 'workbook' key for {path}"
-    try:
-        encoded = json.dumps(js, default=str)
-    except Exception as exc:
-        _record_failure({
-            "file": _relpath(path),
-            "stage": "json_encode",
-            "error": f"{type(exc).__name__}: {exc}",
-            "traceback": traceback.format_exc(limit=5),
-        })
-        raise
-    assert len(encoded) > 100, f"encoded JSON suspiciously short ({len(encoded)} chars) for {path}"
-
-
-def test_testbench_has_files():
-    """Guard against an empty testBench (e.g. missing dataset zip)."""
-    assert ALL_FILES, (
-        f"No .xlsx files found under {TESTBENCH_DIR}. "
-        "Run `make testbench-build` or download the dataset zip from the GitHub release."
-    )