From 253e43e9a5d52ef9eeaa5270b5abc90029fab890 Mon Sep 17 00:00:00 2001
From: Michael Sitarzewski <msitarzewski@users.noreply.github.com>
Date: Wed, 18 Feb 2026 00:17:02 -0600
Subject: [PATCH] =?UTF-8?q?Epistemic=20confidence=20Phase=20A=20=E2=80=94?=
 =?UTF-8?q?=20honest=20confidence=20scoring?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Confidence now reflects inherent uncertainty of the question domain,
not just challenge quality. Rigor (renamed from old confidence) measures
challenge genuineness [0.5–1.0]; confidence = min(domain_cap, rigor)
where domain caps are factual=0.95, technical=0.90, creative=0.85,
judgment=0.80, strategic=0.70. Adds calibration module (ECE metric),
duh calibration CLI, GET /api/calibration endpoint, and calibration
dashboard in web UI. Full-stack propagation of rigor field across ORM,
handlers, CLI, API, WebSocket, MCP, and frontend (47 source files +
5 memory-bank files). 1586 Python + 126 Vitest = 1712 tests passing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 memory-bank/activeContext.md                  | 148 ++---
 memory-bank/decisions.md                      |  32 +-
 memory-bank/progress.md                       |  21 +-
 memory-bank/tasks/2026-02/README.md           |  37 ++
 memory-bank/toc.md                            |   4 +-
 src/duh/api/routes/ask.py                     |  15 +-
 src/duh/api/routes/crud.py                    |  68 ++
 src/duh/api/routes/threads.py                 |   3 +
 src/duh/api/routes/ws.py                      |   5 +-
 src/duh/calibration.py                        | 150 +++++
 src/duh/cli/app.py                            | 608 ++++++++++++++----
 src/duh/cli/display.py                        |  18 +-
 src/duh/consensus/handlers.py                 |  40 +-
 src/duh/consensus/machine.py                  |   4 +
 src/duh/consensus/scheduler.py                |  14 +-
 src/duh/consensus/synthesis.py                |   7 +
 src/duh/consensus/voting.py                   |   2 +
 src/duh/mcp/server.py                         |   7 +-
 src/duh/memory/context.py                     |   2 +-
 src/duh/memory/migrations.py                  |  34 +
 src/duh/memory/models.py                      |   1 +
 src/duh/memory/repository.py                  |   2 +
 tests/integration/test_consensus_loop.py      |   4 +-
 tests/sycophancy/test_confidence_impact.py    |  28 +-
 tests/sycophancy/test_known_flaws.py          |   4 +-
 tests/unit/test_api_crud.py                   |  91 +++
 tests/unit/test_api_ws.py                     |   5 +-
 tests/unit/test_calibration.py                | 162 +++++
 tests/unit/test_cli.py                        |  10 +-
 tests/unit/test_cli_batch.py                  |  16 +-
 tests/unit/test_cli_calibration.py            | 154 +++++
 tests/unit/test_cli_decompose.py              |   2 +-
 tests/unit/test_cli_display.py                |  19 +-
 tests/unit/test_cli_tools.py                  |   4 +-
 tests/unit/test_cli_voting.py                 |   4 +-
 tests/unit/test_commit_handler.py             |  34 +-
 tests/unit/test_confidence_scoring.py         | 163 +++++
 tests/unit/test_context_builder.py            |   4 +-
 tests/unit/test_mcp_server.py                 |   4 +-
 tests/unit/test_scheduler.py                  |   3 +-
 web/src/App.tsx                               |   2 +
 .../__tests__/consensus-components.test.tsx   |   7 +
 web/src/__tests__/stores.test.ts              |  83 +++
 web/src/api/client.ts                         |  16 +
 web/src/api/types.ts                          |  27 +
 .../calibration/CalibrationDashboard.tsx      | 243 +++++++
 web/src/components/calibration/index.ts       |   1 +
 .../components/consensus/ConfidenceMeter.tsx  |   8 +-
 .../consensus/ConsensusComplete.tsx           |  13 +-
 .../components/consensus/ConsensusPanel.tsx   |   4 +-
 .../decision-space/DecisionCloud.tsx          |   1 +
 web/src/components/layout/Sidebar.tsx         |   1 +
 web/src/components/shared/ExportMenu.tsx      |   6 +-
 web/src/components/threads/ThreadDetail.tsx   |  14 +-
 web/src/components/threads/TurnCard.tsx       |   5 +-
 web/src/pages/CalibrationPage.tsx             |  12 +
 web/src/pages/index.ts                        |   1 +
 web/src/stores/calibration.ts                 |  57 ++
 web/src/stores/consensus.ts                   |   8 +
 web/src/stores/index.ts                       |   1 +
 web/tsconfig.tsbuildinfo                      |   2 +-
 61 files changed, 2118 insertions(+), 327 deletions(-)
 create mode 100644 src/duh/calibration.py
 create mode 100644 src/duh/memory/migrations.py
 create mode 100644 tests/unit/test_calibration.py
 create mode 100644 tests/unit/test_cli_calibration.py
 create mode 100644 tests/unit/test_confidence_scoring.py
 create mode 100644 web/src/components/calibration/CalibrationDashboard.tsx
 create mode 100644 web/src/components/calibration/index.ts
 create mode 100644 web/src/pages/CalibrationPage.tsx
 create mode 100644 web/src/stores/calibration.ts

diff --git a/memory-bank/activeContext.md b/memory-bank/activeContext.md
index 8d150a2..040cda1 100644
--- a/memory-bank/activeContext.md
+++ b/memory-bank/activeContext.md
@@ -1,104 +1,68 @@
 # Active Context
 
-**Last Updated**: 2026-02-17
-**Current Phase**: v0.5 + Export Feature
-**Next Action**: Merge v0.5.0 to main. Export to Markdown & PDF feature implemented.
-
-## Next Task: Model Selection Controls + Provider Updates
-
-### Context
-Users can't control which models participate in consensus. `select_proposer()` picks highest `output_cost_per_mtok`, `select_challengers()` picks next-costliest. Problems: no user control (`ConsensusConfig.panel` exists but unused), Google catalog outdated, Perplexity should be challengers-only (search-grounded), Anthropic missing `claude-sonnet-4-6`.
-
-### Changes (6 steps)
-
-1. **Update provider model catalogs**
-   - `src/duh/providers/google.py:34-67` — Gemini 3 GA + early-access models (web search for latest)
-   - `src/duh/providers/anthropic.py:36-61` — Add `claude-sonnet-4-6`
-   - `src/duh/providers/perplexity.py:35-60` — Verify current model IDs/pricing
-
-2. **Add `proposer_eligible` flag to ModelInfo**
-   - `src/duh/providers/base.py:28-45` — Add `proposer_eligible: bool = True`
-   - `src/duh/providers/perplexity.py` — Set `proposer_eligible=False` (challengers only, user decision)
-
-3. **Wire `ConsensusConfig.panel` + update selection functions**
-   - `src/duh/consensus/handlers.py:185-202` (`select_proposer`) — Accept optional `panel`, filter to `proposer_eligible=True`
-   - `src/duh/consensus/handlers.py:322-356` (`select_challengers`) — Accept optional `panel`
-   - `src/duh/cli/app.py:236-246`, `src/duh/api/routes/ws.py:108,128`, `src/duh/api/routes/ask.py` — Pass panel
-
-4. **Add CLI flags**: `--proposer MODEL_REF`, `--challengers MODEL_REF,MODEL_REF`, `--panel MODEL_REF,...`
-   - `src/duh/cli/app.py` (ask command)
-
-5. **Add to REST API**: Optional `panel`, `proposer`, `challengers` fields in ask request body
-   - `src/duh/api/routes/ask.py`
-
-6. **Tests**: Update `test_propose_handler.py`, `test_challenge_handler.py` for panel filtering + proposer_eligible. Test CLI flags. Fix any tests with hardcoded model catalogs.
-
-7. **Documentation + CLI help**
-   - `docs/cli/ask.md` — Document `--proposer`, `--challengers`, `--panel` flags
-   - `docs/api-reference.md` — Document panel/proposer/challengers in `/api/ask`
-   - `docs/concepts/providers-and-models.md` — Update model lists, model selection explanation
-   - `docs/getting-started/configuration.md` — Document `[consensus] panel` config
-   - `docs/reference/config-reference.md` — Add panel, proposer_strategy fields
-   - `src/duh/cli/app.py` — Update Click help strings for new flags
-   - `docs/index.md` — Update feature list if needed
-
-### Current model cost ranking (for reference)
-| Model | output_cost | Provider |
-|-------|------------|----------|
-| Opus 4.6 | $25.00 | anthropic |
-| Sonar Pro | $15.00 | perplexity |
-| Sonnet 4.5 | $15.00 | anthropic |
-| GPT-5.2 | $14.00 | openai |
-| Gemini 3 Pro | $12.00 | google |
-| Gemini 2.5 Pro | $10.00 | google |
-| Mistral Medium | $8.10 | mistral |
-| o3 | $8.00 | openai |
-| Sonar Deep Research | $8.00 | perplexity |
-| Mistral Large | $6.00 | mistral |
-| Haiku 4.5 | $5.00 | anthropic |
+**Last Updated**: 2026-02-18
+**Current Phase**: Epistemic Confidence (Phase A) — on branch `epistemic-confidence-phase-a`
+**Next Action**: Commit, push, create PR to merge to main.
+
+## What Just Shipped: Epistemic Confidence Phase A
+
+### Core Change
+Confidence scoring is now **epistemic** — it reflects inherent uncertainty of the question domain, not just challenge quality.
+
+**Before**: `confidence = _compute_confidence(challenges)` — measured rigor only (0.5–1.0 based on sycophancy ratio).
+**After**: Two separate scores:
+- **Rigor** (renamed from old confidence) — how genuine the challenges were (0.5–1.0)
+- **Confidence** — `min(domain_cap(intent), rigor)` — rigor clamped by question type ceiling
+
+### Domain Caps
+| Intent | Cap | Rationale |
+|--------|-----|-----------|
+| factual | 0.95 | Verifiable answers, near-certain |
+| technical | 0.90 | Strong consensus possible |
+| creative | 0.85 | Subjective, multiple valid answers |
+| judgment | 0.80 | Requires weighing trade-offs |
+| strategic | 0.70 | Inherent future uncertainty |
+| unknown/None | 0.85 | Default conservative cap |
+
+### Files Changed (47 files, +997, -230)
+**New files:**
+- `src/duh/calibration.py` — ECE (Expected Calibration Error) computation
+- `src/duh/memory/migrations.py` — SQLite schema migration (adds rigor column)
+- `tests/unit/test_calibration.py` — 15 calibration tests
+- `tests/unit/test_confidence_scoring.py` — 20 epistemic confidence tests
+- `tests/unit/test_cli_calibration.py` — 4 CLI calibration tests
+- `web/src/components/calibration/CalibrationDashboard.tsx` — Calibration viz
+- `web/src/pages/CalibrationPage.tsx` — Calibration page
+- `web/src/stores/calibration.ts` — Calibration Zustand store
+
+**Modified across full stack:**
+- `consensus/handlers.py` — Renamed `_compute_confidence` → `_compute_rigor`, added `_domain_cap()`, `DOMAIN_CAPS`, epistemic formula
+- `consensus/machine.py` — Added `rigor` to ConsensusContext, RoundResult
+- `consensus/scheduler.py` — Propagates rigor through subtask results
+- `consensus/synthesis.py` — Averages rigor across subtask results
+- `consensus/voting.py` — Added rigor to VoteResult, VotingAggregation
+- `memory/models.py` — Added `rigor` column to Decision ORM
+- `memory/repository.py` — Accepts `rigor` param in `save_decision()`
+- `memory/context.py` — Shows rigor in context builder output
+- `cli/app.py` — All output paths show rigor; new `duh calibration` command; PDF export enhanced
+- `cli/display.py` — `show_commit()` and `show_final_decision()` show rigor
+- `api/routes/crud.py` — `GET /api/calibration` endpoint; rigor in decision space
+- `api/routes/ask.py`, `ws.py`, `threads.py` — Propagate rigor
+- `mcp/server.py` — Propagates rigor
+- Frontend: ConfidenceMeter, ConsensusComplete, ConsensusPanel, ThreadDetail, TurnCard, ExportMenu, Sidebar, DecisionCloud, stores updated
 
 ---
 
 ## Current State
 
-- **v0.5 + Export feature on branch `v0.5.0`.** All v0.5 tasks done + export feature added.
-- **6 providers shipping**: Anthropic (3 models), OpenAI (3 models), Google (4 models), Mistral (4 models), Perplexity (3 models) — 17 total.
-- **1539 Python unit/load tests + 122 Vitest tests** (1661 total), ruff clean.
-- **~60 Python source files + 67 frontend source files** (~127 total).
-- REST API, WebSocket streaming, MCP server, Python client library, web UI all built.
-- Multi-user auth (JWT + RBAC), PostgreSQL support, Prometheus metrics, backup/restore, Playwright E2E.
-- CLI commands: `duh ask`, `duh recall`, `duh threads`, `duh show`, `duh models`, `duh cost`, `duh serve`, `duh mcp`, `duh batch`, `duh export`, `duh feedback`, `duh backup`, `duh restore`, `duh user-create`, `duh user-list`.
-- Export: `duh export <id> --format pdf/markdown --content full/decision --no-dissent -o file`
-- Docs: production-deployment.md, monitoring.md, authentication.md added.
-- MkDocs docs site: https://msitarzewski.github.io/duh/
-- GitHub repo: https://github.com/msitarzewski/duh
+- **Branch `epistemic-confidence-phase-a`** — all changes uncommitted, ready to commit.
+- **1586 Python tests + 126 Vitest tests** (1712 total), ruff clean, mypy strict clean.
+- **~62 Python source files + 70 frontend source files** (~132 total).
+- All previous features intact (v0.1–v0.5 + export).
 
-## v0.5 Delivered
-
-**Theme**: Production hardening, multi-user, enterprise readiness.
-**18 tasks across 7 phases** — all complete.
-
-### What Shipped
-- User accounts + JWT auth + RBAC (admin/contributor/viewer) — `api/auth.py`, `api/rbac.py`, `models.py:User`
-- PostgreSQL support (asyncpg) with connection pooling (`pool_pre_ping`, compound indexes)
-- Perplexity provider adapter (6th provider, search-grounded) — `providers/perplexity.py`
-- Prometheus metrics (`/api/metrics`) + extended health checks (`/api/health/detailed`)
-- Backup/restore CLI (`duh backup`, `duh restore`) with SQLite copy + JSON export/import
-- Playwright E2E browser tests (`web/e2e/`)
-- Per-user + per-provider rate limiting (middleware keys by user_id > api_key > IP)
-- Production deployment documentation (3 new guides)
-- 26 multi-user integration tests + 12 load tests (latency, concurrency, rate limiting)
-- Alembic migration `005_v05_users.py` (users table, user_id FKs on threads/decisions/api_keys)
+## Next Task: Model Selection Controls + Provider Updates
 
-### New Source Files (v0.5)
-- `src/duh/api/auth.py` — JWT authentication endpoints
-- `src/duh/api/rbac.py` — Role-based access control
-- `src/duh/api/metrics.py` — Prometheus metrics endpoint
-- `src/duh/api/health.py` — Extended health checks
-- `src/duh/memory/backup.py` — Backup/restore utilities
-- `src/duh/providers/perplexity.py` — Perplexity provider adapter
-- `alembic/versions/005_v05_users.py` — User migration
-- `docs/guides/production-deployment.md`, `authentication.md`, `monitoring.md`
+Deferred from before Phase A. See `progress.md` for details.
 
 ## Open Questions (Still Unresolved)
 
diff --git a/memory-bank/decisions.md b/memory-bank/decisions.md
index 9d49d9d..b2eaef2 100644
--- a/memory-bank/decisions.md
+++ b/memory-bank/decisions.md
@@ -1,6 +1,6 @@
 # Architectural Decisions
 
-**Last Updated**: 2026-02-17
+**Last Updated**: 2026-02-18
 
 ---
 
@@ -324,3 +324,33 @@
 - Remove `create_all` entirely — breaks in-memory test fixtures that don't run alembic
 **Consequences**: Tests continue to work (in-memory SQLite still uses `create_all`). Production databases must run `alembic upgrade head` after code updates. This was already the expected workflow but is now enforced.
 **References**: `src/duh/cli/app.py:101-104`
+
+---
+
+## 2026-02-18: Epistemic Confidence — Separate Rigor from Confidence
+
+**Status**: Approved
+**Context**: The original `_compute_confidence()` in `handlers.py` measured challenge quality (ratio of genuine vs sycophantic challenges), producing a score in [0.5, 1.0]. This was misleading: a factual question ("What is the capital of France?") and a strategic question ("Will AI replace software engineers by 2035?") could both score 1.0 confidence if all challenges were genuine. But inherently uncertain questions should never report near-certain confidence.
+**Decision**: Split into two metrics:
+- **Rigor** (renamed from old confidence): measures challenge quality, [0.5, 1.0]
+- **Confidence** (epistemic): `min(domain_cap(intent), rigor)` — rigor clamped by a per-domain ceiling based on question intent (factual=0.95, technical=0.90, creative=0.85, judgment=0.80, strategic=0.70, default=0.85).
+**Alternatives**:
+- Single blended score (simpler, but hides the two distinct signals)
+- User-configurable caps (more flexible, but adds UX complexity without clear benefit)
+- LLM-estimated confidence (model judges own uncertainty — unreliable, circular)
+**Consequences**: Confidence scores are now more honest. Strategic questions max out at 70% even with perfect rigor. Rigor is preserved as a separate signal for calibration analysis. Requires `rigor` column added to Decision model. Full-stack change: ORM, handlers, CLI, API, WebSocket, MCP, frontend all updated.
+**References**: `src/duh/consensus/handlers.py:641-670`, `src/duh/calibration.py`
+
+---
+
+## 2026-02-18: Lightweight SQLite Migrations (Not Alembic)
+
+**Status**: Approved
+**Context**: Adding the `rigor` column to the `decisions` table requires a migration for existing file-based SQLite databases. Alembic handles PostgreSQL migrations, but for SQLite (the default local dev DB), running `alembic upgrade head` is a friction point for casual users.
+**Decision**: Created `src/duh/memory/migrations.py` with `ensure_schema()` that runs on startup for file-based SQLite only. Uses `PRAGMA table_info()` to detect missing columns and `ALTER TABLE` to add them. In-memory SQLite uses `create_all` (unchanged). PostgreSQL uses Alembic (unchanged).
+**Alternatives**:
+- Alembic-only (requires users to run migration command)
+- create_all for all databases (can't alter existing tables)
+- Manual migration instructions in docs (user friction)
+**Consequences**: File-based SQLite databases auto-migrate on startup. Zero friction for local users. PostgreSQL still requires `alembic upgrade head`. Lightweight and self-contained.
+**References**: `src/duh/memory/migrations.py`, `src/duh/cli/app.py:107-110`
diff --git a/memory-bank/progress.md b/memory-bank/progress.md
index 7aa3754..5d6b98a 100644
--- a/memory-bank/progress.md
+++ b/memory-bank/progress.md
@@ -1,10 +1,26 @@
 # Progress
 
-**Last Updated**: 2026-02-17
+**Last Updated**: 2026-02-18
 
 ---
 
-## Current State: v0.5 COMPLETE — Production Hardening & Multi-User
+## Current State: Epistemic Confidence Phase A COMPLETE
+
+### Epistemic Confidence Phase A
+
+- **Renamed `_compute_confidence` → `_compute_rigor`** — old "confidence" measured challenge quality, now called "rigor"
+- **Added `rigor` field** to Decision ORM model, ConsensusContext, RoundResult, SubtaskResult, VoteResult, VotingAggregation, SynthesisResult
+- **Domain caps** — confidence capped by question intent: factual (0.95), technical (0.90), creative (0.85), judgment (0.80), strategic (0.70), default (0.85)
+- **Epistemic formula**: `confidence = min(domain_cap(intent), rigor)` — rigor clamped by domain ceiling
+- **Calibration module** — `src/duh/calibration.py` computes ECE (Expected Calibration Error) from decisions with outcomes
+- **`duh calibration` CLI command** — shows calibration analysis with bucket breakdown
+- **`GET /api/calibration` endpoint** — serves calibration data with category/date filters
+- **Calibration frontend** — CalibrationDashboard, CalibrationPage, calibration Zustand store
+- **SQLite migration** — `src/duh/memory/migrations.py` adds rigor column on startup for file-based SQLite
+- **Full-stack propagation** — rigor shown in CLI, API, WebSocket, MCP, frontend across all views
+- **Enhanced PDF export** — research-paper quality: header/footer, TOC, provider callouts, confidence meter, Unicode TTF
+- 1586 Python tests + 126 Vitest tests (1712 total), ruff clean, mypy strict clean
+- New files: calibration.py, migrations.py, test_calibration.py, test_confidence_scoring.py, test_cli_calibration.py, CalibrationDashboard.tsx, CalibrationPage.tsx, calibration.ts
 
 ### v0.5 Additions
 
@@ -151,3 +167,4 @@ Phase 0 benchmark framework — fully functional, pilot-tested on 5 questions.
 | 2026-02-17 | v0.5 T14-T18 (Phase 7: Ship) — multi-user integration tests, load tests, docs, migration finalized, version bump | Done |
 | 2026-02-17 | v0.5.0 — "It Scales" | **Complete** |
 | 2026-02-17 | Export to Markdown & PDF (CLI + API + Web UI) | Done |
+| 2026-02-18 | Epistemic Confidence Phase A (rigor + domain caps + calibration) | Done |
diff --git a/memory-bank/tasks/2026-02/README.md b/memory-bank/tasks/2026-02/README.md
index 151d440..9850c2a 100644
--- a/memory-bank/tasks/2026-02/README.md
+++ b/memory-bank/tasks/2026-02/README.md
@@ -462,3 +462,40 @@
 - Manual override classes: `.theme-dark` / `.theme-light` on any ancestor element
 - Light mode code block overrides in `animations.css`
 - Variables: backgrounds (5), text (3), primary accent, semantic colors (3), borders (3), glass (2), layout (3), typography (1)
+
+---
+
+## Epistemic Confidence Phase A — "Honest Confidence"
+
+### 2026-02-18: Epistemic Confidence Scoring
+- Renamed `_compute_confidence()` → `_compute_rigor()` — old metric now properly named
+- Added `DOMAIN_CAPS` dict and `_domain_cap(intent)` lookup
+- New formula: `confidence = min(domain_cap(intent), rigor)`
+- Domain caps: factual (0.95), technical (0.90), creative (0.85), judgment (0.80), strategic (0.70), default (0.85)
+- `handle_commit()` now always attempts taxonomy classification to get intent for capping
+- Files: `src/duh/consensus/handlers.py`
+
+### 2026-02-18: Rigor Field Propagation (Full Stack)
+- Added `rigor: float` to Decision ORM, ConsensusContext, RoundResult, SubtaskResult, VoteResult, VotingAggregation, SynthesisResult
+- Updated save_decision(), scheduler, synthesis, voting to propagate rigor
+- Updated all CLI outputs (ask, recall, show, export JSON/markdown/PDF)
+- Updated API responses (crud, ask, ws, threads) and MCP server
+- Updated display (show_commit, show_final_decision)
+- Updated context builder to show rigor alongside confidence
+- Frontend: ConfidenceMeter, ConsensusComplete, ConsensusPanel, ThreadDetail, TurnCard, ExportMenu, DecisionCloud, stores
+- Files: 47 files changed, +997 insertions, -230 deletions
+
+### 2026-02-18: SQLite Schema Migration
+- Created `src/duh/memory/migrations.py` — `ensure_schema()` adds rigor column on startup
+- Runs for file-based SQLite only (PRAGMA table_info check → ALTER TABLE)
+- In-memory SQLite: create_all handles it. PostgreSQL: Alembic handles it.
+- Wired into `_create_db()` in `cli/app.py`
+
+### 2026-02-18: Calibration Module + CLI + API + Frontend
+- Created `src/duh/calibration.py` — `compute_calibration()` buckets decisions by confidence, computes ECE
+- `CalibrationBucket` and `CalibrationResult` dataclasses
+- `duh calibration [--category CAT]` CLI command
+- `GET /api/calibration` endpoint with category/since/until filters
+- Frontend: CalibrationDashboard (metric cards + bar chart + bucket table), CalibrationPage, calibration Zustand store
+- Tests: 15 calibration tests, 20 confidence scoring tests, 4 CLI calibration tests
+- **Total: 1586 Python + 126 Vitest = 1712 tests**
diff --git a/memory-bank/toc.md b/memory-bank/toc.md
index 7a9f592..cc4a0b7 100644
--- a/memory-bank/toc.md
+++ b/memory-bank/toc.md
@@ -3,8 +3,8 @@
 ## Core Files
 - [projectbrief.md](./projectbrief.md) — Vision, tenets, architecture, build sequence
 - [techContext.md](./techContext.md) — Tech stack decisions with rationale (Python, Docker, SQLAlchemy, frontend, tools, etc.)
-- [decisions.md](./decisions.md) — Architectural decisions with context, alternatives, and consequences (18 ADRs)
-- [activeContext.md](./activeContext.md) — Current state, v0.5 complete, ready to merge to main
+- [decisions.md](./decisions.md) — Architectural decisions with context, alternatives, and consequences (20 ADRs)
+- [activeContext.md](./activeContext.md) — Current state, epistemic confidence Phase A complete
 - [progress.md](./progress.md) — Milestone tracking, what's built, what's next
 - [competitive-landscape.md](./competitive-landscape.md) — Research on existing tools, frameworks, and academic work
 - [quick-start.md](./quick-start.md) — Session entry point, v0.5 complete, key file references
diff --git a/src/duh/api/routes/ask.py b/src/duh/api/routes/ask.py
index 0be2ba6..3d926bf 100644
--- a/src/duh/api/routes/ask.py
+++ b/src/duh/api/routes/ask.py
@@ -29,6 +29,7 @@ class AskRequest(BaseModel):
 class AskResponse(BaseModel):
     decision: str
     confidence: float
+    rigor: float = 0.0
     dissent: str | None = None
     cost: float
     thread_id: str | None = None
@@ -82,7 +83,7 @@ async def _handle_consensus(  # type: ignore[no-untyped-def]
     """Run the consensus protocol."""
     from duh.cli.app import _run_consensus
 
-    decision, confidence, dissent, cost = await _run_consensus(
+    decision, confidence, rigor, dissent, cost = await _run_consensus(
         body.question,
         config,
         pm,
@@ -95,7 +96,7 @@ async def _handle_consensus(  # type: ignore[no-untyped-def]
     if db_factory is not None:
         try:
             thread_id = await _persist_result(
-                db_factory, body.question, decision, confidence, dissent
+                db_factory, body.question, decision, confidence, dissent, rigor=rigor
             )
         except Exception:
             logger.exception("Failed to persist consensus thread")
@@ -103,6 +104,7 @@ async def _handle_consensus(  # type: ignore[no-untyped-def]
     return AskResponse(
         decision=decision,
         confidence=confidence,
+        rigor=rigor,
         dissent=dissent,
         cost=cost,
         thread_id=thread_id,
@@ -118,6 +120,7 @@ async def _handle_voting(body: AskRequest, config, pm) -> AskResponse:  # type:
     return AskResponse(
         decision=result.decision,
         confidence=result.confidence,
+        rigor=result.rigor,
         cost=pm.total_cost,
         protocol_used="voting",
     )
@@ -150,12 +153,13 @@ async def _handle_decompose(body: AskRequest, config, pm) -> AskResponse:  # typ
     if len(subtask_specs) == 1:
         from duh.cli.app import _run_consensus
 
-        decision, confidence, dissent, cost = await _run_consensus(
+        decision, confidence, rigor, dissent, cost = await _run_consensus(
             body.question, config, pm
         )
         return AskResponse(
             decision=decision,
             confidence=confidence,
+            rigor=rigor,
             dissent=dissent,
             cost=cost,
             protocol_used="decompose",
@@ -168,6 +172,7 @@ async def _handle_decompose(body: AskRequest, config, pm) -> AskResponse:  # typ
     return AskResponse(
         decision=synthesis_result.content,
         confidence=synthesis_result.confidence,
+        rigor=synthesis_result.rigor,
         cost=pm.total_cost,
         protocol_used="decompose",
     )
@@ -179,6 +184,8 @@ async def _persist_result(
     decision: str,
     confidence: float,
     dissent: str | None,
+    *,
+    rigor: float = 0.0,
 ) -> str:
     """Persist a consensus result to the database.
 
@@ -192,7 +199,7 @@ async def _persist_result(
         thread.status = "complete"
         turn = await repo.create_turn(thread.id, 1, "COMMIT")
         await repo.save_decision(
-            turn.id, thread.id, decision, confidence, dissent=dissent
+            turn.id, thread.id, decision, confidence, rigor=rigor, dissent=dissent
         )
         await session.commit()
         return str(thread.id)
diff --git a/src/duh/api/routes/crud.py b/src/duh/api/routes/crud.py
index 8bb8c14..f2254a0 100644
--- a/src/duh/api/routes/crud.py
+++ b/src/duh/api/routes/crud.py
@@ -196,6 +196,72 @@ async def cost(request: Request) -> CostResponse:
     )
 
 
+# -- GET /api/calibration ---------------------------------------------------
+
+
+class CalibrationBucketResponse(BaseModel):
+    range_lo: float
+    range_hi: float
+    count: int
+    with_outcomes: int
+    success: int
+    failure: int
+    partial: int
+    accuracy: float
+    mean_confidence: float
+
+
+class CalibrationResponse(BaseModel):
+    buckets: list[CalibrationBucketResponse]
+    total_decisions: int
+    total_with_outcomes: int
+    overall_accuracy: float
+    ece: float
+
+
+@router.get("/calibration", response_model=CalibrationResponse)
+async def calibration(
+    request: Request,
+    category: str | None = None,
+    since: str | None = None,
+    until: str | None = None,
+) -> CalibrationResponse:
+    """Confidence calibration analysis."""
+    from duh.calibration import compute_calibration
+    from duh.memory.repository import MemoryRepository
+
+    db_factory = request.app.state.db_factory
+    async with db_factory() as session:
+        repo = MemoryRepository(session)
+        decisions = await repo.get_all_decisions_for_space(
+            category=category,
+            since=since,
+            until=until,
+        )
+
+    result = compute_calibration(decisions)
+    return CalibrationResponse(
+        buckets=[
+            CalibrationBucketResponse(
+                range_lo=b.range_lo,
+                range_hi=b.range_hi,
+                count=b.count,
+                with_outcomes=b.with_outcomes,
+                success=b.success,
+                failure=b.failure,
+                partial=b.partial,
+                accuracy=b.accuracy,
+                mean_confidence=b.mean_confidence,
+            )
+            for b in result.buckets
+        ],
+        total_decisions=result.total_decisions,
+        total_with_outcomes=result.total_with_outcomes,
+        overall_accuracy=result.overall_accuracy,
+        ece=result.ece,
+    )
+
+
 # -- GET /api/decisions/space -----------------------------------------------
 
 
@@ -204,6 +270,7 @@ class SpaceDecisionResponse(BaseModel):
     thread_id: str
     question: str
     confidence: float
+    rigor: float = 0.0
     intent: str | None = None
     category: str | None = None
     genus: str | None = None
@@ -270,6 +337,7 @@ async def decision_space(
                     thread_id=d.thread_id,
                     question=question,
                     confidence=d.confidence,
+                    rigor=d.rigor,
                     intent=d.intent,
                     category=d.category,
                     genus=d.genus,
diff --git a/src/duh/api/routes/threads.py b/src/duh/api/routes/threads.py
index 9b4f0a9..7222b4d 100644
--- a/src/duh/api/routes/threads.py
+++ b/src/duh/api/routes/threads.py
@@ -23,6 +23,7 @@ class ContributionResponse(BaseModel):
 class DecisionResponse(BaseModel):
     content: str
     confidence: float
+    rigor: float = 0.0
     dissent: str | None = None
 
 
@@ -125,6 +126,7 @@ async def get_thread(thread_id: str, request: Request) -> ThreadDetailResponse:
             dec = DecisionResponse(
                 content=turn.decision.content,
                 confidence=turn.decision.confidence,
+                rigor=turn.decision.rigor,
                 dissent=turn.decision.dissent,
             )
         turns.append(
@@ -179,6 +181,7 @@ async def get_shared_thread(share_token: str, request: Request) -> ThreadDetailR
             dec = DecisionResponse(
                 content=turn.decision.content,
                 confidence=turn.decision.confidence,
+                rigor=turn.decision.rigor,
                 dissent=turn.decision.dissent,
             )
         turns.append(
diff --git a/src/duh/api/routes/ws.py b/src/duh/api/routes/ws.py
index e9d1bda..36ff8ff 100644
--- a/src/duh/api/routes/ws.py
+++ b/src/duh/api/routes/ws.py
@@ -186,11 +186,12 @@ async def _stream_consensus(
 
         # COMMIT
         sm.transition(ConsensusState.COMMIT)
-        await handle_commit(ctx)
+        await handle_commit(ctx, pm)
         await ws.send_json(
             {
                 "type": "commit",
                 "confidence": ctx.confidence,
+                "rigor": ctx.rigor,
                 "dissent": ctx.dissent,
                 "round": ctx.current_round,
             }
@@ -217,6 +218,7 @@ async def _stream_consensus(
             "type": "complete",
             "decision": ctx.decision or "",
             "confidence": ctx.confidence,
+            "rigor": ctx.rigor,
             "dissent": ctx.dissent,
             "cost": pm.total_cost,
             "thread_id": thread_id,
@@ -258,6 +260,7 @@ async def _persist_consensus(
                 thread.id,
                 rr.decision,
                 rr.confidence,
+                rigor=rr.rigor,
                 dissent=rr.dissent,
             )
 
diff --git a/src/duh/calibration.py b/src/duh/calibration.py
new file mode 100644
index 0000000..542d6da
--- /dev/null
+++ b/src/duh/calibration.py
@@ -0,0 +1,150 @@
+"""Confidence calibration analysis.
+
+Computes calibration metrics for decisions with tracked outcomes.
+Buckets decisions by confidence range and compares predicted
+confidence against actual accuracy (ECE metric).
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from duh.memory.models import Decision
+
+
+@dataclass(frozen=True)
+class CalibrationBucket:
+    """One confidence range bucket with accuracy stats."""
+
+    range_lo: float
+    range_hi: float
+    count: int
+    with_outcomes: int
+    success: int
+    failure: int
+    partial: int
+    accuracy: float
+    mean_confidence: float
+
+
+@dataclass(frozen=True)
+class CalibrationResult:
+    """Full calibration analysis result."""
+
+    buckets: list[CalibrationBucket] = field(default_factory=list)
+    total_decisions: int = 0
+    total_with_outcomes: int = 0
+    overall_accuracy: float = 0.0
+    ece: float = 0.0
+
+
+def compute_calibration(
+    decisions: Sequence[Decision],
+    *,
+    n_buckets: int = 10,
+) -> CalibrationResult:
+    """Compute calibration metrics from decisions with outcomes.
+
+    Buckets decisions by confidence into ``n_buckets`` equal-width bins.
+    For each bucket, accuracy = (success + 0.5 * partial) / with_outcomes.
+    ECE is the weighted average of |accuracy - mean_confidence| across
+    non-empty buckets.
+
+    Args:
+        decisions: Sequence of Decision model instances (with .outcome loaded).
+        n_buckets: Number of equal-width confidence bins (default 10).
+
+    Returns:
+        CalibrationResult with per-bucket stats and overall ECE.
+    """
+    if n_buckets < 1:
+        n_buckets = 1
+
+    # Initialize per-bucket accumulators
+    bucket_counts = [0] * n_buckets
+    bucket_with_outcomes = [0] * n_buckets
+    bucket_success = [0] * n_buckets
+    bucket_failure = [0] * n_buckets
+    bucket_partial = [0] * n_buckets
+    bucket_conf_sum = [0.0] * n_buckets
+
+    total = len(decisions)
+
+    for d in decisions:
+        # Determine bucket index from confidence
+        idx = int(d.confidence * n_buckets)
+        if idx >= n_buckets:
+            idx = n_buckets - 1
+        if idx < 0:
+            idx = 0
+
+        bucket_counts[idx] += 1
+        bucket_conf_sum[idx] += d.confidence
+
+        if d.outcome is not None:
+            result = d.outcome.result
+            bucket_with_outcomes[idx] += 1
+            if result == "success":
+                bucket_success[idx] += 1
+            elif result == "failure":
+                bucket_failure[idx] += 1
+            elif result == "partial":
+                bucket_partial[idx] += 1
+
+    # Build bucket objects
+    width = 1.0 / n_buckets
+    buckets: list[CalibrationBucket] = []
+    total_with_outcomes = 0
+    total_accuracy_sum = 0.0
+    ece_sum = 0.0
+    ece_weight_sum = 0
+
+    for i in range(n_buckets):
+        lo = round(i * width, 10)
+        hi = round((i + 1) * width, 10)
+        count = bucket_counts[i]
+        with_out = bucket_with_outcomes[i]
+        s = bucket_success[i]
+        f = bucket_failure[i]
+        p = bucket_partial[i]
+
+        mean_conf = bucket_conf_sum[i] / count if count > 0 else (lo + hi) / 2
+        accuracy = (s + 0.5 * p) / with_out if with_out > 0 else 0.0
+
+        buckets.append(
+            CalibrationBucket(
+                range_lo=lo,
+                range_hi=hi,
+                count=count,
+                with_outcomes=with_out,
+                success=s,
+                failure=f,
+                partial=p,
+                accuracy=accuracy,
+                mean_confidence=mean_conf,
+            )
+        )
+
+        total_with_outcomes += with_out
+        total_accuracy_sum += s + 0.5 * p
+
+        if with_out > 0:
+            ece_sum += with_out * abs(accuracy - mean_conf)
+            ece_weight_sum += with_out
+
+    overall_accuracy = (
+        total_accuracy_sum / total_with_outcomes if total_with_outcomes > 0 else 0.0
+    )
+    ece = ece_sum / ece_weight_sum if ece_weight_sum > 0 else 0.0
+
+    return CalibrationResult(
+        buckets=buckets,
+        total_decisions=total,
+        total_with_outcomes=total_with_outcomes,
+        overall_accuracy=overall_accuracy,
+        ece=ece,
+    )
diff --git a/src/duh/cli/app.py b/src/duh/cli/app.py
index 3a85774..56ddc91 100644
--- a/src/duh/cli/app.py
+++ b/src/duh/cli/app.py
@@ -10,6 +10,7 @@
 import json as json_mod
 import sys
 import time
+from datetime import UTC
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -104,6 +105,10 @@ def _enable_fks(dbapi_conn, connection_record):  # type: ignore[no-untyped-def]
     if is_memory:
         async with engine.begin() as conn:
             await conn.run_sync(Base.metadata.create_all)
+    elif url.startswith("sqlite"):
+        from duh.memory.migrations import ensure_schema
+
+        await ensure_schema(engine)
 
     factory = async_sessionmaker(engine, expire_on_commit=False)
     return factory, engine
@@ -204,10 +209,10 @@ async def _run_consensus(
     panel: list[str] | None = None,
     proposer_override: str | None = None,
     challengers_override: list[str] | None = None,
-) -> tuple[str, float, str | None, float]:
+) -> tuple[str, float, float, str | None, float]:
     """Run the full consensus loop.
 
-    Returns (decision, confidence, dissent, total_cost).
+    Returns (decision, confidence, rigor, dissent, total_cost).
     """
     from duh.consensus.convergence import check_convergence
     from duh.consensus.handlers import (
@@ -275,9 +280,9 @@ async def _run_consensus(
 
         # COMMIT
         sm.transition(ConsensusState.COMMIT)
-        await handle_commit(ctx)
+        await handle_commit(ctx, pm)
         if display:
-            display.show_commit(ctx.confidence, ctx.dissent)
+            display.show_commit(ctx.confidence, ctx.rigor, ctx.dissent)
             display.round_footer(
                 ctx.current_round,
                 config.general.max_rounds,
@@ -303,6 +308,7 @@ async def _run_consensus(
     return (
         ctx.decision or "",
         ctx.confidence,
+        ctx.rigor,
         ctx.dissent,
         pm.total_cost,
     )
@@ -442,12 +448,12 @@ def ask(
         _error(str(e))
         return  # unreachable
 
-    decision, confidence, dissent, cost = result
+    decision, confidence, rigor, dissent, cost = result
 
     from duh.cli.display import ConsensusDisplay
 
     display = ConsensusDisplay()
-    display.show_final_decision(decision, confidence, cost, dissent)
+    display.show_final_decision(decision, confidence, rigor, cost, dissent)
 
 
 async def _ask_async(
@@ -457,7 +463,7 @@ async def _ask_async(
     panel: list[str] | None = None,
     proposer_override: str | None = None,
     challengers_override: list[str] | None = None,
-) -> tuple[str, float, str | None, float]:
+) -> tuple[str, float, float, str | None, float]:
     """Async implementation for the ask command."""
     from duh.cli.display import ConsensusDisplay
 
@@ -528,6 +534,7 @@ async def _ask_voting_async(
                 thread.id,
                 result.decision,
                 result.confidence,
+                rigor=result.rigor,
             )
         await session.commit()
     await engine.dispose()
@@ -563,10 +570,10 @@ async def _ask_auto_async(
 
         display = ConsensusDisplay()
         display.start()
-        decision, confidence, dissent, cost = await _run_consensus(
+        decision, confidence, rigor, dissent, cost = await _run_consensus(
             question, config, pm, display=display
         )
-        display.show_final_decision(decision, confidence, cost, dissent)
+        display.show_final_decision(decision, confidence, rigor, cost, dissent)
 
 
 async def _ask_decompose_async(
@@ -639,8 +646,8 @@ async def _ask_decompose_async(
     # Single-subtask optimization: skip synthesis
     if len(subtask_specs) == 1:
         result = await _run_consensus(question, config, pm, display=display)
-        decision, confidence, dissent, cost = result
-        display.show_final_decision(decision, confidence, cost, dissent)
+        decision, confidence, rigor, dissent, cost = result
+        display.show_final_decision(decision, confidence, rigor, cost, dissent)
         await engine.dispose()
         return
 
@@ -660,6 +667,7 @@ async def _ask_decompose_async(
     display.show_final_decision(
         synthesis_result.content,
         synthesis_result.confidence,
+        synthesis_result.rigor,
         pm.total_cost,
         None,
     )
@@ -711,7 +719,9 @@ async def _recall_async(config: DuhConfig, query: str, limit: int) -> None:
             latest = thread.decisions[-1]
             snippet = latest.content[:120].replace("\n", " ")
             click.echo(f"    Decision: {snippet}...")
-            click.echo(f"    Confidence: {latest.confidence:.0%}")
+            click.echo(
+                f"    Confidence: {latest.confidence:.0%}  Rigor: {latest.rigor:.0%}"
+            )
         click.echo()
 
 
@@ -830,7 +840,10 @@ async def _show_async(config: DuhConfig, thread_id: str) -> None:
             click.echo(f"  {contrib.content}")
             click.echo()
         if turn.decision:
-            click.echo(f"  Decision (confidence {turn.decision.confidence:.0%}):")
+            click.echo(
+                f"  Decision (confidence {turn.decision.confidence:.0%},"
+                f" rigor {turn.decision.rigor:.0%}):"
+            )
             click.echo(f"  {turn.decision.content}")
             if turn.decision.dissent:
                 click.echo(f"  Dissent: {turn.decision.dissent}")
@@ -1086,6 +1099,7 @@ def _format_thread_json(
             decision_data = {
                 "content": turn.decision.content,
                 "confidence": turn.decision.confidence,
+                "rigor": turn.decision.rigor,
                 "dissent": turn.decision.dissent,
             }
 
@@ -1153,7 +1167,8 @@ def _format_thread_markdown(
         lines.append(final_decision.content)
         lines.append("")
         conf_pct = f"{final_decision.confidence:.0%}"
-        lines.append(f"Confidence: {conf_pct}")
+        rigor_pct = f"{final_decision.rigor:.0%}"
+        lines.append(f"Confidence: {conf_pct}  Rigor: {rigor_pct}")
         lines.append("")
 
         if include_dissent and final_decision.dissent:
@@ -1220,19 +1235,30 @@ def _format_thread_pdf(
     content: str = "full",
     include_dissent: bool = True,
 ) -> bytes:
-    """Format a thread as PDF for export.
+    """Format a thread as a research-paper quality PDF.
 
-    Args:
-        content: "full" for complete report, "decision" for decision only.
-        include_dissent: Whether to include the dissent section.
+    Features: repeating header/footer, TOC with bookmarks, provider-colored
+    callout boxes, confidence meter, and full Unicode via TTF fonts (with
+    graceful fallback to core Helvetica).
     """
     import html as html_mod
     import re
+    from datetime import datetime
 
     from fpdf import FPDF  # type: ignore[import-untyped]
 
     total_cost = sum(c.cost_usd for turn in thread.turns for c in turn.contributions)
+    total_input = sum(
+        c.input_tokens for turn in thread.turns for c in turn.contributions
+    )
+    total_output = sum(
+        c.output_tokens for turn in thread.turns for c in turn.contributions
+    )
     created = thread.created_at.strftime("%Y-%m-%d")
+    exported = datetime.now(tz=UTC).strftime("%Y-%m-%d")
+    model_refs = sorted(
+        {c.model_ref for turn in thread.turns for c in turn.contributions}
+    )
 
     final_decision = None
     for turn in reversed(thread.turns):
@@ -1240,23 +1266,93 @@ def _format_thread_pdf(
             final_decision = turn.decision
             break
 
-    def _pdf_safe(text: str) -> str:
-        """Replace Unicode chars unsupported by core PDF fonts."""
-        for char, repl in (
-            ("\u2014", "--"),
-            ("\u2013", "-"),
-            ("\u2018", "'"),
-            ("\u2019", "'"),
-            ("\u201c", '"'),
-            ("\u201d", '"'),
-            ("\u2026", "..."),
-            ("\u2022", "*"),
-            ("\u00a0", " "),
-            ("\u2192", "->"),
-            ("\u2190", "<-"),
-        ):
-            text = text.replace(char, repl)
-        return text.encode("latin-1", errors="replace").decode("latin-1")
+    # ── Provider color map ──────────────────────────────────────
+    provider_colors: dict[str, tuple[int, int, int]] = {
+        "anthropic": (204, 107, 43),
+        "openai": (16, 163, 127),
+        "google": (66, 133, 244),
+        "mistral": (131, 56, 236),
+        "perplexity": (0, 160, 160),
+    }
+    default_color = (120, 120, 120)
+
+    def _provider_color(model_ref: str) -> tuple[int, int, int]:
+        provider = model_ref.split(":")[0].lower() if ":" in model_ref else ""
+        return provider_colors.get(provider, default_color)
+
+    # ── PDF subclass with header/footer ─────────────────────────
+
+    class ConsensusReport(FPDF):  # type: ignore[misc]
+        """FPDF subclass with repeating header and footer."""
+
+        def __init__(self) -> None:
+            super().__init__()
+            self._use_ttf = False
+            self._font_family = "Helvetica"
+            self._mono_family = "Courier"
+
+        def _setup_fonts(self) -> None:
+            """Try to load a TTF font for Unicode support."""
+            import os
+
+            search_paths = [
+                "/System/Library/Fonts/Helvetica.ttc",
+                "/System/Library/Fonts/HelveticaNeue.ttc",
+                "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+                "/usr/share/fonts/TTF/DejaVuSans.ttf",
+            ]
+            for path in search_paths:
+                if os.path.isfile(path):
+                    try:
+                        self.add_font("DuhSans", "", path)
+                        self.add_font("DuhSans", "B", path)
+                        self.add_font("DuhSans", "I", path)
+                        self._use_ttf = True
+                        self._font_family = "DuhSans"
+                        break
+                    except Exception:
+                        continue
+
+        def header(self) -> None:
+            self.set_font(self._font_family, "", 8)
+            self.set_text_color(160, 160, 160)
+            self.cell(0, 5, "duh consensus report", align="L")
+            self.cell(0, 5, exported, align="R", new_x="LMARGIN", new_y="NEXT")
+            self.set_draw_color(220, 220, 220)
+            self.line(10, self.get_y(), 200, self.get_y())
+            self.ln(4)
+
+        def footer(self) -> None:
+            self.set_y(-15)
+            self.set_font(self._font_family, "", 8)
+            self.set_text_color(160, 160, 160)
+            self.set_draw_color(220, 220, 220)
+            self.line(10, self.get_y(), 200, self.get_y())
+            self.ln(2)
+            self.cell(0, 5, f"Page {self.page_no()}/{{nb}}", align="C")
+            self.cell(0, 5, f"duh v{__version__}", align="R")
+
+        def _safe(self, text: str) -> str:
+            """Make text safe for the current font encoding."""
+            if self._use_ttf:
+                return text
+            for char, repl in (
+                ("\u2014", "--"),
+                ("\u2013", "-"),
+                ("\u2018", "'"),
+                ("\u2019", "'"),
+                ("\u201c", '"'),
+                ("\u201d", '"'),
+                ("\u2026", "..."),
+                ("\u2022", "*"),
+                ("\u00a0", " "),
+                ("\u2192", "->"),
+                ("\u2190", "<-"),
+            ):
+                text = text.replace(char, repl)
+            return text.encode("latin-1", errors="replace").decode("latin-1")
+
+    # ── Markdown rendering helpers ──────────────────────────────
 
     def _inline_fmt(text: str) -> str:
         """Convert inline markdown (bold, italic, code) to HTML."""
@@ -1265,7 +1361,8 @@ def _inline_fmt(text: str) -> str:
         for part in parts:
             if part.startswith("`") and part.endswith("`"):
                 result.append(
-                    f"<font face='Courier'>{html_mod.escape(part[1:-1])}</font>"
+                    f"<font face='{pdf._mono_family}'>"
+                    f"{html_mod.escape(part[1:-1])}</font>"
                 )
             else:
                 escaped = html_mod.escape(part)
@@ -1309,7 +1406,6 @@ def _md_to_html(md: str) -> str:
                     in_list = False
                 continue
 
-            # Headers -> bold paragraph
             m = re.match(r"^#{1,6}\s+(.+)$", stripped)
             if m:
                 if in_list:
@@ -1318,7 +1414,6 @@ def _md_to_html(md: str) -> str:
                 parts.append(f"<p><b>{_inline_fmt(m.group(1))}</b></p>")
                 continue
 
-            # Unordered list
             m = re.match(r"^[-*]\s+(.+)$", stripped)
             if m:
                 if not in_list or list_tag != "ul":
@@ -1330,7 +1425,6 @@ def _md_to_html(md: str) -> str:
                 parts.append(f"<li>{_inline_fmt(m.group(1))}</li>")
                 continue
 
-            # Ordered list
             m = re.match(r"^\d+[.)]\s+(.+)$", stripped)
             if m:
                 if not in_list or list_tag != "ol":
@@ -1342,7 +1436,6 @@ def _md_to_html(md: str) -> str:
                 parts.append(f"<li>{_inline_fmt(m.group(1))}</li>")
                 continue
 
-            # Regular text
             if in_list:
                 parts.append(f"</{list_tag}>")
                 in_list = False
@@ -1357,122 +1450,277 @@ def _md_to_html(md: str) -> str:
 
     def _write_md(md_text: str) -> None:
         """Render markdown content as formatted PDF."""
-        pdf.write_html(_pdf_safe(_md_to_html(md_text)))
+        pdf.write_html(pdf._safe(_md_to_html(md_text)))
+
+    # ── Callout box helper ──────────────────────────────────────
+
+    def _draw_accent_bar(
+        start_y: float, end_y: float, color: tuple[int, int, int]
+    ) -> None:
+        """Draw a thick colored accent bar on the left margin."""
+        saved_draw = (pdf.draw_color.r, pdf.draw_color.g, pdf.draw_color.b)
+        saved_width = pdf.line_width
+        pdf.set_draw_color(*color)
+        pdf.set_line_width(2.5)
+        x = pdf.l_margin - 1
+        # Clamp to page content area
+        top = max(start_y, pdf.t_margin)
+        bot = min(end_y, pdf.h - pdf.b_margin)
+        if bot > top:
+            pdf.line(x, top, x, bot)
+        pdf.set_draw_color(*saved_draw)
+        pdf.set_line_width(saved_width)
+
+    def _callout_box(
+        model_ref: str,
+        role: str,
+        body: str,
+        *,
+        accent: tuple[int, int, int] | None = None,
+    ) -> None:
+        """Draw a colored callout box with provider accent line."""
+        color = accent or _provider_color(model_ref)
+        start_y = pdf.get_y()
+
+        # Indent content to leave room for accent bar
+        saved_margin = pdf.l_margin
+        pdf.set_left_margin(saved_margin + 6)
+        pdf.set_x(pdf.l_margin)
+
+        # Header: model + role
+        pdf.set_font(pdf._font_family, "B", 9)
+        pdf.set_text_color(*color)
+        pdf.cell(0, 5, pdf._safe(f"{model_ref}  |  {role.upper()}"))
+        pdf.ln(5)
+
+        # Body
+        pdf.set_text_color(40, 40, 40)
+        pdf.set_font(pdf._font_family, "", 10)
+        _write_md(body)
+        pdf.ln(2)
+
+        end_y = pdf.get_y()
+
+        # Draw accent bar on left edge (doesn't overlap text)
+        _draw_accent_bar(start_y, end_y, color)
+
+        # Restore margin
+        pdf.set_left_margin(saved_margin)
+        pdf.ln(4)
 
-    pdf = FPDF()
+    # ── Build the PDF ───────────────────────────────────────────
+
+    pdf = ConsensusReport()
+    pdf._setup_fonts()
+    pdf.alias_nb_pages()
+    pdf.set_auto_page_break(auto=True, margin=20)
+    pdf.set_text_color(40, 40, 40)
+
+    # -- Title page / header area --
     pdf.add_page()
-    pdf.set_auto_page_break(auto=True, margin=15)
-    font = "Helvetica"
 
-    # Title
-    pdf.set_font(font, "B", 16)
-    pdf.multi_cell(0, 10, _pdf_safe(f"Consensus: {thread.question}"))
-    pdf.ln(5)
+    pdf.set_font(pdf._font_family, "B", 20)
+    pdf.multi_cell(0, 10, pdf._safe(thread.question))
+    pdf.ln(3)
+
+    # Metadata line
+    pdf.set_font(pdf._font_family, "", 9)
+    pdf.set_text_color(130, 130, 130)
+    meta_parts = [
+        f"Thread {thread.id[:8]}",
+        f"Created {created}",
+        f"{len(model_refs)} model{'s' if len(model_refs) != 1 else ''}",
+    ]
+    if total_cost > 0:
+        meta_parts.append(f"Cost ${total_cost:.4f}")
+    pdf.cell(0, 5, pdf._safe("  |  ".join(meta_parts)))
+    pdf.ln(6)
+
+    # Horizontal rule
+    pdf.set_draw_color(200, 200, 200)
+    pdf.set_line_width(0.5)
+    pdf.line(10, pdf.get_y(), 200, pdf.get_y())
+    pdf.ln(6)
+    pdf.set_text_color(40, 40, 40)
+    pdf.set_line_width(0.2)
+
+    # -- TOC placeholder --
+    if content == "full":
+        pdf.insert_toc_placeholder(
+            render_toc,
+            pages=1,
+        )
 
-    # Decision
+    # -- Decision section --
     if final_decision:
-        pdf.set_font(font, "B", 13)
+        pdf.start_section("Decision")
+        pdf.set_font(pdf._font_family, "B", 15)
         pdf.cell(0, 8, "Decision")
-        pdf.ln()
-        pdf.ln(2)
-        pdf.set_font(font, "", 11)
-        _write_md(final_decision.content)
-        pdf.ln(3)
-        conf_pct = f"{final_decision.confidence:.0%}"
-        pdf.set_font(font, "I", 10)
-        pdf.cell(0, 6, f"Confidence: {conf_pct}")
         pdf.ln(8)
 
+        decision_start_y = pdf.get_y()
+
+        # Indent for accent bar
+        pdf.set_left_margin(16)
+        pdf.set_x(16)
+
+        # Decision content
+        pdf.set_font(pdf._font_family, "", 11)
+        pdf.set_text_color(40, 40, 40)
+        _write_md(final_decision.content)
+        pdf.ln(4)
+
+        # Confidence meter
+        conf_pct = final_decision.confidence
+        pdf.set_font(pdf._font_family, "B", 10)
+        pdf.cell(30, 6, pdf._safe(f"Confidence: {conf_pct:.0%}"))
+        bar_x = pdf.get_x() + 2
+        bar_y = pdf.get_y() + 1
+        bar_w = 60
+        bar_h = 4
+        pdf.set_fill_color(230, 230, 230)
+        pdf.rect(bar_x, bar_y, bar_w, bar_h, style="F")
+        g = int(100 + 155 * conf_pct)
+        pdf.set_fill_color(40, min(g, 200), 80)
+        pdf.rect(bar_x, bar_y, bar_w * conf_pct, bar_h, style="F")
+        pdf.ln(10)
+
+        # Rigor meter
+        rigor_pct = final_decision.rigor
+        pdf.set_font(pdf._font_family, "B", 10)
+        pdf.cell(30, 6, pdf._safe(f"Rigor: {rigor_pct:.0%}"))
+        bar_x = pdf.get_x() + 2
+        bar_y = pdf.get_y() + 1
+        pdf.set_fill_color(230, 230, 230)
+        pdf.rect(bar_x, bar_y, bar_w, bar_h, style="F")
+        g = int(100 + 155 * rigor_pct)
+        pdf.set_fill_color(40, min(g, 200), 80)
+        pdf.rect(bar_x, bar_y, bar_w * rigor_pct, bar_h, style="F")
+        pdf.ln(10)
+
+        # Draw green accent bar
+        _draw_accent_bar(decision_start_y, pdf.get_y(), (40, 160, 80))
+        pdf.set_left_margin(10)
+
+        # Dissent
         if include_dissent and final_decision.dissent:
-            pdf.set_font(font, "B", 13)
+            pdf.start_section("Dissent", level=1)
+            pdf.set_font(pdf._font_family, "B", 13)
+            pdf.set_text_color(40, 40, 40)
             pdf.cell(0, 8, "Dissent")
-            pdf.ln()
-            pdf.ln(2)
-            pdf.set_font(font, "", 11)
+            pdf.ln(6)
+
+            dissent_start_y = pdf.get_y()
+            pdf.set_left_margin(16)
+            pdf.set_x(16)
+
+            pdf.set_font(pdf._font_family, "I", 10)
+            pdf.set_text_color(100, 100, 100)
             _write_md(final_decision.dissent)
-            pdf.ln(5)
+            pdf.ln(4)
+
+            # Amber accent bar
+            _draw_accent_bar(dissent_start_y, pdf.get_y(), (200, 140, 80))
+            pdf.set_left_margin(10)
+            pdf.set_text_color(40, 40, 40)
 
+    # -- Consensus process --
     if content == "full":
-        # Separator
-        pdf.set_draw_color(180, 180, 180)
+        pdf.set_draw_color(200, 200, 200)
         pdf.line(10, pdf.get_y(), 200, pdf.get_y())
-        pdf.ln(5)
+        pdf.ln(6)
 
-        pdf.set_font(font, "B", 13)
+        pdf.start_section("Consensus Process")
+        pdf.set_font(pdf._font_family, "B", 15)
         pdf.cell(0, 8, "Consensus Process")
-        pdf.ln()
-        pdf.ln(3)
+        pdf.ln(8)
 
         for turn in thread.turns:
-            pdf.set_font(font, "B", 12)
-            pdf.cell(0, 7, f"Round {turn.round_number}")
-            pdf.ln()
-            pdf.ln(2)
+            section_title = f"Round {turn.round_number}"
+            pdf.start_section(section_title, level=1)
+            pdf.set_font(pdf._font_family, "B", 13)
+            pdf.set_text_color(80, 80, 80)
+            pdf.cell(0, 7, section_title)
+            pdf.ln(7)
+            pdf.set_text_color(40, 40, 40)
+
+            for c in turn.contributions:
+                _callout_box(c.model_ref, c.role, c.content)
+
+        # Votes
+        if votes:
+            pdf.start_section("Votes", level=1)
+            pdf.set_font(pdf._font_family, "B", 13)
+            pdf.cell(0, 8, "Votes")
+            pdf.ln(6)
 
-            proposers = [c for c in turn.contributions if c.role == "proposer"]
-            challengers = [c for c in turn.contributions if c.role == "challenger"]
-            revisers = [c for c in turn.contributions if c.role == "reviser"]
-            others = [
-                c
-                for c in turn.contributions
-                if c.role not in ("proposer", "challenger", "reviser")
-            ]
+            for v in votes:
+                color = _provider_color(v.model_ref)
+                pdf.set_font(pdf._font_family, "B", 10)
+                pdf.set_text_color(*color)
+                pdf.cell(55, 6, pdf._safe(v.model_ref))
+                pdf.set_font(pdf._font_family, "", 10)
+                pdf.set_text_color(60, 60, 60)
+                pdf.cell(0, 6, pdf._safe(v.content))
+                pdf.ln(6)
+
+            pdf.ln(4)
+            pdf.set_text_color(40, 40, 40)
+
+    # -- Appendix: metadata footer ───────────────────────────────
+    pdf.ln(4)
+    pdf.set_draw_color(200, 200, 200)
+    pdf.line(10, pdf.get_y(), 200, pdf.get_y())
+    pdf.ln(4)
+
+    pdf.set_font(pdf._font_family, "", 8)
+    pdf.set_text_color(140, 140, 140)
+    footer_parts = [
+        f"Cost: ${total_cost:.4f}",
+        f"Tokens: {total_input:,} in / {total_output:,} out",
+        f"Models: {', '.join(model_refs)}",
+    ]
+    pdf.cell(0, 4, pdf._safe("  |  ".join(footer_parts)))
+    pdf.set_text_color(40, 40, 40)
 
-            for p in proposers:
-                pdf.set_font(font, "B", 11)
-                pdf.cell(0, 6, f"Proposal ({p.model_ref})")
-                pdf.ln()
-                pdf.set_font(font, "", 10)
-                _write_md(p.content)
-                pdf.ln(3)
+    return bytes(pdf.output())
 
-            if challengers:
-                pdf.set_font(font, "B", 11)
-                pdf.cell(0, 6, "Challenges")
-                pdf.ln()
-                for ch in challengers:
-                    pdf.set_font(font, "B", 10)
-                    pdf.cell(0, 5, f"{ch.model_ref}:")
-                    pdf.ln()
-                    pdf.set_font(font, "", 10)
-                    _write_md(ch.content)
-                    pdf.ln(2)
 
-            for r in revisers:
-                pdf.set_font(font, "B", 11)
-                pdf.cell(0, 6, f"Revision ({r.model_ref})")
-                pdf.ln()
-                pdf.set_font(font, "", 10)
-                _write_md(r.content)
-                pdf.ln(3)
+def render_toc(pdf: object, outline: list[object]) -> None:
+    """Render a table of contents page for the PDF.
 
-            for o in others:
-                role_label = o.role.capitalize()
-                pdf.set_font(font, "B", 11)
-                pdf.cell(0, 6, f"{role_label} ({o.model_ref})")
-                pdf.ln()
-                pdf.set_font(font, "", 10)
-                _write_md(o.content)
-                pdf.ln(3)
+    Called by fpdf2's ``insert_toc_placeholder`` mechanism.
+    """
+    from fpdf import FPDF
 
-        if votes:
-            pdf.set_font(font, "B", 11)
-            pdf.cell(0, 6, "Votes")
-            pdf.ln()
-            for v in votes:
-                pdf.set_font(font, "", 10)
-                pdf.cell(0, 5, _pdf_safe(f"{v.model_ref}: {v.content}"))
-                pdf.ln()
-            pdf.ln(3)
+    assert isinstance(pdf, FPDF)
+    font = getattr(pdf, "_font_family", "Helvetica")
+    pdf.set_font(font, "B", 15)
+    pdf.set_text_color(40, 40, 40)
+    pdf.cell(0, 10, "Table of Contents")
+    pdf.ln(10)
 
-    # Footer
-    pdf.set_draw_color(180, 180, 180)
-    pdf.line(10, pdf.get_y(), 200, pdf.get_y())
-    pdf.ln(3)
-    pdf.set_font(font, "I", 9)
-    pdf.cell(0, 5, f"duh v{__version__} | {created} | Cost: ${total_cost:.4f}")
+    for entry in outline:
+        level = getattr(entry, "level", 0)
+        name = getattr(entry, "name", "")
+        page_number = getattr(entry, "page_number", 0)
+        link = getattr(entry, "link", None)
 
-    return bytes(pdf.output())
+        indent = 4 * level
+        pdf.set_x(pdf.l_margin + indent)
+
+        if level == 0:
+            pdf.set_font(font, "B", 11)
+        else:
+            pdf.set_font(font, "", 10)
+
+        pdf.set_text_color(60, 60, 60)
+        w = pdf.w - pdf.l_margin - pdf.r_margin - indent - 15
+        # Use safe method if available
+        safe = getattr(pdf, "_safe", lambda t: t)
+        pdf.cell(w, 6, safe(name), link=link)
+        pdf.cell(15, 6, str(page_number), align="R")
+        pdf.ln(6)
 
 
 # ── models ───────────────────────────────────────────────────────
@@ -1581,6 +1829,99 @@ async def _cost_async(config: DuhConfig) -> None:
             click.echo(f"  {model_ref}: ${model_cost:.4f} ({call_count} calls)")
 
 
+# ── calibration ──────────────────────────────────────────────────
+
+
+@cli.command()
+@click.option("--category", default=None, help="Filter by decision category.")
+@click.option("--since", default=None, help="Only decisions after this date (ISO).")
+@click.option("--until", default=None, help="Only decisions before this date (ISO).")
+@click.pass_context
+def calibration(
+    ctx: click.Context,
+    category: str | None,
+    since: str | None,
+    until: str | None,
+) -> None:
+    """Show confidence calibration analysis.
+
+    Compares predicted confidence against actual outcomes to
+    measure how well-calibrated the consensus engine is.
+    """
+    config = _load_config(ctx.obj["config_path"])
+    try:
+        asyncio.run(_calibration_async(config, category, since, until))
+    except DuhError as e:
+        _error(str(e))
+
+
+async def _calibration_async(
+    config: DuhConfig,
+    category: str | None,
+    since: str | None,
+    until: str | None,
+) -> None:
+    """Async implementation for the calibration command."""
+    from duh.calibration import compute_calibration
+    from duh.memory.repository import MemoryRepository
+
+    factory, engine = await _create_db(config)
+    async with factory() as session:
+        repo = MemoryRepository(session)
+        decisions = await repo.get_all_decisions_for_space(
+            category=category,
+            since=since,
+            until=until,
+        )
+
+    await engine.dispose()
+
+    result = compute_calibration(decisions)
+
+    if result.total_decisions == 0:
+        click.echo("No decisions found.")
+        return
+
+    click.echo(f"Total decisions: {result.total_decisions}")
+    click.echo(f"With outcomes: {result.total_with_outcomes}")
+    click.echo(f"Overall accuracy: {result.overall_accuracy:.1%}")
+    click.echo(f"ECE: {result.ece:.4f}")
+
+    if result.ece < 0.05:
+        rating = "excellent"
+    elif result.ece < 0.10:
+        rating = "good"
+    elif result.ece < 0.20:
+        rating = "fair"
+    else:
+        rating = "poor"
+    click.echo(f"Calibration: {rating}")
+
+    if result.total_with_outcomes > 0:
+        click.echo()
+        click.echo(
+            f"{'Range':<12} {'Count':>6} {'Outcomes':>9} "
+            f"{'Accuracy':>9} {'Conf':>6} {'Gap':>6}"
+        )
+        for b in result.buckets:
+            if b.count == 0:
+                continue
+            lo_pct = f"{b.range_lo:.0%}"
+            hi_pct = f"{b.range_hi:.0%}"
+            label = f"{lo_pct}-{hi_pct}"
+            acc_str = f"{b.accuracy:.1%}" if b.with_outcomes > 0 else "-"
+            conf_str = f"{b.mean_confidence:.1%}"
+            gap_str = (
+                f"{abs(b.accuracy - b.mean_confidence):.1%}"
+                if b.with_outcomes > 0
+                else "-"
+            )
+            click.echo(
+                f"{label:<12} {b.count:>6} {b.with_outcomes:>9} "
+                f"{acc_str:>9} {conf_str:>6} {gap_str:>6}"
+            )
+
+
 # ── backup ───────────────────────────────────────────────────────
 
 
@@ -1916,8 +2257,9 @@ async def _batch_async(
                 vr = await run_voting(question, pm, aggregation=aggregation)
                 decision = vr.decision or ""
                 confidence = vr.confidence
+                rigor = vr.rigor
             else:
-                decision, confidence, _dissent, _cost = await _run_consensus(
+                decision, confidence, rigor, _dissent, _cost = await _run_consensus(
                     question, config, pm
                 )
 
@@ -1929,13 +2271,14 @@ async def _batch_async(
                     "question": question,
                     "decision": decision,
                     "confidence": confidence,
+                    "rigor": rigor,
                     "cost": round(q_cost, 4),
                 }
             )
 
             if output_fmt == "text":
                 click.echo(f"Decision: {decision[:200]}")
-                click.echo(f"Confidence: {confidence:.0%}")
+                click.echo(f"Confidence: {confidence:.0%}  Rigor: {rigor:.0%}")
                 click.echo(f"Cost: ${q_cost:.4f}")
 
         except Exception as e:
@@ -1946,6 +2289,7 @@ async def _batch_async(
                     "question": question,
                     "error": str(e),
                     "confidence": 0.0,
+                    "rigor": 0.0,
                     "cost": round(q_cost, 4),
                 }
             )
diff --git a/src/duh/cli/display.py b/src/duh/cli/display.py
index 27ec841..ac34bba 100644
--- a/src/duh/cli/display.py
+++ b/src/duh/cli/display.py
@@ -154,10 +154,10 @@ def show_revise(self, model_ref: str, content: str) -> None:
             )
         )
 
-    def show_commit(self, confidence: float, dissent: str | None) -> None:
+    def show_commit(self, confidence: float, rigor: float, dissent: str | None) -> None:
         """Display commit result line."""
         check = "[bold green]\\u2713[/bold green]"
-        line = f"{check} COMMIT  Confidence: {confidence:.0%}"
+        line = f"{check} COMMIT  Confidence: {confidence:.0%}  Rigor: {rigor:.0%}"
         if dissent is None:
             line += "  (no dissent)"
         self._console.print(line)
@@ -265,7 +265,8 @@ def show_subtask_progress(self, subtask_result: SubtaskResult) -> None:
         check = "[bold green]\\u2713[/bold green]"
         self._console.print(
             f"{check} [{subtask_result.label}]  "
-            f"Confidence: {subtask_result.confidence:.0%}"
+            f"Confidence: {subtask_result.confidence:.0%}  "
+            f"Rigor: {subtask_result.rigor:.0%}"
         )
         self._console.print(
             Panel(
@@ -288,7 +289,10 @@ def show_synthesis(self, synthesis_result: SynthesisResult) -> None:
                 border_style="bright_white",
             )
         )
-        self._console.print(f"Aggregate confidence: {synthesis_result.confidence:.0%}")
+        self._console.print(
+            f"Aggregate confidence: {synthesis_result.confidence:.0%}"
+            f" | Rigor: {synthesis_result.rigor:.0%}"
+        )
 
     # ── Voting ─────────────────────────────────────────────────
 
@@ -325,6 +329,7 @@ def show_voting_result(self, result: VotingAggregation, cost: float) -> None:
         self._console.print(
             f"Strategy: {result.strategy} | "
             f"Confidence: {result.confidence:.0%} | "
+            f"Rigor: {result.rigor:.0%} | "
             f"Votes: {len(result.votes)} | "
             f"Cost: ${cost:.4f}"
         )
@@ -358,6 +363,7 @@ def show_final_decision(
         self,
         decision: str,
         confidence: float,
+        rigor: float,
         cost: float,
         dissent: str | None,
     ) -> None:
@@ -371,7 +377,9 @@ def show_final_decision(
                 border_style="bright_white",
             )
         )
-        self._console.print(f"Confidence: {confidence:.0%} | Cost: ${cost:.4f}")
+        self._console.print(
+            f"Confidence: {confidence:.0%} | Rigor: {rigor:.0%} | Cost: ${cost:.4f}"
+        )
 
         if dissent:
             self._console.print()
diff --git a/src/duh/consensus/handlers.py b/src/duh/consensus/handlers.py
index 4f2ed8b..6dde013 100644
--- a/src/duh/consensus/handlers.py
+++ b/src/duh/consensus/handlers.py
@@ -625,10 +625,10 @@ async def handle_revise(
 # ── COMMIT helpers + handler ─────────────────────────────────
 
 
-def _compute_confidence(challenges: list[ChallengeResult]) -> float:
-    """Compute confidence score from challenge quality.
+def _compute_rigor(challenges: list[ChallengeResult]) -> float:
+    """Compute rigor score from challenge quality.
 
-    Genuine (non-sycophantic) challenges improve confidence because
+    Genuine (non-sycophantic) challenges improve rigor because
     they indicate the revision was rigorously tested.
 
     Returns a float in [0.5, 1.0]:
@@ -641,6 +641,26 @@ def _compute_confidence(challenges: list[ChallengeResult]) -> float:
     return 0.5 + (genuine / len(challenges)) * 0.5
 
 
+# Domain caps for epistemic confidence scoring.
+# Caps confidence based on question intent to reflect inherent
+# uncertainty of different question types.
+DOMAIN_CAPS: dict[str, float] = {
+    "factual": 0.95,
+    "technical": 0.90,
+    "creative": 0.85,
+    "judgment": 0.80,
+    "strategic": 0.70,
+}
+_DEFAULT_DOMAIN_CAP = 0.85
+
+
+def _domain_cap(intent: str | None) -> float:
+    """Return the confidence ceiling for a given question intent."""
+    if intent is None:
+        return _DEFAULT_DOMAIN_CAP
+    return DOMAIN_CAPS.get(intent, _DEFAULT_DOMAIN_CAP)
+
+
 def _extract_dissent(challenges: list[ChallengeResult]) -> str | None:
     """Extract dissent from non-sycophantic challenges.
 
@@ -693,14 +713,22 @@ async def handle_commit(
         raise ConsensusError(msg)
 
     ctx.decision = ctx.revision
-    ctx.confidence = _compute_confidence(ctx.challenges)
+    ctx.rigor = _compute_rigor(ctx.challenges)
     ctx.dissent = _extract_dissent(ctx.challenges)
 
-    # Optional taxonomy classification
-    if classify and provider_manager is not None:
+    # Taxonomy classification (always attempt when provider available)
+    intent: str | None = None
+    if provider_manager is not None:
         taxonomy = await _classify_decision(ctx, provider_manager)
         if taxonomy:
             ctx.taxonomy = taxonomy
+            intent = taxonomy.get("intent") or None
+    elif classify:
+        # Legacy path: explicit classify without provider is a no-op
+        pass
+
+    # Epistemic confidence = rigor clamped by domain ceiling
+    ctx.confidence = min(_domain_cap(intent), ctx.rigor)
 
 
 async def _classify_decision(
diff --git a/src/duh/consensus/machine.py b/src/duh/consensus/machine.py
index 264d18e..5ed42fd 100644
--- a/src/duh/consensus/machine.py
+++ b/src/duh/consensus/machine.py
@@ -54,6 +54,7 @@ class RoundResult:
     revision: str
     decision: str
     confidence: float
+    rigor: float = 0.0
     dissent: str | None = None
 
 
@@ -90,6 +91,7 @@ class ConsensusContext:
     revision_model: str | None = None
     decision: str | None = None
     confidence: float = 0.0
+    rigor: float = 0.0
     dissent: str | None = None
     converged: bool = False
 
@@ -117,6 +119,7 @@ def _clear_round_data(self) -> None:
         self.revision_model = None
         self.decision = None
         self.confidence = 0.0
+        self.rigor = 0.0
         self.dissent = None
         self.converged = False
 
@@ -131,6 +134,7 @@ def _archive_round(self) -> None:
                 revision=self.revision or "",
                 decision=self.decision or "",
                 confidence=self.confidence,
+                rigor=self.rigor,
                 dissent=self.dissent,
             )
         )
diff --git a/src/duh/consensus/scheduler.py b/src/duh/consensus/scheduler.py
index 3e79dbf..e9820c2 100644
--- a/src/duh/consensus/scheduler.py
+++ b/src/duh/consensus/scheduler.py
@@ -43,6 +43,7 @@ class SubtaskResult:
     label: str
     decision: str
     confidence: float
+    rigor: float = 0.0
     cost: float = 0.0
 
 
@@ -52,7 +53,7 @@ async def _run_mini_consensus(
     *,
     max_rounds: int = 1,
     display: ConsensusDisplay | None = None,
-) -> tuple[str, float]:
+) -> tuple[str, float, float]:
     """Run a simplified single-round consensus for one subtask.
 
     Executes PROPOSE -> CHALLENGE -> REVISE -> COMMIT with the
@@ -65,7 +66,7 @@ async def _run_mini_consensus(
         display: Optional display for real-time progress output.
 
     Returns:
-        (decision, confidence) tuple.
+        (decision, confidence, rigor) tuple.
 
     Raises:
         ConsensusError: If any handler phase fails.
@@ -110,11 +111,11 @@ async def _run_mini_consensus(
 
     # COMMIT
     sm.transition(ConsensusState.COMMIT)
-    await handle_commit(ctx)
+    await handle_commit(ctx, provider_manager)
     if display:
-        display.show_commit(ctx.confidence, ctx.dissent)
+        display.show_commit(ctx.confidence, ctx.rigor, ctx.dissent)
 
-    return ctx.decision or "", ctx.confidence
+    return ctx.decision or "", ctx.confidence, ctx.rigor
 
 
 async def _execute_subtask(
@@ -155,7 +156,7 @@ async def _execute_subtask(
         augmented_question += f"\n\nContext from prior subtasks:\n{dep_text}"
 
     cost_before = provider_manager.total_cost
-    decision, confidence = await _run_mini_consensus(
+    decision, confidence, rigor = await _run_mini_consensus(
         augmented_question, provider_manager, display=display
     )
     subtask_cost = provider_manager.total_cost - cost_before
@@ -164,6 +165,7 @@ async def _execute_subtask(
         label=subtask.label,
         decision=decision,
         confidence=confidence,
+        rigor=rigor,
         cost=subtask_cost,
     )
 
diff --git a/src/duh/consensus/synthesis.py b/src/duh/consensus/synthesis.py
index 2f3ee25..9f042ea 100644
--- a/src/duh/consensus/synthesis.py
+++ b/src/duh/consensus/synthesis.py
@@ -24,6 +24,7 @@ class SynthesisResult:
     content: str
     confidence: float
     strategy: str
+    rigor: float = 0.0
 
 
 def _build_merge_prompt(
@@ -187,9 +188,15 @@ async def synthesize(
     # Aggregate confidence: weighted average of subtask confidences
     total_conf = sum(r.confidence for r in subtask_results)
     avg_confidence = total_conf / len(subtask_results) if subtask_results else 0.0
+    avg_rigor = (
+        sum(r.rigor for r in subtask_results) / len(subtask_results)
+        if subtask_results
+        else 0.0
+    )
 
     return SynthesisResult(
         content=response.content,
         confidence=avg_confidence,
         strategy=strategy,
+        rigor=avg_rigor,
     )
diff --git a/src/duh/consensus/voting.py b/src/duh/consensus/voting.py
index e1df5e2..f9c0143 100644
--- a/src/duh/consensus/voting.py
+++ b/src/duh/consensus/voting.py
@@ -30,6 +30,7 @@ class VoteResult:
     model_ref: str
     content: str
     confidence: float = 0.0
+    rigor: float = 0.5
 
 
 @dataclass(frozen=True, slots=True)
@@ -40,6 +41,7 @@ class VotingAggregation:
     decision: str
     strategy: str
     confidence: float
+    rigor: float = 0.5
 
 
 # ── Internal helpers ─────────────────────────────────────────────
diff --git a/src/duh/mcp/server.py b/src/duh/mcp/server.py
index d61c7fe..f91663d 100644
--- a/src/duh/mcp/server.py
+++ b/src/duh/mcp/server.py
@@ -127,6 +127,7 @@ async def _handle_ask(args: dict) -> list[TextContent]:  # type: ignore[type-arg
                     {
                         "decision": result.decision,
                         "confidence": result.confidence,
+                        "rigor": result.rigor,
                         "votes": len(result.votes),
                         "cost": pm.total_cost,
                     }
@@ -134,7 +135,9 @@ async def _handle_ask(args: dict) -> list[TextContent]:  # type: ignore[type-arg
             )
         ]
     else:
-        decision, confidence, dissent, cost = await _run_consensus(question, config, pm)
+        decision, confidence, rigor, dissent, cost = await _run_consensus(
+            question, config, pm
+        )
         return [
             TextContent(
                 type="text",
@@ -142,6 +145,7 @@ async def _handle_ask(args: dict) -> list[TextContent]:  # type: ignore[type-arg
                     {
                         "decision": decision,
                         "confidence": confidence,
+                        "rigor": rigor,
                         "dissent": dissent,
                         "cost": cost,
                     }
@@ -179,6 +183,7 @@ async def _handle_recall(args: dict) -> list[TextContent]:  # type: ignore[type-
                 latest = thread.decisions[-1]
                 entry["decision"] = latest.content[:200]
                 entry["confidence"] = latest.confidence
+                entry["rigor"] = latest.rigor
             results.append(entry)
 
     await engine.dispose()
diff --git a/src/duh/memory/context.py b/src/duh/memory/context.py
index c679312..9c9ac66 100644
--- a/src/duh/memory/context.py
+++ b/src/duh/memory/context.py
@@ -76,7 +76,7 @@ def build_context(
     if decisions and remaining > 0:
         decision_parts: list[str] = []
         for d in decisions:
-            part = f"- [{d.confidence:.0%} confidence] {d.content}"
+            part = f"- [{d.confidence:.0%} confidence, {d.rigor:.0%} rigor] {d.content}"
             if d.dissent:
                 part += f"\n  Dissent: {d.dissent}"
             outcome = outcome_map.get(d.id)
diff --git a/src/duh/memory/migrations.py b/src/duh/memory/migrations.py
new file mode 100644
index 0000000..1e7c159
--- /dev/null
+++ b/src/duh/memory/migrations.py
@@ -0,0 +1,34 @@
+"""Lightweight schema migrations for SQLite.
+
+Runs on startup for file-based SQLite databases to add new columns
+that were added after the initial schema. In-memory SQLite uses
+``create_all`` which handles new columns automatically.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from sqlalchemy.ext.asyncio import AsyncEngine
+
+logger = logging.getLogger(__name__)
+
+
+async def ensure_schema(engine: AsyncEngine) -> None:
+    """Apply pending schema migrations.
+
+    Currently handles:
+    - Adding ``rigor`` column to ``decisions`` table (Phase A).
+    """
+    async with engine.begin() as conn:
+        # Check if rigor column exists
+        rows = await conn.exec_driver_sql("PRAGMA table_info(decisions)")
+        columns = {row[1] for row in rows}
+
+        if "rigor" not in columns:
+            logger.info("Adding 'rigor' column to decisions table")
+            await conn.exec_driver_sql(
+                "ALTER TABLE decisions ADD COLUMN rigor FLOAT DEFAULT 0.0"
+            )
diff --git a/src/duh/memory/models.py b/src/duh/memory/models.py
index 4ef6d7f..ff5de20 100644
--- a/src/duh/memory/models.py
+++ b/src/duh/memory/models.py
@@ -189,6 +189,7 @@ class Decision(Base):
     )
     content: Mapped[str] = mapped_column(Text)
     confidence: Mapped[float] = mapped_column(Float, default=0.0)
+    rigor: Mapped[float] = mapped_column(Float, default=0.0)
     dissent: Mapped[str | None] = mapped_column(Text, nullable=True, default=None)
     intent: Mapped[str | None] = mapped_column(String(50), nullable=True, default=None)
     category: Mapped[str | None] = mapped_column(
diff --git a/src/duh/memory/repository.py b/src/duh/memory/repository.py
index 1f9ba31..d62911c 100644
--- a/src/duh/memory/repository.py
+++ b/src/duh/memory/repository.py
@@ -149,6 +149,7 @@ async def save_decision(
         content: str,
         confidence: float,
         *,
+        rigor: float = 0.0,
         dissent: str | None = None,
         intent: str | None = None,
         category: str | None = None,
@@ -160,6 +161,7 @@ async def save_decision(
             thread_id=thread_id,
             content=content,
             confidence=confidence,
+            rigor=rigor,
             dissent=dissent,
             intent=intent,
             category=category,
diff --git a/tests/integration/test_consensus_loop.py b/tests/integration/test_consensus_loop.py
index 88f2af6..3420ab6 100644
--- a/tests/integration/test_consensus_loop.py
+++ b/tests/integration/test_consensus_loop.py
@@ -90,6 +90,7 @@ async def test_full_loop_to_complete(self, mock_provider: MockProvider) -> None:
         assert ctx.revision is not None
         assert ctx.decision is not None
         assert ctx.confidence > 0
+        assert ctx.rigor > 0
         assert ctx.current_round == 1
 
         # Transition to COMPLETE (max_rounds=1)
@@ -367,8 +368,9 @@ async def test_sycophantic_challenges_lower_confidence(self) -> None:
 
         await _run_single_round(ctx, sm, pm)
 
-        # Both challengers are sycophantic → confidence should be 0.5
+        # All sycophantic → rigor=0.5, confidence=min(0.85,0.5)
         assert ctx.confidence < 1.0
+        assert ctx.rigor < 1.0
         # Dissent should be None (all sycophantic)
         assert ctx.dissent is None
 
diff --git a/tests/sycophancy/test_confidence_impact.py b/tests/sycophancy/test_confidence_impact.py
index 92281f0..c3cd701 100644
--- a/tests/sycophancy/test_confidence_impact.py
+++ b/tests/sycophancy/test_confidence_impact.py
@@ -1,13 +1,13 @@
-"""Tests for sycophancy impact on confidence scoring and dissent.
+"""Tests for sycophancy impact on rigor scoring and dissent.
 
 Verifies the mathematical relationship between sycophantic challenge
-counts and resulting confidence, and that dissent extraction correctly
+counts and resulting rigor, and that dissent extraction correctly
 filters out sycophantic responses.
 """
 
 from __future__ import annotations
 
-from duh.consensus.handlers import _compute_confidence, _extract_dissent
+from duh.consensus.handlers import _compute_rigor, _extract_dissent
 from duh.consensus.machine import ChallengeResult
 
 # ── Helpers ──────────────────────────────────────────────────────
@@ -30,44 +30,44 @@ def _sycophantic(
 # ── Confidence computation ───────────────────────────────────────
 
 
-class TestConfidenceComputation:
+class TestRigorComputation:
     def test_all_genuine_two_challengers(self) -> None:
         """2/2 genuine → 0.5 + (2/2)*0.5 = 1.0."""
         challenges = [_genuine("a"), _genuine("b")]
-        assert _compute_confidence(challenges) == 1.0
+        assert _compute_rigor(challenges) == 1.0
 
     def test_all_sycophantic_two_challengers(self) -> None:
         """0/2 genuine → 0.5 + (0/2)*0.5 = 0.5."""
         challenges = [_sycophantic("a"), _sycophantic("b")]
-        assert _compute_confidence(challenges) == 0.5
+        assert _compute_rigor(challenges) == 0.5
 
     def test_one_genuine_one_sycophantic(self) -> None:
         """1/2 genuine → 0.5 + (1/2)*0.5 = 0.75."""
         challenges = [_genuine("a"), _sycophantic("b")]
-        assert _compute_confidence(challenges) == 0.75
+        assert _compute_rigor(challenges) == 0.75
 
     def test_empty_challenges(self) -> None:
         """No challenges → 0.5 (untested)."""
-        assert _compute_confidence([]) == 0.5
+        assert _compute_rigor([]) == 0.5
 
     def test_single_genuine(self) -> None:
         """1/1 genuine → 1.0."""
-        assert _compute_confidence([_genuine()]) == 1.0
+        assert _compute_rigor([_genuine()]) == 1.0
 
     def test_single_sycophantic(self) -> None:
         """0/1 genuine → 0.5."""
-        assert _compute_confidence([_sycophantic()]) == 0.5
+        assert _compute_rigor([_sycophantic()]) == 0.5
 
     def test_three_challengers_two_genuine(self) -> None:
         """2/3 genuine → 0.5 + (2/3)*0.5 ≈ 0.833."""
         challenges = [_genuine("a"), _genuine("b"), _sycophantic("c")]
-        result = _compute_confidence(challenges)
+        result = _compute_rigor(challenges)
         assert abs(result - (0.5 + (2 / 3) * 0.5)) < 1e-10
 
     def test_three_challengers_one_genuine(self) -> None:
         """1/3 genuine → 0.5 + (1/3)*0.5 ≈ 0.667."""
         challenges = [_genuine("a"), _sycophantic("b"), _sycophantic("c")]
-        result = _compute_confidence(challenges)
+        result = _compute_rigor(challenges)
         assert abs(result - (0.5 + (1 / 3) * 0.5)) < 1e-10
 
     def test_confidence_always_between_half_and_one(self) -> None:
@@ -79,7 +79,7 @@ def test_confidence_always_between_half_and_one(self) -> None:
                 challenges = [_genuine(f"g{i}") for i in range(n_genuine)] + [
                     _sycophantic(f"s{i}") for i in range(n_syc)
                 ]
-                conf = _compute_confidence(challenges)
+                conf = _compute_rigor(challenges)
                 assert 0.5 <= conf <= 1.0, f"{n_genuine}g/{n_syc}s → {conf}"
 
     def test_confidence_monotonic_with_genuine_ratio(self) -> None:
@@ -91,7 +91,7 @@ def test_confidence_monotonic_with_genuine_ratio(self) -> None:
             challenges = [_genuine(f"g{i}") for i in range(n_genuine)] + [
                 _sycophantic(f"s{i}") for i in range(n_syc)
             ]
-            conf = _compute_confidence(challenges)
+            conf = _compute_rigor(challenges)
             assert conf >= prev, f"Not monotonic at {n_genuine}/{total}"
             prev = conf
 
diff --git a/tests/sycophancy/test_known_flaws.py b/tests/sycophancy/test_known_flaws.py
index 5e84746..fb3d6c2 100644
--- a/tests/sycophancy/test_known_flaws.py
+++ b/tests/sycophancy/test_known_flaws.py
@@ -84,7 +84,9 @@ async def test_full_loop_genuine_high_confidence(
         sm.transition(ConsensusState.COMMIT)
         await handle_commit(ctx)
 
-        assert ctx.confidence == 1.0
+        # rigor=1.0 (all genuine), but no pm → default cap 0.85
+        assert ctx.rigor == 1.0
+        assert ctx.confidence == 0.85
 
     async def test_genuine_challenges_produce_dissent(
         self, known_flaw_genuine_provider: MockProvider
diff --git a/tests/unit/test_api_crud.py b/tests/unit/test_api_crud.py
index b111870..14f41bd 100644
--- a/tests/unit/test_api_crud.py
+++ b/tests/unit/test_api_crud.py
@@ -393,3 +393,94 @@ async def test_by_model_ordering(self) -> None:
         assert len(data["by_model"]) == 2
         assert data["by_model"][0]["model_ref"] == "expensive:model"
         assert data["by_model"][1]["model_ref"] == "cheap:model"
+
+
+# -- Helpers for calibration -----------------------------------------------
+
+
+async def _seed_decision_with_outcome(
+    app: FastAPI,
+    confidence: float,
+    outcome_result: str | None = None,
+    *,
+    category: str | None = None,
+) -> tuple[str, str]:
+    """Seed a thread with decision and optional outcome.
+
+    Returns (thread_id, decision_id).
+    """
+    async with app.state.db_factory() as session:
+        repo = MemoryRepository(session)
+        thread = await repo.create_thread("Calibration question")
+        turn = await repo.create_turn(thread.id, 1, "COMMIT")
+        decision = await repo.save_decision(
+            turn.id, thread.id, "Decision content", confidence, category=category
+        )
+        if outcome_result is not None:
+            await repo.save_outcome(decision.id, thread.id, outcome_result)
+        await session.commit()
+        return thread.id, decision.id
+
+
+# -- TestCalibration -----------------------------------------------------------
+
+
+class TestCalibration:
+    async def test_empty_returns_zeros(self) -> None:
+        app = await _make_app()
+        client = TestClient(app, raise_server_exceptions=False)
+        resp = client.get("/api/calibration")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total_decisions"] == 0
+        assert data["total_with_outcomes"] == 0
+        assert data["overall_accuracy"] == 0.0
+        assert data["ece"] == 0.0
+        assert len(data["buckets"]) == 10
+
+    async def test_with_outcomes(self) -> None:
+        app = await _make_app()
+        await _seed_decision_with_outcome(app, 0.9, "success")
+        await _seed_decision_with_outcome(app, 0.9, "success")
+        await _seed_decision_with_outcome(app, 0.3, "failure")
+
+        client = TestClient(app, raise_server_exceptions=False)
+        resp = client.get("/api/calibration")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total_decisions"] == 3
+        assert data["total_with_outcomes"] == 3
+        assert data["overall_accuracy"] > 0.0
+        assert data["ece"] >= 0.0
+
+    async def test_category_filter(self) -> None:
+        app = await _make_app()
+        await _seed_decision_with_outcome(app, 0.8, "success", category="tech")
+        await _seed_decision_with_outcome(app, 0.5, "failure", category="other")
+
+        client = TestClient(app, raise_server_exceptions=False)
+        resp = client.get("/api/calibration", params={"category": "tech"})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total_decisions"] == 1
+        assert data["total_with_outcomes"] == 1
+
+    async def test_bucket_structure(self) -> None:
+        app = await _make_app()
+        await _seed_decision_with_outcome(app, 0.5, "success")
+
+        client = TestClient(app, raise_server_exceptions=False)
+        resp = client.get("/api/calibration")
+        assert resp.status_code == 200
+        data = resp.json()
+        buckets = data["buckets"]
+        assert len(buckets) == 10
+
+        # Check bucket 5 (0.5-0.6) has the decision
+        b5 = buckets[5]
+        assert b5["count"] == 1
+        assert b5["with_outcomes"] == 1
+        assert b5["success"] == 1
+        assert b5["accuracy"] == 1.0
+        assert b5["range_lo"] == 0.5
+        assert b5["range_hi"] == 0.6
diff --git a/tests/unit/test_api_ws.py b/tests/unit/test_api_ws.py
index 0c330d8..212f0da 100644
--- a/tests/unit/test_api_ws.py
+++ b/tests/unit/test_api_ws.py
@@ -97,8 +97,9 @@ async def mock_revise(ctx, pm, **kwargs):
         ctx.revision_model = ctx.proposal_model
         return _make_response(revision)
 
-    async def mock_commit(ctx, **kwargs):
+    async def mock_commit(ctx, *args, **kwargs):
         ctx.decision = ctx.revision
+        ctx.rigor = 1.0
         ctx.confidence = confidence
         ctx.dissent = dissent
 
@@ -240,6 +241,7 @@ def test_complete_event_has_decision_confidence_cost(self):
         complete = next(e for e in events if e["type"] == "complete")
         assert complete["decision"] == "Final answer"
         assert complete["confidence"] == 0.85
+        assert complete["rigor"] == 1.0
         assert complete["dissent"] == "Minor dissent"
         assert "cost" in complete
 
@@ -256,6 +258,7 @@ def test_commit_event_has_confidence_and_dissent(self):
 
         commit = next(e for e in events if e["type"] == "commit")
         assert commit["confidence"] == 0.9
+        assert commit["rigor"] == 1.0
         assert commit["dissent"] == "Some dissent"
         assert "round" in commit
 
diff --git a/tests/unit/test_calibration.py b/tests/unit/test_calibration.py
new file mode 100644
index 0000000..1375e09
--- /dev/null
+++ b/tests/unit/test_calibration.py
@@ -0,0 +1,162 @@
+"""Tests for duh.calibration — confidence calibration analysis."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from duh.calibration import compute_calibration
+
+
+def _decision(confidence: float, outcome: str | None = None) -> SimpleNamespace:
+    """Create a fake Decision with optional Outcome for testing."""
+    out = None
+    if outcome is not None:
+        out = SimpleNamespace(result=outcome)
+    return SimpleNamespace(confidence=confidence, outcome=out)
+
+
+class TestComputeCalibration:
+    def test_empty_input(self) -> None:
+        result = compute_calibration([])
+        assert result.total_decisions == 0
+        assert result.total_with_outcomes == 0
+        assert result.overall_accuracy == 0.0
+        assert result.ece == 0.0
+        assert len(result.buckets) == 10
+
+    def test_no_outcomes(self) -> None:
+        decisions = [_decision(0.5), _decision(0.8), _decision(0.3)]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.total_decisions == 3
+        assert result.total_with_outcomes == 0
+        assert result.overall_accuracy == 0.0
+        assert result.ece == 0.0
+
+    def test_single_success(self) -> None:
+        decisions = [_decision(0.9, "success")]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.total_decisions == 1
+        assert result.total_with_outcomes == 1
+        assert result.overall_accuracy == 1.0
+        # Bucket 9 (0.9-1.0): accuracy=1.0, mean_conf=0.9, |1.0-0.9|=0.1
+        assert result.ece == pytest.approx(0.1)
+
+    def test_single_failure(self) -> None:
+        decisions = [_decision(0.7, "failure")]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.total_decisions == 1
+        assert result.total_with_outcomes == 1
+        assert result.overall_accuracy == 0.0
+        # Bucket 7 (0.7-0.8): accuracy=0.0, mean_conf=0.7, |0.0-0.7|=0.7
+        assert result.ece == pytest.approx(0.7)
+
+    def test_partial_counts_as_half(self) -> None:
+        decisions = [_decision(0.5, "partial")]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.total_with_outcomes == 1
+        assert result.overall_accuracy == 0.5
+        # Bucket 5 (0.5-0.6): accuracy=0.5, mean_conf=0.5, |0.5-0.5|=0.0
+        assert result.ece == pytest.approx(0.0)
+
+    def test_perfect_calibration(self) -> None:
+        """When accuracy matches confidence, ECE should be near 0."""
+        # Put 10 decisions at confidence 0.85:
+        # 8 or 9 successes needed for accuracy ~0.85
+        # With 10 decisions: 8 success + 1 partial + 1 failure
+        # accuracy = (8 + 0.5) / 10 = 0.85 matches mean_conf=0.85
+        decisions = (
+            [_decision(0.85, "success")] * 8
+            + [_decision(0.85, "partial")]
+            + [_decision(0.85, "failure")]
+        )
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.total_decisions == 10
+        assert result.total_with_outcomes == 10
+        assert result.overall_accuracy == pytest.approx(0.85)
+        assert result.ece == pytest.approx(0.0)
+
+    def test_overconfident(self) -> None:
+        """High confidence but all failures = high ECE."""
+        decisions = [_decision(0.95, "failure")] * 10
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.overall_accuracy == 0.0
+        assert result.ece > 0.8  # ~0.95
+
+    def test_multiple_buckets(self) -> None:
+        decisions = [
+            _decision(0.15, "success"),
+            _decision(0.15, "failure"),
+            _decision(0.85, "success"),
+            _decision(0.85, "success"),
+        ]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.total_decisions == 4
+        assert result.total_with_outcomes == 4
+
+        # Bucket 1 (0.1-0.2): 2 with_outcomes, 1 success, accuracy=0.5
+        bucket1 = result.buckets[1]
+        assert bucket1.count == 2
+        assert bucket1.with_outcomes == 2
+        assert bucket1.success == 1
+        assert bucket1.accuracy == pytest.approx(0.5)
+
+        # Bucket 8 (0.8-0.9): 2 with_outcomes, 2 success, accuracy=1.0
+        bucket8 = result.buckets[8]
+        assert bucket8.count == 2
+        assert bucket8.with_outcomes == 2
+        assert bucket8.success == 2
+        assert bucket8.accuracy == pytest.approx(1.0)
+
+    def test_custom_n_buckets(self) -> None:
+        decisions = [_decision(0.5, "success")]
+        result = compute_calibration(decisions, n_buckets=5)  # type: ignore[arg-type]
+        assert len(result.buckets) == 5
+        # confidence 0.5 -> bucket index 2 (0.4-0.6)
+        assert result.buckets[2].count == 1
+        assert result.buckets[2].with_outcomes == 1
+
+    def test_boundary_zero(self) -> None:
+        """Confidence 0.0 goes into the first bucket."""
+        decisions = [_decision(0.0, "failure")]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.buckets[0].count == 1
+        assert result.buckets[0].with_outcomes == 1
+        assert result.buckets[0].accuracy == 0.0
+
+    def test_boundary_one(self) -> None:
+        """Confidence 1.0 goes into the last bucket."""
+        decisions = [_decision(1.0, "success")]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.buckets[9].count == 1
+        assert result.buckets[9].with_outcomes == 1
+        assert result.buckets[9].accuracy == 1.0
+
+    def test_boundary_exact_tenth(self) -> None:
+        """Confidence exactly 0.1 goes into bucket 1 (0.1-0.2)."""
+        decisions = [_decision(0.1, "success")]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.buckets[1].count == 1
+
+    def test_overall_accuracy(self) -> None:
+        decisions = [
+            _decision(0.5, "success"),
+            _decision(0.5, "failure"),
+            _decision(0.5, "partial"),
+            _decision(0.5, "success"),
+        ]
+        # accuracy = (2 + 0.5) / 4 = 0.625
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.overall_accuracy == pytest.approx(0.625)
+
+    def test_mixed_with_and_without_outcomes(self) -> None:
+        decisions = [
+            _decision(0.5, "success"),
+            _decision(0.5),  # no outcome
+            _decision(0.5, "failure"),
+        ]
+        result = compute_calibration(decisions)  # type: ignore[arg-type]
+        assert result.total_decisions == 3
+        assert result.total_with_outcomes == 2
+        assert result.overall_accuracy == pytest.approx(0.5)
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 172d73f..b6bef3b 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -72,6 +72,7 @@ def test_displays_decision(
         mock_run.return_value = (
             "Use SQLite for v0.1.",
             1.0,
+            1.0,
             None,
             0.0042,
         )
@@ -97,6 +98,7 @@ def test_displays_dissent(
         mock_run.return_value = (
             "Use SQLite.",
             0.75,
+            1.0,
             "[model-a]: PostgreSQL would be better for scale.",
             0.01,
         )
@@ -119,7 +121,7 @@ def test_no_dissent_when_none(
         from duh.config.schema import DuhConfig
 
         mock_config.return_value = DuhConfig()
-        mock_run.return_value = ("Answer.", 1.0, None, 0.0)
+        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0)
 
         result = runner.invoke(cli, ["ask", "Question?"])
 
@@ -138,7 +140,7 @@ def test_rounds_option(
 
         config = DuhConfig()
         mock_config.return_value = config
-        mock_run.return_value = ("Answer.", 1.0, None, 0.0)
+        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0)
 
         result = runner.invoke(cli, ["ask", "--rounds", "5", "Question?"])
 
@@ -432,7 +434,7 @@ async def _seed() -> str:
         assert "Use PostgreSQL" in result.output
         assert "[CHALLENGER] mock:challenger-1" in result.output
         assert "SQLite is simpler" in result.output
-        assert "Decision (confidence 85%)" in result.output
+        assert "Decision (confidence 85%, rigor 0%)" in result.output
         assert "Use SQLite for v0.1." in result.output
         assert "Dissent: PostgreSQL for future scale." in result.output
         asyncio.run(engine.dispose())
@@ -639,7 +641,7 @@ def test_ask_full_loop(self, runner: CliRunner) -> None:
 
         async def fake_ask(
             question: str, cfg: Any, **kwargs: Any
-        ) -> tuple[str, float, str | None, float]:
+        ) -> tuple[str, float, float, str | None, float]:
             pm = ProviderManager()
             await pm.register(provider)
             from duh.cli.app import _run_consensus
diff --git a/tests/unit/test_cli_batch.py b/tests/unit/test_cli_batch.py
index 0f35a94..b9c2956 100644
--- a/tests/unit/test_cli_batch.py
+++ b/tests/unit/test_cli_batch.py
@@ -452,10 +452,10 @@ async def fake_consensus(
             pm: Any,
             display: Any = None,
             tool_registry: Any = None,
-        ) -> tuple[str, float, str | None, float]:
+        ) -> tuple[str, float, float, str | None, float]:
             nonlocal consensus_called
             consensus_called = True
-            return ("Use SQLite.", 0.85, None, 0.01)
+            return ("Use SQLite.", 0.85, 1.0, None, 0.01)
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
@@ -546,8 +546,8 @@ async def fake_consensus(
             pm: Any,
             display: Any = None,
             tool_registry: Any = None,
-        ) -> tuple[str, float, str | None, float]:
-            return ("Answer.", 0.9, None, 0.01)
+        ) -> tuple[str, float, float, str | None, float]:
+            return ("Answer.", 0.9, 1.0, None, 0.01)
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
@@ -601,12 +601,12 @@ async def fake_consensus(
             pm: Any,
             display: Any = None,
             tool_registry: Any = None,
-        ) -> tuple[str, float, str | None, float]:
+        ) -> tuple[str, float, float, str | None, float]:
             nonlocal call_count
             call_count += 1
             if question == "Q2":
                 raise RuntimeError("Provider timeout")
-            return ("Answer.", 0.9, None, 0.01)
+            return ("Answer.", 0.9, 1.0, None, 0.01)
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
@@ -650,10 +650,10 @@ async def fake_consensus(
             pm: Any,
             display: Any = None,
             tool_registry: Any = None,
-        ) -> tuple[str, float, str | None, float]:
+        ) -> tuple[str, float, float, str | None, float]:
             if question == "Q2":
                 raise RuntimeError("Model unavailable")
-            return ("Answer.", 0.9, None, 0.01)
+            return ("Answer.", 0.9, 1.0, None, 0.01)
 
         with (
             patch("duh.cli.app.load_config", return_value=config),
diff --git a/tests/unit/test_cli_calibration.py b/tests/unit/test_cli_calibration.py
new file mode 100644
index 0000000..e2618de
--- /dev/null
+++ b/tests/unit/test_cli_calibration.py
@@ -0,0 +1,154 @@
+"""Tests for the duh calibration CLI command."""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from click.testing import CliRunner
+
+from duh.cli.app import cli
+
+
+@pytest.fixture
+def runner() -> CliRunner:
+    return CliRunner()
+
+
+# ── DB helpers (same pattern as test_cli_export.py) ──────────────────
+
+
+def _make_db() -> tuple[Any, Any]:
+    """Create an in-memory SQLite engine + sessionmaker synchronously."""
+    from sqlalchemy import event
+    from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
+    from sqlalchemy.pool import StaticPool
+
+    engine = create_async_engine(
+        "sqlite+aiosqlite://",
+        poolclass=StaticPool,
+        connect_args={"check_same_thread": False},
+    )
+
+    @event.listens_for(engine.sync_engine, "connect")
+    def _enable_fks(dbapi_conn, connection_record):  # type: ignore[no-untyped-def]
+        cursor = dbapi_conn.cursor()
+        cursor.execute("PRAGMA foreign_keys=ON")
+        cursor.close()
+
+    asyncio.run(_init_tables(engine))
+    factory = async_sessionmaker(engine, expire_on_commit=False)
+    return factory, engine
+
+
+async def _init_tables(engine: Any) -> None:
+    from duh.memory.models import Base
+
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
+
+
+def _mem_config() -> Any:
+    from duh.config.schema import DuhConfig
+
+    return DuhConfig(
+        database={"url": "sqlite+aiosqlite://"},  # type: ignore[arg-type]
+    )
+
+
+async def _seed_decision_with_outcome(
+    factory: Any,
+    confidence: float,
+    outcome_result: str | None = None,
+) -> str:
+    """Seed a thread + turn + decision, optionally with an outcome."""
+    from duh.memory.repository import MemoryRepository
+
+    async with factory() as session:
+        repo = MemoryRepository(session)
+        thread = await repo.create_thread("Test question")
+        turn = await repo.create_turn(thread.id, 1, "COMMIT")
+        decision = await repo.save_decision(
+            turn.id, thread.id, "Some decision", confidence
+        )
+        if outcome_result is not None:
+            await repo.save_outcome(decision.id, thread.id, outcome_result)
+        await session.commit()
+        return thread.id
+
+
+# ── Tests ────────────────────────────────────────────────────────
+
+
+class TestCalibrationCLI:
+    def test_no_decisions(self, runner: CliRunner) -> None:
+        factory, engine = _make_db()
+        config = _mem_config()
+
+        with (
+            patch("duh.cli.app._load_config", return_value=config),
+            patch("duh.cli.app._create_db", new_callable=AsyncMock) as mock_db,
+        ):
+            mock_db.return_value = (factory, engine)
+            result = runner.invoke(cli, ["calibration"])
+
+        assert result.exit_code == 0
+        assert "No decisions found" in result.output
+
+    def test_with_outcomes(self, runner: CliRunner) -> None:
+        factory, engine = _make_db()
+        config = _mem_config()
+
+        # Seed some decisions with outcomes
+        asyncio.run(_seed_decision_with_outcome(factory, 0.9, "success"))
+        asyncio.run(_seed_decision_with_outcome(factory, 0.9, "success"))
+        asyncio.run(_seed_decision_with_outcome(factory, 0.5, "failure"))
+
+        with (
+            patch("duh.cli.app._load_config", return_value=config),
+            patch("duh.cli.app._create_db", new_callable=AsyncMock) as mock_db,
+        ):
+            mock_db.return_value = (factory, engine)
+            result = runner.invoke(cli, ["calibration"])
+
+        assert result.exit_code == 0
+        assert "Total decisions: 3" in result.output
+        assert "With outcomes: 3" in result.output
+        assert "ECE:" in result.output
+        assert "Calibration:" in result.output
+
+    def test_without_outcomes(self, runner: CliRunner) -> None:
+        factory, engine = _make_db()
+        config = _mem_config()
+
+        # Seed decisions without outcomes
+        asyncio.run(_seed_decision_with_outcome(factory, 0.8))
+        asyncio.run(_seed_decision_with_outcome(factory, 0.6))
+
+        with (
+            patch("duh.cli.app._load_config", return_value=config),
+            patch("duh.cli.app._create_db", new_callable=AsyncMock) as mock_db,
+        ):
+            mock_db.return_value = (factory, engine)
+            result = runner.invoke(cli, ["calibration"])
+
+        assert result.exit_code == 0
+        assert "Total decisions: 2" in result.output
+        assert "With outcomes: 0" in result.output
+        assert "Overall accuracy: 0.0%" in result.output
+
+    def test_category_filter(self, runner: CliRunner) -> None:
+        factory, engine = _make_db()
+        config = _mem_config()
+
+        with (
+            patch("duh.cli.app._load_config", return_value=config),
+            patch("duh.cli.app._create_db", new_callable=AsyncMock) as mock_db,
+        ):
+            mock_db.return_value = (factory, engine)
+            result = runner.invoke(cli, ["calibration", "--category", "tech"])
+
+        assert result.exit_code == 0
+        assert "No decisions found" in result.output
diff --git a/tests/unit/test_cli_decompose.py b/tests/unit/test_cli_decompose.py
index 52b0bfa..6bb3764 100644
--- a/tests/unit/test_cli_decompose.py
+++ b/tests/unit/test_cli_decompose.py
@@ -225,7 +225,7 @@ def test_complete_decompose_flow(self) -> None:
             strategy="merge",
         )
         display.show_synthesis(synthesis)
-        display.show_final_decision("Use SQLite for v0.1.", 0.85, 0.042, None)
+        display.show_final_decision("Use SQLite for v0.1.", 0.85, 1.0, 0.042, None)
 
         out = _output(buf)
         assert "DECOMPOSE" in out
diff --git a/tests/unit/test_cli_display.py b/tests/unit/test_cli_display.py
index 21e97db..5c72456 100644
--- a/tests/unit/test_cli_display.py
+++ b/tests/unit/test_cli_display.py
@@ -230,20 +230,20 @@ def test_truncates_long_content(self) -> None:
 class TestShowCommit:
     def test_shows_confidence(self) -> None:
         display, buf = _make_display()
-        display.show_commit(0.85, "Some dissent here.")
+        display.show_commit(0.85, 1.0, "Some dissent here.")
         out = _output(buf)
         assert "COMMIT" in out
         assert "85%" in out
 
     def test_shows_no_dissent_marker(self) -> None:
         display, buf = _make_display()
-        display.show_commit(1.0, None)
+        display.show_commit(1.0, 1.0, None)
         out = _output(buf)
         assert "no dissent" in out
 
     def test_confidence_formatting(self) -> None:
         display, buf = _make_display()
-        display.show_commit(0.5, "dissent text")
+        display.show_commit(0.5, 1.0, "dissent text")
         out = _output(buf)
         assert "50%" in out
 
@@ -291,14 +291,14 @@ def test_footer_different_values(self) -> None:
 class TestShowFinalDecision:
     def test_shows_decision_text(self) -> None:
         display, buf = _make_display()
-        display.show_final_decision("Use SQLite for v0.1.", 0.85, 0.0042, None)
+        display.show_final_decision("Use SQLite for v0.1.", 0.85, 1.0, 0.0042, None)
         out = _output(buf)
         assert "Use SQLite for v0.1." in out
         assert "Decision" in out
 
     def test_shows_confidence_and_cost(self) -> None:
         display, buf = _make_display()
-        display.show_final_decision("Answer.", 1.0, 0.0042, None)
+        display.show_final_decision("Answer.", 1.0, 1.0, 0.0042, None)
         out = _output(buf)
         assert "Confidence: 100%" in out
         assert "Cost: $0.0042" in out
@@ -308,6 +308,7 @@ def test_shows_dissent_when_present(self) -> None:
         display.show_final_decision(
             "Answer.",
             0.75,
+            1.0,
             0.01,
             "[model-a]: PostgreSQL would be better for scale.",
         )
@@ -317,14 +318,14 @@ def test_shows_dissent_when_present(self) -> None:
 
     def test_no_dissent_panel_when_none(self) -> None:
         display, buf = _make_display()
-        display.show_final_decision("Answer.", 1.0, 0.0, None)
+        display.show_final_decision("Answer.", 1.0, 1.0, 0.0, None)
         out = _output(buf)
         assert "Dissent" not in out
 
     def test_decision_not_truncated(self) -> None:
         display, buf = _make_display()
         long_decision = "x" * 1000
-        display.show_final_decision(long_decision, 0.9, 0.05, None)
+        display.show_final_decision(long_decision, 0.9, 1.0, 0.05, None)
         out = _output(buf)
         # Final decision should NOT be truncated
         assert "..." not in out
@@ -356,10 +357,10 @@ def test_complete_round_display(self) -> None:
             ]
         )
         display.show_revise("mock:model-a", "Revised with challenges.")
-        display.show_commit(0.75, "Some dissent.")
+        display.show_commit(0.75, 1.0, "Some dissent.")
         display.round_footer(1, 2, 3, 0.05)
         display.show_final_decision(
-            "Final consensus answer.", 0.75, 0.05, "Some dissent."
+            "Final consensus answer.", 0.75, 1.0, 0.05, "Some dissent."
         )
 
         out = _output(buf)
diff --git a/tests/unit/test_cli_tools.py b/tests/unit/test_cli_tools.py
index 1973dca..3e24d94 100644
--- a/tests/unit/test_cli_tools.py
+++ b/tests/unit/test_cli_tools.py
@@ -242,7 +242,7 @@ def test_tools_enabled_passes_registry(
         config = DuhConfig(tools=ToolsConfig(enabled=True))
         mock_config.return_value = config
         mock_providers.return_value.list_all_models.return_value = ["model1"]
-        mock_consensus.return_value = ("Answer", 0.9, None, 0.01)
+        mock_consensus.return_value = ("Answer", 0.9, 1.0, None, 0.01)
 
         runner.invoke(cli, ["ask", "test question"])
 
@@ -263,7 +263,7 @@ def test_tools_disabled_passes_none(
         config = DuhConfig(tools=ToolsConfig(enabled=False))
         mock_config.return_value = config
         mock_providers.return_value.list_all_models.return_value = ["model1"]
-        mock_consensus.return_value = ("Answer", 0.9, None, 0.01)
+        mock_consensus.return_value = ("Answer", 0.9, 1.0, None, 0.01)
 
         runner.invoke(cli, ["ask", "test question"])
 
diff --git a/tests/unit/test_cli_voting.py b/tests/unit/test_cli_voting.py
index 02b031d..a57b303 100644
--- a/tests/unit/test_cli_voting.py
+++ b/tests/unit/test_cli_voting.py
@@ -147,7 +147,7 @@ def test_default_protocol_is_consensus(
         from duh.config.schema import DuhConfig
 
         mock_config.return_value = DuhConfig()
-        mock_run.return_value = ("Answer.", 1.0, None, 0.0)
+        mock_run.return_value = ("Answer.", 1.0, 1.0, None, 0.0)
 
         result = runner.invoke(cli, ["ask", "Question?"])
         assert result.exit_code == 0
@@ -293,7 +293,7 @@ async def _seed() -> str:
         assert "Use Django" in result.output
         assert "mock:model-b" in result.output
         assert "Use FastAPI" in result.output
-        assert "Decision (confidence 80%)" in result.output
+        assert "Decision (confidence 80%, rigor 0%)" in result.output
         asyncio.run(engine.dispose())
 
     def test_show_without_votes(self, runner: CliRunner) -> None:
diff --git a/tests/unit/test_commit_handler.py b/tests/unit/test_commit_handler.py
index df56f1d..9351288 100644
--- a/tests/unit/test_commit_handler.py
+++ b/tests/unit/test_commit_handler.py
@@ -1,4 +1,4 @@
-"""Tests for the COMMIT handler: confidence, dissent, context."""
+"""Tests for the COMMIT handler: rigor, confidence, dissent, context."""
 
 from __future__ import annotations
 
@@ -7,7 +7,7 @@
 import pytest
 
 from duh.consensus.handlers import (
-    _compute_confidence,
+    _compute_rigor,
     _extract_dissent,
     handle_commit,
 )
@@ -61,30 +61,30 @@ def _commit_ctx(**kwargs: object) -> ConsensusContext:
 # ── Confidence computation ───────────────────────────────────────
 
 
-class TestComputeConfidence:
+class TestComputeRigor:
     def test_all_genuine(self) -> None:
         challenges = [
             ChallengeResult("m1", "real issue"),
             ChallengeResult("m2", "another issue"),
         ]
-        assert _compute_confidence(challenges) == 1.0
+        assert _compute_rigor(challenges) == 1.0
 
     def test_all_sycophantic(self) -> None:
         challenges = [
             ChallengeResult("m1", "great answer", sycophantic=True),
             ChallengeResult("m2", "looks good", sycophantic=True),
         ]
-        assert _compute_confidence(challenges) == 0.5
+        assert _compute_rigor(challenges) == 0.5
 
     def test_mixed(self) -> None:
         challenges = [
             ChallengeResult("m1", "real issue"),
             ChallengeResult("m2", "great answer", sycophantic=True),
         ]
-        assert _compute_confidence(challenges) == 0.75
+        assert _compute_rigor(challenges) == 0.75
 
     def test_empty(self) -> None:
-        assert _compute_confidence([]) == 0.5
+        assert _compute_rigor([]) == 0.5
 
 
 # ── Dissent extraction ───────────────────────────────────────────
@@ -147,12 +147,20 @@ async def test_decision_equals_revision(self) -> None:
 
         assert ctx.decision == revision
 
-    async def test_confidence_computed(self) -> None:
+    async def test_rigor_computed(self) -> None:
         ctx = _commit_ctx()
         # Default challenges are all genuine
         await handle_commit(ctx)
 
-        assert ctx.confidence == 1.0
+        assert ctx.rigor == 1.0
+
+    async def test_confidence_capped_by_domain(self) -> None:
+        ctx = _commit_ctx()
+        # All genuine → rigor=1.0, but no pm → no classification → cap=0.85
+        await handle_commit(ctx)
+
+        assert ctx.rigor == 1.0
+        assert ctx.confidence == 0.85  # min(0.85, 1.0)
 
     async def test_confidence_with_sycophantic(self) -> None:
         ctx = _commit_ctx()
@@ -162,7 +170,8 @@ async def test_confidence_with_sycophantic(self) -> None:
         ]
         await handle_commit(ctx)
 
-        assert ctx.confidence == 0.75
+        assert ctx.rigor == 0.75
+        assert ctx.confidence == 0.75  # min(0.85, 0.75) = rigor is lower
 
     async def test_dissent_preserved(self) -> None:
         ctx = _commit_ctx()
@@ -235,7 +244,8 @@ async def test_full_commit_flow(self) -> None:
         await handle_commit(ctx)
 
         assert ctx.decision == "Use SQLite instead"
-        assert ctx.confidence == 1.0
+        assert ctx.rigor == 1.0
+        assert ctx.confidence == 0.85  # no pm → default cap
         assert ctx.dissent is not None
         assert "Too complex" in ctx.dissent
 
@@ -295,6 +305,7 @@ async def test_decision_db_round_trip(self, db_session: AsyncSession) -> None:
             thread_id=thread.id,
             content=ctx.decision or "",
             confidence=ctx.confidence,
+            rigor=ctx.rigor,
             dissent=ctx.dissent,
         )
         await db_session.commit()
@@ -305,6 +316,7 @@ async def test_decision_db_round_trip(self, db_session: AsyncSession) -> None:
         loaded = decisions[0]
         assert loaded.content == ctx.decision
         assert loaded.confidence == ctx.confidence
+        assert loaded.rigor == ctx.rigor
         assert loaded.dissent == ctx.dissent
         assert loaded.turn_id == turn.id
         assert loaded.thread_id == thread.id
diff --git a/tests/unit/test_confidence_scoring.py b/tests/unit/test_confidence_scoring.py
new file mode 100644
index 0000000..675e602
--- /dev/null
+++ b/tests/unit/test_confidence_scoring.py
@@ -0,0 +1,163 @@
+"""Tests for the epistemic confidence scoring system.
+
+Tests the renamed _compute_rigor(), new _domain_cap(), and the
+combined confidence = min(domain_cap, rigor) formula.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from duh.consensus.handlers import (
+    DOMAIN_CAPS,
+    _compute_rigor,
+    _domain_cap,
+)
+from duh.consensus.machine import ChallengeResult
+
+# ── Rigor computation (renamed from _compute_confidence) ─────
+
+
+class TestComputeRigor:
+    def test_all_genuine(self) -> None:
+        challenges = [
+            ChallengeResult("m1", "real issue"),
+            ChallengeResult("m2", "another issue"),
+        ]
+        assert _compute_rigor(challenges) == 1.0
+
+    def test_all_sycophantic(self) -> None:
+        challenges = [
+            ChallengeResult("m1", "great", sycophantic=True),
+            ChallengeResult("m2", "good", sycophantic=True),
+        ]
+        assert _compute_rigor(challenges) == 0.5
+
+    def test_mixed(self) -> None:
+        challenges = [
+            ChallengeResult("m1", "real issue"),
+            ChallengeResult("m2", "great", sycophantic=True),
+        ]
+        assert _compute_rigor(challenges) == 0.75
+
+    def test_empty(self) -> None:
+        assert _compute_rigor([]) == 0.5
+
+    def test_range_always_half_to_one(self) -> None:
+        for n_genuine in range(5):
+            for n_syc in range(5):
+                if n_genuine + n_syc == 0:
+                    continue
+                challenges = [
+                    ChallengeResult(f"g{i}", "issue") for i in range(n_genuine)
+                ] + [
+                    ChallengeResult(f"s{i}", "good", sycophantic=True)
+                    for i in range(n_syc)
+                ]
+                rigor = _compute_rigor(challenges)
+                assert 0.5 <= rigor <= 1.0, f"{n_genuine}g/{n_syc}s -> {rigor}"
+
+
+# ── Domain cap lookup ────────────────────────────────────────
+
+
+class TestDomainCap:
+    def test_factual(self) -> None:
+        assert _domain_cap("factual") == 0.95
+
+    def test_technical(self) -> None:
+        assert _domain_cap("technical") == 0.90
+
+    def test_creative(self) -> None:
+        assert _domain_cap("creative") == 0.85
+
+    def test_judgment(self) -> None:
+        assert _domain_cap("judgment") == 0.80
+
+    def test_strategic(self) -> None:
+        assert _domain_cap("strategic") == 0.70
+
+    def test_unknown_intent(self) -> None:
+        assert _domain_cap("nonexistent") == 0.85
+
+    def test_none_intent(self) -> None:
+        assert _domain_cap(None) == 0.85
+
+    def test_all_caps_below_one(self) -> None:
+        for intent, cap in DOMAIN_CAPS.items():
+            assert cap < 1.0, f"{intent} cap {cap} >= 1.0"
+
+    def test_all_caps_above_zero(self) -> None:
+        for intent, cap in DOMAIN_CAPS.items():
+            assert cap > 0.0, f"{intent} cap {cap} <= 0.0"
+
+
+# ── Combined epistemic confidence ────────────────────────────
+
+
+class TestEpistemicConfidence:
+    """Test the formula: confidence = min(domain_cap, rigor)."""
+
+    def test_factual_all_genuine(self) -> None:
+        """Capital of France: rigor=1.0, cap=0.95 -> confidence=0.95."""
+        rigor = _compute_rigor(
+            [
+                ChallengeResult("m1", "real issue"),
+                ChallengeResult("m2", "another issue"),
+            ]
+        )
+        assert rigor == 1.0
+        cap = _domain_cap("factual")
+        confidence = min(cap, rigor)
+        assert confidence == 0.95
+
+    def test_strategic_all_genuine(self) -> None:
+        """Will X happen by 2035: rigor=1.0, cap=0.70 -> confidence=0.70."""
+        rigor = _compute_rigor(
+            [
+                ChallengeResult("m1", "real issue"),
+                ChallengeResult("m2", "another issue"),
+            ]
+        )
+        assert rigor == 1.0
+        cap = _domain_cap("strategic")
+        confidence = min(cap, rigor)
+        assert confidence == 0.70
+
+    def test_rigor_below_cap(self) -> None:
+        """When rigor < cap, confidence = rigor."""
+        rigor = _compute_rigor(
+            [
+                ChallengeResult("m1", "issue"),
+                ChallengeResult("m2", "great", sycophantic=True),
+            ]
+        )
+        assert rigor == 0.75
+        cap = _domain_cap("factual")
+        confidence = min(cap, rigor)
+        assert confidence == 0.75  # rigor is the binding constraint
+
+    def test_unknown_domain_capped(self) -> None:
+        """Unknown intent uses default cap of 0.85."""
+        rigor = 1.0
+        cap = _domain_cap(None)
+        assert min(cap, rigor) == 0.85
+
+    @pytest.mark.parametrize(
+        ("intent", "expected_cap"),
+        [
+            ("factual", 0.95),
+            ("technical", 0.90),
+            ("creative", 0.85),
+            ("judgment", 0.80),
+            ("strategic", 0.70),
+            (None, 0.85),
+        ],
+    )
+    def test_max_confidence_per_intent(
+        self, intent: str | None, expected_cap: float
+    ) -> None:
+        """With perfect rigor, confidence equals the domain cap."""
+        rigor = 1.0  # all genuine challenges
+        confidence = min(_domain_cap(intent), rigor)
+        assert confidence == expected_cap
diff --git a/tests/unit/test_context_builder.py b/tests/unit/test_context_builder.py
index 507021b..44adae7 100644
--- a/tests/unit/test_context_builder.py
+++ b/tests/unit/test_context_builder.py
@@ -30,12 +30,14 @@ def __init__(
         self,
         content: str,
         confidence: float = 1.0,
+        rigor: float = 0.0,
         dissent: str | None = None,
     ) -> None:
         _FakeDecision._counter += 1
         self.id = f"fake-decision-{_FakeDecision._counter}"
         self.content = content
         self.confidence = confidence
+        self.rigor = rigor
         self.dissent = dissent
 
 
@@ -93,7 +95,7 @@ def test_decision_confidence_formatted(self) -> None:
             _FakeDecision("Use Redis", confidence=0.75),
         ]
         result = build_context(None, decisions)  # type: ignore[arg-type]
-        assert "75% confidence" in result
+        assert "75% confidence, 0% rigor" in result
 
     def test_decisions_with_dissent(self) -> None:
         decisions = [
diff --git a/tests/unit/test_mcp_server.py b/tests/unit/test_mcp_server.py
index 4d28aa2..01dd139 100644
--- a/tests/unit/test_mcp_server.py
+++ b/tests/unit/test_mcp_server.py
@@ -177,7 +177,7 @@ async def test_consensus_protocol(self) -> None:
             patch(
                 "duh.cli.app._run_consensus",
                 new_callable=AsyncMock,
-                return_value=("Use SQLite.", 0.9, "minor dissent", 0.05),
+                return_value=("Use SQLite.", 0.9, 1.0, "minor dissent", 0.05),
             ),
         ):
             result = await _handle_ask({"question": "What DB?", "rounds": 2})
@@ -185,6 +185,7 @@ async def test_consensus_protocol(self) -> None:
         data = json.loads(result[0].text)
         assert data["decision"] == "Use SQLite."
         assert data["confidence"] == 0.9
+        assert data["rigor"] == 1.0
         assert data["dissent"] == "minor dissent"
         assert data["cost"] == 0.05
 
@@ -205,6 +206,7 @@ class FakeAggregation:
             decision: str
             strategy: str
             confidence: float
+            rigor: float = 0.5
 
         fake_result = FakeAggregation(
             votes=(FakeVote("m1", "Use X", 0.9),),
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index 8394dd1..5596c66 100644
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -75,12 +75,13 @@ async def test_returns_decision_and_confidence(self) -> None:
         pm = ProviderManager()
         await pm.register(provider)
 
-        decision, confidence = await _run_mini_consensus(
+        decision, confidence, rigor = await _run_mini_consensus(
             "What database should I use?", pm
         )
         assert isinstance(decision, str)
         assert len(decision) > 0
         assert 0.0 <= confidence <= 1.0
+        assert 0.5 <= rigor <= 1.0
 
     async def test_runs_all_four_phases(self) -> None:
         from duh.providers.manager import ProviderManager
diff --git a/web/src/App.tsx b/web/src/App.tsx
index eec04fd..b7e9acb 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -6,6 +6,7 @@ import {
   ThreadsPage,
   ThreadDetailPage,
   DecisionSpacePage,
+  CalibrationPage,
   PreferencesPage,
   SharePage,
 } from '@/pages'
@@ -21,6 +22,7 @@ export function App() {
             <Route path="/threads" element={<ThreadsPage />} />
             <Route path="/threads/:id" element={<ThreadDetailPage />} />
             <Route path="/space" element={<DecisionSpacePage />} />
+            <Route path="/calibration" element={<CalibrationPage />} />
             <Route path="/preferences" element={<PreferencesPage />} />
           </Route>
         </Routes>
diff --git a/web/src/__tests__/consensus-components.test.tsx b/web/src/__tests__/consensus-components.test.tsx
index 5262284..5109027 100644
--- a/web/src/__tests__/consensus-components.test.tsx
+++ b/web/src/__tests__/consensus-components.test.tsx
@@ -232,6 +232,7 @@ describe('generateExportMarkdown', () => {
       reviser: 'anthropic:claude-3',
       revision: 'Use SQLite for v0.1.',
       confidence: 0.85,
+      rigor: 0.78,
       dissent: 'PostgreSQL for scale.',
     },
   ]
@@ -241,6 +242,7 @@ describe('generateExportMarkdown', () => {
       'Best database?',
       'Use SQLite.',
       0.85,
+      0.78,
       'PostgreSQL for scale.',
       0.003,
       rounds,
@@ -252,6 +254,7 @@ describe('generateExportMarkdown', () => {
     expect(md).toContain('## Decision')
     expect(md).toContain('Use SQLite.')
     expect(md).toContain('Confidence: 85%')
+    expect(md).toContain('Rigor: 78%')
     expect(md).toContain('## Dissent')
     expect(md).toContain('PostgreSQL for scale.')
     expect(md).not.toContain('## Consensus Process')
@@ -263,6 +266,7 @@ describe('generateExportMarkdown', () => {
       'Best database?',
       'Use SQLite.',
       0.85,
+      0.78,
       'PostgreSQL for scale.',
       0.003,
       rounds,
@@ -285,6 +289,7 @@ describe('generateExportMarkdown', () => {
       'Best database?',
       'Use SQLite.',
       0.85,
+      0.78,
       'PostgreSQL for scale.',
       0.003,
       rounds,
@@ -302,6 +307,7 @@ describe('generateExportMarkdown', () => {
       'Question',
       'Answer',
       0.9,
+      0.82,
       null,
       0.0512,
       [],
@@ -317,6 +323,7 @@ describe('generateExportMarkdown', () => {
       null,
       'Answer',
       0.9,
+      0.82,
       null,
       null,
       [],
diff --git a/web/src/__tests__/stores.test.ts b/web/src/__tests__/stores.test.ts
index e13124d..ead0eb0 100644
--- a/web/src/__tests__/stores.test.ts
+++ b/web/src/__tests__/stores.test.ts
@@ -19,6 +19,7 @@ vi.mock('@/api/client', () => ({
     recall: vi.fn(),
     feedback: vi.fn(),
     decisionSpace: vi.fn(),
+    calibration: vi.fn(),
   },
 }))
 
@@ -26,6 +27,7 @@ import { useConsensusStore } from '@/stores/consensus'
 import { useThreadsStore } from '@/stores/threads'
 import { usePreferencesStore } from '@/stores/preferences'
 import { useDecisionSpaceStore } from '@/stores/decision-space'
+import { useCalibrationStore } from '@/stores/calibration'
 import { api } from '@/api/client'
 
 const mockedApi = vi.mocked(api)
@@ -46,6 +48,7 @@ describe('useConsensusStore', () => {
     expect(state.rounds).toEqual([])
     expect(state.decision).toBeNull()
     expect(state.confidence).toBeNull()
+    expect(state.rigor).toBeNull()
     expect(state.dissent).toBeNull()
     expect(state.cost).toBeNull()
   })
@@ -317,6 +320,7 @@ describe('useDecisionSpaceStore', () => {
           thread_id: 't1',
           question: 'Q1',
           confidence: 0.85,
+          rigor: 0.72,
           intent: null,
           category: 'tech',
           genus: null,
@@ -390,3 +394,82 @@ describe('useDecisionSpaceStore', () => {
     expect(useDecisionSpaceStore.getState().timelineSpeed).toBe(4)
   })
 })
+
+// ── Calibration Store ────────────────────────────────────
+
+describe('useCalibrationStore', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    useCalibrationStore.setState({
+      buckets: [],
+      totalDecisions: 0,
+      totalWithOutcomes: 0,
+      overallAccuracy: 0,
+      ece: 0,
+      loading: false,
+      error: null,
+      category: null,
+    })
+  })
+
+  it('has correct initial state', () => {
+    const state = useCalibrationStore.getState()
+    expect(state.buckets).toEqual([])
+    expect(state.totalDecisions).toBe(0)
+    expect(state.totalWithOutcomes).toBe(0)
+    expect(state.overallAccuracy).toBe(0)
+    expect(state.ece).toBe(0)
+    expect(state.loading).toBe(false)
+    expect(state.error).toBeNull()
+    expect(state.category).toBeNull()
+  })
+
+  it('fetchCalibration populates data', async () => {
+    const mockData = {
+      buckets: [
+        {
+          range_lo: 0.0,
+          range_hi: 0.1,
+          count: 0,
+          with_outcomes: 0,
+          success: 0,
+          failure: 0,
+          partial: 0,
+          accuracy: 0,
+          mean_confidence: 0.05,
+        },
+      ],
+      total_decisions: 5,
+      total_with_outcomes: 3,
+      overall_accuracy: 0.75,
+      ece: 0.08,
+    }
+    mockedApi.calibration.mockResolvedValue(mockData)
+
+    await useCalibrationStore.getState().fetchCalibration()
+    const state = useCalibrationStore.getState()
+    expect(state.totalDecisions).toBe(5)
+    expect(state.totalWithOutcomes).toBe(3)
+    expect(state.overallAccuracy).toBe(0.75)
+    expect(state.ece).toBe(0.08)
+    expect(state.buckets).toEqual(mockData.buckets)
+    expect(state.loading).toBe(false)
+  })
+
+  it('fetchCalibration handles errors', async () => {
+    mockedApi.calibration.mockRejectedValue(new Error('Server error'))
+
+    await useCalibrationStore.getState().fetchCalibration()
+    const state = useCalibrationStore.getState()
+    expect(state.error).toBe('Server error')
+    expect(state.loading).toBe(false)
+  })
+
+  it('setCategory updates category filter', () => {
+    useCalibrationStore.getState().setCategory('tech')
+    expect(useCalibrationStore.getState().category).toBe('tech')
+
+    useCalibrationStore.getState().setCategory(null)
+    expect(useCalibrationStore.getState().category).toBeNull()
+  })
+})
diff --git a/web/src/api/client.ts b/web/src/api/client.ts
index 094398a..9ded7df 100644
--- a/web/src/api/client.ts
+++ b/web/src/api/client.ts
@@ -1,6 +1,7 @@
 import type {
   AskRequest,
   AskResponse,
+  CalibrationResponse,
   CostResponse,
   DecisionSpaceResponse,
   FeedbackRequest,
@@ -110,6 +111,21 @@ export const api = {
     return request(`/decisions/space${suffix}`)
   },
 
+  calibration(params?: {
+    category?: string
+    since?: string
+    until?: string
+  }): Promise<CalibrationResponse> {
+    const qs = new URLSearchParams()
+    if (params) {
+      for (const [k, v] of Object.entries(params)) {
+        if (v != null) qs.set(k, String(v))
+      }
+    }
+    const suffix = qs.toString() ? `?${qs}` : ''
+    return request(`/calibration${suffix}`)
+  },
+
   getShare(shareToken: string): Promise<ThreadDetail> {
     return request(`/share/${encodeURIComponent(shareToken)}`)
   },
diff --git a/web/src/api/types.ts b/web/src/api/types.ts
index 0fcb67d..26ed5d6 100644
--- a/web/src/api/types.ts
+++ b/web/src/api/types.ts
@@ -19,6 +19,7 @@ export interface FeedbackRequest {
 export interface AskResponse {
   decision: string
   confidence: number
+  rigor: number
   dissent: string | null
   cost: number
   thread_id: string | null
@@ -49,6 +50,7 @@ export interface Contribution {
 export interface Decision {
   content: string
   confidence: number
+  rigor: number
   dissent: string | null
 }
 
@@ -130,6 +132,7 @@ export interface SpaceDecision {
   thread_id: string
   question: string
   confidence: number
+  rigor: number
   intent: string | null
   category: string | null
   genus: string | null
@@ -148,6 +151,28 @@ export interface DecisionSpaceResponse {
   total: number
 }
 
+// ── Calibration types ────────────────────────────────────
+
+export interface CalibrationBucket {
+  range_lo: number
+  range_hi: number
+  count: number
+  with_outcomes: number
+  success: number
+  failure: number
+  partial: number
+  accuracy: number
+  mean_confidence: number
+}
+
+export interface CalibrationResponse {
+  buckets: CalibrationBucket[]
+  total_decisions: number
+  total_with_outcomes: number
+  overall_accuracy: number
+  ece: number
+}
+
 // ── WebSocket event types ─────────────────────────────────
 
 export type WSEventType =
@@ -183,6 +208,7 @@ export interface WSChallenge {
 export interface WSCommit {
   type: 'commit'
   confidence: number
+  rigor: number
   dissent: string | null
   round: number
 }
@@ -191,6 +217,7 @@ export interface WSComplete {
   type: 'complete'
   decision: string
   confidence: number
+  rigor: number
   dissent: string | null
   cost: number
   thread_id: string | null
diff --git a/web/src/components/calibration/CalibrationDashboard.tsx b/web/src/components/calibration/CalibrationDashboard.tsx
new file mode 100644
index 0000000..502a25e
--- /dev/null
+++ b/web/src/components/calibration/CalibrationDashboard.tsx
@@ -0,0 +1,243 @@
+import { useEffect } from 'react'
+import { useCalibrationStore } from '@/stores'
+
+function eceRating(ece: number): { label: string; color: string } {
+  if (ece < 0.05) return { label: 'Excellent', color: 'var(--color-success, #22c55e)' }
+  if (ece < 0.1) return { label: 'Good', color: 'var(--color-primary)' }
+  if (ece < 0.2) return { label: 'Fair', color: 'var(--color-warning, #eab308)' }
+  return { label: 'Poor', color: 'var(--color-error, #ef4444)' }
+}
+
+export function CalibrationDashboard() {
+  const {
+    buckets,
+    totalDecisions,
+    totalWithOutcomes,
+    overallAccuracy,
+    ece,
+    loading,
+    error,
+    fetchCalibration,
+  } = useCalibrationStore()
+
+  useEffect(() => {
+    fetchCalibration()
+  }, [fetchCalibration])
+
+  if (loading) {
+    return (
+      <div className="flex items-center justify-center py-20 text-[var(--color-text-dim)]">
+        Loading calibration data...
+      </div>
+    )
+  }
+
+  if (error) {
+    return (
+      <div className="text-center py-20 text-[var(--color-error,#ef4444)]">
+        Error: {error}
+      </div>
+    )
+  }
+
+  const rating = eceRating(ece)
+
+  return (
+    <div className="space-y-6 max-w-4xl">
+      <h2 className="text-xl font-bold text-[var(--color-text)]">
+        Confidence Calibration
+      </h2>
+      <p className="text-sm text-[var(--color-text-secondary)]">
+        Are confidence scores accurate? Compare predicted confidence against actual
+        outcomes.
+      </p>
+
+      {/* Metric cards */}
+      <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
+        <MetricCard label="Decisions" value={String(totalDecisions)} />
+        <MetricCard label="With Outcomes" value={String(totalWithOutcomes)} />
+        <MetricCard
+          label="Overall Accuracy"
+          value={totalWithOutcomes > 0 ? `${(overallAccuracy * 100).toFixed(1)}%` : '-'}
+        />
+        <MetricCard
+          label="ECE"
+          value={totalWithOutcomes > 0 ? ece.toFixed(4) : '-'}
+          sublabel={totalWithOutcomes > 0 ? rating.label : undefined}
+          sublabelColor={totalWithOutcomes > 0 ? rating.color : undefined}
+        />
+      </div>
+
+      {totalWithOutcomes === 0 && (
+        <div className="text-center py-12 text-[var(--color-text-dim)] text-sm">
+          No outcomes recorded yet. Use{' '}
+          <code className="bg-[var(--color-surface-hover)] px-1.5 py-0.5 rounded text-xs">
+            duh feedback
+          </code>{' '}
+          to record outcomes for your decisions.
+        </div>
+      )}
+
+      {totalWithOutcomes > 0 && (
+        <>
+          {/* Calibration chart */}
+          <div className="bg-[var(--color-surface)] border border-[var(--color-border)] rounded-[var(--radius-md)] p-5">
+            <h3 className="text-sm font-semibold text-[var(--color-text)] mb-4">
+              Calibration Chart
+            </h3>
+            <div className="space-y-2">
+              {buckets.map((b) => {
+                const lo = Math.round(b.range_lo * 100)
+                const hi = Math.round(b.range_hi * 100)
+                const label = `${lo}-${hi}%`
+                const accWidth =
+                  b.with_outcomes > 0 ? (b.accuracy * 100).toFixed(0) : '0'
+                const confWidth = (b.mean_confidence * 100).toFixed(0)
+                return (
+                  <div key={lo} className="flex items-center gap-3 text-xs">
+                    <span className="w-14 text-right text-[var(--color-text-dim)] font-mono">
+                      {label}
+                    </span>
+                    <div className="flex-1 relative h-5">
+                      {/* Accuracy bar */}
+                      {b.with_outcomes > 0 && (
+                        <div
+                          className="absolute inset-y-0 rounded-sm"
+                          style={{
+                            width: `${accWidth}%`,
+                            backgroundColor: 'var(--color-primary)',
+                            opacity: 0.7,
+                          }}
+                        />
+                      )}
+                      {/* Perfect calibration line */}
+                      <div
+                        className="absolute top-0 bottom-0 w-px"
+                        style={{
+                          left: `${confWidth}%`,
+                          backgroundColor: 'var(--color-text-dim)',
+                          opacity: 0.5,
+                        }}
+                      />
+                    </div>
+                    <span className="w-20 text-[var(--color-text-secondary)] font-mono">
+                      {b.with_outcomes > 0
+                        ? `${(b.accuracy * 100).toFixed(0)}% (n=${b.with_outcomes})`
+                        : '-'}
+                    </span>
+                  </div>
+                )
+              })}
+            </div>
+            <div className="flex items-center gap-4 mt-3 text-[10px] text-[var(--color-text-dim)]">
+              <span className="flex items-center gap-1">
+                <span
+                  className="inline-block w-3 h-2 rounded-sm"
+                  style={{ backgroundColor: 'var(--color-primary)', opacity: 0.7 }}
+                />
+                Actual accuracy
+              </span>
+              <span className="flex items-center gap-1">
+                <span
+                  className="inline-block w-px h-3"
+                  style={{ backgroundColor: 'var(--color-text-dim)' }}
+                />
+                Expected (mean confidence)
+              </span>
+            </div>
+          </div>
+
+          {/* Bucket table */}
+          <div className="bg-[var(--color-surface)] border border-[var(--color-border)] rounded-[var(--radius-md)] p-5 overflow-x-auto">
+            <h3 className="text-sm font-semibold text-[var(--color-text)] mb-3">
+              Bucket Details
+            </h3>
+            <table className="w-full text-xs font-mono">
+              <thead>
+                <tr className="text-[var(--color-text-dim)] border-b border-[var(--color-border)]">
+                  <th className="text-left py-2 pr-3">Range</th>
+                  <th className="text-right py-2 px-2">Count</th>
+                  <th className="text-right py-2 px-2">Outcomes</th>
+                  <th className="text-right py-2 px-2">Success</th>
+                  <th className="text-right py-2 px-2">Partial</th>
+                  <th className="text-right py-2 px-2">Failure</th>
+                  <th className="text-right py-2 px-2">Accuracy</th>
+                  <th className="text-right py-2 px-2">Confidence</th>
+                  <th className="text-right py-2 pl-2">Gap</th>
+                </tr>
+              </thead>
+              <tbody>
+                {buckets
+                  .filter((b) => b.count > 0)
+                  .map((b) => {
+                    const lo = Math.round(b.range_lo * 100)
+                    const hi = Math.round(b.range_hi * 100)
+                    const gap =
+                      b.with_outcomes > 0
+                        ? Math.abs(b.accuracy - b.mean_confidence)
+                        : null
+
+                    return (
+                      <tr
+                        key={lo}
+                        className="border-b border-[var(--color-border)] last:border-0 text-[var(--color-text-secondary)]"
+                      >
+                        <td className="py-1.5 pr-3">{lo}-{hi}%</td>
+                        <td className="text-right py-1.5 px-2">{b.count}</td>
+                        <td className="text-right py-1.5 px-2">{b.with_outcomes}</td>
+                        <td className="text-right py-1.5 px-2">{b.success}</td>
+                        <td className="text-right py-1.5 px-2">{b.partial}</td>
+                        <td className="text-right py-1.5 px-2">{b.failure}</td>
+                        <td className="text-right py-1.5 px-2">
+                          {b.with_outcomes > 0
+                            ? `${(b.accuracy * 100).toFixed(1)}%`
+                            : '-'}
+                        </td>
+                        <td className="text-right py-1.5 px-2">
+                          {(b.mean_confidence * 100).toFixed(1)}%
+                        </td>
+                        <td className="text-right py-1.5 pl-2">
+                          {gap != null ? `${(gap * 100).toFixed(1)}%` : '-'}
+                        </td>
+                      </tr>
+                    )
+                  })}
+              </tbody>
+            </table>
+          </div>
+        </>
+      )}
+    </div>
+  )
+}
+
+function MetricCard({
+  label,
+  value,
+  sublabel,
+  sublabelColor,
+}: {
+  label: string
+  value: string
+  sublabel?: string
+  sublabelColor?: string
+}) {
+  return (
+    <div className="bg-[var(--color-surface)] border border-[var(--color-border)] rounded-[var(--radius-md)] p-4">
+      <div className="text-[10px] text-[var(--color-text-dim)] uppercase tracking-wider mb-1">
+        {label}
+      </div>
+      <div className="text-xl font-bold text-[var(--color-text)] font-mono">
+        {value}
+      </div>
+      {sublabel && (
+        <div
+          className="text-xs font-semibold mt-0.5"
+          style={{ color: sublabelColor }}
+        >
+          {sublabel}
+        </div>
+      )}
+    </div>
+  )
+}
diff --git a/web/src/components/calibration/index.ts b/web/src/components/calibration/index.ts
new file mode 100644
index 0000000..2a81cc6
--- /dev/null
+++ b/web/src/components/calibration/index.ts
@@ -0,0 +1 @@
+export { CalibrationDashboard } from './CalibrationDashboard'
diff --git a/web/src/components/consensus/ConfidenceMeter.tsx b/web/src/components/consensus/ConfidenceMeter.tsx
index 865bdde..4971ca6 100644
--- a/web/src/components/consensus/ConfidenceMeter.tsx
+++ b/web/src/components/consensus/ConfidenceMeter.tsx
@@ -3,6 +3,7 @@ import { useEffect, useState } from 'react'
 interface ConfidenceMeterProps {
   value: number
   size?: number
+  label?: string
 }
 
 function getColor(value: number): string {
@@ -12,7 +13,7 @@ function getColor(value: number): string {
   return 'var(--color-green)'
 }
 
-export function ConfidenceMeter({ value, size = 64 }: ConfidenceMeterProps) {
+export function ConfidenceMeter({ value, size = 64, label }: ConfidenceMeterProps) {
   const radius = (size - 8) / 2
   const circumference = 2 * Math.PI * radius
   const targetOffset = circumference * (1 - value)
@@ -54,6 +55,11 @@ export function ConfidenceMeter({ value, size = 64 }: ConfidenceMeterProps) {
       <span className="font-mono text-xs" style={{ color }}>
         {(value * 100).toFixed(0)}%
       </span>
+      {label && (
+        <span className="font-mono text-[10px] text-[var(--color-text-dim)]">
+          {label}
+        </span>
+      )}
     </div>
   )
 }
diff --git a/web/src/components/consensus/ConsensusComplete.tsx b/web/src/components/consensus/ConsensusComplete.tsx
index b841c1c..88942b0 100644
--- a/web/src/components/consensus/ConsensusComplete.tsx
+++ b/web/src/components/consensus/ConsensusComplete.tsx
@@ -9,6 +9,7 @@ import type { RoundData } from '@/stores/consensus'
 interface ConsensusCompleteProps {
   decision: string
   confidence: number
+  rigor: number
   dissent: string | null
   cost: number | null
 }
@@ -17,6 +18,7 @@ export function generateExportMarkdown(
   question: string | null,
   decision: string,
   confidence: number,
+  rigor: number,
   dissent: string | null,
   cost: number | null,
   rounds: RoundData[],
@@ -29,7 +31,7 @@ export function generateExportMarkdown(
   lines.push('## Decision')
   lines.push(decision)
   lines.push('')
-  lines.push(`Confidence: ${Math.round(confidence * 100)}%`)
+  lines.push(`Confidence: ${Math.round(confidence * 100)}%  Rigor: ${Math.round(rigor * 100)}%`)
   lines.push('')
 
   if (includeDissent && dissent) {
@@ -86,7 +88,7 @@ function downloadFile(content: string | Blob, filename: string, mimeType: string
   URL.revokeObjectURL(url)
 }
 
-export function ConsensusComplete({ decision, confidence, dissent, cost }: ConsensusCompleteProps) {
+export function ConsensusComplete({ decision, confidence, rigor, dissent, cost }: ConsensusCompleteProps) {
   const [copied, setCopied] = useState(false)
   const [exportOpen, setExportOpen] = useState(false)
   const { question, rounds, threadId } = useConsensusStore()
@@ -98,7 +100,7 @@ export function ConsensusComplete({ decision, confidence, dissent, cost }: Conse
   }
 
   const handleExportMarkdown = (content: 'full' | 'decision') => {
-    const md = generateExportMarkdown(question, decision, confidence, dissent, cost, rounds, content, true)
+    const md = generateExportMarkdown(question, decision, confidence, rigor, dissent, cost, rounds, content, true)
     downloadFile(md, `consensus-${content}.md`, 'text/markdown')
     setExportOpen(false)
   }
@@ -121,7 +123,10 @@ export function ConsensusComplete({ decision, confidence, dissent, cost }: Conse
             <span className="font-mono text-xs text-[var(--color-green)] font-semibold">CONSENSUS REACHED</span>
             <CostTicker cost={cost} />
           </div>
-          <ConfidenceMeter value={confidence} />
+          <div className="flex items-center gap-3">
+            <ConfidenceMeter value={confidence} label="Confidence" />
+            <ConfidenceMeter value={rigor} size={48} label="Rigor" />
+          </div>
         </div>
 
         <Markdown className="text-sm">{decision}</Markdown>
diff --git a/web/src/components/consensus/ConsensusPanel.tsx b/web/src/components/consensus/ConsensusPanel.tsx
index 5fb4386..2cc3e0a 100644
--- a/web/src/components/consensus/ConsensusPanel.tsx
+++ b/web/src/components/consensus/ConsensusPanel.tsx
@@ -8,7 +8,7 @@ import { CostTicker } from './CostTicker'
 export function ConsensusPanel() {
   const {
     status, error, currentPhase, currentRound, rounds,
-    decision, confidence, dissent, cost,
+    decision, confidence, rigor, dissent, cost,
     startConsensus, reset,
   } = useConsensusStore()
 
@@ -73,6 +73,7 @@ export function ConsensusPanel() {
               {round.confidence !== null && (
                 <div className="flex items-center gap-3 text-xs font-mono text-[var(--color-text-dim)]">
                   <span>Confidence: {(round.confidence * 100).toFixed(0)}%</span>
+                  {round.rigor !== null && <span>Rigor: {(round.rigor * 100).toFixed(0)}%</span>}
                   {round.dissent && <span className="text-[var(--color-amber)]">Dissent noted</span>}
                 </div>
               )}
@@ -85,6 +86,7 @@ export function ConsensusPanel() {
         <ConsensusComplete
           decision={decision}
           confidence={confidence}
+          rigor={rigor ?? 0}
           dissent={dissent}
           cost={cost}
         />
diff --git a/web/src/components/decision-space/DecisionCloud.tsx b/web/src/components/decision-space/DecisionCloud.tsx
index 4208dde..7fdd137 100644
--- a/web/src/components/decision-space/DecisionCloud.tsx
+++ b/web/src/components/decision-space/DecisionCloud.tsx
@@ -158,6 +158,7 @@ export function DecisionCloud({ decisions, categories, genera, timelinePosition
             <p className="text-[11px] text-[#e4e4e7] line-clamp-2 mb-1">{hoveredDecision.question}</p>
             <div className="flex items-center gap-2 text-[10px] font-mono">
               <span className="text-[#00d4ff]">{(hoveredDecision.confidence * 100).toFixed(0)}%</span>
+              <span className="text-[#a1a1aa]">rigor {(hoveredDecision.rigor * 100).toFixed(0)}%</span>
               {hoveredDecision.category && (
                 <span className="text-[#a1a1aa]">{hoveredDecision.category}</span>
               )}
diff --git a/web/src/components/layout/Sidebar.tsx b/web/src/components/layout/Sidebar.tsx
index 45cfbba..143f751 100644
--- a/web/src/components/layout/Sidebar.tsx
+++ b/web/src/components/layout/Sidebar.tsx
@@ -4,6 +4,7 @@ const navItems = [
   { path: '/', label: 'Consensus', icon: '\u2B21' },
   { path: '/threads', label: 'Threads', icon: '\u2261' },
   { path: '/space', label: 'Decision Space', icon: '\u25CE' },
+  { path: '/calibration', label: 'Calibration', icon: '\u25C9' },
   { path: '/preferences', label: 'Preferences', icon: '\u2699' },
 ]
 
diff --git a/web/src/components/shared/ExportMenu.tsx b/web/src/components/shared/ExportMenu.tsx
index 4e900c2..b63469b 100644
--- a/web/src/components/shared/ExportMenu.tsx
+++ b/web/src/components/shared/ExportMenu.tsx
@@ -14,7 +14,7 @@ function generateMarkdown(thread: ThreadDetail, content: ContentMode, includeDis
   lines.push('')
 
   // Find last decision
-  let finalDecision: { content: string; confidence: number; dissent: string | null } | null = null
+  let finalDecision: { content: string; confidence: number; rigor: number; dissent: string | null } | null = null
   for (let i = thread.turns.length - 1; i >= 0; i--) {
     if (thread.turns[i]?.decision) {
       finalDecision = thread.turns[i]!.decision
@@ -32,7 +32,7 @@ function generateMarkdown(thread: ThreadDetail, content: ContentMode, includeDis
     lines.push('## Decision')
     lines.push(finalDecision.content)
     lines.push('')
-    lines.push(`Confidence: ${Math.round(finalDecision.confidence * 100)}%`)
+    lines.push(`Confidence: ${Math.round(finalDecision.confidence * 100)}%  Rigor: ${Math.round(finalDecision.rigor * 100)}%`)
     lines.push('')
 
     if (includeDissent && finalDecision.dissent) {
@@ -148,7 +148,7 @@ export function ExportMenu({ thread }: ExportMenuProps) {
         {downloading ? 'Exporting...' : 'Export'}
       </GlowButton>
       {open && (
-        <div className="absolute bottom-full left-0 mb-1 bg-[var(--color-bg-secondary)] border border-[var(--color-border)] rounded-lg shadow-lg py-1 min-w-[200px] z-10">
+        <div className="absolute top-full right-0 mt-1 bg-[var(--color-bg)] border border-[var(--color-border)] rounded-lg shadow-lg py-1 min-w-[200px] z-50">
           <button
             className="w-full text-left px-3 py-1.5 text-xs hover:bg-[var(--color-bg-tertiary)] text-[var(--color-text)]"
             onClick={() => handleMarkdown('decision')}
diff --git a/web/src/components/threads/ThreadDetail.tsx b/web/src/components/threads/ThreadDetail.tsx
index 0de8c75..c79ee41 100644
--- a/web/src/components/threads/ThreadDetail.tsx
+++ b/web/src/components/threads/ThreadDetail.tsx
@@ -56,7 +56,7 @@ export function ThreadDetail() {
 
   return (
     <div className="space-y-4">
-      <GlassPanel padding="md">
+      <GlassPanel padding="md" className="relative z-20">
         <div className="flex items-start justify-between gap-4">
           <div className="flex-1">
             <p className="text-[var(--color-text)] font-medium">{currentThread.question}</p>
@@ -65,9 +65,12 @@ export function ThreadDetail() {
               <span>{currentThread.thread_id.slice(0, 8)}</span>
             </div>
           </div>
-          <Badge variant={statusVariant[currentThread.status] ?? 'default'} size="md">
-            {currentThread.status}
-          </Badge>
+          <div className="flex flex-col items-end gap-2">
+            <Badge variant={statusVariant[currentThread.status] ?? 'default'} size="md">
+              {currentThread.status}
+            </Badge>
+            {currentThread.status === 'complete' && <ExportMenu thread={currentThread} />}
+          </div>
         </div>
       </GlassPanel>
 
@@ -96,11 +99,10 @@ export function ThreadDetail() {
         <p className="text-center text-xs font-mono text-[var(--color-green)]">Feedback recorded</p>
       )}
 
-      <div className="flex justify-center gap-2">
+      <div className="flex justify-center">
         <GlowButton variant="ghost" size="sm" onClick={() => navigate('/threads')}>
           Back to threads
         </GlowButton>
-        <ExportMenu thread={currentThread} />
       </div>
     </div>
   )
diff --git a/web/src/components/threads/TurnCard.tsx b/web/src/components/threads/TurnCard.tsx
index e8e9ebe..c098090 100644
--- a/web/src/components/threads/TurnCard.tsx
+++ b/web/src/components/threads/TurnCard.tsx
@@ -46,7 +46,10 @@ export function TurnCard({ turn }: { turn: Turn }) {
                 </div>
               )}
             </div>
-            <ConfidenceMeter value={turn.decision.confidence} size={48} />
+            <div className="flex items-center gap-2">
+              <ConfidenceMeter value={turn.decision.confidence} size={48} label="Confidence" />
+              <ConfidenceMeter value={turn.decision.rigor} size={36} label="Rigor" />
+            </div>
           </div>
         </div>
       )}
diff --git a/web/src/pages/CalibrationPage.tsx b/web/src/pages/CalibrationPage.tsx
new file mode 100644
index 0000000..a169c05
--- /dev/null
+++ b/web/src/pages/CalibrationPage.tsx
@@ -0,0 +1,12 @@
+import { CalibrationDashboard } from '@/components/calibration'
+import { PageTransition } from '@/components/shared'
+
+export function CalibrationPage() {
+  return (
+    <PageTransition>
+      <div className="p-6">
+        <CalibrationDashboard />
+      </div>
+    </PageTransition>
+  )
+}
diff --git a/web/src/pages/index.ts b/web/src/pages/index.ts
index 353f229..9683931 100644
--- a/web/src/pages/index.ts
+++ b/web/src/pages/index.ts
@@ -4,3 +4,4 @@ export { ThreadDetailPage } from './ThreadDetailPage'
 export { DecisionSpacePage } from './DecisionSpacePage'
 export { PreferencesPage } from './PreferencesPage'
 export { SharePage } from './SharePage'
+export { CalibrationPage } from './CalibrationPage'
diff --git a/web/src/stores/calibration.ts b/web/src/stores/calibration.ts
new file mode 100644
index 0000000..efce8f1
--- /dev/null
+++ b/web/src/stores/calibration.ts
@@ -0,0 +1,57 @@
+import { create } from 'zustand'
+import { api } from '@/api/client'
+import type { CalibrationBucket } from '@/api/types'
+
+interface CalibrationState {
+  buckets: CalibrationBucket[]
+  totalDecisions: number
+  totalWithOutcomes: number
+  overallAccuracy: number
+  ece: number
+  loading: boolean
+  error: string | null
+
+  // Filters
+  category: string | null
+
+  // Actions
+  fetchCalibration: () => Promise<void>
+  setCategory: (category: string | null) => void
+}
+
+export const useCalibrationStore = create<CalibrationState>((set, get) => ({
+  buckets: [],
+  totalDecisions: 0,
+  totalWithOutcomes: 0,
+  overallAccuracy: 0,
+  ece: 0,
+  loading: false,
+  error: null,
+
+  category: null,
+
+  fetchCalibration: async () => {
+    set({ loading: true, error: null })
+    try {
+      const { category } = get()
+      const params: { category?: string } = {}
+      if (category) params.category = category
+
+      const data = await api.calibration(params)
+      set({
+        buckets: data.buckets,
+        totalDecisions: data.total_decisions,
+        totalWithOutcomes: data.total_with_outcomes,
+        overallAccuracy: data.overall_accuracy,
+        ece: data.ece,
+        loading: false,
+      })
+    } catch (e) {
+      set({ error: (e as Error).message, loading: false })
+    }
+  },
+
+  setCategory: (category) => {
+    set({ category })
+  },
+}))
diff --git a/web/src/stores/consensus.ts b/web/src/stores/consensus.ts
index cfc7b0e..3c2607a 100644
--- a/web/src/stores/consensus.ts
+++ b/web/src/stores/consensus.ts
@@ -23,6 +23,7 @@ export interface RoundData {
   reviser: string | null
   revision: string | null
   confidence: number | null
+  rigor: number | null
   dissent: string | null
 }
 
@@ -42,6 +43,7 @@ interface ConsensusState {
   question: string | null
   decision: string | null
   confidence: number | null
+  rigor: number | null
   dissent: string | null
   cost: number | null
   threadId: string | null
@@ -64,6 +66,7 @@ function createEmptyRound(round: number): RoundData {
     reviser: null,
     revision: null,
     confidence: null,
+    rigor: null,
     dissent: null,
   }
 }
@@ -77,6 +80,7 @@ export const useConsensusStore = create<ConsensusState>((set, get) => ({
   question: null,
   decision: null,
   confidence: null,
+  rigor: null,
   dissent: null,
   cost: null,
   threadId: null,
@@ -91,6 +95,7 @@ export const useConsensusStore = create<ConsensusState>((set, get) => ({
       question,
       decision: null,
       confidence: null,
+      rigor: null,
       dissent: null,
       cost: null,
       threadId: null,
@@ -129,6 +134,7 @@ export const useConsensusStore = create<ConsensusState>((set, get) => ({
       question: null,
       decision: null,
       confidence: null,
+      rigor: null,
       dissent: null,
       cost: null,
       threadId: null,
@@ -216,6 +222,7 @@ function handleEvent(
         currentPhase: 'COMMIT' as ConsensusPhase,
         rounds: updateRound(state.rounds, idx, {
           confidence: event.confidence,
+          rigor: event.rigor,
           dissent: event.dissent,
         }),
       })
@@ -227,6 +234,7 @@ function handleEvent(
         status: 'complete',
         decision: event.decision,
         confidence: event.confidence,
+        rigor: event.rigor,
         dissent: event.dissent,
         cost: event.cost,
         threadId: event.thread_id ?? null,
diff --git a/web/src/stores/index.ts b/web/src/stores/index.ts
index cae5eed..f5babdb 100644
--- a/web/src/stores/index.ts
+++ b/web/src/stores/index.ts
@@ -4,3 +4,4 @@ export { useThreadsStore } from './threads'
 export { useDecisionSpaceStore } from './decision-space'
 export type { SpaceFilters } from './decision-space'
 export { usePreferencesStore } from './preferences'
+export { useCalibrationStore } from './calibration'
diff --git a/web/tsconfig.tsbuildinfo b/web/tsconfig.tsbuildinfo
index cf66e67..4db173d 100644
--- a/web/tsconfig.tsbuildinfo
+++ b/web/tsconfig.tsbuildinfo
@@ -1 +1 @@
-{"root":["./src/app.tsx","./src/main.tsx","./src/test-setup.ts","./src/three-types.d.ts","./src/api/client.ts","./src/api/index.ts","./src/api/types.ts","./src/api/websocket.ts","./src/components/consensus/confidencemeter.tsx","./src/components/consensus/consensuscomplete.tsx","./src/components/consensus/consensuspanel.tsx","./src/components/consensus/costticker.tsx","./src/components/consensus/dissentbanner.tsx","./src/components/consensus/modelbadge.tsx","./src/components/consensus/phasecard.tsx","./src/components/consensus/questioninput.tsx","./src/components/consensus/streamingtext.tsx","./src/components/consensus/index.ts","./src/components/decision-space/decisioncloud.tsx","./src/components/decision-space/decisionspace.tsx","./src/components/decision-space/filterpanel.tsx","./src/components/decision-space/gridfloor.tsx","./src/components/decision-space/scatterfallback.tsx","./src/components/decision-space/scene3d.tsx","./src/components/decision-space/timelineslider.tsx","./src/components/decision-space/index.ts","./src/components/layout/shell.tsx","./src/components/layout/sidebar.tsx","./src/components/layout/topbar.tsx","./src/components/layout/index.ts","./src/components/preferences/preferencespanel.tsx","./src/components/preferences/index.ts","./src/components/shared/badge.tsx","./src/components/shared/errorboundary.tsx","./src/components/shared/exportmenu.tsx","./src/components/shared/glasspanel.tsx","./src/components/shared/glowbutton.tsx","./src/components/shared/gridoverlay.tsx","./src/components/shared/markdown.tsx","./src/components/shared/pagetransition.tsx","./src/components/shared/particlefield.tsx","./src/components/shared/skeleton.tsx","./src/components/shared/index.ts","./src/components/threads/threadbrowser.tsx","./src/components/threads/threadcard.tsx","./src/components/threads/threaddetail.tsx","./src/components/threads/threadfilters.tsx","./src/components/threads/threadsearch.tsx","./src/components/threads/turncard.tsx","./src/components/threads/index.ts","./src/hooks/index.ts","./src/hooks/usemediaquery.ts","./src/pages/consensuspage.tsx","./src/pages/decisionspacepage.tsx","./src/pages/preferencespage.tsx","./src/pages/sharepage.tsx","./src/pages/threaddetailpage.tsx","./src/pages/threadspage.tsx","./src/pages/index.ts","./src/stores/consensus.ts","./src/stores/decision-space.ts","./src/stores/index.ts","./src/stores/preferences.ts","./src/stores/threads.ts","./src/utils/colors.ts","./src/utils/index.ts"],"version":"5.9.3"}
\ No newline at end of file
+{"root":["./src/app.tsx","./src/main.tsx","./src/test-setup.ts","./src/three-types.d.ts","./src/api/client.ts","./src/api/index.ts","./src/api/types.ts","./src/api/websocket.ts","./src/components/calibration/calibrationdashboard.tsx","./src/components/calibration/index.ts","./src/components/consensus/confidencemeter.tsx","./src/components/consensus/consensuscomplete.tsx","./src/components/consensus/consensuspanel.tsx","./src/components/consensus/costticker.tsx","./src/components/consensus/dissentbanner.tsx","./src/components/consensus/modelbadge.tsx","./src/components/consensus/phasecard.tsx","./src/components/consensus/questioninput.tsx","./src/components/consensus/streamingtext.tsx","./src/components/consensus/index.ts","./src/components/decision-space/decisioncloud.tsx","./src/components/decision-space/decisionspace.tsx","./src/components/decision-space/filterpanel.tsx","./src/components/decision-space/gridfloor.tsx","./src/components/decision-space/scatterfallback.tsx","./src/components/decision-space/scene3d.tsx","./src/components/decision-space/timelineslider.tsx","./src/components/decision-space/index.ts","./src/components/layout/shell.tsx","./src/components/layout/sidebar.tsx","./src/components/layout/topbar.tsx","./src/components/layout/index.ts","./src/components/preferences/preferencespanel.tsx","./src/components/preferences/index.ts","./src/components/shared/badge.tsx","./src/components/shared/errorboundary.tsx","./src/components/shared/exportmenu.tsx","./src/components/shared/glasspanel.tsx","./src/components/shared/glowbutton.tsx","./src/components/shared/gridoverlay.tsx","./src/components/shared/markdown.tsx","./src/components/shared/pagetransition.tsx","./src/components/shared/particlefield.tsx","./src/components/shared/skeleton.tsx","./src/components/shared/index.ts","./src/components/threads/threadbrowser.tsx","./src/components/threads/threadcard.tsx","./src/components/threads/threaddetail.tsx","./src/components/threads/threadfilters.tsx","./src/components/threads/threadsearch.tsx","./src/components/threads/turncard.tsx","./src/components/threads/index.ts","./src/hooks/index.ts","./src/hooks/usemediaquery.ts","./src/pages/calibrationpage.tsx","./src/pages/consensuspage.tsx","./src/pages/decisionspacepage.tsx","./src/pages/preferencespage.tsx","./src/pages/sharepage.tsx","./src/pages/threaddetailpage.tsx","./src/pages/threadspage.tsx","./src/pages/index.ts","./src/stores/calibration.ts","./src/stores/consensus.ts","./src/stores/decision-space.ts","./src/stores/index.ts","./src/stores/preferences.ts","./src/stores/threads.ts","./src/utils/colors.ts","./src/utils/index.ts"],"version":"5.9.3"}
\ No newline at end of file