From 69d28ea977f43c917ae94a6b8f9c2bb7fbb72644 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 21 Jun 2026 16:08:30 +0000
Subject: [PATCH 1/2] Document the 2026-06 platform layer in the docs site
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The website auto-imports docs/design/ and shows the 2026-06 design
docs under Internals → Design notes — but the user-facing guides
sidebar didn't mention any of the new platform features. Fixing
that: 8 new hand-authored Starlight guide pages + sidebar group +
landing-page section + intro doc cross-link.

docs/cli.md — top-level shape now lists every platform subcommand;
seven new sections describe vx mcp, vx coordinator, vx run --worker,
vx insights serve, vx serve (with /version /events /stream), vx dev.

apps/docs/src/content/docs/guides/ (NEW pages, hand-authored
Starlight content):

- mcp.md — Model Context Protocol server for AI agents (Claude
  Code, Cursor, Continue.dev). Tool catalog, config snippets, what
  the agent can actually do today.
- distributed-ci.md — vx coordinator + vx run --worker e2e.
  Two-terminal quick start, GHA matrix pattern, dispatch diagram,
  honest list of Phase A-B limits.
- plugins.md — Plugin contract + 3 worked examples (Sentry, Slack
  summary, timeseries metrics). Crash-isolation rules. What plugins
  can NOT do today.
- predictive-scheduling.md — opt-in via predictive: true. How the
  topo-DP works, two example graphs showing where it differs from
  the default, observability via vx info --history and vx mcp
  getRunHistory.
- insights.md — vx insights serve. SPA + static cache.db server
  diagram, DuckDB-WASM sqlite_scanner explanation, env vars,
  known limits.
- vx-cloud.md — Cloudflare deploy walk-through. Bindings table,
  HMAC signing setup, event ingest pipeline, what's deferred,
  cost notes (free tier sufficient for small teams).
- otel-bridge.md — OpenTelemetry CI/CD spans. Backend config
  snippets for Grafana / Honeycomb / Datadog / Jaeger. Sample
  event shape with cicd.pipeline.* attributes.
- wire-protocol.md — JSON-RPC 2.0 envelope. SSE curl + WS TS
  client examples in both legacy and envelope forms. Channels
  table, error codes, sample subscribers.

apps/docs/astro.config.mjs — new 'Platform & extensions' sidebar
group listing all 8 guides between 'Build your monorepo' and
'Migrate to vx'.

apps/docs/src/pages/index.astro — new 'The open platform layer'
section after 'What sets vx apart.' Six cards link to the guides:
MCP, Distributed CI, Plugin API, Wire protocol, vx Cloud,
Predictive scheduling. Card icons added to featIcons; CSS for
.platform-card hover state in landing.css.

apps/docs/src/content/docs/introduction.md — new 'The open
platform layer' section after the differentiators. One-line each
for the eight surfaces with cross-links to the guides.

Verification: bun src/bin.ts run @vzn/vx-docs#build produces 111
HTML pages including /guides/mcp/, /guides/distributed-ci/,
/guides/plugins/, /guides/predictive-scheduling/, /guides/insights/,
/guides/vx-cloud/, /guides/otel-bridge/, /guides/wire-protocol/.
Full CI gate: 3 success / 3 success / 3 success.
---
 apps/docs/astro.config.mjs                    |  13 ++
 .../src/content/docs/guides/distributed-ci.md | 147 ++++++++++++
 apps/docs/src/content/docs/guides/insights.md | 119 ++++++++++
 apps/docs/src/content/docs/guides/mcp.md      |  93 ++++++++
 .../src/content/docs/guides/otel-bridge.md    | 144 ++++++++++++
 apps/docs/src/content/docs/guides/plugins.md  | 198 ++++++++++++++++
 .../docs/guides/predictive-scheduling.md      | 145 ++++++++++++
 apps/docs/src/content/docs/guides/vx-cloud.md | 177 +++++++++++++++
 .../src/content/docs/guides/wire-protocol.md  | 211 ++++++++++++++++++
 apps/docs/src/content/docs/introduction.md    |  30 +++
 apps/docs/src/pages/index.astro               | 101 +++++++++
 apps/docs/src/styles/landing.css              |  22 ++
 docs/cli.md                                   | 201 +++++++++++++++++
 13 files changed, 1601 insertions(+)
 create mode 100644 apps/docs/src/content/docs/guides/distributed-ci.md
 create mode 100644 apps/docs/src/content/docs/guides/insights.md
 create mode 100644 apps/docs/src/content/docs/guides/mcp.md
 create mode 100644 apps/docs/src/content/docs/guides/otel-bridge.md
 create mode 100644 apps/docs/src/content/docs/guides/plugins.md
 create mode 100644 apps/docs/src/content/docs/guides/predictive-scheduling.md
 create mode 100644 apps/docs/src/content/docs/guides/vx-cloud.md
 create mode 100644 apps/docs/src/content/docs/guides/wire-protocol.md

diff --git a/apps/docs/astro.config.mjs b/apps/docs/astro.config.mjs
index 541a0b2..fa7245a 100644
--- a/apps/docs/astro.config.mjs
+++ b/apps/docs/astro.config.mjs
@@ -63,6 +63,19 @@ export default defineConfig({
             { label: 'Workspace configuration', link: '/guides/workspace-config/' },
           ],
         },
+        {
+          label: 'Platform & extensions',
+          items: [
+            { label: 'vx mcp — AI agents', link: '/guides/mcp/' },
+            { label: 'Distributed CI execution', link: '/guides/distributed-ci/' },
+            { label: 'Writing a vx plugin', link: '/guides/plugins/' },
+            { label: 'Predictive scheduling', link: '/guides/predictive-scheduling/' },
+            { label: 'vx insights — local dashboard', link: '/guides/insights/' },
+            { label: 'vx Cloud (Cloudflare deploy)', link: '/guides/vx-cloud/' },
+            { label: 'OpenTelemetry CI/CD spans', link: '/guides/otel-bridge/' },
+            { label: 'vx serve wire protocol', link: '/guides/wire-protocol/' },
+          ],
+        },
         {
           label: 'Migrate to vx',
           items: [
diff --git a/apps/docs/src/content/docs/guides/distributed-ci.md b/apps/docs/src/content/docs/guides/distributed-ci.md
new file mode 100644
index 0000000..7c9b51e
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/distributed-ci.md
@@ -0,0 +1,147 @@
+---
+title: Distributed CI execution
+description: Run your task graph across multiple machines. vx coordinator dispatches assignments to vx workers over WebSocket. Content-addressed; workers are fungible. OSS, self-hostable, no daemon.
+---
+
+vx ships an OSS distributed task execution layer: one coordinator
+holds the graph, many workers pull and execute. Tasks are
+content-addressed by their v22 cache hash, so any worker producing
+artifact `<hash>` satisfies every consumer of `<hash>` — workers
+are fungible.
+
+This is the Nx-Cloud-DTE equivalent, OSS and self-hostable.
+Phase A-B today; capability labels, cache-affinity, and a hosted
+deployment are deferred — see
+`docs/design/distributed-ci-2026-06.md` for the full roadmap.
+
+## The two roles
+
+- **Coordinator** (`vx coordinator <tasks…>`) — one per CI build.
+  Holds the global ready queue, dispatches to workers, exits when
+  every task ends.
+- **Worker** (`vx run --worker <coord-url>`) — N per build.
+  Stateless and fungible. Pulls assignments, executes via
+  `runCommand`, reports outcomes, repeats.
+
+## Quick start (two terminals)
+
+```sh
+# Terminal 1: start the coordinator
+vx coordinator lint test build --port 5180 --workers 2
+
+# Terminal 2: attach a worker
+vx run --worker ws://127.0.0.1:5180 --capacity 4
+```
+
+The coordinator dispatches every ready task to the worker; the
+worker executes them in parallel up to `--capacity`. When every
+task terminates, the coordinator sends `coord:drain`, the worker
+exits, and the coordinator exits with 0 (or 1 if any task failed).
+
+## GitHub Actions
+
+The canonical pattern: one matrix index hosts the coordinator,
+the rest attach as workers.
+
+```yaml
+jobs:
+  build:
+    strategy:
+      matrix:
+        worker: [0, 1, 2, 3]   # 4-way parallelism
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: curl -fsSL https://raw.githubusercontent.com/vznjs/vx/main/install.sh | sh
+
+      # Worker 0 hosts the coordinator; others wait for it and attach.
+      - if: matrix.worker == 0
+        run: vx coordinator lint test build --port 5180 --workers 4
+
+      - if: matrix.worker != 0
+        run: |
+          until nc -z runner-0 5180; do sleep 1; done
+          vx run --worker ws://runner-0:5180 --capacity 2
+```
+
+`runner-0` is a placeholder hostname: GitHub-hosted matrix jobs
+can't reach each other out of the box. Resolving it to the index-0
+runner needs a tunnel (Tailscale free tier works), self-hosted
+runners on one LAN, or a GHA-specific runner-link pattern. A composite
+action (`vx/distributed-action@v1`) packaging this is on the roadmap.
+
+## How dispatch works
+
+```
+        ┌────────────────────────────────────────┐
+        │ vx coordinator                          │
+        │  • prepareRun → workspace + task graph  │
+        │  • per-node v22 hash (assignment key)   │
+        │  • ready queue, in-flight per worker    │
+        │  • WS server                            │
+        └─────┬──────────────────────────┬───────┘
+              │                          │
+              ▼ task:assign              ▼ task:assign
+        ┌────────────┐             ┌────────────┐
+        │  worker N  │             │  worker M  │
+        │  pulls     │             │  pulls     │
+        │  spawns    │             │  spawns    │
+        │  reports   │             │  reports   │
+        └────────────┘             └────────────┘
+```
+
+1. Coordinator runs the same `prepareRun` pipeline `vx run` uses
+   locally — it builds the same graph, with the same v22 cache
+   hashes per node.
+2. Workers register via `worker:hello { workerId, capacity, labels }`.
+3. Coordinator dispatches via `task:assign { hash, node }` up to
+   each worker's capacity.
+4. Workers spawn `runCommand`, stream stdout/stderr back over
+   `worker:stdout` / `worker:stderr` messages.
+5. Workers report `worker:done { taskHash, outcome }` on completion.
+6. Downstream tasks become ready as their upstream finishes.
+
+The wire format is JSON-RPC 2.0 — same envelope `vx serve` speaks.
+Full spec: `docs/design/wire-protocol-2026-06.md`.
+
+## Disconnect recovery
+
+If a worker disconnects mid-task, the coordinator detects the WS
+close, pulls every in-flight assignment off that worker, and
+puts the hashes back on the ready queue. The next attached worker
+picks them up.
+
+## Performance characteristics
+
+| Workload | Local single-host | Distributed (4 workers) | Notes |
+| --- | --- | --- | --- |
+| Cold run, deep graph | 1× | 0.25–0.4× wall time | Bounded by graph's critical path |
+| Warm full-cache | ≤ 200 ms | similar | Cache-hit short-circuits — coordinator dispatch overhead dominates |
+| Mixed cache state | 1× | 0.4–0.7× | Worker mix-and-match wins on long tasks |
+
+These numbers will improve once workers probe the remote cache
+before executing (next iteration).
+
+## Known limits today (Phase A-B)
+
+- **Workers don't probe the remote cache yet.** Every assigned task
+  spawns fresh. Set up `VX_REMOTE_CACHE_URL` for fastest results
+  via the local prefetch path.
+- **No capability labels filter.** Workers report labels; the
+  coordinator doesn't filter `task:assign` by them.
+- **No critical-path priority on the coordinator.** The ready
+  queue is FIFO. The local scheduler has predictive priorities
+  (`predictive: true`); the coordinator doesn't read them.
+- **Submitter logs aren't aggregated.** Worker stdout reaches the
+  coordinator but no submitter-side `vx run --coordinator` is wired
+  yet to fan it back to a user.
+- **No TLS.** Hardcoded `ws://`. For cross-host, terminate TLS at a
+  reverse proxy or use Tailscale/cloudflared.
+
+The protocol extension (`worker:*` + `task:assign` + `coord:drain`)
+lives in `src/orchestrator/protocol.ts`; the JSON-RPC 2.0 envelope
+adapters live in `src/orchestrator/wire.ts`. Both are stable; the
+gaps above are wiring follow-ups.
+
+See also: `docs/design/distributed-ci-2026-06.md` (full design),
+`docs/progress/implementation-log-2026-06.md` (Step 4 narrative).
diff --git a/apps/docs/src/content/docs/guides/insights.md b/apps/docs/src/content/docs/guides/insights.md
new file mode 100644
index 0000000..27724a2
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/insights.md
@@ -0,0 +1,119 @@
+---
+title: vx insights — local run dashboard
+description: Boot a Solid + DuckDB-WASM SPA against your workspace's cache.db. Historical run flamegraphs, per-task trends, no backend, no daemon.
+---
+
+`vx insights serve` opens a localhost dashboard backed by your
+workspace's `cache.db`. Pure read-only analytics — no backend, no
+upload, no daemon. The page reads SQLite in the browser via DuckDB-WASM.
+
+## Quick start
+
+```sh
+cd your-workspace
+vx insights serve
+```
+
+That prints two URLs:
+
+- The SPA on `http://127.0.0.1:5290` (Vite dev server).
+- A tiny static HTTP server (kernel-assigned port) exposing
+  `cache.db` at `/cache.db` with the SQLite MIME so the SPA can
+  fetch it.
+
+`Ctrl-C` stops both.
+
+Override the SPA port with `--port`:
+
+```sh
+vx insights serve --port 7000
+```
+
+## What you see
+
+Two pages:
+
+- **Overview** — recent runs list, sorted by start time descending.
+  Each row shows project, task name, status, duration, cache
+  source. Click a row → run detail.
+- **Run detail** — per-task timeline (flamegraph), one lane per
+  project, bars colored by status / cache source. The same data
+  drives both — replayable in the browser.
+
+## How it works
+
+```
+   Browser
+   ┌─────────────────────────────┐
+   │ apps/insights SPA (Solid)   │
+   │  • UnoCSS dark theme        │
+   │  • Solid Router (hash)      │
+   │  • DuckDB-WASM (~30 MB lazy)│
+   └────────┬────────────────────┘
+            │ fetch /cache.db
+            ▼
+   ┌─────────────────────────────┐
+   │ Tiny static server (Bun)    │
+   │  • cache.db @ vnd.sqlite3   │
+   │  • CORS *                   │
+   │  • /health                  │
+   └────────┬────────────────────┘
+            │ reads
+            ▼
+   ┌─────────────────────────────┐
+   │ <workspaceRoot>/.vx/cache/  │
+   │  cache.db                   │
+   └─────────────────────────────┘
+```
+
+DuckDB-WASM reads SQLite files directly via the `sqlite_scanner`
+extension — no ETL, no conversion. The SPA `ATTACH`-es the fetched
+bytes as a database, runs aggregations client-side, renders Solid
+components. Queries stay in the browser.
+
+## What's needed on disk
+
+- `<workspaceRoot>/.vx/cache/cache.db` — at least one `vx run`
+  in the workspace.
+- `<vx checkout>/apps/insights/` — the SPA source. Set
+  `VX_INSIGHTS_DIR` if the installed binary can't find a checkout
+  alongside its `import.meta.dir`.
+
+If `cache.db` is missing, `vx insights` prints a clean hint and
+exits 1. If the SPA scaffold is missing, the binary points you at
+`VX_INSIGHTS_DIR`.
+
+## Why client-side analytics
+
+- **Zero backend.** Nothing to provision, nothing to operate. The
+  static server just serves bytes.
+- **Privacy by default.** Data never leaves your laptop.
+- **Read-only by construction.** The SPA fetches `cache.db` once
+  per page load. Mutating queries can't touch your real cache.
+- **Open at the data layer.** Anyone can write a DuckDB query
+  against `cache.db` directly — the SPA is just a UI on top.
+
+For team-wide analytics, see the
+[vx Cloud guide](/vx/guides/vx-cloud/).
+
+## Known limits
+
+- **DuckDB-WASM is heavy** (~30 MB). The first query is slow because
+  the WASM bundle and SQLite extension must download first.
+  Subsequent queries are fast.
+- **No real-time.** The page snapshots `cache.db` on load. Reload
+  to see new runs.
+- **Charts are minimal today.** The Overview and Run detail pages
+  ship; cache hit-rate trends, per-author breakdowns, and the
+  "Bottleneck atlas" from the cloud spec are scaffold-pending.
+
+## What's coming
+
+- More pages (per-task trends, cache cliff detection, regression
+  surfacing).
+- Auto-refresh when `cache.db` mtime changes.
+- An option to embed the SPA inside `vx serve` for an in-browser
+  live view of in-flight runs.
+
+See also: `docs/design/vx-cloud-2026-06.md` §2.1 (local face),
+`apps/insights/README.md`.
diff --git a/apps/docs/src/content/docs/guides/mcp.md b/apps/docs/src/content/docs/guides/mcp.md
new file mode 100644
index 0000000..b5db50b
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/mcp.md
@@ -0,0 +1,93 @@
+---
+title: vx mcp — Model Context Protocol server
+description: Expose vx as a typed tool surface to AI coding agents (Claude Code, Cursor, Continue.dev, GitHub Copilot). Read cache stats, run history, and explain rebuild causes through the standard MCP protocol.
+---
+
+`vx mcp` boots a Model Context Protocol (MCP) server over stdio so
+AI coding agents can query your repo's build state through the
+standard agent-tool protocol. No HTTP, no auth — stdio is
+process-private.
+
+## What is MCP
+
+The Model Context Protocol is the de-facto standard for AI agents to
+discover and call typed tools. Claude Code, Cursor, Continue.dev,
+VS Code GitHub Copilot, and many others all speak it. By shipping an
+MCP server, vx gives any of them a typed surface for your build
+state.
+
+Spec: <https://modelcontextprotocol.io/>
+
+## Quick start
+
+```sh
+vx mcp                           # stdio transport (default)
+```
+
+Add to your agent's MCP config (Claude Code example):
+
+```jsonc
+// .mcp.json (project root)
+{
+  "mcpServers": {
+    "vx": { "command": "vx", "args": ["mcp"] }
+  }
+}
+```
+
+Cursor reads `.cursor/mcp.json`; Continue.dev reads
+`~/.continue/config.json`. The shape is identical: `command + args`.
+
+## Tools exposed
+
+| Tool | What it answers |
+| --- | --- |
+| `getCacheStats` | "What's the state of my cache right now?" — entries, total size, runs/hits last 24h, hit rate |
+| `getRunHistory` | "Which tasks have I been running, and how fast?" — distinct (project, task) pairs with p50/p99/successRate/hitRate aggregates |
+| `explainCacheKey` | "What's the cache identity for `pkg#build`?" — latest entries-row (hash, command, exit code, duration, size, created_at) |
+| `whyDidThisRerun` | "Why did this task re-execute instead of using the cache?" — compares the run's cache hash against the previous run for the same task |
+
+All four read your workspace's local `cache.db` on demand. Open
+your agent and ask things like:
+
+- "What's my cache hit rate this week?"
+- "Why did `pkg-a#test` re-run in the last build?"
+- "Which tasks miss the cache most often?"
+- "What was the slowest task in the last 50 runs?"
+
+## How it works
+
+The server is built on `@modelcontextprotocol/sdk`, the official MCP
+SDK. `vx mcp` opens cache.db, exposes the four tools
+via `setRequestHandler(ListToolsRequestSchema, …)` /
+`setRequestHandler(CallToolRequestSchema, …)`, and pipes JSON-RPC
+2.0 over stdin/stdout. The agent reads tool results as text content
+(stringified JSON).
+
+vx's MCP tools share dispatch with the inspector RPC channel
+(`vx:rpc` from `docs/design/wire-protocol-2026-06.md`). When the
+WebSocket-side inspector ships, every MCP tool will work over WS
+too — one handler, two transports.
+
+## What's coming
+
+- `runTasks` — agents trigger a `vx run` directly (driver surface).
+- `getRunState` — live state of an in-flight run (works with
+  `vx serve --ui`).
+- MCP resources for `vx://runs/{runId}` and `vx://history` (browseable).
+- HTTP transport (Streamable) so a single `vx mcp` instance can
+  serve multiple agent clients.
+
+## Troubleshooting
+
+- **Agent says "no MCP tools" after adding the config.** Restart
+  the agent. Most MCP clients only re-read config on launch.
+- **`vx mcp: requires @modelcontextprotocol/sdk`** — the binary
+  was built without the SDK. Rebuild with
+  `bun install && bun src/bin.ts run build`.
+- **Empty results from `getCacheStats`** — you haven't run any
+  `vx run` yet, or you're pointing at the wrong workspace. The
+  server discovers the workspace via `findWorkspaceRoot(cwd)`; run
+  the agent from the workspace root.
+
+See also: `docs/design/extension-protocol-2026-06.md`.
diff --git a/apps/docs/src/content/docs/guides/otel-bridge.md b/apps/docs/src/content/docs/guides/otel-bridge.md
new file mode 100644
index 0000000..c09c158
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/otel-bridge.md
@@ -0,0 +1,144 @@
+---
+title: OpenTelemetry CI/CD spans
+description: Pipe every vx run's events into any OTLP-compatible backend (Grafana / Tempo / Honeycomb / Datadog / Jaeger). Single env var, single npm install, zero config in code.
+---
+
+vx ships an opt-in OpenTelemetry exporter at
+`@vzn/vx-otel-bridge`. Set one env var, install one package, and
+every `vx run` emits OTel CI/CD-conventions log records to your
+existing observability stack.
+
+## Why OTel
+
+The OpenTelemetry CI/CD semantic conventions
+(<https://opentelemetry.io/docs/specs/semconv/cicd/cicd-spans/>)
+define canonical attribute names for every CI concept:
+`cicd.pipeline.run.id`, `cicd.pipeline.task.name`,
+`cicd.pipeline.task.run.result`, `cicd.worker.id`. By emitting in
+this shape, vx events arrive at Grafana / Tempo / Honeycomb /
+Datadog / Jaeger / your-self-hosted-collector without any
+integration code — they already understand the spec.
+
+## Quick start
+
+```sh
+# 1. Set the OTLP endpoint (single env var)
+export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
+export OTEL_SERVICE_NAME=vx
+
+# 2. Install the bridge in your workspace
+bun add @vzn/vx-otel-bridge
+
+# 3. Run anything
+vx run lint
+```
+
+vx detects the env var, dynamically imports the bridge, and
+attaches it as an additional event-bus subscriber. Every task's
+lifecycle becomes an OTel log record.
+
+If the env var is unset, or the bridge package isn't installed,
+core loads nothing extra — the runtime stays at 19 packages.
+
+## What lands in your backend
+
+For each task, vx emits a `task:complete` record shaped like:
+
+```jsonc
+{
+  "timeUnixNano": "1719009123000000000",
+  "severityNumber": 9,                              // INFO; 17 for failed
+  "severityText": "info",
+  "body": "pkg-a#build → success (123ms)",
+  "traceId": "01931d80-2c0c-7000-8000-000000000000", // vx run id
+  "spanId": "pkg-a#build",                          // task id
+  "attributes": {
+    "vx.kind": "task:complete",
+    "cicd.pipeline.run.id": "01931d80-2c0c-7000-8000-000000000000",
+    "cicd.pipeline.task.name": "pkg-a#build",
+    "cicd.pipeline.task.run.result": "success",
+    "vx.outcome": {
+      "status": "success",
+      "exitCode": 0,
+      "durationMs": 123,
+      "cacheHit": false
+    }
+  }
+}
+```
+
+Plus `run:start`, `task:start`, `task:stdout` / `task:stderr` (the
+chunks become log bodies), `run:status`, and `run:end`.
+
+## Backend pointers
+
+The bridge speaks OTLP/HTTP — every major backend accepts it.
+
+```sh
+# Grafana Cloud / Tempo
+export OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-<region>.grafana.net/otlp
+export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Basic <base64-creds>"
+
+# Honeycomb
+export OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io
+export OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=<api-key>"
+
+# Datadog (via OTel collector)
+export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
+
+# Jaeger (running locally)
+export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
+```
+
+Any header your backend requires can be set via
+`OTEL_EXPORTER_OTLP_HEADERS=key1=val1,key2=val2` — the standard
+OTLP discovery rules apply (`@opentelemetry/exporter-logs-otlp-http`
+handles it).
+
+## What this gives you
+
+- **Per-run timelines.** Each `vx run` is a trace; each task is a
+  log record with the run's trace id. Tools that show distributed
+  traces show every task of a run grouped.
+- **Per-task percentiles.** Honeycomb / Grafana can aggregate
+  `durationMs` by `cicd.pipeline.task.name` for p50/p99.
+- **Regression alerts.** Set up an alert on "p99 of `lint` exceeds
+  baseline by 3×" and your CI dashboard pings before the team
+  notices.
+- **Cross-build dashboards.** Filter by `cicd.pipeline.run.id` or
+  by repo/branch/commit (when the cloud uploader carries them).
+
+## How it works
+
+`vx run` checks `OTEL_EXPORTER_OTLP_ENDPOINT` at startup. If set
+and `options.log` is undefined (i.e. the real CLI path, not an
+embedder), it dynamically imports `@vzn/vx-otel-bridge` via a
+string-variable specifier (so the optional peer doesn't bloat
+core's dep tree). The bridge's `createOtelBridge({ endpoint,
+serviceName }).attach(bus)` subscribes to the event bus and pushes
+each event through an OTLP log-record exporter.
+
+On `run:end`, the bridge flushes pending records and is detached.
+
+## Limits today
+
+- **Log records, not spans.** Each event is a log record correlated
+  with a synthetic span id — tools that prefer real spans (start/end
+  pairs) see flat log streams. Real spans are coming.
+- **No metric export.** Only logs/events. Aggregations need to
+  happen on the backend side.
+- **Local-only attribution.** `cicd.pipeline.run.id` is vx's run
+  UUIDv7; mapping to your CI job (e.g. GHA's `${{ github.run_id }}`)
+  takes a tiny shell wrapper or a future env-var fold.
+
+## Combining with vx Cloud
+
+`vx-cloud` (the Cloudflare deployment) also persists events to D1
+via the EVENT_INGEST queue. The two are independent — you can run
+either, both, or neither. OTel is the "ship to my existing
+observability stack"; vx Cloud is the "spin up vx-native dashboards
+in my CF account."
+
+See also: `packages/otel-bridge/README.md`,
+`docs/design/wire-protocol-2026-06.md` §4 (the OTel LogRecord
+shape).
diff --git a/apps/docs/src/content/docs/guides/plugins.md b/apps/docs/src/content/docs/guides/plugins.md
new file mode 100644
index 0000000..342820f
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/plugins.md
@@ -0,0 +1,198 @@
+---
+title: Writing a vx plugin
+description: Hook into the run lifecycle from vx.workspace.ts. Plugins are in-process subscribers on the event bus with crash isolation per hook. Forward outcomes to Sentry, post summaries to Slack, ship metrics anywhere.
+---
+
+A plugin is a small object you register in `vx.workspace.ts` that
+subscribes to lifecycle hooks during every `vx run`. Plugins observe
+the run; they don't redirect it. Forward outcomes to Sentry, post
+summaries to Slack, ship metrics to your timeseries DB, or just print
+custom output.
+
+## The contract
+
+```ts
+type Plugin = {
+  readonly name: string                          // 'org/plugin-name'
+  setup(ctx: PluginContext): void | Promise<void>
+}
+
+type PluginContext = {
+  readonly workspaceRoot: string
+  readonly cacheDir: string
+  readonly bus: EventBus                          // raw event stream
+  on<K extends PluginHookName>(hook: K, handler: PluginHookHandlers[K]): void
+}
+```
+
+Available hooks (call from inside `setup`):
+
+| Hook | Fires when | Args |
+| --- | --- | --- |
+| `onRunStart` | The run begins | `(info: { total, concurrency, requestedCount })` |
+| `onTaskStart` | A task starts executing | `(node: TaskNode)` |
+| `onTaskStdout` | A task emits a stdout chunk | `(node, chunk)` |
+| `onTaskStderr` | A task emits a stderr chunk | `(node, chunk)` |
+| `onTaskComplete` | A task ends in any terminal state | `(node, outcome: TaskOutcome)` |
+| `onRunStatus` | A run-level status line is printed | `(line: string)` |
+| `onRunEnd` | The run finishes | `()` |
+
+## Hello, plugin
+
+```ts
+// vx.workspace.ts
+import { defineWorkspace } from '@vzn/vx'
+
+export default defineWorkspace({
+  plugins: [
+    {
+      name: 'org/hello',
+      setup(ctx) {
+        ctx.on('onRunStart', (info) => {
+          console.log(`[hello] starting run of ${info.total} tasks`)
+        })
+        ctx.on('onTaskComplete', (node, outcome) => {
+          console.log(`[hello] ${node.id} → ${outcome.status} (${outcome.durationMs}ms)`)
+        })
+      },
+    },
+  ],
+})
+```
+
+Run `vx run lint` and you'll see the plugin print before/after.
+
+## A Sentry plugin (failed tasks → exceptions)
+
+```ts
+// plugins/sentry.ts
+import * as Sentry from '@sentry/node'
+import type { Plugin } from '@vzn/vx'
+
+export function sentryPlugin(opts: { dsn: string }): Plugin {
+  Sentry.init({ dsn: opts.dsn })
+  return {
+    name: 'org/sentry',
+    setup(ctx) {
+      ctx.on('onTaskComplete', (node, outcome) => {
+        if (outcome.status !== 'failed') return
+        Sentry.captureException(new Error(`vx task failed: ${node.id}`), {
+          extra: { exitCode: outcome.exitCode, durationMs: outcome.durationMs },
+        })
+      })
+    },
+  }
+}
+
+// vx.workspace.ts
+import { defineWorkspace } from '@vzn/vx'
+import { sentryPlugin } from './plugins/sentry'
+
+export default defineWorkspace({
+  plugins: [sentryPlugin({ dsn: process.env.SENTRY_DSN! })],
+})
+```
+
+## A Slack-summary plugin
+
+```ts
+// plugins/slack-summary.ts
+import type { Plugin } from '@vzn/vx'
+
+export function slackSummary(opts: { webhookUrl: string }): Plugin {
+  let failed = 0
+  let success = 0
+  return {
+    name: 'org/slack-summary',
+    setup(ctx) {
+      ctx.on('onTaskComplete', (_, outcome) => {
+        if (outcome.status === 'failed') failed++
+        else if (outcome.status === 'success') success++
+      })
+      ctx.on('onRunEnd', async () => {
+        await fetch(opts.webhookUrl, {
+          method: 'POST',
+          headers: { 'content-type': 'application/json' },
+          body: JSON.stringify({
+            text: failed === 0 ? `:white_check_mark: vx run: ${success} passed` : `:x: vx run: ${failed} failed`,
+          }),
+        })
+      })
+    },
+  }
+}
+```
+
+## A metrics plugin (timeseries DB)
+
+```ts
+// plugins/timeseries.ts
+import type { Plugin } from '@vzn/vx'
+
+export function timeseriesPlugin(opts: { url: string }): Plugin {
+  const buffer: Array<Record<string, unknown>> = []
+  return {
+    name: 'org/timeseries',
+    setup(ctx) {
+      ctx.on('onTaskComplete', (node, outcome) => {
+        buffer.push({
+          ts: Date.now(),
+          project: node.projectName,
+          task: node.taskName,
+          status: outcome.status,
+          durationMs: outcome.durationMs,
+        })
+      })
+      ctx.on('onRunEnd', async () => {
+        if (buffer.length === 0) return
+        await fetch(opts.url, {
+          method: 'POST',
+          headers: { 'content-type': 'application/json' },
+          body: JSON.stringify({ points: buffer }),
+        })
+        buffer.length = 0
+      })
+    },
+  }
+}
+```
+
+## Crash isolation
+
+Plugins are **isolated from execution by design**:
+
+- If `setup()` throws, the run aborts with a `UserError` naming
+  the plugin. Configs with broken plugins fail loudly.
+- If a hook handler throws, the plugin is **disabled for the rest
+  of the run** and a warning prints. Other plugins keep firing;
+  the run continues. (Same pattern as the safe-observer wrapping
+  in the bus.)
+- Hooks may return promises — vx fires them as fire-and-forget
+  via `void` so a slow plugin can't block the run.
+
+This is the rule from the architecture docs: **subscribers cannot
+slow the run**. A wedged plugin loses events, never blocks producers.
+
+## What plugins CANNOT do today
+
+- **Skip a cache lookup** — `onCacheLookup` is reserved for a
+  future API revision.
+- **Register custom MCP/RPC methods** — plugins observe, they
+  don't extend the inspector surface (yet).
+- **Replace the terminal renderer** — the renderer is the default
+  bus subscriber; plugins layer on top.
+
+The full set of rules + future hook plans:
+`docs/design/extension-protocol-2026-06.md`.
+
+## Plumbing details
+
+- Plugin order = config order. Deterministic.
+- The bus is synchronous and order-preserving; chunks reach you in
+  the same order the terminal renderer sees them.
+- `ctx.bus` is the raw event bus — subscribe via `ctx.bus.subscribe`
+  if your needs exceed the lifecycle hooks (e.g. you want raw
+  `task:stdout` events without a node lookup).
+
+Reference impl + lifecycle test:
+`src/orchestrator/plugin.ts`, `tests/plugin-e2e.test.ts`.
diff --git a/apps/docs/src/content/docs/guides/predictive-scheduling.md b/apps/docs/src/content/docs/guides/predictive-scheduling.md
new file mode 100644
index 0000000..c3f685d
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/predictive-scheduling.md
@@ -0,0 +1,145 @@
+---
+title: Predictive scheduling
+description: Opt in to history-aware scheduling. vx loads the per-task duration history from cache.db and dispatches tasks by expected remaining critical path. The only task runner that learns from itself.
+---
+
+The default scheduler picks ready tasks by reverse-deps count — the
+task that unblocks the most downstream work runs first. That's a
+reasonable static heuristic, but it is blind to durations: it counts
+dependents, so it cannot tell whether the critical path runs through
+a 30-second test or a 4-second build that unblocks 40 downstream tasks.
+
+Predictive scheduling reads `cache.db.runs` for every task in your
+graph, computes the expected remaining critical-path duration per
+node, and dispatches by that instead. The only task runner that
+learns from itself.
+
+## Quick start
+
+```ts
+// vx.workspace.ts
+import { defineWorkspace } from '@vzn/vx'
+
+export default defineWorkspace({
+  predictive: true,
+})
+```
+
+That's it. On the next run, vx loads history, computes priorities,
+and uses them.
+
+## How it works
+
+1. **Load history.** `prepareRun` instantiates `LocalHistoryProvider`
+   over the cache.db handle and calls `loadFor(taskIds)` to get a
+   `HistoryTable` keyed by `project#task`. Each entry has p50/p99
+   durations (cache hits excluded), success rate, hit rate.
+
+2. **Compute expected critical path per node.** For each node,
+   topo-DP sums the node's own p50 + the max across downstream
+   chains:
+   ```
+   ECP(n) = p50(n) + max(ECP(d) for d in dependents(n))
+   ```
+   A leaf with no dependents has `ECP = p50(itself)`.
+
+3. **Pick highest ECP from the ready set.** The scheduler's
+   priority map merges history-aware ECP on top of the static
+   baseline. Override scores outweigh baseline scores by a large
+   factor, so a node with history beats one without; among nodes
+   without history, the baseline still decides the order.
+
+4. **Fall back when history is sparse.** A task with no prior
+   runs falls back to the workspace median across tasks. If the
+   workspace itself has no history, the default is 1000 ms — a
+   sane "I don't know" that puts the priority in the right order
+   of magnitude.
+
+## What this changes vs. the default
+
+Two example graphs where the heuristic differs:
+
+### Graph A: long leaf
+
+```
+       build (5s)
+        ↙   ↘
+     test    publish
+    (30s)    (2s)
+```
+
+- **Default**: starts with `build` (it unblocks 2 downstream). Then
+  whichever of `test`/`publish` happens to be ordered first in the
+  ready queue (typically graph-insertion).
+- **Predictive**: starts with `build`. Once it finishes, picks
+  `test` first (ECP = 30s + 0 = 30s) over `publish` (ECP = 2s).
+  On a single worker, ordering doesn't change total wall time —
+  but it surfaces the slow path earlier (better UX) and prevents
+  worker starvation on multi-worker graphs.
+
+### Graph B: lots of fast vs. one slow
+
+```
+    db_test (90s, blocks nothing)
+    lint    (2s, blocks 40 build tasks)
+```
+
+- **Default**: picks `lint` (blocks 40). Critical for single-worker
+  graphs.
+- **Predictive**: still picks `lint` on a single worker; on a
+  multi-worker graph where there's spare capacity, dispatches
+  `db_test` early in parallel.
+
+The merge function (`mergePriorities` in `src/graph/scheduler.ts`)
+preserves correctness: nodes covered by the override sort above
+all baseline-only nodes, and within the override set the baseline
+tiebreaks. Nodes the override didn't see fall back to baseline.
+
+## When predictive helps most
+
+- **Multi-worker graphs** with mixed task durations. The historical
+  ordering surfaces the long tail earlier so workers don't go idle
+  waiting for the last slow task.
+- **CI matrices** where you care about wall-time-to-failure for
+  human debugging. A failing test surfaces faster.
+- **Established repos** with weeks of history. Cold workspaces get
+  the workspace-median fallback.
+
+## When it doesn't help
+
+- **Cache-warm full-hit runs.** Cache hits cost ~ms; there's
+  nothing to prioritize.
+- **Single-task runs.** No queue to reorder.
+- **Brand-new workspaces.** No history; falls back to baseline +
+  default duration.
+
+## Observability
+
+`vx info --history` surfaces the same HistoryTable the scheduler
+consumes. `vx mcp` exposes `getRunHistory` so AI agents can ask
+"what does the scheduler think about this task" in plain English.
+
+## Trade-offs
+
+- **Bias toward the slow path.** Predictive dispatches long tasks
+  early. If you'd rather see fast feedback (lint failures first),
+  leave it off.
+- **Cold start.** A new task with no history uses the workspace
+  median. If your tasks are wildly different durations, the median
+  is a poor estimator until you have ~10 runs of the task.
+- **Opt-in only today.** No "default-on" plan; needs more telemetry
+  on the wall-time win before flipping the default. See
+  `docs/design/predictive-execution-2026-06.md` Phase F.
+
+## What's coming
+
+Phases C–E are deferred but designed:
+
+- **Speculative pre-warming**: `posix_fadvise(WILLNEED)` on input
+  files, module preload for `bun`/`node` runtimes.
+- **Bandit-driven retry**: flaky tasks (history success rate < 95%)
+  auto-retry once on transient failure.
+- **Regression detection**: at runEnd, compare this run's
+  durations against the rolling p50; flag significant deviations.
+
+See also: `docs/design/predictive-execution-2026-06.md`.
diff --git a/apps/docs/src/content/docs/guides/vx-cloud.md b/apps/docs/src/content/docs/guides/vx-cloud.md
new file mode 100644
index 0000000..a74f9fc
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/vx-cloud.md
@@ -0,0 +1,177 @@
+---
+title: vx Cloud — Cloudflare-template deployment
+description: Spin up a private vx Cloud in your Cloudflare account in 5 minutes. Workers + R2 + D1 + Durable Objects + Queues + KV. HMAC artifact signing, queue→D1 event ingest, OAuth coming.
+---
+
+`apps/cloud/` in the vx repo is a Cloudflare Workers project that
+ships **template-spawnable hosted observability + cache + execution**.
+`bun wrangler deploy` from a fresh clone of the repo gives you a
+private vx Cloud running in your own Cloudflare account in about five
+minutes. No proprietary glue; the OSS binary IS the hosted runtime.
+
+This guide walks through the deploy. Full design:
+`docs/design/vx-cloud-2026-06.md`.
+
+## What you get
+
+A Cloudflare Workers project with these bindings, all declared in
+`apps/cloud/wrangler.toml`:
+
+| Binding | Purpose |
+| --- | --- |
+| **Workers** | Edge HTTP for cache + insights API + Turbo-wire endpoint |
+| **R2** (`ARTIFACTS`) | Cache artifact storage (S3-API-compatible, **zero egress fees**) |
+| **D1** (`DB`) | SQLite at the edge — orgs, members, tokens, runs, run_tasks, run_events |
+| **Durable Objects** (`RUN_COORDINATOR`, `INFLIGHT_DEDUP`) | Stateful actors for per-run coordination + content-addressed dedup |
+| **Queues** (`EVENT_INGEST`) | Buffered event ingest from CI runs into D1 |
+| **KV** (`TOKEN_CACHE`) | Sub-ms hot lookups for bearer tokens |
+
+## Deploy
+
+Prerequisites: Cloudflare account, `bun` ≥ 1.3.
+
+```sh
+git clone https://github.com/vznjs/vx
+cd vx/apps/cloud
+bun install
+bun wrangler login                       # one-time auth
+bun wrangler d1 create vx_cloud
+bun wrangler r2 bucket create vx-cloud-artifacts
+bun wrangler kv namespace create TOKEN_CACHE
+bun wrangler queues create vx-event-ingest
+bun wrangler d1 migrations apply vx_cloud
+bun wrangler deploy
+```
+
+Each `create` command prints an ID — paste it into the matching
+`TODO: replace with id from wrangler create` line in `wrangler.toml`
+before running `migrations apply` and `deploy`. `bun wrangler deploy`
+then ships the worker; the output URL is your vx Cloud origin.
+
+## Point your runner at it
+
+Once deployed, set two env vars in your CI:
+
+```sh
+export VX_REMOTE_CACHE_URL=https://vx-cloud-<your-subdomain>.workers.dev/v8/artifacts
+export VX_REMOTE_CACHE_TOKEN=<a bearer token from your D1 api_tokens table>
+```
+
+`vx run` now reads/writes the remote cache via the standard
+Turbo-wire endpoint (which is what `apps/cloud/` exposes).
+
+## HMAC artifact signing
+
+Set `VX_REMOTE_CACHE_SIGNATURE_KEY` on **both** the client and the
+Worker:
+
+```sh
+# Client (CI machine running vx run)
+export VX_REMOTE_CACHE_SIGNATURE_KEY=<shared secret>
+
+# Worker (set via wrangler)
+bun wrangler secret put VX_REMOTE_CACHE_SIGNATURE_KEY
+```
+
+When set, every cache PUT carries a `x-artifact-tag` HMAC-SHA256
+header over `(hash || teamId || body)`. The Worker rejects unsigned
+or tampered artifacts with 401/500; the client treats those as
+cache miss and re-runs the task. Same scheme Turbo uses; the wire is
+compatible.
+
+Tag scheme: `base64(HMAC-SHA256(secret, hash || teamId || body))`.
+
+## Event ingest pipeline
+
+```
+   vx run (locally / CI)
+        │ POST /v1/events/ingest
+        ▼
+   ┌──────────────┐   batch    ┌────────────┐  consume   ┌────────┐
+   │ Worker route │ ─────────▶ │ Queue      │ ─────────▶ │ Worker │
+   │              │            │ EVENT_INGEST│            │ queue()│
+   └──────────────┘            └────────────┘            └────┬───┘
+                                                              │
+                                              groups by runId │
+                                              ensures runs row│
+                                              allocates seq   │
+                                              D1.batch INSERT │
+                                                              ▼
+                                                         ┌──────┐
+                                                         │  D1  │
+                                                         │ runs │
+                                                         │ run_ │
+                                                         │ events│
+                                                         └──────┘
+```
+
+The consumer (`apps/cloud/src/index.ts` `queue()`) groups messages
+by `runId`, ensures the parent `runs` row exists via `ON CONFLICT DO
+NOTHING`, allocates `seq` once per run by SELECT MAX + offsets, and
+batches inserts atomically via D1's `batch()` API.
+
+## RunCoordinatorDO
+
+One Durable Object per active run, addressed by `runId`. WebSocket
+Hibernation pattern: the DO sleeps between events; cost is per
+event, not per idle connection. Methods over the JSON-RPC envelope:
+
+- `submit.run` → persists `RunMeta` (runId, orgId, startedAt,
+  status='running') and accepts the run.
+- `events.append` → broadcasts to subscribed WS clients +
+  durably persists via the queue.
+- `state.snapshot` → returns the latest `RunMeta`.
+- `run.end` → transitions `status='ended'`.
+
+## What's deferred
+
+The hard parts are deferred; the design doc tracks them:
+
+- **GitHub OAuth + multi-tenant org provisioning.** Today auth is
+  bearer-token only; tokens are inserted manually into the `api_tokens`
+  D1 table.
+- **RBAC** beyond the column existing.
+- **Per-task InflightDedupDO fan-out.** The DO class is shipped;
+  RunCoordinatorDO doesn't address by task hash yet.
+- **Hosted SaaS at `cloud.vx.dev`.** When you can spin up your own,
+  the SaaS is just convenience.
+- **Hyperdrive escape hatch.** Designed for when D1's 10GB cap is
+  tight; not wired into `wrangler.toml` by default.
+
+## Costs
+
+Cloudflare free tier covers small teams forever:
+
+- Workers: 100k requests/day
+- R2: 10 GB storage, **zero egress**
+- D1: 5 GB/database, 100k reads/day
+- DOs: 1M requests/month
+- KV: 100k reads/day, 1k writes/day
+- Queues: 1M operations/month
+
+At workload sizes where these limits bite, Hyperdrive into your own
+Postgres is the escape hatch.
+
+## OSS-first guarantees
+
+There is no proprietary component in this stack. Every Worker,
+every DO, every migration, every test is in this repo under
+`apps/cloud/`. If a hosted `cloud.vx.dev` launched and then shut down,
+every customer could spin up their own in an afternoon.
+
+The hosted SaaS we may eventually run will be one CF account
+deployment of the same template you just deployed — no special
+branch, no closed-source modules, no "community edition" with
+crippled features.
+
+## Tests
+
+```sh
+cd apps/cloud
+bun test tests/                          # HMAC compute/verify round-trips
+```
+
+Runs in CI as the `apps/cloud` test task.
+
+See also: `docs/design/vx-cloud-2026-06.md`,
+`apps/cloud/README.md` (the deploy guide that ships with the template).
diff --git a/apps/docs/src/content/docs/guides/wire-protocol.md b/apps/docs/src/content/docs/guides/wire-protocol.md
new file mode 100644
index 0000000..948923d
--- /dev/null
+++ b/apps/docs/src/content/docs/guides/wire-protocol.md
@@ -0,0 +1,211 @@
+---
+title: vx serve wire protocol — JSON-RPC 2.0
+description: One envelope, three transports. Connect your own tooling to vx serve via WebSocket, Server-Sent Events, or NDJSON. JSON-RPC 2.0 compatible — every existing JSON-RPC client works.
+---
+
+`vx serve` exposes the run event stream + the submit-a-run API
+over a single JSON-RPC 2.0 wire. Three transports off the same bus:
+
+- **WebSocket** at `/` — bidirectional; submit runs, receive events.
+- **Server-Sent Events** at `/events` — read-only; `curl -N` works.
+- **NDJSON** at `/stream` — read-only; jq-friendly, one envelope
+  per line.
+
+Every external consumer that already speaks JSON-RPC works against
+vx out of the box. That includes MCP clients, A2A agents, web
+SPAs, shell scripts, custom dashboards, anything.
+
+Full spec: `docs/design/wire-protocol-2026-06.md`. This guide is
+practical.
+
+## Discover the server
+
+```sh
+vx serve --port 5176             # in one terminal
+curl http://localhost:5176/version
+```
+
+```json
+{
+  "protocol": "1.0",
+  "vx": "0.0.0",
+  "channels": ["vx:events", "vx:state", "vx:rpc", "vx:submit"],
+  "rpc": [
+    "getCacheStats",
+    "getRunHistory",
+    "explainCacheKey",
+    "whyDidThisRerun"
+  ]
+}
+```
+
+Version-prefix matching: a v1.x client talks to a v1.y server. The
+RPC method list is the inspector capability list.
+
+## Tail events (the read-only path)
+
+### SSE
+
+```sh
+curl -N http://localhost:5176/events
+```
+
+Each event arrives as `data: <json>\n\n`. Submit a run elsewhere:
+
+```sh
+vx run lint
+```
+
+Your `curl` window now prints every event of the run as a JSON-RPC
+notification:
+
+```
+data: { "jsonrpc": "2.0", "method": "events.append", "params": { "kind": "run:start", … } }
+
+data: { "jsonrpc": "2.0", "method": "events.append", "params": { "kind": "task:start", … } }
+```
+
+### NDJSON for jq
+
+```sh
+curl -N http://localhost:5176/stream | jq -r '.params.kind'
+```
+
+One envelope per line; `jq` processes each one as it arrives.
+
+## Submit a run over WebSocket
+
+Two ways — both work on the same WS endpoint.
+
+### Legacy `t:'run'` frame
+
+```ts
+const ws = new WebSocket('ws://localhost:5176/')
+ws.onopen = () => {
+  ws.send(JSON.stringify({
+    t: 'run',
+    request: { cwd: '/path/to/workspace', tasks: ['lint'] },
+  }))
+}
+ws.onmessage = (ev) => {
+  const msg = JSON.parse(ev.data) // { t: 'event' | 'result' | 'error', … }
+}
+```
+
+### JSON-RPC 2.0 `submit.run` request
+
+```ts
+const ws = new WebSocket('ws://localhost:5176/')
+ws.onopen = () => {
+  ws.send(JSON.stringify({
+    jsonrpc: '2.0',
+    id: 1,
+    method: 'submit.run',
+    params: { cwd: '/path/to/workspace', tasks: ['lint'] },
+  }))
+}
+ws.onmessage = (ev) => {
+  const env = JSON.parse(ev.data)
+  if ('result' in env) console.log('done', env.result)
+  else if ('error' in env) console.log('failed', env.error)
+  else if (env.method === 'events.append') console.log('event', env.params)
+}
+```
+
+The server accepts both formats on the same connection — parse
+once, classify, dispatch. Use whichever your client lib makes
+easier.
+
+## Channels
+
+| Channel | Methods | Direction |
+| --- | --- | --- |
+| `vx:events` | notification `events.append(event)` | server → client |
+| `vx:state` | request `state.snapshot()` + notification `state.patch(patches)` | both (not yet shipped over WS — MCP-only) |
+| `vx:rpc` | request `<method>(params)` — typed inspector queries | client → server (not yet shipped over WS — `vx mcp` stdio works) |
+| `vx:submit` | request `submit.run(request)` + streamed `events.append` → response `RunResult` | client → server |
+
+## Event shape
+
+Each `events.append` notification carries a `WireEvent` whose body
+is built from one of seven `vx.kind` values:
+
+```ts
+type WireEventKind =
+  | 'run:start'      // run begins
+  | 'task:start'     // a task begins executing
+  | 'task:stdout'    // stdout chunk; body = the chunk
+  | 'task:stderr'    // stderr chunk
+  | 'task:complete'  // task ends; attributes carry the outcome
+  | 'run:status'     // run-level status line
+  | 'run:end'        // run finishes
+```
+
+In the wire-spec doc you'll see this referred to as
+"OTel-LogRecord-shaped" — that's the planned canonical form. Today's
+emitted shape uses the legacy `kind`/`atMs`/`taskId` fields plus
+attributes. The OTel rename is a follow-up.
+
+## Error envelopes
+
+JSON-RPC 2.0 errors are returned with their standard codes. vx
+also defines its own range:
+
+| Code | Meaning |
+| --- | --- |
+| -32700 | parse error |
+| -32600 | invalid request |
+| -32601 | method not found |
+| -32602 | invalid params |
+| -32603 | internal error |
+| -32000 | vx UserError (clean message, no stack) |
+| -32001 | task hash unknown |
+| -32002 | run not found |
+| -32003 | unauthorized |
+| -32004 | rate limited (cloud) |
+
+## Auth
+
+Localhost loopback: no auth. Remote (vx cloud): `Authorization:
+Bearer <token>` header on every request and on the WS handshake.
+
+## Example — phone notification when CI passes
+
+Bash pipeline over the NDJSON stream (`jq` cannot parse SSE's `data: ` framing):
+
+```sh
+curl -N https://vx-cloud-xxx.workers.dev/stream \
+  -H "Authorization: Bearer $TOKEN" \
+  | jq -c --unbuffered '.params | select(.kind == "run:end")' \
+  | while read; do curl -X POST https://api.pushover.net/1/messages.json \
+      -d "token=$P_TOKEN" -d "user=$P_USER" -d "message=Build done"; done
+```
+
+A five-line pipeline and you have a phone push for every CI finish,
+OS-independent.
+
+## Example — TS subscriber
+
+```ts
+const ws = new WebSocket('ws://localhost:5176/')
+ws.onmessage = (ev) => {
+  const env = JSON.parse(ev.data)
+  if ('method' in env && env.method === 'events.append') {
+    const e = env.params
+    if (e.kind === 'task:complete' && e.outcome?.status === 'failed') {
+      console.log(`🚨 ${e.taskId} failed`)
+    }
+  }
+}
+```
+
+Equivalent in Python, Go, Rust — any language with a JSON-RPC
+client. The wire is the SDK.
+
+## See also
+
+- `docs/design/wire-protocol-2026-06.md` — full spec
+- `docs/design/extension-protocol-2026-06.md` — subscribers,
+  inspectors, drivers, plugins
+- [`vx mcp` guide](/vx/guides/mcp/) — agent-side
+- [`vx serve` CLI](/vx/cli/#vx-serve--execution--event-stream-service)
diff --git a/apps/docs/src/content/docs/introduction.md b/apps/docs/src/content/docs/introduction.md
index 4448a65..c367016 100644
--- a/apps/docs/src/content/docs/introduction.md
+++ b/apps/docs/src/content/docs/introduction.md
@@ -55,6 +55,36 @@ The full, sourced comparison lives in
 [vx vs Turborepo vs Nx](../comparison/). The performance mechanics are in
 [Why vx is fast](../concepts/why-vx-is-fast/).
 
+## The open platform layer
+
+Beyond the core task runner, vx ships an OSS open platform — every
+contract is documented; every wire is JSON-RPC 2.0. None of these
+require additional services:
+
+- **[vx mcp](../guides/mcp/)** — Model Context Protocol server (stdio).
+  Claude Code, Cursor, Continue.dev query cache stats and run history
+  through the standard agent-tool protocol.
+- **[Distributed CI](../guides/distributed-ci/)** — `vx coordinator` +
+  `vx run --worker` dispatch tasks across machines. Content-addressed.
+- **[Plugin API](../guides/plugins/)** — register lifecycle hooks in
+  `vx.workspace.ts`. Forward outcomes to Sentry, post to Slack, ship
+  metrics anywhere.
+- **[Predictive scheduling](../guides/predictive-scheduling/)** — opt
+  in with `predictive: true`; the scheduler reads run history and
+  dispatches by expected remaining critical path.
+- **[vx Cloud (Cloudflare)](../guides/vx-cloud/)** — `bun wrangler
+  deploy` from a fresh clone of `apps/cloud/` gives you a private vx
+  Cloud in your CF account in 5 minutes.
+- **[vx insights serve](../guides/insights/)** — localhost
+  Solid+DuckDB-WASM SPA over your `cache.db`. Historical run
+  flamegraphs, no backend.
+- **[OpenTelemetry CI/CD spans](../guides/otel-bridge/)** — single env
+  var, single npm install; every event lands in Grafana / Honeycomb /
+  Datadog / Tempo.
+- **[Wire protocol](../guides/wire-protocol/)** — `vx serve` speaks
+  JSON-RPC 2.0 over WS, SSE, and NDJSON. `curl -N
+  http://localhost:5176/stream | jq` streams every envelope.
+
 ## What vx is *not*
 
 vx is small on purpose. It deliberately has **no** generators or
diff --git a/apps/docs/src/pages/index.astro b/apps/docs/src/pages/index.astro
index 364833c..89323ff 100644
--- a/apps/docs/src/pages/index.astro
+++ b/apps/docs/src/pages/index.astro
@@ -72,6 +72,47 @@ const features = [
   },
 ]
 
+// Platform layer — capabilities Nx and Turbo don't have OSS. Every claim
+// shipped in the 2026-06 arc (see docs/design/*-2026-06.md).
+const platform = [
+  {
+    icon: 'spark',
+    tone: 'var(--phosphor)',
+    title: 'MCP for AI agents',
+    body: 'vx mcp boots a Model Context Protocol server over stdio. Claude Code, Cursor, Continue.dev query cache stats, run history, and rebuild causes through the standard agent-tool protocol. No other runner ships an MCP surface.',
+  },
+  {
+    icon: 'mesh',
+    tone: 'var(--plasma)',
+    title: 'Distributed CI, OSS',
+    body: 'vx coordinator + vx run --worker dispatch your task graph across machines. Content-addressed: any worker producing artifact <hash> satisfies every consumer. Nx Cloud DTE without the paywall.',
+  },
+  {
+    icon: 'plug',
+    tone: 'var(--amber)',
+    title: 'Plugin API',
+    body: 'Register plugins in vx.workspace.ts to subscribe to lifecycle hooks. Forward outcomes to Sentry, post to Slack, ship metrics anywhere. Vite-style lifecycle hooks, crash-isolated per hook.',
+  },
+  {
+    icon: 'wire',
+    tone: 'var(--phosphor)',
+    title: 'Open wire protocol',
+    body: 'vx serve speaks JSON-RPC 2.0 over WS, SSE and NDJSON. Any JSON-RPC client works — curl streams events, jq filters them, MCP and A2A interop free. No proprietary protocol.',
+  },
+  {
+    icon: 'cloud',
+    tone: 'var(--plasma)',
+    title: 'Cloudflare-template hosted',
+    body: 'apps/cloud is a Wrangler project. bun wrangler deploy gives you a private vx Cloud in your CF account in 5 minutes — Workers + R2 + D1 + Durable Objects + Queues + KV. Template-spawnable, OSS-first.',
+  },
+  {
+    icon: 'brain',
+    tone: 'var(--amber)',
+    title: 'Predictive scheduling',
+    body: 'predictive: true and the scheduler reads run history, computes expected remaining critical-path per task, and dispatches by it. The only task runner that learns from itself.',
+  },
+]
+
 const featIcons: Record<string, string> = {
   cache:
     '<ellipse cx="12" cy="6" rx="8" ry="3"/><path d="M4 6v12c0 1.66 3.58 3 8 3s8-1.34 8-3V6"/><path d="M4 12c0 1.66 3.58 3 8 3s8-1.34 8-3"/>',
@@ -81,6 +122,13 @@ const featIcons: Record<string, string> = {
   bolt: '<path d="M13 2L4 14h7l-1 8 9-12h-7l1-8z"/>',
   box: '<path d="M21 8l-9-5-9 5v8l9 5 9-5V8z"/><path d="M3.5 8l8.5 5 8.5-5M12 13v8"/>',
   lock: '<rect x="3" y="11" width="18" height="11" rx="2"/><path d="M7 11V7a5 5 0 0110 0v4"/>',
+  // Platform icons
+  spark: '<path d="M12 2v6M12 16v6M2 12h6M16 12h6M5 5l3 3M16 16l3 3M5 19l3-3M16 8l3-3"/>',
+  mesh: '<circle cx="6" cy="6" r="2"/><circle cx="18" cy="6" r="2"/><circle cx="6" cy="18" r="2"/><circle cx="18" cy="18" r="2"/><circle cx="12" cy="12" r="2"/><path d="M8 7l3 4M16 7l-3 4M11 13l-3 3M13 13l3 3"/>',
+  plug: '<path d="M9 2v6M15 2v6M7 8h10v4a5 5 0 01-10 0V8zM12 17v5"/>',
+  wire: '<path d="M3 8h6l3 8h6M3 16h6l3-8h6"/>',
+  cloud: '<path d="M17.5 19a4.5 4.5 0 100-9 6 6 0 00-11.5 1.5A4.5 4.5 0 006.5 19h11z"/>',
+  brain: '<path d="M9 3a3 3 0 00-3 3v1a3 3 0 00-3 3v2a3 3 0 003 3v1a3 3 0 003 3M15 3a3 3 0 013 3v1a3 3 0 013 3v2a3 3 0 01-3 3v1a3 3 0 01-3 3M9 12h6M9 7h6M9 17h6"/>',
 }
 
 const benchRows = [
@@ -387,6 +435,59 @@ const footCols = {
         </div>
       </section>
 
+      <!-- ===================== Platform layer ===================== -->
+      <section class="section container">
+        <div class="section-head">
+          <p class="vx-kicker">// beyond a task runner</p>
+          <h2>The open platform layer.</h2>
+          <p class="vx-sub">
+            One binary. Eight surfaces. Every contract is documented; every wire
+            is JSON-RPC 2.0. Subscribe, query, drive, extend — your stack, your
+            tools, your data.
+          </p>
+        </div>
+        <div class="feat-grid">
+          {
+            platform.map((f) => (
+              <a
+                class="feat-card platform-card"
+                style={`--c:${f.tone}`}
+                href={
+                  f.icon === 'spark'
+                    ? href('guides/mcp/')
+                    : f.icon === 'mesh'
+                      ? href('guides/distributed-ci/')
+                      : f.icon === 'plug'
+                        ? href('guides/plugins/')
+                        : f.icon === 'wire'
+                          ? href('guides/wire-protocol/')
+                          : f.icon === 'cloud'
+                            ? href('guides/vx-cloud/')
+                            : href('guides/predictive-scheduling/')
+                }
+              >
+                <span class="ico">
+                  <svg
+                    width="21"
+                    height="21"
+                    viewBox="0 0 24 24"
+                    fill="none"
+                    stroke="currentColor"
+                    stroke-width="1.6"
+                    stroke-linecap="round"
+                    stroke-linejoin="round"
+                    set:html={featIcons[f.icon]}
+                  />
+                </span>
+                <h3>{f.title}</h3>
+                <p>{f.body}</p>
+                <span class="learn">Learn more →</span>
+              </a>
+            ))
+          }
+        </div>
+      </section>
+
       <!-- ===================== Benchmark ===================== -->
       <section class="bench">
         <div class="bench-panel container vx-scanlines">
diff --git a/apps/docs/src/styles/landing.css b/apps/docs/src/styles/landing.css
index 688776d..eb5b93a 100644
--- a/apps/docs/src/styles/landing.css
+++ b/apps/docs/src/styles/landing.css
@@ -646,6 +646,28 @@ body:has(.vx-landing) {
   margin: 14px 0 0;
   max-width: 620px;
 }
+.section-head .vx-sub {
+  color: var(--text-md);
+  margin-top: 14px;
+  max-width: 620px;
+  line-height: 1.55;
+}
+a.platform-card {
+  text-decoration: none;
+  color: inherit;
+  display: block;
+}
+a.platform-card .learn {
+  display: inline-block;
+  margin-top: 12px;
+  color: color-mix(in srgb, var(--c) 88%, var(--text-hi));
+  font-size: 13px;
+  font-weight: 600;
+  letter-spacing: 0.01em;
+}
+a.platform-card:hover .learn {
+  color: var(--c);
+}
 .feat-grid {
   display: grid;
   grid-template-columns: repeat(3, 1fr);
diff --git a/docs/cli.md b/docs/cli.md
index 91d7f42..dbdfa0c 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -18,6 +18,7 @@ bun src/bin.ts --version
 ## Top-level shape
 
 ```
+# Core
 vx run [OPTIONS] [TASK | PKG#TASK ...] [-- forwarded-args...]
 vx watch [OPTIONS] TASK [-- forwarded-args...]
 vx cache prune [--older-than <duration>] [--max-size <bytes>]
@@ -26,6 +27,16 @@ vx migrate [--dry] [--force]
 vx show [PROJECT[#TASK]] [--format pretty|json]
 vx info
 vx stats              # deprecated alias of vx info
+
+# Platform — the 2026-06 arc
+vx mcp [--stdio]                                       # MCP server for AI agents
+vx coordinator <tasks…> [--port N] [--host H] [--workers N]
+vx run --worker <coord-url> [--capacity N] [--label L] # join a coordinator as a worker
+vx serve [--port N]                                    # WS + SSE + NDJSON event service
+vx dev                                                 # local devtools hub
+vx insights serve [--port N]                           # local Solid+DuckDB-WASM SPA
+
+# Meta
 vx help
 vx --help, -h
 vx version
@@ -757,6 +768,196 @@ remote cache:   no
 - `vx stats` is a **deprecated alias** of `vx info` (info absorbed
   it); it prints byte-identical output.
 
+## `vx mcp` — Model Context Protocol server
+
+Boot an MCP server so AI coding agents (Claude Code, Cursor,
+Continue.dev, VS Code GitHub Copilot, …) can query vx state through
+the standard agent-tool protocol. Stdio transport only.
+
+```
+vx mcp                           # stdio transport (default)
+vx mcp --stdio                   # explicit
+```
+
+Add to an MCP client config (Claude Code example):
+
+```jsonc
+// ~/.claude/mcp.json
+{
+  "mcpServers": {
+    "vx": { "command": "vx", "args": ["mcp"] },
+  },
+}
+```
+
+Tools exposed:
+
+| Tool              | Purpose                                                                                                                     |
+| ----------------- | --------------------------------------------------------------------------------------------------------------------------- |
+| `getCacheStats`   | Aggregate cache stats (entries, total size, runs/hits last 24h, hit rate)                                                   |
+| `getRunHistory`   | Recent runs filtered by `project` / `task` / `limit`, with per-pair p50/p99/successRate/hitRate aggregates                  |
+| `explainCacheKey` | Persisted entry metadata for a `project#task` (hash, command, exit code, duration, size, created_at)                        |
+| `whyDidThisRerun` | Compares a `(runId, taskId)` against the immediately preceding run for the same task; reports whether the cache key changed |
+
+All tools read the local `cache.db` opened on demand. No network, no
+auth (stdio is process-private). Future tools (`runTasks`,
+`getRunState`) ship under the `vx:rpc` channel when the inspector WS
+surface lands.
+
+## `vx coordinator` — distributed-CI coordinator
+
+Start a per-build coordinator that holds the task graph + ready queue
+and dispatches assignments to attached workers over WebSocket.
+Content-addressed: any worker producing artifact `<hash>` satisfies
+every consumer of `<hash>`, so workers are fungible.
+
+```
+vx coordinator <tasks…>          # positional tasks (e.g. lint test build)
+    --port <n>                   # default 5180
+    --host <h>                   # default 127.0.0.1
+    --workers <n>                # expected workers (display only)
+```
+
+Behavior:
+
+- Boots `Bun.serve` WS at `http://<host>:<port>`.
+- Runs `prepareRun` against the workspace to build the same graph
+  the local CLI would.
+- Computes the v22 cache hash per node — the assignment key.
+- Workers register via `worker:hello`, pull via `worker:pull`,
+  report outcomes via `worker:done`.
+- A worker that disconnects mid-task strands its in-flight tasks; those
+  hashes go back on the ready queue for the next attached worker.
+- Exits 0 when every task ends in a terminal state with
+  `outcome.status === 'success'`, 1 otherwise.
+- Writes `<workspaceRoot>/.vx/coordinator.json` advertising the
+  origin + pid (cleaned up on stop).
+
+GHA-style usage:
+
+```yaml
+jobs:
+  coord:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: vx coordinator lint test build --port 5180 --workers 4 &
+        # expose 5180 to peers via tailscale / cloudflared / direct GHA runner IPs
+  worker:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        worker: [1, 2, 3, 4]
+    steps:
+      - uses: actions/checkout@v4
+      - run: vx run --worker ws://coord:5180 --capacity 2
+```
+
+Phase A-B only today: real coordinator + worker, content-addressed
+dispatch, disconnect recovery. Capability labels, cache-affinity
+hints, and Buck2-style hybrid execution land later
+(`docs/design/distributed-ci-2026-06.md`).
+
+## `vx run --worker` / `--coordinator` — distributed-CI worker
+
+Attach to a coordinator and execute its assignments. Stateless and
+fungible.
+
+```
+vx run --worker ws://coord:5180  # connect, register, pull, execute
+    --capacity <n>               # max concurrent in-flight (default 1)
+    --label <l>                  # capability label (repeatable; default linux-x64)
+```
+
+`--coordinator` is a synonym of `--worker`. Behavior:
+
+- Connects to the coordinator's WS endpoint.
+- Sends `worker:hello { workerId, capacity, labels }`.
+- Pulls work via `worker:pull { available }`.
+- On `task:assign`, spawns the command via `runCommand`
+  (orchestrator-level helper), streams stdout/stderr back over
+  `worker:stdout` / `worker:stderr`, reports `worker:done` with the
+  outcome.
+- On `coord:drain`, waits for in-flight tasks to finish, sends
+  `worker:bye`, exits.
+- Exits 0 if every assigned task succeeded, 1 otherwise.
+
+Workers do NOT yet probe the remote cache before executing — every
+assigned task spawns fresh. Cache integration is the next iteration.
+
+## `vx insights serve` — historical run dashboard (local)
+
+Boot a Solid + UnoCSS + DuckDB-WASM SPA against the workspace's
+`cache.db`. Pure read-only analytics — no backend, no upload, no
+daemon.
+
+```
+vx insights serve                # SPA dev server on port 5290
+    --port <n>                   # override SPA port
+```
+
+What it does:
+
+- Starts a tiny static HTTP server exposing `cache.db` at
+  `/cache.db` with the SQLite MIME (kernel-assigned port).
+- Boots Vite dev for `apps/insights/` with
+  `VITE_CACHE_DB_URL=http://127.0.0.1:<staticPort>/cache.db`.
+- The SPA lazy-loads DuckDB-WASM (~30 MB on first query), ATTACHes
+  the SQLite file via DuckDB's `sqlite_scanner` extension, and runs
+  every aggregation client-side.
+
+Pages: Overview (recent runs list, click to detail) → Run detail
+(per-task flamegraph, durations, cache provenance).
+
+Requires `apps/insights/` to be on disk — set `VX_INSIGHTS_DIR` to
+point at a checkout if the installed binary can't find it
+adjacent to `import.meta.dir`. A first run is needed to populate
+`cache.db`; an empty workspace prints a clean hint and exits.
+
+## `vx serve` — execution + event-stream service
+
+WebSocket + SSE + NDJSON service that other clients connect to and
+either (a) submit runs for delegated execution or (b) subscribe to
+the live event stream.
+
+```
+vx serve                         # bind a kernel-assigned port
+    --port <n>                   # explicit
+```
+
+HTTP routes (all return JSON unless noted):
+
+| Route            | Purpose                                                                                    |
+| ---------------- | ------------------------------------------------------------------------------------------ |
+| `GET /health`    | Liveness probe (`200 ok`)                                                                  |
+| `GET /version`   | Protocol version + channels + RPC capability list                                          |
+| `GET /events`    | Server-Sent Events stream of every envelope from every concurrent run                      |
+| `GET /stream`    | NDJSON stream (jq-friendly) of the same                                                    |
+| `WS /` (upgrade) | Bidirectional; accepts both legacy `{ t: 'run', ... }` and JSON-RPC `submit.run` envelopes |
+
+Every wire frame is a JSON-RPC 2.0 envelope per
+`docs/design/wire-protocol-2026-06.md`. Service-emitted events use
+the `events.append` notification method; client-submitted runs use
+the `submit.run` request method. A `vx run` against a workspace where
+`vx serve` is already up auto-delegates via `.vx/serve.json`
+discovery + a 300 ms `/health` probe.
+
+`curl -N http://localhost:<port>/events` prints every envelope as
+SSE; `curl -N http://localhost:<port>/stream | jq` for one envelope
+per line.
+
+## `vx dev` — devtools hub
+
+Foreground devtools hub that ingests forwarded NDJSON events from a
+local `vx run` and renders them through a connected web client.
+
+```
+vx dev                           # bind a kernel-assigned local socket
+```
+
+Optional and dev-time only. Production observability is the OTel
+bridge (set `OTEL_EXPORTER_OTLP_ENDPOINT`).
+
 ## Output format
 
 `vx run` emits framed blocks. Stdout/stderr from each task is

From 797e0e9989023cb209bbb5ff07836b1c078a1917 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 21 Jun 2026 16:38:41 +0000
Subject: [PATCH 2/2] =?UTF-8?q?Fix=20vx=20insights=20=E2=80=94=20register?=
 =?UTF-8?q?=20cache.db=20bytes=20before=20ATTACH?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DuckDB-WASM's sqlite_scanner can't read SQLite over HTTP. ATTACH
against an http URL hits the 'unable to open database file' error
the user reported.

Fix: fetch the bytes via standard fetch(), register them as a
virtual file via db.registerFileBuffer('cache.db', bytes), then
ATTACH 'cache.db' AS cachedb (TYPE SQLITE). Same query API on top.

The default URL is unchanged ('/cache.db' from the static server
'vx insights serve' boots; or VITE_CACHE_DB_URL when overridden).
A clear error (a plain Error) surfaces if the fetch fails (server not running
yet, or cache.db missing because no vx run has populated it).

Verified: SPA builds cleanly (171 modules, 200KB DuckDB chunk);
the local static server smoke-test returns 200 with real SQLite
bytes.
---
 apps/insights/src/duckdb.ts | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/apps/insights/src/duckdb.ts b/apps/insights/src/duckdb.ts
index 36a62d0..87cb73d 100644
--- a/apps/insights/src/duckdb.ts
+++ b/apps/insights/src/duckdb.ts
@@ -1,9 +1,14 @@
-// Lazy DuckDB-WASM loader. DuckDB reads SQLite files directly via the
-// `sqlite_scanner` extension — no ETL, no server. The cache.db URL is
-// resolved once and ATTACHed; queries then read the live SQLite via
-// `sqlite_attached.<table>` aliases exposed in api.ts.
+// Lazy DuckDB-WASM loader. DuckDB reads SQLite files via the
+// `sqlite_scanner` extension — but only from its own virtual
+// filesystem. We can't `ATTACH 'http://...'` directly because the
+// SQLite reader doesn't speak HTTP. The flow is:
+//   1. fetch the cache.db bytes from the static server
+//   2. register them as a virtual file via `db.registerFileBuffer`
+//   3. `ATTACH '<virtual-name>' AS cachedb (TYPE SQLITE)`
 //
-// The bundle is ~30MB; loadDuckDb() is deferred until the first call.
+// Queries then read that in-memory snapshot (not the live file) via
+// `cachedb.<table>` aliases exposed in api.ts. The bundle is ~30MB;
+// loadDuckDb() is deferred until the first call.
 
 import type { AsyncDuckDB, AsyncDuckDBConnection } from '@duckdb/duckdb-wasm'
 
@@ -17,6 +22,18 @@ function resolveCacheDbUrl(): string {
   return '/cache.db'
 }
 
+async function fetchCacheDbBytes(url: string): Promise<Uint8Array> {
+  // Fetch cache.db uncached. fetch() rejects (no Response) when nothing is
+  // listening, so fold that case into the same actionable error as a non-2xx.
+  const res = await fetch(url, { cache: 'no-store' }).catch(() => undefined)
+  if (!res?.ok) {
+    const why = res ? `${res.status} ${res.statusText}` : 'network error'
+    const hint = `Is \`vx insights serve\` running? Did a \`vx run\` populate the cache yet?`
+    throw new Error(`vx insights: failed to fetch ${url} (${why}). ${hint}`)
+  }
+  return new Uint8Array(await res.arrayBuffer())
+}
+
 async function bootstrap(): Promise<{ db: AsyncDuckDB; conn: AsyncDuckDBConnection }> {
   const duckdb = await import('@duckdb/duckdb-wasm')
   const bundles = duckdb.getJsDelivrBundles()
@@ -33,9 +50,14 @@ async function bootstrap(): Promise<{ db: AsyncDuckDB; conn: AsyncDuckDBConnecti
   URL.revokeObjectURL(workerUrl)
 
   const conn = await db.connect()
+  // sqlite_scanner reads a SQLite file from DuckDB's own virtual
+  // filesystem. We fetch the bytes once and register them as a
+  // virtual file named 'cache.db'; ATTACH then reads from there.
   await conn.query(`INSTALL sqlite_scanner; LOAD sqlite_scanner;`)
   const cacheDbUrl = resolveCacheDbUrl()
-  await conn.query(`ATTACH '${cacheDbUrl}' AS cachedb (TYPE SQLITE);`)
+  const bytes = await fetchCacheDbBytes(cacheDbUrl)
+  await db.registerFileBuffer('cache.db', bytes)
+  await conn.query(`ATTACH 'cache.db' AS cachedb (TYPE SQLITE);`)
   return { db, conn }
 }