diff --git a/.gitignore b/.gitignore index 8c1b2ea6..4f57d598 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,8 @@ deno.lock reference/ codex/ ghostty/ +observability/data/ +tmp/verification/ # Worktree script SQLite artifacts scripts/worktree-feature/*.db* diff --git a/docs/guides/agent-trace-otel-verification.mdx b/docs/guides/agent-trace-otel-verification.mdx new file mode 100644 index 00000000..2838d1eb --- /dev/null +++ b/docs/guides/agent-trace-otel-verification.mdx @@ -0,0 +1,200 @@ +--- +title: Agent Trace OTEL Log Verification +description: Reproducible local verification for canonical agent trace OTEL log export with Loki. +--- + +## Local stack + +Start the local stack: + +```bash +smithers observability --detach +``` + +Expected endpoints: + +- Grafana: `http://localhost:3001` +- Prometheus: `http://localhost:9090` +- Tempo: `http://localhost:3200` +- Loki: `http://localhost:3100` +- OTEL collector HTTP: `http://localhost:4318` + +Validate the stack: + +```bash +docker compose -f observability/docker-compose.otel.yml ps +curl -sf http://localhost:3100/ready +curl -sf http://localhost:3001/api/datasources | jq 'map({name,type,url})' +docker logs observability-otel-collector-1 | tail -n 80 +``` + +## Enable OTEL export + +```bash +export SMITHERS_OTEL_ENABLED=1 +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +export OTEL_SERVICE_NAME=smithers-dev +export SMITHERS_LOG_FORMAT=json +``` + +## Demo workflow + +Use the built-in reproducible workflow at `workflows/agent-trace-otel-demo.tsx`. 
+It emits: + +- one Pi-like high-fidelity attempt with text deltas, thinking deltas, tool lifecycle, usage, and a redaction fixture +- one SDK-style final-only attempt +- stable run annotations for Loki queries + +Run the success case: + +```bash +smithers run workflows/agent-trace-otel-demo.tsx \ + --run-id agent-trace-otel-demo \ + --annotations '{"custom.demo":true,"custom.ticket":"OBS-123"}' +``` + +Run the malformed JSON failure case: + +```bash +smithers run workflows/agent-trace-otel-demo.tsx \ + --run-id agent-trace-otel-demo-fail \ + --input '{"failureMode":"malformed-json"}' \ + --annotations '{"custom.demo":true,"custom.ticket":"OBS-ERR"}' +``` + +Optional durable-local verification: + +```bash +jq 'select(.type == "AgentTraceEvent" or .type == "AgentTraceSummary")' \ + .smithers/executions/agent-trace-otel-demo/logs/stream.ndjson + +find .smithers/executions/agent-trace-otel-demo/logs/agent-trace -type f -maxdepth 1 -print +``` + +## Loki queries + +Smithers OTEL attributes are exposed to Loki as sanitized structured metadata fields such as: + +- `run_id` +- `workflow_path` +- `node_id` +- `node_attempt` +- `agent_family` +- `agent_capture_mode` +- `trace_completeness` +- `event_kind` + +Use `{service_name="smithers-dev"}` as the stream selector, then filter on structured metadata in the LogQL pipeline. Use `| json` to inspect the structured log body. 
+
All events for one run:

```logql
{service_name="smithers-dev"} | run_id="agent-trace-otel-demo"
```

One node attempt:

```logql
{service_name="smithers-dev"} | run_id="agent-trace-otel-demo" | node_id="pi-rich-trace" | node_attempt="1"
```

Thinking deltas only:

```logql
{service_name="smithers-dev"} | run_id="agent-trace-otel-demo" | event_kind="assistant.thinking.delta"
```

Tool execution only (note the backtick string: in LogQL, `\.` inside a double-quoted string is an invalid escape sequence, so regex matchers should use raw backtick strings):

```logql
{service_name="smithers-dev"} | run_id="agent-trace-otel-demo" | event_kind=~`tool\.execution\..*`
```

Capture errors only:

```logql
{service_name="smithers-dev"} | event_kind="capture.error"
```

Inspect the structured JSON body for one run:

```logql
{service_name="smithers-dev"} | run_id="agent-trace-otel-demo" | json
```

Redaction proof query:

```logql
{service_name="smithers-dev"} | run_id="agent-trace-otel-demo" |= "REDACTED"
```

## API query examples

Equivalent direct Loki API checks:

```bash
curl -sG 'http://localhost:3100/loki/api/v1/query_range' \
  --data-urlencode 'query={service_name="smithers-dev"} | run_id="agent-trace-otel-demo"' \
  --data-urlencode 'limit=200' | jq '.data.result[] | {stream, values: (.values | length)}'

curl -sG 'http://localhost:3100/loki/api/v1/query_range' \
  --data-urlencode 'query={service_name="smithers-dev"} | run_id="agent-trace-otel-demo" | event_kind="assistant.thinking.delta"' \
  --data-urlencode 'limit=20' | jq '.data.result[]?.values[]?[1]'
```

## Tempo trace checks

Tempo search should show Smithers spans once a workflow has run:

```bash
curl -s http://localhost:3200/api/search | jq .
curl -s http://localhost:3200/api/search/tags | jq .
curl -s 'http://localhost:3200/api/search/tag/service.name/values' | jq .
curl -s 'http://localhost:3200/api/search/tag/runId/values' | jq . 
+``` + +Inspect one trace directly: + +```bash +TRACE_ID=$(curl -s http://localhost:3200/api/search \ + | jq -r '.traces[] | select(.rootTraceName=="engine:run-workflow") | .traceID' \ + | head -n 1) + +curl -s http://localhost:3200/api/traces/$TRACE_ID | jq . +``` + +Expected trace attributes include at least: + +- `service.name = smithers-dev` +- `runId = ` +- `workflowPath = ` + +## Verification checklist + +- stack starts successfully in Docker +- Loki is present and queryable +- collector logs pipeline is active +- Pi traces show text deltas, thinking deltas, tool execution lifecycle, final message, usage, and run/node/attempt correlation +- second agent family is exported with truthful `final-only` completeness classification +- Gemini `stream-json` attempts preserve structured deltas truthfully +- Kimi `stream-json` attempts preserve structured tool lifecycle truthfully +- malformed or truncated structured streams emit `capture.error` and classify as `capture-failed` +- artifact write failures emit `capture.warning` and degrade to `partial-observed` without losing durable DB truth +- Tempo search shows Smithers spans and trace attributes including `runId` +- Prometheus is still scraping the collector successfully +- secrets are redacted from canonical events, OTEL log bodies, and persisted trace artifacts + +## Automated coverage + +The fixture-style and workflow-backed tests in `tests/agent-trace.test.tsx` now cover: + +- Pi rich success and malformed JSON failure +- SDK truthful `final-only` +- Claude Code structured stream preservation +- Codex structured delta + usage preservation +- Gemini structured `stream-json` and coarse `cli-json` classification +- Kimi structured `stream-json` tool lifecycle preservation +- truncated structured stream failure classification +- artifact write failure classification +- OTEL body/attribute shaping for Loki query fields diff --git a/docs/guides/monitoring-logs.mdx b/docs/guides/monitoring-logs.mdx index 
9e355de9..ea086c94 100644 --- a/docs/guides/monitoring-logs.mdx +++ b/docs/guides/monitoring-logs.mdx @@ -58,8 +58,9 @@ That starts: - OTLP collector on `http://localhost:4318` - Prometheus on `http://localhost:9090` -- Grafana on `http://localhost:3000` +- Grafana on `http://localhost:3001` - Tempo on `http://localhost:3200` +- Loki on `http://localhost:3100` The collector exports metrics on `:8889`, Prometheus scrapes that endpoint, and Grafana comes pre-provisioned with a Smithers dashboard. diff --git a/observability/docker-compose.otel.yml b/observability/docker-compose.otel.yml index 7dd0e55d..510b3f23 100644 --- a/observability/docker-compose.otel.yml +++ b/observability/docker-compose.otel.yml @@ -2,7 +2,8 @@ version: "3.9" services: otel-collector: - image: otel/opentelemetry-collector-contrib:latest + image: otel/opentelemetry-collector-contrib:0.148.0 + restart: unless-stopped command: ["--config", "/etc/otelcol/config.yml"] volumes: - ./otel-collector-config.yml:/etc/otelcol/config.yml:ro @@ -11,28 +12,71 @@ services: - "4318:4318" # HTTP (Smithers sends here) - "8889:8889" # Prometheus exporter scrape endpoint depends_on: - - tempo + tempo: + condition: service_healthy + loki: + condition: service_healthy + healthcheck: + test: ["CMD", "/otelcol-contrib", "--version"] + interval: 10s + timeout: 5s + retries: 12 + start_period: 10s + + loki: + image: grafana/loki:3.3.2 + restart: unless-stopped + command: ["-config.file=/etc/loki/local-config.yaml"] + volumes: + - ./loki/loki-config.yaml:/etc/loki/local-config.yaml:ro + - ./data/loki:/loki + ports: + - "3100:3100" + healthcheck: + test: ["CMD", "wget", "-q", "-O-", "http://127.0.0.1:3100/ready"] + interval: 10s + timeout: 5s + retries: 18 + start_period: 10s prometheus: image: prom/prometheus:v3.2.0 + restart: unless-stopped volumes: - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ./data/prometheus:/prometheus ports: - "9090:9090" depends_on: - - otel-collector + otel-collector: + 
condition: service_healthy + healthcheck: + test: ["CMD", "wget", "-q", "-O-", "http://127.0.0.1:9090/-/ready"] + interval: 10s + timeout: 5s + retries: 18 + start_period: 10s tempo: image: grafana/tempo:2.7.0 + restart: unless-stopped command: ["-config.file=/etc/tempo/tempo.yml"] volumes: - ./tempo/tempo.yml:/etc/tempo/tempo.yml:ro + - ./data/tempo:/tmp/tempo ports: - "3200:3200" # Tempo query API - "4317" # OTLP gRPC (internal) + healthcheck: + test: ["CMD", "wget", "-q", "-O-", "http://127.0.0.1:3200/api/search"] + interval: 10s + timeout: 5s + retries: 24 + start_period: 20s grafana: image: grafana/grafana:11.5.0 + restart: unless-stopped environment: - GF_SECURITY_ADMIN_PASSWORD=admin - GF_AUTH_ANONYMOUS_ENABLED=true @@ -40,8 +84,19 @@ services: volumes: - ./grafana/provisioning:/etc/grafana/provisioning:ro - ./grafana/dashboards:/var/lib/grafana/dashboards:ro + - ./data/grafana:/var/lib/grafana ports: - "3001:3000" depends_on: - - prometheus - - tempo + prometheus: + condition: service_healthy + tempo: + condition: service_healthy + loki: + condition: service_healthy + healthcheck: + test: ["CMD", "wget", "-q", "-O-", "http://127.0.0.1:3000/api/health"] + interval: 10s + timeout: 5s + retries: 24 + start_period: 20s diff --git a/observability/grafana/provisioning/datasources/datasources.yml b/observability/grafana/provisioning/datasources/datasources.yml index 8b8c6244..3c885539 100644 --- a/observability/grafana/provisioning/datasources/datasources.yml +++ b/observability/grafana/provisioning/datasources/datasources.yml @@ -14,6 +14,15 @@ datasources: - name: traceID datasourceUid: tempo + - name: Loki + type: loki + uid: loki + access: proxy + url: http://loki:3100 + editable: false + jsonData: + maxLines: 1000 + - name: Tempo type: tempo uid: tempo @@ -21,6 +30,17 @@ datasources: url: http://tempo:3200 editable: false jsonData: + tracesToLogs: + datasourceUid: loki + tags: ["service.name"] + mappedTags: + - key: service.name + value: service.name + 
mapTagNamesEnabled: false + spanStartTimeShift: "-1h" + spanEndTimeShift: "1h" + filterByTraceID: false + filterBySpanID: false tracesToMetrics: datasourceUid: prometheus spanStartTimeShift: "-1h" diff --git a/observability/loki/loki-config.yaml b/observability/loki/loki-config.yaml new file mode 100644 index 00000000..eb20f384 --- /dev/null +++ b/observability/loki/loki-config.yaml @@ -0,0 +1,35 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + allow_structured_metadata: true + +ruler: + alertmanager_url: http://localhost:9093 + +analytics: + reporting_enabled: false diff --git a/observability/otel-collector-config.yml b/observability/otel-collector-config.yml index 68f29b27..daa29aef 100644 --- a/observability/otel-collector-config.yml +++ b/observability/otel-collector-config.yml @@ -10,10 +10,12 @@ exporters: prometheus: endpoint: "0.0.0.0:8889" namespace: smithers - otlp/tempo: + otlp_grpc/tempo: endpoint: "tempo:4317" tls: insecure: true + otlp_http/loki: + endpoint: "http://loki:3100/otlp" processors: batch: @@ -29,4 +31,8 @@ service: traces: receivers: [otlp] processors: [batch] - exporters: [otlp/tempo] + exporters: [otlp_grpc/tempo] + logs: + receivers: [otlp] + processors: [batch] + exporters: [otlp_http/loki] diff --git a/scripts/obs-reset.sh b/scripts/obs-reset.sh new file mode 100755 index 00000000..1dfdf798 --- /dev/null +++ b/scripts/obs-reset.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +COMPOSE_FILE="$ROOT_DIR/observability/docker-compose.otel.yml" + +if ! 
command -v docker >/dev/null 2>&1; then + echo "docker is required" >&2 + exit 1 +fi + +DATA_DIR="$ROOT_DIR/observability/data" + +echo "[obs-reset] bringing stack down and removing volumes..." +docker compose -f "$COMPOSE_FILE" down -v --remove-orphans || true + +echo "[obs-reset] resetting local data dirs..." +rm -rf "$DATA_DIR" +mkdir -p "$DATA_DIR"/loki "$DATA_DIR"/tempo "$DATA_DIR"/prometheus "$DATA_DIR"/grafana +chmod -R 0777 "$DATA_DIR" + +echo "[obs-reset] starting fresh stack..." +docker compose -f "$COMPOSE_FILE" up -d + +echo "[obs-reset] waiting for health..." +"$ROOT_DIR/scripts/obs-wait-healthy.sh" + +echo "[obs-reset] stack is healthy" diff --git a/scripts/obs-wait-healthy.sh b/scripts/obs-wait-healthy.sh new file mode 100755 index 00000000..57587e0a --- /dev/null +++ b/scripts/obs-wait-healthy.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +COMPOSE_FILE="$ROOT_DIR/observability/docker-compose.otel.yml" +TIMEOUT_SECONDS="${1:-240}" +START_TS=$(date +%s) + +services=(loki tempo otel-collector prometheus grafana) + +status_of() { + local container=$1 + docker inspect "$container" --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}' 2>/dev/null || echo "missing" +} + +container_name() { + local service=$1 + docker compose -f "$COMPOSE_FILE" ps -q "$service" | head -n 1 +} + +echo "[obs-wait] timeout=${TIMEOUT_SECONDS}s" + +while true; do + all_ready=1 + for service in "${services[@]}"; do + cid=$(container_name "$service") + if [[ -z "$cid" ]]; then + echo "[obs-wait] $service: missing" + all_ready=0 + continue + fi + state=$(status_of "$cid") + echo "[obs-wait] $service: $state" + if [[ "$state" != "healthy" && "$state" != "running" ]]; then + all_ready=0 + fi + if [[ "$service" != "otel-collector" && "$state" != "healthy" ]]; then + all_ready=0 + fi + done + + if [[ "$all_ready" == "1" ]]; then + break + fi + + now=$(date +%s) + if (( now - 
START_TS >= TIMEOUT_SECONDS )); then + echo "[obs-wait] timed out waiting for health" >&2 + docker compose -f "$COMPOSE_FILE" ps >&2 || true + exit 1 + fi + + sleep 5 + echo + done + +echo "[obs-wait] endpoint checks" +curl --max-time 10 -fsS http://localhost:3100/ready >/dev/null +curl --max-time 10 -fsS http://localhost:9090/-/ready >/dev/null +curl --max-time 20 -fsS http://localhost:3001/api/health >/dev/null + +# Tempo can report not-ready briefly even when query/search already works, so allow retries. +tempo_ready=0 +for _ in $(seq 1 12); do + if curl --max-time 10 -fsS http://localhost:3200/ready >/dev/null; then + tempo_ready=1 + break + fi + sleep 5 +done + +if [[ "$tempo_ready" != "1" ]]; then + echo "[obs-wait] tempo never became ready" >&2 + exit 1 +fi + +echo "[obs-wait] healthy" diff --git a/scripts/verify-observability.sh b/scripts/verify-observability.sh new file mode 100755 index 00000000..e4e1f194 --- /dev/null +++ b/scripts/verify-observability.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd) +COMPOSE_FILE="$ROOT_DIR/observability/docker-compose.otel.yml" +STAMP=$(date +%Y%m%d-%H%M%S) +OUT_DIR="$ROOT_DIR/tmp/verification/$STAMP" +WORKFLOW_PATH="$ROOT_DIR/workflows/agent-trace-otel-demo.tsx" +RUN_ID="agent-trace-otel-demo-$STAMP" +FAIL_RUN_ID="agent-trace-otel-demo-fail-$STAMP" +mkdir -p "$OUT_DIR" + +export SMITHERS_OTEL_ENABLED=1 +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +export OTEL_SERVICE_NAME=smithers-dev +export SMITHERS_LOG_FORMAT=json + +run_success() { + bun run "$ROOT_DIR/src/cli/index.ts" run "$WORKFLOW_PATH" --run-id "$RUN_ID" --annotations '{"custom.demo":true,"custom.ticket":"OBS-123"}' +} + +run_failure() { + bun run "$ROOT_DIR/src/cli/index.ts" run "$WORKFLOW_PATH" --run-id "$FAIL_RUN_ID" --input '{"failureMode":"malformed-json"}' --annotations '{"custom.demo":true,"custom.ticket":"OBS-ERR"}' +} + +retry_until() { + local name=$1 + local attempts=$2 + local validator=$3 + local command=$4 + local delay=2 + local out="$OUT_DIR/$name.json" + + for attempt in $(seq 1 "$attempts"); do + echo "[verify] $name attempt $attempt/$attempts" + eval "$command" > "$out.tmp" + if jq -e "$validator" "$out.tmp" >/dev/null 2>&1; then + mv "$out.tmp" "$out" + return 0 + fi + sleep "$delay" + delay=$((delay * 2)) + done + + echo "[verify] failed: $name" >&2 + cat "$out.tmp" >&2 || true + return 1 +} + +loki_query() { + local query=$1 + local limit=${2:-200} + curl --max-time 30 -sG 'http://localhost:3100/loki/api/v1/query_range' \ + --data-urlencode "query=$query" \ + --data-urlencode "limit=$limit" +} + +tempo_search() { + curl --max-time 30 -s 'http://localhost:3200/api/search' +} + +tempo_trace() { + local trace_id=$1 + curl --max-time 30 -s "http://localhost:3200/api/traces/$trace_id" +} + +append_json() { + local title=$1 + local path=$2 + { + echo + echo "## $title" + echo + echo '```json' + cat "$path" + echo + echo '```' + } >> "$OUT_DIR/report.md" +} + +trace_summary_json() { + local path=$1 + node -e "const 
fs=require('fs'); const rows=fs.readFileSync(process.argv[1],'utf8').trim().split('\n').map(JSON.parse); console.log(JSON.stringify(rows.at(-1).summary));" "$path" +} + +echo "[verify] reset stack" +"$ROOT_DIR/scripts/obs-reset.sh" + +echo "[verify] capture stack state" +docker compose -f "$COMPOSE_FILE" ps > "$OUT_DIR/compose-ps.txt" +curl --max-time 20 -s http://localhost:3001/api/health | jq . > "$OUT_DIR/grafana-health.json" +curl --max-time 20 -s http://localhost:3001/api/datasources | jq 'map({name,type,url,uid})' > "$OUT_DIR/grafana-datasources.json" +curl --max-time 20 -s http://localhost:9090/api/v1/query --data-urlencode 'query=up' | jq . > "$OUT_DIR/prometheus-up.json" + +echo "[verify] run success workflow" +run_success | tee "$OUT_DIR/success-run.txt" + +echo "[verify] run failure workflow" +set +e +run_failure | tee "$OUT_DIR/failure-run.txt" +FAIL_EXIT=$? +set -e +printf '%s\n' "$FAIL_EXIT" > "$OUT_DIR/failure-exit-code.txt" + +sleep 6 + +retry_until loki-all-events 6 '.streams >= 2' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\"' 200 | jq '{streams:(.data.result|length), counts:(.data.result|map(.values|length)), lines:[.data.result[]?.values[]?[1]][0:5]}'" + +retry_until loki-node-attempt 6 '.streams >= 10' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"pi-rich-trace\" | node_attempt=\"1\"' 200 | jq '{streams:(.data.result|length), counts:(.data.result|map(.values|length))}'" + +retry_until loki-claude-structured 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"claude-structured-trace\"' 50 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]][0:10]}'" + +retry_until loki-gemini-structured 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"gemini-structured-trace\"' 50 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]][0:10]}'" + +retry_until 
loki-codex-structured 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"codex-structured-trace\"' 50 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]][0:10]}'" + +retry_until loki-thinking 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | event_kind=\"assistant.thinking.delta\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-tool-start 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | event_kind=\"tool.execution.start\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-tool-update 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | event_kind=\"tool.execution.update\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-tool-end 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | event_kind=\"tool.execution.end\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-sdk-final-only 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"sdk-final-only\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-claude-message-update 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"claude-structured-trace\" | event_kind=\"message.update\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-gemini-text-delta 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"gemini-structured-trace\" | event_kind=\"assistant.text.delta\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + 
+retry_until loki-claude-usage 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"claude-structured-trace\" | event_kind=\"usage\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-gemini-usage 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"gemini-structured-trace\" | event_kind=\"usage\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-codex-text-delta 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"codex-structured-trace\" | event_kind=\"assistant.text.delta\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-codex-usage 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" | node_id=\"codex-structured-trace\" | event_kind=\"usage\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until trace-summary-claude 6 '.captureMode == "cli-json-stream" and .traceCompleteness == "full-observed" and (.missingExpectedEventKinds | length) == 0' \ + "trace_summary_json '$ROOT_DIR/workflows/.smithers/executions/$RUN_ID/logs/agent-trace/claude-structured-trace-0-1.ndjson'" + +retry_until trace-summary-codex 6 '.captureMode == "cli-json-stream" and .traceCompleteness == "full-observed" and (.missingExpectedEventKinds | length) == 0' \ + "trace_summary_json '$ROOT_DIR/workflows/.smithers/executions/$RUN_ID/logs/agent-trace/codex-structured-trace-0-1.ndjson'" + +retry_until loki-capture-errors 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$FAIL_RUN_ID\" | event_kind=\"capture.error\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-redaction-presence 6 '.streams >= 1' \ + "loki_query '{service_name=\"smithers-dev\"} | 
run_id=\"$RUN_ID\" |= \"REDACTED_SECRET\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until loki-secret-absence 6 '.streams == 0' \ + "loki_query '{service_name=\"smithers-dev\"} | run_id=\"$RUN_ID\" |= \"sk_demo_secret_123456789\"' 20 | jq '{streams:(.data.result|length), lines:[.data.result[]?.values[]?[1]]}'" + +retry_until tempo-search 6 '.smithers | length >= 1' \ + "tempo_search | jq '{smithers:[.traces[] | select(.rootServiceName==\"smithers-dev\") | {traceID, rootServiceName, rootTraceName}]}'" + +TRACE_ID="" +for candidate in $(jq -r '.smithers[]?.traceID' "$OUT_DIR/tempo-search.json"); do + if tempo_trace "$candidate" \ + | jq '{resourceAttrs:[.batches[].resource.attributes], spans:[.batches[].scopeSpans[].spans[] | {name, attrs:.attributes}]}' > "$OUT_DIR/tempo-trace-candidate.json.tmp"; then + if jq -e ".spans | any(.attrs | any(.key==\"runId\" and .value.stringValue==\"$RUN_ID\"))" "$OUT_DIR/tempo-trace-candidate.json.tmp" >/dev/null 2>&1; then + TRACE_ID="$candidate" + mv "$OUT_DIR/tempo-trace-candidate.json.tmp" "$OUT_DIR/tempo-trace-with-runid.json" + break + fi + fi +done + +if [[ -z "$TRACE_ID" ]]; then + echo "[verify] failed: unable to find Tempo trace for success run id $RUN_ID" >&2 + exit 1 +fi + +cat > "$OUT_DIR/report.md" <; }; diff --git a/src/SmithersEvent.ts b/src/SmithersEvent.ts index 6b25e4ff..87e73384 100644 --- a/src/SmithersEvent.ts +++ b/src/SmithersEvent.ts @@ -1,4 +1,5 @@ import type { RunStatus } from "./RunStatus"; +import type { AgentTraceSummary, CanonicalAgentTraceEvent } from "./agent-trace"; export type SmithersEvent = | { type: "RunStarted"; runId: string; timestampMs: number } @@ -108,6 +109,7 @@ export type SmithersEvent = nodeId: string; iteration: number; attempt: number; + toolCallId: string; toolName: string; seq: number; timestampMs: number; @@ -118,6 +120,7 @@ export type SmithersEvent = nodeId: string; iteration: number; attempt: number; + toolCallId: string; 
toolName: string; seq: number; status: "success" | "error"; @@ -194,4 +197,22 @@ export type SmithersEvent = cacheWriteTokens?: number; reasoningTokens?: number; timestampMs: number; + } + | { + type: "AgentTraceEvent"; + runId: string; + nodeId: string; + iteration: number; + attempt: number; + trace: CanonicalAgentTraceEvent; + timestampMs: number; + } + | { + type: "AgentTraceSummary"; + runId: string; + nodeId: string; + iteration: number; + attempt: number; + summary: AgentTraceSummary; + timestampMs: number; }; diff --git a/src/agent-trace.ts b/src/agent-trace.ts new file mode 100644 index 00000000..5c509a03 --- /dev/null +++ b/src/agent-trace.ts @@ -0,0 +1,1687 @@ +import { appendFile, mkdir, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import type { SmithersEvent } from "./SmithersEvent"; +import type { EventBus } from "./events"; +import { logErrorAwait, logInfoAwait, logWarningAwait } from "./effect/logging"; +import { extractTextFromJsonValue } from "./utils/text-extraction"; +import { nowMs } from "./utils/time"; +import { normalizeTokenUsage } from "./utils/usage"; + +export type AgentFamily = + | "pi" + | "codex" + | "claude-code" + | "gemini" + | "kimi" + | "openai" + | "anthropic" + | "amp" + | "forge" + | "unknown"; + +export type AgentCaptureMode = + | "sdk-events" + | "rpc-events" + | "cli-json-stream" + | "cli-json" + | "cli-text" + | "artifact-import"; + +export type TraceCompleteness = + | "full-observed" + | "partial-observed" + | "final-only" + | "capture-failed"; + +export type CanonicalAgentTraceEventKind = + | "session.start" + | "session.end" + | "turn.start" + | "turn.end" + | "message.start" + | "message.update" + | "message.end" + | "assistant.text.delta" + | "assistant.thinking.delta" + | "assistant.message.final" + | "tool.execution.start" + | "tool.execution.update" + | "tool.execution.end" + | "tool.result" + | "retry.start" + | "retry.end" + | "compaction.start" + | "compaction.end" + | "stderr" + | 
"stdout" + | "usage" + | "capture.warning" + | "capture.error" + | "artifact.created"; + +export type CanonicalAgentTraceEvent = { + traceVersion: "1"; + runId: string; + workflowPath?: string; + workflowHash?: string; + nodeId: string; + iteration: number; + attempt: number; + timestampMs: number; + event: { + sequence: number; + kind: CanonicalAgentTraceEventKind; + phase: + | "agent" + | "turn" + | "message" + | "tool" + | "session" + | "capture" + | "artifact"; + }; + source: { + agentFamily: AgentFamily; + captureMode: AgentCaptureMode; + rawType?: string; + rawEventId?: string; + observed: boolean; + }; + traceCompleteness: TraceCompleteness; + payload: Record | null; + raw: unknown; + redaction: { + applied: boolean; + ruleIds: string[]; + }; + annotations: Record; +}; + +export type AgentTraceSummary = { + traceVersion: "1"; + runId: string; + workflowPath?: string; + workflowHash?: string; + nodeId: string; + iteration: number; + attempt: number; + traceStartedAtMs: number; + traceFinishedAtMs: number; + agentFamily: AgentFamily; + agentId?: string; + model?: string; + captureMode: AgentCaptureMode; + traceCompleteness: TraceCompleteness; + unsupportedEventKinds: CanonicalAgentTraceEventKind[]; + missingExpectedEventKinds: CanonicalAgentTraceEventKind[]; + rawArtifactRefs: string[]; +}; + +export type AgentTraceSmithersEvent = { + type: "AgentTraceEvent"; + runId: string; + nodeId: string; + iteration: number; + attempt: number; + trace: CanonicalAgentTraceEvent; + timestampMs: number; +}; + +export type AgentTraceSummarySmithersEvent = { + type: "AgentTraceSummary"; + runId: string; + nodeId: string; + iteration: number; + attempt: number; + summary: AgentTraceSummary; + timestampMs: number; +}; + +export type AgentTraceCapabilityProfile = { + sessionMetadata: boolean; + assistantTextDeltas: boolean; + visibleThinkingDeltas: boolean; + finalAssistantMessage: boolean; + toolExecutionStart: boolean; + toolExecutionUpdate: boolean; + toolExecutionEnd: 
boolean; + retryEvents: boolean; + compactionEvents: boolean; + rawStderrDiagnostics: boolean; + persistedSessionArtifact: boolean; +}; + +export const agentTraceCapabilities: Record< + AgentFamily, + AgentTraceCapabilityProfile +> = { + pi: { + sessionMetadata: true, + assistantTextDeltas: true, + visibleThinkingDeltas: true, + finalAssistantMessage: true, + toolExecutionStart: true, + toolExecutionUpdate: true, + toolExecutionEnd: true, + retryEvents: true, + compactionEvents: true, + rawStderrDiagnostics: true, + persistedSessionArtifact: true, + }, + codex: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: false, + toolExecutionUpdate: false, + toolExecutionEnd: false, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: true, + persistedSessionArtifact: false, + }, + "claude-code": { + sessionMetadata: false, + assistantTextDeltas: true, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: false, + toolExecutionUpdate: false, + toolExecutionEnd: false, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: true, + persistedSessionArtifact: false, + }, + gemini: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: false, + toolExecutionUpdate: false, + toolExecutionEnd: false, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: true, + persistedSessionArtifact: false, + }, + kimi: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: false, + toolExecutionUpdate: false, + toolExecutionEnd: false, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: true, + persistedSessionArtifact: false, + }, + openai: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: 
false, + finalAssistantMessage: true, + toolExecutionStart: true, + toolExecutionUpdate: false, + toolExecutionEnd: true, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: false, + persistedSessionArtifact: false, + }, + anthropic: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: true, + toolExecutionUpdate: false, + toolExecutionEnd: true, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: false, + persistedSessionArtifact: false, + }, + amp: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: true, + toolExecutionUpdate: false, + toolExecutionEnd: true, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: true, + persistedSessionArtifact: false, + }, + forge: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: true, + toolExecutionUpdate: false, + toolExecutionEnd: true, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: true, + persistedSessionArtifact: false, + }, + unknown: { + sessionMetadata: false, + assistantTextDeltas: false, + visibleThinkingDeltas: false, + finalAssistantMessage: true, + toolExecutionStart: true, + toolExecutionUpdate: false, + toolExecutionEnd: true, + retryEvents: false, + compactionEvents: false, + rawStderrDiagnostics: true, + persistedSessionArtifact: false, + }, +}; + +const capabilityKindMap: Array< + [keyof AgentTraceCapabilityProfile, CanonicalAgentTraceEventKind[]] +> = [ + ["sessionMetadata", ["session.start", "session.end"]], + ["assistantTextDeltas", ["assistant.text.delta"]], + ["visibleThinkingDeltas", ["assistant.thinking.delta"]], + ["finalAssistantMessage", ["assistant.message.final"]], + ["toolExecutionStart", ["tool.execution.start"]], + ["toolExecutionUpdate", 
["tool.execution.update"]], + ["toolExecutionEnd", ["tool.execution.end", "tool.result"]], + ["retryEvents", ["retry.start", "retry.end"]], + ["compactionEvents", ["compaction.start", "compaction.end"]], + ["rawStderrDiagnostics", ["stderr"]], + ["persistedSessionArtifact", ["artifact.created"]], +]; + +export function resolveAgentTraceCapabilities( + agentFamily: AgentFamily, + captureMode: AgentCaptureMode, +): AgentTraceCapabilityProfile { + const base = { + ...agentTraceCapabilities[agentFamily], + // Smithers persists a canonical NDJSON trace artifact for every successful + // flush regardless of the upstream agent family. + persistedSessionArtifact: true, + }; + + if (captureMode === "sdk-events" || captureMode === "cli-text") { + return base; + } + + if (agentFamily === "codex") { + return { + ...base, + assistantTextDeltas: captureMode === "cli-json-stream", + toolExecutionStart: captureMode === "cli-json-stream", + toolExecutionUpdate: captureMode === "cli-json-stream", + toolExecutionEnd: captureMode === "cli-json-stream", + }; + } + + if (agentFamily === "claude-code") { + return { + ...base, + toolExecutionStart: captureMode === "cli-json-stream", + toolExecutionUpdate: captureMode === "cli-json-stream", + toolExecutionEnd: captureMode === "cli-json-stream", + }; + } + + if (agentFamily === "gemini") { + return { + ...base, + assistantTextDeltas: captureMode === "cli-json-stream", + toolExecutionStart: captureMode === "cli-json-stream", + toolExecutionUpdate: captureMode === "cli-json-stream", + toolExecutionEnd: captureMode === "cli-json-stream", + }; + } + + if (agentFamily === "kimi") { + return { + ...base, + assistantTextDeltas: captureMode === "cli-json-stream", + toolExecutionStart: captureMode === "cli-json-stream", + toolExecutionUpdate: captureMode === "cli-json-stream", + toolExecutionEnd: captureMode === "cli-json-stream", + }; + } + + return base; +} + +function unsupportedKindsForCapabilities( + profile: AgentTraceCapabilityProfile, +): 
CanonicalAgentTraceEventKind[] {
  const kinds: CanonicalAgentTraceEventKind[] = [];
  for (const [field, mappedKinds] of capabilityKindMap) {
    if (!profile[field]) kinds.push(...mappedKinds);
  }
  return kinds;
}

/**
 * Best-effort agent family detection from the agent's constructor name and
 * `id`, both lowercased and concatenated into a single haystack.
 *
 * The match is a plain substring test, so more specific names must be checked
 * before shorter ones: "anthropic" (and e.g. "api") contains the substring
 * "pi", so testing "pi" first misclassified Anthropic agents as the Pi
 * family. "pi" is therefore matched last.
 *
 * @param agent opaque agent instance; only `constructor.name` and `id` are read
 * @returns the detected family, or "unknown" when nothing matches
 */
export function detectAgentFamily(agent: any): AgentFamily {
  const constructorName = String(agent?.constructor?.name ?? "").toLowerCase();
  const idName = String(agent?.id ?? "").toLowerCase();
  const name =
    constructorName && constructorName !== "object"
      ? `${constructorName} ${idName}`
      : idName;
  if (name.includes("codex")) return "codex";
  if (name.includes("claude")) return "claude-code";
  if (name.includes("gemini")) return "gemini";
  if (name.includes("kimi")) return "kimi";
  if (name.includes("openai")) return "openai";
  if (name.includes("anthropic")) return "anthropic";
  if (name.includes("amp")) return "amp";
  if (name.includes("forge")) return "forge";
  // Last: "pi" is a substring of several other family/id names.
  if (name.includes("pi")) return "pi";
  return "unknown";
}

/**
 * Infers how this attempt's output will be captured.
 *
 * Pi agents select via `opts.mode`/`mode` ("rpc" → rpc-events, "json" →
 * cli-json-stream, otherwise cli-text). SDK families (openai/anthropic) are
 * always "sdk-events"; other CLIs are classified by their output format flag.
 */
export function detectCaptureMode(agent: any): AgentCaptureMode {
  const family = detectAgentFamily(agent);
  const mode = agent?.opts?.mode ?? agent?.mode;
  if (family === "pi") {
    if (mode === "rpc") return "rpc-events";
    if (mode === "json") return "cli-json-stream";
    return "cli-text";
  }
  const outputFormat = agent?.opts?.outputFormat ?? agent?.outputFormat;
  if (family === "openai" || family === "anthropic") return "sdk-events";
  if (outputFormat === "stream-json") return "cli-json-stream";
  if (outputFormat === "json" || agent?.opts?.json) return "cli-json";
  return "cli-text";
}

/**
 * Keeps only primitive (string/number/boolean) annotation values so they can
 * be attached safely to OTEL attributes and Loki structured metadata.
 * Type arguments restored here; bare `Record` does not compile under tsc.
 */
function normalizeAnnotations(
  annotations?: Record<string, unknown>,
): Record<string, string | number | boolean> {
  const normalized: Record<string, string | number | boolean> = {};
  for (const [key, value] of Object.entries(annotations ?? {})) {
    if (["string", "number", "boolean"].includes(typeof value)) {
      normalized[key] = value as string | number | boolean;
    }
  }
  return normalized;
}

function redactValue(value: unknown): {
  value: unknown;
  applied: boolean;
  ruleIds: string[];
} {
  const rules: Array<{ id: string; pattern: RegExp; replace: string }> = [
    {
      id: "api-key",
      pattern: /\b(?:sk|pk)_[A-Za-z0-9_-]{8,}\b/g,
      replace: "[REDACTED_API_KEY]",
    },
    {
      id: "bearer-token",
      pattern: /Bearer\s+[A-Za-z0-9._-]{8,}/gi,
      replace: "Bearer [REDACTED_TOKEN]",
    },
    {
      id: "auth-header",
      pattern: /"authorization"\s*:\s*"[^"]+"/gi,
      replace: '"authorization":"[REDACTED]"',
    },
    {
      id: "cookie-header",
      pattern: /"cookie"\s*:\s*"[^"]+"/gi,
      replace: '"cookie":"[REDACTED]"',
    },
    {
      id: "secret-ish",
      pattern: /\b(?:api[_-]?key|token|secret|password)=([^\s"']+)/gi,
      // NOTE(review): this replace string is never used — the replace callback
      // special-cases "secret-ish"; the cast looks vestigial. Confirm intent.
      replace: "$&" as unknown as string,
    },
  ];
  const input =
    typeof value === "string" ? value : JSON.stringify(value ??
null);
  let next = input;
  // Track which rule ids actually fired; `<string>` restored — `ruleIds` is
  // declared `string[]`, so an untyped Set would not type-check.
  const applied = new Set<string>();
  for (const rule of rules) {
    const replaced = next.replace(rule.pattern, (match) => {
      applied.add(rule.id);
      if (rule.id === "secret-ish") {
        // Keep the "key=" prefix, redact only the value.
        const idx = match.indexOf("=");
        return `${match.slice(0, idx + 1)}[REDACTED_SECRET]`;
      }
      return rule.replace;
    });
    next = replaced;
  }
  if (applied.size === 0) return { value, applied: false, ruleIds: [] };
  if (typeof value === "string")
    return { value: next, applied: true, ruleIds: [...applied] };
  try {
    // Non-string input was serialized for scanning; try to round-trip it back
    // into a structured value, falling back to the redacted string.
    return { value: JSON.parse(next), applied: true, ruleIds: [...applied] };
  } catch {
    return { value: next, applied: true, ruleIds: [...applied] };
  }
}

// Buckets a canonical event kind into the coarse phase recorded on the event.
function kindPhase(
  kind: CanonicalAgentTraceEventKind,
): CanonicalAgentTraceEvent["event"]["phase"] {
  if (kind.startsWith("session.")) return "session";
  if (kind.startsWith("turn.")) return "turn";
  if (kind.startsWith("message.") || kind.startsWith("assistant."))
    return "message";
  if (kind.startsWith("tool.")) return "tool";
  if (kind.startsWith("artifact.")) return "artifact";
  return "capture";
}

// Which payload extractor a mapped structured event should use.
type PayloadKind = "message" | "tool" | "pi" | "none";

// A table-driven mapping from an upstream raw event type to a canonical kind,
// optionally declaring a kind we then expect to see later in the stream.
type MappedStructuredEvent = {
  kind: CanonicalAgentTraceEventKind;
  payloadKind: PayloadKind;
  expect?: CanonicalAgentTraceEventKind;
};

type NormalizedTraceEvent = {
  kind: CanonicalAgentTraceEventKind;
  payload: Record<string, unknown> | null;
  raw: unknown;
  rawType?: string;
};

type NormalizedTraceBatch = {
  events: NormalizedTraceEvent[];
  expectedKinds?: CanonicalAgentTraceEventKind[];
};

// Pi CLI/RPC event types that map 1:1 onto canonical kinds.
const piSimpleEventMap: Record<string, MappedStructuredEvent> = {
  session: { kind: "session.start", payloadKind: "pi" },
  agent_start: { kind: "session.start", payloadKind: "pi" },
  agent_end: { kind: "session.end", payloadKind: "pi" },
  turn_start: {
    kind: "turn.start",
    payloadKind: "pi",
    expect: "turn.end",
  },
  message_start: { kind: "message.start", payloadKind: "pi" },
  tool_execution_start: {
    kind: "tool.execution.start",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  tool_execution_update: {
    kind: "tool.execution.update",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  tool_execution_end: { kind: "tool.execution.end", payloadKind: "tool" },
  auto_compaction_start: { kind: "compaction.start", payloadKind: "pi" },
  auto_compaction_end: { kind: "compaction.end", payloadKind: "pi" },
  auto_retry_start: { kind: "retry.start", payloadKind: "pi" },
  auto_retry_end: { kind: "retry.end", payloadKind: "pi" },
};

// Cross-vendor structured event types shared by several CLI/SDK streams.
const genericStructuredEventMap: Record<string, MappedStructuredEvent> = {
  message_start: { kind: "message.start", payloadKind: "message" },
  assistant_message_start: { kind: "message.start", payloadKind: "message" },
  "response.started": { kind: "message.start", payloadKind: "message" },
  tool_call_start: {
    kind: "tool.execution.start",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  tool_execution_start: {
    kind: "tool.execution.start",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  "tool_call.started": {
    kind: "tool.execution.start",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  tool_call_delta: {
    kind: "tool.execution.update",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  tool_call_update: {
    kind: "tool.execution.update",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  tool_execution_update: {
    kind: "tool.execution.update",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  "tool_call.delta": {
    kind: "tool.execution.update",
    payloadKind: "tool",
    expect: "tool.execution.end",
  },
  tool_call_end: { kind: "tool.execution.end", payloadKind: "tool" },
  tool_execution_end: { kind: "tool.execution.end", payloadKind: "tool" },
  "tool_call.completed": { kind: "tool.execution.end", payloadKind: "tool" },
  tool_result: { kind: "tool.result", payloadKind: "tool" },
  "tool_result.completed": { kind: "tool.result", payloadKind: "tool" },
};

function
extractGenericDeltaText(parsed: any): string | undefined {
  // First non-empty string among the common vendor delta locations wins.
  const candidates = [
    parsed?.delta?.text,
    parsed?.delta,
    parsed?.text,
    parsed?.content_block?.text,
    parsed?.contentBlock?.text,
    parsed?.message?.text,
    parsed?.message?.content,
    parsed?.output_text,
  ];
  for (const candidate of candidates) {
    if (typeof candidate === "string" && candidate) return candidate;
  }
  return undefined;
}

// Extracts message text from whichever wrapper key the vendor used.
function extractGenericMessageText(parsed: any): string | undefined {
  return extractTextFromJsonValue(
    parsed?.message ?? parsed?.response ?? parsed?.item ?? parsed,
  );
}

// Builds the canonical message payload (role/text/id) from a vendor event.
// `Record<string, unknown>` restored — bare `Record` does not compile.
function extractGenericMessagePayload(parsed: any): Record<string, unknown> {
  const payload: Record<string, unknown> = {};
  const role =
    parsed?.message?.role ?? parsed?.role ?? parsed?.response?.role;
  if (typeof role === "string") payload.role = role;
  const text = extractGenericMessageText(parsed);
  if (text) payload.text = text;
  if (parsed?.id) payload.id = parsed.id;
  return payload;
}

// Builds the canonical tool payload from whichever tool wrapper is present.
function extractGenericToolPayload(parsed: any): Record<string, unknown> {
  const tool =
    parsed?.tool ??
    parsed?.toolCall ??
    parsed?.tool_call ??
    parsed?.toolExecution ??
    parsed;
  return {
    toolCallId: tool?.id ?? tool?.toolCallId ?? parsed?.id,
    toolName: tool?.name ?? tool?.toolName ?? parsed?.toolName,
    argsPreview: tool?.args ?? tool?.arguments ?? parsed?.args,
    resultPreview: tool?.result ?? parsed?.result,
    isError: Boolean(tool?.isError ?? parsed?.isError ?? parsed?.error),
  };
}

// Pi events nest role/text under `message`; id lives at the top level.
function extractPiPayload(parsed: any): Record<string, unknown> {
  const payload: Record<string, unknown> = {};
  if (parsed?.message?.role) payload.role = parsed.message.role;
  const text = extractGenericMessageText(parsed?.message);
  if (text) payload.text = text;
  if (parsed?.id) payload.id = parsed.id;
  return payload;
}

// Dispatches to the payload extractor named by the mapping table entry.
function extractMappedPayload(
  parsed: any,
  payloadKind: PayloadKind,
): Record<string, unknown> | null {
  if (payloadKind === "message") return extractGenericMessagePayload(parsed);
  if (payloadKind === "tool") return extractGenericToolPayload(parsed);
  if (payloadKind === "pi") return extractPiPayload(parsed);
  return {};
}

function buildNormalizedEvent(
  kind: CanonicalAgentTraceEventKind,
  payload: Record<string, unknown> | null,
  raw: unknown,
  rawType?: string,
): NormalizedTraceEvent {
  return { kind, payload, raw, rawType };
}

// Turns a table-mapped vendor event into a one-event batch, propagating the
// table's expected follow-up kind (if any).
function normalizeMappedEvent(
  parsed: any,
  rawType: string,
  mapped: MappedStructuredEvent,
): NormalizedTraceBatch {
  return {
    events: [
      buildNormalizedEvent(
        mapped.kind,
        extractMappedPayload(parsed, mapped.payloadKind),
        parsed,
        rawType,
      ),
    ],
    expectedKinds: mapped.expect ? [mapped.expect] : undefined,
  };
}

// Claude Code stream-json events ("assistant" updates and the final "result").
function normalizeClaudeStructuredEvent(
  parsed: any,
  rawType: string,
): NormalizedTraceBatch | null {
  if (rawType === "assistant") {
    const text = extractGenericMessageText(parsed?.message ?? parsed);
    const events = text
      ? [
          buildNormalizedEvent(
            "message.update",
            extractGenericMessagePayload(parsed?.message ??
parsed),
            parsed,
            rawType,
          ),
        ]
      : // No extractable text: record the raw event type as opaque stdout.
        [
          buildNormalizedEvent("stdout", { eventType: rawType }, parsed, rawType),
        ];
    const usage = normalizeTokenUsage(parsed?.message?.usage);
    if (usage) events.push(buildNormalizedEvent("usage", usage, parsed, rawType));
    return { events };
  }

  if (rawType === "result") {
    // Terminal event: usage first, then the final assistant text if present.
    const events: NormalizedTraceEvent[] = [];
    const usage = normalizeTokenUsage(parsed?.usage);
    if (usage) events.push(buildNormalizedEvent("usage", usage, parsed, rawType));
    const text = extractGenericMessageText(parsed);
    if (text) {
      events.push(
        buildNormalizedEvent("assistant.message.final", { text }, parsed, rawType),
      );
    }
    return events.length > 0 ? { events } : null;
  }

  return null;
}

// Gemini stream events: assistant "message" (delta vs final depends on the
// `delta` flag) and a terminal "result" carrying usage stats.
function normalizeGeminiStructuredEvent(
  parsed: any,
  rawType: string,
): NormalizedTraceBatch | null {
  if (rawType === "message") {
    const text = extractGenericMessageText(parsed);
    if (parsed?.role === "assistant" && typeof text === "string" && text) {
      return {
        events: [
          buildNormalizedEvent(
            parsed?.delta ? "assistant.text.delta" : "assistant.message.final",
            { text },
            parsed,
            rawType,
          ),
        ],
      };
    }
  }

  if (rawType === "result" && parsed?.stats) {
    const usage = normalizeTokenUsage(parsed.stats);
    return usage
      ? { events: [buildNormalizedEvent("usage", usage, parsed, rawType)] }
      : null;
  }

  return null;
}

// Codex thread/turn/item lifecycle events.
function normalizeCodexStructuredEvent(
  parsed: any,
  rawType: string,
): NormalizedTraceBatch | null {
  if (rawType === "thread.started") {
    // Thread start has no canonical kind; keep it as opaque stdout.
    return {
      events: [buildNormalizedEvent("stdout", { eventType: rawType }, parsed, rawType)],
    };
  }

  if (rawType === "turn.started") {
    return {
      events: [buildNormalizedEvent("turn.start", {}, parsed, rawType)],
      expectedKinds: ["turn.end"],
    };
  }

  if (rawType === "item.completed" && parsed?.item?.type === "agent_message") {
    const text = extractGenericMessageText(parsed.item);
    if (typeof text === "string" && text) {
      return {
        events: [
          buildNormalizedEvent(
            "assistant.message.final",
            { text },
            parsed,
            rawType,
          ),
        ],
      };
    }
  }

  if (rawType === "turn.completed") {
    // Emission order: usage, turn.end, then any trailing final message.
    const events: NormalizedTraceEvent[] = [];
    const usage = normalizeTokenUsage(parsed?.usage);
    if (usage) events.push(buildNormalizedEvent("usage", usage, parsed, rawType));
    events.push(buildNormalizedEvent("turn.end", {}, parsed, rawType));
    const text = extractGenericMessageText(parsed);
    if (text) {
      events.push(
        buildNormalizedEvent("assistant.message.final", { text }, parsed, rawType),
      );
    }
    return { events };
  }

  return null;
}

// Pi structured events: simple 1:1 mappings first, then the composite
// turn_end / message_end / message_update shapes; anything else is stdout.
function normalizePiStructuredEvent(
  parsed: any,
  rawType: string,
): NormalizedTraceBatch | null {
  const simple = piSimpleEventMap[rawType];
  if (simple) return normalizeMappedEvent(parsed, rawType, simple);

  if (rawType === "turn_end") {
    const events: NormalizedTraceEvent[] = [
      buildNormalizedEvent("turn.end", extractPiPayload(parsed), parsed, rawType),
    ];
    const text = extractGenericMessageText(parsed?.message);
    if (text) {
      events.push(
        buildNormalizedEvent(
          "assistant.message.final",
          { text },
          parsed?.message,
          rawType,
        ),
      );
    }
    const usage = normalizeTokenUsage(parsed?.message?.usage);
    // Note: the usage event deliberately carries rawType "usage", unlike the
    // surrounding events which carry the upstream rawType.
    if (usage)
      events.push(buildNormalizedEvent("usage", usage, parsed.message.usage, "usage"));
    return { events };
  }

  if (rawType === "message_end") {
    const events: NormalizedTraceEvent[] = [
      buildNormalizedEvent("message.end", extractPiPayload(parsed), parsed, rawType),
    ];
    const text = extractGenericMessageText(parsed?.message);
    // Only assistant messages produce a final-message event.
    if (parsed?.message?.role === "assistant" && text) {
      events.push(
        buildNormalizedEvent(
          "assistant.message.final",
          { text },
          parsed?.message,
          rawType,
        ),
      );
    }
    return { events };
  }

  if (rawType === "message_update") {
    const evt = parsed?.assistantMessageEvent;
    if (evt?.type === "text_delta" && typeof evt.delta === "string") {
      return {
        events: [
          buildNormalizedEvent(
            "assistant.text.delta",
            { text: evt.delta },
            parsed,
            evt.type,
          ),
        ],
      };
    }
    if (
      (evt?.type === "thinking_delta" || evt?.type === "reasoning_delta") &&
      typeof evt.delta === "string"
    ) {
      return {
        events: [
          buildNormalizedEvent(
            "assistant.thinking.delta",
            { text: evt.delta },
            parsed,
            evt.type,
          ),
        ],
      };
    }
    // Non-delta update: keep it as a generic message.update.
    return {
      events: [
        buildNormalizedEvent("message.update", extractPiPayload(parsed), parsed, rawType),
      ],
    };
  }

  // Unknown Pi event types are preserved as opaque stdout.
  return {
    events: [buildNormalizedEvent("stdout", { eventType: rawType }, parsed, rawType)],
  };
}

// Vendor-agnostic fallback normalizer for the shared event vocabulary.
function normalizeSharedStructuredEvent(
  parsed: any,
  rawType: string,
): NormalizedTraceBatch | null {
  const mapped = genericStructuredEventMap[rawType];
  if (mapped) return normalizeMappedEvent(parsed, rawType, mapped);

  if (
    [
      "message_delta",
      "assistant_message.delta",
      "assistant_message_delta",
      "response.output_text.delta",
      "content_block_delta",
    ].includes(rawType)
  ) {
    const text = extractGenericDeltaText(parsed);
    if (typeof text === "string" && text) {
      return {
        events: [
          buildNormalizedEvent("assistant.text.delta", { text }, parsed, rawType),
        ],
      };
    }
  }

  if (
    [
      "thinking_delta",
      "reasoning_delta",
      "response.reasoning.delta",
    ].includes(rawType)
  ) {
    const text = extractGenericDeltaText(parsed);
    if (typeof text === "string" && text) {
      return {
        events: [
          buildNormalizedEvent(
            "assistant.thinking.delta",
            { text },
            parsed,
            rawType,
          ),
        ],
      };
    }
  }

  if (
    [
      "message_end",
      "assistant_message_end",
      "response.completed",
      "message_stop",
    ].includes(rawType)
  ) {
    // Emission order: message.end, optional final text, optional usage.
    const events: NormalizedTraceEvent[] = [
      buildNormalizedEvent(
        "message.end",
        extractGenericMessagePayload(parsed),
        parsed,
        rawType,
      ),
    ];
    const text = extractGenericMessageText(parsed);
    if (text) {
      events.push(
        buildNormalizedEvent("assistant.message.final", { text }, parsed, rawType),
      );
    }
    const usage = normalizeTokenUsage(parsed?.usage);
    if (usage) events.push(buildNormalizedEvent("usage", usage, parsed, rawType));
    return { events };
  }

  return null;
}

// Entry point: family-specific normalizer first, then the shared vocabulary,
// finally an opaque stdout event so nothing in the stream is dropped.
function normalizeStructuredEvent(
  agentFamily: AgentFamily,
  parsed: any,
  rawType: string,
): NormalizedTraceBatch {
  if (agentFamily === "pi") {
    return normalizePiStructuredEvent(parsed, rawType) ??
{
      events: [buildNormalizedEvent("stdout", { eventType: rawType }, parsed, rawType)],
    };
  }

  if (agentFamily === "claude-code") {
    const normalized = normalizeClaudeStructuredEvent(parsed, rawType);
    if (normalized) return normalized;
  }

  if (agentFamily === "gemini") {
    const normalized = normalizeGeminiStructuredEvent(parsed, rawType);
    if (normalized) return normalized;
  }

  if (agentFamily === "codex") {
    const normalized = normalizeCodexStructuredEvent(parsed, rawType);
    if (normalized) return normalized;
  }

  const shared = normalizeSharedStructuredEvent(parsed, rawType);
  if (shared) return shared;

  return {
    events: [buildNormalizedEvent("stdout", { eventType: rawType }, parsed, rawType)],
  };
}

/**
 * Converts a canonical trace event into an OTEL log record shape: a JSON body
 * (payload/raw/redaction/annotations), flat attributes, and a severity.
 *
 * Severity mapping: capture.error → ERROR; capture.warning and stderr → WARN;
 * everything else → INFO. Annotations are namespaced under "custom." unless
 * already prefixed. `Record<string, unknown>` restored — bare `Record` does
 * not compile.
 *
 * @param event canonical trace event to convert
 * @param context optional agent id/model to stamp onto the attributes
 */
export function canonicalTraceEventToOtelLogRecord(
  event: CanonicalAgentTraceEvent,
  context?: { agentId?: string; model?: string },
): {
  body: string;
  attributes: Record<string, unknown>;
  severity: "INFO" | "WARN" | "ERROR";
} {
  const attributes: Record<string, unknown> = {
    "smithers.trace.version": event.traceVersion,
    "run.id": event.runId,
    "workflow.path": event.workflowPath,
    "workflow.hash": event.workflowHash,
    "node.id": event.nodeId,
    "node.iteration": event.iteration,
    "node.attempt": event.attempt,
    "agent.family": event.source.agentFamily,
    "agent.id": context?.agentId,
    "agent.model": context?.model,
    "agent.capture_mode": event.source.captureMode,
    "trace.completeness": event.traceCompleteness,
    "event.kind": event.event.kind,
    "event.phase": event.event.phase,
    "event.sequence": event.event.sequence,
    "source.raw_type": event.source.rawType,
    "source.raw_event_id": event.source.rawEventId,
    "source.observed": event.source.observed,
  };
  for (const [key, value] of Object.entries(event.annotations)) {
    attributes[key.startsWith("custom.") ? key : `custom.${key}`] = value;
  }

  const severity =
    event.event.kind === "capture.error"
      ? "ERROR"
      : event.event.kind === "capture.warning" || event.event.kind === "stderr"
        ? "WARN"
        : "INFO";

  return {
    body: JSON.stringify({
      payload: event.payload,
      raw: event.raw,
      redaction: event.redaction,
      annotations: event.annotations,
    }),
    attributes,
    severity,
  };
}

// Constructor options for AgentTraceCollector. Annotations are raw and are
// filtered by normalizeAnnotations; `Record<string, unknown>` restored.
type TraceCollectorOptions = {
  eventBus: EventBus;
  runId: string;
  workflowPath?: string | null;
  workflowHash?: string | null;
  nodeId: string;
  iteration: number;
  attempt: number;
  agent: any;
  agentId?: string;
  model?: string;
  logDir?: string;
  annotations?: Record<string, unknown>;
};

export class AgentTraceCollector {
  private readonly eventBus: EventBus;
  private readonly runId: string;
  private readonly workflowPath?: string;
  private readonly workflowHash?: string;
  private readonly nodeId: string;
  private readonly iteration: number;
  private readonly attempt: number;
  private readonly agentFamily: AgentFamily;
  private readonly captureMode: AgentCaptureMode;
  private readonly agentId?: string;
  private readonly model?: string;
  // Populated via normalizeAnnotations, which filters to primitives.
  private readonly annotations: Record<string, string | number | boolean>;
  private readonly logDir?: string;
  private readonly capabilities: AgentTraceCapabilityProfile;
  private readonly startedAtMs = nowMs();
  private readonly events: CanonicalAgentTraceEvent[] = [];
  private readonly rawArtifactRefs: string[] = [];
  // Set element types restored; these hold canonical event kinds.
  private readonly seenKinds = new Set<CanonicalAgentTraceEventKind>();
  private readonly directKinds = new Set<CanonicalAgentTraceEventKind>();
  private readonly expectedKinds = new Set<CanonicalAgentTraceEventKind>();
  private readonly failures: string[] = [];
  private readonly warnings: string[] = [];
  private sequence = 0;
  private rawEventSequence = 0;
  private stdoutBuffer = "";
  private stderrBuffer = "";
  private assistantTextBuffer = "";
  private finalText: string | null = null;
  private currentRawEventId?: string;
  private listener?: (event: SmithersEvent) => void;

  constructor(opts: TraceCollectorOptions) {
    this.eventBus = opts.eventBus;
    this.runId = opts.runId;
    this.workflowPath = opts.workflowPath
?? undefined;
    this.workflowHash = opts.workflowHash ?? undefined;
    this.nodeId = opts.nodeId;
    this.iteration = opts.iteration;
    this.attempt = opts.attempt;
    this.agentFamily = detectAgentFamily(opts.agent);
    this.captureMode = detectCaptureMode(opts.agent);
    this.capabilities = resolveAgentTraceCapabilities(
      this.agentFamily,
      this.captureMode,
    );
    this.agentId = opts.agentId;
    this.model = opts.model;
    this.annotations = normalizeAnnotations(opts.annotations);
    this.logDir = opts.logDir;

    // Seed the kinds we expect this attempt to produce; anything expected but
    // never directly observed is reported as missing in the summary.
    const profile = this.capabilities;
    if (profile.sessionMetadata && this.agentFamily === "pi") {
      this.expectedKinds.add("session.start");
      this.expectedKinds.add("session.end");
      this.expectedKinds.add("turn.start");
      this.expectedKinds.add("turn.end");
    }
    if (profile.finalAssistantMessage)
      this.expectedKinds.add("assistant.message.final");
  }

  // Subscribes to the event bus; pair with endListener()/flush().
  begin() {
    this.listener = (event) => this.observeSmithersEvent(event);
    this.eventBus.on("event", this.listener);
  }

  // Idempotent unsubscribe; also called by flush().
  endListener() {
    if (this.listener) this.eventBus.off("event", this.listener);
    this.listener = undefined;
  }

  onStdout(text: string) {
    this.processChunk("stdout", text);
  }

  onStderr(text: string) {
    this.processChunk("stderr", text);
  }

  // Records the attempt's final result: captures the terminal text (unless a
  // streamed final/delta already established it) and any token usage.
  observeResult(result: any) {
    const text = String(result?.text ?? "").trim();
    const rawEventId = this.nextRawEventId("result");
    if (
      text &&
      (!this.finalText ||
        (!this.seenKinds.has("assistant.text.delta") &&
          !this.seenKinds.has("assistant.message.final")))
    ) {
      this.finalText = text;
    }
    // SDK captures have no stream, so the result is the final message.
    if (this.captureMode === "sdk-events" && text) {
      this.pushDerived(
        "assistant.message.final",
        { text },
        text,
        undefined,
        true,
        rawEventId,
      );
    }
    const usage = normalizeTokenUsage(result?.usage ?? result?.totalUsage);
    if (usage) {
      this.pushDerived(
        "usage",
        usage,
        usage,
        "usage",
        true,
        rawEventId,
      );
    }
  }

  // Records a failed attempt; failures force "capture-failed" completeness.
  observeError(error: unknown) {
    this.failures.push(error instanceof Error ? error.message : String(error));
    const rawEventId = this.nextRawEventId("error");
    this.pushDerived(
      "capture.error",
      { error: this.failures.at(-1) },
      { error: this.failures.at(-1) },
      "error",
      true,
      rawEventId,
    );
  }

  // Finalizes the attempt: drains buffers, synthesizes missing terminal
  // events, resolves completeness, persists the NDJSON artifact, then emits
  // every event (bus + OTEL log) followed by the summary. Call exactly once.
  async flush() {
    this.endListener();
    const finishedAtMs = nowMs();
    this.flushStructuredBuffers();
    // Promote the buffered/observed final text into a derived terminal event
    // when the stream never produced one and nothing failed.
    if (
      this.captureMode !== "sdk-events" &&
      !this.seenKinds.has("assistant.message.final") &&
      this.finalText &&
      this.failures.length === 0
    ) {
      this.pushDerived(
        "assistant.message.final",
        { text: this.finalText },
        this.finalText,
        undefined,
        false,
      );
    }
    // A structured stream that produced events but no terminal message is
    // suspicious; downgrade via a warning.
    if (
      (this.captureMode === "cli-json-stream" ||
        this.captureMode === "rpc-events") &&
      this.events.length > 0 &&
      !this.seenKinds.has("assistant.message.final") &&
      this.failures.length === 0
    ) {
      this.warnings.push(
        "structured stream ended without a terminal assistant message",
      );
      this.pushDerived(
        "capture.warning",
        { reason: "missing-terminal-event" },
        { reason: "missing-terminal-event" },
        "capture",
      );
    }

    let traceCompleteness = this.resolveCompleteness();
    let missingExpectedEventKinds = [...this.expectedKinds].filter(
      (kind) => !this.directKinds.has(kind),
    );
    this.applyTraceCompleteness(traceCompleteness);
    let summary: AgentTraceSummary = {
      traceVersion: "1",
      runId: this.runId,
      workflowPath: this.workflowPath,
      workflowHash: this.workflowHash,
      nodeId: this.nodeId,
      iteration: this.iteration,
      attempt: this.attempt,
      traceStartedAtMs: this.startedAtMs,
      traceFinishedAtMs: finishedAtMs,
      agentFamily: this.agentFamily,
      agentId: this.agentId,
      model: this.model,
      captureMode: this.captureMode,
      traceCompleteness,
      unsupportedEventKinds: unsupportedKindsForCapabilities(
        this.capabilities,
      ).filter((kind) => !this.seenKinds.has(kind)),
      missingExpectedEventKinds,
      rawArtifactRefs: this.rawArtifactRefs,
    };

    const persistedArtifact = await this.persistNdjson(summary);
    if (persistedArtifact.ok && persistedArtifact.file) {
      const artifactPath = persistedArtifact.file;
      this.rawArtifactRefs.push(artifactPath);
      this.pushDerived(
        "artifact.created",
        {
          artifactKind: "agent-trace.ndjson",
          artifactPath,
          contentType: "application/x-ndjson",
        },
        {
          artifactKind: "agent-trace.ndjson",
          artifactPath,
          contentType: "application/x-ndjson",
        },
        "artifact",
      );
      // The artifact.created event was pushed after completeness was applied,
      // so re-apply to stamp it too.
      this.applyTraceCompleteness(traceCompleteness);
      summary = { ...summary, rawArtifactRefs: [...this.rawArtifactRefs] };
      try {
        // Rewrite so the persisted artifact contains its own final summary.
        await this.rewriteNdjson(artifactPath, summary);
      } catch (error) {
        const message =
          error instanceof Error ? error.message : String(error);
        this.warnings.push(message);
        this.pushDerived(
          "capture.warning",
          { reason: "artifact-rewrite-failed", error: message },
          { reason: "artifact-rewrite-failed", error: message },
          "artifact",
        );
        // The new warning can change completeness; recompute and re-stamp.
        traceCompleteness = this.resolveCompleteness();
        missingExpectedEventKinds = [...this.expectedKinds].filter(
          (kind) => !this.directKinds.has(kind),
        );
        this.applyTraceCompleteness(traceCompleteness);
        summary = {
          ...summary,
          traceCompleteness,
          missingExpectedEventKinds,
          rawArtifactRefs: [...this.rawArtifactRefs],
        };
      }
    } else if (!persistedArtifact.ok) {
      this.warnings.push(persistedArtifact.error);
      this.pushDerived(
        "capture.warning",
        { reason: "artifact-write-failed", error: persistedArtifact.error },
        { reason: "artifact-write-failed", error: persistedArtifact.error },
        "artifact",
      );
      traceCompleteness = this.resolveCompleteness();
      missingExpectedEventKinds = [...this.expectedKinds].filter(
        (kind) => !this.directKinds.has(kind),
      );
      this.applyTraceCompleteness(traceCompleteness);
      summary = {
        ...summary,
        traceCompleteness,
        missingExpectedEventKinds,
        rawArtifactRefs: [...this.rawArtifactRefs],
      };
    }

    this.applyTraceCompleteness(traceCompleteness);
    // Emit every collected event to the bus and to the OTEL logger, mapping
    // record severity to the matching log level.
    for (const event of this.events) {
      const smithersEvent: AgentTraceSmithersEvent = {
        type: "AgentTraceEvent",
        runId: this.runId,
        nodeId: this.nodeId,
        iteration: this.iteration,
        attempt: this.attempt,
        trace: event,
        timestampMs: event.timestampMs,
      };
      await this.eventBus.emitEventQueued(
        smithersEvent as unknown as SmithersEvent,
      );
      const record = canonicalTraceEventToOtelLogRecord(event, {
        agentId: this.agentId,
        model: this.model,
      });
      if (record.severity === "ERROR") {
        await logErrorAwait(record.body, record.attributes, "agent-trace");
      } else if (record.severity === "WARN") {
        await logWarningAwait(record.body, record.attributes, "agent-trace");
      } else {
        await logInfoAwait(record.body, record.attributes, "agent-trace");
      }
    }

    await this.eventBus.emitEventQueued({
      type: "AgentTraceSummary",
      runId: this.runId,
      nodeId: this.nodeId,
      iteration: this.iteration,
      attempt: this.attempt,
      summary,
      timestampMs: finishedAtMs,
    } as unknown as SmithersEvent);
  }

  // stderr is always recorded verbatim; stdout is recorded verbatim for
  // coarse modes, or line-buffered and parsed as JSON for structured modes.
  private processChunk(stream: "stdout" | "stderr", text: string) {
    if (stream === "stderr") {
      this.stderrBuffer += text;
      this.pushObserved("stderr", { text }, text, stream);
      return;
    }
    this.stdoutBuffer += text;
    if (this.captureMode === "cli-text" || this.captureMode === "sdk-events") {
      this.pushObserved("stdout", { text }, text, stream);
      return;
    }
    const lines = this.stdoutBuffer.split(/\r?\n/);
    // The last split element may be an incomplete line; keep it buffered.
    this.stdoutBuffer = lines.pop() ?? "";
    for (const line of lines) {
      if (!line.trim()) continue;
      this.processStructuredStdoutLine(line);
    }
  }

  // At flush time, any leftover partial line in a structured stream means the
  // stream was truncated mid-record — record that as a capture failure.
  private flushStructuredBuffers() {
    if (this.captureMode === "cli-text" || this.captureMode === "sdk-events") {
      this.stdoutBuffer = "";
      this.stderrBuffer = "";
      return;
    }

    const line = this.stdoutBuffer.trim();
    this.stdoutBuffer = "";
    this.stderrBuffer = "";
    if (!line) return;

    this.failures.push(`truncated structured stream: ${line.slice(0, 200)}`);
    this.pushObserved(
      "capture.error",
      { reason: "truncated-json-stream", linePreview: line.slice(0, 200) },
      line,
      "stdout",
    );
  }

  // Parses one structured stdout line; malformed JSON becomes a capture.error
  // (and hence "capture-failed" completeness).
  private processStructuredStdoutLine(line: string) {
    let parsed: any;
    try {
      parsed = JSON.parse(line);
    } catch {
      this.failures.push(`malformed upstream JSON: ${line.slice(0, 200)}`);
      this.pushObserved(
        "capture.error",
        { linePreview: line.slice(0, 200), reason: "malformed-json" },
        line,
        "stdout",
      );
      return;
    }
    const rawType =
      typeof parsed?.type === "string" ? parsed.type : "structured";
    // Scope a raw-event id to every canonical event derived from this line.
    const previousRawEventId = this.currentRawEventId;
    this.currentRawEventId = this.nextRawEventId(rawType);
    try {
      this.emitObservedBatch(
        normalizeStructuredEvent(this.agentFamily, parsed, rawType),
      );
    } finally {
      this.currentRawEventId = previousRawEventId;
    }
  }

  private appendAssistantText(text: string) {
    this.assistantTextBuffer += text;
    this.finalText = this.assistantTextBuffer;
  }

  private setFinalAssistantText(text: string) {
    this.assistantTextBuffer = text;
    this.finalText = text;
  }

  // Mirrors bus-level tool/usage events into the trace for the same attempt.
  // Pi is excluded: its own structured stream already carries these.
  private observeSmithersEvent(event: SmithersEvent) {
    const sameAttempt =
      (event as any).runId === this.runId &&
      (event as any).nodeId === this.nodeId &&
      (event as any).iteration === this.iteration &&
      (event as any).attempt === this.attempt;
    if (!sameAttempt) return;
    if (this.agentFamily === "pi") return;

    if (event.type === "ToolCallStarted") {
      const rawEventId = this.nextRawEventId(event.type);
      this.pushDerived(
        "tool.execution.start",
        {
          toolCallId: event.toolCallId,
          toolName: event.toolName,
        },
        event,
        event.type,
        true,
        rawEventId,
      );
      this.expectedKinds.add("tool.execution.end");
    }
    if (event.type === "ToolCallFinished") {
      const rawEventId = this.nextRawEventId(event.type);
      this.pushDerived(
        "tool.execution.end",
        {
          toolCallId: event.toolCallId,
          toolName: event.toolName,
          isError: event.status === "error",
        },
        event,
        event.type,
        true,
        rawEventId,
      );
    }
    if (event.type === "TokenUsageReported") {
      const rawEventId = this.nextRawEventId(event.type);
      this.pushDerived(
        "usage",
        {
          model: event.model,
          agent: event.agent,
          inputTokens: event.inputTokens,
          outputTokens: event.outputTokens,
          cacheReadTokens: event.cacheReadTokens,
          cacheWriteTokens: event.cacheWriteTokens,
          reasoningTokens: event.reasoningTokens,
        },
        event,
        event.type,
        true,
        rawEventId,
      );
    }
  }

  // Classifies the attempt's overall fidelity:
  //   capture-failed  — any recorded failure
  //   final-only      — coarse capture with no rich structure and no warnings
  //   partial-observed — expected kinds missing, or warnings present
  //   full-observed   — rich structure with nothing missing
  private resolveCompleteness(): TraceCompleteness {
    if (this.failures.length > 0) return "capture-failed";

    const richKinds = new Set([
      "session.start",
      "session.end",
      "turn.start",
      "turn.end",
      "message.start",
      "message.update",
      "message.end",
      "assistant.text.delta",
      "assistant.thinking.delta",
      "tool.execution.start",
      "tool.execution.update",
      "tool.execution.end",
      "tool.result",
      "retry.start",
      "retry.end",
      "compaction.start",
      "compaction.end",
    ]);
    const sawRichStructure = [...this.directKinds].some((kind) =>
      richKinds.has(kind),
    );

    const coarseCaptureMode =
      this.captureMode === "sdk-events" ||
      this.captureMode === "cli-text" ||
      this.captureMode === "cli-json";

    if (!sawRichStructure && this.warnings.length === 0 && coarseCaptureMode) {
      return "final-only";
    }

    const missing = [...this.expectedKinds].filter(
      (kind) => !this.directKinds.has(kind),
    );
    if (missing.length > 0 || this.warnings.length > 0)
      return "partial-observed";

    // NOTE(review): by this point warnings.length === 0, so the "final-only"
    // arm below appears unreachable for coarse modes (already handled above);
    // confirm whether it guards a case I'm not seeing.
    if (coarseCaptureMode) {
      return sawRichStructure ? "partial-observed" : "final-only";
    }

    if (!sawRichStructure) return "final-only";
    return "full-observed";
  }

  // Appends one canonical event, redacting both payload and raw forms.
  // `observed` = came off the live stream; `direct` = counts toward the
  // directKinds set used by completeness/missing-kind accounting.
  // traceCompleteness is a placeholder here; flush() stamps the final value.
  private push(
    kind: CanonicalAgentTraceEventKind,
    payload: Record | null,
    raw: unknown,
    observed: boolean,
    rawType?: string,
    direct = true,
    rawEventId?: string,
  ) {
    const redactedPayload = redactValue(payload);
    const redactedRaw = redactValue(raw);
    const event: CanonicalAgentTraceEvent = {
      traceVersion: "1",
      runId: this.runId,
      workflowPath: this.workflowPath,
      workflowHash: this.workflowHash,
      nodeId: this.nodeId,
      iteration: this.iteration,
      attempt: this.attempt,
      timestampMs: nowMs(),
      event: {
        sequence: this.sequence++,
        kind,
        phase: kindPhase(kind),
      },
      source: {
        agentFamily: this.agentFamily,
        captureMode: this.captureMode,
        rawType,
        // Observed events inherit the current structured line's id (or mint
        // one); derived events only carry an id when explicitly given.
        rawEventId:
          rawEventId ??
          (observed
            ? (this.currentRawEventId ?? this.nextRawEventId(rawType ?? kind))
            : undefined),
        observed,
      },
      traceCompleteness: "partial-observed",
      // NOTE(review): bare `Record` here and in the signature above looks like
      // stripped type arguments (likely `Record<string, unknown>`) — confirm
      // against the original source.
      payload: redactedPayload.value as Record | null,
      raw: redactedRaw.value,
      redaction: {
        applied: redactedPayload.applied || redactedRaw.applied,
        ruleIds: [
          ...new Set([...redactedPayload.ruleIds, ...redactedRaw.ruleIds]),
        ],
      },
      annotations: this.annotations,
    };
    this.events.push(event);
    this.seenKinds.add(kind);
    if (direct) this.directKinds.add(kind);
  }

  // Convenience wrapper: event seen directly on the captured stream.
  private pushObserved(
    kind: CanonicalAgentTraceEventKind,
    payload: Record | null,
    raw: unknown,
    rawType?: string,
    rawEventId?: string,
  ) {
    this.push(kind, payload, raw, true, rawType, true, rawEventId);
  }

  // Convenience wrapper: event synthesized by the collector (observed=false).
  private pushDerived(
    kind: CanonicalAgentTraceEventKind,
    payload: Record | null,
    raw: unknown,
    rawType?: string,
    direct = true,
    rawEventId?: string,
  ) {
    this.push(kind, payload, raw, false, rawType, direct, rawEventId);
  }

  // Retroactively stamps the resolved completeness onto every event.
  private applyTraceCompleteness(traceCompleteness: TraceCompleteness) {
    for (const event of this.events) {
      event.traceCompleteness = traceCompleteness;
    }
  }

  private emitObservedBatch(
    batch: NormalizedTraceBatch,
    rawEventId = this.currentRawEventId,
  ) {
    for (const kind of batch.expectedKinds ??
[]) { + this.expectedKinds.add(kind); + } + for (const event of batch.events) { + this.observeNormalizedEvent(event); + this.pushObserved( + event.kind, + event.payload, + event.raw, + event.rawType, + rawEventId, + ); + } + } + + private observeNormalizedEvent(event: NormalizedTraceEvent) { + if ( + event.kind === "assistant.text.delta" && + typeof event.payload?.text === "string" + ) { + this.appendAssistantText(event.payload.text); + return; + } + if ( + event.kind === "assistant.message.final" && + typeof event.payload?.text === "string" + ) { + this.setFinalAssistantText(event.payload.text); + } + } + + private nextRawEventId(rawType: string) { + return `${rawType}:${this.rawEventSequence++}`; + } + + private async persistNdjson( + summary: AgentTraceSummary, + ): Promise<{ ok: true; file?: string } | { ok: false; error: string }> { + if (!this.logDir) return { ok: true }; + const dir = join(this.logDir, "agent-trace"); + const file = join( + dir, + `${this.nodeId}-${this.iteration}-${this.attempt}.ndjson`, + ); + const lines = this.events + .map((event) => JSON.stringify(event)) + .concat(JSON.stringify({ summary })); + try { + await mkdir(dir, { recursive: true }); + await appendFile(file, `${lines.join("\n")}\n`, "utf8"); + return { ok: true, file }; + } catch (error) { + return { + ok: false, + error: error instanceof Error ? 
error.message : String(error), + }; + } + } + + private async rewriteNdjson(file: string, summary: AgentTraceSummary) { + const lines = this.events + .map((event) => JSON.stringify(event)) + .concat(JSON.stringify({ summary })); + await writeFile(file, `${lines.join("\n")}\n`, "utf8"); + } +} diff --git a/src/agents/BaseCliAgent.ts b/src/agents/BaseCliAgent.ts index 40f1bd3b..3316c673 100644 --- a/src/agents/BaseCliAgent.ts +++ b/src/agents/BaseCliAgent.ts @@ -13,6 +13,8 @@ import { spawnCaptureEffect } from "../effect/child-process"; import { runPromise } from "../effect/runtime"; import { getToolContext } from "../tools/context"; import { SmithersError } from "../utils/errors"; +import { extractTextFromJsonValue } from "../utils/text-extraction"; +import { normalizeTokenUsage } from "../utils/usage"; import { launchDiagnostics, enrichReportWithErrorAnalysis } from "./diagnostics"; type TimeoutInput = number | { totalMs?: number; idleMs?: number } | undefined; @@ -58,6 +60,7 @@ type RunRpcCommandOptions = { idleTimeoutMs?: number; signal?: AbortSignal; maxOutputBytes?: number; + onStdout?: (chunk: string) => void; onStderr?: (chunk: string) => void; onExtensionUiRequest?: (request: PiExtensionUiRequest) => | Promise @@ -200,30 +203,7 @@ export function tryParseJson(text: string): unknown | undefined { return undefined; } -export function extractTextFromJsonValue(value: any): string | undefined { - if (typeof value === "string") return value; - if (!value || typeof value !== "object") return undefined; - if (typeof value.text === "string") return value.text; - if (typeof value.content === "string") return value.content; - if (Array.isArray(value.content)) { - const parts = value.content - .map((part: any) => { - if (!part) return ""; - if (typeof part === "string") return part; - if (typeof part.text === "string") return part.text; - if (typeof part.content === "string") return part.content; - return ""; - }) - .join(""); - if (parts.trim()) return parts; - } - if 
(value.response) return extractTextFromJsonValue(value.response); - if (value.message) return extractTextFromJsonValue(value.message); - if (value.result) return extractTextFromJsonValue(value.result); - if (value.output) return extractTextFromJsonValue(value.output); - if (value.data) return extractTextFromJsonValue(value.data); - return undefined; -} +export { extractTextFromJsonValue }; function extractTextFromJsonPayload(raw: string): string | undefined { const trimmed = raw.trim(); @@ -314,6 +294,19 @@ export function extractUsageFromOutput(raw: string): CliUsageInfo | undefined { const usage: CliUsageInfo = {}; let found = false; + const addUsage = (next: CliUsageInfo | null | undefined) => { + if (!next) return false; + let changed = false; + for (const [key, value] of Object.entries(next) as Array< + [keyof CliUsageInfo, number | undefined] + >) { + if (typeof value !== "number" || value <= 0) continue; + usage[key] = (usage[key] ?? 0) + value; + changed = true; + } + return changed; + }; + for (const line of lines) { let parsed: any; try { parsed = JSON.parse(line); } catch { continue; } @@ -321,56 +314,34 @@ export function extractUsageFromOutput(raw: string): CliUsageInfo | undefined { // Claude Code stream-json: message_start contains input token counts if (parsed.type === "message_start" && parsed.message?.usage) { - const u = parsed.message.usage; - usage.inputTokens = (usage.inputTokens ?? 0) + (u.input_tokens ?? 0); - if (u.cache_read_input_tokens) { - usage.cacheReadTokens = (usage.cacheReadTokens ?? 0) + u.cache_read_input_tokens; - } - if (u.cache_creation_input_tokens) { - usage.cacheWriteTokens = (usage.cacheWriteTokens ?? 
0) + u.cache_creation_input_tokens; - } - found = true; + const normalized = normalizeTokenUsage(parsed.message.usage); + found = addUsage({ + inputTokens: normalized?.inputTokens, + cacheReadTokens: normalized?.cacheReadTokens, + cacheWriteTokens: normalized?.cacheWriteTokens, + }) || found; continue; } // Claude Code stream-json: message_delta has output token count if (parsed.type === "message_delta" && parsed.usage) { - if (parsed.usage.output_tokens) { - usage.outputTokens = (usage.outputTokens ?? 0) + parsed.usage.output_tokens; - } - found = true; + const normalized = normalizeTokenUsage(parsed.usage); + found = addUsage({ + outputTokens: normalized?.outputTokens, + }) || found; continue; } // Codex --json: turn.completed event if (parsed.type === "turn.completed" && parsed.usage) { - const u = parsed.usage; - if (u.input_tokens) usage.inputTokens = (usage.inputTokens ?? 0) + u.input_tokens; - if (u.output_tokens) usage.outputTokens = (usage.outputTokens ?? 0) + u.output_tokens; - if (u.cached_input_tokens) usage.cacheReadTokens = (usage.cacheReadTokens ?? 0) + u.cached_input_tokens; - found = true; + found = addUsage(normalizeTokenUsage(parsed.usage)) || found; continue; } // Generic: any event with a top-level "usage" containing token fields if (parsed.usage && typeof parsed.usage === "object") { - const u = parsed.usage; - const inTok = u.input_tokens ?? u.inputTokens ?? u.prompt_tokens ?? 0; - const outTok = u.output_tokens ?? u.outputTokens ?? u.completion_tokens ?? 0; - if (inTok > 0 || outTok > 0) { - usage.inputTokens = (usage.inputTokens ?? 0) + inTok; - usage.outputTokens = (usage.outputTokens ?? 0) + outTok; - if (u.cache_read_input_tokens || u.cacheReadTokens || u.cached_input_tokens) { - usage.cacheReadTokens = (usage.cacheReadTokens ?? 0) + - (u.cache_read_input_tokens ?? u.cacheReadTokens ?? u.cached_input_tokens ?? 0); - } - if (u.reasoning_tokens ?? u.reasoningTokens) { - usage.reasoningTokens = (usage.reasoningTokens ?? 
0) + - (u.reasoning_tokens ?? u.reasoningTokens ?? 0); - } - found = true; - continue; - } + found = addUsage(normalizeTokenUsage(parsed.usage)) || found; + continue; } } @@ -381,9 +352,7 @@ export function extractUsageFromOutput(raw: string): CliUsageInfo | undefined { if (parsed?.stats?.models && typeof parsed.stats.models === "object") { for (const data of Object.values(parsed.stats.models as Record)) { if (data?.tokens) { - usage.inputTokens = (usage.inputTokens ?? 0) + (data.tokens.input ?? data.tokens.prompt ?? 0); - usage.outputTokens = (usage.outputTokens ?? 0) + (data.tokens.output ?? 0); - found = true; + found = addUsage(normalizeTokenUsage(data.tokens)) || found; } } } @@ -594,7 +563,19 @@ export function runCommandEffect( agentArgs: args.join(" "), cwd, }), + Effect.annotateSpans({ + agentCommand: command, + agentArgs: args.join(" "), + cwd, + }), Effect.withLogSpan(`agent:${command}`), + Effect.withSpan(`agent:${command}`, { + attributes: { + agentCommand: command, + agentArgs: args.join(" "), + cwd, + }, + }), ); } @@ -613,7 +594,7 @@ export function runRpcCommandEffect(command: string, args: string[], options: Ru exitCode: number | null; usage?: any; }, Error> { - const { cwd, env, prompt, timeoutMs, idleTimeoutMs, signal, maxOutputBytes, onStderr, onExtensionUiRequest } = options; + const { cwd, env, prompt, timeoutMs, idleTimeoutMs, signal, maxOutputBytes, onStdout, onStderr, onExtensionUiRequest } = options; return Effect.async<{ text: string; output: unknown; @@ -823,8 +804,10 @@ export function runRpcCommandEffect(command: string, args: string[], options: Ru }); }); - child.stdout?.on("data", () => { + child.stdout?.on("data", (chunk) => { inactivity.reset(); + const text = chunk.toString("utf8"); + onStdout?.(text); }); child.stderr?.on("data", (chunk) => { @@ -882,7 +865,21 @@ export function runRpcCommandEffect(command: string, args: string[], options: Ru cwd, rpc: true, }), + Effect.annotateSpans({ + agentCommand: command, + agentArgs: 
args.join(" "), + cwd, + rpc: true, + }), Effect.withLogSpan(`agent:${command}:rpc`), + Effect.withSpan(`agent:${command}:rpc`, { + attributes: { + agentCommand: command, + agentArgs: args.join(" "), + cwd, + rpc: true, + }, + }), ); } diff --git a/src/agents/CodexAgent.ts b/src/agents/CodexAgent.ts index 28d93733..b917b64c 100644 --- a/src/agents/CodexAgent.ts +++ b/src/agents/CodexAgent.ts @@ -16,6 +16,7 @@ type CodexAgentOptions = BaseCliAgentOptions & { disable?: string[]; image?: string[]; model?: string; + outputFormat?: "text" | "json" | "stream-json"; oss?: boolean; localProvider?: string; sandbox?: "read-only" | "workspace-write" | "danger-full-access"; @@ -35,6 +36,11 @@ export class CodexAgent extends BaseCliAgent { private readonly opts: CodexAgentOptions; constructor(opts: CodexAgentOptions = {}) { + opts = { + ...opts, + json: opts.json ?? true, + outputFormat: opts.outputFormat ?? "stream-json", + }; super(opts); this.opts = opts; } diff --git a/src/agents/PiAgent.ts b/src/agents/PiAgent.ts index a3f71e2c..9506a5ac 100644 --- a/src/agents/PiAgent.ts +++ b/src/agents/PiAgent.ts @@ -220,6 +220,27 @@ export class PiAgent extends BaseCliAgent { } throw err; } + + // RPC mode + const rpcResult = await runRpcCommand("pi", args, { + cwd, + env, + prompt, + timeoutMs: callTimeouts.totalMs, + idleTimeoutMs: callTimeouts.idleMs, + signal: options?.abortSignal, + maxOutputBytes: this.maxOutputBytes ?? getToolContext()?.maxOutputBytes, + onStdout: options?.onStdout, + onStderr: options?.onStderr, + onExtensionUiRequest: this.opts.onExtensionUiRequest, + }); + + return buildGenerateResult( + rpcResult.text, + rpcResult.output, + this.opts.model ?? 
"pi", + rpcResult.usage, + ); } protected async buildCommand(_params: { diff --git a/src/cli/index.ts b/src/cli/index.ts index 6d830e3c..a1310d7a 100755 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -11,7 +11,7 @@ import { ensureSmithersTables } from "../db/ensure"; import { SmithersDb } from "../db/adapter"; import { buildContext } from "../context"; import { fromPromise } from "../effect/interop"; -import { runPromise } from "../effect/runtime"; +import { runPromise, shutdownRuntime } from "../effect/runtime"; import type { SmithersWorkflow } from "../SmithersWorkflow"; import { Smithers } from "../effect/builder"; import { revertToAttempt } from "../revert"; @@ -90,6 +90,30 @@ function parseJsonInput(raw: string | undefined, label: string, fail: FailFn) { } } +function parseAnnotations(raw: string | undefined, fail: FailFn) { + const parsed = parseJsonInput(raw, "annotations", fail); + if (parsed === undefined) return undefined; + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + return fail({ + code: "INVALID_ANNOTATIONS", + message: "Run annotations must be a flat JSON object of string/number/boolean values", + exitCode: 4, + }); + } + const annotations: Record = {}; + for (const [key, value] of Object.entries(parsed)) { + if (!["string", "number", "boolean"].includes(typeof value)) { + return fail({ + code: "INVALID_ANNOTATIONS", + message: `Run annotation ${key} must be a string, number, or boolean`, + exitCode: 4, + }); + } + annotations[key] = value as string | number | boolean; + } + return annotations; +} + function formatStatusExitCode(status: string | undefined) { if (status === "finished") return 0; if (status === "waiting-approval") return 3; @@ -219,6 +243,7 @@ const commonRunOptions = z.object({ maxOutputBytes: z.number().int().min(1).optional().describe("Max bytes a single tool call can return (default: 200000)"), toolTimeoutMs: z.number().int().min(1).optional().describe("Max wall-clock time per tool call in ms (default: 
60000)"), hot: z.boolean().default(false).describe("Enable hot module replacement for .tsx workflows"), + annotations: z.string().optional().describe("Run annotations as a flat JSON object of string/number/boolean values"), }); const runOptions = commonRunOptions.extend({ @@ -296,6 +321,7 @@ const cli = Cli.create({ const resolvedWorkflowPath = resolve(process.cwd(), workflowPath); const isToon = extname(resolvedWorkflowPath) === ".toon"; const input = parseJsonInput(options.input, "input", fail) ?? {}; + const annotations = parseAnnotations(options.annotations, fail); const runId = options.runId; const resume = Boolean(options.resume); @@ -388,6 +414,7 @@ const cli = Cli.create({ maxOutputBytes: options.maxOutputBytes, toolTimeoutMs: options.toolTimeoutMs, hot: options.hot, + annotations, onProgress, signal: abort.signal, }) @@ -397,6 +424,7 @@ const cli = Cli.create({ process.exitCode = formatStatusExitCode( typeof status === "string" ? status : undefined, ); + await shutdownRuntime(); return ok(result); } @@ -412,11 +440,13 @@ const cli = Cli.create({ maxOutputBytes: options.maxOutputBytes, toolTimeoutMs: options.toolTimeoutMs, hot: options.hot, + annotations, onProgress, signal: abort.signal, }); process.exitCode = formatStatusExitCode(result.status); + await shutdownRuntime(); return ok(result); } catch (err: any) { return fail({ @@ -443,6 +473,7 @@ const cli = Cli.create({ const resolvedWorkflowPath = resolve(process.cwd(), workflowPath); const isToon = extname(resolvedWorkflowPath) === ".toon"; const input = parseJsonInput(options.input, "input", fail) ?? 
{}; + const annotations = parseAnnotations(options.annotations, fail); const runId = options.runId; if (options.hot) { @@ -501,6 +532,7 @@ const cli = Cli.create({ maxOutputBytes: options.maxOutputBytes, toolTimeoutMs: options.toolTimeoutMs, hot: options.hot, + annotations, onProgress, signal: abort.signal, }) @@ -510,6 +542,7 @@ const cli = Cli.create({ process.exitCode = formatStatusExitCode( typeof status === "string" ? status : undefined, ); + await shutdownRuntime(); return ok(result); } @@ -525,11 +558,13 @@ const cli = Cli.create({ maxOutputBytes: options.maxOutputBytes, toolTimeoutMs: options.toolTimeoutMs, hot: options.hot, + annotations, onProgress, signal: abort.signal, }); process.exitCode = formatStatusExitCode(result.status); + await shutdownRuntime(); return ok(result); } catch (err: any) { return fail({ @@ -873,7 +908,7 @@ const cli = Cli.create({ }, }) .command("observability", { - description: "Start the local observability stack (Grafana, Prometheus, Tempo, OTLP Collector) via Docker Compose.", + description: "Start the local observability stack (Grafana, Prometheus, Tempo, Loki, OTLP Collector) via Docker Compose.", options: z.object({ detach: z.boolean().default(false).describe("Run containers in the background"), down: z.boolean().default(false).describe("Stop and remove the observability stack"), @@ -909,7 +944,9 @@ const cli = Cli.create({ : `[smithers] Starting observability stack...\n` + ` Grafana: http://localhost:3001\n` + ` Prometheus: http://localhost:9090\n` + - ` Tempo: http://localhost:3200\n`, + ` Tempo: http://localhost:3200\n` + + ` Loki: http://localhost:3100\n` + + ` OTEL HTTP: http://localhost:4318\n`, ); const child = spawn("docker", composeArgs, { diff --git a/src/effect/child-process.ts b/src/effect/child-process.ts index c00241f3..ffaa892c 100644 --- a/src/effect/child-process.ts +++ b/src/effect/child-process.ts @@ -172,6 +172,22 @@ export function spawnCaptureEffect( timeoutMs: timeoutMs ?? 
null, idleTimeoutMs: idleTimeoutMs ?? null, }), + Effect.annotateSpans({ + command, + args: args.join(" "), + cwd, + timeoutMs: timeoutMs ?? null, + idleTimeoutMs: idleTimeoutMs ?? null, + }), Effect.withLogSpan(`process:${command}`), + Effect.withSpan(`process:${command}`, { + attributes: { + command, + args: args.join(" "), + cwd, + timeoutMs: timeoutMs ?? null, + idleTimeoutMs: idleTimeoutMs ?? null, + }, + }), ); } diff --git a/src/effect/logging.ts b/src/effect/logging.ts index cbdac8a3..79a3bf26 100644 --- a/src/effect/logging.ts +++ b/src/effect/logging.ts @@ -1,9 +1,9 @@ import { Effect } from "effect"; -import { runFork } from "./runtime"; +import { runFork, runPromise } from "./runtime"; type LogAnnotations = Record | undefined; -function emitLog( +function withLogContext( effect: Effect.Effect, annotations?: LogAnnotations, span?: string, @@ -15,7 +15,23 @@ function emitLog( if (span) { program = program.pipe(Effect.withLogSpan(span)); } - void runFork(program); + return program; +} + +function emitLog( + effect: Effect.Effect, + annotations?: LogAnnotations, + span?: string, +) { + void runFork(withLogContext(effect, annotations, span)); +} + +async function emitLogAwait( + effect: Effect.Effect, + annotations?: LogAnnotations, + span?: string, +) { + await runPromise(withLogContext(effect, annotations, span)); } export function logDebug( @@ -26,6 +42,14 @@ export function logDebug( emitLog(Effect.logDebug(message), annotations, span); } +export async function logDebugAwait( + message: string, + annotations?: LogAnnotations, + span?: string, +) { + await emitLogAwait(Effect.logDebug(message), annotations, span); +} + export function logInfo( message: string, annotations?: LogAnnotations, @@ -34,6 +58,14 @@ export function logInfo( emitLog(Effect.logInfo(message), annotations, span); } +export async function logInfoAwait( + message: string, + annotations?: LogAnnotations, + span?: string, +) { + await emitLogAwait(Effect.logInfo(message), annotations, 
span); +} + export function logWarning( message: string, annotations?: LogAnnotations, @@ -42,6 +74,14 @@ export function logWarning( emitLog(Effect.logWarning(message), annotations, span); } +export async function logWarningAwait( + message: string, + annotations?: LogAnnotations, + span?: string, +) { + await emitLogAwait(Effect.logWarning(message), annotations, span); +} + export function logError( message: string, annotations?: LogAnnotations, @@ -49,3 +89,11 @@ export function logError( ) { emitLog(Effect.logError(message), annotations, span); } + +export async function logErrorAwait( + message: string, + annotations?: LogAnnotations, + span?: string, +) { + await emitLogAwait(Effect.logError(message), annotations, span); +} diff --git a/src/effect/runtime.ts b/src/effect/runtime.ts index b4aa8484..8a958506 100644 --- a/src/effect/runtime.ts +++ b/src/effect/runtime.ts @@ -50,4 +50,8 @@ export function runSync(effect: Effect.Effect) { return runtime.runSync(decorate(effect) as Effect.Effect); } +export async function shutdownRuntime() { + await runtime.dispose(); +} + export { SmithersRuntimeLayer }; diff --git a/src/engine/approvals.ts b/src/engine/approvals.ts index dbe84f8c..d1f5be03 100644 --- a/src/engine/approvals.ts +++ b/src/engine/approvals.ts @@ -47,7 +47,9 @@ export function approveNodeEffect( }); }).pipe( Effect.annotateLogs({ runId, nodeId, iteration, approvalStatus: "approved" }), + Effect.annotateSpans({ runId, nodeId, iteration, approvalStatus: "approved" }), Effect.withLogSpan("approval:grant"), + Effect.withSpan("approval:grant", { attributes: { runId, nodeId, iteration, approvalStatus: "approved" } }), ); } @@ -108,7 +110,9 @@ export function denyNodeEffect( }); }).pipe( Effect.annotateLogs({ runId, nodeId, iteration, approvalStatus: "denied" }), + Effect.annotateSpans({ runId, nodeId, iteration, approvalStatus: "denied" }), Effect.withLogSpan("approval:deny"), + Effect.withSpan("approval:deny", { attributes: { runId, nodeId, iteration, 
approvalStatus: "denied" } }), ); } diff --git a/src/engine/index.ts b/src/engine/index.ts index 589aa874..713232e5 100644 --- a/src/engine/index.ts +++ b/src/engine/index.ts @@ -54,6 +54,7 @@ import { spawn as nodeSpawn } from "node:child_process"; import { randomUUID } from "node:crypto"; import { platform } from "node:os"; import { withTaskRuntime } from "../effect/task-runtime"; +import { AgentTraceCollector } from "../agent-trace"; /** * Track which worktree paths have already been created this run so we don't @@ -1291,6 +1292,12 @@ async function executeTask( }, workflowName: string, cacheEnabled: boolean, + traceContext?: { + workflowPath?: string | null; + workflowHash?: string | null; + logDir?: string; + annotations?: Record; + }, signal?: AbortSignal, disabledAgents?: Set, ) { @@ -1495,6 +1502,21 @@ async function executeTask( let agentResult: any; if (effectiveAgent) { // Use fallback agent on retry attempts when available + const traceCollector = new AgentTraceCollector({ + eventBus, + runId, + workflowPath: traceContext?.workflowPath ?? null, + workflowHash: traceContext?.workflowHash ?? 
null, + nodeId: desc.nodeId, + iteration: desc.iteration, + attempt: attemptNo, + agent: effectiveAgent, + agentId: (effectiveAgent as any).id, + model: (effectiveAgent as any).model, + logDir: traceContext?.logDir, + annotations: traceContext?.annotations, + }); + traceCollector.begin(); const result = await runWithToolContext( { db: adapter, @@ -1545,6 +1567,8 @@ async function executeTask( stream, timestampMs: nowMs(), }); + if (stream === "stdout") traceCollector.onStdout(text); + else traceCollector.onStderr(text); }; return (effectiveAgent as any).generate({ options: undefined as any, @@ -1556,7 +1580,15 @@ async function executeTask( outputSchema: desc.outputSchema, }); }, - ); + ).then(async (result) => { + traceCollector.observeResult(result); + await traceCollector.flush(); + return result; + }, async (error) => { + traceCollector.observeError(error); + await traceCollector.flush(); + throw error; + }); agentResult = result; @@ -2299,12 +2331,15 @@ export function renderFrameEffect( ctx: any, opts?: { baseRootDir?: string }, ) { + const spanAttributes = { + runId: ctx?.runId ?? "", + iteration: ctx?.iteration ?? 0, + }; return fromPromise("render frame", () => renderFrameAsync(workflow, ctx, opts)).pipe( - Effect.annotateLogs({ - runId: ctx?.runId ?? "", - iteration: ctx?.iteration ?? 0, - }), + Effect.annotateLogs(spanAttributes), + Effect.annotateSpans(spanAttributes), Effect.withLogSpan("engine:render-frame"), + Effect.withSpan("engine:render-frame", { attributes: spanAttributes }), ); } @@ -3090,6 +3125,12 @@ async function runWorkflowBody( toolConfig, workflowName, cacheEnabled, + { + workflowPath: resolvedWorkflowPath ?? opts.workflowPath ?? null, + workflowHash: runMetadata.workflowHash, + logDir: logDir ?? 
undefined, + annotations: opts.annotations, + }, runAbortController.signal, disabledAgents, ).finally(() => inflight.delete(p)); @@ -3164,14 +3205,17 @@ export function runWorkflowEffect( workflow: SmithersWorkflow, opts: RunOptions, ) { + const spanAttributes = { + runId: opts.runId ?? "", + workflowPath: opts.workflowPath ?? "", + maxConcurrency: opts.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY, + hot: Boolean(opts.hot), + }; return fromPromise("run workflow", () => runWorkflowAsync(workflow, opts)).pipe( - Effect.annotateLogs({ - runId: opts.runId ?? "", - workflowPath: opts.workflowPath ?? "", - maxConcurrency: opts.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY, - hot: Boolean(opts.hot), - }), + Effect.annotateLogs(spanAttributes), + Effect.annotateSpans(spanAttributes), Effect.withLogSpan("engine:run-workflow"), + Effect.withSpan("engine:run-workflow", { attributes: spanAttributes }), ); } diff --git a/src/events.ts b/src/events.ts index 99256de6..b7ff5ccc 100644 --- a/src/events.ts +++ b/src/events.ts @@ -30,7 +30,9 @@ export class EventBus extends EventEmitter { } }).pipe( Effect.annotateLogs({ runId: event.runId, eventType: event.type }), + Effect.annotateSpans({ runId: event.runId, eventType: event.type }), Effect.withLogSpan(`event:${event.type}`), + Effect.withSpan(`event:${event.type}`, { attributes: { runId: event.runId, eventType: event.type } }), ); } @@ -44,7 +46,9 @@ export class EventBus extends EventEmitter { yield* this.persistEffect(event); }).pipe( Effect.annotateLogs({ runId: event.runId, eventType: event.type }), + Effect.annotateSpans({ runId: event.runId, eventType: event.type }), Effect.withLogSpan(`event:${event.type}:persist`), + Effect.withSpan(`event:${event.type}:persist`, { attributes: { runId: event.runId, eventType: event.type } }), ); } @@ -65,7 +69,10 @@ export class EventBus extends EventEmitter { this.persistError = null; throw err; } - }).pipe(Effect.withLogSpan("event:flush")); + }).pipe( + Effect.withLogSpan("event:flush"), + 
Effect.withSpan("event:flush"), + ); } async flush(): Promise { @@ -83,7 +90,9 @@ export class EventBus extends EventEmitter { } }).pipe( Effect.annotateLogs({ runId: event.runId, eventType: event.type }), + Effect.annotateSpans({ runId: event.runId, eventType: event.type }), Effect.withLogSpan("event:persist"), + Effect.withSpan("event:persist", { attributes: { runId: event.runId, eventType: event.type } }), ); } diff --git a/src/index.ts b/src/index.ts index 893f9ace..3e1a48ba 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,6 +14,15 @@ export type { SmithersCtx } from "./SmithersCtx"; export type { OutputAccessor, InferRow, InferOutputEntry } from "./OutputAccessor"; export type { SmithersEvent } from "./SmithersEvent"; export type { SmithersError } from "./SmithersError"; +export type { + AgentCaptureMode, + AgentFamily, + AgentTraceCapabilityProfile, + AgentTraceSummary, + CanonicalAgentTraceEvent, + CanonicalAgentTraceEventKind, + TraceCompleteness, +} from "./agent-trace"; export { SmithersError as SmithersErrorInstance, isSmithersError, errorToJson } from "./utils/errors"; export type { SmithersErrorCode } from "./utils/errors"; export type { @@ -88,6 +97,7 @@ export type { export { createSmithers } from "./create"; export type { CreateSmithersApi } from "./create"; export { runWorkflow, renderFrame } from "./engine"; +export { agentTraceCapabilities, detectAgentFamily, detectCaptureMode } from "./agent-trace"; // Tools export { tools, read, write, edit, grep, bash } from "./tools/index"; diff --git a/src/observability/index.ts b/src/observability/index.ts index b29835ff..bae08048 100644 --- a/src/observability/index.ts +++ b/src/observability/index.ts @@ -373,11 +373,19 @@ function makeService( return { options, annotate: (attributes) => Effect.void.pipe(Effect.annotateLogs(attributes)), - withSpan: (name, effect, attributes) => - (attributes && Object.keys(attributes).length > 0 - ? 
effect.pipe(Effect.annotateLogs(attributes)) - : effect - ).pipe(Effect.withLogSpan(name)), + withSpan: (name, effect, attributes) => { + let program = effect; + if (attributes && Object.keys(attributes).length > 0) { + program = program.pipe( + Effect.annotateLogs(attributes), + Effect.annotateSpans(attributes), + ); + } + return program.pipe( + Effect.withLogSpan(name), + Effect.withSpan(name, attributes ? { attributes } : undefined), + ); + }, }; } diff --git a/src/tools/bash.ts b/src/tools/bash.ts index 90cb9497..577f92c4 100644 --- a/src/tools/bash.ts +++ b/src/tools/bash.ts @@ -24,8 +24,11 @@ export function bashToolEffect( const allowNetwork = ctx?.allowNetwork ?? false; const started = nowMs(); let seq: number | undefined; + let toolCallId: string | undefined; return Effect.gen(function* () { - seq = yield* logToolCallStartEffect("bash", started); + const startedCall = yield* logToolCallStartEffect("bash", started); + seq = startedCall?.seq; + toolCallId = startedCall?.toolCallId; const cwd = opts?.cwd ? 
yield* fromSync("resolve sandbox path", () => resolveSandboxPath(root, opts.cwd!), @@ -78,6 +81,7 @@ export function bashToolEffect( undefined, started, seq, + toolCallId, ); return output; }).pipe( @@ -98,6 +102,7 @@ export function bashToolEffect( error, started, seq, + toolCallId, ), ), ); diff --git a/src/tools/edit.ts b/src/tools/edit.ts index 9436fdb4..72de673f 100644 --- a/src/tools/edit.ts +++ b/src/tools/edit.ts @@ -20,8 +20,11 @@ export function editToolEffect(path: string, patch: string) { const patchBytes = Buffer.byteLength(patch, "utf8"); const logInput = { path, patchBytes, patchHash: sha256Hex(patch) }; let seq: number | undefined; + let toolCallId: string | undefined; return Effect.gen(function* () { - seq = yield* logToolCallStartEffect("edit", started); + const startedCall = yield* logToolCallStartEffect("edit", started); + seq = startedCall?.seq; + toolCallId = startedCall?.toolCallId; const fs = yield* FileSystem.FileSystem; const resolved = yield* fromSync("resolve sandbox path", () => resolveSandboxPath(root, path), @@ -50,6 +53,7 @@ export function editToolEffect(path: string, patch: string) { undefined, started, seq, + toolCallId, ); return "ok"; }).pipe( @@ -61,7 +65,16 @@ export function editToolEffect(path: string, patch: string) { }), Effect.withLogSpan("tool:edit"), Effect.tapError((error) => - logToolCallEffect("edit", logInput, null, "error", error, started, seq), + logToolCallEffect( + "edit", + logInput, + null, + "error", + error, + started, + seq, + toolCallId, + ), ), ); } diff --git a/src/tools/grep.ts b/src/tools/grep.ts index c9a99957..7b4f3eb9 100644 --- a/src/tools/grep.ts +++ b/src/tools/grep.ts @@ -15,8 +15,11 @@ export function grepToolEffect(pattern: string, path?: string) { const root = ctx?.rootDir ?? 
process.cwd(); const started = nowMs(); let seq: number | undefined; + let toolCallId: string | undefined; return Effect.gen(function* () { - seq = yield* logToolCallStartEffect("grep", started); + const startedCall = yield* logToolCallStartEffect("grep", started); + seq = startedCall?.seq; + toolCallId = startedCall?.toolCallId; const resolvedRoot = yield* fromSync("resolve sandbox path", () => resolveSandboxPath(root, path ?? "."), ); @@ -45,6 +48,7 @@ export function grepToolEffect(pattern: string, path?: string) { undefined, started, seq, + toolCallId, ); return result.stdout; }).pipe( @@ -64,6 +68,7 @@ export function grepToolEffect(pattern: string, path?: string) { error, started, seq, + toolCallId, ), ), ); diff --git a/src/tools/logToolCall.ts b/src/tools/logToolCall.ts index 9e5fe2bb..3110d893 100644 --- a/src/tools/logToolCall.ts +++ b/src/tools/logToolCall.ts @@ -14,11 +14,13 @@ export function logToolCallEffect( error?: unknown, startedAtMs?: number, seqOverride?: number, + toolCallIdOverride?: string, ) { const ctx = getToolContext(); if (!ctx) return Effect.void; const seq = typeof seqOverride === "number" ? seqOverride : nextToolSeq(ctx); + const toolCallId = toolCallIdOverride ?? `${toolName}:${seq}`; const started = startedAtMs ?? 
nowMs(); const finished = nowMs(); const durationMs = finished - started; @@ -32,11 +34,20 @@ export function logToolCallEffect( nodeId: ctx.nodeId, iteration: ctx.iteration, attempt: ctx.attempt, + toolCallId, toolName, seq, status, timestampMs: finished, }); + const spanAttributes = { + runId: ctx.runId, + nodeId: ctx.nodeId, + iteration: ctx.iteration, + attempt: ctx.attempt, + toolName, + toolStatus: status, + }; return Metric.update(toolDuration, durationMs).pipe( Effect.andThen( ctx.db.insertToolCallEffect({ @@ -54,15 +65,10 @@ export function logToolCallEffect( errorJson, }), ), - Effect.annotateLogs({ - runId: ctx.runId, - nodeId: ctx.nodeId, - iteration: ctx.iteration, - attempt: ctx.attempt, - toolName, - toolStatus: status, - }), + Effect.annotateLogs(spanAttributes), + Effect.annotateSpans(spanAttributes), Effect.withLogSpan(`tool:${toolName}:log`), + Effect.withSpan(`tool:${toolName}:log`, { attributes: spanAttributes }), ); } @@ -86,6 +92,7 @@ export function logToolCallStartEffect( const ctx = getToolContext(); if (!ctx) return Effect.succeed(undefined); const seq = nextToolSeq(ctx); + const toolCallId = `${toolName}:${seq}`; const started = startedAtMs ?? nowMs(); void ctx.emitEvent?.({ type: "ToolCallStarted", @@ -93,11 +100,12 @@ export function logToolCallStartEffect( nodeId: ctx.nodeId, iteration: ctx.iteration, attempt: ctx.attempt, + toolCallId, toolName, seq, timestampMs: started, }); - return Effect.succeed(seq); + return Effect.succeed({ seq, toolCallId }); } export function truncateToBytes(text: string, maxBytes: number): string { diff --git a/src/tools/read.ts b/src/tools/read.ts index bcd4cf23..34ddf58c 100644 --- a/src/tools/read.ts +++ b/src/tools/read.ts @@ -19,8 +19,11 @@ export function readToolEffect(path: string) { const root = ctx?.rootDir ?? 
process.cwd(); const started = nowMs(); let seq: number | undefined; + let toolCallId: string | undefined; return Effect.gen(function* () { - seq = yield* logToolCallStartEffect("read", started); + const startedCall = yield* logToolCallStartEffect("read", started); + seq = startedCall?.seq; + toolCallId = startedCall?.toolCallId; const fs = yield* FileSystem.FileSystem; const resolved = yield* fromSync("resolve sandbox path", () => resolveSandboxPath(root, path), @@ -41,6 +44,7 @@ export function readToolEffect(path: string) { undefined, started, seq, + toolCallId, ); return output; }).pipe( @@ -51,7 +55,16 @@ export function readToolEffect(path: string) { }), Effect.withLogSpan("tool:read"), Effect.tapError((error) => - logToolCallEffect("read", { path }, null, "error", error, started, seq), + logToolCallEffect( + "read", + { path }, + null, + "error", + error, + started, + seq, + toolCallId, + ), ), ); } diff --git a/src/tools/write.ts b/src/tools/write.ts index 3783f057..e5c73698 100644 --- a/src/tools/write.ts +++ b/src/tools/write.ts @@ -20,8 +20,11 @@ export function writeToolEffect(path: string, content: string) { const contentBytes = Buffer.byteLength(content, "utf8"); const logInput = { path, contentBytes, contentHash: sha256Hex(content) }; let seq: number | undefined; + let toolCallId: string | undefined; return Effect.gen(function* () { - seq = yield* logToolCallStartEffect("write", started); + const startedCall = yield* logToolCallStartEffect("write", started); + seq = startedCall?.seq; + toolCallId = startedCall?.toolCallId; const fs = yield* FileSystem.FileSystem; const resolved = yield* fromSync("resolve sandbox path", () => resolveSandboxPath(root, path), @@ -40,6 +43,7 @@ export function writeToolEffect(path: string, content: string) { undefined, started, seq, + toolCallId, ); return "ok"; }).pipe( @@ -51,7 +55,16 @@ export function writeToolEffect(path: string, content: string) { }), Effect.withLogSpan("tool:write"), Effect.tapError((error) => - 
logToolCallEffect("write", logInput, null, "error", error, started, seq), + logToolCallEffect( + "write", + logInput, + null, + "error", + error, + started, + seq, + toolCallId, + ), ), ); } diff --git a/src/utils/text-extraction.ts b/src/utils/text-extraction.ts new file mode 100644 index 00000000..14723dd5 --- /dev/null +++ b/src/utils/text-extraction.ts @@ -0,0 +1,25 @@ +export function extractTextFromJsonValue(value: any): string | undefined { + if (typeof value === "string") return value; + if (Array.isArray(value)) { + const text = value.map((item) => extractTextFromJsonValue(item) ?? "").join(""); + return text || undefined; + } + if (!value || typeof value !== "object") return undefined; + if (typeof value.text === "string") return value.text; + if (typeof value.content === "string") return value.content; + if (typeof value.output_text === "string") return value.output_text; + + if (Array.isArray(value.content)) { + const text = value.content + .map((part: any) => extractTextFromJsonValue(part) ?? 
"") + .join(""); + if (text) return text; + } + + for (const field of ["response", "message", "result", "output", "data", "item"]) { + const text = extractTextFromJsonValue(value[field]); + if (text) return text; + } + + return undefined; +} diff --git a/src/utils/usage.ts b/src/utils/usage.ts new file mode 100644 index 00000000..f02e33ef --- /dev/null +++ b/src/utils/usage.ts @@ -0,0 +1,89 @@ +export type NormalizedTokenUsage = { + inputTokens?: number; + outputTokens?: number; + cacheReadTokens?: number; + cacheWriteTokens?: number; + reasoningTokens?: number; + totalTokens?: number; +}; + +const usageFieldAliases: Record< + keyof NormalizedTokenUsage, + ReadonlyArray> +> = { + inputTokens: [ + ["inputTokens"], + ["promptTokens"], + ["prompt_tokens"], + ["input_tokens"], + ["input"], + ["models", "gemini", "tokens", "input"], + ], + outputTokens: [ + ["outputTokens"], + ["completionTokens"], + ["completion_tokens"], + ["output_tokens"], + ["output"], + ["models", "gemini", "tokens", "output"], + ], + cacheReadTokens: [ + ["cacheReadTokens"], + ["cache_read_input_tokens"], + ["cached_input_tokens"], + ["cache_read_tokens"], + ["inputTokenDetails", "cacheReadTokens"], + ], + cacheWriteTokens: [ + ["cacheWriteTokens"], + ["cache_write_input_tokens"], + ["cache_creation_input_tokens"], + ["cache_write_tokens"], + ["inputTokenDetails", "cacheWriteTokens"], + ], + reasoningTokens: [ + ["reasoningTokens"], + ["reasoning_tokens"], + ["outputTokenDetails", "reasoningTokens"], + ], + totalTokens: [ + ["totalTokens"], + ["total_tokens"], + ], +}; + +function readUsagePath(value: unknown, path: ReadonlyArray): unknown { + let current = value; + for (const segment of path) { + if (!current || typeof current !== "object") return undefined; + current = (current as Record)[segment]; + } + return current; +} + +export function hasMeaningfulTokenUsage(usage: NormalizedTokenUsage): boolean { + return Object.values(usage).some( + (value) => typeof value === "number" && 
Number.isFinite(value) && value > 0, + ); +} + +export function normalizeTokenUsage( + usage: unknown, +): NormalizedTokenUsage | null { + if (!usage || typeof usage !== "object") return null; + + const normalized: NormalizedTokenUsage = {}; + for (const [field, aliases] of Object.entries(usageFieldAliases) as Array< + [keyof NormalizedTokenUsage, ReadonlyArray>] + >) { + for (const path of aliases) { + const value = readUsagePath(usage, path); + if (typeof value === "number") { + normalized[field] = value; + break; + } + } + } + + return hasMeaningfulTokenUsage(normalized) ? normalized : null; +} diff --git a/tests/agent-trace.test.tsx b/tests/agent-trace.test.tsx new file mode 100644 index 00000000..1a192952 --- /dev/null +++ b/tests/agent-trace.test.tsx @@ -0,0 +1,1795 @@ +/** @jsxImportSource smithers */ +import { describe, expect, test } from "bun:test"; +import { mkdtempSync, readFileSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Workflow, Task, runWorkflow } from "../src"; +import { + AgentTraceCollector, + canonicalTraceEventToOtelLogRecord, +} from "../src/agent-trace"; +import { SmithersDb } from "../src/db/adapter"; +import { runPromise } from "../src/effect/runtime"; +import { + logToolCallEffect, + logToolCallStartEffect, +} from "../src/tools/logToolCall"; +import { createTestSmithers } from "./helpers"; +import { z } from "zod"; + +async function listRunEvents(db: any, runId: string) { + const adapter = new SmithersDb(db as any); + return adapter.listEvents(runId, -1, 500); +} + +describe("agent trace capture", () => { + test("captures high-fidelity Pi canonical trace events with ordering and redaction", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-pi"; + + const piLikeAgent: any = { + id: "pi-agent-test", + opts: { mode: "json" }, + generate: async (args: { onStdout?: 
(text: string) => void }) => { + args.onStdout?.( + JSON.stringify({ type: "session", id: "sess-1" }) + "\n", + ); + args.onStdout?.(JSON.stringify({ type: "turn_start" }) + "\n"); + args.onStdout?.( + JSON.stringify({ + type: "message_start", + message: { role: "assistant", content: [] }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message_update", + assistantMessageEvent: { + type: "thinking_delta", + delta: "thinking secret=sk_abc123456789", + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message_update", + assistantMessageEvent: { type: "text_delta", delta: "hello" }, + }) + "\n", + ); + const startedCall = await runPromise(logToolCallStartEffect("bash")); + await runPromise( + logToolCallEffect( + "bash", + { cmd: "echo secret=sk_live_123456789" }, + { ok: true }, + "success", + undefined, + undefined, + startedCall?.seq, + startedCall?.toolCallId, + ), + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_start", + toolExecution: { id: "t1", name: "bash", args: { cmd: "echo hi" } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_end", + toolExecution: { id: "t1", name: "bash", result: { ok: true } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn_end", + message: { + role: "assistant", + content: [{ type: "text", text: "Final answer" }], + stopReason: "stop", + }, + }) + "\n", + ); + args.onStdout?.(JSON.stringify({ type: "agent_end" }) + "\n"); + return { + text: '{"answer":"Final answer"}', + output: { answer: "Final answer" }, + }; + }, + }; + + const workflow = smithers(() => ( + + + do the thing + + + )); + + const result = await runWorkflow(workflow, { + input: {}, + runId, + workflowPath: "/tmp/pi-trace.tsx", + annotations: { "custom.demo": true, ticket: 123 }, + }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const traceEvents = events + .filter((event: any) => event.type === 
"AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const artifactEvent = traceEvents.find( + (event: any) => event.event.kind === "artifact.created", + ); + const persistedRows = readFileSync( + artifactEvent?.payload?.artifactPath, + "utf8", + ) + .trim() + .split("\n") + .map((line) => JSON.parse(line)); + + expect(summary.agentFamily).toBe("pi"); + expect(summary.captureMode).toBe("cli-json-stream"); + expect(summary.traceCompleteness).toBe("full-observed"); + + const kinds = traceEvents.map((event: any) => event.event.kind); + expect(kinds).toContain("session.start"); + expect(kinds).toContain("turn.start"); + expect(kinds).toContain("assistant.thinking.delta"); + expect(kinds).toContain("assistant.text.delta"); + expect(kinds).toContain("tool.execution.start"); + expect(kinds).toContain("tool.execution.end"); + expect(kinds).toContain("assistant.message.final"); + + const sequences = traceEvents.map((event: any) => event.event.sequence); + expect(sequences).toEqual([...sequences].sort((a, b) => a - b)); + expect(new Set(sequences).size).toBe(sequences.length); + expect( + traceEvents.filter( + (event: any) => event.event.kind === "tool.execution.start", + ), + ).toHaveLength(1); + expect( + traceEvents.filter( + (event: any) => event.event.kind === "tool.execution.end", + ), + ).toHaveLength(1); + expect( + persistedRows + .filter((row: any) => "event" in row) + .every((row: any) => row.traceCompleteness === "full-observed"), + ).toBe(true); + expect(persistedRows.at(-1)?.summary?.traceCompleteness).toBe( + "full-observed", + ); + + const redacted = JSON.stringify(traceEvents); + expect(redacted).not.toContain("sk_abc123456789"); + expect(redacted).not.toContain("sk_live_123456789"); + expect(traceEvents.some((event: any) => event.redaction.applied)).toBe( + true, + ); + expect( + traceEvents.every( + 
(event: any) => event.annotations["custom.demo"] === true, + ), + ).toBe(true); + + cleanup(); + }); + + test("classifies malformed Pi JSON as capture-failed", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-pi-fail"; + + const piLikeAgent: any = { + id: "pi-agent-failure", + opts: { mode: "json" }, + generate: async (args: { onStdout?: (text: string) => void }) => { + args.onStdout?.("{not json}\n"); + throw new Error("subprocess exits early"); + }, + }; + + const workflow = smithers(() => ( + + + fail please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("failed"); + + const events = await listRunEvents(db, runId); + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! 
+ .payloadJson, + ).summary; + + expect(summary.traceCompleteness).toBe("capture-failed"); + expect( + traceEvents.some((event: any) => event.event.kind === "capture.error"), + ).toBe(true); + + cleanup(); + }); + + test("preserves Pi message.update fallback and tool updates from one transcript", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-pi-update-fallback"; + + const piLikeAgent: any = { + id: "pi-agent-update-fallback", + opts: { mode: "json" }, + generate: async (args: { onStdout?: (text: string) => void }) => { + args.onStdout?.(JSON.stringify({ type: "session", id: "sess-2" }) + "\n"); + args.onStdout?.( + JSON.stringify({ + type: "message_update", + message: { role: "assistant", content: [{ type: "text", text: "partial state" }] }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_update", + toolExecution: { id: "tool-2", name: "bash", result: { status: "running" } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn_end", + message: { + role: "assistant", + content: [{ type: "text", text: "Pi final update" }], + }, + }) + "\n", + ); + args.onStdout?.(JSON.stringify({ type: "agent_end" }) + "\n"); + return { + text: '{"answer":"Pi final update"}', + output: { answer: "Pi final update" }, + }; + }, + }; + + const workflow = smithers(() => ( + + + pi update fallback please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect( + traceEvents.some( + (event: any) => + event.event.kind === "message.update" && + event.payload.text === "partial state", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + 
event.event.kind === "tool.execution.update" && + event.payload.toolCallId === "tool-2", + ), + ).toBe(true); + + cleanup(); + }); + + test("truthfully classifies sdk agents as final-only", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-sdk"; + + class OpenAIAgentFake { + id = "openai-sdk-fake"; + tools = {}; + async generate() { + return { + text: '{"answer":"sdk final"}', + output: { answer: "sdk final" }, + usage: { inputTokens: 10, outputTokens: 5 }, + }; + } + } + + const workflow = smithers(() => ( + + + sdk final only + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.agentFamily).toBe("openai"); + expect(summary.captureMode).toBe("sdk-events"); + expect(summary.traceCompleteness).toBe("final-only"); + expect(summary.unsupportedEventKinds).toContain("assistant.thinking.delta"); + expect( + traceEvents.some( + (event: any) => event.event.kind === "assistant.message.final", + ), + ).toBe(true); + expect(traceEvents.some((event: any) => event.event.kind === "usage")).toBe( + true, + ); + const finalMessage = traceEvents.find( + (event: any) => event.event.kind === "assistant.message.final", + ); + const usage = traceEvents.find((event: any) => event.event.kind === "usage"); + expect(finalMessage?.source.rawEventId).toBeTruthy(); + expect(finalMessage?.source.rawEventId).toBe(usage?.source.rawEventId); + + cleanup(); + }); + + test("preserves structured Claude stream-json deltas truthfully", async () => { + const { smithers, outputs, db, 
cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-claude-structured"; + + class ClaudeCodeAgentFake { + id = "claude-code-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "message_start", + message: { role: "assistant" }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message_delta", + delta: { text: "claude delta" }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message_end", + message: { role: "assistant", content: "claude final" }, + usage: { input_tokens: 4, output_tokens: 2 }, + }) + "\n", + ); + return { + text: '{"answer":"claude final"}', + output: { answer: "claude final" }, + }; + } + } + + const workflow = smithers(() => ( + + + structured claude please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! 
+ .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.agentFamily).toBe("claude-code"); + expect(summary.captureMode).toBe("cli-json-stream"); + expect(summary.traceCompleteness).toBe("full-observed"); + expect( + traceEvents.some((event: any) => event.event.kind === "message.start"), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.text.delta" && + event.payload.text === "claude delta", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "assistant.message.final", + ), + ).toBe(true); + expect(traceEvents.some((event: any) => event.event.kind === "usage")).toBe( + true, + ); + cleanup(); + }); + + test("normalizes real Claude stream-json assistant/result events", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-claude-real-schema"; + + class ClaudeCodeAgentRealSchemaFake { + id = "claude-code-real-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "system", + subtype: "init", + session_id: "sess-1", + model: "claude-opus-4-6[1m]", + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "assistant", + message: { + role: "assistant", + content: [{ type: "text", text: "claude real final" }], + usage: { + input_tokens: 3, + cache_read_input_tokens: 4, + output_tokens: 2, + }, + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "result", + subtype: "success", + result: "claude real final", + usage: { + input_tokens: 3, + cache_read_input_tokens: 4, + output_tokens: 20, + }, + }) + "\n", + ); + return { + text: '{"answer":"claude real final"}', + output: { answer: "claude real final" }, + }; + } + } + + 
const workflow = smithers(() => ( + + + structured claude real schema please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.traceCompleteness).toBe("full-observed"); + expect(summary.unsupportedEventKinds).not.toContain("artifact.created"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "message.update" && + event.payload.text === "claude real final", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.text.delta" && + event.payload.text === "claude real final", + ), + ).toBe(false); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.message.final" && + event.payload.text === "claude real final", + ), + ).toBe(true); + expect(traceEvents.filter((event: any) => event.event.kind === "usage").length).toBe(2); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "usage" && + event.payload.outputTokens === 20, + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.message.final" && + event.source.observed === true && + event.source.rawType === "result", + ), + ).toBe(true); + const assistantUpdate = traceEvents.find( + (event: any) => + event.event.kind === "message.update" && + event.source.rawType === "assistant", + ); + const assistantUsage = traceEvents.find( + (event: any) => + event.event.kind === "usage" && + event.source.rawType === "assistant", + ); + expect(assistantUpdate?.source.rawEventId).toBeTruthy(); + expect(assistantUpdate?.source.rawEventId).toBe( + 
assistantUsage?.source.rawEventId, + ); + + cleanup(); + }); + + test("preserves Claude structured tool lifecycle with shared raw event ids", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-claude-tool-lifecycle"; + + class ClaudeToolLifecycleFake { + id = "claude-tool-lifecycle-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "tool_call.started", + toolCall: { id: "tool-claude-1", name: "read", args: { path: "README.md" } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_call.completed", + toolCall: { id: "tool-claude-1", name: "read", result: { ok: true } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message_end", + message: { role: "assistant", content: "claude tool final" }, + }) + "\n", + ); + return { + text: '{"answer":"claude tool final"}', + output: { answer: "claude tool final" }, + }; + } + } + + const workflow = smithers(() => ( + + + structured claude tool lifecycle please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + const toolStart = traceEvents.find( + (event: any) => event.event.kind === "tool.execution.start", + ); + const toolEnd = traceEvents.find( + (event: any) => event.event.kind === "tool.execution.end", + ); + + expect(toolStart?.payload.toolCallId).toBe("tool-claude-1"); + expect(toolEnd?.payload.toolCallId).toBe("tool-claude-1"); + expect(toolStart?.source.rawEventId).not.toBe(toolEnd?.source.rawEventId); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.message.final" 
&& + event.payload.text === "claude tool final", + ), + ).toBe(true); + + cleanup(); + }); + + test("preserves structured Codex completion usage and final message truthfully", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-codex-structured"; + + class CodexAgentFake { + id = "codex-fake"; + opts = { outputFormat: "stream-json", json: true }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "assistant_message.delta", + delta: { text: "codex delta" }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn.completed", + usage: { input_tokens: 8, output_tokens: 3 }, + message: { role: "assistant", content: "codex final" }, + }) + "\n", + ); + return { + text: '{"answer":"codex final"}', + output: { answer: "codex final" }, + }; + } + } + + const workflow = smithers(() => ( + + + structured codex please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! 
+ .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.agentFamily).toBe("codex"); + expect(summary.captureMode).toBe("cli-json-stream"); + expect(summary.traceCompleteness).toBe("full-observed"); + expect(summary.unsupportedEventKinds).not.toContain("artifact.created"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.text.delta" && + event.payload.text === "codex delta", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "assistant.message.final", + ), + ).toBe(true); + expect(traceEvents.some((event: any) => event.event.kind === "usage")).toBe( + true, + ); + expect(summary.unsupportedEventKinds).not.toContain("assistant.text.delta"); + const turnUsage = traceEvents.find( + (event: any) => event.event.kind === "usage", + ); + const turnEnd = traceEvents.find( + (event: any) => event.event.kind === "turn.end", + ); + const finalMessage = traceEvents.find( + (event: any) => event.event.kind === "assistant.message.final", + ); + expect(turnUsage?.source.rawEventId).toBeTruthy(); + expect(turnUsage?.source.rawEventId).toBe(turnEnd?.source.rawEventId); + expect(turnUsage?.source.rawEventId).toBe(finalMessage?.source.rawEventId); + cleanup(); + }); + + test("treats Codex turn.completed without usage as a terminal structured event", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-codex-turn-completed-no-usage"; + + class CodexNoUsageAgentFake { + id = "codex-no-usage-fake"; + opts = { outputFormat: "stream-json", json: true }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "turn.started", + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn.completed", + message: { role: 
"assistant", content: "codex final without usage" }, + }) + "\n", + ); + return { + text: '{"answer":"codex final without usage"}', + output: { answer: "codex final without usage" }, + }; + } + } + + const workflow = smithers(() => ( + + + codex without usage please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.traceCompleteness).toBe("full-observed"); + expect( + traceEvents.some((event: any) => event.event.kind === "turn.end"), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.message.final" && + event.payload.text === "codex final without usage", + ), + ).toBe(true); + + cleanup(); + }); + + test("preserves Codex structured tool lifecycle from one transcript", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-codex-tool-lifecycle"; + + class CodexToolLifecycleFake { + id = "codex-tool-lifecycle-fake"; + opts = { outputFormat: "stream-json", json: true }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "tool_call.started", + toolCall: { id: "codex-tool-1", name: "grep", args: { pattern: "trace" } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_call.delta", + toolCall: { id: "codex-tool-1", name: "grep", result: { status: "running" } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_call.completed", + toolCall: { id: "codex-tool-1", name: "grep", result: { ok: true } }, + }) + "\n", + ); 
+ args.onStdout?.( + JSON.stringify({ + type: "turn.completed", + message: { role: "assistant", content: "codex tool final" }, + }) + "\n", + ); + return { + text: '{"answer":"codex tool final"}', + output: { answer: "codex tool final" }, + }; + } + } + + const workflow = smithers(() => ( + + + codex tool lifecycle please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect( + traceEvents.some( + (event: any) => + event.event.kind === "tool.execution.start" && + event.payload.toolCallId === "codex-tool-1", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "tool.execution.update" && + event.payload.toolCallId === "codex-tool-1", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "tool.execution.end" && + event.payload.toolCallId === "codex-tool-1", + ), + ).toBe(true); + + cleanup(); + }); + + test("captures real Codex dotted jsonl events as structured trace instead of cli-text fallback", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-codex-dotted-jsonl"; + + class CodexJsonlAgentFake { + readonly id = "codex-jsonl-fake"; + readonly opts = { outputFormat: "stream-json" as const, json: true }; + readonly family = "codex"; + + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "thread.started", + thread_id: "thread-1", + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn.started", + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "item.completed", + item: { + id: "item-1", + type: "agent_message", + text: "Codex 
dotted final", + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn.completed", + usage: { + input_tokens: 10, + cached_input_tokens: 2, + output_tokens: 4, + }, + }) + "\n", + ); + return { + text: '{"answer":"Codex dotted final"}', + output: { answer: "Codex dotted final" }, + }; + } + } + + const workflow = smithers(() => ( + + + structured codex dotted jsonl please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.captureMode).toBe("cli-json-stream"); + expect(summary.traceCompleteness).toBe("full-observed"); + expect( + traceEvents.some((event: any) => event.event.kind === "turn.start"), + ).toBe(true); + expect( + traceEvents.some((event: any) => event.event.kind === "turn.end"), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.message.final" && + event.source.observed === true && + event.payload.text === "Codex dotted final", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "usage" && + event.payload.inputTokens === 10 && + event.payload.cacheReadTokens === 2 && + event.payload.outputTokens === 4, + ), + ).toBe(true); + + cleanup(); + }); + + test("classifies structured Gemini stream-json traces truthfully", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-gemini-structured"; + + class GeminiAgentFake { + id = "gemini-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { 
+ args.onStdout?.( + JSON.stringify({ + type: "response.output_text.delta", + delta: { text: "gemini delta" }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "response.completed", + response: { + role: "assistant", + content: [{ text: "gemini final" }], + }, + usage: { input_tokens: 6, output_tokens: 4 }, + }) + "\n", + ); + return { + text: '{"answer":"gemini final"}', + output: { answer: "gemini final" }, + }; + } + } + + const workflow = smithers(() => ( + + + structured gemini please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.agentFamily).toBe("gemini"); + expect(summary.captureMode).toBe("cli-json-stream"); + expect(summary.traceCompleteness).toBe("full-observed"); + expect(summary.unsupportedEventKinds).not.toContain("assistant.text.delta"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.text.delta" && + event.payload.text === "gemini delta", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "assistant.message.final", + ), + ).toBe(true); + + cleanup(); + }); + + test("normalizes real Gemini stream-json message/result events", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-gemini-real-schema"; + + class GeminiAgentRealSchemaFake { + id = "gemini-real-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "init", + session_id: "sess-1", + 
model: "gemini-3-flash-preview", + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message", + role: "assistant", + content: "gemini ", + delta: true, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message", + role: "assistant", + content: "real final", + delta: true, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "result", + status: "success", + stats: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + }) + "\n", + ); + return { + text: '{"answer":"gemini real final"}', + output: { answer: "gemini real final" }, + }; + } + } + + const workflow = smithers(() => ( + + + structured gemini real schema please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.traceCompleteness).toBe("partial-observed"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.text.delta" && + event.payload.text === "gemini ", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.text.delta" && + event.payload.text === "real final", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.message.final" && + event.payload.text === "gemini real final", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.message.final" && + event.source.observed === false, + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "usage" && + event.payload.inputTokens === 10 && + event.payload.outputTokens === 5, + ), + 
).toBe(true); + + cleanup(); + }); + + test("classifies Gemini cli-json coarse output as final-only", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-gemini-final-only"; + + class GeminiAgentJsonFake { + id = "gemini-json-fake"; + opts = { outputFormat: "json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + text: "gemini coarse final", + stats: { models: { gemini: { tokens: { input: 3, output: 2 } } } }, + }) + "\n", + ); + return { + text: '{"answer":"gemini coarse final"}', + output: { answer: "gemini coarse final" }, + }; + } + } + + const workflow = smithers(() => ( + + + coarse gemini please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! 
+ .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.agentFamily).toBe("gemini"); + expect(summary.captureMode).toBe("cli-json"); + expect(summary.traceCompleteness).toBe("final-only"); + expect(summary.unsupportedEventKinds).toContain("assistant.text.delta"); + expect( + traceEvents.some((event: any) => event.event.kind === "usage"), + ).toBe(false); + + cleanup(); + }); + + test("preserves structured Kimi stream-json tool lifecycle truthfully", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-kimi-structured"; + + class KimiAgentFake { + id = "kimi-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "assistant_message.delta", + delta: { text: "kimi delta" }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_start", + toolExecution: { + id: "tool-1", + name: "bash", + args: { cmd: "echo hi" }, + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_update", + toolExecution: { + id: "tool-1", + name: "bash", + result: { status: "running" }, + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_end", + toolExecution: { id: "tool-1", name: "bash", result: { ok: true } }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "response.completed", + response: { role: "assistant", content: [{ text: "kimi final" }] }, + }) + "\n", + ); + return { + text: '{"answer":"kimi final"}', + output: { answer: "kimi final" }, + }; + } + } + + const workflow = smithers(() => ( + + + structured kimi please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + 
const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.agentFamily).toBe("kimi"); + expect(summary.captureMode).toBe("cli-json-stream"); + expect(summary.traceCompleteness).toBe("full-observed"); + expect(summary.unsupportedEventKinds).not.toContain("tool.execution.start"); + expect(summary.unsupportedEventKinds).not.toContain("tool.execution.update"); + expect(summary.unsupportedEventKinds).not.toContain("tool.execution.end"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "assistant.text.delta" && + event.payload.text === "kimi delta", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "tool.execution.start" && + event.payload.toolCallId === "tool-1", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "tool.execution.update", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "tool.execution.end", + ), + ).toBe(true); + + cleanup(); + }); + + test("sdk traces with smithers-observed tool lifecycle degrade to partial-observed instead of final-only", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-sdk-tools"; + + class OpenAIAgentWithToolsFake { + id = "openai-sdk-tools-fake"; + tools = {}; + async generate() { + const startedCall = await runPromise(logToolCallStartEffect("bash")); + await runPromise( + logToolCallEffect( + "bash", + { cmd: "echo hi" }, + { ok: true }, + "success", + undefined, + undefined, + startedCall?.seq, + startedCall?.toolCallId, + ), + ); + return { + text: '{"answer":"sdk tool final"}', + output: { answer: 
"sdk tool final" }, + }; + } + } + + const workflow = smithers(() => ( + + + sdk tools please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.traceCompleteness).toBe("partial-observed"); + expect( + traceEvents.some( + (event: any) => event.event.kind === "tool.execution.start", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "tool.execution.end", + ), + ).toBe(true); + const toolStart = traceEvents.find( + (event: any) => event.event.kind === "tool.execution.start", + ); + const toolEnd = traceEvents.find( + (event: any) => event.event.kind === "tool.execution.end", + ); + expect(toolStart?.payload.toolCallId).toBe("bash:1"); + expect(toolEnd?.payload.toolCallId).toBe("bash:1"); + expect(toolStart?.source.rawEventId).toBeTruthy(); + expect(toolEnd?.source.rawEventId).toBeTruthy(); + expect(toolStart?.source.rawEventId).not.toBe(toolEnd?.source.rawEventId); + + cleanup(); + }); + + test("classifies truncated structured streams as capture-failed", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-truncated-stream"; + + class CodexAgentTruncatedFake { + id = "codex-truncated-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "assistant_message.delta", + delta: { text: "partial" }, + }) + "\n", + ); + args.onStdout?.( + '{"type":"assistant_message.delta","delta":{"text":"unterminated"}', + ); + throw 
new Error("subprocess exits early"); + } + } + + const workflow = smithers(() => ( + + + truncated codex please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("failed"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.traceCompleteness).toBe("capture-failed"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "capture.error" && + event.payload.reason === "truncated-json-stream", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "assistant.message.final", + ), + ).toBe(false); + + cleanup(); + }); + + test("records artifact write failures as capture warnings while durable DB truth remains", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-artifact-write-fail"; + const tmp = mkdtempSync(join(tmpdir(), "smithers-agent-trace-")); + const badLogPath = join(tmp, "not-a-directory"); + writeFileSync(badLogPath, "occupied"); + + class GeminiAgentArtifactFake { + id = "gemini-artifact-fake"; + opts = { outputFormat: "stream-json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.( + JSON.stringify({ + type: "response.output_text.delta", + delta: { text: "artifact delta" }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "response.completed", + response: { + role: "assistant", + content: [{ text: "artifact final" }], + }, + }) + "\n", + ); + return { + text: '{"answer":"artifact final"}', + output: { answer: "artifact final" }, + }; + } + } + + const workflow = smithers(() => ( + + + artifact write failure 
please + + + )); + + const result = await runWorkflow(workflow, { + input: {}, + runId, + logDir: badLogPath, + }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.traceCompleteness).toBe("partial-observed"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "capture.warning" && + event.payload.reason === "artifact-write-failed", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "assistant.message.final", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "artifact.created", + ), + ).toBe(false); + + cleanup(); + }); + + test("records artifact rewrite failures as capture warnings without failing the run", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-artifact-rewrite-fail"; + const originalRewriteNdjson = (AgentTraceCollector.prototype as any).rewriteNdjson; + (AgentTraceCollector.prototype as any).rewriteNdjson = async function () { + throw new Error("rewrite exploded"); + }; + + class PiAgentRewriteFake { + id = "pi-rewrite-fake"; + opts = { mode: "json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.(JSON.stringify({ type: "session", id: "sess-1" }) + "\n"); + args.onStdout?.(JSON.stringify({ type: "turn_start" }) + "\n"); + args.onStdout?.( + JSON.stringify({ + type: "message_end", + message: { + role: "assistant", + content: [{ type: "text", text: "rewrite final" }], + }, + }) + "\n", + ); + return { + text: '{"answer":"rewrite final"}', + output: { answer: "rewrite final" }, + }; 
+ } + } + + try { + const workflow = smithers(() => ( + + + rewrite fail please + + + )); + + const result = await runWorkflow(workflow, { input: {}, runId }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.traceCompleteness).toBe("partial-observed"); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "capture.warning" && + event.payload.reason === "artifact-rewrite-failed", + ), + ).toBe(true); + expect( + traceEvents.some( + (event: any) => event.event.kind === "artifact.created", + ), + ).toBe(true); + } finally { + (AgentTraceCollector.prototype as any).rewriteNdjson = originalRewriteNdjson; + cleanup(); + } + }); + + test("records persisted trace artifacts with a canonical artifact.created event", async () => { + const { smithers, outputs, db, cleanup } = createTestSmithers({ + result: z.object({ answer: z.string() }), + }); + const runId = "agent-trace-artifact-created"; + const logDir = mkdtempSync(join(tmpdir(), "smithers-agent-trace-artifact-")); + + class PiArtifactAgentFake { + id = "pi-artifact-fake"; + opts = { mode: "json" }; + async generate(args: { onStdout?: (text: string) => void }) { + args.onStdout?.(JSON.stringify({ type: "session", id: "sess-1" }) + "\n"); + args.onStdout?.( + JSON.stringify({ + type: "turn_end", + message: { + role: "assistant", + content: [{ type: "text", text: "artifact ok" }], + }, + }) + "\n", + ); + args.onStdout?.(JSON.stringify({ type: "agent_end" }) + "\n"); + return { + text: '{"answer":"artifact ok"}', + output: { answer: "artifact ok" }, + }; + } + } + + const workflow = smithers(() => ( + + + artifact created please + + + )); + + const result = await 
runWorkflow(workflow, { + input: {}, + runId, + logDir, + }); + expect(result.status).toBe("finished"); + + const events = await listRunEvents(db, runId); + const summary = JSON.parse( + events.find((event: any) => event.type === "AgentTraceSummary")! + .payloadJson, + ).summary; + const traceEvents = events + .filter((event: any) => event.type === "AgentTraceEvent") + .map((event: any) => JSON.parse(event.payloadJson).trace); + + expect(summary.rawArtifactRefs).toHaveLength(1); + expect( + traceEvents.some( + (event: any) => + event.event.kind === "artifact.created" && + event.payload.artifactPath === summary.rawArtifactRefs[0], + ), + ).toBe(true); + const artifactText = readFileSync(summary.rawArtifactRefs[0], "utf8"); + expect(artifactText).toContain('"assistant.message.final"'); + expect(artifactText).toContain('"artifact.created"'); + expect(artifactText).toContain(summary.rawArtifactRefs[0]); + + cleanup(); + }); + + test("shapes OTEL log records with canonical query attributes and redacted body", () => { + const record = canonicalTraceEventToOtelLogRecord( + { + traceVersion: "1", + runId: "run-123", + workflowPath: "workflows/demo.tsx", + workflowHash: "abc123", + nodeId: "pi-rich-trace", + iteration: 0, + attempt: 1, + timestampMs: Date.now(), + event: { + sequence: 7, + kind: "assistant.thinking.delta", + phase: "message", + }, + source: { + agentFamily: "pi", + captureMode: "cli-json-stream", + rawType: "thinking_delta", + rawEventId: "thinking_delta:0", + observed: true, + }, + traceCompleteness: "full-observed", + payload: { text: "thought [REDACTED_SECRET]" }, + raw: { text: "thought [REDACTED_SECRET]" }, + redaction: { applied: true, ruleIds: ["secret-ish"] }, + annotations: { "custom.demo": true, ticket: "OBS-1" }, + }, + { + agentId: "pi-agent", + model: "gpt-5.4", + }, + ); + + expect(record.severity).toBe("INFO"); + expect(record.attributes["run.id"]).toBe("run-123"); + expect(record.attributes["node.id"]).toBe("pi-rich-trace"); + 
expect(record.attributes["node.attempt"]).toBe(1); + expect(record.attributes["event.kind"]).toBe("assistant.thinking.delta"); + expect(record.attributes["source.raw_event_id"]).toBe("thinking_delta:0"); + expect(record.attributes["custom.ticket"]).toBe("OBS-1"); + expect(record.body).toContain("REDACTED_SECRET"); + expect(record.body).not.toContain("sk_live_"); + }); +}); diff --git a/workflows/agent-trace-otel-demo.tsx b/workflows/agent-trace-otel-demo.tsx new file mode 100644 index 00000000..92d23e1e --- /dev/null +++ b/workflows/agent-trace-otel-demo.tsx @@ -0,0 +1,315 @@ +/** @jsxImportSource smithers */ +import { runPromise } from "../src/effect/runtime"; +import { logToolCallEffect, logToolCallStartEffect } from "../src/tools/logToolCall"; +import { createSmithers, Task, Workflow } from "../src"; +import { z } from "zod"; + +const PiOutput = z.object({ + answer: z.string(), +}); + +const ClaudeOutput = z.object({ + answer: z.string(), +}); + +const CodexOutput = z.object({ + answer: z.string(), +}); + +const GeminiOutput = z.object({ + answer: z.string(), +}); + +const SdkOutput = z.object({ + answer: z.string(), +}); + +const { smithers, outputs } = createSmithers( + { + piResult: PiOutput, + claudeResult: ClaudeOutput, + codexResult: CodexOutput, + geminiResult: GeminiOutput, + sdkResult: SdkOutput, + }, + { + dbPath: "./workflows/agent-trace-otel-demo.db", + journalMode: "DELETE", + }, +); + +function createPiDemoAgent(failureMode?: string) { + return { + id: failureMode ? 
`pi-observability-demo-${failureMode}` : "pi-observability-demo", + model: "demo-pi", + opts: { mode: "json" }, + generate: async (args: { onStdout?: (text: string) => void }) => { + if (failureMode === "malformed-json") { + args.onStdout?.("{not valid json}\n"); + throw new Error("demo malformed-json failure"); + } + args.onStdout?.(JSON.stringify({ type: "session", id: "demo-session" }) + "\n"); + args.onStdout?.(JSON.stringify({ type: "turn_start", id: "turn-1" }) + "\n"); + args.onStdout?.( + JSON.stringify({ + type: "message_start", + message: { role: "assistant", content: [] }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message_update", + assistantMessageEvent: { + type: "thinking_delta", + delta: "Planning demo work with token=sk_demo_secret_123456789", + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message_update", + assistantMessageEvent: { + type: "text_delta", + delta: "Working through the repository. ", + }, + }) + "\n", + ); + const seq = await runPromise(logToolCallStartEffect("bash")); + await runPromise( + logToolCallEffect( + "bash", + { command: "echo sk_live_secret_123456789" }, + { ok: true, stdout: "done" }, + "success", + undefined, + undefined, + typeof seq === "number" ? 
seq : undefined, + ), + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_start", + toolExecution: { + id: "tool-1", + name: "bash", + args: { command: "echo hi" }, + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_update", + toolExecution: { + id: "tool-1", + name: "bash", + result: { status: "running" }, + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "tool_execution_end", + toolExecution: { + id: "tool-1", + name: "bash", + result: { ok: true }, + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn_end", + message: { + role: "assistant", + content: [{ type: "text", text: "Demo PI answer" }], + usage: { inputTokens: 11, outputTokens: 7 }, + }, + }) + "\n", + ); + args.onStdout?.(JSON.stringify({ type: "agent_end", id: "demo-session" }) + "\n"); + return { + text: '{"answer":"Demo PI answer"}', + output: { answer: "Demo PI answer" }, + usage: { inputTokens: 11, outputTokens: 7 }, + }; + }, + } as any; +} + +function createClaudeDemoAgent() { + return { + id: "claude-observability-demo", + model: "demo-claude", + opts: { outputFormat: "stream-json" }, + generate: async (args: { onStdout?: (text: string) => void }) => { + args.onStdout?.( + JSON.stringify({ + type: "system", + subtype: "init", + session_id: "claude-demo-session", + model: "claude-opus-demo", + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "assistant", + message: { + role: "assistant", + content: [{ type: "text", text: "Claude demo answer" }], + usage: { + input_tokens: 13, + cache_read_input_tokens: 2, + output_tokens: 5, + }, + }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "result", + subtype: "success", + result: "Claude demo answer", + usage: { + input_tokens: 13, + cache_read_input_tokens: 2, + output_tokens: 5, + }, + }) + "\n", + ); + return { + text: '{"answer":"Claude demo answer"}', + output: { answer: "Claude demo answer" }, + }; + }, + } as any; +} + +function 
createGeminiDemoAgent() { + return { + id: "gemini-observability-demo", + model: "demo-gemini", + opts: { outputFormat: "stream-json" }, + generate: async (args: { onStdout?: (text: string) => void }) => { + args.onStdout?.( + JSON.stringify({ + type: "init", + session_id: "gemini-demo-session", + model: "gemini-demo", + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message", + role: "assistant", + content: "Gemini ", + delta: true, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "message", + role: "assistant", + content: "demo answer", + delta: true, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "result", + status: "success", + stats: { + input_tokens: 9, + output_tokens: 4, + total_tokens: 13, + }, + }) + "\n", + ); + return { + text: '{"answer":"Gemini demo answer"}', + output: { answer: "Gemini demo answer" }, + }; + }, + } as any; +} + +function createCodexDemoAgent() { + return { + id: "codex-observability-demo", + model: "demo-codex", + opts: { outputFormat: "stream-json", json: true }, + generate: async (args: { onStdout?: (text: string) => void }) => { + args.onStdout?.( + JSON.stringify({ + type: "assistant_message.delta", + delta: { text: "Codex demo answer" }, + }) + "\n", + ); + args.onStdout?.( + JSON.stringify({ + type: "turn.completed", + usage: { input_tokens: 12, output_tokens: 6 }, + message: { role: "assistant", content: "Codex demo answer" }, + }) + "\n", + ); + return { + text: '{"answer":"Codex demo answer"}', + output: { answer: "Codex demo answer" }, + }; + }, + } as any; +} + +class OpenAIAgentDemo { + id = "openai-demo-agent"; + model = "demo-sdk"; + tools = {}; + + async generate() { + return { + text: '{"answer":"Demo SDK answer"}', + output: { answer: "Demo SDK answer" }, + usage: { inputTokens: 5, outputTokens: 3 }, + }; + } +} + +export default smithers((ctx) => { + const failureMode = typeof ctx.input.failureMode === "string" ? 
ctx.input.failureMode : undefined; + const piAgent = createPiDemoAgent(failureMode); + + return ( + + + {`Emit a PI-like trace for observability verification. failureMode=${failureMode ?? "none"}`} + + {!failureMode && ( + <> + + {`Emit a Claude-like structured trace after the PI demo task completes.`} + + + {`Emit a Gemini-like structured trace after the Claude demo task completes.`} + + + {`Emit a Codex-like structured trace after the Gemini demo task completes.`} + + + {`Return a final-only SDK response after the structured PI, Claude, Gemini, and Codex demo tasks complete.`} + + + )} + + ); +}); diff --git a/workflows/agent-trace-otel-loop.tsx b/workflows/agent-trace-otel-loop.tsx new file mode 100644 index 00000000..3182f85d --- /dev/null +++ b/workflows/agent-trace-otel-loop.tsx @@ -0,0 +1,451 @@ +/** @jsxImportSource smithers */ +import { spawnSync } from "node:child_process"; +import { + createSmithers, + PiAgent, + Workflow, + Task, + Sequence, + Ralph, +} from "smithers"; +import { z } from "zod"; + +const DEFAULT_VALIDATION_COMMANDS = [ + "bun run typecheck", + "bun run test", +]; +const DEFAULT_PI_PROVIDER = "openai-codex"; +const DEFAULT_PI_MODEL = "gpt-5.4"; + +const ImplementOutput = z.object({ + summary: z.string(), + filesChanged: z.array(z.string()).default([]), + notableDecisions: z.array(z.string()).default([]), + remainingRisks: z.array(z.string()).default([]), +}); + +const ValidateOutput = z.object({ + allPassed: z.boolean(), + summary: z.string(), + gitStatus: z.string(), + gitDiffStat: z.string(), + results: z.array( + z.object({ + command: z.string(), + exitCode: z.number().int(), + passed: z.boolean(), + stdout: z.string(), + stderr: z.string(), + }), + ), +}); + +const ReviewOutput = z.object({ + approved: z.boolean(), + summary: z.string(), + blockingFeedback: z.array(z.string()).default([]), + strengths: z.array(z.string()).default([]), + nextActions: z.array(z.string()).default([]), +}); + +const FinalOutput = z.object({ + approved: 
z.boolean(), + reviewRounds: z.number().int().min(1), + implementationSummary: z.string(), + validationSummary: z.string(), + reviewSummary: z.string(), + blockingFeedback: z.array(z.string()).default([]), +}); + +const { smithers, outputs } = createSmithers( + { + implement: ImplementOutput, + validate: ValidateOutput, + review: ReviewOutput, + finalReport: FinalOutput, + }, + { + dbPath: "./workflows/agent-trace-otel-loop.db", + journalMode: "DELETE", + }, +); + +function createImplementerAgent(provider: string, model: string) { + return new PiAgent({ + provider, + model, + mode: "rpc", + yolo: true, + timeoutMs: 45 * 60 * 1000, + idleTimeoutMs: 5 * 60 * 1000, + thinking: "high", + appendSystemPrompt: [ + "You are the implementation agent for Smithers.", + "Work directly in the current repository.", + "Read the specification before changing code.", + "You must make the smallest coherent set of changes that satisfies the specification.", + "Do not claim verification you did not actually obtain.", + "Return exact changed file paths and a concise risk summary.", + ].join("\n"), + }); +} + +function createReviewerAgent(provider: string, model: string) { + return new PiAgent({ + provider, + model, + mode: "rpc", + yolo: true, + timeoutMs: 30 * 60 * 1000, + idleTimeoutMs: 5 * 60 * 1000, + thinking: "high", + appendSystemPrompt: [ + "You are an extremely strict implementation reviewer.", + "Your job is to reject weak, incomplete, under-verified, or cosmetically-correct changes.", + "Be direct and unsentimental.", + "Approve only if the implementation clearly satisfies the specification and the deterministic validation evidence is strong.", + "If validation is weak, incomplete, or missing, that is blocking feedback.", + "If logs/traces/verification claims are hand-wavy, that is blocking feedback.", + "Prefer a short list of high-severity blocking issues over a long list of minor nits.", + ].join("\n"), + }); +} + +function asString(value: unknown, fallback = ""): string 
{ + return typeof value === "string" ? value : fallback; +} + +function asStringArray(value: unknown): string[] { + return Array.isArray(value) ? value.filter((v): v is string => typeof v === "string") : []; +} + +function resolveValidationCommands(input: Record): string[] { + const commands = input.validationCommands; + const parsed = asStringArray(commands); + return parsed.length > 0 ? parsed : DEFAULT_VALIDATION_COMMANDS; +} + +function runShell(command: string) { + const result = spawnSync(command, { + shell: true, + cwd: process.cwd(), + env: process.env, + encoding: "utf8", + maxBuffer: 10 * 1024 * 1024, + }); + return { + command, + exitCode: result.status ?? (result.error ? 1 : 0), + passed: (result.status ?? 0) === 0 && !result.error, + stdout: (result.stdout ?? "").trim(), + stderr: [result.stderr ?? "", result.error?.message ?? ""].filter(Boolean).join("\n").trim(), + }; +} + +export default smithers((ctx) => { + const latestImplement = ctx.latest("implement", "implement"); + const latestValidate = ctx.latest("validate", "validate"); + const latestReview = ctx.latest("review", "review"); + + const approved = latestReview?.approved === true; + const piProvider = asString( + (ctx.input as Record).piProvider, + DEFAULT_PI_PROVIDER, + ); + const piModel = asString( + (ctx.input as Record).piModel, + DEFAULT_PI_MODEL, + ); + const implementer = createImplementerAgent(piProvider, piModel); + const reviewer = createReviewerAgent(piProvider, piModel); + const validationCommands = resolveValidationCommands(ctx.input as Record); + const branchName = asString((ctx.input as Record).branchName, "feat/agent-trace-otel-logs"); + const specPath = asString( + (ctx.input as Record).specPath, + "docs/concepts/agent-trace-otel-logs-spec.mdx", + ); + const composePath = asString( + (ctx.input as Record).composePath, + "observability/docker-compose.otel.yml", + ); + + return ( + + + + + + {`Read the spec first: ${specPath}. Treat it as the source of truth. 
The work is not “add some logs.” The work is: + +1. define and implement a canonical agent trace model +2. capture the fullest observable trace per agent family +3. export those canonical trace events as OTEL logs +4. verify end-to-end in Docker with Loki +5. prove correctness using the verification section in the spec + +Also note the current local stack in ${composePath} does not include Loki yet. It only starts collector, Prometheus, Tempo, and Grafana. Adding Loki and a collector logs pipeline is part of the assignment. + +What He Should Understand Before Coding +Read these first: + +- ${specPath} +- ${composePath} +- observability/otel-collector-config.yml +- docs/guides/monitoring-logs.mdx +- docs/runtime/events.mdx +- docs/integrations/pi-integration.mdx +- docs/integrations/cli-agents.mdx + +Then inspect the code paths that matter: + +- src/agents/PiAgent.ts +- src/agents/BaseCliAgent.ts +- src/events.ts +- src/effect/logging.ts +- src/observability/index.ts + +Expected Deliverable +He should not stop at implementation. He should hand back: + +- code +- updated Docker observability stack with Loki +- tests +- a reproducible verification procedure +- sample queries proving the system works + +Required Verification Environment +He should use Docker for the local stack: + +smithers observability --detach + +That CLI currently starts the stack from src/cli/index.ts:876, but he will need to extend it so the stack includes Loki too. + +Target local endpoints after his changes should be something like: + +- Grafana +- Prometheus +- Tempo +- Loki +- OTEL collector + +He should document exact ports if he changes them. + +How He Should Verify +Use the spec’s verification classes as the acceptance gate. In practice, he should prove all of this: + +1. Canonical trace capture works + Run a workflow with at least one agent that has a rich trace surface, preferably PiAgent. 
+ He must show: + - assistant text deltas + - visible thinking deltas if Pi emits them + - tool execution start/update/end + - final assistant message + - run/node/attempt correlation fields +2. OTEL logs are really exported + Not just local files. He must prove the canonical trace events land in Loki or another OTEL log backend through the collector. +3. Logs are queryable + In Grafana/Loki he must show queries that answer: + - all events for one run.id + - one node.id + attempt + - only assistant.thinking.delta + - only tool.execution.* + - only capture.error +4. Current metrics/traces still work + He must ensure adding logs does not break existing Prometheus/Tempo behavior. +5. Failure cases are classified + He should simulate at least: + - malformed upstream JSON + - collector unavailable + - subprocess exits early + - redaction case + and show the system reports capture.error, partial-observed, or capture-failed correctly. +6. Redaction works + He must include at least one fixture with a secret-like token and show it is absent from: + - canonical persisted payload + - OTEL log body + - artifacts if artifacts are enabled + +Recommended Test Matrix +Tell him to verify at least this matrix: + +- PiAgent: + - rich trace success case + - failure case + - redaction case +- one structured CLI agent: + - CodexAgent or ClaudeCodeAgent + - prove structured events are preserved if available + - otherwise prove truthful partial classification +- one SDK agent: + - OpenAIAgent or AnthropicAgent + - prove partial/final-only capture is explicit, not faked + +Suggested Demo Workflow +He should create or reuse one simple workflow specifically for observability verification: + +- one task using PiAgent +- one task using a second agent family +- at least one tool invocation +- stable run annotations +- easy-to-query workflow path + +Then run it with OTEL enabled, for example: + +export SMITHERS_OTEL_ENABLED=1 +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +export 
OTEL_SERVICE_NAME=smithers-dev + +What “Done” Means +Use this language with him: + +Done means: + +- Loki is part of the local Docker stack +- the collector has a real logs pipeline +- canonical agent trace events exist +- at least Pi is high-fidelity per spec +- other agent families are truthfully classified +- there are automated tests for schema, ordering, fidelity, completeness, export, redaction, and failure handling +- there is a written verification section with commands and Grafana/Loki queries +- a reviewer can reproduce the verification from a clean checkout + +What He Should Return +Ask him to return: + +- the branch +- a short “verification report” with: + - commands run + - workflow used + - exact Grafana/Loki queries + - screenshots or copied query results + - which verification classes from the spec passed + - any explicitly deferred items + +Execution context: +- branch: ${branchName} +- PI provider/model: ${piProvider}/${piModel} +${latestReview ? `- Previous review summary: ${latestReview.summary}` : "- This is the first implementation pass."} +${latestReview?.blockingFeedback?.length ? `- Blocking feedback to address:\n${latestReview.blockingFeedback.map((item: string) => ` - ${item}`).join("\n")}` : ""} +${latestValidate ? `- Latest deterministic validation summary: ${latestValidate.summary}` : "- No deterministic validation results exist yet."} + +Return JSON only, with: +- summary +- filesChanged +- notableDecisions +- remainingRisks`} + + + + {() => { + const results = validationCommands.map(runShell); + const allPassed = results.every((result) => result.passed); + const gitStatus = runShell("git status --short").stdout; + const gitDiffStat = runShell("git diff --stat").stdout; + const failed = results.filter((result) => !result.passed); + + return { + allPassed, + summary: allPassed + ? 
`All deterministic validation commands passed (${results.length} commands).` + : `${failed.length} of ${results.length} deterministic validation commands failed.`, + gitStatus, + gitDiffStat, + results, + }; + }} + + + + {`Review the implementation brutally. + +This is a Smithers implementation-review loop for the OTEL agent trace logging work. + +Primary brief files: +- ${specPath} +- ${composePath} +- PI provider/model under review: ${piProvider}/${piModel} + +You are reviewing the current repository state, not an abstract plan. + +Implementation agent report: +${latestImplement ? JSON.stringify(latestImplement, null, 2) : "No implementation report was produced."} + +Deterministic validation report: +${latestValidate ? JSON.stringify(latestValidate, null, 2) : "No validation report was produced."} + +Your role: +- You are the backpressure mechanism. +- You are not a collaborator helping the implementer feel good. +- You are the reviewer responsible for preventing weak, premature, under-verified changes from passing. +- You must assume the implementer will take shortcuts unless the evidence proves otherwise. + +Verification duties: +- Audit whether the implementation actually satisfies the verification section of the spec. +- Require evidence, not intent. +- Treat missing end-to-end verification as a blocking failure. +- Treat unverified Docker + Loki + collector integration as a blocking failure. +- Treat absent or weak query examples as a blocking failure. +- Treat missing failure-mode validation as a blocking failure. +- Treat redaction claims without proof as a blocking failure. +- Treat “it should work” or “it is wired” language as evidence of incompleteness. + +Smithers coding-style duties: +- Enforce repository-local conventions over generic agent habits. +- Reject unnecessary abstraction, speculative generalization, and architecture without pressure. +- Reject code that hides truthfulness about agent fidelity. 
+- Reject code that collapses raw event boundaries into summaries when the spec requires preservation. +- Reject code that uses high-cardinality labels for large bodies. +- Reject implementation that weakens durable local truth in favor of OTEL-only export. +- Reject code that does not fit the existing Smithers runtime, observability, and event model cleanly. +- Reject hand-wavy docs that are not reproducible from a clean checkout. + +Specific rejection criteria: +- Reject any vague or partial implementation. +- Reject if the OTEL logs path is not actually verifiable with Docker + Loki + Grafana. +- Reject if Pi trace capture is not clearly higher fidelity than the other agents. +- Reject if unsupported agent capabilities are hand-waved instead of truthfully classified. +- Reject if deterministic validation is weak, missing, or ignored. +- Reject if the work appears to optimize appearance over correctness. +- Reject if tests cover only the happy path. +- Reject if the coworker handoff is missing commands, queries, or exact verification steps. + +Approval standard: +- Approve only if the implementation appears specification-complete, verification-complete, reproducible, and operationally believable. +- If there is any meaningful unresolved risk, set approved=false. +- Default to rejection unless the evidence is boringly strong. + +Feedback style: +- Be harsh, concise, and specific. +- Prioritize the highest-severity blockers. +- Do not soften critique. +- Do not pad with praise unless it is genuinely earned and still useful. + +Return JSON only, with: +- approved +- summary +- blockingFeedback +- strengths +- nextActions`} + + + + + + {() => ({ + approved: latestReview?.approved === true, + reviewRounds: ctx.iterationCount("review", "review"), + implementationSummary: latestImplement?.summary ?? "No implementation summary available.", + validationSummary: latestValidate?.summary ?? "No validation summary available.", + reviewSummary: latestReview?.summary ?? 
"No review summary available.", + blockingFeedback: latestReview?.blockingFeedback ?? [], + })} + + + + ); +}); diff --git a/workflows/real-agent-trace-check.tsx b/workflows/real-agent-trace-check.tsx new file mode 100644 index 00000000..686b9baa --- /dev/null +++ b/workflows/real-agent-trace-check.tsx @@ -0,0 +1,63 @@ +/** @jsxImportSource smithers */ +import { createSmithers, Task, Workflow } from "../src"; +import { ClaudeCodeAgent } from "../src/agents/ClaudeCodeAgent"; +import { AmpAgent } from "../src/agents/AmpAgent"; +import { GeminiAgent } from "../src/agents/GeminiAgent"; +import { z } from "zod"; + +const Output = z.object({ + answer: z.string(), +}); + +const { smithers, outputs } = createSmithers( + { + result: Output, + }, + { + dbPath: "./workflows/real-agent-trace-check.db", + journalMode: "DELETE", + }, +); + +function buildAgent(kind: string) { + switch (kind) { + case "claude": + return new ClaudeCodeAgent({ + yolo: true, + outputFormat: "stream-json", + maxOutputBytes: 512 * 1024, + }); + case "amp": + return new AmpAgent({ + yolo: true, + maxOutputBytes: 512 * 1024, + }); + case "gemini": + return new GeminiAgent({ + approvalMode: "yolo", + outputFormat: "stream-json", + maxOutputBytes: 512 * 1024, + }); + default: + throw new Error(`Unsupported agent kind: ${kind}`); + } +} + +export default smithers((ctx) => { + const agentKind = + typeof ctx.input.agent === "string" ? ctx.input.agent : "claude"; + + return ( + + + {`Respond briefly. State which agent you are, then end with JSON matching the required schema.`} + + + ); +});