From 79e3873150c07e49ca45e93d8fa7494f0f1ad952 Mon Sep 17 00:00:00 2001
From: Samuel Huber <samuel@dtech.vision>
Date: Thu, 26 Mar 2026 15:53:22 +0100
Subject: [PATCH 1/7] Specify agent trace OTEL log export

---
 docs/concepts/agent-trace-otel-logs-spec.mdx | 1063 ++++++++++++++++++
 1 file changed, 1063 insertions(+)
 create mode 100644 docs/concepts/agent-trace-otel-logs-spec.mdx

diff --git a/docs/concepts/agent-trace-otel-logs-spec.mdx b/docs/concepts/agent-trace-otel-logs-spec.mdx
new file mode 100644
index 00000000..12307b6b
--- /dev/null
+++ b/docs/concepts/agent-trace-otel-logs-spec.mdx
@@ -0,0 +1,1063 @@
+---
+title: Agent Trace OTEL Logs Specification
+description: Full-fidelity specification for capturing agent-visible execution traces and exporting them as OpenTelemetry logs to Loki or any OTLP-compatible backend.
+---
+
+This document specifies how Smithers must capture, normalize, persist, export, and verify agent execution traces as OpenTelemetry logs.
+
+This is a design specification, not an implementation sketch. Every requirement in this document is normative unless explicitly marked as non-normative.
+
+## Status
+
+- Intended scope: new observability surface for agent trace logs
+- Intended audience: maintainers implementing runtime, agent, observability, and verification changes
+- Intended outcome: a system where every supported agent run produces a complete, queryable, correlated trace of what Smithers could observe
+
+## Problem Statement
+
+Smithers currently captures:
+
+- durable workflow lifecycle events
+- structured application logs
+- traces and metrics for runtime behavior
+- partial agent output in some cases
+
+Smithers does not currently guarantee a full-fidelity record of agent-visible execution behavior across all agent integrations.
+
+In particular:
+
+- `PiAgent` exposes a rich event stream, but Smithers currently collapses it to final text plus usage
+- several CLI agents emit machine-readable output that Smithers does not preserve as first-class trace events
+- SDK-based agents return final results and rely on Smithers-side tool logging, but do not provide a canonical agent trace model
+- there is no OTEL logs pipeline in the local collector configuration
+
+The result is that operators cannot reliably answer questions such as:
+
+- What did the agent stream before it failed?
+- Which tools did the agent invoke, in what order, with which visible arguments and results?
+- Did the agent emit visible thinking content, compaction events, retries, or queued follow-up behavior?
+- Can we reconstruct exactly what Smithers observed for a given run, node, and attempt?
+- Can we query this in Grafana Loki or another OTLP log backend with stable run-level correlation?
+
+This specification addresses that gap.
+
+## Goals
+
+The system defined here MUST:
+
+- capture the fullest agent-visible trace Smithers can obtain for each supported agent
+- export that trace as OTEL logs to Loki or any OTLP-compatible log backend
+- preserve run correlation through stable attributes such as `run.id`, `workflow.path`, `node.id`, `node.iteration`, and `node.attempt`
+- preserve raw trace fidelity without forcing operators to infer behavior from summary logs
+- remain explicit about what was directly observed versus what was derived by Smithers
+- provide deterministic verification criteria for correctness and task completion
+
+## Non-Goals
+
+The system defined here MUST NOT claim to provide:
+
+- provider-internal hidden chain-of-thought when the upstream agent or SDK does not expose it
+- exact reconstruction of invisible model-side planning not surfaced through events, messages, or tool calls
+- a replacement for the durable Smithers event log or database
+- a guarantee that every backend will index arbitrary high-cardinality fields efficiently
+
+## Core Principle
+
+Smithers MUST export what it observed, not what it inferred.
+
+Every exported trace record MUST be classifiable as one of:
+
+- raw upstream agent event
+- raw Smithers runtime event
+- Smithers-derived normalization of one raw event
+- Smithers-generated transport or export diagnostic
+
+If a record is derived, the derivation MUST be explicit.
+
+## Definitions
+
+### Agent Trace
+
+An agent trace is the ordered set of agent-visible execution records associated with one Smithers node attempt.
+
+Agent trace records include, where available:
+
+- streamed assistant text
+- streamed visible thinking content
+- message lifecycle events
+- tool call lifecycle events
+- tool result lifecycle events
+- compaction and retry events
+- session metadata
+- final assistant message
+- final tool results
+- agent stderr diagnostics when those are observable to Smithers
+
+### Full Trace
+
+For a given agent integration, a full trace means all upstream-visible records Smithers can access without patching the upstream model provider.
+
+Full trace does not mean hidden reasoning. It means all observable records available through:
+
+- subprocess stdout or stderr
+- structured CLI output modes
+- RPC event streams
+- SDK callback/event surfaces
+- persisted session artifacts intentionally provided by the agent system
+
+### Canonical Trace Event
+
+A canonical trace event is the Smithers-normalized representation of one raw observed record.
+
+Canonical trace events are the unit exported to OTEL logs and optionally persisted durably by Smithers.
+
+### Attempt
+
+An attempt is one execution of one node at one iteration with one attempt number. A canonical agent trace is scoped to exactly one attempt.
+
+## Invariants
+
+The implementation MUST satisfy all of the following invariants.
+
+### Identity Invariants
+
+Every canonical trace event MUST include:
+
+- `runId`
+- `nodeId` when the event is attempt-scoped
+- `iteration` when the event is attempt-scoped
+- `attempt` when the event is attempt-scoped
+- `timestampMs`
+- `source.agentFamily`
+- `source.captureMode`
+- `event.kind`
+- `event.sequence`
+
+### Ordering Invariants
+
+Canonical trace events for a single attempt MUST be totally ordered by `event.sequence`.
+
+If upstream events arrive out of wall-clock order, Smithers MUST preserve receive order and MUST NOT reorder them after capture.
+
+`event.sequence` MUST be strictly monotonically increasing within one attempt; duplicate sequence numbers are not allowed.
+
+### Fidelity Invariants
+
+Smithers MUST preserve raw upstream payloads for canonical trace events unless a redaction rule requires modification.
+
+If redaction occurs:
+
+- the record MUST indicate redaction occurred
+- the redaction reason MUST be attached
+- the original raw value MUST NOT be exported
+
+### Correlation Invariants
+
+Every OTEL log record derived from a canonical trace event MUST be queryable by:
+
+- run
+- workflow path
+- node
+- iteration
+- attempt
+- agent family
+- event kind
+
+### Completeness Invariants
+
+If Smithers receives a parseable upstream event, Smithers MUST either:
+
+- convert it into a canonical trace event and export it
+- or emit a diagnostic record explaining why it was dropped
+
+Silent drops are not allowed.
+
+### Truthfulness Invariants
+
+If an agent integration cannot expose a certain class of events, the system MUST record capability absence explicitly and MUST NOT pretend completeness.
+
+Example:
+
+- if an SDK-based integration does not expose thinking deltas, Smithers MUST mark that event class as unsupported for that agent family
+
+## Scope of Observability
+
+The system covers three layers.
+
+### Layer 1: Canonical Runtime Record
+
+Smithers SHOULD persist canonical trace events durably for replay and audit, alongside existing run events and attempt data.
+
+### Layer 2: OTEL Log Export
+
+Smithers MUST export canonical trace events as OTEL logs when OTEL log export is enabled.
+
+### Layer 3: Summary Metrics and Diagnostics
+
+Smithers MAY derive metrics from canonical trace events, but those metrics are secondary and MUST NOT be the sole evidence of capture correctness.
+
+## Agent Capability Model
+
+Each agent family MUST declare an explicit trace capability profile.
+
+The capability profile MUST enumerate support for:
+
+- session metadata
+- assistant text deltas
+- visible thinking deltas
+- final assistant message
+- tool execution start
+- tool execution update
+- tool execution end
+- retry events
+- compaction events
+- raw stderr diagnostics
+- persisted session artifact
+
+### PiAgent
+
+`PiAgent` MUST be treated as a high-fidelity integration.
+
+Available sources include:
+
+- JSON event stream mode
+- RPC mode event stream
+- Pi session JSONL artifacts
+
+Pi exposes event types such as:
+
+- `agent_start`
+- `agent_end`
+- `turn_start`
+- `turn_end`
+- `message_start`
+- `message_update`
+- `message_end`
+- `tool_execution_start`
+- `tool_execution_update`
+- `tool_execution_end`
+- `auto_compaction_start`
+- `auto_compaction_end`
+- `auto_retry_start`
+- `auto_retry_end`
+
+Visible thinking content emitted by Pi MUST be captured as trace content.
+
+Pi session artifacts, when enabled and available, SHOULD be recorded as canonical artifacts associated with the attempt.
+
+### CodexAgent
+
+`CodexAgent` MUST be treated as a structured CLI integration with medium fidelity.
+
+Codex emits JSON output. Smithers MUST preserve all parseable structured events made available by that mode.
+
+If Codex exposes usage, step, message, tool, or completion events, Smithers MUST map them to canonical trace events rather than extracting only final text.
+
+If a given Codex event schema is unstable, Smithers MUST preserve the raw event payload and classify the normalization conservatively.
+
+### ClaudeCodeAgent
+
+`ClaudeCodeAgent` MUST be treated as a structured CLI integration with medium fidelity.
+
+When `stream-json` is enabled, Smithers MUST preserve all parseable stream records and map them into canonical trace events where possible.
+
+Partial assistant messages, tool call indicators, and usage events MUST NOT be discarded if they are parseable.
+
+### GeminiAgent
+
+`GeminiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on output mode.
+
+Smithers MUST preserve parseable structured output and MUST explicitly mark unsupported event classes when the CLI exposes only final or coarse-grained results.
+
+### KimiAgent
+
+`KimiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on `outputFormat`.
+
+If `stream-json` mode is used, Smithers MUST preserve event records. If only final text is available, Smithers MUST mark the trace as partial.
+
+### OpenAIAgent and AnthropicAgent
+
+`OpenAIAgent` and `AnthropicAgent` MUST be treated as SDK integrations.
+
+They do not inherently expose a rich subprocess event stream in the current Smithers wrapper.
+
+For these agents, Smithers MUST capture:
+
+- prompt dispatch boundaries
+- final assistant response
+- token usage when surfaced
+- Smithers-side tool execution start and end
+- visible tool output recorded by Smithers
+- node output emitted by Smithers if any
+
+Smithers MUST mark thinking deltas and message lifecycle as unsupported unless the underlying SDK path is instrumented to provide them.
+
+### AmpAgent and ForgeAgent
+
+`AmpAgent` and `ForgeAgent` MUST be treated as text-first subprocess integrations unless a structured mode is added.
+
+Smithers MUST capture:
+
+- final response text
+- stderr diagnostics
+- Smithers-side tool execution and runtime events
+
+Smithers MUST mark full trace fidelity as unsupported for these integrations.
+
+## Capture Modes
+
+Each attempt MUST declare one capture mode:
+
+- `sdk-events`
+- `rpc-events`
+- `cli-json-stream`
+- `cli-json`
+- `cli-text`
+- `artifact-import`
+
+Capture mode is part of the canonical attempt metadata and MUST be exported with every trace record.
+
+## Canonical Data Model
+
+Smithers MUST introduce a canonical event model for agent traces.
+
+The exact TypeScript shape is an implementation detail, but the semantic fields are mandatory.
+
+### Attempt Metadata
+
+Each attempt MUST expose:
+
+- `traceVersion`
+- `agentFamily`
+- `agentId`
+- `model`
+- `captureMode`
+- `traceCompleteness`
+- `unsupportedEventKinds`
+- `traceStartedAtMs`
+- `traceFinishedAtMs`
+- `rawArtifactRefs`
+
+### `traceCompleteness`
+
+`traceCompleteness` MUST be one of:
+
+- `full-observed`
+- `partial-observed`
+- `final-only`
+- `capture-failed`
+
+Definitions:
+
+- `full-observed`: Smithers captured every event class the integration claims to support
+- `partial-observed`: Smithers captured some but not all supported classes
+- `final-only`: only final response and coarse metadata were available
+- `capture-failed`: Smithers expected trace events but could not capture them reliably
+
+### Canonical Event Fields
+
+Every canonical trace event MUST include:
+
+- `traceVersion`
+- `runId`
+- `workflowPath`
+- `workflowHash` when available
+- `nodeId` when the event is attempt-scoped
+- `iteration` when the event is attempt-scoped
+- `attempt` when the event is attempt-scoped
+- `timestampMs`
+- `event.sequence`
+- `event.kind`
+- `event.phase`
+- `source.agentFamily`
+- `source.captureMode`
+- `source.rawType`
+- `source.observed`
+- `payload`
+- `raw`
+- `redaction`
+- `annotations`
+
+### `event.kind`
+
+`event.kind` MUST be chosen from a controlled vocabulary.
+
+The initial vocabulary MUST include:
+
+- `session.start`
+- `session.end`
+- `turn.start`
+- `turn.end`
+- `message.start`
+- `message.update`
+- `message.end`
+- `assistant.text.delta`
+- `assistant.thinking.delta`
+- `assistant.message.final`
+- `tool.execution.start`
+- `tool.execution.update`
+- `tool.execution.end`
+- `tool.result`
+- `retry.start`
+- `retry.end`
+- `compaction.start`
+- `compaction.end`
+- `stderr`
+- `stdout`
+- `usage`
+- `capture.warning`
+- `capture.error`
+- `artifact.created`
+
+No integration-specific naming is allowed in `event.kind`. Integration-specific names MUST remain in `source.rawType`.
+
+### `event.phase`
+
+`event.phase` MUST be one of:
+
+- `agent`
+- `turn`
+- `message`
+- `tool`
+- `session`
+- `capture`
+- `artifact`
+
+### `source.observed`
+
+`source.observed` MUST be a boolean indicating whether the payload was directly observed from the upstream integration.
+
+Derived normalization records MUST set `source.observed` to `false`.
+
+### `payload`
+
+`payload` MUST contain normalized fields intended for stable querying and display.
+
+Examples:
+
+- for `assistant.text.delta`: `{ text: string }`
+- for `assistant.thinking.delta`: `{ text: string }`
+- for `tool.execution.start`: `{ toolCallId: string, toolName: string, argsPreview: unknown }`
+- for `tool.execution.end`: `{ toolCallId: string, toolName: string, isError: boolean, resultPreview: unknown }`
+
+### `raw`
+
+`raw` MUST contain the raw upstream object or raw text fragment exactly as captured, except for changes made by redaction.
+
+If no raw form exists, `raw` MAY be `null`.
+
+## Custom Annotations
+
+The system MUST support user-defined annotations attached at run start.
+
+Annotations MUST be:
+
+- provided in run options and server APIs
+- stored durably on the run
+- merged into every canonical trace event at export time
+
+Annotations MUST support scalar values only:
+
+- string
+- number
+- boolean
+
+Nested objects and arrays MUST be rejected or flattened before run start. The behavior MUST be explicit and deterministic.
+
+The following annotation namespaces are reserved:
+
+- `smithers.*`
+- `run.*`
+- `workflow.*`
+- `node.*`
+- `agent.*`
+- `otel.*`
+
+User annotations SHOULD use a `custom.*` prefix in canonical export.
+
+## Workflow Metadata Requirements
+
+Every canonical trace event MUST include:
+
+- `workflow.path` as an OTEL attribute when available
+- `workflow.hash` as an OTEL attribute when available
+
+If `workflow.path` is unavailable, Smithers MUST omit the `workflow.path` attribute rather than inventing a placeholder path.
+
+## Redaction Model
+
+Redaction support is mandatory because agent traces can contain sensitive content.
+
+The implementation MUST support:
+
+- disabled redaction
+- default redaction
+- custom redaction rules
+
+### Minimum Default Redaction
+
+Default redaction MUST handle at least:
+
+- API keys
+- bearer tokens
+- common secret env vars
+- authorization headers
+- cookie headers
+- explicitly configured secret literals
+
+### Redaction Semantics
+
+Redaction MUST occur before:
+
+- durable canonical trace persistence
+- OTEL log export
+- artifact snapshot export
+
+If redaction modifies content, the trace event MUST record:
+
+- `redaction.applied = true`
+- `redaction.ruleIds = string[]`
+
+## Export Model
+
+Canonical trace events MUST be exportable as OTEL logs.
+
+### OTEL Collector Requirements
+
+The collector configuration MUST define a `logs` pipeline.
+
+The logs pipeline MUST accept OTLP input and MUST support at least one of:
+
+- OTLP logs exporter
+- Loki exporter
+
+The local development stack SHOULD include Loki for verification and human inspection.
+
+### OTEL Record Shape
+
+For each canonical trace event, Smithers MUST emit one OTEL log record.
+
+The log body MUST contain a compact structured JSON representation of:
+
+- canonical payload
+- raw payload when configured
+- redaction metadata
+
+The log attributes MUST include:
+
+- `service.name`
+- `smithers.trace.version`
+- `run.id`
+- `workflow.path` when available
+- `workflow.hash` when available
+- `node.id` when available
+- `node.iteration` when available
+- `node.attempt` when available
+- `agent.family`
+- `agent.id` when available
+- `agent.model` when available
+- `agent.capture_mode`
+- `trace.completeness`
+- `event.kind`
+- `event.phase`
+- `event.sequence`
+- `source.raw_type`
+- `source.observed`
+
+Custom annotations MUST be exported as OTEL attributes under `custom.*`.
+
+### Attribute Cardinality Rules
+
+The following MUST be attributes:
+
+- run identifiers
+- workflow identifiers
+- node identifiers
+- attempt identifiers
+- event kind
+- agent family
+- capture mode
+
+The following MUST NOT be indexed as labels in Loki-specific configurations:
+
+- full prompt text
+- full response text
+- thinking text
+- tool args bodies
+- tool result bodies
+- arbitrary user free-text annotations
+
+These large fields MUST remain in the log body.
+
+### Severity Mapping
+
+Severity SHOULD be assigned as follows:
+
+- normal trace events: `INFO`
+- stderr and non-terminal capture anomalies: `WARN`
+- capture failures and export failures: `ERROR`
+
+Severity MUST NOT be used to encode event kind.
+
+## Persistence Model
+
+Canonical trace events SHOULD be durably persisted by Smithers in addition to OTEL export.
+
+If durable persistence is implemented, the persistence layer MUST support:
+
+- ordered replay by attempt
+- filtering by event kind
+- pagination by sequence
+- artifact references
+
+OTEL export MUST NOT be the only storage location for canonical trace data.
+
+## Artifact Model
+
+Some agent integrations expose richer external artifacts than can be represented comfortably as log streams.
+
+Examples:
+
+- Pi session JSONL files
+- raw CLI JSON event transcripts
+- exported HTML or JSONL session artifacts
+
+Smithers SHOULD support trace artifacts with metadata:
+
+- `artifact.kind`
+- `artifact.path`
+- `artifact.contentType`
+- `artifact.bytes`
+- `artifact.createdAtMs`
+- `artifact.redacted`
+
+Artifact creation MUST also emit canonical `artifact.created` events.
+
+## Failure Model
+
+The implementation MUST classify failures explicitly.
+
+### Capture Failure
+
+Capture failure means Smithers could not reliably obtain agent trace input it expected from the selected capture mode.
+
+Examples:
+
+- malformed JSON stream
+- unexpected subprocess termination before terminal event
+- SDK callback channel failure
+
+Capture failure MUST:
+
+- mark attempt `traceCompleteness = capture-failed` when terminally broken
+- emit a `capture.error` canonical event
+- include diagnostic details
+
+### Partial Capture
+
+Partial capture means Smithers obtained some trace events but missed expected categories.
+
+Examples:
+
+- stdout stream cut off after several tool events
+- session artifact missing though event stream completed
+
+Partial capture MUST:
+
+- mark attempt `traceCompleteness = partial-observed`
+- record missing classes in `unsupportedEventKinds` or `missingExpectedEventKinds`
+
+### Export Failure
+
+Export failure means Smithers captured canonical trace events but could not deliver them to the OTEL backend.
+
+Export failure MUST NOT erase canonical local truth.
+
+If export fails:
+
+- canonical local persistence MUST still succeed when enabled
+- Smithers MUST emit operator diagnostics through existing logs
+- the run MUST remain inspectable from durable local records
+
+## Normalization Rules
+
+Normalization MUST be conservative.
+
+### One Raw Event to One Canonical Event
+
+As a default rule, one raw upstream event SHOULD map to one canonical trace event.
+
+If one raw event yields multiple canonical events, the implementation MUST document why and MUST include a stable parent link.
+
+### Text Deltas
+
+Assistant text deltas MUST remain deltas if the upstream protocol provided deltas.
+
+Smithers MUST NOT collapse deltas into a single blob during export.
+
+Final assembled messages MAY be emitted separately as `assistant.message.final`.
+
+### Thinking Deltas
+
+Visible thinking content MUST be captured as its own event class and MUST NOT be merged into assistant text.
+
+### Tool Calls
+
+Tool lifecycle MUST preserve:
+
+- stable tool call identifier when upstream provides one
+- tool name
+- visible arguments or argument preview
+- partial updates when available
+- final result preview
+- error flag
+
+### Usage
+
+Usage records MUST be separate canonical events or attached to terminal message events in a way that remains queryable.
+
+If usage is attached, it MUST still be accessible without parsing free-form text.
+
+## Required Runtime Integration Points
+
+The implementation MUST integrate at these boundaries.
+
+### Agent Boundary
+
+Every agent integration MUST report raw trace observations into the canonical trace capture layer.
+
+No agent integration is allowed to silently parse and discard upstream event records before the capture layer sees them.
+
+### Event Bus Boundary
+
+Canonical trace events SHOULD be emitted through or alongside the existing event bus so that:
+
+- they share run correlation
+- they can participate in durable persistence
+- they can reuse existing event-driven verification infrastructure
+
+### Attempt Finalization Boundary
+
+When an attempt finishes, Smithers MUST finalize trace metadata:
+
+- `traceFinishedAtMs`
+- `traceCompleteness`
+- `unsupportedEventKinds`
+- `rawArtifactRefs`
+
+## Required Configuration Surface
+
+The implementation MUST define explicit configuration for:
+
+- enabling OTEL log export
+- selecting backend endpoint
+- enabling or disabling canonical local trace persistence
+- selecting redaction mode
+- retaining or dropping raw payload bodies
+- retaining or dropping raw artifacts
+- maximum event body bytes
+- maximum artifact bytes
+
+The configuration MUST distinguish:
+
+- runtime operator policy
+- run-specific annotations
+
+## Required Operator Queries
+
+The design is incomplete unless the following operator queries are supported.
+
+### Query Set A: Run Reconstruction
+
+Operators MUST be able to answer:
+
+- show all trace records for one run
+- show all trace records for one run and node
+- show only one attempt for one node
+- show ordered assistant text deltas
+- show visible thinking deltas when present
+- show tool calls and results in order
+
+### Query Set B: Failure Analysis
+
+Operators MUST be able to answer:
+
+- which runs had trace capture failures
+- which agents only provide final-only traces
+- which attempts terminated without a terminal agent event
+- which traces were partially redacted
+
+### Query Set C: Audit
+
+Operators MUST be able to answer:
+
+- what annotations were attached to a run
+- which workflow file and workflow hash produced the trace
+- which raw artifact file corresponds to this attempt
+
+## Verification Specification
+
+Task completion is not defined by code existing. It is defined by observable correctness.
+
+The implementation is complete only if every verification class below passes.
+
+## Verification Class 1: Schema Correctness
+
+For each supported agent family, automated tests MUST verify that canonical trace events:
+
+- conform to the declared schema
+- contain required identity fields
+- maintain strictly monotonic `event.sequence`
+- correctly classify `traceCompleteness`
+
+Completion criterion:
+
+- zero schema violations in test fixtures
+
+## Verification Class 2: Ordering Correctness
+
+Automated tests MUST verify that for one attempt:
+
+- event sequences are strictly monotonic
+- final events occur after preceding deltas
+- no duplicate sequence numbers appear
+
+Completion criterion:
+
+- deterministic ordering across repeated test runs
+
+## Verification Class 3: Fidelity Correctness
+
+Fixture-based tests MUST compare raw upstream inputs with canonical trace outputs.
+
+For each fixture:
+
+- every parseable upstream event MUST result in a canonical event or an explicit diagnostic drop event
+- visible thinking content MUST remain distinguishable from assistant text
+- tool call identifiers and names MUST survive normalization
+
+Completion criterion:
+
+- full fixture coverage for each agent family and capture mode supported by Smithers
+
+## Verification Class 4: Completeness Classification
+
+Tests MUST verify the semantics of:
+
+- `full-observed`
+- `partial-observed`
+- `final-only`
+- `capture-failed`
+
+Completion criterion:
+
+- each classification is produced by at least one explicit test case
+
+## Verification Class 5: OTEL Export Correctness
+
+Integration tests MUST verify that canonical trace events become OTEL log records with:
+
+- required attributes present
+- correct body shape
+- correct severity mapping
+- correct custom annotation export
+
+Completion criterion:
+
+- logs are queryable in the target backend by `run.id`, `workflow.path`, `node.id`, `node.attempt`, and `event.kind`
+
+## Verification Class 6: Loki Query Correctness
+
+In a local stack with Loki enabled, end-to-end tests MUST verify that an operator can query:
+
+- all records for a run
+- all records for a node attempt
+- only thinking deltas
+- only tool execution records
+- only capture errors
+
+Completion criterion:
+
+- documented query examples return expected results against test data
+
+## Verification Class 7: Artifact Correctness
+
+When artifact capture is enabled, tests MUST verify:
+
+- artifact references are recorded
+- artifacts exist on disk or in configured storage
+- artifact metadata matches actual content
+- artifact creation emits corresponding canonical events
+
+Completion criterion:
+
+- no dangling artifact references
+
+## Verification Class 8: Redaction Correctness
+
+Tests MUST verify that redaction:
+
+- removes required secrets from canonical payloads, raw payloads, OTEL bodies, and artifacts
+- leaves non-sensitive content intact
+- records which rules were applied
+
+Completion criterion:
+
+- zero known secret literals leak in test fixtures
+
+## Verification Class 9: Failure Resilience
+
+Tests MUST verify behavior when:
+
+- collector is unavailable
+- backend rejects logs
+- malformed upstream JSON is encountered
+- subprocess exits before terminal event
+- artifact write fails
+
+Completion criterion:
+
+- capture failures are classified
+- local diagnostics exist
+- durable local truth remains accessible when configured
+
+## Verification Class 10: Cross-Signal Correlation
+
+Tests MUST verify that logs correlate with:
+
+- run lifecycle events
+- metrics
+- spans
+
+At minimum, operators MUST be able to join by:
+
+- `run.id`
+- `node.id`
+- `node.attempt`
+
+Completion criterion:
+
+- one documented workflow run can be traced across event log, OTEL logs, metrics, and spans without ambiguity
+
+## Acceptance Criteria
+
+The feature is not done until all of the following are true.
+
+### A. Canonical Model Exists
+
+Smithers has a canonical agent trace schema with explicit completeness states and per-agent capability declarations.
+
+### B. Pi Is High Fidelity
+
+`PiAgent` exports structured trace records for:
+
+- session lifecycle
+- turn lifecycle
+- message lifecycle
+- assistant text deltas
+- visible thinking deltas
+- tool execution lifecycle
+- retry and compaction events
+
+### C. Other Agents Are Truthfully Classified
+
+Every agent in `src/agents/` has a declared fidelity class and unsupported event set.
+
+### D. OTEL Logs Pipeline Exists
+
+The collector and local observability stack support OTEL logs end to end.
+
+### E. Queries Work
+
+Operators can answer the required run reconstruction, failure analysis, and audit queries from the exported logs.
+
+### F. Verification Is Automated
+
+Automated tests exist for every verification class: schema, ordering, fidelity, completeness, OTEL export, Loki query correctness, artifacts, redaction, failure resilience, and cross-signal correlation.
+
+## Implementation Phasing
+
+This section is normative for rollout order.
+
+### Phase 1: Canonical Model
+
+Implement:
+
+- canonical trace schema
+- completeness classification
+- per-agent capability declarations
+
+### Phase 2: Pi Fidelity
+
+Implement:
+
+- Pi raw event capture
+- canonical normalization
+- OTEL export
+- artifact capture for session files if configured
+
+### Phase 3: Structured CLI Agents
+
+Implement:
+
+- Codex
+- Claude Code
+- Gemini
+- Kimi
+
+Each integration MUST ship with fixture-based normalization tests before being considered complete.
+
+### Phase 4: SDK and Text-Only Agents
+
+Implement:
+
+- explicit partial or final-only capture
+- truthful capability declarations
+- OTEL export for the observable subset
+
+### Phase 5: Redaction and Hardening
+
+Implement:
+
+- default redaction
+- export failure handling
+- artifact verification
+- documented local Loki queries
+
+## Explicit Non-Ambiguities
+
+The following choices are intentional.
+
+- Smithers MUST prefer truthful partial fidelity over fake completeness.
+- Smithers MUST preserve raw event boundaries rather than collapsing everything into summaries.
+- Smithers MUST keep large content in log bodies, not indexing labels.
+- Smithers MUST retain a local source of truth when OTEL export fails.
+- Smithers MUST separate assistant text from visible thinking.
+- Smithers MUST define task completion in terms of verification evidence, not implementation effort.
+
+## Out of Scope for the First Implementation
+
+The first implementation MAY defer:
+
+- remote artifact storage
+- cross-run session graph visualizations
+- backend-specific dashboards beyond minimal verification queries
+- universal reconstruction of provider-internal hidden reasoning
+
+If deferred, these items MUST be documented explicitly and MUST NOT be implied to exist.
+
+## Summary
+
+The required system is not “send some logs to Loki.”
+
+The required system is:
+
+- a canonical agent trace model
+- explicit capability declarations per integration
+- conservative capture of all observable upstream events
+- durable local truth
+- OTEL log export with stable correlation fields
+- redaction before persistence and export
+- verification that proves fidelity, completeness, and queryability
+
+Anything less produces observability that looks complete while remaining operationally unreliable.

From 09bddf4b0afa7471ba7c456b1a72cf7f0e539842 Mon Sep 17 00:00:00 2001
From: Samuel Huber <samuel@dtech.vision>
Date: Thu, 26 Mar 2026 20:29:55 +0100
Subject: [PATCH 2/7] fix(agent-trace): correct Pi RPC completion and add
 canonical trace capture

Pi was working at the CLI level, but Smithers was finalizing RPC sessions on the
first assistant turn_end. That is no longer correct when Pi emits a tool-use
turn followed by additional turns and a final assistant answer. The result was
that tool-using Pi sessions were truncated before the real final response, which
made long-running implementation tasks look idle or schema-only even when Pi was
continuing internally.

This change fixes the RPC completion logic so Smithers waits past tool-use turns
and can finalize from the true terminal assistant state. It also adds the
canonical Pi-first trace event model, persistence path, and tests needed to make
that trace capture observable and durable.
---
 src/SmithersEvent.ts        | 113 ++++++++++++++++++++++-
 src/agent-trace.ts          | 178 ++++++++++++++++++++++++++++++++++++
 src/agents/BaseCliAgent.ts  |  80 ++++++++++++----
 src/agents/PiAgent.ts       |  23 +++++
 src/events.ts               |  21 +++--
 src/index.ts                |  11 ++-
 src/observability/index.ts  |  89 ++++++++++++++++++
 src/tools/context.ts        |   2 +
 tests/observability.test.ts |  91 ++++++++++++++++++
 tests/pi-support.test.ts    | 127 ++++++++++++++++++++++++-
 10 files changed, 703 insertions(+), 32 deletions(-)
 create mode 100644 src/agent-trace.ts

diff --git a/src/SmithersEvent.ts b/src/SmithersEvent.ts
index 6b25e4ff..29d705f8 100644
--- a/src/SmithersEvent.ts
+++ b/src/SmithersEvent.ts
@@ -1,5 +1,113 @@
 import type { RunStatus } from "./RunStatus";
 
+export const CANONICAL_AGENT_TRACE_VERSION = 1 as const;
+
+export type AgentTraceCompleteness =
+  | "full-observed"
+  | "partial-observed"
+  | "final-only"
+  | "capture-failed";
+
+export type AgentTraceCaptureMode =
+  | "sdk-events"
+  | "rpc-events"
+  | "cli-json-stream"
+  | "cli-json"
+  | "cli-text"
+  | "artifact-import";
+
+export type AgentTraceEventKind =
+  | "session.start"
+  | "session.end"
+  | "turn.start"
+  | "turn.end"
+  | "message.start"
+  | "message.update"
+  | "message.end"
+  | "assistant.text.delta"
+  | "assistant.thinking.delta"
+  | "assistant.message.final"
+  | "tool.execution.start"
+  | "tool.execution.update"
+  | "tool.execution.end"
+  | "tool.result"
+  | "retry.start"
+  | "retry.end"
+  | "compaction.start"
+  | "compaction.end"
+  | "stderr"
+  | "stdout"
+  | "usage"
+  | "capture.warning"
+  | "capture.error"
+  | "artifact.created";
+
+export type AgentTraceEventPhase =
+  | "message"
+  | "tool"
+  | "agent"
+  | "session"
+  | "turn"
+  | "capture"
+  | "artifact";
+
+export const PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS = [
+  "assistant.text.delta",
+  "tool.execution.start",
+  "tool.execution.update",
+  "tool.execution.end",
+] as const satisfies readonly AgentTraceEventKind[];
+
+export const PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS = [
+  "session.start",
+  "session.end",
+  "turn.start",
+  "turn.end",
+  "message.start",
+  "message.update",
+  "message.end",
+  "assistant.thinking.delta",
+  "assistant.message.final",
+  "tool.result",
+  "retry.start",
+  "retry.end",
+  "compaction.start",
+  "compaction.end",
+  "usage",
+  "artifact.created",
+] as const satisfies readonly AgentTraceEventKind[];
+
+export type SmithersAgentTraceEvent = {
+  type: "AgentTraceEvent";
+  traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION;
+  traceCompleteness: AgentTraceCompleteness;
+  unsupportedEventKinds: AgentTraceEventKind[];
+  runId: string;
+  workflowPath?: string | null;
+  workflowHash?: string | null;
+  nodeId: string;
+  iteration: number;
+  attempt: number;
+  timestampMs: number;
+  event: {
+    sequence: number;
+    kind: AgentTraceEventKind;
+    phase: AgentTraceEventPhase;
+  };
+  source: {
+    agentFamily: "pi";
+    agentId?: string;
+    model?: string;
+    captureMode: AgentTraceCaptureMode;
+    rawType?: string;
+    observed: boolean;
+  };
+  payload: Record<string, unknown> | null;
+  raw: unknown;
+  redaction: { applied: boolean; ruleIds?: string[] } | null;
+  annotations: Record<string, string | number | boolean> | null;
+};
+
 export type SmithersEvent =
   | { type: "RunStarted"; runId: string; timestampMs: number }
   | {
@@ -194,4 +302,7 @@ export type SmithersEvent =
       cacheWriteTokens?: number;
       reasoningTokens?: number;
       timestampMs: number;
-    };
+    }
+  | SmithersAgentTraceEvent;
+
+export type ExtendedSmithersEvent = SmithersEvent;
diff --git a/src/agent-trace.ts b/src/agent-trace.ts
new file mode 100644
index 00000000..fa611798
--- /dev/null
+++ b/src/agent-trace.ts
@@ -0,0 +1,178 @@
+import { getToolContext } from "./tools/context";
+import { nowMs } from "./utils/time";
+import {
+  CANONICAL_AGENT_TRACE_VERSION,
+  PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS,
+} from "./SmithersEvent";
+import type {
+  AgentTraceCaptureMode,
+  AgentTraceEventKind,
+  AgentTraceEventPhase,
+  SmithersAgentTraceEvent,
+} from "./SmithersEvent";
+
+// Local, per-attempt monotonic sequence for canonical trace events
+const traceSeq = new WeakMap<object, number>();
+
+function nextTraceSeq(ctx: object): number {
+  const current = traceSeq.get(ctx) ?? 0;
+  const next = current + 1;
+  traceSeq.set(ctx, next);
+  return next;
+}
+
+export type CaptureMode = Extract<
+  AgentTraceCaptureMode,
+  "cli-json" | "cli-json-stream" | "rpc-events" | "cli-text"
+>;
+
+export type AgentTraceSourceMeta = {
+  agentId?: string;
+  model?: string;
+};
+
+export function emitAgentTrace(
+  kind: AgentTraceEventKind,
+  phase: AgentTraceEventPhase,
+  payload: Record<string, unknown> | null | undefined,
+  raw: unknown,
+  rawType: string | undefined,
+  captureMode: CaptureMode,
+  sourceMeta?: AgentTraceSourceMeta,
+) {
+  const ctx = getToolContext();
+  if (!ctx || typeof ctx.emitEvent !== "function") return; // outside workflow execution
+  const ts = nowMs();
+  const event: SmithersAgentTraceEvent = {
+    type: "AgentTraceEvent",
+    traceVersion: CANONICAL_AGENT_TRACE_VERSION,
+    traceCompleteness: "partial-observed",
+    unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS],
+    runId: ctx.runId,
+    workflowPath: ctx.workflowPath ?? null,
+    workflowHash: ctx.workflowHash ?? null,
+    nodeId: ctx.nodeId,
+    iteration: ctx.iteration,
+    attempt: ctx.attempt,
+    timestampMs: ts,
+    event: {
+      sequence: nextTraceSeq(ctx),
+      kind,
+      phase,
+    },
+    source: {
+      agentFamily: "pi",
+      agentId: sourceMeta?.agentId,
+      model: sourceMeta?.model,
+      captureMode,
+      rawType,
+      observed: true,
+    },
+    payload: payload ?? null,
+    raw,
+    redaction: null,
+    annotations: null,
+  };
+  void ctx.emitEvent(event);
+}
+
+/** Read the tool call id and name from a Pi tool event, nested (`toolCall`/`call`) or flat. */
+function piToolIdentity(event: any) {
+  const call = event.toolCall ?? event.call ?? event;
+  const toolCallId = String(call.id ?? call.toolCallId ?? "");
+  const toolName = String(call.name ?? call.toolName ?? call.tool ?? "");
+  return { call, toolCallId, toolName };
+}
+
+/**
+ * Map one parsed Pi event onto a canonical agent trace event and emit it.
+ * Only `message_update` text deltas and `tool_execution_*` events are mapped;
+ * any other event type, and any non-object input, is ignored.
+ *
+ * @param event - Parsed Pi event object.
+ * @param captureMode - Capture mode recorded on the emitted trace event.
+ * @param sourceMeta - Optional agent id / model recorded on the trace event.
+ */
+export function capturePiEvent(
+  event: any,
+  captureMode: CaptureMode,
+  sourceMeta?: AgentTraceSourceMeta,
+) {
+  if (!event || typeof event !== "object") return;
+  const type = String((event as any).type ?? "");
+
+  if (type === "message_update") {
+    const assistant = (event as any).assistantMessageEvent;
+    if (assistant && assistant.type === "text_delta" && typeof assistant.delta === "string") {
+      emitAgentTrace(
+        "assistant.text.delta",
+        "message",
+        { text: assistant.delta },
+        event,
+        "message_update.text_delta",
+        captureMode,
+        sourceMeta,
+      );
+    }
+  }
+
+  if (type === "tool_execution_start") {
+    const { call, toolCallId, toolName } = piToolIdentity(event);
+    emitAgentTrace(
+      "tool.execution.start",
+      "tool",
+      { toolCallId, toolName, argsPreview: call.args ?? call.arguments ?? undefined },
+      event,
+      "tool_execution_start",
+      captureMode,
+      sourceMeta,
+    );
+  }
+
+  if (type === "tool_execution_update") {
+    const { toolCallId, toolName } = piToolIdentity(event);
+    emitAgentTrace(
+      "tool.execution.update",
+      "tool",
+      { toolCallId, toolName },
+      event,
+      "tool_execution_update",
+      captureMode,
+      sourceMeta,
+    );
+  }
+
+  if (type === "tool_execution_end") {
+    const { toolCallId, toolName } = piToolIdentity(event);
+    // Pi reports tool failure via `isError`; `error`/`failed` remain as fallbacks.
+    const isError = Boolean(event.isError || event.error || event.failed);
+    emitAgentTrace(
+      "tool.execution.end",
+      "tool",
+      { toolCallId, toolName, isError, resultPreview: event.result ?? event.output ?? undefined },
+      event,
+      "tool_execution_end",
+      captureMode,
+      sourceMeta,
+    );
+  }
+}
+
+export function capturePiNdjson(
+  raw: string,
+  captureMode: CaptureMode,
+  sourceMeta?: AgentTraceSourceMeta,
+) {
+  const lines = String(raw ?? "")
+    .split(/\r?\n/)
+    .map((l) => l.trim())
+    .filter(Boolean);
+  for (const line of lines) {
+    try {
+      const parsed = JSON.parse(line);
+      capturePiEvent(parsed, captureMode, sourceMeta);
+    } catch {
+      // ignore malformed lines
+    }
+  }
+}
diff --git a/src/agents/BaseCliAgent.ts b/src/agents/BaseCliAgent.ts
index 4c94842a..3d4f9605 100644
--- a/src/agents/BaseCliAgent.ts
+++ b/src/agents/BaseCliAgent.ts
@@ -62,6 +62,7 @@ type RunRpcCommandOptions = {
     | Promise<PiExtensionUiResponse | null>
     | PiExtensionUiResponse
     | null;
+  onEvent?: (event: unknown) => void;
 };
 
 type PromptParts = {
@@ -711,16 +712,21 @@ export function runRpcCommandEffect(command: string, args: string[], options: Ru
       child.stdin.write(`${JSON.stringify(normalized)}\n`);
      };
  
-     const handleLine = async (line: string) => {
-       inactivity.reset();
-       let parsed: unknown;
-       try {
-         parsed = JSON.parse(line);
-       } catch {
-         return;
-       }
-       if (!parsed || typeof parsed !== "object") return;
-       const event = parsed as Record<string, unknown>;
+    const handleLine = async (line: string) => {
+      inactivity.reset();
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(line);
+      } catch {
+        return;
+      }
+      if (!parsed || typeof parsed !== "object") return;
+      try {
+        options.onEvent?.(parsed);
+      } catch {
+        // ignore observer errors
+      }
+      const event = parsed as Record<string, unknown>;
        const type = event.type;
        if (type === "response" && event.command === "prompt" && event.success === false) {
          const errorMessage = typeof event.error === "string" ? event.error : "PI RPC prompt failed";
@@ -754,20 +760,56 @@ export function runRpcCommandEffect(command: string, args: string[], options: Ru
            if (message.usage) extractedUsage = message.usage;
            if (message.stopReason === "error" || message.stopReason === "aborted") {
              promptResponseError = message.errorMessage || `Request ${message.stopReason}`;
-           }
-           const extracted = finalMessage ? extractTextFromJsonValue(finalMessage) : undefined;
-           const text = extracted ?? textDeltas;
-           inactivity.clear();
-           totalTimeout.clear();
-           if (promptResponseError) {
+             inactivity.clear();
+             totalTimeout.clear();
              handleError(new Error(promptResponseError));
              return;
            }
-           finalize(text, finalMessage ?? text);
-           child.stdin?.end();
-           terminateChild();
+           // Do not finalize on tool-use turns. Pi continues with additional
+           // turns after tool execution and only reaches the real final answer
+           // on a later turn/agent_end.
+           if (message.stopReason !== "toolUse") {
+             const extracted = finalMessage ? extractTextFromJsonValue(finalMessage) : undefined;
+             const text = extracted ?? textDeltas;
+             inactivity.clear();
+             totalTimeout.clear();
+             finalize(text, finalMessage ?? text);
+             child.stdin?.end();
+             terminateChild();
+             return;
+           }
          }
        }
+       if (type === "agent_end") {
+         const messages = (event as any).messages as Array<any> | undefined;
+         if (Array.isArray(messages)) {
+           for (let i = messages.length - 1; i >= 0; i--) {
+             const message = messages[i];
+             if (message?.role === "assistant") {
+               finalMessage = message;
+               if (message.usage) extractedUsage = message.usage;
+               if (message.stopReason === "error" || message.stopReason === "aborted") {
+                 promptResponseError = message.errorMessage || `Request ${message.stopReason}`;
+               }
+               break;
+             }
+           }
+         }
+         if (promptResponseError) {
+           inactivity.clear();
+           totalTimeout.clear();
+           handleError(new Error(promptResponseError));
+           return;
+         }
+         const extracted = finalMessage ? extractTextFromJsonValue(finalMessage) : undefined;
+         const text = extracted ?? textDeltas;
+         inactivity.clear();
+         totalTimeout.clear();
+         finalize(text, finalMessage ?? text);
+         child.stdin?.end();
+         terminateChild();
+         return;
+       }
        if (type === "extension_ui_request") {
          await maybeWriteExtensionResponse(event as PiExtensionUiRequest);
        }
diff --git a/src/agents/PiAgent.ts b/src/agents/PiAgent.ts
index 7d659c2b..0f7a62a0 100644
--- a/src/agents/PiAgent.ts
+++ b/src/agents/PiAgent.ts
@@ -14,6 +14,8 @@ import {
 import type { BaseCliAgentOptions, PiExtensionUiRequest, PiExtensionUiResponse } from "./BaseCliAgent";
 import { getToolContext } from "../tools/context";
 import { SmithersError } from "../utils/errors";
+import { capturePiNdjson } from "../agent-trace";
+import { capturePiEvent } from "../agent-trace";
 
 export type { PiExtensionUiRequest, PiExtensionUiResponse };
 
@@ -186,6 +188,17 @@ export class PiAgent extends BaseCliAgent {
       const extractedText = mode === "json"
         ? (extractTextFromPiNdjson(rawText) ?? rawText)
         : rawText;
+      // Capture canonical trace events for Pi NDJSON stream (assistant deltas, tool lifecycle)
+      try {
+        if (mode === "json") {
+          capturePiNdjson(rawText, "cli-json", {
+            agentId: this.id,
+            model: this.opts.model ?? this.model,
+          });
+        }
+      } catch {
+        // Best-effort capture; never fail the agent call due to trace capture.
+      }
       const output = tryParseJson(extractedText);
       return buildGenerateResult(extractedText, output, this.opts.model ?? "pi");
     }
@@ -201,6 +214,16 @@ export class PiAgent extends BaseCliAgent {
       maxOutputBytes: this.maxOutputBytes ?? getToolContext()?.maxOutputBytes,
       onStderr: options?.onStderr,
       onExtensionUiRequest: this.opts.onExtensionUiRequest,
+      onEvent: (evt) => {
+        try {
+          capturePiEvent(evt, "rpc-events", {
+            agentId: this.id,
+            model: this.opts.model ?? this.model,
+          });
+        } catch {
+          /* ignore */
+        }
+      },
     });
 
     return buildGenerateResult(rpcResult.text, rpcResult.output, this.opts.model ?? "pi", rpcResult.usage);
diff --git a/src/events.ts b/src/events.ts
index 99256de6..e83fd306 100644
--- a/src/events.ts
+++ b/src/events.ts
@@ -1,11 +1,12 @@
 import { EventEmitter } from "node:events";
-import * as FileSystem from "@effect/platform/FileSystem";
+import { promises as fs } from "node:fs";
 import { join } from "node:path";
 import { Effect } from "effect";
 import type { SmithersEvent } from "./SmithersEvent";
 import { fromPromise } from "./effect/interop";
 import { runPromise } from "./effect/runtime";
 import { trackEvent } from "./effect/metrics";
+import { isAgentTraceEvent, toPersistedAgentTraceRecord } from "./observability";
 
 export class EventBus extends EventEmitter {
   private seq = 0;
@@ -143,14 +144,16 @@ export class EventBus extends EventEmitter {
   private persistLogEffect(event: SmithersEvent) {
     if (!this.logDir) return Effect.void;
     const dir = this.logDir;
-    return Effect.gen(function* () {
-      const fs = yield* FileSystem.FileSystem;
-      yield* fs.makeDirectory(dir, { recursive: true });
-      const file = join(dir, "stream.ndjson");
-      const line = JSON.stringify(event) + "\n";
-      const current = yield* Effect.option(fs.readFileString(file, "utf8"));
-      const prefix = current._tag === "Some" ? current.value : "";
-      yield* fs.writeFileString(file, prefix + line);
+    return fromPromise("persist event log", async () => {
+      await fs.mkdir(dir, { recursive: true });
+      await fs.appendFile(join(dir, "stream.ndjson"), `${JSON.stringify(event)}\n`, "utf8");
+      if (isAgentTraceEvent(event)) {
+        await fs.appendFile(
+          join(dir, "agent-trace.ndjson"),
+          `${JSON.stringify(toPersistedAgentTraceRecord(event))}\n`,
+          "utf8",
+        );
+      }
     });
   }
 }
diff --git a/src/index.ts b/src/index.ts
index 893f9ace..0bddc29a 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -12,7 +12,13 @@ export type { SchemaRegistryEntry } from "./SchemaRegistryEntry";
 export type { SmithersWorkflow } from "./SmithersWorkflow";
 export type { SmithersCtx } from "./SmithersCtx";
 export type { OutputAccessor, InferRow, InferOutputEntry } from "./OutputAccessor";
-export type { SmithersEvent } from "./SmithersEvent";
+export type {
+  AgentTraceCaptureMode,
+  AgentTraceCompleteness,
+  AgentTraceEventKind,
+  SmithersAgentTraceEvent,
+  SmithersEvent,
+} from "./SmithersEvent";
 export type { SmithersError } from "./SmithersError";
 export { SmithersError as SmithersErrorInstance, isSmithersError, errorToJson } from "./utils/errors";
 export type { SmithersErrorCode } from "./utils/errors";
@@ -99,10 +105,13 @@ export type { ServerOptions } from "./server/index";
 // Observability
 export {
   SmithersObservability,
+  PI_AGENT_TRACE_CAPABILITY_PROFILE,
   createSmithersObservabilityLayer,
   createSmithersOtelLayer,
   createSmithersRuntimeLayer,
+  isAgentTraceEvent,
   smithersMetrics,
+  toPersistedAgentTraceRecord,
   trackSmithersEvent,
   activeNodes,
   activeRuns,
diff --git a/src/observability/index.ts b/src/observability/index.ts
index b29835ff..0439b524 100644
--- a/src/observability/index.ts
+++ b/src/observability/index.ts
@@ -65,6 +65,17 @@ import {
   updateProcessMetrics,
   vcsDuration,
 } from "../effect/metrics";
+import {
+  CANONICAL_AGENT_TRACE_VERSION,
+  PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS,
+  PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS,
+} from "../SmithersEvent";
+import type {
+  AgentTraceCaptureMode,
+  AgentTraceCompleteness,
+  AgentTraceEventKind,
+  SmithersAgentTraceEvent,
+} from "../SmithersEvent";
 
 export type SmithersLogFormat = "json" | "pretty" | "string" | "logfmt";
 
@@ -101,6 +112,84 @@ export class SmithersObservability extends Context.Tag("SmithersObservability")<
   SmithersObservabilityService
 >() {}
 
+export type AgentTraceCapabilityProfile = {
+  readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION;
+  readonly agentFamily: "pi";
+  readonly captureModes: readonly AgentTraceCaptureMode[];
+  readonly traceCompleteness: AgentTraceCompleteness;
+  readonly supportedEventKinds: readonly AgentTraceEventKind[];
+  readonly unsupportedEventKinds: readonly AgentTraceEventKind[];
+};
+
+export type PersistedAgentTraceRecord = {
+  readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION;
+  readonly traceCompleteness: AgentTraceCompleteness;
+  readonly unsupportedEventKinds: AgentTraceEventKind[];
+  readonly runId: string;
+  readonly workflowPath: string | null;
+  readonly workflowHash: string | null;
+  readonly nodeId: string;
+  readonly iteration: number;
+  readonly attempt: number;
+  readonly timestampMs: number;
+  readonly eventSequence: number;
+  readonly eventKind: AgentTraceEventKind;
+  readonly eventPhase: SmithersAgentTraceEvent["event"]["phase"];
+  readonly agentFamily: SmithersAgentTraceEvent["source"]["agentFamily"];
+  readonly agentId: string | null;
+  readonly agentModel: string | null;
+  readonly captureMode: AgentTraceCaptureMode;
+  readonly rawType: string | null;
+  readonly observed: boolean;
+  readonly payload: Record<string, unknown> | null;
+  readonly raw: unknown;
+  readonly redaction: SmithersAgentTraceEvent["redaction"];
+  readonly annotations: SmithersAgentTraceEvent["annotations"];
+};
+
+export const PI_AGENT_TRACE_CAPABILITY_PROFILE: AgentTraceCapabilityProfile = {
+  traceVersion: CANONICAL_AGENT_TRACE_VERSION,
+  agentFamily: "pi",
+  captureModes: ["cli-json", "rpc-events"],
+  traceCompleteness: "partial-observed",
+  supportedEventKinds: [...PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS],
+  unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS],
+};
+
+export function isAgentTraceEvent(event: unknown): event is SmithersAgentTraceEvent {
+  return !!event && typeof event === "object" && (event as { type?: unknown }).type === "AgentTraceEvent";
+}
+
+export function toPersistedAgentTraceRecord(
+  event: SmithersAgentTraceEvent,
+): PersistedAgentTraceRecord {
+  return {
+    traceVersion: event.traceVersion,
+    traceCompleteness: event.traceCompleteness,
+    unsupportedEventKinds: [...event.unsupportedEventKinds],
+    runId: event.runId,
+    workflowPath: event.workflowPath ?? null,
+    workflowHash: event.workflowHash ?? null,
+    nodeId: event.nodeId,
+    iteration: event.iteration,
+    attempt: event.attempt,
+    timestampMs: event.timestampMs,
+    eventSequence: event.event.sequence,
+    eventKind: event.event.kind,
+    eventPhase: event.event.phase,
+    agentFamily: event.source.agentFamily,
+    agentId: event.source.agentId ?? null,
+    agentModel: event.source.model ?? null,
+    captureMode: event.source.captureMode,
+    rawType: event.source.rawType ?? null,
+    observed: event.source.observed,
+    payload: event.payload ?? null,
+    raw: event.raw,
+    redaction: event.redaction,
+    annotations: event.annotations,
+  };
+}
+
 export const prometheusContentType =
   "text/plain; version=0.0.4; charset=utf-8";
 
diff --git a/src/tools/context.ts b/src/tools/context.ts
index c585790c..cd202db0 100644
--- a/src/tools/context.ts
+++ b/src/tools/context.ts
@@ -8,6 +8,8 @@ export type ToolContext = {
   nodeId: string;
   iteration: number;
   attempt: number;
+  workflowPath?: string | null;
+  workflowHash?: string | null;
   rootDir: string;
   allowNetwork: boolean;
   maxOutputBytes: number;
diff --git a/tests/observability.test.ts b/tests/observability.test.ts
index 0b80a2f1..5cadf2d5 100644
--- a/tests/observability.test.ts
+++ b/tests/observability.test.ts
@@ -1,11 +1,14 @@
 import { describe, expect, test } from "bun:test";
 import { Metric } from "effect";
 import {
+  PI_AGENT_TRACE_CAPABILITY_PROFILE,
   httpRequestDuration,
   renderPrometheusMetrics,
   runsTotal,
+  toPersistedAgentTraceRecord,
 } from "../src/observability";
 import { runPromise } from "../src/effect/runtime";
+import type { SmithersAgentTraceEvent } from "../src/SmithersEvent";
 
 describe("Prometheus metrics", () => {
   test("renders built-in Smithers metrics in Prometheus exposition format", async () => {
@@ -23,3 +26,91 @@ describe("Prometheus metrics", () => {
     expect(output).toContain("smithers_http_request_duration_ms_count");
   });
 });
+
+describe("agent trace observability", () => {
+  test("declares the implemented Pi trace slice truthfully", () => {
+    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceVersion).toBe(1);
+    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.agentFamily).toBe("pi");
+    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceCompleteness).toBe("partial-observed");
+    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.supportedEventKinds).toEqual([
+      "assistant.text.delta",
+      "tool.execution.start",
+      "tool.execution.update",
+      "tool.execution.end",
+    ]);
+    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.unsupportedEventKinds).toContain(
+      "assistant.thinking.delta",
+    );
+    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("cli-json");
+    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("rpc-events");
+  });
+
+  test("flattens canonical trace events into queryable persisted records", () => {
+    const event: SmithersAgentTraceEvent = {
+      type: "AgentTraceEvent",
+      traceVersion: 1,
+      traceCompleteness: "partial-observed",
+      unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"],
+      runId: "run-1",
+      workflowPath: "/tmp/workflow.tsx",
+      workflowHash: "workflow-hash",
+      nodeId: "node-a",
+      iteration: 2,
+      attempt: 3,
+      timestampMs: 123,
+      event: {
+        sequence: 4,
+        kind: "tool.execution.end",
+        phase: "tool",
+      },
+      source: {
+        agentFamily: "pi",
+        agentId: "pi-agent-id",
+        model: "gpt-5.2-codex",
+        captureMode: "rpc-events",
+        rawType: "tool_execution_end",
+        observed: true,
+      },
+      payload: {
+        toolCallId: "tool-1",
+        toolName: "read",
+        isError: false,
+      },
+      raw: { type: "tool_execution_end" },
+      redaction: null,
+      annotations: { "custom.test": true },
+    };
+
+    const record = toPersistedAgentTraceRecord(event);
+
+    expect(record).toEqual({
+      traceVersion: 1,
+      traceCompleteness: "partial-observed",
+      unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"],
+      runId: "run-1",
+      workflowPath: "/tmp/workflow.tsx",
+      workflowHash: "workflow-hash",
+      nodeId: "node-a",
+      iteration: 2,
+      attempt: 3,
+      timestampMs: 123,
+      eventSequence: 4,
+      eventKind: "tool.execution.end",
+      eventPhase: "tool",
+      agentFamily: "pi",
+      agentId: "pi-agent-id",
+      agentModel: "gpt-5.2-codex",
+      captureMode: "rpc-events",
+      rawType: "tool_execution_end",
+      observed: true,
+      payload: {
+        toolCallId: "tool-1",
+        toolName: "read",
+        isError: false,
+      },
+      raw: { type: "tool_execution_end" },
+      redaction: null,
+      annotations: { "custom.test": true },
+    });
+  });
+});
diff --git a/tests/pi-support.test.ts b/tests/pi-support.test.ts
index ca7c46bc..170d16cf 100644
--- a/tests/pi-support.test.ts
+++ b/tests/pi-support.test.ts
@@ -3,6 +3,9 @@ import { afterEach, describe, expect, test } from "bun:test";
  import { join } from "node:path";
  import { tmpdir } from "node:os";
  import { PiAgent } from "../src/agents";
+ import { EventBus } from "../src/events";
+ import { runWithToolContext } from "../src/tools/context";
+ import type { SmithersAgentTraceEvent } from "../src/SmithersEvent";
  
  const originalPath = process.env.PATH ?? "";
  
@@ -60,7 +63,8 @@ import { afterEach, describe, expect, test } from "bun:test";
          thinking: "low",
          verbose: true,
          env: { PATH: process.env.PATH! },
-       });
+  });
+
  
        const result = await agent.generate({
          messages: [
@@ -309,7 +313,8 @@ import { afterEach, describe, expect, test } from "bun:test";
          mode: "json",
          model: "test-model",
          env: { PATH: process.env.PATH! },
-       });
+  });
+
 
        const result = await agent.generate({
          messages: [{ role: "user", content: "Hello" }],
@@ -324,6 +329,124 @@ import { afterEach, describe, expect, test } from "bun:test";
      }
    });
 
+   test("PiAgent json mode emits canonical trace events and persists them", async () => {
+     // Fake Pi emits NDJSON with assistant text deltas and a tool lifecycle
+     const fake = await makeFakePi(`
+const lines = [
+ JSON.stringify({ type: "session", version: 3, id: "sess-1" }),
+ JSON.stringify({ type: "agent_start" }),
+ JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: "Hello" } }),
+ JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: ", world" } }),
+ JSON.stringify({ type: "tool_execution_start", toolCall: { id: "t1", name: "read", args: { path: "README.md" } } }),
+ JSON.stringify({ type: "tool_execution_end", toolCall: { id: "t1", name: "read" }, result: { ok: true } }),
+ JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Hello, world" }], stopReason: "stop" } })
+];
+process.stdout.write(lines.join("\\n") + "\\n");
+`);
+
+     const memoryEvents: { seq: number; row: any }[] = [];
+     const db = {
+       insertEventWithNextSeq: ({ runId, timestampMs, type, payloadJson }: any) => {
+         const seq = (memoryEvents.length > 0 ? memoryEvents[memoryEvents.length - 1].seq : -1) + 1;
+         memoryEvents.push({ seq, row: { runId, timestampMs, type, payloadJson } });
+         return Promise.resolve(seq);
+       },
+     } as any;
+
+     const logDir = await mkdtemp(join(tmpdir(), "smithers-agent-trace-"));
+     const bus = new EventBus({ db, logDir });
+
+     try {
+       process.env.PATH = `${fake.dir}:${originalPath}`;
+       const agent = new PiAgent({ mode: "json", model: "pi-test-model", env: { PATH: process.env.PATH! } });
+
+       const captured: SmithersAgentTraceEvent[] = [];
+
+       await runWithToolContext(
+         {
+           db: db as any,
+           runId: "run-1",
+           nodeId: "node-A",
+           iteration: 1,
+           attempt: 1,
+           workflowPath: "/tmp/workflows/pi-workflow.tsx",
+           workflowHash: "workflow-hash-1",
+           rootDir: process.cwd(),
+           allowNetwork: true,
+           maxOutputBytes: 200_000,
+           timeoutMs: 30_000,
+           seq: 0,
+           emitEvent: (e: any) => {
+             if (e && e.type === "AgentTraceEvent") {
+               captured.push(e as SmithersAgentTraceEvent);
+             }
+             return bus.emitEventQueued(e as any);
+           },
+         },
+         async () => {
+           const result = await agent.generate({ messages: [{ role: "user", content: "Ping" }] });
+           expect(result.text).toContain("Hello, world");
+         },
+       );
+       await bus.flush();
+
+       const sequences = captured.map((e) => e.event.sequence);
+       expect(sequences).toEqual([1, 2, 3, 4]);
+
+       // We should have assistant deltas and tool lifecycle mapped
+       const kinds = captured.map((e) => e.event.kind);
+       expect(kinds).toEqual([
+         "assistant.text.delta",
+         "assistant.text.delta",
+         "tool.execution.start",
+         "tool.execution.end",
+       ]);
+
+       // Correlation and truthfulness fields present
+       for (const e of captured) {
+         expect(e.traceVersion).toBe(1);
+         expect(e.traceCompleteness).toBe("partial-observed");
+         expect(e.unsupportedEventKinds).toContain("assistant.thinking.delta");
+         expect(e.runId).toBe("run-1");
+         expect(e.workflowPath).toBe("/tmp/workflows/pi-workflow.tsx");
+         expect(e.workflowHash).toBe("workflow-hash-1");
+         expect(e.nodeId).toBe("node-A");
+         expect(e.iteration).toBe(1);
+         expect(e.attempt).toBe(1);
+         expect(e.source.agentFamily).toBe("pi");
+         expect(e.source.agentId).toBe(agent.id);
+         expect(e.source.model).toBe("pi-test-model");
+         expect(e.source.captureMode).toBe("cli-json");
+       }
+
+       // Persisted to DB rows as durable event entries
+       const persistedTraceRows = memoryEvents.filter((r) => r.row.type === "AgentTraceEvent");
+       expect(persistedTraceRows).toHaveLength(captured.length);
+       expect(
+         persistedTraceRows.map((row) => JSON.parse(row.row.payloadJson).event.sequence),
+       ).toEqual([1, 2, 3, 4]);
+
+       // Persisted to a dedicated, flattened local trace log for later querying/export.
+       const persistedTraceLog = await readFile(join(logDir, "agent-trace.ndjson"), "utf8");
+       const persistedTraceRecords = persistedTraceLog
+         .trim()
+         .split(/\r?\n/)
+         .filter(Boolean)
+         .map((line) => JSON.parse(line) as Record<string, unknown>);
+       expect(persistedTraceRecords).toHaveLength(4);
+       expect(persistedTraceRecords.map((record) => record.eventKind)).toEqual(kinds);
+       expect(persistedTraceRecords[0]?.traceCompleteness).toBe("partial-observed");
+       expect(persistedTraceRecords[0]?.unsupportedEventKinds).toContain("assistant.thinking.delta");
+       expect(persistedTraceRecords[0]?.runId).toBe("run-1");
+       expect(persistedTraceRecords[0]?.nodeId).toBe("node-A");
+       expect(persistedTraceRecords[0]?.attempt).toBe(1);
+       expect(persistedTraceRecords[0]?.captureMode).toBe("cli-json");
+     } finally {
+       await rm(fake.dir, { recursive: true, force: true });
+       await rm(logDir, { recursive: true, force: true });
+     }
+   });
+
    test("PiAgent json mode extracts JSON from text content in turn_end", async () => {
      // Simulates pi output where the agent returns JSON in the text content
      const fake = await makeFakePi(`

From d8002e2e45ab8225e3a299008fd69b960deb1731 Mon Sep 17 00:00:00 2001
From: Samuel Huber <samuel@dtech.vision>
Date: Thu, 26 Mar 2026 21:05:21 +0100
Subject: [PATCH 3/7] chore(pi): remove OTEL trace spec doc from PI bookmark

---
 docs/concepts/agent-trace-otel-logs-spec.mdx | 1063 ------------------
 1 file changed, 1063 deletions(-)
 delete mode 100644 docs/concepts/agent-trace-otel-logs-spec.mdx

diff --git a/docs/concepts/agent-trace-otel-logs-spec.mdx b/docs/concepts/agent-trace-otel-logs-spec.mdx
deleted file mode 100644
index 12307b6b..00000000
--- a/docs/concepts/agent-trace-otel-logs-spec.mdx
+++ /dev/null
@@ -1,1063 +0,0 @@
----
-title: Agent Trace OTEL Logs Specification
-description: Full-fidelity specification for capturing agent-visible execution traces and exporting them as OpenTelemetry logs to Loki or any OTLP-compatible backend.
----
-
-This document specifies how Smithers must capture, normalize, persist, export, and verify agent execution traces as OpenTelemetry logs.
-
-This is a design specification, not an implementation sketch. Every requirement in this document is normative unless explicitly marked as non-normative.
-
-## Status
-
-- Intended scope: new observability surface for agent trace logs
-- Intended audience: maintainers implementing runtime, agent, observability, and verification changes
-- Intended outcome: a system where every supported agent run produces a complete, queryable, correlated trace of what Smithers could observe
-
-## Problem Statement
-
-Smithers currently captures:
-
-- durable workflow lifecycle events
-- structured application logs
-- traces and metrics for runtime behavior
-- partial agent output in some cases
-
-Smithers does not currently guarantee a full-fidelity record of agent-visible execution behavior across all agent integrations.
-
-In particular:
-
-- `PiAgent` exposes a rich event stream, but Smithers currently collapses it to final text plus usage
-- several CLI agents emit machine-readable output that Smithers does not preserve as first-class trace events
-- SDK-based agents return final results and rely on Smithers-side tool logging, but do not provide a canonical agent trace model
-- there is no OTEL logs pipeline in the local collector configuration
-
-The result is that operators cannot reliably answer questions such as:
-
-- What did the agent stream before it failed?
-- Which tools did the agent invoke, in what order, with which visible arguments and results?
-- Did the agent emit visible thinking content, compaction events, retries, or queued follow-up behavior?
-- Can we reconstruct exactly what Smithers observed for a given run, node, and attempt?
-- Can we query this in Grafana Loki or another OTLP log backend with stable run-level correlation?
-
-This specification addresses that gap.
-
-## Goals
-
-The system defined here MUST:
-
-- capture the fullest agent-visible trace Smithers can obtain for each supported agent
-- export that trace as OTEL logs to Loki or any OTLP-compatible log backend
-- preserve run correlation through stable attributes such as `run.id`, `workflow.path`, `node.id`, `attempt`, and `iteration`
-- preserve raw trace fidelity without forcing operators to infer behavior from summary logs
-- remain explicit about what was directly observed versus what was derived by Smithers
-- provide deterministic verification criteria for correctness and task completion
-
-## Non-Goals
-
-The system defined here MUST NOT claim to provide:
-
-- provider-internal hidden chain-of-thought when the upstream agent or SDK does not expose it
-- exact reconstruction of invisible model-side planning not surfaced through events, messages, or tool calls
-- a replacement for the durable Smithers event log or database
-- a guarantee that every backend will index arbitrary high-cardinality fields efficiently
-
-## Core Principle
-
-Smithers MUST export what it observed, not what it inferred.
-
-Every exported trace record MUST be classifiable as one of:
-
-- raw upstream agent event
-- raw Smithers runtime event
-- Smithers-derived normalization of one raw event
-- Smithers-generated transport or export diagnostic
-
-If a record is derived, the derivation MUST be explicit.
-
-## Definitions
-
-### Agent Trace
-
-An agent trace is the ordered set of agent-visible execution records associated with one Smithers node attempt.
-
-Agent trace records include, where available:
-
-- streamed assistant text
-- streamed visible thinking content
-- message lifecycle events
-- tool call lifecycle events
-- tool result lifecycle events
-- compaction and retry events
-- session metadata
-- final assistant message
-- final tool results
-- agent stderr diagnostics when those are observable to Smithers
-
-### Full Trace
-
-For a given agent integration, a full trace means all upstream-visible records Smithers can access without patching the upstream model provider.
-
-Full trace does not mean hidden reasoning. It means all observable records available through:
-
-- subprocess stdout or stderr
-- structured CLI output modes
-- RPC event streams
-- SDK callback/event surfaces
-- persisted session artifacts intentionally provided by the agent system
-
-### Canonical Trace Event
-
-A canonical trace event is the Smithers-normalized representation of one raw observed record.
-
-Canonical trace events are the unit exported to OTEL logs and optionally persisted durably by Smithers.
-
-### Attempt
-
-An attempt is one execution of one node at one iteration with one attempt number. A canonical agent trace is scoped to exactly one attempt.
-
-## Invariants
-
-The implementation MUST satisfy all of the following invariants.
-
-### Identity Invariants
-
-Every canonical trace event MUST include:
-
-- `runId`
-- `nodeId` when the event is attempt-scoped
-- `iteration` when the event is attempt-scoped
-- `attempt` when the event is attempt-scoped
-- `timestampMs`
-- `source.agentFamily`
-- `source.captureMode`
-- `event.kind`
-- `event.sequence`
-
-### Ordering Invariants
-
-Canonical trace events for a single attempt MUST be totally ordered by `event.sequence`.
-
-If upstream events arrive out of wall-clock order, Smithers MUST preserve receive order and MUST NOT reorder them after capture.
-
-`event.sequence` MUST be monotonic within one attempt.
-
-### Fidelity Invariants
-
-Smithers MUST preserve raw upstream payloads for canonical trace events unless a redaction rule requires modification.
-
-If redaction occurs:
-
-- the record MUST indicate redaction occurred
-- the redaction reason MUST be attached
-- the original raw value MUST NOT be exported
-
-### Correlation Invariants
-
-Every OTEL log record derived from a canonical trace event MUST be queryable by:
-
-- run
-- workflow path
-- node
-- iteration
-- attempt
-- agent family
-- event kind
-
-### Completeness Invariants
-
-If Smithers receives a parseable upstream event, Smithers MUST either:
-
-- convert it into a canonical trace event and export it
-- or emit a diagnostic record explaining why it was dropped
-
-Silent drops are not allowed.
-
-### Truthfulness Invariants
-
-If an agent integration cannot expose a certain class of events, the system MUST record capability absence explicitly and MUST NOT pretend completeness.
-
-Example:
-
-- if an SDK-based integration does not expose thinking deltas, Smithers MUST mark that event class as unsupported for that agent family
-
-## Scope of Observability
-
-The system covers three layers.
-
-### Layer 1: Canonical Runtime Record
-
-Smithers SHOULD persist canonical trace events durably for replay and audit, alongside existing run events and attempt data.
-
-### Layer 2: OTEL Log Export
-
-Smithers MUST export canonical trace events as OTEL logs when OTEL log export is enabled.
-
-### Layer 3: Summary Metrics and Diagnostics
-
-Smithers MAY derive metrics from canonical trace events, but those metrics are secondary and MUST NOT be the sole evidence of capture correctness.
-
-## Agent Capability Model
-
-Each agent family MUST declare an explicit trace capability profile.
-
-The capability profile MUST enumerate support for:
-
-- session metadata
-- assistant text deltas
-- visible thinking deltas
-- final assistant message
-- tool execution start
-- tool execution update
-- tool execution end
-- retry events
-- compaction events
-- raw stderr diagnostics
-- persisted session artifact
-
-### PiAgent
-
-`PiAgent` MUST be treated as a high-fidelity integration.
-
-Available sources include:
-
-- JSON event stream mode
-- RPC mode event stream
-- Pi session JSONL artifacts
-
-Pi exposes event types such as:
-
-- `agent_start`
-- `agent_end`
-- `turn_start`
-- `turn_end`
-- `message_start`
-- `message_update`
-- `message_end`
-- `tool_execution_start`
-- `tool_execution_update`
-- `tool_execution_end`
-- `auto_compaction_start`
-- `auto_compaction_end`
-- `auto_retry_start`
-- `auto_retry_end`
-
-Visible thinking content emitted by Pi MUST be captured as trace content.
-
-Pi session artifacts, when enabled and available, SHOULD be recorded as canonical artifacts associated with the attempt.
-
-### CodexAgent
-
-`CodexAgent` MUST be treated as a structured CLI integration with medium fidelity.
-
-Codex emits JSON output. Smithers MUST preserve all parseable structured events made available by that mode.
-
-If Codex exposes usage, step, message, tool, or completion events, Smithers MUST map them to canonical trace events rather than extracting only final text.
-
-If a given Codex event schema is unstable, Smithers MUST preserve the raw event payload and classify the normalization conservatively.
-
-### ClaudeCodeAgent
-
-`ClaudeCodeAgent` MUST be treated as a structured CLI integration with medium fidelity.
-
-When `stream-json` is enabled, Smithers MUST preserve all parseable stream records and map them into canonical trace events where possible.
-
-Partial assistant messages, tool call indicators, and usage events MUST NOT be discarded if they are parseable.
-
-### GeminiAgent
-
-`GeminiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on output mode.
-
-Smithers MUST preserve parseable structured output and MUST explicitly mark unsupported event classes when the CLI exposes only final or coarse-grained results.
-
-### KimiAgent
-
-`KimiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on `outputFormat`.
-
-If `stream-json` mode is used, Smithers MUST preserve event records. If only final text is available, Smithers MUST mark the trace as partial.
-
-### OpenAIAgent and AnthropicAgent
-
-`OpenAIAgent` and `AnthropicAgent` MUST be treated as SDK integrations.
-
-They do not inherently expose a rich subprocess event stream in the current Smithers wrapper.
-
-For these agents, Smithers MUST capture:
-
-- prompt dispatch boundaries
-- final assistant response
-- token usage when surfaced
-- Smithers-side tool execution start and end
-- visible tool output recorded by Smithers
-- node output emitted by Smithers if any
-
-Smithers MUST mark thinking deltas and message lifecycle as unsupported unless the underlying SDK path is instrumented to provide them.
-
-### AmpAgent and ForgeAgent
-
-`AmpAgent` and `ForgeAgent` MUST be treated as text-first subprocess integrations unless a structured mode is added.
-
-Smithers MUST capture:
-
-- final response text
-- stderr diagnostics
-- Smithers-side tool execution and runtime events
-
-Smithers MUST mark full trace fidelity as unsupported for these integrations.
-
-## Capture Modes
-
-Each attempt MUST declare one capture mode:
-
-- `sdk-events`
-- `rpc-events`
-- `cli-json-stream`
-- `cli-json`
-- `cli-text`
-- `artifact-import`
-
-Capture mode is part of the canonical attempt metadata and MUST be exported with every trace record.
-
-## Canonical Data Model
-
-Smithers MUST introduce a canonical event model for agent traces.
-
-The exact TypeScript shape is an implementation detail, but the semantic fields are mandatory.
-
-### Attempt Metadata
-
-Each attempt MUST expose:
-
-- `traceVersion`
-- `agentFamily`
-- `agentId`
-- `model`
-- `captureMode`
-- `traceCompleteness`
-- `unsupportedEventKinds`
-- `traceStartedAtMs`
-- `traceFinishedAtMs`
-- `rawArtifactRefs`
-
-### `traceCompleteness`
-
-`traceCompleteness` MUST be one of:
-
-- `full-observed`
-- `partial-observed`
-- `final-only`
-- `capture-failed`
-
-Definitions:
-
-- `full-observed`: Smithers captured every event class the integration claims to support
-- `partial-observed`: Smithers captured some but not all supported classes
-- `final-only`: only final response and coarse metadata were available
-- `capture-failed`: Smithers expected trace events but could not capture them reliably
-
-### Canonical Event Fields
-
-Every canonical trace event MUST include:
-
-- `traceVersion`
-- `runId`
-- `workflowPath`
-- `workflowHash` when available
-- `nodeId`
-- `iteration`
-- `attempt`
-- `timestampMs`
-- `event.sequence`
-- `event.kind`
-- `event.phase`
-- `source.agentFamily`
-- `source.captureMode`
-- `source.rawType`
-- `source.observed`
-- `payload`
-- `raw`
-- `redaction`
-- `annotations`
-
-### `event.kind`
-
-`event.kind` MUST be chosen from a controlled vocabulary.
-
-The initial vocabulary MUST include:
-
-- `session.start`
-- `session.end`
-- `turn.start`
-- `turn.end`
-- `message.start`
-- `message.update`
-- `message.end`
-- `assistant.text.delta`
-- `assistant.thinking.delta`
-- `assistant.message.final`
-- `tool.execution.start`
-- `tool.execution.update`
-- `tool.execution.end`
-- `tool.result`
-- `retry.start`
-- `retry.end`
-- `compaction.start`
-- `compaction.end`
-- `stderr`
-- `stdout`
-- `usage`
-- `capture.warning`
-- `capture.error`
-- `artifact.created`
-
-No integration-specific naming is allowed in `event.kind`. Integration-specific names MUST remain in `source.rawType`.
-
-### `event.phase`
-
-`event.phase` MUST be one of:
-
-- `agent`
-- `turn`
-- `message`
-- `tool`
-- `session`
-- `capture`
-- `artifact`
-
-### `source.observed`
-
-`source.observed` MUST be a boolean indicating whether the payload was directly observed from the upstream integration.
-
-Derived normalization records MUST set `source.observed` to `false`.
-
-### `payload`
-
-`payload` MUST contain normalized fields intended for stable querying and display.
-
-Examples:
-
-- for `assistant.text.delta`: `{ text: string }`
-- for `assistant.thinking.delta`: `{ text: string }`
-- for `tool.execution.start`: `{ toolCallId: string, toolName: string, argsPreview: unknown }`
-- for `tool.execution.end`: `{ toolCallId: string, toolName: string, isError: boolean, resultPreview: unknown }`
-
-### `raw`
-
-`raw` MUST contain the raw upstream object or raw text fragment as captured after redaction.
-
-If no raw form exists, `raw` MAY be `null`.
-
-## Custom Annotations
-
-The system MUST support user-defined annotations attached at run start.
-
-Annotations MUST be:
-
-- provided in run options and server APIs
-- stored durably on the run
-- merged into every canonical trace event at export time
-
-Annotations MUST support scalar values only:
-
-- string
-- number
-- boolean
-
-Nested objects and arrays MUST be rejected or flattened before run start. The behavior MUST be explicit and deterministic.
-
-The following annotation namespaces are reserved:
-
-- `smithers.*`
-- `run.*`
-- `workflow.*`
-- `node.*`
-- `agent.*`
-- `otel.*`
-
-User annotations SHOULD use a `custom.*` prefix in canonical export.
-
-## Workflow Metadata Requirements
-
-Every canonical trace event MUST include:
-
-- `workflow.path` as an OTEL attribute when available
-- `workflow.hash` as an OTEL attribute when available
-
-If `workflow.path` is unavailable, Smithers MUST export `workflow.path` as absent rather than inventing a placeholder path.
-
-## Redaction Model
-
-Redaction is mandatory because agent traces can contain sensitive content.
-
-The implementation MUST support:
-
-- disabled redaction
-- default redaction
-- custom redaction rules
-
-### Minimum Default Redaction
-
-Default redaction MUST handle at least:
-
-- API keys
-- bearer tokens
-- common secret env vars
-- authorization headers
-- cookie headers
-- explicitly configured secret literals
-
-### Redaction Semantics
-
-Redaction MUST occur before:
-
-- durable canonical trace persistence
-- OTEL log export
-- artifact snapshot export
-
-If redaction modifies content, the trace event MUST record:
-
-- `redaction.applied = true`
-- `redaction.ruleIds = string[]`
-
-## Export Model
-
-Canonical trace events MUST be exportable as OTEL logs.
-
-### OTEL Collector Requirements
-
-The collector configuration MUST define a `logs` pipeline.
-
-The logs pipeline MUST accept OTLP input and MUST support at least one of:
-
-- OTLP logs exporter
-- Loki exporter
-
-The local development stack SHOULD include Loki for verification and human inspection.
-
-### OTEL Record Shape
-
-For each canonical trace event, Smithers MUST emit one OTEL log record.
-
-The log body MUST contain a compact structured JSON representation of:
-
-- canonical payload
-- raw payload when configured
-- redaction metadata
-
-The log attributes MUST include:
-
-- `service.name`
-- `smithers.trace.version`
-- `run.id`
-- `workflow.path`
-- `workflow.hash` when available
-- `node.id` when available
-- `node.iteration` when available
-- `node.attempt` when available
-- `agent.family`
-- `agent.id` when available
-- `agent.model` when available
-- `agent.capture_mode`
-- `trace.completeness`
-- `event.kind`
-- `event.phase`
-- `event.sequence`
-- `source.raw_type`
-- `source.observed`
-
-Custom annotations MUST be exported as OTEL attributes under `custom.*`.
-
-### Attribute Cardinality Rules
-
-The following MUST be attributes:
-
-- run identifiers
-- workflow identifiers
-- node identifiers
-- attempt identifiers
-- event kind
-- agent family
-- capture mode
-
-The following MUST NOT be indexed as labels in Loki-specific configurations:
-
-- full prompt text
-- full response text
-- thinking text
-- tool args bodies
-- tool result bodies
-- arbitrary user free-text annotations
-
-These large fields MUST remain in the log body.
-
-### Severity Mapping
-
-Severity SHOULD be assigned as follows:
-
-- normal trace events: `INFO`
-- stderr and non-terminal capture anomalies: `WARN`
-- capture failures and export failures: `ERROR`
-
-Severity MUST NOT be used to encode event kind.
-
-## Persistence Model
-
-Canonical trace events SHOULD be durably persisted by Smithers in addition to OTEL export.
-
-If durable persistence is implemented, the persistence layer MUST support:
-
-- ordered replay by attempt
-- filtering by event kind
-- pagination by sequence
-- artifact references
-
-OTEL export MUST NOT be the only storage location for canonical trace data.
-
-## Artifact Model
-
-Some agent integrations expose richer external artifacts than can be represented comfortably as log streams.
-
-Examples:
-
-- Pi session JSONL files
-- raw CLI JSON event transcripts
-- exported HTML or JSONL session artifacts
-
-Smithers SHOULD support trace artifacts with metadata:
-
-- `artifact.kind`
-- `artifact.path`
-- `artifact.contentType`
-- `artifact.bytes`
-- `artifact.createdAtMs`
-- `artifact.redacted`
-
-Artifact creation MUST also emit canonical `artifact.created` events.
-
-## Failure Model
-
-The implementation MUST classify failures explicitly.
-
-### Capture Failure
-
-Capture failure means Smithers could not reliably obtain agent trace input it expected from the selected capture mode.
-
-Examples:
-
-- malformed JSON stream
-- unexpected subprocess termination before terminal event
-- SDK callback channel failure
-
-Capture failure MUST:
-
-- mark attempt `traceCompleteness = capture-failed` when terminally broken
-- emit a `capture.error` canonical event
-- include diagnostic details
-
-### Partial Capture
-
-Partial capture means Smithers obtained some trace events but missed expected categories.
-
-Examples:
-
-- stdout stream cut off after several tool events
-- session artifact missing though event stream completed
-
-Partial capture MUST:
-
-- mark attempt `traceCompleteness = partial-observed`
-- record missing classes in `unsupportedEventKinds` or `missingExpectedEventKinds`
-
-### Export Failure
-
-Export failure means Smithers captured canonical trace events but could not deliver them to the OTEL backend.
-
-Export failure MUST NOT erase canonical local truth.
-
-If export fails:
-
-- canonical local persistence MUST still succeed when enabled
-- Smithers MUST emit operator diagnostics through existing logs
-- the run MUST remain inspectable from durable local records
-
-## Normalization Rules
-
-Normalization MUST be conservative.
-
-### One Raw Event to One Canonical Event
-
-As a default rule, one raw upstream event SHOULD map to one canonical trace event.
-
-If one raw event yields multiple canonical events, the implementation MUST document why and MUST include a stable parent link.
-
-### Text Deltas
-
-Assistant text deltas MUST remain deltas if the upstream protocol provided deltas.
-
-Smithers MUST NOT collapse deltas into a single blob during export.
-
-Final assembled messages MAY be emitted separately as `assistant.message.final`.
-
-### Thinking Deltas
-
-Visible thinking content MUST be captured as its own event class and MUST NOT be merged into assistant text.
-
-### Tool Calls
-
-Tool lifecycle MUST preserve:
-
-- stable tool call identifier when upstream provides one
-- tool name
-- visible arguments or argument preview
-- partial updates when available
-- final result preview
-- error flag
-
-### Usage
-
-Usage records MUST be separate canonical events or attached to terminal message events in a way that remains queryable.
-
-If usage is attached, it MUST still be accessible without parsing free-form text.
-
-## Required Runtime Integration Points
-
-The implementation MUST integrate at these boundaries.
-
-### Agent Boundary
-
-Every agent integration MUST report raw trace observations into the canonical trace capture layer.
-
-No agent integration is allowed to silently parse and discard upstream event records before the capture layer sees them.
-
-### Event Bus Boundary
-
-Canonical trace events SHOULD be emitted through or alongside the existing event bus so that:
-
-- they share run correlation
-- they can participate in durable persistence
-- they can reuse existing event-driven verification infrastructure
-
-### Attempt Finalization Boundary
-
-When an attempt finishes, Smithers MUST finalize trace metadata:
-
-- `traceFinishedAtMs`
-- `traceCompleteness`
-- `unsupportedEventKinds`
-- `rawArtifactRefs`
-
-## Required Configuration Surface
-
-The implementation MUST define explicit configuration for:
-
-- enabling OTEL log export
-- selecting backend endpoint
-- enabling or disabling canonical local trace persistence
-- selecting redaction mode
-- retaining or dropping raw payload bodies
-- retaining or dropping raw artifacts
-- maximum event body bytes
-- maximum artifact bytes
-
-The configuration MUST distinguish:
-
-- runtime operator policy
-- run-specific annotations
-
-## Required Operator Queries
-
-The design is incomplete unless the following operator queries are supported.
-
-### Query Set A: Run Reconstruction
-
-Operators MUST be able to answer:
-
-- show all trace records for one run
-- show all trace records for one run and node
-- show only one attempt for one node
-- show ordered assistant text deltas
-- show visible thinking deltas when present
-- show tool calls and results in order
-
-### Query Set B: Failure Analysis
-
-Operators MUST be able to answer:
-
-- which runs had trace capture failures
-- which agents only provide final-only traces
-- which attempts terminated without a terminal agent event
-- which traces were partially redacted
-
-### Query Set C: Audit
-
-Operators MUST be able to answer:
-
-- what annotations were attached to a run
-- which workflow file and workflow hash produced the trace
-- which raw artifact file corresponds to this attempt
-
-## Verification Specification
-
-Task completion is not defined by code existing. It is defined by observable correctness.
-
-The implementation is complete only if every verification class below passes.
-
-## Verification Class 1: Schema Correctness
-
-For each supported agent family, automated tests MUST verify that canonical trace events:
-
-- conform to the declared schema
-- contain required identity fields
-- maintain monotonic `event.sequence`
-- correctly classify `traceCompleteness`
-
-Completion criterion:
-
-- zero schema violations in test fixtures
-
-## Verification Class 2: Ordering Correctness
-
-Automated tests MUST verify that for one attempt:
-
-- event sequences are strictly monotonic
-- final events occur after preceding deltas
-- no duplicate sequence numbers appear
-
-Completion criterion:
-
-- deterministic ordering across repeated test runs
-
-## Verification Class 3: Fidelity Correctness
-
-Fixture-based tests MUST compare raw upstream inputs with canonical trace outputs.
-
-For each fixture:
-
-- every parseable upstream event MUST result in a canonical event or an explicit diagnostic drop event
-- visible thinking content MUST remain distinguishable from assistant text
-- tool call identifiers and names MUST survive normalization
-
-Completion criterion:
-
-- full fixture coverage for each agent family and capture mode supported by Smithers
-
-## Verification Class 4: Completeness Classification
-
-Tests MUST verify the semantics of:
-
-- `full-observed`
-- `partial-observed`
-- `final-only`
-- `capture-failed`
-
-Completion criterion:
-
-- each classification is produced by at least one explicit test case
-
-## Verification Class 5: OTEL Export Correctness
-
-Integration tests MUST verify that canonical trace events become OTEL log records with:
-
-- required attributes present
-- correct body shape
-- correct severity mapping
-- correct custom annotation export
-
-Completion criterion:
-
-- logs are queryable in the target backend by `run.id`, `workflow.path`, `node.id`, `attempt`, and `event.kind`
-
-## Verification Class 6: Loki Query Correctness
-
-In a local stack with Loki enabled, end-to-end tests MUST verify that an operator can query:
-
-- all records for a run
-- all records for a node attempt
-- only thinking deltas
-- only tool execution records
-- only capture errors
-
-Completion criterion:
-
-- documented query examples return expected results against test data
-
-## Verification Class 7: Artifact Correctness
-
-When artifact capture is enabled, tests MUST verify:
-
-- artifact references are recorded
-- artifacts exist on disk or in configured storage
-- artifact metadata matches actual content
-- artifact creation emits corresponding canonical events
-
-Completion criterion:
-
-- no dangling artifact references
-
-## Verification Class 8: Redaction Correctness
-
-Tests MUST verify that redaction:
-
-- removes required secrets from canonical payloads, raw payloads, OTEL bodies, and artifacts
-- leaves non-sensitive content intact
-- records which rules were applied
-
-Completion criterion:
-
-- zero known secret literals leak in test fixtures
-
-## Verification Class 9: Failure Resilience
-
-Tests MUST verify behavior when:
-
-- collector is unavailable
-- backend rejects logs
-- malformed upstream JSON is encountered
-- subprocess exits before terminal event
-- artifact write fails
-
-Completion criterion:
-
-- capture failures are classified
-- local diagnostics exist
-- durable local truth remains accessible when configured
-
-## Verification Class 10: Cross-Signal Correlation
-
-Tests MUST verify that logs correlate with:
-
-- run lifecycle events
-- metrics
-- spans
-
-At minimum, operators MUST be able to join by:
-
-- `run.id`
-- `node.id`
-- `attempt`
-
-Completion criterion:
-
-- one documented workflow run can be traced across event log, OTEL logs, and metrics without ambiguity
-
-## Acceptance Criteria
-
-The feature is not done until all of the following are true.
-
-### A. Canonical Model Exists
-
-Smithers has a canonical agent trace schema with explicit completeness states and per-agent capability declarations.
-
-### B. Pi Is High Fidelity
-
-`PiAgent` exports structured trace records for:
-
-- session lifecycle
-- turn lifecycle
-- message lifecycle
-- assistant text deltas
-- visible thinking deltas
-- tool execution lifecycle
-- retry and compaction events
-
-### C. Other Agents Are Truthfully Classified
-
-Every agent in `src/agents/` has a declared fidelity class and unsupported event set.
-
-### D. OTEL Logs Pipeline Exists
-
-The collector and local observability stack support OTEL logs end to end.
-
-### E. Queries Work
-
-Operators can answer the required run reconstruction, failure analysis, and audit queries from the exported logs.
-
-### F. Verification Is Automated
-
-Automated tests exist for schema, ordering, fidelity, completeness, OTEL export, redaction, failure handling, and query correctness.
-
-## Implementation Phasing
-
-This section is normative for rollout order.
-
-### Phase 1: Canonical Model
-
-Implement:
-
-- canonical trace schema
-- completeness classification
-- per-agent capability declarations
-
-### Phase 2: Pi Fidelity
-
-Implement:
-
-- Pi raw event capture
-- canonical normalization
-- OTEL export
-- artifact capture for session files if configured
-
-### Phase 3: Structured CLI Agents
-
-Implement:
-
-- Codex
-- Claude Code
-- Gemini
-- Kimi
-
-Each integration MUST ship with fixture-based normalization tests before being considered complete.
-
-### Phase 4: SDK and Text-Only Agents
-
-Implement:
-
-- explicit partial or final-only capture
-- truthful capability declarations
-- OTEL export for the observable subset
-
-### Phase 5: Redaction and Hardening
-
-Implement:
-
-- default redaction
-- export failure handling
-- artifact verification
-- documented local Loki queries
-
-## Explicit Non-Ambiguities
-
-The following choices are intentional.
-
-- Smithers MUST prefer truthful partial fidelity over fake completeness.
-- Smithers MUST preserve raw event boundaries rather than collapsing everything into summaries.
-- Smithers MUST keep large content in log bodies, not indexing labels.
-- Smithers MUST retain a local source of truth when OTEL export fails.
-- Smithers MUST separate assistant text from visible thinking.
-- Smithers MUST define task completion in terms of verification evidence, not implementation effort.
-
-## Out of Scope for the First Implementation
-
-The first implementation MAY defer:
-
-- remote artifact storage
-- cross-run session graph visualizations
-- backend-specific dashboards beyond minimal verification queries
-- universal reconstruction of provider-internal hidden reasoning
-
-If deferred, these items MUST be documented explicitly and MUST NOT be implied to exist.
-
-## Summary
-
-The required system is not “send some logs to Loki.”
-
-The required system is:
-
-- a canonical agent trace model
-- explicit capability declarations per integration
-- conservative capture of all observable upstream events
-- durable local truth
-- OTEL log export with stable correlation fields
-- redaction before persistence and export
-- verification that proves fidelity, completeness, and queryability
-
-Anything less produces observability that looks complete while remaining operationally unreliable.

From c82912a65e6eaaa54673df73b10c09db0cd4160d Mon Sep 17 00:00:00 2001
From: Samuel Huber <samuel@dtech.vision>
Date: Thu, 26 Mar 2026 21:08:30 +0100
Subject: [PATCH 4/7] docs(pi): add hello-world sample and brief PI usage note

---
 docs/integrations/pi-integration.mdx |  2 ++
 examples/pi-hello-world.tsx          | 31 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 examples/pi-hello-world.tsx

diff --git a/docs/integrations/pi-integration.mdx b/docs/integrations/pi-integration.mdx
index 4b584577..e8a30063 100644
--- a/docs/integrations/pi-integration.mdx
+++ b/docs/integrations/pi-integration.mdx
@@ -72,6 +72,8 @@ pi --version
 bun run test
 ```
 
+For a minimal end-to-end example, run `bun run cli run examples/pi-hello-world.tsx`. To choose a model, check your PI `models.json` or run `pi --list-models`.
+
 ## Design Guidance
 
 Use `PiAgent` task nodes when:
diff --git a/examples/pi-hello-world.tsx b/examples/pi-hello-world.tsx
new file mode 100644
index 00000000..19938c2c
--- /dev/null
+++ b/examples/pi-hello-world.tsx
@@ -0,0 +1,31 @@
+/** @jsxImportSource smithers-orchestrator */
+import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator";
+import { z } from "zod";
+
+const HelloSchema = z.object({
+  message: z.string(),
+});
+
+const { smithers, outputs } = createSmithers(
+  {
+    output: HelloSchema,
+  },
+  {
+    dbPath: "./examples/pi-hello-world.db",
+  },
+);
+
+const pi = new PiAgent({
+  provider: "openai-codex",
+  model: "gpt-5.4",
+  mode: "json",
+});
+
+export default smithers(() => (
+  <Workflow name="pi-hello-world">
+    <Task id="hello" output={outputs.output} agent={pi}>
+      {`Return exactly this JSON and nothing else:
+{"message":"hello world"}`}
+    </Task>
+  </Workflow>
+));

From b40d840420e0830341622b5499307aab15e0d704 Mon Sep 17 00:00:00 2001
From: Samuel Huber <samuel@dtech.vision>
Date: Thu, 26 Mar 2026 21:23:46 +0100
Subject: [PATCH 5/7] fix(pi): narrow PR to rpc completion only

---
 docs/integrations/pi-integration.mdx |   2 -
 examples/pi-hello-world.tsx          |  31 -----
 src/SmithersEvent.ts                 | 113 +----------------
 src/agent-trace.ts                   | 178 ---------------------------
 src/agents/PiAgent.ts                |  23 ----
 src/events.ts                        |  21 ++--
 src/index.ts                         |  11 +-
 src/observability/index.ts           |  89 --------------
 src/tools/context.ts                 |   2 -
 tests/observability.test.ts          |  91 --------------
 tests/pi-support.test.ts             | 169 +++++++------------------
 11 files changed, 55 insertions(+), 675 deletions(-)
 delete mode 100644 examples/pi-hello-world.tsx
 delete mode 100644 src/agent-trace.ts

diff --git a/docs/integrations/pi-integration.mdx b/docs/integrations/pi-integration.mdx
index e8a30063..4b584577 100644
--- a/docs/integrations/pi-integration.mdx
+++ b/docs/integrations/pi-integration.mdx
@@ -72,8 +72,6 @@ pi --version
 bun run test
 ```
 
-For a minimal end-to-end example, run `bun run cli run examples/pi-hello-world.tsx`. To choose a model, check your PI `models.json` or run `pi --list-models`.
-
 ## Design Guidance
 
 Use `PiAgent` task nodes when:
diff --git a/examples/pi-hello-world.tsx b/examples/pi-hello-world.tsx
deleted file mode 100644
index 19938c2c..00000000
--- a/examples/pi-hello-world.tsx
+++ /dev/null
@@ -1,31 +0,0 @@
-/** @jsxImportSource smithers-orchestrator */
-import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator";
-import { z } from "zod";
-
-const HelloSchema = z.object({
-  message: z.string(),
-});
-
-const { smithers, outputs } = createSmithers(
-  {
-    output: HelloSchema,
-  },
-  {
-    dbPath: "./examples/pi-hello-world.db",
-  },
-);
-
-const pi = new PiAgent({
-  provider: "openai-codex",
-  model: "gpt-5.4",
-  mode: "json",
-});
-
-export default smithers(() => (
-  <Workflow name="pi-hello-world">
-    <Task id="hello" output={outputs.output} agent={pi}>
-      {`Return exactly this JSON and nothing else:
-{"message":"hello world"}`}
-    </Task>
-  </Workflow>
-));
diff --git a/src/SmithersEvent.ts b/src/SmithersEvent.ts
index 29d705f8..6b25e4ff 100644
--- a/src/SmithersEvent.ts
+++ b/src/SmithersEvent.ts
@@ -1,113 +1,5 @@
 import type { RunStatus } from "./RunStatus";
 
-export const CANONICAL_AGENT_TRACE_VERSION = 1 as const;
-
-export type AgentTraceCompleteness =
-  | "full-observed"
-  | "partial-observed"
-  | "final-only"
-  | "capture-failed";
-
-export type AgentTraceCaptureMode =
-  | "sdk-events"
-  | "rpc-events"
-  | "cli-json-stream"
-  | "cli-json"
-  | "cli-text"
-  | "artifact-import";
-
-export type AgentTraceEventKind =
-  | "session.start"
-  | "session.end"
-  | "turn.start"
-  | "turn.end"
-  | "message.start"
-  | "message.update"
-  | "message.end"
-  | "assistant.text.delta"
-  | "assistant.thinking.delta"
-  | "assistant.message.final"
-  | "tool.execution.start"
-  | "tool.execution.update"
-  | "tool.execution.end"
-  | "tool.result"
-  | "retry.start"
-  | "retry.end"
-  | "compaction.start"
-  | "compaction.end"
-  | "stderr"
-  | "stdout"
-  | "usage"
-  | "capture.warning"
-  | "capture.error"
-  | "artifact.created";
-
-export type AgentTraceEventPhase =
-  | "message"
-  | "tool"
-  | "agent"
-  | "session"
-  | "turn"
-  | "capture"
-  | "artifact";
-
-export const PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS = [
-  "assistant.text.delta",
-  "tool.execution.start",
-  "tool.execution.update",
-  "tool.execution.end",
-] as const satisfies readonly AgentTraceEventKind[];
-
-export const PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS = [
-  "session.start",
-  "session.end",
-  "turn.start",
-  "turn.end",
-  "message.start",
-  "message.update",
-  "message.end",
-  "assistant.thinking.delta",
-  "assistant.message.final",
-  "tool.result",
-  "retry.start",
-  "retry.end",
-  "compaction.start",
-  "compaction.end",
-  "usage",
-  "artifact.created",
-] as const satisfies readonly AgentTraceEventKind[];
-
-export type SmithersAgentTraceEvent = {
-  type: "AgentTraceEvent";
-  traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION;
-  traceCompleteness: AgentTraceCompleteness;
-  unsupportedEventKinds: AgentTraceEventKind[];
-  runId: string;
-  workflowPath?: string | null;
-  workflowHash?: string | null;
-  nodeId: string;
-  iteration: number;
-  attempt: number;
-  timestampMs: number;
-  event: {
-    sequence: number;
-    kind: AgentTraceEventKind;
-    phase: AgentTraceEventPhase;
-  };
-  source: {
-    agentFamily: "pi";
-    agentId?: string;
-    model?: string;
-    captureMode: AgentTraceCaptureMode;
-    rawType?: string;
-    observed: boolean;
-  };
-  payload: Record<string, unknown> | null;
-  raw: unknown;
-  redaction: { applied: boolean; ruleIds?: string[] } | null;
-  annotations: Record<string, string | number | boolean> | null;
-};
-
 export type SmithersEvent =
   | { type: "RunStarted"; runId: string; timestampMs: number }
   | {
@@ -302,7 +194,4 @@ export type SmithersEvent =
       cacheWriteTokens?: number;
       reasoningTokens?: number;
       timestampMs: number;
-    }
-  | SmithersAgentTraceEvent;
-
-export type ExtendedSmithersEvent = SmithersEvent;
+    };
diff --git a/src/agent-trace.ts b/src/agent-trace.ts
deleted file mode 100644
index fa611798..00000000
--- a/src/agent-trace.ts
+++ /dev/null
@@ -1,178 +0,0 @@
-import { getToolContext } from "./tools/context";
-import { nowMs } from "./utils/time";
-import {
-  CANONICAL_AGENT_TRACE_VERSION,
-  PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS,
-} from "./SmithersEvent";
-import type {
-  AgentTraceCaptureMode,
-  AgentTraceEventKind,
-  AgentTraceEventPhase,
-  SmithersAgentTraceEvent,
-} from "./SmithersEvent";
-
-// Local, per-attempt monotonic sequence for canonical trace events
-const traceSeq = new WeakMap<object, number>();
-
-function nextTraceSeq(ctx: object): number {
-  const current = traceSeq.get(ctx) ?? 0;
-  const next = current + 1;
-  traceSeq.set(ctx, next);
-  return next;
-}
-
-export type CaptureMode = Extract<
-  AgentTraceCaptureMode,
-  "cli-json" | "cli-json-stream" | "rpc-events" | "cli-text"
->;
-
-export type AgentTraceSourceMeta = {
-  agentId?: string;
-  model?: string;
-};
-
-export function emitAgentTrace(
-  kind: AgentTraceEventKind,
-  phase: AgentTraceEventPhase,
-  payload: Record<string, unknown> | null | undefined,
-  raw: unknown,
-  rawType: string | undefined,
-  captureMode: CaptureMode,
-  sourceMeta?: AgentTraceSourceMeta,
-) {
-  const ctx = getToolContext();
-  if (!ctx || typeof ctx.emitEvent !== "function") return; // outside workflow execution
-  const ts = nowMs();
-  const event: SmithersAgentTraceEvent = {
-    type: "AgentTraceEvent",
-    traceVersion: CANONICAL_AGENT_TRACE_VERSION,
-    traceCompleteness: "partial-observed",
-    unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS],
-    runId: ctx.runId,
-    workflowPath: ctx.workflowPath ?? null,
-    workflowHash: ctx.workflowHash ?? null,
-    nodeId: ctx.nodeId,
-    iteration: ctx.iteration,
-    attempt: ctx.attempt,
-    timestampMs: ts,
-    event: {
-      sequence: nextTraceSeq(ctx),
-      kind,
-      phase,
-    },
-    source: {
-      agentFamily: "pi",
-      agentId: sourceMeta?.agentId,
-      model: sourceMeta?.model,
-      captureMode,
-      rawType,
-      observed: true,
-    },
-    payload: payload ?? null,
-    raw,
-    redaction: null,
-    annotations: null,
-  };
-  void ctx.emitEvent(event);
-}
-
-export function capturePiEvent(
-  event: any,
-  captureMode: CaptureMode,
-  sourceMeta?: AgentTraceSourceMeta,
-) {
-  if (!event || typeof event !== "object") return;
-  const type = String((event as any).type ?? "");
-
-  // Assistant text deltas
-  if (type === "message_update") {
-    const assistant = (event as any).assistantMessageEvent;
-    if (assistant && assistant.type === "text_delta" && typeof assistant.delta === "string") {
-      emitAgentTrace(
-        "assistant.text.delta",
-        "message",
-        { text: assistant.delta },
-        event,
-        "message_update.text_delta",
-        captureMode,
-        sourceMeta,
-      );
-      return;
-    }
-  }
-
-  // Tool lifecycle (best-effort mapping of common Pi shapes)
-  if (type === "tool_execution_start") {
-    const call = (event as any).toolCall ?? (event as any).call ?? (event as any);
-    emitAgentTrace(
-      "tool.execution.start",
-      "tool",
-      {
-        toolCallId: String(call.id ?? call.toolCallId ?? ""),
-        toolName: String(call.name ?? call.toolName ?? call.tool ?? ""),
-        argsPreview: call.args ?? call.arguments ?? undefined,
-      },
-      event,
-      "tool_execution_start",
-      captureMode,
-      sourceMeta,
-    );
-    return;
-  }
-
-  if (type === "tool_execution_update") {
-    const call = (event as any).toolCall ?? (event as any).call ?? (event as any);
-    emitAgentTrace(
-      "tool.execution.update",
-      "tool",
-      {
-        toolCallId: String(call.id ?? call.toolCallId ?? ""),
-        toolName: String(call.name ?? call.toolName ?? call.tool ?? ""),
-      },
-      event,
-      "tool_execution_update",
-      captureMode,
-      sourceMeta,
-    );
-    return;
-  }
-
-  if (type === "tool_execution_end") {
-    const call = (event as any).toolCall ?? (event as any).call ?? (event as any);
-    const isError = Boolean((event as any).error || (event as any).failed);
-    emitAgentTrace(
-      "tool.execution.end",
-      "tool",
-      {
-        toolCallId: String(call.id ?? call.toolCallId ?? ""),
-        toolName: String(call.name ?? call.toolName ?? call.tool ?? ""),
-        isError,
-        resultPreview: (event as any).result ?? (event as any).output ?? undefined,
-      },
-      event,
-      "tool_execution_end",
-      captureMode,
-      sourceMeta,
-    );
-    return;
-  }
-}
-
-export function capturePiNdjson(
-  raw: string,
-  captureMode: CaptureMode,
-  sourceMeta?: AgentTraceSourceMeta,
-) {
-  const lines = String(raw ?? "")
-    .split(/\r?\n/)
-    .map((l) => l.trim())
-    .filter(Boolean);
-  for (const line of lines) {
-    try {
-      const parsed = JSON.parse(line);
-      capturePiEvent(parsed, captureMode, sourceMeta);
-    } catch {
-      // ignore malformed lines
-    }
-  }
-}
diff --git a/src/agents/PiAgent.ts b/src/agents/PiAgent.ts
index 0f7a62a0..7d659c2b 100644
--- a/src/agents/PiAgent.ts
+++ b/src/agents/PiAgent.ts
@@ -14,8 +14,6 @@ import {
 import type { BaseCliAgentOptions, PiExtensionUiRequest, PiExtensionUiResponse } from "./BaseCliAgent";
 import { getToolContext } from "../tools/context";
 import { SmithersError } from "../utils/errors";
-import { capturePiNdjson } from "../agent-trace";
-import { capturePiEvent } from "../agent-trace";
 
 export type { PiExtensionUiRequest, PiExtensionUiResponse };
 
@@ -188,17 +186,6 @@ export class PiAgent extends BaseCliAgent {
       const extractedText = mode === "json"
         ? (extractTextFromPiNdjson(rawText) ?? rawText)
         : rawText;
-      // Capture canonical trace events for Pi NDJSON stream (assistant deltas, tool lifecycle)
-      try {
-        if (mode === "json") {
-          capturePiNdjson(rawText, "cli-json", {
-            agentId: this.id,
-            model: this.opts.model ?? this.model,
-          });
-        }
-      } catch {
-        // Best-effort capture; never fail the agent call due to trace capture.
-      }
       const output = tryParseJson(extractedText);
       return buildGenerateResult(extractedText, output, this.opts.model ?? "pi");
     }
@@ -214,16 +201,6 @@ export class PiAgent extends BaseCliAgent {
       maxOutputBytes: this.maxOutputBytes ?? getToolContext()?.maxOutputBytes,
       onStderr: options?.onStderr,
       onExtensionUiRequest: this.opts.onExtensionUiRequest,
-      onEvent: (evt) => {
-        try {
-          capturePiEvent(evt, "rpc-events", {
-            agentId: this.id,
-            model: this.opts.model ?? this.model,
-          });
-        } catch {
-          /* ignore */
-        }
-      },
     });
 
     return buildGenerateResult(rpcResult.text, rpcResult.output, this.opts.model ?? "pi", rpcResult.usage);
diff --git a/src/events.ts b/src/events.ts
index e83fd306..99256de6 100644
--- a/src/events.ts
+++ b/src/events.ts
@@ -1,12 +1,11 @@
 import { EventEmitter } from "node:events";
-import { promises as fs } from "node:fs";
+import * as FileSystem from "@effect/platform/FileSystem";
 import { join } from "node:path";
 import { Effect } from "effect";
 import type { SmithersEvent } from "./SmithersEvent";
 import { fromPromise } from "./effect/interop";
 import { runPromise } from "./effect/runtime";
 import { trackEvent } from "./effect/metrics";
-import { isAgentTraceEvent, toPersistedAgentTraceRecord } from "./observability";
 
 export class EventBus extends EventEmitter {
   private seq = 0;
@@ -144,16 +143,14 @@ export class EventBus extends EventEmitter {
   private persistLogEffect(event: SmithersEvent) {
     if (!this.logDir) return Effect.void;
     const dir = this.logDir;
-    return fromPromise("persist event log", async () => {
-      await fs.mkdir(dir, { recursive: true });
-      await fs.appendFile(join(dir, "stream.ndjson"), `${JSON.stringify(event)}\n`, "utf8");
-      if (isAgentTraceEvent(event)) {
-        await fs.appendFile(
-          join(dir, "agent-trace.ndjson"),
-          `${JSON.stringify(toPersistedAgentTraceRecord(event))}\n`,
-          "utf8",
-        );
-      }
+    // Append one NDJSON line per event. Opening with flag "a" avoids re-reading
+    // and rewriting the whole log on every event, which costs O(file size) per
+    // write and can lose lines if two persists overlap.
+    return Effect.gen(function* () {
+      const fs = yield* FileSystem.FileSystem;
+      yield* fs.makeDirectory(dir, { recursive: true });
+      const file = join(dir, "stream.ndjson");
+      yield* fs.writeFileString(file, `${JSON.stringify(event)}\n`, { flag: "a" });
     });
   }
 }
diff --git a/src/index.ts b/src/index.ts
index 0bddc29a..893f9ace 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -12,13 +12,7 @@ export type { SchemaRegistryEntry } from "./SchemaRegistryEntry";
 export type { SmithersWorkflow } from "./SmithersWorkflow";
 export type { SmithersCtx } from "./SmithersCtx";
 export type { OutputAccessor, InferRow, InferOutputEntry } from "./OutputAccessor";
-export type {
-  AgentTraceCaptureMode,
-  AgentTraceCompleteness,
-  AgentTraceEventKind,
-  SmithersAgentTraceEvent,
-  SmithersEvent,
-} from "./SmithersEvent";
+export type { SmithersEvent } from "./SmithersEvent";
 export type { SmithersError } from "./SmithersError";
 export { SmithersError as SmithersErrorInstance, isSmithersError, errorToJson } from "./utils/errors";
 export type { SmithersErrorCode } from "./utils/errors";
@@ -105,13 +99,10 @@ export type { ServerOptions } from "./server/index";
 // Observability
 export {
   SmithersObservability,
-  PI_AGENT_TRACE_CAPABILITY_PROFILE,
   createSmithersObservabilityLayer,
   createSmithersOtelLayer,
   createSmithersRuntimeLayer,
-  isAgentTraceEvent,
   smithersMetrics,
-  toPersistedAgentTraceRecord,
   trackSmithersEvent,
   activeNodes,
   activeRuns,
diff --git a/src/observability/index.ts b/src/observability/index.ts
index 0439b524..b29835ff 100644
--- a/src/observability/index.ts
+++ b/src/observability/index.ts
@@ -65,17 +65,6 @@ import {
   updateProcessMetrics,
   vcsDuration,
 } from "../effect/metrics";
-import {
-  CANONICAL_AGENT_TRACE_VERSION,
-  PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS,
-  PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS,
-} from "../SmithersEvent";
-import type {
-  AgentTraceCaptureMode,
-  AgentTraceCompleteness,
-  AgentTraceEventKind,
-  SmithersAgentTraceEvent,
-} from "../SmithersEvent";
 
 export type SmithersLogFormat = "json" | "pretty" | "string" | "logfmt";
 
@@ -112,84 +101,6 @@ export class SmithersObservability extends Context.Tag("SmithersObservability")<
   SmithersObservabilityService
 >() {}
 
-export type AgentTraceCapabilityProfile = {
-  readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION;
-  readonly agentFamily: "pi";
-  readonly captureModes: readonly AgentTraceCaptureMode[];
-  readonly traceCompleteness: AgentTraceCompleteness;
-  readonly supportedEventKinds: readonly AgentTraceEventKind[];
-  readonly unsupportedEventKinds: readonly AgentTraceEventKind[];
-};
-
-export type PersistedAgentTraceRecord = {
-  readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION;
-  readonly traceCompleteness: AgentTraceCompleteness;
-  readonly unsupportedEventKinds: AgentTraceEventKind[];
-  readonly runId: string;
-  readonly workflowPath: string | null;
-  readonly workflowHash: string | null;
-  readonly nodeId: string;
-  readonly iteration: number;
-  readonly attempt: number;
-  readonly timestampMs: number;
-  readonly eventSequence: number;
-  readonly eventKind: AgentTraceEventKind;
-  readonly eventPhase: SmithersAgentTraceEvent["event"]["phase"];
-  readonly agentFamily: SmithersAgentTraceEvent["source"]["agentFamily"];
-  readonly agentId: string | null;
-  readonly agentModel: string | null;
-  readonly captureMode: AgentTraceCaptureMode;
-  readonly rawType: string | null;
-  readonly observed: boolean;
-  readonly payload: Record<string, unknown> | null;
-  readonly raw: unknown;
-  readonly redaction: SmithersAgentTraceEvent["redaction"];
-  readonly annotations: SmithersAgentTraceEvent["annotations"];
-};
-
-export const PI_AGENT_TRACE_CAPABILITY_PROFILE: AgentTraceCapabilityProfile = {
-  traceVersion: CANONICAL_AGENT_TRACE_VERSION,
-  agentFamily: "pi",
-  captureModes: ["cli-json", "rpc-events"],
-  traceCompleteness: "partial-observed",
-  supportedEventKinds: [...PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS],
-  unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS],
-};
-
-export function isAgentTraceEvent(event: unknown): event is SmithersAgentTraceEvent {
-  return !!event && typeof event === "object" && (event as { type?: unknown }).type === "AgentTraceEvent";
-}
-
-export function toPersistedAgentTraceRecord(
-  event: SmithersAgentTraceEvent,
-): PersistedAgentTraceRecord {
-  return {
-    traceVersion: event.traceVersion,
-    traceCompleteness: event.traceCompleteness,
-    unsupportedEventKinds: [...event.unsupportedEventKinds],
-    runId: event.runId,
-    workflowPath: event.workflowPath ?? null,
-    workflowHash: event.workflowHash ?? null,
-    nodeId: event.nodeId,
-    iteration: event.iteration,
-    attempt: event.attempt,
-    timestampMs: event.timestampMs,
-    eventSequence: event.event.sequence,
-    eventKind: event.event.kind,
-    eventPhase: event.event.phase,
-    agentFamily: event.source.agentFamily,
-    agentId: event.source.agentId ?? null,
-    agentModel: event.source.model ?? null,
-    captureMode: event.source.captureMode,
-    rawType: event.source.rawType ?? null,
-    observed: event.source.observed,
-    payload: event.payload ?? null,
-    raw: event.raw,
-    redaction: event.redaction,
-    annotations: event.annotations,
-  };
-}
-
 export const prometheusContentType =
   "text/plain; version=0.0.4; charset=utf-8";
 
diff --git a/src/tools/context.ts b/src/tools/context.ts
index cd202db0..c585790c 100644
--- a/src/tools/context.ts
+++ b/src/tools/context.ts
@@ -8,8 +8,6 @@ export type ToolContext = {
   nodeId: string;
   iteration: number;
   attempt: number;
-  workflowPath?: string | null;
-  workflowHash?: string | null;
   rootDir: string;
   allowNetwork: boolean;
   maxOutputBytes: number;
diff --git a/tests/observability.test.ts b/tests/observability.test.ts
index 5cadf2d5..0b80a2f1 100644
--- a/tests/observability.test.ts
+++ b/tests/observability.test.ts
@@ -1,14 +1,11 @@
 import { describe, expect, test } from "bun:test";
 import { Metric } from "effect";
 import {
-  PI_AGENT_TRACE_CAPABILITY_PROFILE,
   httpRequestDuration,
   renderPrometheusMetrics,
   runsTotal,
-  toPersistedAgentTraceRecord,
 } from "../src/observability";
 import { runPromise } from "../src/effect/runtime";
-import type { SmithersAgentTraceEvent } from "../src/SmithersEvent";
 
 describe("Prometheus metrics", () => {
   test("renders built-in Smithers metrics in Prometheus exposition format", async () => {
@@ -26,91 +23,3 @@ describe("Prometheus metrics", () => {
     expect(output).toContain("smithers_http_request_duration_ms_count");
   });
 });
-
-describe("agent trace observability", () => {
-  test("declares the implemented Pi trace slice truthfully", () => {
-    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceVersion).toBe(1);
-    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.agentFamily).toBe("pi");
-    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceCompleteness).toBe("partial-observed");
-    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.supportedEventKinds).toEqual([
-      "assistant.text.delta",
-      "tool.execution.start",
-      "tool.execution.update",
-      "tool.execution.end",
-    ]);
-    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.unsupportedEventKinds).toContain(
-      "assistant.thinking.delta",
-    );
-    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("cli-json");
-    expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("rpc-events");
-  });
-
-  test("flattens canonical trace events into queryable persisted records", () => {
-    const event: SmithersAgentTraceEvent = {
-      type: "AgentTraceEvent",
-      traceVersion: 1,
-      traceCompleteness: "partial-observed",
-      unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"],
-      runId: "run-1",
-      workflowPath: "/tmp/workflow.tsx",
-      workflowHash: "workflow-hash",
-      nodeId: "node-a",
-      iteration: 2,
-      attempt: 3,
-      timestampMs: 123,
-      event: {
-        sequence: 4,
-        kind: "tool.execution.end",
-        phase: "tool",
-      },
-      source: {
-        agentFamily: "pi",
-        agentId: "pi-agent-id",
-        model: "gpt-5.2-codex",
-        captureMode: "rpc-events",
-        rawType: "tool_execution_end",
-        observed: true,
-      },
-      payload: {
-        toolCallId: "tool-1",
-        toolName: "read",
-        isError: false,
-      },
-      raw: { type: "tool_execution_end" },
-      redaction: null,
-      annotations: { "custom.test": true },
-    };
-
-    const record = toPersistedAgentTraceRecord(event);
-
-    expect(record).toEqual({
-      traceVersion: 1,
-      traceCompleteness: "partial-observed",
-      unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"],
-      runId: "run-1",
-      workflowPath: "/tmp/workflow.tsx",
-      workflowHash: "workflow-hash",
-      nodeId: "node-a",
-      iteration: 2,
-      attempt: 3,
-      timestampMs: 123,
-      eventSequence: 4,
-      eventKind: "tool.execution.end",
-      eventPhase: "tool",
-      agentFamily: "pi",
-      agentId: "pi-agent-id",
-      agentModel: "gpt-5.2-codex",
-      captureMode: "rpc-events",
-      rawType: "tool_execution_end",
-      observed: true,
-      payload: {
-        toolCallId: "tool-1",
-        toolName: "read",
-        isError: false,
-      },
-      raw: { type: "tool_execution_end" },
-      redaction: null,
-      annotations: { "custom.test": true },
-    });
-  });
-});
diff --git a/tests/pi-support.test.ts b/tests/pi-support.test.ts
index 170d16cf..a7a89d81 100644
--- a/tests/pi-support.test.ts
+++ b/tests/pi-support.test.ts
@@ -3,9 +3,6 @@ import { afterEach, describe, expect, test } from "bun:test";
  import { join } from "node:path";
  import { tmpdir } from "node:os";
  import { PiAgent } from "../src/agents";
- import { EventBus } from "../src/events";
- import { runWithToolContext } from "../src/tools/context";
- import type { SmithersAgentTraceEvent } from "../src/SmithersEvent";
  
  const originalPath = process.env.PATH ?? "";
  
@@ -63,8 +60,7 @@ import { afterEach, describe, expect, test } from "bun:test";
          thinking: "low",
          verbose: true,
          env: { PATH: process.env.PATH! },
-  });
-
+       });
  
        const result = await agent.generate({
          messages: [
@@ -187,6 +183,48 @@ import { afterEach, describe, expect, test } from "bun:test";
      }
    });
  
+   test("PiAgent RPC mode waits past tool-use turns for the final assistant answer", async () => {
+     const fake = await makeFakePi(`
+ let buffer = "";
+ process.stdin.on("data", (chunk) => {
+   buffer += chunk.toString("utf8");
+   const lines = buffer.split(/\\r?\\n/);
+   buffer = lines.pop();
+   for (const line of lines) {
+     if (!line.trim()) continue;
+     const msg = JSON.parse(line);
+     if (msg.type === "prompt") {
+       process.stdout.write(JSON.stringify({ type: "response", command: "prompt", success: true, id: msg.id }) + "\\n");
+       process.stdout.write(JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: "Thinking" } }) + "\\n");
+       process.stdout.write(JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Tool turn" }], stopReason: "toolUse" } }) + "\\n");
+       setTimeout(() => {
+         process.stdout.write(JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: " final answer" } }) + "\\n");
+         process.stdout.write(JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Final answer" }], stopReason: "stop" } }) + "\\n");
+       }, 20);
+     }
+   }
+ });
+ `);
+ 
+     try {
+       process.env.PATH = `${fake.dir}:${originalPath}`;
+ 
+       const agent = new PiAgent({
+         mode: "rpc",
+         model: "gpt-4o-mini",
+         env: { PATH: process.env.PATH! },
+       });
+ 
+       const result = await agent.generate({
+         messages: [{ role: "user", content: "Use a tool and then answer" }],
+       });
+ 
+       expect(result.text).toBe("Final answer");
+     } finally {
+       await rm(fake.dir, { recursive: true, force: true });
+     }
+   });
+ 
    test("PiAgent RPC mode handles extension UI requests", async () => {
      const argsFileDir = await mkdtemp(join(tmpdir(), "smithers-pi-rpc-ui-"));
      const argsFile = join(argsFileDir, "prompt.json");
@@ -313,8 +351,7 @@ import { afterEach, describe, expect, test } from "bun:test";
          mode: "json",
          model: "test-model",
          env: { PATH: process.env.PATH! },
-  });
-
+       });
 
        const result = await agent.generate({
          messages: [{ role: "user", content: "Hello" }],
@@ -329,124 +366,6 @@ import { afterEach, describe, expect, test } from "bun:test";
      }
    });
 
-   test("PiAgent json mode emits canonical trace events and persists them", async () => {
-     // Fake Pi emits NDJSON with assistant text deltas and a tool lifecycle
-     const fake = await makeFakePi(`
-const lines = [
- JSON.stringify({ type: "session", version: 3, id: "sess-1" }),
- JSON.stringify({ type: "agent_start" }),
- JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: "Hello" } }),
- JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: ", world" } }),
- JSON.stringify({ type: "tool_execution_start", toolCall: { id: "t1", name: "read", args: { path: "README.md" } } }),
- JSON.stringify({ type: "tool_execution_end", toolCall: { id: "t1", name: "read" }, result: { ok: true } }),
- JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Hello, world" }], stopReason: "stop" } })
-];
-process.stdout.write(lines.join("\\n") + "\\n");
-`);
-
-     const memoryEvents: { seq: number; row: any }[] = [];
-     const db = {
-       insertEventWithNextSeq: ({ runId, timestampMs, type, payloadJson }: any) => {
-         const seq = (memoryEvents.length > 0 ? memoryEvents[memoryEvents.length - 1].seq : -1) + 1;
-         memoryEvents.push({ seq, row: { runId, timestampMs, type, payloadJson } });
-         return Promise.resolve(seq);
-       },
-     } as any;
-
-     const logDir = await mkdtemp(join(tmpdir(), "smithers-agent-trace-"));
-     const bus = new EventBus({ db, logDir });
-
-     try {
-       process.env.PATH = `${fake.dir}:${originalPath}`;
-       const agent = new PiAgent({ mode: "json", model: "pi-test-model", env: { PATH: process.env.PATH! } });
-
-       const captured: SmithersAgentTraceEvent[] = [];
-
-       await runWithToolContext(
-         {
-           db: db as any,
-           runId: "run-1",
-           nodeId: "node-A",
-           iteration: 1,
-           attempt: 1,
-           workflowPath: "/tmp/workflows/pi-workflow.tsx",
-           workflowHash: "workflow-hash-1",
-           rootDir: process.cwd(),
-           allowNetwork: true,
-           maxOutputBytes: 200_000,
-           timeoutMs: 30_000,
-           seq: 0,
-           emitEvent: (e: any) => {
-             if (e && e.type === "AgentTraceEvent") {
-               captured.push(e as SmithersAgentTraceEvent);
-             }
-             return bus.emitEventQueued(e as any);
-           },
-         },
-         async () => {
-           const result = await agent.generate({ messages: [{ role: "user", content: "Ping" }] });
-           expect(result.text).toContain("Hello, world");
-         },
-       );
-       await bus.flush();
-
-       const sequences = captured.map((e) => e.event.sequence);
-       expect(sequences).toEqual([1, 2, 3, 4]);
-
-       // We should have assistant deltas and tool lifecycle mapped
-       const kinds = captured.map((e) => e.event.kind);
-       expect(kinds).toEqual([
-         "assistant.text.delta",
-         "assistant.text.delta",
-         "tool.execution.start",
-         "tool.execution.end",
-       ]);
-
-       // Correlation and truthfulness fields present
-       for (const e of captured) {
-         expect(e.traceVersion).toBe(1);
-         expect(e.traceCompleteness).toBe("partial-observed");
-         expect(e.unsupportedEventKinds).toContain("assistant.thinking.delta");
-         expect(e.runId).toBe("run-1");
-         expect(e.workflowPath).toBe("/tmp/workflows/pi-workflow.tsx");
-         expect(e.workflowHash).toBe("workflow-hash-1");
-         expect(e.nodeId).toBe("node-A");
-         expect(e.iteration).toBe(1);
-         expect(e.attempt).toBe(1);
-         expect(e.source.agentFamily).toBe("pi");
-         expect(e.source.agentId).toBe(agent.id);
-         expect(e.source.model).toBe("pi-test-model");
-         expect(e.source.captureMode).toBe("cli-json");
-       }
-
-       // Persisted to DB rows as durable event entries
-       const persistedTraceRows = memoryEvents.filter((r) => r.row.type === "AgentTraceEvent");
-       expect(persistedTraceRows).toHaveLength(captured.length);
-       expect(
-         persistedTraceRows.map((row) => JSON.parse(row.row.payloadJson).event.sequence),
-       ).toEqual([1, 2, 3, 4]);
-
-       // Persisted to a dedicated, flattened local trace log for later querying/export.
-       const persistedTraceLog = await readFile(join(logDir, "agent-trace.ndjson"), "utf8");
-       const persistedTraceRecords = persistedTraceLog
-         .trim()
-         .split(/\r?\n/)
-         .filter(Boolean)
-         .map((line) => JSON.parse(line) as Record<string, unknown>);
-       expect(persistedTraceRecords).toHaveLength(4);
-       expect(persistedTraceRecords.map((record) => record.eventKind)).toEqual(kinds);
-       expect(persistedTraceRecords[0]?.traceCompleteness).toBe("partial-observed");
-       expect(persistedTraceRecords[0]?.unsupportedEventKinds).toContain("assistant.thinking.delta");
-       expect(persistedTraceRecords[0]?.runId).toBe("run-1");
-       expect(persistedTraceRecords[0]?.nodeId).toBe("node-A");
-       expect(persistedTraceRecords[0]?.attempt).toBe(1);
-       expect(persistedTraceRecords[0]?.captureMode).toBe("cli-json");
-     } finally {
-       await rm(fake.dir, { recursive: true, force: true });
-       await rm(logDir, { recursive: true, force: true });
-     }
-   });
-
    test("PiAgent json mode extracts JSON from text content in turn_end", async () => {
      // Simulates pi output where the agent returns JSON in the text content
      const fake = await makeFakePi(`

From ea17d51fef476663d5139cfc5fa4df1915bd40b2 Mon Sep 17 00:00:00 2001
From: Samuel Huber <samuel@dtech.vision>
Date: Thu, 26 Mar 2026 21:27:09 +0100
Subject: [PATCH 6/7] docs(pi): add hello-world sample and brief PI usage note

---
 docs/integrations/pi-integration.mdx |  2 ++
 examples/pi-hello-world.tsx          | 31 ++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 examples/pi-hello-world.tsx

diff --git a/docs/integrations/pi-integration.mdx b/docs/integrations/pi-integration.mdx
index 4b584577..e8a30063 100644
--- a/docs/integrations/pi-integration.mdx
+++ b/docs/integrations/pi-integration.mdx
@@ -72,6 +72,8 @@ pi --version
 bun run test
 ```
 
+For a minimal end-to-end example, run `bun run cli run examples/pi-hello-world.tsx`. To choose a model, check your PI `models.json` or run `pi --list-models`.
+
 ## Design Guidance
 
 Use `PiAgent` task nodes when:
diff --git a/examples/pi-hello-world.tsx b/examples/pi-hello-world.tsx
new file mode 100644
index 00000000..19938c2c
--- /dev/null
+++ b/examples/pi-hello-world.tsx
@@ -0,0 +1,31 @@
+/** @jsxImportSource smithers-orchestrator */
+import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator";
+import { z } from "zod";
+
+const HelloSchema = z.object({ // Zod object schema registered below under the `output` key of createSmithers.
+  message: z.string(), // The task prompt asks the agent for {"message":"hello world"}.
+});
+
+const { smithers, outputs } = createSmithers( // `smithers` wraps the default export below; `outputs.output` is passed to the <Task>.
+  {
+    output: HelloSchema, // Presumably this key is what `outputs.output` resolves to — confirm against createSmithers.
+  },
+  {
+    dbPath: "./examples/pi-hello-world.db", // NOTE(review): relative path; looks like it assumes the repo root as cwd — confirm.
+  },
+);
+
+const pi = new PiAgent({ // Agent instance handed to the <Task> below via `agent={pi}`.
+  provider: "openai-codex",
+  model: "gpt-5.4", // To pick a different model, check your PI `models.json` or run `pi --list-models`.
+  mode: "json",
+});
+
+export default smithers(() => ( // Single-task workflow: <Task id="hello"> uses agent `pi` and `outputs.output`.
+  <Workflow name="pi-hello-world">
+    <Task id="hello" output={outputs.output} agent={pi}>
+      {`Return exactly this JSON and nothing else:
+{"message":"hello world"}`}
+    </Task>
+  </Workflow>
+));

From cf4dee9cc1e57de27f09002887bff618a789e8b7 Mon Sep 17 00:00:00 2001
From: Samuel Huber <samuel@dtech.vision>
Date: Thu, 26 Mar 2026 21:27:46 +0100
Subject: [PATCH 7/7] examples(pi): add tool-using workflow sample

---
 examples/pi-tools-input.txt    |  3 +++
 examples/pi-tools-workflow.tsx | 42 ++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 examples/pi-tools-input.txt
 create mode 100644 examples/pi-tools-workflow.tsx

diff --git a/examples/pi-tools-input.txt b/examples/pi-tools-input.txt
new file mode 100644
index 00000000..4f8a719e
--- /dev/null
+++ b/examples/pi-tools-input.txt
@@ -0,0 +1,3 @@
+Smithers PI tools sample
+Unique phrase: saffron-orbit-lantern
+This file is here so the PI agent has to read something real from disk.
diff --git a/examples/pi-tools-workflow.tsx b/examples/pi-tools-workflow.tsx
new file mode 100644
index 00000000..ad225b82
--- /dev/null
+++ b/examples/pi-tools-workflow.tsx
@@ -0,0 +1,42 @@
+/** @jsxImportSource smithers-orchestrator */
+import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator";
+import { z } from "zod";
+
+const OutputSchema = z.object({ // Zod object schema registered below under the `output` key of createSmithers.
+  phrase: z.string().regex(/^saffron-orbit-lantern$/), // Must equal the "Unique phrase" in examples/pi-tools-input.txt.
+  lineCount: z.number().int().min(3).max(3), // Exactly 3, matching the three lines of examples/pi-tools-input.txt.
+  cwdBasename: z.string().regex(/^examples$/), // NOTE(review): only validates if the agent's cwd is `examples` — confirm how cwd is set.
+  summary: z.string(), // Any string; no further constraint.
+});
+
+const { smithers, outputs } = createSmithers( // `smithers` wraps the default export below; `outputs.output` is passed to the <Task>.
+  {
+    output: OutputSchema, // Presumably this key is what `outputs.output` resolves to — confirm against createSmithers.
+  },
+  {
+    dbPath: "./examples/pi-tools-workflow.db", // NOTE(review): relative path; looks like it assumes the repo root as cwd — confirm.
+  },
+);
+
+const pi = new PiAgent({ // Agent instance handed to the <Task> below via `agent={pi}`.
+  provider: "openai-codex",
+  model: "gpt-5.4",
+  mode: "rpc",
+  tools: ["read", "bash"], // The two tools the task prompt below tells the agent to use.
+});
+
+export default smithers(() => ( // Single-task workflow: <Task id="inspect-file"> uses agent `pi`, `outputs.output`, and retries={2}.
+  <Workflow name="pi-tools-workflow">
+    <Task id="inspect-file" output={outputs.output} agent={pi} retries={2}>
+      {`Use the read tool to inspect ./pi-tools-input.txt and use the bash tool to determine the basename of the current working directory.
+
+Then return exactly this JSON and nothing else:
+{
+  "phrase": "the unique phrase from the file",
+  "lineCount": 3,
+  "cwdBasename": "the basename of the current working directory",
+  "summary": "one short sentence confirming what you found"
+}`}
+    </Task>
+  </Workflow>
+));