From 79e3873150c07e49ca45e93d8fa7494f0f1ad952 Mon Sep 17 00:00:00 2001 From: Samuel Huber Date: Thu, 26 Mar 2026 15:53:22 +0100 Subject: [PATCH 1/7] Specify agent trace OTEL log export --- docs/concepts/agent-trace-otel-logs-spec.mdx | 1063 ++++++++++++++++++ 1 file changed, 1063 insertions(+) create mode 100644 docs/concepts/agent-trace-otel-logs-spec.mdx diff --git a/docs/concepts/agent-trace-otel-logs-spec.mdx b/docs/concepts/agent-trace-otel-logs-spec.mdx new file mode 100644 index 00000000..12307b6b --- /dev/null +++ b/docs/concepts/agent-trace-otel-logs-spec.mdx @@ -0,0 +1,1063 @@ +--- +title: Agent Trace OTEL Logs Specification +description: Full-fidelity specification for capturing agent-visible execution traces and exporting them as OpenTelemetry logs to Loki or any OTLP-compatible backend. +--- + +This document specifies how Smithers must capture, normalize, persist, export, and verify agent execution traces as OpenTelemetry logs. + +This is a design specification, not an implementation sketch. Every requirement in this document is normative unless explicitly marked as non-normative. + +## Status + +- Intended scope: new observability surface for agent trace logs +- Intended audience: maintainers implementing runtime, agent, observability, and verification changes +- Intended outcome: a system where every supported agent run produces a complete, queryable, correlated trace of what Smithers could observe + +## Problem Statement + +Smithers currently captures: + +- durable workflow lifecycle events +- structured application logs +- traces and metrics for runtime behavior +- partial agent output in some cases + +Smithers does not currently guarantee a full-fidelity record of agent-visible execution behavior across all agent integrations. 
+ +In particular: + +- `PiAgent` exposes a rich event stream, but Smithers currently collapses it to final text plus usage +- several CLI agents emit machine-readable output that Smithers does not preserve as first-class trace events +- SDK-based agents return final results and rely on Smithers-side tool logging, but do not provide a canonical agent trace model +- there is no OTEL logs pipeline in the local collector configuration + +The result is that operators cannot reliably answer questions such as: + +- What did the agent stream before it failed? +- Which tools did the agent invoke, in what order, with which visible arguments and results? +- Did the agent emit visible thinking content, compaction events, retries, or queued follow-up behavior? +- Can we reconstruct exactly what Smithers observed for a given run, node, and attempt? +- Can we query this in Grafana Loki or another OTLP log backend with stable run-level correlation? + +This specification addresses that gap. + +## Goals + +The system defined here MUST: + +- capture the fullest agent-visible trace Smithers can obtain for each supported agent +- export that trace as OTEL logs to Loki or any OTLP-compatible log backend +- preserve run correlation through stable attributes such as `run.id`, `workflow.path`, `node.id`, `attempt`, and `iteration` +- preserve raw trace fidelity without forcing operators to infer behavior from summary logs +- remain explicit about what was directly observed versus what was derived by Smithers +- provide deterministic verification criteria for correctness and task completion + +## Non-Goals + +The system defined here MUST NOT claim to provide: + +- provider-internal hidden chain-of-thought when the upstream agent or SDK does not expose it +- exact reconstruction of invisible model-side planning not surfaced through events, messages, or tool calls +- a replacement for the durable Smithers event log or database +- a guarantee that every backend will index arbitrary 
high-cardinality fields efficiently + +## Core Principle + +Smithers MUST export what it observed, not what it inferred. + +Every exported trace record MUST be classifiable as one of: + +- raw upstream agent event +- raw Smithers runtime event +- Smithers-derived normalization of one raw event +- Smithers-generated transport or export diagnostic + +If a record is derived, the derivation MUST be explicit. + +## Definitions + +### Agent Trace + +An agent trace is the ordered set of agent-visible execution records associated with one Smithers node attempt. + +Agent trace records include, where available: + +- streamed assistant text +- streamed visible thinking content +- message lifecycle events +- tool call lifecycle events +- tool result lifecycle events +- compaction and retry events +- session metadata +- final assistant message +- final tool results +- agent stderr diagnostics when those are observable to Smithers + +### Full Trace + +For a given agent integration, a full trace means all upstream-visible records Smithers can access without patching the upstream model provider. + +Full trace does not mean hidden reasoning. It means all observable records available through: + +- subprocess stdout or stderr +- structured CLI output modes +- RPC event streams +- SDK callback/event surfaces +- persisted session artifacts intentionally provided by the agent system + +### Canonical Trace Event + +A canonical trace event is the Smithers-normalized representation of one raw observed record. + +Canonical trace events are the unit exported to OTEL logs and optionally persisted durably by Smithers. + +### Attempt + +An attempt is one execution of one node at one iteration with one attempt number. A canonical agent trace is scoped to exactly one attempt. + +## Invariants + +The implementation MUST satisfy all of the following invariants. 
+ +### Identity Invariants + +Every canonical trace event MUST include: + +- `runId` +- `nodeId` when the event is attempt-scoped +- `iteration` when the event is attempt-scoped +- `attempt` when the event is attempt-scoped +- `timestampMs` +- `source.agentFamily` +- `source.captureMode` +- `event.kind` +- `event.sequence` + +### Ordering Invariants + +Canonical trace events for a single attempt MUST be totally ordered by `event.sequence`. + +If upstream events arrive out of wall-clock order, Smithers MUST preserve receive order and MUST NOT reorder them after capture. + +`event.sequence` MUST be strictly increasing within one attempt; duplicate sequence numbers are not allowed. + +### Fidelity Invariants + +Smithers MUST preserve raw upstream payloads for canonical trace events unless a redaction rule requires modification. + +If redaction occurs: + +- the record MUST indicate redaction occurred +- the redaction reason MUST be attached +- the original raw value MUST NOT be exported + +### Correlation Invariants + +Every OTEL log record derived from a canonical trace event MUST be queryable by: + +- run +- workflow path +- node +- iteration +- attempt +- agent family +- event kind + +### Completeness Invariants + +If Smithers receives a parseable upstream event, Smithers MUST either: + +- convert it into a canonical trace event and export it +- or emit a diagnostic record explaining why it was dropped + +Silent drops are not allowed. + +### Truthfulness Invariants + +If an agent integration cannot expose a certain class of events, the system MUST record capability absence explicitly and MUST NOT pretend completeness. + +Example: + +- if an SDK-based integration does not expose thinking deltas, Smithers MUST mark that event class as unsupported for that agent family + +## Scope of Observability + +The system covers three layers. + +### Layer 1: Canonical Runtime Record + +Smithers SHOULD persist canonical trace events durably for replay and audit, alongside existing run events and attempt data. 
+ +### Layer 2: OTEL Log Export + +Smithers MUST export canonical trace events as OTEL logs when OTEL log export is enabled. + +### Layer 3: Summary Metrics and Diagnostics + +Smithers MAY derive metrics from canonical trace events, but those metrics are secondary and MUST NOT be the sole evidence of capture correctness. + +## Agent Capability Model + +Each agent family MUST declare an explicit trace capability profile. + +The capability profile MUST enumerate support for: + +- session metadata +- assistant text deltas +- visible thinking deltas +- final assistant message +- tool execution start +- tool execution update +- tool execution end +- retry events +- compaction events +- raw stderr diagnostics +- persisted session artifact + +### PiAgent + +`PiAgent` MUST be treated as a high-fidelity integration. + +Available sources include: + +- JSON event stream mode +- RPC mode event stream +- Pi session JSONL artifacts + +Pi exposes event types such as: + +- `agent_start` +- `agent_end` +- `turn_start` +- `turn_end` +- `message_start` +- `message_update` +- `message_end` +- `tool_execution_start` +- `tool_execution_update` +- `tool_execution_end` +- `auto_compaction_start` +- `auto_compaction_end` +- `auto_retry_start` +- `auto_retry_end` + +Visible thinking content emitted by Pi MUST be captured as trace content. + +Pi session artifacts, when enabled and available, SHOULD be recorded as canonical artifacts associated with the attempt. + +### CodexAgent + +`CodexAgent` MUST be treated as a structured CLI integration with medium fidelity. + +Codex emits JSON output. Smithers MUST preserve all parseable structured events made available by that mode. + +If Codex exposes usage, step, message, tool, or completion events, Smithers MUST map them to canonical trace events rather than extracting only final text. + +If a given Codex event schema is unstable, Smithers MUST preserve the raw event payload and classify the normalization conservatively. 
+ +### ClaudeCodeAgent + +`ClaudeCodeAgent` MUST be treated as a structured CLI integration with medium fidelity. + +When `stream-json` is enabled, Smithers MUST preserve all parseable stream records and map them into canonical trace events where possible. + +Partial assistant messages, tool call indicators, and usage events MUST NOT be discarded if they are parseable. + +### GeminiAgent + +`GeminiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on output mode. + +Smithers MUST preserve parseable structured output and MUST explicitly mark unsupported event classes when the CLI exposes only final or coarse-grained results. + +### KimiAgent + +`KimiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on `outputFormat`. + +If `stream-json` mode is used, Smithers MUST preserve event records. If only final text is available, Smithers MUST mark the trace as partial. + +### OpenAIAgent and AnthropicAgent + +`OpenAIAgent` and `AnthropicAgent` MUST be treated as SDK integrations. + +They do not inherently expose a rich subprocess event stream in the current Smithers wrapper. + +For these agents, Smithers MUST capture: + +- prompt dispatch boundaries +- final assistant response +- token usage when surfaced +- Smithers-side tool execution start and end +- visible tool output recorded by Smithers +- node output emitted by Smithers if any + +Smithers MUST mark thinking deltas and message lifecycle as unsupported unless the underlying SDK path is instrumented to provide them. + +### AmpAgent and ForgeAgent + +`AmpAgent` and `ForgeAgent` MUST be treated as text-first subprocess integrations unless a structured mode is added. + +Smithers MUST capture: + +- final response text +- stderr diagnostics +- Smithers-side tool execution and runtime events + +Smithers MUST mark full trace fidelity as unsupported for these integrations. 
+ +## Capture Modes + +Each attempt MUST declare one capture mode: + +- `sdk-events` +- `rpc-events` +- `cli-json-stream` +- `cli-json` +- `cli-text` +- `artifact-import` + +Capture mode is part of the canonical attempt metadata and MUST be exported with every trace record. + +## Canonical Data Model + +Smithers MUST introduce a canonical event model for agent traces. + +The exact TypeScript shape is an implementation detail, but the semantic fields are mandatory. + +### Attempt Metadata + +Each attempt MUST expose: + +- `traceVersion` +- `agentFamily` +- `agentId` +- `model` +- `captureMode` +- `traceCompleteness` +- `unsupportedEventKinds` +- `traceStartedAtMs` +- `traceFinishedAtMs` +- `rawArtifactRefs` + +### `traceCompleteness` + +`traceCompleteness` MUST be one of: + +- `full-observed` +- `partial-observed` +- `final-only` +- `capture-failed` + +Definitions: + +- `full-observed`: Smithers captured every event class the integration claims to support +- `partial-observed`: Smithers captured some but not all supported classes +- `final-only`: only final response and coarse metadata were available +- `capture-failed`: Smithers expected trace events but could not capture them reliably + +### Canonical Event Fields + +Every canonical trace event MUST include: + +- `traceVersion` +- `runId` +- `workflowPath` +- `workflowHash` when available +- `nodeId` +- `iteration` +- `attempt` +- `timestampMs` +- `event.sequence` +- `event.kind` +- `event.phase` +- `source.agentFamily` +- `source.captureMode` +- `source.rawType` +- `source.observed` +- `payload` +- `raw` +- `redaction` +- `annotations` + +### `event.kind` + +`event.kind` MUST be chosen from a controlled vocabulary. 
+ +The initial vocabulary MUST include: + +- `session.start` +- `session.end` +- `turn.start` +- `turn.end` +- `message.start` +- `message.update` +- `message.end` +- `assistant.text.delta` +- `assistant.thinking.delta` +- `assistant.message.final` +- `tool.execution.start` +- `tool.execution.update` +- `tool.execution.end` +- `tool.result` +- `retry.start` +- `retry.end` +- `compaction.start` +- `compaction.end` +- `stderr` +- `stdout` +- `usage` +- `capture.warning` +- `capture.error` +- `artifact.created` + +No integration-specific naming is allowed in `event.kind`. Integration-specific names MUST remain in `source.rawType`. + +### `event.phase` + +`event.phase` MUST be one of: + +- `agent` +- `turn` +- `message` +- `tool` +- `session` +- `capture` +- `artifact` + +### `source.observed` + +`source.observed` MUST be a boolean indicating whether the payload was directly observed from the upstream integration. + +Derived normalization records MUST set `source.observed` to `false`. + +### `payload` + +`payload` MUST contain normalized fields intended for stable querying and display. + +Examples: + +- for `assistant.text.delta`: `{ text: string }` +- for `assistant.thinking.delta`: `{ text: string }` +- for `tool.execution.start`: `{ toolCallId: string, toolName: string, argsPreview: unknown }` +- for `tool.execution.end`: `{ toolCallId: string, toolName: string, isError: boolean, resultPreview: unknown }` + +### `raw` + +`raw` MUST contain the raw upstream object or raw text fragment as captured after redaction. + +If no raw form exists, `raw` MAY be `null`. + +## Custom Annotations + +The system MUST support user-defined annotations attached at run start. + +Annotations MUST be: + +- provided in run options and server APIs +- stored durably on the run +- merged into every canonical trace event at export time + +Annotations MUST support scalar values only: + +- string +- number +- boolean + +Nested objects and arrays MUST be rejected or flattened before run start. 
The behavior MUST be explicit and deterministic. + +The following annotation namespaces are reserved: + +- `smithers.*` +- `run.*` +- `workflow.*` +- `node.*` +- `agent.*` +- `otel.*` + +User annotations SHOULD use a `custom.*` prefix in canonical export. + +## Workflow Metadata Requirements + +Every canonical trace event MUST include: + +- `workflow.path` as an OTEL attribute when available +- `workflow.hash` as an OTEL attribute when available + +If `workflow.path` is unavailable, Smithers MUST omit the `workflow.path` attribute rather than inventing a placeholder path. + +## Redaction Model + +Redaction is mandatory because agent traces can contain sensitive content. + +The implementation MUST support: + +- disabled redaction +- default redaction +- custom redaction rules + +### Minimum Default Redaction + +Default redaction MUST handle at least: + +- API keys +- bearer tokens +- common secret env vars +- authorization headers +- cookie headers +- explicitly configured secret literals + +### Redaction Semantics + +Redaction MUST occur before: + +- durable canonical trace persistence +- OTEL log export +- artifact snapshot export + +If redaction modifies content, the trace event MUST record: + +- `redaction.applied = true` +- `redaction.ruleIds = string[]` + +## Export Model + +Canonical trace events MUST be exportable as OTEL logs. + +### OTEL Collector Requirements + +The collector configuration MUST define a `logs` pipeline. + +The logs pipeline MUST accept OTLP input and MUST support at least one of: + +- OTLP logs exporter +- Loki exporter + +The local development stack SHOULD include Loki for verification and human inspection. + +### OTEL Record Shape + +For each canonical trace event, Smithers MUST emit one OTEL log record. 
+ +The log body MUST contain a compact structured JSON representation of: + +- canonical payload +- raw payload when configured +- redaction metadata + +The log attributes MUST include: + +- `service.name` +- `smithers.trace.version` +- `run.id` +- `workflow.path` when available +- `workflow.hash` when available +- `node.id` when available +- `node.iteration` when available +- `node.attempt` when available +- `agent.family` +- `agent.id` when available +- `agent.model` when available +- `agent.capture_mode` +- `trace.completeness` +- `event.kind` +- `event.phase` +- `event.sequence` +- `source.raw_type` +- `source.observed` + +Custom annotations MUST be exported as OTEL attributes under `custom.*`. + +### Attribute Cardinality Rules + +The following MUST be attributes: + +- run identifiers +- workflow identifiers +- node identifiers +- attempt identifiers +- event kind +- agent family +- capture mode + +The following MUST NOT be indexed as labels in Loki-specific configurations: + +- full prompt text +- full response text +- thinking text +- tool args bodies +- tool result bodies +- arbitrary user free-text annotations + +These large fields MUST remain in the log body. + +### Severity Mapping + +Severity SHOULD be assigned as follows: + +- normal trace events: `INFO` +- stderr and non-terminal capture anomalies: `WARN` +- capture failures and export failures: `ERROR` + +Severity MUST NOT be used to encode event kind. + +## Persistence Model + +Canonical trace events SHOULD be durably persisted by Smithers in addition to OTEL export. + +If durable persistence is implemented, the persistence layer MUST support: + +- ordered replay by attempt +- filtering by event kind +- pagination by sequence +- artifact references + +OTEL export MUST NOT be the only storage location for canonical trace data. + +## Artifact Model + +Some agent integrations expose richer external artifacts than can be represented comfortably as log streams. 
+ +Examples: + +- Pi session JSONL files +- raw CLI JSON event transcripts +- exported HTML or JSONL session artifacts + +Smithers SHOULD support trace artifacts with metadata: + +- `artifact.kind` +- `artifact.path` +- `artifact.contentType` +- `artifact.bytes` +- `artifact.createdAtMs` +- `artifact.redacted` + +Artifact creation MUST also emit canonical `artifact.created` events. + +## Failure Model + +The implementation MUST classify failures explicitly. + +### Capture Failure + +Capture failure means Smithers could not reliably obtain agent trace input it expected from the selected capture mode. + +Examples: + +- malformed JSON stream +- unexpected subprocess termination before terminal event +- SDK callback channel failure + +Capture failure MUST: + +- mark attempt `traceCompleteness = capture-failed` when terminally broken +- emit a `capture.error` canonical event +- include diagnostic details + +### Partial Capture + +Partial capture means Smithers obtained some trace events but missed expected categories. + +Examples: + +- stdout stream cut off after several tool events +- session artifact missing though event stream completed + +Partial capture MUST: + +- mark attempt `traceCompleteness = partial-observed` +- record the missing event classes in a `missingExpectedEventKinds` field on the attempt metadata; `unsupportedEventKinds` remains reserved for classes the integration cannot expose at all + +### Export Failure + +Export failure means Smithers captured canonical trace events but could not deliver them to the OTEL backend. + +Export failure MUST NOT erase canonical local truth. + +If export fails: + +- canonical local persistence MUST still succeed when enabled +- Smithers MUST emit operator diagnostics through existing logs +- the run MUST remain inspectable from durable local records + +## Normalization Rules + +Normalization MUST be conservative. + +### One Raw Event to One Canonical Event + +As a default rule, one raw upstream event SHOULD map to one canonical trace event. 
+ +If one raw event yields multiple canonical events, the implementation MUST document why and MUST include a stable parent link. + +### Text Deltas + +Assistant text deltas MUST remain deltas if the upstream protocol provided deltas. + +Smithers MUST NOT collapse deltas into a single blob during export. + +Final assembled messages MAY be emitted separately as `assistant.message.final`. + +### Thinking Deltas + +Visible thinking content MUST be captured as its own event class and MUST NOT be merged into assistant text. + +### Tool Calls + +Tool lifecycle MUST preserve: + +- stable tool call identifier when upstream provides one +- tool name +- visible arguments or argument preview +- partial updates when available +- final result preview +- error flag + +### Usage + +Usage records MUST be separate canonical events or attached to terminal message events in a way that remains queryable. + +If usage is attached, it MUST still be accessible without parsing free-form text. + +## Required Runtime Integration Points + +The implementation MUST integrate at these boundaries. + +### Agent Boundary + +Every agent integration MUST report raw trace observations into the canonical trace capture layer. + +No agent integration is allowed to silently parse and discard upstream event records before the capture layer sees them. 
+ +### Event Bus Boundary + +Canonical trace events SHOULD be emitted through or alongside the existing event bus so that: + +- they share run correlation +- they can participate in durable persistence +- they can reuse existing event-driven verification infrastructure + +### Attempt Finalization Boundary + +When an attempt finishes, Smithers MUST finalize trace metadata: + +- `traceFinishedAtMs` +- `traceCompleteness` +- `unsupportedEventKinds` +- `rawArtifactRefs` + +## Required Configuration Surface + +The implementation MUST define explicit configuration for: + +- enabling OTEL log export +- selecting backend endpoint +- enabling or disabling canonical local trace persistence +- selecting redaction mode +- retaining or dropping raw payload bodies +- retaining or dropping raw artifacts +- maximum event body bytes +- maximum artifact bytes + +The configuration MUST distinguish: + +- runtime operator policy +- run-specific annotations + +## Required Operator Queries + +The design is incomplete unless the following operator queries are supported. + +### Query Set A: Run Reconstruction + +Operators MUST be able to answer: + +- show all trace records for one run +- show all trace records for one run and node +- show only one attempt for one node +- show ordered assistant text deltas +- show visible thinking deltas when present +- show tool calls and results in order + +### Query Set B: Failure Analysis + +Operators MUST be able to answer: + +- which runs had trace capture failures +- which agents only provide final-only traces +- which attempts terminated without a terminal agent event +- which traces were partially redacted + +### Query Set C: Audit + +Operators MUST be able to answer: + +- what annotations were attached to a run +- which workflow file and workflow hash produced the trace +- which raw artifact file corresponds to this attempt + +## Verification Specification + +Task completion is not defined by code existing. It is defined by observable correctness. 
+ +The implementation is complete only if every verification class below passes. + +## Verification Class 1: Schema Correctness + +For each supported agent family, automated tests MUST verify that canonical trace events: + +- conform to the declared schema +- contain required identity fields +- maintain monotonic `event.sequence` +- correctly classify `traceCompleteness` + +Completion criterion: + +- zero schema violations in test fixtures + +## Verification Class 2: Ordering Correctness + +Automated tests MUST verify that for one attempt: + +- event sequences are strictly monotonic +- final events occur after preceding deltas +- no duplicate sequence numbers appear + +Completion criterion: + +- deterministic ordering across repeated test runs + +## Verification Class 3: Fidelity Correctness + +Fixture-based tests MUST compare raw upstream inputs with canonical trace outputs. + +For each fixture: + +- every parseable upstream event MUST result in a canonical event or an explicit diagnostic drop event +- visible thinking content MUST remain distinguishable from assistant text +- tool call identifiers and names MUST survive normalization + +Completion criterion: + +- full fixture coverage for each agent family and capture mode supported by Smithers + +## Verification Class 4: Completeness Classification + +Tests MUST verify the semantics of: + +- `full-observed` +- `partial-observed` +- `final-only` +- `capture-failed` + +Completion criterion: + +- each classification is produced by at least one explicit test case + +## Verification Class 5: OTEL Export Correctness + +Integration tests MUST verify that canonical trace events become OTEL log records with: + +- required attributes present +- correct body shape +- correct severity mapping +- correct custom annotation export + +Completion criterion: + +- logs are queryable in the target backend by `run.id`, `workflow.path`, `node.id`, `attempt`, and `event.kind` + +## Verification Class 6: Loki Query Correctness + +In a 
local stack with Loki enabled, end-to-end tests MUST verify that an operator can query: + +- all records for a run +- all records for a node attempt +- only thinking deltas +- only tool execution records +- only capture errors + +Completion criterion: + +- documented query examples return expected results against test data + +## Verification Class 7: Artifact Correctness + +When artifact capture is enabled, tests MUST verify: + +- artifact references are recorded +- artifacts exist on disk or in configured storage +- artifact metadata matches actual content +- artifact creation emits corresponding canonical events + +Completion criterion: + +- no dangling artifact references + +## Verification Class 8: Redaction Correctness + +Tests MUST verify that redaction: + +- removes required secrets from canonical payloads, raw payloads, OTEL bodies, and artifacts +- leaves non-sensitive content intact +- records which rules were applied + +Completion criterion: + +- zero known secret literals leak in test fixtures + +## Verification Class 9: Failure Resilience + +Tests MUST verify behavior when: + +- collector is unavailable +- backend rejects logs +- malformed upstream JSON is encountered +- subprocess exits before terminal event +- artifact write fails + +Completion criterion: + +- capture failures are classified +- local diagnostics exist +- durable local truth remains accessible when configured + +## Verification Class 10: Cross-Signal Correlation + +Tests MUST verify that logs correlate with: + +- run lifecycle events +- metrics +- spans + +At minimum, operators MUST be able to join by: + +- `run.id` +- `node.id` +- `attempt` + +Completion criterion: + +- one documented workflow run can be traced across event log, OTEL logs, and metrics without ambiguity + +## Acceptance Criteria + +The feature is not done until all of the following are true. + +### A. 
Canonical Model Exists + +Smithers has a canonical agent trace schema with explicit completeness states and per-agent capability declarations. + +### B. Pi Is High Fidelity + +`PiAgent` exports structured trace records for: + +- session lifecycle +- turn lifecycle +- message lifecycle +- assistant text deltas +- visible thinking deltas +- tool execution lifecycle +- retry and compaction events + +### C. Other Agents Are Truthfully Classified + +Every agent in `src/agents/` has a declared fidelity class and unsupported event set. + +### D. OTEL Logs Pipeline Exists + +The collector and local observability stack support OTEL logs end to end. + +### E. Queries Work + +Operators can answer the required run reconstruction, failure analysis, and audit queries from the exported logs. + +### F. Verification Is Automated + +Automated tests exist for schema, ordering, fidelity, completeness, OTEL export, redaction, failure handling, and query correctness. + +## Implementation Phasing + +This section is normative for rollout order. + +### Phase 1: Canonical Model + +Implement: + +- canonical trace schema +- completeness classification +- per-agent capability declarations + +### Phase 2: Pi Fidelity + +Implement: + +- Pi raw event capture +- canonical normalization +- OTEL export +- artifact capture for session files if configured + +### Phase 3: Structured CLI Agents + +Implement: + +- Codex +- Claude Code +- Gemini +- Kimi + +Each integration MUST ship with fixture-based normalization tests before being considered complete. + +### Phase 4: SDK and Text-Only Agents + +Implement: + +- explicit partial or final-only capture +- truthful capability declarations +- OTEL export for the observable subset + +### Phase 5: Redaction and Hardening + +Implement: + +- default redaction +- export failure handling +- artifact verification +- documented local Loki queries + +## Explicit Non-Ambiguities + +The following choices are intentional. 
+ +- Smithers MUST prefer truthful partial fidelity over fake completeness. +- Smithers MUST preserve raw event boundaries rather than collapsing everything into summaries. +- Smithers MUST keep large content in log bodies, not indexing labels. +- Smithers MUST retain a local source of truth when OTEL export fails. +- Smithers MUST separate assistant text from visible thinking. +- Smithers MUST define task completion in terms of verification evidence, not implementation effort. + +## Out of Scope for the First Implementation + +The first implementation MAY defer: + +- remote artifact storage +- cross-run session graph visualizations +- backend-specific dashboards beyond minimal verification queries +- universal reconstruction of provider-internal hidden reasoning + +If deferred, these items MUST be documented explicitly and MUST NOT be implied to exist. + +## Summary + +The required system is not “send some logs to Loki.” + +The required system is: + +- a canonical agent trace model +- explicit capability declarations per integration +- conservative capture of all observable upstream events +- durable local truth +- OTEL log export with stable correlation fields +- redaction before persistence and export +- verification that proves fidelity, completeness, and queryability + +Anything less produces observability that looks complete while remaining operationally unreliable. From 09bddf4b0afa7471ba7c456b1a72cf7f0e539842 Mon Sep 17 00:00:00 2001 From: Samuel Huber Date: Thu, 26 Mar 2026 20:29:55 +0100 Subject: [PATCH 2/7] fix(agent-trace): correct Pi RPC completion and add canonical trace capture Pi was working at the CLI level, but Smithers was finalizing RPC sessions on the first assistant turn_end. That is no longer correct when Pi emits a tool-use turn followed by additional turns and a final assistant answer. 
The result was that tool-using Pi sessions were truncated before the real final response, which made long-running implementation tasks look idle or schema-only even when Pi was continuing internally. This change fixes the RPC completion logic so Smithers waits past tool-use turns and can finalize from the true terminal assistant state. It also adds the canonical Pi-first trace event model, persistence path, and tests needed to make that trace capture observable and durable. --- src/SmithersEvent.ts | 113 ++++++++++++++++++++++- src/agent-trace.ts | 178 ++++++++++++++++++++++++++++++++++++ src/agents/BaseCliAgent.ts | 80 ++++++++++++---- src/agents/PiAgent.ts | 23 +++++ src/events.ts | 21 +++-- src/index.ts | 11 ++- src/observability/index.ts | 89 ++++++++++++++++++ src/tools/context.ts | 2 + tests/observability.test.ts | 91 ++++++++++++++++++ tests/pi-support.test.ts | 127 ++++++++++++++++++++++++- 10 files changed, 703 insertions(+), 32 deletions(-) create mode 100644 src/agent-trace.ts diff --git a/src/SmithersEvent.ts b/src/SmithersEvent.ts index 6b25e4ff..29d705f8 100644 --- a/src/SmithersEvent.ts +++ b/src/SmithersEvent.ts @@ -1,5 +1,113 @@ import type { RunStatus } from "./RunStatus"; +export const CANONICAL_AGENT_TRACE_VERSION = 1 as const; + +export type AgentTraceCompleteness = + | "full-observed" + | "partial-observed" + | "final-only" + | "capture-failed"; + +export type AgentTraceCaptureMode = + | "sdk-events" + | "rpc-events" + | "cli-json-stream" + | "cli-json" + | "cli-text" + | "artifact-import"; + +export type AgentTraceEventKind = + | "session.start" + | "session.end" + | "turn.start" + | "turn.end" + | "message.start" + | "message.update" + | "message.end" + | "assistant.text.delta" + | "assistant.thinking.delta" + | "assistant.message.final" + | "tool.execution.start" + | "tool.execution.update" + | "tool.execution.end" + | "tool.result" + | "retry.start" + | "retry.end" + | "compaction.start" + | "compaction.end" + | "stderr" + | "stdout" + | 
"usage" + | "capture.warning" + | "capture.error" + | "artifact.created"; + +export type AgentTraceEventPhase = + | "message" + | "tool" + | "agent" + | "session" + | "turn" + | "capture" + | "artifact"; + +export const PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS = [ + "assistant.text.delta", + "tool.execution.start", + "tool.execution.update", + "tool.execution.end", +] as const satisfies readonly AgentTraceEventKind[]; + +export const PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS = [ + "session.start", + "session.end", + "turn.start", + "turn.end", + "message.start", + "message.update", + "message.end", + "assistant.thinking.delta", + "assistant.message.final", + "tool.result", + "retry.start", + "retry.end", + "compaction.start", + "compaction.end", + "usage", + "artifact.created", +] as const satisfies readonly AgentTraceEventKind[]; + +export type SmithersAgentTraceEvent = { + type: "AgentTraceEvent"; + traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION; + traceCompleteness: AgentTraceCompleteness; + unsupportedEventKinds: AgentTraceEventKind[]; + runId: string; + workflowPath?: string | null; + workflowHash?: string | null; + nodeId: string; + iteration: number; + attempt: number; + timestampMs: number; + event: { + sequence: number; + kind: AgentTraceEventKind; + phase: AgentTraceEventPhase; + }; + source: { + agentFamily: "pi"; + agentId?: string; + model?: string; + captureMode: AgentTraceCaptureMode; + rawType?: string; + observed: boolean; + }; + payload: Record<string, unknown> | null; /* normalized per-kind fields; value shape varies by event kind, hence unknown */ + raw: unknown; + redaction: { applied: boolean; ruleIds?: string[] } | null; + annotations: Record<string, string | number | boolean> | null; /* scalar values only, matching the annotation rules in the trace spec */ +}; + export type SmithersEvent = | { type: "RunStarted"; runId: string; timestampMs: number } | { @@ -194,4 +302,7 @@ export type SmithersEvent = cacheWriteTokens?: number; reasoningTokens?: 
number; timestampMs: number; - }; + } + | SmithersAgentTraceEvent; + +export type ExtendedSmithersEvent = SmithersEvent; diff --git a/src/agent-trace.ts b/src/agent-trace.ts new file mode 100644 index
00000000..fa611798 --- /dev/null +++ b/src/agent-trace.ts @@ -0,0 +1,178 @@ +import { getToolContext } from "./tools/context"; +import { nowMs } from "./utils/time"; +import { + CANONICAL_AGENT_TRACE_VERSION, + PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS, +} from "./SmithersEvent"; +import type { + AgentTraceCaptureMode, + AgentTraceEventKind, + AgentTraceEventPhase, + SmithersAgentTraceEvent, +} from "./SmithersEvent"; + +// Local, per-attempt monotonic sequence for canonical trace events +const traceSeq = new WeakMap(); + +function nextTraceSeq(ctx: object): number { + const current = traceSeq.get(ctx) ?? 0; + const next = current + 1; + traceSeq.set(ctx, next); + return next; +} + +export type CaptureMode = Extract< + AgentTraceCaptureMode, + "cli-json" | "cli-json-stream" | "rpc-events" | "cli-text" +>; + +export type AgentTraceSourceMeta = { + agentId?: string; + model?: string; +}; + +export function emitAgentTrace( + kind: AgentTraceEventKind, + phase: AgentTraceEventPhase, + payload: Record | null | undefined, + raw: unknown, + rawType: string | undefined, + captureMode: CaptureMode, + sourceMeta?: AgentTraceSourceMeta, +) { + const ctx = getToolContext(); + if (!ctx || typeof ctx.emitEvent !== "function") return; // outside workflow execution + const ts = nowMs(); + const event: SmithersAgentTraceEvent = { + type: "AgentTraceEvent", + traceVersion: CANONICAL_AGENT_TRACE_VERSION, + traceCompleteness: "partial-observed", + unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS], + runId: ctx.runId, + workflowPath: ctx.workflowPath ?? null, + workflowHash: ctx.workflowHash ?? null, + nodeId: ctx.nodeId, + iteration: ctx.iteration, + attempt: ctx.attempt, + timestampMs: ts, + event: { + sequence: nextTraceSeq(ctx), + kind, + phase, + }, + source: { + agentFamily: "pi", + agentId: sourceMeta?.agentId, + model: sourceMeta?.model, + captureMode, + rawType, + observed: true, + }, + payload: payload ?? 
null, + raw, + redaction: null, + annotations: null, + }; + void ctx.emitEvent(event); +} + +export function capturePiEvent( + event: any, + captureMode: CaptureMode, + sourceMeta?: AgentTraceSourceMeta, +) { + if (!event || typeof event !== "object") return; + const type = String((event as any).type ?? ""); + + // Assistant text deltas + if (type === "message_update") { + const assistant = (event as any).assistantMessageEvent; + if (assistant && assistant.type === "text_delta" && typeof assistant.delta === "string") { + emitAgentTrace( + "assistant.text.delta", + "message", + { text: assistant.delta }, + event, + "message_update.text_delta", + captureMode, + sourceMeta, + ); + return; + } + } + + // Tool lifecycle (best-effort mapping of common Pi shapes) + if (type === "tool_execution_start") { + const call = (event as any).toolCall ?? (event as any).call ?? (event as any); + emitAgentTrace( + "tool.execution.start", + "tool", + { + toolCallId: String(call.id ?? call.toolCallId ?? ""), + toolName: String(call.name ?? call.toolName ?? call.tool ?? ""), + argsPreview: call.args ?? call.arguments ?? undefined, + }, + event, + "tool_execution_start", + captureMode, + sourceMeta, + ); + return; + } + + if (type === "tool_execution_update") { + const call = (event as any).toolCall ?? (event as any).call ?? (event as any); + emitAgentTrace( + "tool.execution.update", + "tool", + { + toolCallId: String(call.id ?? call.toolCallId ?? ""), + toolName: String(call.name ?? call.toolName ?? call.tool ?? ""), + }, + event, + "tool_execution_update", + captureMode, + sourceMeta, + ); + return; + } + + if (type === "tool_execution_end") { + const call = (event as any).toolCall ?? (event as any).call ?? (event as any); + const isError = Boolean((event as any).error || (event as any).failed); + emitAgentTrace( + "tool.execution.end", + "tool", + { + toolCallId: String(call.id ?? call.toolCallId ?? ""), + toolName: String(call.name ?? call.toolName ?? call.tool ?? 
""), + isError, + resultPreview: (event as any).result ?? (event as any).output ?? undefined, + }, + event, + "tool_execution_end", + captureMode, + sourceMeta, + ); + return; + } +} + +export function capturePiNdjson( + raw: string, + captureMode: CaptureMode, + sourceMeta?: AgentTraceSourceMeta, +) { + const lines = String(raw ?? "") + .split(/\r?\n/) + .map((l) => l.trim()) + .filter(Boolean); + for (const line of lines) { + try { + const parsed = JSON.parse(line); + capturePiEvent(parsed, captureMode, sourceMeta); + } catch { + // ignore malformed lines + } + } +} diff --git a/src/agents/BaseCliAgent.ts b/src/agents/BaseCliAgent.ts index 4c94842a..3d4f9605 100644 --- a/src/agents/BaseCliAgent.ts +++ b/src/agents/BaseCliAgent.ts @@ -62,6 +62,7 @@ type RunRpcCommandOptions = { | Promise | PiExtensionUiResponse | null; + onEvent?: (event: unknown) => void; }; type PromptParts = { @@ -711,16 +712,21 @@ export function runRpcCommandEffect(command: string, args: string[], options: Ru child.stdin.write(`${JSON.stringify(normalized)}\n`); }; - const handleLine = async (line: string) => { - inactivity.reset(); - let parsed: unknown; - try { - parsed = JSON.parse(line); - } catch { - return; - } - if (!parsed || typeof parsed !== "object") return; - const event = parsed as Record; + const handleLine = async (line: string) => { + inactivity.reset(); + let parsed: unknown; + try { + parsed = JSON.parse(line); + } catch { + return; + } + if (!parsed || typeof parsed !== "object") return; + try { + options.onEvent?.(parsed); + } catch { + // ignore observer errors + } + const event = parsed as Record; const type = event.type; if (type === "response" && event.command === "prompt" && event.success === false) { const errorMessage = typeof event.error === "string" ? 
event.error : "PI RPC prompt failed"; @@ -754,20 +760,56 @@ export function runRpcCommandEffect(command: string, args: string[], options: Ru if (message.usage) extractedUsage = message.usage; if (message.stopReason === "error" || message.stopReason === "aborted") { promptResponseError = message.errorMessage || `Request ${message.stopReason}`; - } - const extracted = finalMessage ? extractTextFromJsonValue(finalMessage) : undefined; - const text = extracted ?? textDeltas; - inactivity.clear(); - totalTimeout.clear(); - if (promptResponseError) { + inactivity.clear(); + totalTimeout.clear(); handleError(new Error(promptResponseError)); return; } - finalize(text, finalMessage ?? text); - child.stdin?.end(); - terminateChild(); + // Do not finalize on tool-use turns. Pi continues with additional + // turns after tool execution and only reaches the real final answer + // on a later turn/agent_end. + if (message.stopReason !== "toolUse") { + const extracted = finalMessage ? extractTextFromJsonValue(finalMessage) : undefined; + const text = extracted ?? textDeltas; + inactivity.clear(); + totalTimeout.clear(); + finalize(text, finalMessage ?? text); + child.stdin?.end(); + terminateChild(); + return; + } } } + if (type === "agent_end") { + const messages = (event as any).messages as Array | undefined; + if (Array.isArray(messages)) { + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message?.role === "assistant") { + finalMessage = message; + if (message.usage) extractedUsage = message.usage; + if (message.stopReason === "error" || message.stopReason === "aborted") { + promptResponseError = message.errorMessage || `Request ${message.stopReason}`; + } + break; + } + } + } + if (promptResponseError) { + inactivity.clear(); + totalTimeout.clear(); + handleError(new Error(promptResponseError)); + return; + } + const extracted = finalMessage ? extractTextFromJsonValue(finalMessage) : undefined; + const text = extracted ?? 
textDeltas; + inactivity.clear(); + totalTimeout.clear(); + finalize(text, finalMessage ?? text); + child.stdin?.end(); + terminateChild(); + return; + } if (type === "extension_ui_request") { await maybeWriteExtensionResponse(event as PiExtensionUiRequest); } diff --git a/src/agents/PiAgent.ts b/src/agents/PiAgent.ts index 7d659c2b..0f7a62a0 100644 --- a/src/agents/PiAgent.ts +++ b/src/agents/PiAgent.ts @@ -14,6 +14,8 @@ import { import type { BaseCliAgentOptions, PiExtensionUiRequest, PiExtensionUiResponse } from "./BaseCliAgent"; import { getToolContext } from "../tools/context"; import { SmithersError } from "../utils/errors"; +import { capturePiNdjson } from "../agent-trace"; +import { capturePiEvent } from "../agent-trace"; export type { PiExtensionUiRequest, PiExtensionUiResponse }; @@ -186,6 +188,17 @@ export class PiAgent extends BaseCliAgent { const extractedText = mode === "json" ? (extractTextFromPiNdjson(rawText) ?? rawText) : rawText; + // Capture canonical trace events for Pi NDJSON stream (assistant deltas, tool lifecycle) + try { + if (mode === "json") { + capturePiNdjson(rawText, "cli-json", { + agentId: this.id, + model: this.opts.model ?? this.model, + }); + } + } catch { + // Best-effort capture; never fail the agent call due to trace capture. + } const output = tryParseJson(extractedText); return buildGenerateResult(extractedText, output, this.opts.model ?? "pi"); } @@ -201,6 +214,16 @@ export class PiAgent extends BaseCliAgent { maxOutputBytes: this.maxOutputBytes ?? getToolContext()?.maxOutputBytes, onStderr: options?.onStderr, onExtensionUiRequest: this.opts.onExtensionUiRequest, + onEvent: (evt) => { + try { + capturePiEvent(evt, "rpc-events", { + agentId: this.id, + model: this.opts.model ?? this.model, + }); + } catch { + /* ignore */ + } + }, }); return buildGenerateResult(rpcResult.text, rpcResult.output, this.opts.model ?? 
"pi", rpcResult.usage); diff --git a/src/events.ts b/src/events.ts index 99256de6..e83fd306 100644 --- a/src/events.ts +++ b/src/events.ts @@ -1,11 +1,12 @@ import { EventEmitter } from "node:events"; -import * as FileSystem from "@effect/platform/FileSystem"; +import { promises as fs } from "node:fs"; import { join } from "node:path"; import { Effect } from "effect"; import type { SmithersEvent } from "./SmithersEvent"; import { fromPromise } from "./effect/interop"; import { runPromise } from "./effect/runtime"; import { trackEvent } from "./effect/metrics"; +import { isAgentTraceEvent, toPersistedAgentTraceRecord } from "./observability"; export class EventBus extends EventEmitter { private seq = 0; @@ -143,14 +144,16 @@ export class EventBus extends EventEmitter { private persistLogEffect(event: SmithersEvent) { if (!this.logDir) return Effect.void; const dir = this.logDir; - return Effect.gen(function* () { - const fs = yield* FileSystem.FileSystem; - yield* fs.makeDirectory(dir, { recursive: true }); - const file = join(dir, "stream.ndjson"); - const line = JSON.stringify(event) + "\n"; - const current = yield* Effect.option(fs.readFileString(file, "utf8")); - const prefix = current._tag === "Some" ? 
current.value : ""; - yield* fs.writeFileString(file, prefix + line); + return fromPromise("persist event log", async () => { + await fs.mkdir(dir, { recursive: true }); + await fs.appendFile(join(dir, "stream.ndjson"), `${JSON.stringify(event)}\n`, "utf8"); + if (isAgentTraceEvent(event)) { + await fs.appendFile( + join(dir, "agent-trace.ndjson"), + `${JSON.stringify(toPersistedAgentTraceRecord(event))}\n`, + "utf8", + ); + } }); } } diff --git a/src/index.ts b/src/index.ts index 893f9ace..0bddc29a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,7 +12,13 @@ export type { SchemaRegistryEntry } from "./SchemaRegistryEntry"; export type { SmithersWorkflow } from "./SmithersWorkflow"; export type { SmithersCtx } from "./SmithersCtx"; export type { OutputAccessor, InferRow, InferOutputEntry } from "./OutputAccessor"; -export type { SmithersEvent } from "./SmithersEvent"; +export type { + AgentTraceCaptureMode, + AgentTraceCompleteness, + AgentTraceEventKind, + SmithersAgentTraceEvent, + SmithersEvent, +} from "./SmithersEvent"; export type { SmithersError } from "./SmithersError"; export { SmithersError as SmithersErrorInstance, isSmithersError, errorToJson } from "./utils/errors"; export type { SmithersErrorCode } from "./utils/errors"; @@ -99,10 +105,13 @@ export type { ServerOptions } from "./server/index"; // Observability export { SmithersObservability, + PI_AGENT_TRACE_CAPABILITY_PROFILE, createSmithersObservabilityLayer, createSmithersOtelLayer, createSmithersRuntimeLayer, + isAgentTraceEvent, smithersMetrics, + toPersistedAgentTraceRecord, trackSmithersEvent, activeNodes, activeRuns, diff --git a/src/observability/index.ts b/src/observability/index.ts index b29835ff..0439b524 100644 --- a/src/observability/index.ts +++ b/src/observability/index.ts @@ -65,6 +65,17 @@ import { updateProcessMetrics, vcsDuration, } from "../effect/metrics"; +import { + CANONICAL_AGENT_TRACE_VERSION, + PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS, + 
PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS, +} from "../SmithersEvent"; +import type { + AgentTraceCaptureMode, + AgentTraceCompleteness, + AgentTraceEventKind, + SmithersAgentTraceEvent, +} from "../SmithersEvent"; export type SmithersLogFormat = "json" | "pretty" | "string" | "logfmt"; @@ -101,6 +112,84 @@ export class SmithersObservability extends Context.Tag("SmithersObservability")< SmithersObservabilityService >() {} +export type AgentTraceCapabilityProfile = { + readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION; + readonly agentFamily: "pi"; + readonly captureModes: readonly AgentTraceCaptureMode[]; + readonly traceCompleteness: AgentTraceCompleteness; + readonly supportedEventKinds: readonly AgentTraceEventKind[]; + readonly unsupportedEventKinds: readonly AgentTraceEventKind[]; +}; + +export type PersistedAgentTraceRecord = { + readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION; + readonly traceCompleteness: AgentTraceCompleteness; + readonly unsupportedEventKinds: AgentTraceEventKind[]; + readonly runId: string; + readonly workflowPath: string | null; + readonly workflowHash: string | null; + readonly nodeId: string; + readonly iteration: number; + readonly attempt: number; + readonly timestampMs: number; + readonly eventSequence: number; + readonly eventKind: AgentTraceEventKind; + readonly eventPhase: SmithersAgentTraceEvent["event"]["phase"]; + readonly agentFamily: SmithersAgentTraceEvent["source"]["agentFamily"]; + readonly agentId: string | null; + readonly agentModel: string | null; + readonly captureMode: AgentTraceCaptureMode; + readonly rawType: string | null; + readonly observed: boolean; + readonly payload: Record | null; + readonly raw: unknown; + readonly redaction: SmithersAgentTraceEvent["redaction"]; + readonly annotations: SmithersAgentTraceEvent["annotations"]; +}; + +export const PI_AGENT_TRACE_CAPABILITY_PROFILE: AgentTraceCapabilityProfile = { + traceVersion: CANONICAL_AGENT_TRACE_VERSION, + agentFamily: "pi", + 
captureModes: ["cli-json", "rpc-events"], + traceCompleteness: "partial-observed", + supportedEventKinds: [...PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS], + unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS], +}; + +export function isAgentTraceEvent(event: unknown): event is SmithersAgentTraceEvent { + return !!event && typeof event === "object" && (event as { type?: unknown }).type === "AgentTraceEvent"; +} + +export function toPersistedAgentTraceRecord( + event: SmithersAgentTraceEvent, +): PersistedAgentTraceRecord { + return { + traceVersion: event.traceVersion, + traceCompleteness: event.traceCompleteness, + unsupportedEventKinds: [...event.unsupportedEventKinds], + runId: event.runId, + workflowPath: event.workflowPath ?? null, + workflowHash: event.workflowHash ?? null, + nodeId: event.nodeId, + iteration: event.iteration, + attempt: event.attempt, + timestampMs: event.timestampMs, + eventSequence: event.event.sequence, + eventKind: event.event.kind, + eventPhase: event.event.phase, + agentFamily: event.source.agentFamily, + agentId: event.source.agentId ?? null, + agentModel: event.source.model ?? null, + captureMode: event.source.captureMode, + rawType: event.source.rawType ?? null, + observed: event.source.observed, + payload: event.payload ?? 
null, + raw: event.raw, + redaction: event.redaction, + annotations: event.annotations, + }; +} + export const prometheusContentType = "text/plain; version=0.0.4; charset=utf-8"; diff --git a/src/tools/context.ts b/src/tools/context.ts index c585790c..cd202db0 100644 --- a/src/tools/context.ts +++ b/src/tools/context.ts @@ -8,6 +8,8 @@ export type ToolContext = { nodeId: string; iteration: number; attempt: number; + workflowPath?: string | null; + workflowHash?: string | null; rootDir: string; allowNetwork: boolean; maxOutputBytes: number; diff --git a/tests/observability.test.ts b/tests/observability.test.ts index 0b80a2f1..5cadf2d5 100644 --- a/tests/observability.test.ts +++ b/tests/observability.test.ts @@ -1,11 +1,14 @@ import { describe, expect, test } from "bun:test"; import { Metric } from "effect"; import { + PI_AGENT_TRACE_CAPABILITY_PROFILE, httpRequestDuration, renderPrometheusMetrics, runsTotal, + toPersistedAgentTraceRecord, } from "../src/observability"; import { runPromise } from "../src/effect/runtime"; +import type { SmithersAgentTraceEvent } from "../src/SmithersEvent"; describe("Prometheus metrics", () => { test("renders built-in Smithers metrics in Prometheus exposition format", async () => { @@ -23,3 +26,91 @@ describe("Prometheus metrics", () => { expect(output).toContain("smithers_http_request_duration_ms_count"); }); }); + +describe("agent trace observability", () => { + test("declares the implemented Pi trace slice truthfully", () => { + expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceVersion).toBe(1); + expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.agentFamily).toBe("pi"); + expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceCompleteness).toBe("partial-observed"); + expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.supportedEventKinds).toEqual([ + "assistant.text.delta", + "tool.execution.start", + "tool.execution.update", + "tool.execution.end", + ]); + expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.unsupportedEventKinds).toContain( + "assistant.thinking.delta", + 
); + expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("cli-json"); + expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("rpc-events"); + }); + + test("flattens canonical trace events into queryable persisted records", () => { + const event: SmithersAgentTraceEvent = { + type: "AgentTraceEvent", + traceVersion: 1, + traceCompleteness: "partial-observed", + unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"], + runId: "run-1", + workflowPath: "/tmp/workflow.tsx", + workflowHash: "workflow-hash", + nodeId: "node-a", + iteration: 2, + attempt: 3, + timestampMs: 123, + event: { + sequence: 4, + kind: "tool.execution.end", + phase: "tool", + }, + source: { + agentFamily: "pi", + agentId: "pi-agent-id", + model: "gpt-5.2-codex", + captureMode: "rpc-events", + rawType: "tool_execution_end", + observed: true, + }, + payload: { + toolCallId: "tool-1", + toolName: "read", + isError: false, + }, + raw: { type: "tool_execution_end" }, + redaction: null, + annotations: { "custom.test": true }, + }; + + const record = toPersistedAgentTraceRecord(event); + + expect(record).toEqual({ + traceVersion: 1, + traceCompleteness: "partial-observed", + unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"], + runId: "run-1", + workflowPath: "/tmp/workflow.tsx", + workflowHash: "workflow-hash", + nodeId: "node-a", + iteration: 2, + attempt: 3, + timestampMs: 123, + eventSequence: 4, + eventKind: "tool.execution.end", + eventPhase: "tool", + agentFamily: "pi", + agentId: "pi-agent-id", + agentModel: "gpt-5.2-codex", + captureMode: "rpc-events", + rawType: "tool_execution_end", + observed: true, + payload: { + toolCallId: "tool-1", + toolName: "read", + isError: false, + }, + raw: { type: "tool_execution_end" }, + redaction: null, + annotations: { "custom.test": true }, + }); + }); +}); diff --git a/tests/pi-support.test.ts b/tests/pi-support.test.ts index ca7c46bc..170d16cf 100644 --- a/tests/pi-support.test.ts 
+++ b/tests/pi-support.test.ts @@ -3,6 +3,9 @@ import { afterEach, describe, expect, test } from "bun:test"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { PiAgent } from "../src/agents"; + import { EventBus } from "../src/events"; + import { runWithToolContext } from "../src/tools/context"; + import type { SmithersAgentTraceEvent } from "../src/SmithersEvent"; const originalPath = process.env.PATH ?? ""; @@ -60,7 +63,8 @@ import { afterEach, describe, expect, test } from "bun:test"; thinking: "low", verbose: true, env: { PATH: process.env.PATH! }, - }); + }); + const result = await agent.generate({ messages: [ @@ -309,7 +313,8 @@ import { afterEach, describe, expect, test } from "bun:test"; mode: "json", model: "test-model", env: { PATH: process.env.PATH! }, - }); + }); + const result = await agent.generate({ messages: [{ role: "user", content: "Hello" }], @@ -324,6 +329,124 @@ import { afterEach, describe, expect, test } from "bun:test"; } }); + test("PiAgent json mode emits canonical trace events and persists them", async () => { + // Fake Pi emits NDJSON with assistant text deltas and a tool lifecycle + const fake = await makeFakePi(` +const lines = [ + JSON.stringify({ type: "session", version: 3, id: "sess-1" }), + JSON.stringify({ type: "agent_start" }), + JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: "Hello" } }), + JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: ", world" } }), + JSON.stringify({ type: "tool_execution_start", toolCall: { id: "t1", name: "read", args: { path: "README.md" } } }), + JSON.stringify({ type: "tool_execution_end", toolCall: { id: "t1", name: "read" }, result: { ok: true } }), + JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Hello, world" }], stopReason: "stop" } }) +]; +process.stdout.write(lines.join("\\n") + "\\n"); +`); + + const memoryEvents: { seq: 
number; row: any }[] = []; + const db = { + insertEventWithNextSeq: ({ runId, timestampMs, type, payloadJson }: any) => { + const seq = (memoryEvents.length > 0 ? memoryEvents[memoryEvents.length - 1].seq : -1) + 1; + memoryEvents.push({ seq, row: { runId, timestampMs, type, payloadJson } }); + return Promise.resolve(seq); + }, + } as any; + + const logDir = await mkdtemp(join(tmpdir(), "smithers-agent-trace-")); + const bus = new EventBus({ db, logDir }); + + try { + process.env.PATH = `${fake.dir}:${originalPath}`; + const agent = new PiAgent({ mode: "json", model: "pi-test-model", env: { PATH: process.env.PATH! } }); + + const captured: SmithersAgentTraceEvent[] = []; + + await runWithToolContext( + { + db: db as any, + runId: "run-1", + nodeId: "node-A", + iteration: 1, + attempt: 1, + workflowPath: "/tmp/workflows/pi-workflow.tsx", + workflowHash: "workflow-hash-1", + rootDir: process.cwd(), + allowNetwork: true, + maxOutputBytes: 200_000, + timeoutMs: 30_000, + seq: 0, + emitEvent: (e: any) => { + if (e && e.type === "AgentTraceEvent") { + captured.push(e as SmithersAgentTraceEvent); + } + return bus.emitEventQueued(e as any); + }, + }, + async () => { + const result = await agent.generate({ messages: [{ role: "user", content: "Ping" }] }); + expect(result.text).toContain("Hello, world"); + }, + ); + await bus.flush(); + + const sequences = captured.map((e) => e.event.sequence); + expect(sequences).toEqual([1, 2, 3, 4]); + + // We should have assistant deltas and tool lifecycle mapped + const kinds = captured.map((e) => e.event.kind); + expect(kinds).toEqual([ + "assistant.text.delta", + "assistant.text.delta", + "tool.execution.start", + "tool.execution.end", + ]); + + // Correlation and truthfulness fields present + for (const e of captured) { + expect(e.traceVersion).toBe(1); + expect(e.traceCompleteness).toBe("partial-observed"); + expect(e.unsupportedEventKinds).toContain("assistant.thinking.delta"); + expect(e.runId).toBe("run-1"); + 
expect(e.workflowPath).toBe("/tmp/workflows/pi-workflow.tsx"); + expect(e.workflowHash).toBe("workflow-hash-1"); + expect(e.nodeId).toBe("node-A"); + expect(e.iteration).toBe(1); + expect(e.attempt).toBe(1); + expect(e.source.agentFamily).toBe("pi"); + expect(e.source.agentId).toBe(agent.id); + expect(e.source.model).toBe("pi-test-model"); + expect(e.source.captureMode).toBe("cli-json"); + } + + // Persisted to DB rows as durable event entries + const persistedTraceRows = memoryEvents.filter((r) => r.row.type === "AgentTraceEvent"); + expect(persistedTraceRows).toHaveLength(captured.length); + expect( + persistedTraceRows.map((row) => JSON.parse(row.row.payloadJson).event.sequence), + ).toEqual([1, 2, 3, 4]); + + // Persisted to a dedicated, flattened local trace log for later querying/export. + const persistedTraceLog = await readFile(join(logDir, "agent-trace.ndjson"), "utf8"); + const persistedTraceRecords = persistedTraceLog + .trim() + .split(/\r?\n/) + .filter(Boolean) + .map((line) => JSON.parse(line) as Record); + expect(persistedTraceRecords).toHaveLength(4); + expect(persistedTraceRecords.map((record) => record.eventKind)).toEqual(kinds); + expect(persistedTraceRecords[0]?.traceCompleteness).toBe("partial-observed"); + expect(persistedTraceRecords[0]?.unsupportedEventKinds).toContain("assistant.thinking.delta"); + expect(persistedTraceRecords[0]?.runId).toBe("run-1"); + expect(persistedTraceRecords[0]?.nodeId).toBe("node-A"); + expect(persistedTraceRecords[0]?.attempt).toBe(1); + expect(persistedTraceRecords[0]?.captureMode).toBe("cli-json"); + } finally { + await rm(fake.dir, { recursive: true, force: true }); + await rm(logDir, { recursive: true, force: true }); + } + }); + test("PiAgent json mode extracts JSON from text content in turn_end", async () => { // Simulates pi output where the agent returns JSON in the text content const fake = await makeFakePi(` From d8002e2e45ab8225e3a299008fd69b960deb1731 Mon Sep 17 00:00:00 2001 From: Samuel Huber Date: 
Thu, 26 Mar 2026 21:05:21 +0100 Subject: [PATCH 3/7] chore(pi): remove OTEL trace spec doc from PI bookmark --- docs/concepts/agent-trace-otel-logs-spec.mdx | 1063 ------------------ 1 file changed, 1063 deletions(-) delete mode 100644 docs/concepts/agent-trace-otel-logs-spec.mdx diff --git a/docs/concepts/agent-trace-otel-logs-spec.mdx b/docs/concepts/agent-trace-otel-logs-spec.mdx deleted file mode 100644 index 12307b6b..00000000 --- a/docs/concepts/agent-trace-otel-logs-spec.mdx +++ /dev/null @@ -1,1063 +0,0 @@ ---- -title: Agent Trace OTEL Logs Specification -description: Full-fidelity specification for capturing agent-visible execution traces and exporting them as OpenTelemetry logs to Loki or any OTLP-compatible backend. ---- - -This document specifies how Smithers must capture, normalize, persist, export, and verify agent execution traces as OpenTelemetry logs. - -This is a design specification, not an implementation sketch. Every requirement in this document is normative unless explicitly marked as non-normative. - -## Status - -- Intended scope: new observability surface for agent trace logs -- Intended audience: maintainers implementing runtime, agent, observability, and verification changes -- Intended outcome: a system where every supported agent run produces a complete, queryable, correlated trace of what Smithers could observe - -## Problem Statement - -Smithers currently captures: - -- durable workflow lifecycle events -- structured application logs -- traces and metrics for runtime behavior -- partial agent output in some cases - -Smithers does not currently guarantee a full-fidelity record of agent-visible execution behavior across all agent integrations. 
- -In particular: - -- `PiAgent` exposes a rich event stream, but Smithers currently collapses it to final text plus usage -- several CLI agents emit machine-readable output that Smithers does not preserve as first-class trace events -- SDK-based agents return final results and rely on Smithers-side tool logging, but do not provide a canonical agent trace model -- there is no OTEL logs pipeline in the local collector configuration - -The result is that operators cannot reliably answer questions such as: - -- What did the agent stream before it failed? -- Which tools did the agent invoke, in what order, with which visible arguments and results? -- Did the agent emit visible thinking content, compaction events, retries, or queued follow-up behavior? -- Can we reconstruct exactly what Smithers observed for a given run, node, and attempt? -- Can we query this in Grafana Loki or another OTLP log backend with stable run-level correlation? - -This specification addresses that gap. - -## Goals - -The system defined here MUST: - -- capture the fullest agent-visible trace Smithers can obtain for each supported agent -- export that trace as OTEL logs to Loki or any OTLP-compatible log backend -- preserve run correlation through stable attributes such as `run.id`, `workflow.path`, `node.id`, `attempt`, and `iteration` -- preserve raw trace fidelity without forcing operators to infer behavior from summary logs -- remain explicit about what was directly observed versus what was derived by Smithers -- provide deterministic verification criteria for correctness and task completion - -## Non-Goals - -The system defined here MUST NOT claim to provide: - -- provider-internal hidden chain-of-thought when the upstream agent or SDK does not expose it -- exact reconstruction of invisible model-side planning not surfaced through events, messages, or tool calls -- a replacement for the durable Smithers event log or database -- a guarantee that every backend will index arbitrary 
high-cardinality fields efficiently - -## Core Principle - -Smithers MUST export what it observed, not what it inferred. - -Every exported trace record MUST be classifiable as one of: - -- raw upstream agent event -- raw Smithers runtime event -- Smithers-derived normalization of one raw event -- Smithers-generated transport or export diagnostic - -If a record is derived, the derivation MUST be explicit. - -## Definitions - -### Agent Trace - -An agent trace is the ordered set of agent-visible execution records associated with one Smithers node attempt. - -Agent trace records include, where available: - -- streamed assistant text -- streamed visible thinking content -- message lifecycle events -- tool call lifecycle events -- tool result lifecycle events -- compaction and retry events -- session metadata -- final assistant message -- final tool results -- agent stderr diagnostics when those are observable to Smithers - -### Full Trace - -For a given agent integration, a full trace means all upstream-visible records Smithers can access without patching the upstream model provider. - -Full trace does not mean hidden reasoning. It means all observable records available through: - -- subprocess stdout or stderr -- structured CLI output modes -- RPC event streams -- SDK callback/event surfaces -- persisted session artifacts intentionally provided by the agent system - -### Canonical Trace Event - -A canonical trace event is the Smithers-normalized representation of one raw observed record. - -Canonical trace events are the unit exported to OTEL logs and optionally persisted durably by Smithers. - -### Attempt - -An attempt is one execution of one node at one iteration with one attempt number. A canonical agent trace is scoped to exactly one attempt. - -## Invariants - -The implementation MUST satisfy all of the following invariants. 
- -### Identity Invariants - -Every canonical trace event MUST include: - -- `runId` -- `nodeId` when the event is attempt-scoped -- `iteration` when the event is attempt-scoped -- `attempt` when the event is attempt-scoped -- `timestampMs` -- `source.agentFamily` -- `source.captureMode` -- `event.kind` -- `event.sequence` - -### Ordering Invariants - -Canonical trace events for a single attempt MUST be totally ordered by `event.sequence`. - -If upstream events arrive out of wall-clock order, Smithers MUST preserve receive order and MUST NOT reorder them after capture. - -`event.sequence` MUST be monotonic within one attempt. - -### Fidelity Invariants - -Smithers MUST preserve raw upstream payloads for canonical trace events unless a redaction rule requires modification. - -If redaction occurs: - -- the record MUST indicate redaction occurred -- the redaction reason MUST be attached -- the original raw value MUST NOT be exported - -### Correlation Invariants - -Every OTEL log record derived from a canonical trace event MUST be queryable by: - -- run -- workflow path -- node -- iteration -- attempt -- agent family -- event kind - -### Completeness Invariants - -If Smithers receives a parseable upstream event, Smithers MUST either: - -- convert it into a canonical trace event and export it -- or emit a diagnostic record explaining why it was dropped - -Silent drops are not allowed. - -### Truthfulness Invariants - -If an agent integration cannot expose a certain class of events, the system MUST record capability absence explicitly and MUST NOT pretend completeness. - -Example: - -- if an SDK-based integration does not expose thinking deltas, Smithers MUST mark that event class as unsupported for that agent family - -## Scope of Observability - -The system covers three layers. - -### Layer 1: Canonical Runtime Record - -Smithers SHOULD persist canonical trace events durably for replay and audit, alongside existing run events and attempt data. 
- -### Layer 2: OTEL Log Export - -Smithers MUST export canonical trace events as OTEL logs when OTEL log export is enabled. - -### Layer 3: Summary Metrics and Diagnostics - -Smithers MAY derive metrics from canonical trace events, but those metrics are secondary and MUST NOT be the sole evidence of capture correctness. - -## Agent Capability Model - -Each agent family MUST declare an explicit trace capability profile. - -The capability profile MUST enumerate support for: - -- session metadata -- assistant text deltas -- visible thinking deltas -- final assistant message -- tool execution start -- tool execution update -- tool execution end -- retry events -- compaction events -- raw stderr diagnostics -- persisted session artifact - -### PiAgent - -`PiAgent` MUST be treated as a high-fidelity integration. - -Available sources include: - -- JSON event stream mode -- RPC mode event stream -- Pi session JSONL artifacts - -Pi exposes event types such as: - -- `agent_start` -- `agent_end` -- `turn_start` -- `turn_end` -- `message_start` -- `message_update` -- `message_end` -- `tool_execution_start` -- `tool_execution_update` -- `tool_execution_end` -- `auto_compaction_start` -- `auto_compaction_end` -- `auto_retry_start` -- `auto_retry_end` - -Visible thinking content emitted by Pi MUST be captured as trace content. - -Pi session artifacts, when enabled and available, SHOULD be recorded as canonical artifacts associated with the attempt. - -### CodexAgent - -`CodexAgent` MUST be treated as a structured CLI integration with medium fidelity. - -Codex emits JSON output. Smithers MUST preserve all parseable structured events made available by that mode. - -If Codex exposes usage, step, message, tool, or completion events, Smithers MUST map them to canonical trace events rather than extracting only final text. - -If a given Codex event schema is unstable, Smithers MUST preserve the raw event payload and classify the normalization conservatively. 
- -### ClaudeCodeAgent - -`ClaudeCodeAgent` MUST be treated as a structured CLI integration with medium fidelity. - -When `stream-json` is enabled, Smithers MUST preserve all parseable stream records and map them into canonical trace events where possible. - -Partial assistant messages, tool call indicators, and usage events MUST NOT be discarded if they are parseable. - -### GeminiAgent - -`GeminiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on output mode. - -Smithers MUST preserve parseable structured output and MUST explicitly mark unsupported event classes when the CLI exposes only final or coarse-grained results. - -### KimiAgent - -`KimiAgent` MUST be treated as a structured CLI integration with low to medium fidelity depending on `outputFormat`. - -If `stream-json` mode is used, Smithers MUST preserve event records. If only final text is available, Smithers MUST mark the trace as partial. - -### OpenAIAgent and AnthropicAgent - -`OpenAIAgent` and `AnthropicAgent` MUST be treated as SDK integrations. - -They do not inherently expose a rich subprocess event stream in the current Smithers wrapper. - -For these agents, Smithers MUST capture: - -- prompt dispatch boundaries -- final assistant response -- token usage when surfaced -- Smithers-side tool execution start and end -- visible tool output recorded by Smithers -- node output emitted by Smithers if any - -Smithers MUST mark thinking deltas and message lifecycle as unsupported unless the underlying SDK path is instrumented to provide them. - -### AmpAgent and ForgeAgent - -`AmpAgent` and `ForgeAgent` MUST be treated as text-first subprocess integrations unless a structured mode is added. - -Smithers MUST capture: - -- final response text -- stderr diagnostics -- Smithers-side tool execution and runtime events - -Smithers MUST mark full trace fidelity as unsupported for these integrations. 
- -## Capture Modes - -Each attempt MUST declare one capture mode: - -- `sdk-events` -- `rpc-events` -- `cli-json-stream` -- `cli-json` -- `cli-text` -- `artifact-import` - -Capture mode is part of the canonical attempt metadata and MUST be exported with every trace record. - -## Canonical Data Model - -Smithers MUST introduce a canonical event model for agent traces. - -The exact TypeScript shape is an implementation detail, but the semantic fields are mandatory. - -### Attempt Metadata - -Each attempt MUST expose: - -- `traceVersion` -- `agentFamily` -- `agentId` -- `model` -- `captureMode` -- `traceCompleteness` -- `unsupportedEventKinds` -- `traceStartedAtMs` -- `traceFinishedAtMs` -- `rawArtifactRefs` - -### `traceCompleteness` - -`traceCompleteness` MUST be one of: - -- `full-observed` -- `partial-observed` -- `final-only` -- `capture-failed` - -Definitions: - -- `full-observed`: Smithers captured every event class the integration claims to support -- `partial-observed`: Smithers captured some but not all supported classes -- `final-only`: only final response and coarse metadata were available -- `capture-failed`: Smithers expected trace events but could not capture them reliably - -### Canonical Event Fields - -Every canonical trace event MUST include: - -- `traceVersion` -- `runId` -- `workflowPath` -- `workflowHash` when available -- `nodeId` -- `iteration` -- `attempt` -- `timestampMs` -- `event.sequence` -- `event.kind` -- `event.phase` -- `source.agentFamily` -- `source.captureMode` -- `source.rawType` -- `source.observed` -- `payload` -- `raw` -- `redaction` -- `annotations` - -### `event.kind` - -`event.kind` MUST be chosen from a controlled vocabulary. 
- -The initial vocabulary MUST include: - -- `session.start` -- `session.end` -- `turn.start` -- `turn.end` -- `message.start` -- `message.update` -- `message.end` -- `assistant.text.delta` -- `assistant.thinking.delta` -- `assistant.message.final` -- `tool.execution.start` -- `tool.execution.update` -- `tool.execution.end` -- `tool.result` -- `retry.start` -- `retry.end` -- `compaction.start` -- `compaction.end` -- `stderr` -- `stdout` -- `usage` -- `capture.warning` -- `capture.error` -- `artifact.created` - -No integration-specific naming is allowed in `event.kind`. Integration-specific names MUST remain in `source.rawType`. - -### `event.phase` - -`event.phase` MUST be one of: - -- `agent` -- `turn` -- `message` -- `tool` -- `session` -- `capture` -- `artifact` - -### `source.observed` - -`source.observed` MUST be a boolean indicating whether the payload was directly observed from the upstream integration. - -Derived normalization records MUST set `source.observed` to `false`. - -### `payload` - -`payload` MUST contain normalized fields intended for stable querying and display. - -Examples: - -- for `assistant.text.delta`: `{ text: string }` -- for `assistant.thinking.delta`: `{ text: string }` -- for `tool.execution.start`: `{ toolCallId: string, toolName: string, argsPreview: unknown }` -- for `tool.execution.end`: `{ toolCallId: string, toolName: string, isError: boolean, resultPreview: unknown }` - -### `raw` - -`raw` MUST contain the raw upstream object or raw text fragment as captured after redaction. - -If no raw form exists, `raw` MAY be `null`. - -## Custom Annotations - -The system MUST support user-defined annotations attached at run start. - -Annotations MUST be: - -- provided in run options and server APIs -- stored durably on the run -- merged into every canonical trace event at export time - -Annotations MUST support scalar values only: - -- string -- number -- boolean - -Nested objects and arrays MUST be rejected or flattened before run start. 
The behavior MUST be explicit and deterministic. - -The following annotation namespaces are reserved: - -- `smithers.*` -- `run.*` -- `workflow.*` -- `node.*` -- `agent.*` -- `otel.*` - -User annotations SHOULD use a `custom.*` prefix in canonical export. - -## Workflow Metadata Requirements - -Every canonical trace event MUST include: - -- `workflow.path` as an OTEL attribute when available -- `workflow.hash` as an OTEL attribute when available - -If `workflow.path` is unavailable, Smithers MUST export `workflow.path` as absent rather than inventing a placeholder path. - -## Redaction Model - -Redaction is mandatory because agent traces can contain sensitive content. - -The implementation MUST support: - -- disabled redaction -- default redaction -- custom redaction rules - -### Minimum Default Redaction - -Default redaction MUST handle at least: - -- API keys -- bearer tokens -- common secret env vars -- authorization headers -- cookie headers -- explicitly configured secret literals - -### Redaction Semantics - -Redaction MUST occur before: - -- durable canonical trace persistence -- OTEL log export -- artifact snapshot export - -If redaction modifies content, the trace event MUST record: - -- `redaction.applied = true` -- `redaction.ruleIds = string[]` - -## Export Model - -Canonical trace events MUST be exportable as OTEL logs. - -### OTEL Collector Requirements - -The collector configuration MUST define a `logs` pipeline. - -The logs pipeline MUST accept OTLP input and MUST support at least one of: - -- OTLP logs exporter -- Loki exporter - -The local development stack SHOULD include Loki for verification and human inspection. - -### OTEL Record Shape - -For each canonical trace event, Smithers MUST emit one OTEL log record. 
- -The log body MUST contain a compact structured JSON representation of: - -- canonical payload -- raw payload when configured -- redaction metadata - -The log attributes MUST include: - -- `service.name` -- `smithers.trace.version` -- `run.id` -- `workflow.path` -- `workflow.hash` when available -- `node.id` when available -- `node.iteration` when available -- `node.attempt` when available -- `agent.family` -- `agent.id` when available -- `agent.model` when available -- `agent.capture_mode` -- `trace.completeness` -- `event.kind` -- `event.phase` -- `event.sequence` -- `source.raw_type` -- `source.observed` - -Custom annotations MUST be exported as OTEL attributes under `custom.*`. - -### Attribute Cardinality Rules - -The following MUST be attributes: - -- run identifiers -- workflow identifiers -- node identifiers -- attempt identifiers -- event kind -- agent family -- capture mode - -The following MUST NOT be indexed as labels in Loki-specific configurations: - -- full prompt text -- full response text -- thinking text -- tool args bodies -- tool result bodies -- arbitrary user free-text annotations - -These large fields MUST remain in the log body. - -### Severity Mapping - -Severity SHOULD be assigned as follows: - -- normal trace events: `INFO` -- stderr and non-terminal capture anomalies: `WARN` -- capture failures and export failures: `ERROR` - -Severity MUST NOT be used to encode event kind. - -## Persistence Model - -Canonical trace events SHOULD be durably persisted by Smithers in addition to OTEL export. - -If durable persistence is implemented, the persistence layer MUST support: - -- ordered replay by attempt -- filtering by event kind -- pagination by sequence -- artifact references - -OTEL export MUST NOT be the only storage location for canonical trace data. - -## Artifact Model - -Some agent integrations expose richer external artifacts than can be represented comfortably as log streams. 
- -Examples: - -- Pi session JSONL files -- raw CLI JSON event transcripts -- exported HTML or JSONL session artifacts - -Smithers SHOULD support trace artifacts with metadata: - -- `artifact.kind` -- `artifact.path` -- `artifact.contentType` -- `artifact.bytes` -- `artifact.createdAtMs` -- `artifact.redacted` - -Artifact creation MUST also emit canonical `artifact.created` events. - -## Failure Model - -The implementation MUST classify failures explicitly. - -### Capture Failure - -Capture failure means Smithers could not reliably obtain agent trace input it expected from the selected capture mode. - -Examples: - -- malformed JSON stream -- unexpected subprocess termination before terminal event -- SDK callback channel failure - -Capture failure MUST: - -- mark attempt `traceCompleteness = capture-failed` when terminally broken -- emit a `capture.error` canonical event -- include diagnostic details - -### Partial Capture - -Partial capture means Smithers obtained some trace events but missed expected categories. - -Examples: - -- stdout stream cut off after several tool events -- session artifact missing though event stream completed - -Partial capture MUST: - -- mark attempt `traceCompleteness = partial-observed` -- record missing classes in `unsupportedEventKinds` or `missingExpectedEventKinds` - -### Export Failure - -Export failure means Smithers captured canonical trace events but could not deliver them to the OTEL backend. - -Export failure MUST NOT erase canonical local truth. - -If export fails: - -- canonical local persistence MUST still succeed when enabled -- Smithers MUST emit operator diagnostics through existing logs -- the run MUST remain inspectable from durable local records - -## Normalization Rules - -Normalization MUST be conservative. - -### One Raw Event to One Canonical Event - -As a default rule, one raw upstream event SHOULD map to one canonical trace event. 
- -If one raw event yields multiple canonical events, the implementation MUST document why and MUST include a stable parent link. - -### Text Deltas - -Assistant text deltas MUST remain deltas if the upstream protocol provided deltas. - -Smithers MUST NOT collapse deltas into a single blob during export. - -Final assembled messages MAY be emitted separately as `assistant.message.final`. - -### Thinking Deltas - -Visible thinking content MUST be captured as its own event class and MUST NOT be merged into assistant text. - -### Tool Calls - -Tool lifecycle MUST preserve: - -- stable tool call identifier when upstream provides one -- tool name -- visible arguments or argument preview -- partial updates when available -- final result preview -- error flag - -### Usage - -Usage records MUST be separate canonical events or attached to terminal message events in a way that remains queryable. - -If usage is attached, it MUST still be accessible without parsing free-form text. - -## Required Runtime Integration Points - -The implementation MUST integrate at these boundaries. - -### Agent Boundary - -Every agent integration MUST report raw trace observations into the canonical trace capture layer. - -No agent integration is allowed to silently parse and discard upstream event records before the capture layer sees them. 
- -### Event Bus Boundary - -Canonical trace events SHOULD be emitted through or alongside the existing event bus so that: - -- they share run correlation -- they can participate in durable persistence -- they can reuse existing event-driven verification infrastructure - -### Attempt Finalization Boundary - -When an attempt finishes, Smithers MUST finalize trace metadata: - -- `traceFinishedAtMs` -- `traceCompleteness` -- `unsupportedEventKinds` -- `rawArtifactRefs` - -## Required Configuration Surface - -The implementation MUST define explicit configuration for: - -- enabling OTEL log export -- selecting backend endpoint -- enabling or disabling canonical local trace persistence -- selecting redaction mode -- retaining or dropping raw payload bodies -- retaining or dropping raw artifacts -- maximum event body bytes -- maximum artifact bytes - -The configuration MUST distinguish: - -- runtime operator policy -- run-specific annotations - -## Required Operator Queries - -The design is incomplete unless the following operator queries are supported. - -### Query Set A: Run Reconstruction - -Operators MUST be able to answer: - -- show all trace records for one run -- show all trace records for one run and node -- show only one attempt for one node -- show ordered assistant text deltas -- show visible thinking deltas when present -- show tool calls and results in order - -### Query Set B: Failure Analysis - -Operators MUST be able to answer: - -- which runs had trace capture failures -- which agents only provide final-only traces -- which attempts terminated without a terminal agent event -- which traces were partially redacted - -### Query Set C: Audit - -Operators MUST be able to answer: - -- what annotations were attached to a run -- which workflow file and workflow hash produced the trace -- which raw artifact file corresponds to this attempt - -## Verification Specification - -Task completion is not defined by code existing. It is defined by observable correctness. 
- -The implementation is complete only if every verification class below passes. - -## Verification Class 1: Schema Correctness - -For each supported agent family, automated tests MUST verify that canonical trace events: - -- conform to the declared schema -- contain required identity fields -- maintain monotonic `event.sequence` -- correctly classify `traceCompleteness` - -Completion criterion: - -- zero schema violations in test fixtures - -## Verification Class 2: Ordering Correctness - -Automated tests MUST verify that for one attempt: - -- event sequences are strictly monotonic -- final events occur after preceding deltas -- no duplicate sequence numbers appear - -Completion criterion: - -- deterministic ordering across repeated test runs - -## Verification Class 3: Fidelity Correctness - -Fixture-based tests MUST compare raw upstream inputs with canonical trace outputs. - -For each fixture: - -- every parseable upstream event MUST result in a canonical event or an explicit diagnostic drop event -- visible thinking content MUST remain distinguishable from assistant text -- tool call identifiers and names MUST survive normalization - -Completion criterion: - -- full fixture coverage for each agent family and capture mode supported by Smithers - -## Verification Class 4: Completeness Classification - -Tests MUST verify the semantics of: - -- `full-observed` -- `partial-observed` -- `final-only` -- `capture-failed` - -Completion criterion: - -- each classification is produced by at least one explicit test case - -## Verification Class 5: OTEL Export Correctness - -Integration tests MUST verify that canonical trace events become OTEL log records with: - -- required attributes present -- correct body shape -- correct severity mapping -- correct custom annotation export - -Completion criterion: - -- logs are queryable in the target backend by `run.id`, `workflow.path`, `node.id`, `attempt`, and `event.kind` - -## Verification Class 6: Loki Query Correctness - -In a 
local stack with Loki enabled, end-to-end tests MUST verify that an operator can query: - -- all records for a run -- all records for a node attempt -- only thinking deltas -- only tool execution records -- only capture errors - -Completion criterion: - -- documented query examples return expected results against test data - -## Verification Class 7: Artifact Correctness - -When artifact capture is enabled, tests MUST verify: - -- artifact references are recorded -- artifacts exist on disk or in configured storage -- artifact metadata matches actual content -- artifact creation emits corresponding canonical events - -Completion criterion: - -- no dangling artifact references - -## Verification Class 8: Redaction Correctness - -Tests MUST verify that redaction: - -- removes required secrets from canonical payloads, raw payloads, OTEL bodies, and artifacts -- leaves non-sensitive content intact -- records which rules were applied - -Completion criterion: - -- zero known secret literals leak in test fixtures - -## Verification Class 9: Failure Resilience - -Tests MUST verify behavior when: - -- collector is unavailable -- backend rejects logs -- malformed upstream JSON is encountered -- subprocess exits before terminal event -- artifact write fails - -Completion criterion: - -- capture failures are classified -- local diagnostics exist -- durable local truth remains accessible when configured - -## Verification Class 10: Cross-Signal Correlation - -Tests MUST verify that logs correlate with: - -- run lifecycle events -- metrics -- spans - -At minimum, operators MUST be able to join by: - -- `run.id` -- `node.id` -- `attempt` - -Completion criterion: - -- one documented workflow run can be traced across event log, OTEL logs, and metrics without ambiguity - -## Acceptance Criteria - -The feature is not done until all of the following are true. - -### A. 
Canonical Model Exists - -Smithers has a canonical agent trace schema with explicit completeness states and per-agent capability declarations. - -### B. Pi Is High Fidelity - -`PiAgent` exports structured trace records for: - -- session lifecycle -- turn lifecycle -- message lifecycle -- assistant text deltas -- visible thinking deltas -- tool execution lifecycle -- retry and compaction events - -### C. Other Agents Are Truthfully Classified - -Every agent in `src/agents/` has a declared fidelity class and unsupported event set. - -### D. OTEL Logs Pipeline Exists - -The collector and local observability stack support OTEL logs end to end. - -### E. Queries Work - -Operators can answer the required run reconstruction, failure analysis, and audit queries from the exported logs. - -### F. Verification Is Automated - -Automated tests exist for schema, ordering, fidelity, completeness, OTEL export, redaction, failure handling, and query correctness. - -## Implementation Phasing - -This section is normative for rollout order. - -### Phase 1: Canonical Model - -Implement: - -- canonical trace schema -- completeness classification -- per-agent capability declarations - -### Phase 2: Pi Fidelity - -Implement: - -- Pi raw event capture -- canonical normalization -- OTEL export -- artifact capture for session files if configured - -### Phase 3: Structured CLI Agents - -Implement: - -- Codex -- Claude Code -- Gemini -- Kimi - -Each integration MUST ship with fixture-based normalization tests before being considered complete. - -### Phase 4: SDK and Text-Only Agents - -Implement: - -- explicit partial or final-only capture -- truthful capability declarations -- OTEL export for the observable subset - -### Phase 5: Redaction and Hardening - -Implement: - -- default redaction -- export failure handling -- artifact verification -- documented local Loki queries - -## Explicit Non-Ambiguities - -The following choices are intentional. 
- -- Smithers MUST prefer truthful partial fidelity over fake completeness. -- Smithers MUST preserve raw event boundaries rather than collapsing everything into summaries. -- Smithers MUST keep large content in log bodies, not indexing labels. -- Smithers MUST retain a local source of truth when OTEL export fails. -- Smithers MUST separate assistant text from visible thinking. -- Smithers MUST define task completion in terms of verification evidence, not implementation effort. - -## Out of Scope for the First Implementation - -The first implementation MAY defer: - -- remote artifact storage -- cross-run session graph visualizations -- backend-specific dashboards beyond minimal verification queries -- universal reconstruction of provider-internal hidden reasoning - -If deferred, these items MUST be documented explicitly and MUST NOT be implied to exist. - -## Summary - -The required system is not “send some logs to Loki.” - -The required system is: - -- a canonical agent trace model -- explicit capability declarations per integration -- conservative capture of all observable upstream events -- durable local truth -- OTEL log export with stable correlation fields -- redaction before persistence and export -- verification that proves fidelity, completeness, and queryability - -Anything less produces observability that looks complete while remaining operationally unreliable. 
From c82912a65e6eaaa54673df73b10c09db0cd4160d Mon Sep 17 00:00:00 2001 From: Samuel Huber Date: Thu, 26 Mar 2026 21:08:30 +0100 Subject: [PATCH 4/7] docs(pi): add hello-world sample and brief PI usage note --- docs/integrations/pi-integration.mdx | 2 ++ examples/pi-hello-world.tsx | 31 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 examples/pi-hello-world.tsx diff --git a/docs/integrations/pi-integration.mdx b/docs/integrations/pi-integration.mdx index 4b584577..e8a30063 100644 --- a/docs/integrations/pi-integration.mdx +++ b/docs/integrations/pi-integration.mdx @@ -72,6 +72,8 @@ pi --version bun run test ``` +For a minimal end-to-end example, run `bun run cli run examples/pi-hello-world.tsx`. To choose a model, check your PI `models.json` or run `pi --list-models`. + ## Design Guidance Use `PiAgent` task nodes when: diff --git a/examples/pi-hello-world.tsx b/examples/pi-hello-world.tsx new file mode 100644 index 00000000..19938c2c --- /dev/null +++ b/examples/pi-hello-world.tsx @@ -0,0 +1,31 @@ +/** @jsxImportSource smithers-orchestrator */ +import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator"; +import { z } from "zod"; + +const HelloSchema = z.object({ + message: z.string(), +}); + +const { smithers, outputs } = createSmithers( + { + output: HelloSchema, + }, + { + dbPath: "./examples/pi-hello-world.db", + }, +); + +const pi = new PiAgent({ + provider: "openai-codex", + model: "gpt-5.4", + mode: "json", +}); + +export default smithers(() => ( + + + {`Return exactly this JSON and nothing else: +{"message":"hello world"}`} + + +)); From b40d840420e0830341622b5499307aab15e0d704 Mon Sep 17 00:00:00 2001 From: Samuel Huber Date: Thu, 26 Mar 2026 21:23:46 +0100 Subject: [PATCH 5/7] fix(pi): narrow PR to rpc completion only --- docs/integrations/pi-integration.mdx | 2 - examples/pi-hello-world.tsx | 31 ----- src/SmithersEvent.ts | 113 +---------------- src/agent-trace.ts | 178 --------------------------- 
src/agents/PiAgent.ts | 23 ---- src/events.ts | 21 ++-- src/index.ts | 11 +- src/observability/index.ts | 89 -------------- src/tools/context.ts | 2 - tests/observability.test.ts | 91 -------------- tests/pi-support.test.ts | 169 +++++++------------------ 11 files changed, 55 insertions(+), 675 deletions(-) delete mode 100644 examples/pi-hello-world.tsx delete mode 100644 src/agent-trace.ts diff --git a/docs/integrations/pi-integration.mdx b/docs/integrations/pi-integration.mdx index e8a30063..4b584577 100644 --- a/docs/integrations/pi-integration.mdx +++ b/docs/integrations/pi-integration.mdx @@ -72,8 +72,6 @@ pi --version bun run test ``` -For a minimal end-to-end example, run `bun run cli run examples/pi-hello-world.tsx`. To choose a model, check your PI `models.json` or run `pi --list-models`. - ## Design Guidance Use `PiAgent` task nodes when: diff --git a/examples/pi-hello-world.tsx b/examples/pi-hello-world.tsx deleted file mode 100644 index 19938c2c..00000000 --- a/examples/pi-hello-world.tsx +++ /dev/null @@ -1,31 +0,0 @@ -/** @jsxImportSource smithers-orchestrator */ -import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator"; -import { z } from "zod"; - -const HelloSchema = z.object({ - message: z.string(), -}); - -const { smithers, outputs } = createSmithers( - { - output: HelloSchema, - }, - { - dbPath: "./examples/pi-hello-world.db", - }, -); - -const pi = new PiAgent({ - provider: "openai-codex", - model: "gpt-5.4", - mode: "json", -}); - -export default smithers(() => ( - - - {`Return exactly this JSON and nothing else: -{"message":"hello world"}`} - - -)); diff --git a/src/SmithersEvent.ts b/src/SmithersEvent.ts index 29d705f8..6b25e4ff 100644 --- a/src/SmithersEvent.ts +++ b/src/SmithersEvent.ts @@ -1,113 +1,5 @@ import type { RunStatus } from "./RunStatus"; -export const CANONICAL_AGENT_TRACE_VERSION = 1 as const; - -export type AgentTraceCompleteness = - | "full-observed" - | "partial-observed" - | "final-only" - | 
"capture-failed"; - -export type AgentTraceCaptureMode = - | "sdk-events" - | "rpc-events" - | "cli-json-stream" - | "cli-json" - | "cli-text" - | "artifact-import"; - -export type AgentTraceEventKind = - | "session.start" - | "session.end" - | "turn.start" - | "turn.end" - | "message.start" - | "message.update" - | "message.end" - | "assistant.text.delta" - | "assistant.thinking.delta" - | "assistant.message.final" - | "tool.execution.start" - | "tool.execution.update" - | "tool.execution.end" - | "tool.result" - | "retry.start" - | "retry.end" - | "compaction.start" - | "compaction.end" - | "stderr" - | "stdout" - | "usage" - | "capture.warning" - | "capture.error" - | "artifact.created"; - -export type AgentTraceEventPhase = - | "message" - | "tool" - | "agent" - | "session" - | "turn" - | "capture" - | "artifact"; - -export const PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS = [ - "assistant.text.delta", - "tool.execution.start", - "tool.execution.update", - "tool.execution.end", -] as const satisfies readonly AgentTraceEventKind[]; - -export const PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS = [ - "session.start", - "session.end", - "turn.start", - "turn.end", - "message.start", - "message.update", - "message.end", - "assistant.thinking.delta", - "assistant.message.final", - "tool.result", - "retry.start", - "retry.end", - "compaction.start", - "compaction.end", - "usage", - "artifact.created", -] as const satisfies readonly AgentTraceEventKind[]; - -export type SmithersAgentTraceEvent = { - type: "AgentTraceEvent"; - traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION; - traceCompleteness: AgentTraceCompleteness; - unsupportedEventKinds: AgentTraceEventKind[]; - runId: string; - workflowPath?: string | null; - workflowHash?: string | null; - nodeId: string; - iteration: number; - attempt: number; - timestampMs: number; - event: { - sequence: number; - kind: AgentTraceEventKind; - phase: AgentTraceEventPhase; - }; - source: { - agentFamily: "pi"; - agentId?: string; - model?: 
string; - captureMode: AgentTraceCaptureMode; - rawType?: string; - observed: boolean; - }; - payload: Record | null; - raw: unknown; - redaction: { applied: boolean; ruleIds?: string[] } | null; - annotations: Record | null; -}; - export type SmithersEvent = | { type: "RunStarted"; runId: string; timestampMs: number } | { @@ -302,7 +194,4 @@ export type SmithersEvent = cacheWriteTokens?: number; reasoningTokens?: number; timestampMs: number; - } - | SmithersAgentTraceEvent; - -export type ExtendedSmithersEvent = SmithersEvent; + }; diff --git a/src/agent-trace.ts b/src/agent-trace.ts deleted file mode 100644 index fa611798..00000000 --- a/src/agent-trace.ts +++ /dev/null @@ -1,178 +0,0 @@ -import { getToolContext } from "./tools/context"; -import { nowMs } from "./utils/time"; -import { - CANONICAL_AGENT_TRACE_VERSION, - PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS, -} from "./SmithersEvent"; -import type { - AgentTraceCaptureMode, - AgentTraceEventKind, - AgentTraceEventPhase, - SmithersAgentTraceEvent, -} from "./SmithersEvent"; - -// Local, per-attempt monotonic sequence for canonical trace events -const traceSeq = new WeakMap(); - -function nextTraceSeq(ctx: object): number { - const current = traceSeq.get(ctx) ?? 
0; - const next = current + 1; - traceSeq.set(ctx, next); - return next; -} - -export type CaptureMode = Extract< - AgentTraceCaptureMode, - "cli-json" | "cli-json-stream" | "rpc-events" | "cli-text" ->; - -export type AgentTraceSourceMeta = { - agentId?: string; - model?: string; -}; - -export function emitAgentTrace( - kind: AgentTraceEventKind, - phase: AgentTraceEventPhase, - payload: Record | null | undefined, - raw: unknown, - rawType: string | undefined, - captureMode: CaptureMode, - sourceMeta?: AgentTraceSourceMeta, -) { - const ctx = getToolContext(); - if (!ctx || typeof ctx.emitEvent !== "function") return; // outside workflow execution - const ts = nowMs(); - const event: SmithersAgentTraceEvent = { - type: "AgentTraceEvent", - traceVersion: CANONICAL_AGENT_TRACE_VERSION, - traceCompleteness: "partial-observed", - unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS], - runId: ctx.runId, - workflowPath: ctx.workflowPath ?? null, - workflowHash: ctx.workflowHash ?? null, - nodeId: ctx.nodeId, - iteration: ctx.iteration, - attempt: ctx.attempt, - timestampMs: ts, - event: { - sequence: nextTraceSeq(ctx), - kind, - phase, - }, - source: { - agentFamily: "pi", - agentId: sourceMeta?.agentId, - model: sourceMeta?.model, - captureMode, - rawType, - observed: true, - }, - payload: payload ?? null, - raw, - redaction: null, - annotations: null, - }; - void ctx.emitEvent(event); -} - -export function capturePiEvent( - event: any, - captureMode: CaptureMode, - sourceMeta?: AgentTraceSourceMeta, -) { - if (!event || typeof event !== "object") return; - const type = String((event as any).type ?? 
""); - - // Assistant text deltas - if (type === "message_update") { - const assistant = (event as any).assistantMessageEvent; - if (assistant && assistant.type === "text_delta" && typeof assistant.delta === "string") { - emitAgentTrace( - "assistant.text.delta", - "message", - { text: assistant.delta }, - event, - "message_update.text_delta", - captureMode, - sourceMeta, - ); - return; - } - } - - // Tool lifecycle (best-effort mapping of common Pi shapes) - if (type === "tool_execution_start") { - const call = (event as any).toolCall ?? (event as any).call ?? (event as any); - emitAgentTrace( - "tool.execution.start", - "tool", - { - toolCallId: String(call.id ?? call.toolCallId ?? ""), - toolName: String(call.name ?? call.toolName ?? call.tool ?? ""), - argsPreview: call.args ?? call.arguments ?? undefined, - }, - event, - "tool_execution_start", - captureMode, - sourceMeta, - ); - return; - } - - if (type === "tool_execution_update") { - const call = (event as any).toolCall ?? (event as any).call ?? (event as any); - emitAgentTrace( - "tool.execution.update", - "tool", - { - toolCallId: String(call.id ?? call.toolCallId ?? ""), - toolName: String(call.name ?? call.toolName ?? call.tool ?? ""), - }, - event, - "tool_execution_update", - captureMode, - sourceMeta, - ); - return; - } - - if (type === "tool_execution_end") { - const call = (event as any).toolCall ?? (event as any).call ?? (event as any); - const isError = Boolean((event as any).error || (event as any).failed); - emitAgentTrace( - "tool.execution.end", - "tool", - { - toolCallId: String(call.id ?? call.toolCallId ?? ""), - toolName: String(call.name ?? call.toolName ?? call.tool ?? ""), - isError, - resultPreview: (event as any).result ?? (event as any).output ?? 
undefined, - }, - event, - "tool_execution_end", - captureMode, - sourceMeta, - ); - return; - } -} - -export function capturePiNdjson( - raw: string, - captureMode: CaptureMode, - sourceMeta?: AgentTraceSourceMeta, -) { - const lines = String(raw ?? "") - .split(/\r?\n/) - .map((l) => l.trim()) - .filter(Boolean); - for (const line of lines) { - try { - const parsed = JSON.parse(line); - capturePiEvent(parsed, captureMode, sourceMeta); - } catch { - // ignore malformed lines - } - } -} diff --git a/src/agents/PiAgent.ts b/src/agents/PiAgent.ts index 0f7a62a0..7d659c2b 100644 --- a/src/agents/PiAgent.ts +++ b/src/agents/PiAgent.ts @@ -14,8 +14,6 @@ import { import type { BaseCliAgentOptions, PiExtensionUiRequest, PiExtensionUiResponse } from "./BaseCliAgent"; import { getToolContext } from "../tools/context"; import { SmithersError } from "../utils/errors"; -import { capturePiNdjson } from "../agent-trace"; -import { capturePiEvent } from "../agent-trace"; export type { PiExtensionUiRequest, PiExtensionUiResponse }; @@ -188,17 +186,6 @@ export class PiAgent extends BaseCliAgent { const extractedText = mode === "json" ? (extractTextFromPiNdjson(rawText) ?? rawText) : rawText; - // Capture canonical trace events for Pi NDJSON stream (assistant deltas, tool lifecycle) - try { - if (mode === "json") { - capturePiNdjson(rawText, "cli-json", { - agentId: this.id, - model: this.opts.model ?? this.model, - }); - } - } catch { - // Best-effort capture; never fail the agent call due to trace capture. - } const output = tryParseJson(extractedText); return buildGenerateResult(extractedText, output, this.opts.model ?? "pi"); } @@ -214,16 +201,6 @@ export class PiAgent extends BaseCliAgent { maxOutputBytes: this.maxOutputBytes ?? getToolContext()?.maxOutputBytes, onStderr: options?.onStderr, onExtensionUiRequest: this.opts.onExtensionUiRequest, - onEvent: (evt) => { - try { - capturePiEvent(evt, "rpc-events", { - agentId: this.id, - model: this.opts.model ?? 
this.model, - }); - } catch { - /* ignore */ - } - }, }); return buildGenerateResult(rpcResult.text, rpcResult.output, this.opts.model ?? "pi", rpcResult.usage); diff --git a/src/events.ts b/src/events.ts index e83fd306..99256de6 100644 --- a/src/events.ts +++ b/src/events.ts @@ -1,12 +1,11 @@ import { EventEmitter } from "node:events"; -import { promises as fs } from "node:fs"; +import * as FileSystem from "@effect/platform/FileSystem"; import { join } from "node:path"; import { Effect } from "effect"; import type { SmithersEvent } from "./SmithersEvent"; import { fromPromise } from "./effect/interop"; import { runPromise } from "./effect/runtime"; import { trackEvent } from "./effect/metrics"; -import { isAgentTraceEvent, toPersistedAgentTraceRecord } from "./observability"; export class EventBus extends EventEmitter { private seq = 0; @@ -144,16 +143,14 @@ export class EventBus extends EventEmitter { private persistLogEffect(event: SmithersEvent) { if (!this.logDir) return Effect.void; const dir = this.logDir; - return fromPromise("persist event log", async () => { - await fs.mkdir(dir, { recursive: true }); - await fs.appendFile(join(dir, "stream.ndjson"), `${JSON.stringify(event)}\n`, "utf8"); - if (isAgentTraceEvent(event)) { - await fs.appendFile( - join(dir, "agent-trace.ndjson"), - `${JSON.stringify(toPersistedAgentTraceRecord(event))}\n`, - "utf8", - ); - } + return Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem; + yield* fs.makeDirectory(dir, { recursive: true }); + const file = join(dir, "stream.ndjson"); + const line = JSON.stringify(event) + "\n"; + const current = yield* Effect.option(fs.readFileString(file, "utf8")); + const prefix = current._tag === "Some" ? 
current.value : ""; + yield* fs.writeFileString(file, prefix + line); }); } } diff --git a/src/index.ts b/src/index.ts index 0bddc29a..893f9ace 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,13 +12,7 @@ export type { SchemaRegistryEntry } from "./SchemaRegistryEntry"; export type { SmithersWorkflow } from "./SmithersWorkflow"; export type { SmithersCtx } from "./SmithersCtx"; export type { OutputAccessor, InferRow, InferOutputEntry } from "./OutputAccessor"; -export type { - AgentTraceCaptureMode, - AgentTraceCompleteness, - AgentTraceEventKind, - SmithersAgentTraceEvent, - SmithersEvent, -} from "./SmithersEvent"; +export type { SmithersEvent } from "./SmithersEvent"; export type { SmithersError } from "./SmithersError"; export { SmithersError as SmithersErrorInstance, isSmithersError, errorToJson } from "./utils/errors"; export type { SmithersErrorCode } from "./utils/errors"; @@ -105,13 +99,10 @@ export type { ServerOptions } from "./server/index"; // Observability export { SmithersObservability, - PI_AGENT_TRACE_CAPABILITY_PROFILE, createSmithersObservabilityLayer, createSmithersOtelLayer, createSmithersRuntimeLayer, - isAgentTraceEvent, smithersMetrics, - toPersistedAgentTraceRecord, trackSmithersEvent, activeNodes, activeRuns, diff --git a/src/observability/index.ts b/src/observability/index.ts index 0439b524..b29835ff 100644 --- a/src/observability/index.ts +++ b/src/observability/index.ts @@ -65,17 +65,6 @@ import { updateProcessMetrics, vcsDuration, } from "../effect/metrics"; -import { - CANONICAL_AGENT_TRACE_VERSION, - PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS, - PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS, -} from "../SmithersEvent"; -import type { - AgentTraceCaptureMode, - AgentTraceCompleteness, - AgentTraceEventKind, - SmithersAgentTraceEvent, -} from "../SmithersEvent"; export type SmithersLogFormat = "json" | "pretty" | "string" | "logfmt"; @@ -112,84 +101,6 @@ export class SmithersObservability extends Context.Tag("SmithersObservability")< 
SmithersObservabilityService >() {} -export type AgentTraceCapabilityProfile = { - readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION; - readonly agentFamily: "pi"; - readonly captureModes: readonly AgentTraceCaptureMode[]; - readonly traceCompleteness: AgentTraceCompleteness; - readonly supportedEventKinds: readonly AgentTraceEventKind[]; - readonly unsupportedEventKinds: readonly AgentTraceEventKind[]; -}; - -export type PersistedAgentTraceRecord = { - readonly traceVersion: typeof CANONICAL_AGENT_TRACE_VERSION; - readonly traceCompleteness: AgentTraceCompleteness; - readonly unsupportedEventKinds: AgentTraceEventKind[]; - readonly runId: string; - readonly workflowPath: string | null; - readonly workflowHash: string | null; - readonly nodeId: string; - readonly iteration: number; - readonly attempt: number; - readonly timestampMs: number; - readonly eventSequence: number; - readonly eventKind: AgentTraceEventKind; - readonly eventPhase: SmithersAgentTraceEvent["event"]["phase"]; - readonly agentFamily: SmithersAgentTraceEvent["source"]["agentFamily"]; - readonly agentId: string | null; - readonly agentModel: string | null; - readonly captureMode: AgentTraceCaptureMode; - readonly rawType: string | null; - readonly observed: boolean; - readonly payload: Record | null; - readonly raw: unknown; - readonly redaction: SmithersAgentTraceEvent["redaction"]; - readonly annotations: SmithersAgentTraceEvent["annotations"]; -}; - -export const PI_AGENT_TRACE_CAPABILITY_PROFILE: AgentTraceCapabilityProfile = { - traceVersion: CANONICAL_AGENT_TRACE_VERSION, - agentFamily: "pi", - captureModes: ["cli-json", "rpc-events"], - traceCompleteness: "partial-observed", - supportedEventKinds: [...PI_AGENT_TRACE_SUPPORTED_EVENT_KINDS], - unsupportedEventKinds: [...PI_AGENT_TRACE_UNSUPPORTED_EVENT_KINDS], -}; - -export function isAgentTraceEvent(event: unknown): event is SmithersAgentTraceEvent { - return !!event && typeof event === "object" && (event as { type?: unknown 
}).type === "AgentTraceEvent"; -} - -export function toPersistedAgentTraceRecord( - event: SmithersAgentTraceEvent, -): PersistedAgentTraceRecord { - return { - traceVersion: event.traceVersion, - traceCompleteness: event.traceCompleteness, - unsupportedEventKinds: [...event.unsupportedEventKinds], - runId: event.runId, - workflowPath: event.workflowPath ?? null, - workflowHash: event.workflowHash ?? null, - nodeId: event.nodeId, - iteration: event.iteration, - attempt: event.attempt, - timestampMs: event.timestampMs, - eventSequence: event.event.sequence, - eventKind: event.event.kind, - eventPhase: event.event.phase, - agentFamily: event.source.agentFamily, - agentId: event.source.agentId ?? null, - agentModel: event.source.model ?? null, - captureMode: event.source.captureMode, - rawType: event.source.rawType ?? null, - observed: event.source.observed, - payload: event.payload ?? null, - raw: event.raw, - redaction: event.redaction, - annotations: event.annotations, - }; -} - export const prometheusContentType = "text/plain; version=0.0.4; charset=utf-8"; diff --git a/src/tools/context.ts b/src/tools/context.ts index cd202db0..c585790c 100644 --- a/src/tools/context.ts +++ b/src/tools/context.ts @@ -8,8 +8,6 @@ export type ToolContext = { nodeId: string; iteration: number; attempt: number; - workflowPath?: string | null; - workflowHash?: string | null; rootDir: string; allowNetwork: boolean; maxOutputBytes: number; diff --git a/tests/observability.test.ts b/tests/observability.test.ts index 5cadf2d5..0b80a2f1 100644 --- a/tests/observability.test.ts +++ b/tests/observability.test.ts @@ -1,14 +1,11 @@ import { describe, expect, test } from "bun:test"; import { Metric } from "effect"; import { - PI_AGENT_TRACE_CAPABILITY_PROFILE, httpRequestDuration, renderPrometheusMetrics, runsTotal, - toPersistedAgentTraceRecord, } from "../src/observability"; import { runPromise } from "../src/effect/runtime"; -import type { SmithersAgentTraceEvent } from 
"../src/SmithersEvent"; describe("Prometheus metrics", () => { test("renders built-in Smithers metrics in Prometheus exposition format", async () => { @@ -26,91 +23,3 @@ describe("Prometheus metrics", () => { expect(output).toContain("smithers_http_request_duration_ms_count"); }); }); - -describe("agent trace observability", () => { - test("declares the implemented Pi trace slice truthfully", () => { - expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceVersion).toBe(1); - expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.agentFamily).toBe("pi"); - expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.traceCompleteness).toBe("partial-observed"); - expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.supportedEventKinds).toEqual([ - "assistant.text.delta", - "tool.execution.start", - "tool.execution.update", - "tool.execution.end", - ]); - expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.unsupportedEventKinds).toContain( - "assistant.thinking.delta", - ); - expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("cli-json"); - expect(PI_AGENT_TRACE_CAPABILITY_PROFILE.captureModes).toContain("rpc-events"); - }); - - test("flattens canonical trace events into queryable persisted records", () => { - const event: SmithersAgentTraceEvent = { - type: "AgentTraceEvent", - traceVersion: 1, - traceCompleteness: "partial-observed", - unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"], - runId: "run-1", - workflowPath: "/tmp/workflow.tsx", - workflowHash: "workflow-hash", - nodeId: "node-a", - iteration: 2, - attempt: 3, - timestampMs: 123, - event: { - sequence: 4, - kind: "tool.execution.end", - phase: "tool", - }, - source: { - agentFamily: "pi", - agentId: "pi-agent-id", - model: "gpt-5.2-codex", - captureMode: "rpc-events", - rawType: "tool_execution_end", - observed: true, - }, - payload: { - toolCallId: "tool-1", - toolName: "read", - isError: false, - }, - raw: { type: "tool_execution_end" }, - redaction: null, - annotations: { "custom.test": true }, - }; - - const record = 
toPersistedAgentTraceRecord(event); - - expect(record).toEqual({ - traceVersion: 1, - traceCompleteness: "partial-observed", - unsupportedEventKinds: ["assistant.thinking.delta", "assistant.message.final"], - runId: "run-1", - workflowPath: "/tmp/workflow.tsx", - workflowHash: "workflow-hash", - nodeId: "node-a", - iteration: 2, - attempt: 3, - timestampMs: 123, - eventSequence: 4, - eventKind: "tool.execution.end", - eventPhase: "tool", - agentFamily: "pi", - agentId: "pi-agent-id", - agentModel: "gpt-5.2-codex", - captureMode: "rpc-events", - rawType: "tool_execution_end", - observed: true, - payload: { - toolCallId: "tool-1", - toolName: "read", - isError: false, - }, - raw: { type: "tool_execution_end" }, - redaction: null, - annotations: { "custom.test": true }, - }); - }); -}); diff --git a/tests/pi-support.test.ts b/tests/pi-support.test.ts index 170d16cf..a7a89d81 100644 --- a/tests/pi-support.test.ts +++ b/tests/pi-support.test.ts @@ -3,9 +3,6 @@ import { afterEach, describe, expect, test } from "bun:test"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { PiAgent } from "../src/agents"; - import { EventBus } from "../src/events"; - import { runWithToolContext } from "../src/tools/context"; - import type { SmithersAgentTraceEvent } from "../src/SmithersEvent"; const originalPath = process.env.PATH ?? ""; @@ -63,8 +60,7 @@ import { afterEach, describe, expect, test } from "bun:test"; thinking: "low", verbose: true, env: { PATH: process.env.PATH! 
}, - }); - + }); const result = await agent.generate({ messages: [ @@ -187,6 +183,48 @@ import { afterEach, describe, expect, test } from "bun:test"; } }); + test("PiAgent RPC mode waits past tool-use turns for the final assistant answer", async () => { + const fake = await makeFakePi(` + let buffer = ""; + process.stdin.on("data", (chunk) => { + buffer += chunk.toString("utf8"); + const lines = buffer.split(/\\r?\\n/); + buffer = lines.pop(); + for (const line of lines) { + if (!line.trim()) continue; + const msg = JSON.parse(line); + if (msg.type === "prompt") { + process.stdout.write(JSON.stringify({ type: "response", command: "prompt", success: true, id: msg.id }) + "\\n"); + process.stdout.write(JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: "Thinking" } }) + "\\n"); + process.stdout.write(JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Tool turn" }], stopReason: "toolUse" } }) + "\\n"); + setTimeout(() => { + process.stdout.write(JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: " final answer" } }) + "\\n"); + process.stdout.write(JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Final answer" }], stopReason: "stop" } }) + "\\n"); + }, 20); + } + } + }); + `); + + try { + process.env.PATH = `${fake.dir}:${originalPath}`; + + const agent = new PiAgent({ + mode: "rpc", + model: "gpt-4o-mini", + env: { PATH: process.env.PATH! 
}, + }); + + const result = await agent.generate({ + messages: [{ role: "user", content: "Use a tool and then answer" }], + }); + + expect(result.text).toBe("Final answer"); + } finally { + await rm(fake.dir, { recursive: true, force: true }); + } + }); + test("PiAgent RPC mode handles extension UI requests", async () => { const argsFileDir = await mkdtemp(join(tmpdir(), "smithers-pi-rpc-ui-")); const argsFile = join(argsFileDir, "prompt.json"); @@ -313,8 +351,7 @@ import { afterEach, describe, expect, test } from "bun:test"; mode: "json", model: "test-model", env: { PATH: process.env.PATH! }, - }); - + }); const result = await agent.generate({ messages: [{ role: "user", content: "Hello" }], @@ -329,124 +366,6 @@ import { afterEach, describe, expect, test } from "bun:test"; } }); - test("PiAgent json mode emits canonical trace events and persists them", async () => { - // Fake Pi emits NDJSON with assistant text deltas and a tool lifecycle - const fake = await makeFakePi(` -const lines = [ - JSON.stringify({ type: "session", version: 3, id: "sess-1" }), - JSON.stringify({ type: "agent_start" }), - JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: "Hello" } }), - JSON.stringify({ type: "message_update", assistantMessageEvent: { type: "text_delta", delta: ", world" } }), - JSON.stringify({ type: "tool_execution_start", toolCall: { id: "t1", name: "read", args: { path: "README.md" } } }), - JSON.stringify({ type: "tool_execution_end", toolCall: { id: "t1", name: "read" }, result: { ok: true } }), - JSON.stringify({ type: "turn_end", message: { role: "assistant", content: [{ type: "text", text: "Hello, world" }], stopReason: "stop" } }) -]; -process.stdout.write(lines.join("\\n") + "\\n"); -`); - - const memoryEvents: { seq: number; row: any }[] = []; - const db = { - insertEventWithNextSeq: ({ runId, timestampMs, type, payloadJson }: any) => { - const seq = (memoryEvents.length > 0 ? 
memoryEvents[memoryEvents.length - 1].seq : -1) + 1; - memoryEvents.push({ seq, row: { runId, timestampMs, type, payloadJson } }); - return Promise.resolve(seq); - }, - } as any; - - const logDir = await mkdtemp(join(tmpdir(), "smithers-agent-trace-")); - const bus = new EventBus({ db, logDir }); - - try { - process.env.PATH = `${fake.dir}:${originalPath}`; - const agent = new PiAgent({ mode: "json", model: "pi-test-model", env: { PATH: process.env.PATH! } }); - - const captured: SmithersAgentTraceEvent[] = []; - - await runWithToolContext( - { - db: db as any, - runId: "run-1", - nodeId: "node-A", - iteration: 1, - attempt: 1, - workflowPath: "/tmp/workflows/pi-workflow.tsx", - workflowHash: "workflow-hash-1", - rootDir: process.cwd(), - allowNetwork: true, - maxOutputBytes: 200_000, - timeoutMs: 30_000, - seq: 0, - emitEvent: (e: any) => { - if (e && e.type === "AgentTraceEvent") { - captured.push(e as SmithersAgentTraceEvent); - } - return bus.emitEventQueued(e as any); - }, - }, - async () => { - const result = await agent.generate({ messages: [{ role: "user", content: "Ping" }] }); - expect(result.text).toContain("Hello, world"); - }, - ); - await bus.flush(); - - const sequences = captured.map((e) => e.event.sequence); - expect(sequences).toEqual([1, 2, 3, 4]); - - // We should have assistant deltas and tool lifecycle mapped - const kinds = captured.map((e) => e.event.kind); - expect(kinds).toEqual([ - "assistant.text.delta", - "assistant.text.delta", - "tool.execution.start", - "tool.execution.end", - ]); - - // Correlation and truthfulness fields present - for (const e of captured) { - expect(e.traceVersion).toBe(1); - expect(e.traceCompleteness).toBe("partial-observed"); - expect(e.unsupportedEventKinds).toContain("assistant.thinking.delta"); - expect(e.runId).toBe("run-1"); - expect(e.workflowPath).toBe("/tmp/workflows/pi-workflow.tsx"); - expect(e.workflowHash).toBe("workflow-hash-1"); - expect(e.nodeId).toBe("node-A"); - expect(e.iteration).toBe(1); - 
expect(e.attempt).toBe(1); - expect(e.source.agentFamily).toBe("pi"); - expect(e.source.agentId).toBe(agent.id); - expect(e.source.model).toBe("pi-test-model"); - expect(e.source.captureMode).toBe("cli-json"); - } - - // Persisted to DB rows as durable event entries - const persistedTraceRows = memoryEvents.filter((r) => r.row.type === "AgentTraceEvent"); - expect(persistedTraceRows).toHaveLength(captured.length); - expect( - persistedTraceRows.map((row) => JSON.parse(row.row.payloadJson).event.sequence), - ).toEqual([1, 2, 3, 4]); - - // Persisted to a dedicated, flattened local trace log for later querying/export. - const persistedTraceLog = await readFile(join(logDir, "agent-trace.ndjson"), "utf8"); - const persistedTraceRecords = persistedTraceLog - .trim() - .split(/\r?\n/) - .filter(Boolean) - .map((line) => JSON.parse(line) as Record); - expect(persistedTraceRecords).toHaveLength(4); - expect(persistedTraceRecords.map((record) => record.eventKind)).toEqual(kinds); - expect(persistedTraceRecords[0]?.traceCompleteness).toBe("partial-observed"); - expect(persistedTraceRecords[0]?.unsupportedEventKinds).toContain("assistant.thinking.delta"); - expect(persistedTraceRecords[0]?.runId).toBe("run-1"); - expect(persistedTraceRecords[0]?.nodeId).toBe("node-A"); - expect(persistedTraceRecords[0]?.attempt).toBe(1); - expect(persistedTraceRecords[0]?.captureMode).toBe("cli-json"); - } finally { - await rm(fake.dir, { recursive: true, force: true }); - await rm(logDir, { recursive: true, force: true }); - } - }); - test("PiAgent json mode extracts JSON from text content in turn_end", async () => { // Simulates pi output where the agent returns JSON in the text content const fake = await makeFakePi(` From ea17d51fef476663d5139cfc5fa4df1915bd40b2 Mon Sep 17 00:00:00 2001 From: Samuel Huber Date: Thu, 26 Mar 2026 21:27:09 +0100 Subject: [PATCH 6/7] docs(pi): add hello-world sample and brief PI usage note --- docs/integrations/pi-integration.mdx | 2 ++ 
examples/pi-hello-world.tsx | 31 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 examples/pi-hello-world.tsx diff --git a/docs/integrations/pi-integration.mdx b/docs/integrations/pi-integration.mdx index 4b584577..e8a30063 100644 --- a/docs/integrations/pi-integration.mdx +++ b/docs/integrations/pi-integration.mdx @@ -72,6 +72,8 @@ pi --version bun run test ``` +For a minimal end-to-end example, run `bun run cli run examples/pi-hello-world.tsx`. To choose a model, check your PI `models.json` or run `pi --list-models`. + ## Design Guidance Use `PiAgent` task nodes when: diff --git a/examples/pi-hello-world.tsx b/examples/pi-hello-world.tsx new file mode 100644 index 00000000..19938c2c --- /dev/null +++ b/examples/pi-hello-world.tsx @@ -0,0 +1,31 @@ +/** @jsxImportSource smithers-orchestrator */ +import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator"; +import { z } from "zod"; + +const HelloSchema = z.object({ + message: z.string(), +}); + +const { smithers, outputs } = createSmithers( + { + output: HelloSchema, + }, + { + dbPath: "./examples/pi-hello-world.db", + }, +); + +const pi = new PiAgent({ + provider: "openai-codex", + model: "gpt-5.4", + mode: "json", +}); + +export default smithers(() => ( + + + {`Return exactly this JSON and nothing else: +{"message":"hello world"}`} + + +)); From cf4dee9cc1e57de27f09002887bff618a789e8b7 Mon Sep 17 00:00:00 2001 From: Samuel Huber Date: Thu, 26 Mar 2026 21:27:46 +0100 Subject: [PATCH 7/7] examples(pi): add tool-using workflow sample --- examples/pi-tools-input.txt | 3 +++ examples/pi-tools-workflow.tsx | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 examples/pi-tools-input.txt create mode 100644 examples/pi-tools-workflow.tsx diff --git a/examples/pi-tools-input.txt b/examples/pi-tools-input.txt new file mode 100644 index 00000000..4f8a719e --- /dev/null +++ b/examples/pi-tools-input.txt @@ -0,0 +1,3 @@ +Smithers PI 
tools sample +Unique phrase: saffron-orbit-lantern +This file is here so the PI agent has to read something real from disk. diff --git a/examples/pi-tools-workflow.tsx b/examples/pi-tools-workflow.tsx new file mode 100644 index 00000000..ad225b82 --- /dev/null +++ b/examples/pi-tools-workflow.tsx @@ -0,0 +1,42 @@ +/** @jsxImportSource smithers-orchestrator */ +import { createSmithers, Task, Workflow, PiAgent } from "smithers-orchestrator"; +import { z } from "zod"; + +const OutputSchema = z.object({ + phrase: z.string().regex(/^saffron-orbit-lantern$/), + lineCount: z.number().int().min(3).max(3), + cwdBasename: z.string().regex(/^examples$/), + summary: z.string(), +}); + +const { smithers, outputs } = createSmithers( + { + output: OutputSchema, + }, + { + dbPath: "./examples/pi-tools-workflow.db", + }, +); + +const pi = new PiAgent({ + provider: "openai-codex", + model: "gpt-5.4", + mode: "rpc", + tools: ["read", "bash"], +}); + +export default smithers(() => ( + + + {`Use the read tool to inspect ./pi-tools-input.txt and use the bash tool to determine the basename of the current working directory. + +Then return exactly this JSON and nothing else: +{ + "phrase": "the unique phrase from the file", + "lineCount": 3, + "cwdBasename": "the basename of the current working directory", + "summary": "one short sentence confirming what you found" +}`} + + +));