From 9c273e809ee254df389e54460bbf1c04e93bf287 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 14:13:58 -0700
Subject: [PATCH 01/14] feat(schema): add RouteDef.on_error and AgentDef.raises
 for error routing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1, step 1 of on_error routing (per design brief in
docs/projects/error-routing/on-error-routing.brainstorm.md).

Adds two opt-in schema fields and a small shared module for the
constants both schema validation and the engine error path will use:

- src/conductor/error_kinds.py
  - KIND_PATTERN: dotted lowercase identifier (at least one dot).
  - RESERVED_KIND_PREFIXES: internal., provider., subworkflow., retry.
    (the runtime owns these namespaces; workflow authors cannot declare
    kinds under them).
  - RESERVED_ON_ERROR_ALLOWLIST: the closed set of runtime-synthesized
    kinds that ARE legal to match in on_error even though they're not
    legal to declare in raises (internal.script_error,
    internal.schema_violation, internal.undeclared_kind).
  - is_reserved_prefix(kind) helper.

- RouteDef.on_error: bool | str | list[str] | None
  - None = success route (existing behavior).
  - True = catch-all error route.
  - str  = single-kind error route.
  - list[str] = multi-kind error route.
  - False is rejected (no semantic meaning).
  - Kind format enforced via KIND_PATTERN.
  - 'before'-mode validator so Pydantic's bool/str coercion doesn't
    swallow the discriminator.

- AgentDef.raises: list[str] | None
  - Optional declaration of kinds the node may raise.
  - Powers a load-time lint (cross-checked against routes' on_error in
    the validator, landing in a follow-up commit) and a runtime
    undeclared-kind check (will land with the engine-wiring commit).
  - Reserved prefixes rejected so authors can't claim runtime
    namespaces; duplicates rejected; format enforced via KIND_PATTERN.

Tests:
- tests/test_error_kinds.py — 24 cases covering pattern + prefix +
  allowlist invariants (allowlist entries must themselves be reserved).
- tests/test_config/test_schema.py::TestRouteDefOnError — 14 cases.
- tests/test_config/test_schema.py::TestAgentDefRaises — 10 cases.

No semantics change for existing workflows: both fields default to None
and the engine doesn't observe them yet (wiring lands in subsequent
commits).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/config/schema.py   | 131 ++++++++++++++++++++++++++-
 src/conductor/error_kinds.py     |  57 ++++++++++++
 tests/test_config/test_schema.py | 149 +++++++++++++++++++++++++++++++
 tests/test_error_kinds.py        | 100 +++++++++++++++++++++
 4 files changed, 436 insertions(+), 1 deletion(-)
 create mode 100644 src/conductor/error_kinds.py
 create mode 100644 tests/test_error_kinds.py

diff --git a/src/conductor/config/schema.py b/src/conductor/config/schema.py
index 43e9d65..0a8ab7a 100644
--- a/src/conductor/config/schema.py
+++ b/src/conductor/config/schema.py
@@ -10,6 +10,7 @@
 
 from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
+from conductor.error_kinds import KIND_PATTERN, RESERVED_KIND_PREFIXES, is_reserved_prefix
 from conductor.providers.reasoning import ReasoningEffort
 
 
@@ -86,7 +87,19 @@ def validate_type_specific_fields(self) -> OutputField:
 
 
 class RouteDef(BaseModel):
-    """Definition for a routing rule."""
+    """Definition for a routing rule.
+
+    Routes split into two buckets at evaluation time:
+
+    - **Success routes** (``on_error`` is ``None``, the default): matched
+      only when the producing node completed successfully.
+    - **Error routes** (``on_error`` is set): matched only when the
+      producing node raised a typed error envelope.
+
+    Within each bucket, routes are evaluated in declaration order; the
+    first ``when`` to evaluate truthy wins. A route with no ``when``
+    always matches within its bucket.
+    """
 
     model_config = ConfigDict(extra="forbid")
 
@@ -99,6 +112,24 @@ class RouteDef(BaseModel):
     output: dict[str, str] | None = None
     """Optional output transformation (template expressions)."""
 
+    on_error: bool | str | list[str] | None = None
+    """Marks this route as an error route and selects which error kinds it matches.
+
+    Accepted forms:
+
+    - ``None`` (default): success route. Matches only when the producing
+      node completed successfully.
+    - ``True``: catch-all error route. Matches any raised kind.
+    - ``str``: matches that single dotted-lowercase kind exactly
+      (e.g. ``external.git.fetch_failed``).
+    - ``list[str]``: matches if the raised kind is any entry in the list.
+
+    ``False`` is rejected: omit the field for a success route.
+
+    The kind format is enforced by :data:`conductor.error_kinds.KIND_PATTERN`.
+    See ``docs/projects/error-routing/on-error-routing.brainstorm.md``.
+    """
+
     @field_validator("to")
     @classmethod
     def validate_target(cls, v: str) -> str:
@@ -107,6 +138,47 @@ def validate_target(cls, v: str) -> str:
             raise ValueError("Route target cannot be empty")
         return v
 
+    @field_validator("on_error", mode="before")
+    @classmethod
+    def validate_on_error(cls, v: Any) -> bool | str | list[str] | None:
+        """Validate the on_error discriminator and kind format.
+
+        Runs in ``before`` mode so we see the raw YAML-decoded value
+        and aren't fighting Pydantic's bool/str coercion.
+        """
+        if v is None:
+            return None
+        if isinstance(v, bool):
+            if v is False:
+                raise ValueError(
+                    "on_error: false is not allowed; omit the field for a success route"
+                )
+            return True
+        if isinstance(v, str):
+            if not v:
+                raise ValueError("on_error kind cannot be empty")
+            if not KIND_PATTERN.match(v):
+                raise ValueError(
+                    f"on_error kind '{v}' must be a dotted lowercase identifier "
+                    "(e.g. 'external.git.fetch_failed')"
+                )
+            return v
+        if isinstance(v, list):
+            if not v:
+                raise ValueError("on_error kind list cannot be empty")
+            for entry in v:
+                if not isinstance(entry, str) or not entry:
+                    raise ValueError(
+                        f"on_error kind list entries must be non-empty strings, got {entry!r}"
+                    )
+                if not KIND_PATTERN.match(entry):
+                    raise ValueError(
+                        f"on_error kind '{entry}' must be a dotted lowercase identifier "
+                        "(e.g. 'external.git.fetch_failed')"
+                    )
+            return v
+        raise ValueError(f"on_error must be bool, str, or list[str]; got {type(v).__name__}")
+
 
 class ParallelGroup(BaseModel):
     """Definition for a parallel agent execution group."""
@@ -508,6 +580,30 @@ class AgentDef(BaseModel):
     routes: list[RouteDef] = Field(default_factory=list)
     """Routing rules evaluated in order after execution."""
 
+    raises: list[str] | None = None
+    """Optional declaration of error kinds this node may raise.
+
+    When present, two checks fire:
+
+    1. **Load-time lint** (in :mod:`conductor.config.validator`): every
+       ``on_error`` kind on routes leaving this node must either appear
+       in ``raises``, be in the reserved on_error allowlist, or be a
+       catch-all (``on_error: true``). Typos surface at validate time.
+    2. **Runtime undeclared-kind check**: if the node raises a kind not
+       in this list, the runtime rewrites the envelope's ``kind`` to
+       ``internal.undeclared_kind`` and preserves the original kind
+       under ``details.original_kind``. This makes contract drift
+       routable and visible.
+
+    Entries must match :data:`conductor.error_kinds.KIND_PATTERN` and
+    must NOT use a reserved prefix (``internal.``, ``provider.``,
+    ``subworkflow.``, ``retry.``) — those namespaces are owned by the
+    runtime.
+
+    Strictly optional: existing workflows can adopt ``on_error`` routing
+    without declaring ``raises`` and skip the contract enforcement.
+    """
+
     options: list[GateOption] | None = None
     """Options for human_gate type agents."""
 
@@ -679,6 +775,39 @@ def validate_timeout(cls, v: int | None) -> int | None:
             raise ValueError("timeout must be a positive integer")
         return v
 
+    @field_validator("raises")
+    @classmethod
+    def validate_raises(cls, v: list[str] | None) -> list[str] | None:
+        """Validate the optional ``raises`` declaration.
+
+        Each entry must match :data:`conductor.error_kinds.KIND_PATTERN`
+        and must NOT use a reserved prefix — those prefixes name
+        runtime-synthesized kinds, not author-classified failures.
+        """
+        if v is None:
+            return None
+        if not v:
+            raise ValueError("raises list cannot be empty; omit the field instead of declaring []")
+        seen: set[str] = set()
+        for kind in v:
+            if not isinstance(kind, str) or not kind:
+                raise ValueError(f"raises entries must be non-empty strings, got {kind!r}")
+            if not KIND_PATTERN.match(kind):
+                raise ValueError(
+                    f"raises kind '{kind}' must be a dotted lowercase identifier "
+                    "(e.g. 'external.git.fetch_failed')"
+                )
+            if is_reserved_prefix(kind):
+                raise ValueError(
+                    f"raises kind '{kind}' uses a reserved prefix; reserved prefixes "
+                    f"({', '.join(RESERVED_KIND_PREFIXES)}) name runtime-synthesized "
+                    "kinds and cannot be declared by workflow authors"
+                )
+            if kind in seen:
+                raise ValueError(f"raises kind '{kind}' is declared more than once")
+            seen.add(kind)
+        return v
+
     @model_validator(mode="after")
     def validate_agent_type(self) -> AgentDef:
         """Ensure agent has required fields for its type."""
diff --git a/src/conductor/error_kinds.py b/src/conductor/error_kinds.py
new file mode 100644
index 0000000..f3f36f2
--- /dev/null
+++ b/src/conductor/error_kinds.py
@@ -0,0 +1,57 @@
+"""Error kind constants and validation helpers for typed error envelopes.
+
+The "kind" of an error is a flat dotted lowercase identifier (e.g.
+``external.git.fetch_failed``) authored by workflow authors at the
+failure site. The runtime never infers a kind; it carries the authored
+kind verbatim or, in a small set of well-defined situations, synthesizes
+a reserved kind.
+
+See ``docs/projects/error-routing/on-error-routing.brainstorm.md`` for
+the full design.
+"""
+
+from __future__ import annotations
+
+import re
+
+KIND_PATTERN = re.compile(r"^[a-z_][a-z0-9_]*(\.[a-z_][a-z0-9_]*)+$")
+"""Pattern for an error kind: at least one dot, lowercase segments only.
+
+Examples that match: ``external.git.fetch_failed``, ``policy.budget_exceeded``.
+Examples that do NOT match: ``oops`` (no dot), ``Git.Fetch`` (uppercase),
+``.leading_dot``, ``trailing_dot.``.
+"""
+
+RESERVED_KIND_PREFIXES: tuple[str, ...] = (
+    "internal.",
+    "provider.",
+    "subworkflow.",
+    "retry.",
+)
+"""Prefixes the runtime reserves for synthetic envelopes.
+
+Workflow authors cannot declare a kind under these prefixes in their
+``raises:`` list. The runtime may emit kinds under these prefixes
+(e.g. ``internal.script_error``, ``internal.schema_violation``) when
+classifying its own failures.
+"""
+
+RESERVED_ON_ERROR_ALLOWLIST: frozenset[str] = frozenset(
+    {
+        "internal.script_error",
+        "internal.schema_violation",
+        "internal.undeclared_kind",
+    }
+)
+"""Reserved kinds that are legal to match in ``on_error`` even though
+they cannot be declared in ``raises``.
+
+This is the closed set of runtime-synthesized kinds in Phase 1. Phase 2
+will add ``subworkflow.*`` propagation kinds and Phase 3 will add
+``provider.exhausted``.
+"""
+
+
+def is_reserved_prefix(kind: str) -> bool:
+    """Return True if ``kind`` begins with a reserved prefix."""
+    return any(kind.startswith(prefix) for prefix in RESERVED_KIND_PREFIXES)
diff --git a/tests/test_config/test_schema.py b/tests/test_config/test_schema.py
index bba4994..3b57004 100644
--- a/tests/test_config/test_schema.py
+++ b/tests/test_config/test_schema.py
@@ -141,6 +141,89 @@ def test_empty_target_raises(self) -> None:
             RouteDef(to="")
 
 
+class TestRouteDefOnError:
+    """Tests for the ``on_error`` field added in Phase 1 error routing."""
+
+    def test_default_is_none_success_route(self) -> None:
+        """Without on_error a route is a success route."""
+        route = RouteDef(to="next")
+        assert route.on_error is None
+
+    def test_on_error_true_is_catch_all(self) -> None:
+        """``on_error: true`` marks a catch-all error route."""
+        route = RouteDef(to="handler", on_error=True)
+        assert route.on_error is True
+
+    def test_on_error_false_is_rejected(self) -> None:
+        """``on_error: false`` has no semantic meaning."""
+        with pytest.raises(ValidationError) as exc:
+            RouteDef(to="x", on_error=False)
+        assert "on_error: false" in str(exc.value)
+
+    def test_on_error_single_kind(self) -> None:
+        """A single dotted lowercase kind is accepted."""
+        route = RouteDef(to="handler", on_error="external.git.fetch_failed")
+        assert route.on_error == "external.git.fetch_failed"
+
+    def test_on_error_kind_list(self) -> None:
+        """A non-empty list of dotted lowercase kinds is accepted."""
+        route = RouteDef(to="handler", on_error=["external.git.fetch_failed", "policy.budget"])
+        assert route.on_error == ["external.git.fetch_failed", "policy.budget"]
+
+    def test_on_error_empty_string_rejected(self) -> None:
+        """Empty string is not a kind."""
+        with pytest.raises(ValidationError):
+            RouteDef(to="x", on_error="")
+
+    def test_on_error_uppercase_rejected(self) -> None:
+        """Kinds are lowercase."""
+        with pytest.raises(ValidationError) as exc:
+            RouteDef(to="x", on_error="External.Git.Fetch")
+        assert "dotted lowercase" in str(exc.value)
+
+    def test_on_error_undotted_rejected(self) -> None:
+        """Kinds must contain at least one dot — flat identifiers are not kinds."""
+        with pytest.raises(ValidationError):
+            RouteDef(to="x", on_error="oops")
+
+    def test_on_error_leading_dot_rejected(self) -> None:
+        """Leading dot is not a valid identifier."""
+        with pytest.raises(ValidationError):
+            RouteDef(to="x", on_error=".external.git")
+
+    def test_on_error_empty_list_rejected(self) -> None:
+        """Empty list is rejected — use catch-all ``true`` instead."""
+        with pytest.raises(ValidationError) as exc:
+            RouteDef(to="x", on_error=[])
+        assert "cannot be empty" in str(exc.value)
+
+    def test_on_error_list_with_bad_entry_rejected(self) -> None:
+        """One bad entry rejects the whole list."""
+        with pytest.raises(ValidationError):
+            RouteDef(to="x", on_error=["external.git.fetch_failed", "BAD"])
+
+    def test_on_error_wrong_type_rejected(self) -> None:
+        """on_error: 5 makes no sense."""
+        with pytest.raises(ValidationError):
+            RouteDef(to="x", on_error=5)
+
+    def test_on_error_reserved_kind_allowed(self) -> None:
+        """Reserved kinds (internal.*) ARE legal as on_error matchers
+        even though they're not legal in raises."""
+        route = RouteDef(to="handler", on_error="internal.schema_violation")
+        assert route.on_error == "internal.schema_violation"
+
+    def test_on_error_with_when_clause(self) -> None:
+        """``when:`` composes with on_error — both apply within the bucket."""
+        route = RouteDef(
+            to="handler",
+            on_error="external.git.fetch_failed",
+            when="{{ retry_count < 3 }}",
+        )
+        assert route.on_error == "external.git.fetch_failed"
+        assert route.when == "{{ retry_count < 3 }}"
+
+
 class TestGateOption:
     """Tests for GateOption model."""
 
@@ -311,6 +394,72 @@ def test_human_gate_without_prompt_raises(self) -> None:
         assert "prompt" in str(exc_info.value)
 
 
+class TestAgentDefRaises:
+    """Tests for the ``raises`` declaration added in Phase 1 error routing."""
+
+    def test_default_is_none(self) -> None:
+        """raises is opt-in; omitting it is the default."""
+        agent = AgentDef(name="a", model="gpt-4", prompt="x")
+        assert agent.raises is None
+
+    def test_single_kind(self) -> None:
+        agent = AgentDef(name="a", model="gpt-4", prompt="x", raises=["external.git.fetch_failed"])
+        assert agent.raises == ["external.git.fetch_failed"]
+
+    def test_multiple_kinds(self) -> None:
+        agent = AgentDef(
+            name="a",
+            model="gpt-4",
+            prompt="x",
+            raises=["external.git.fetch_failed", "policy.budget_exceeded"],
+        )
+        assert len(agent.raises) == 2
+
+    def test_empty_list_rejected(self) -> None:
+        """Empty list is rejected — omit the field instead."""
+        with pytest.raises(ValidationError) as exc:
+            AgentDef(name="a", model="gpt-4", prompt="x", raises=[])
+        assert "cannot be empty" in str(exc.value)
+
+    def test_reserved_prefix_rejected(self) -> None:
+        """Workflow authors cannot claim runtime-owned prefixes."""
+        for reserved in (
+            "internal.something",
+            "provider.exhausted",
+            "subworkflow.failed",
+            "retry.exhausted",
+        ):
+            with pytest.raises(ValidationError) as exc:
+                AgentDef(name="a", model="gpt-4", prompt="x", raises=[reserved])
+            assert "reserved prefix" in str(exc.value)
+
+    def test_uppercase_rejected(self) -> None:
+        with pytest.raises(ValidationError):
+            AgentDef(name="a", model="gpt-4", prompt="x", raises=["External.Git"])
+
+    def test_undotted_rejected(self) -> None:
+        with pytest.raises(ValidationError):
+            AgentDef(name="a", model="gpt-4", prompt="x", raises=["oops"])
+
+    def test_duplicate_rejected(self) -> None:
+        with pytest.raises(ValidationError) as exc:
+            AgentDef(
+                name="a",
+                model="gpt-4",
+                prompt="x",
+                raises=["external.git.fetch_failed", "external.git.fetch_failed"],
+            )
+        assert "more than once" in str(exc.value)
+
+    def test_non_string_entry_rejected(self) -> None:
+        with pytest.raises(ValidationError):
+            AgentDef(name="a", model="gpt-4", prompt="x", raises=["external.git", 5])
+
+    def test_empty_string_entry_rejected(self) -> None:
+        with pytest.raises(ValidationError):
+            AgentDef(name="a", model="gpt-4", prompt="x", raises=[""])
+
+
 class TestAgentDefMaxSessionSeconds:
     """Tests for max_session_seconds on AgentDef."""
 
diff --git a/tests/test_error_kinds.py b/tests/test_error_kinds.py
new file mode 100644
index 0000000..a483786
--- /dev/null
+++ b/tests/test_error_kinds.py
@@ -0,0 +1,100 @@
+"""Tests for the kind constants and helpers in ``conductor.error_kinds``."""
+
+from __future__ import annotations
+
+import pytest
+
+from conductor.error_kinds import (
+    KIND_PATTERN,
+    RESERVED_KIND_PREFIXES,
+    RESERVED_ON_ERROR_ALLOWLIST,
+    is_reserved_prefix,
+)
+
+
+class TestKindPattern:
+    """Tests for the KIND_PATTERN regex."""
+
+    @pytest.mark.parametrize(
+        "kind",
+        [
+            "external.git.fetch_failed",
+            "policy.budget",
+            "a.b",
+            "_private.x",
+            "x.y.z.aa",
+            "x.y_1.z2",
+        ],
+    )
+    def test_valid_kinds(self, kind: str) -> None:
+        assert KIND_PATTERN.match(kind) is not None
+
+    @pytest.mark.parametrize(
+        "kind",
+        [
+            "",
+            "oops",  # no dot
+            "External.Git",  # uppercase
+            ".leading",
+            "trailing.",
+            "double..dot",
+            "1starts_with_digit.x",
+            "x.1starts_with_digit",
+            "x-y.z",  # hyphen not allowed
+            "x y.z",  # space not allowed
+        ],
+    )
+    def test_invalid_kinds(self, kind: str) -> None:
+        assert KIND_PATTERN.match(kind) is None
+
+
+class TestReservedPrefix:
+    """Tests for ``is_reserved_prefix`` and the prefix tuple."""
+
+    @pytest.mark.parametrize(
+        "kind",
+        [
+            "internal.script_error",
+            "internal.schema_violation",
+            "internal.undeclared_kind",
+            "provider.exhausted",
+            "subworkflow.failed",
+            "retry.exhausted",
+        ],
+    )
+    def test_reserved_kinds_detected(self, kind: str) -> None:
+        assert is_reserved_prefix(kind)
+
+    @pytest.mark.parametrize(
+        "kind",
+        [
+            "external.git",
+            "internal_x.y",  # underscore not a dot — not reserved
+            "providers.x",  # plural is fine
+            "subworkflows.x",
+        ],
+    )
+    def test_non_reserved_kinds(self, kind: str) -> None:
+        assert not is_reserved_prefix(kind)
+
+    def test_all_reserved_prefixes_end_with_dot(self) -> None:
+        """A prefix without the trailing dot would false-positive
+        on flat identifiers like ``internalstuff``."""
+        for prefix in RESERVED_KIND_PREFIXES:
+            assert prefix.endswith(".")
+
+
+class TestReservedOnErrorAllowlist:
+    """Tests for the allowlist of runtime kinds matchable in ``on_error``."""
+
+    def test_allowlist_entries_are_reserved(self) -> None:
+        """Every allowlisted kind must itself live under a reserved
+        prefix — otherwise the matrix is inconsistent."""
+        for kind in RESERVED_ON_ERROR_ALLOWLIST:
+            assert is_reserved_prefix(kind), (
+                f"allowlist entry {kind!r} is not under a reserved prefix"
+            )
+
+    def test_allowlist_is_frozenset(self) -> None:
+        """Allowlist is immutable — callers shouldn't mutate it."""
+        assert isinstance(RESERVED_ON_ERROR_ALLOWLIST, frozenset)

From efbe23c91aab97aba16ff2ec678447be24d6a2ee Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 14:15:50 -0700
Subject: [PATCH 02/14] feat(engine): add ErrorEnvelope type, coerce_envelope,
 and unhandled exceptions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1, step 2 of on_error routing.

Adds src/conductor/engine/errors.py:

- ErrorEnvelope TypedDict — the internal {kind, message, details}
  shape. Strips the on-the-wire conductor_error: true discriminator so
  callers don't see it in {{ failing_node.error.* }} templates.
- EnvelopeValidationError — distinct from ValidationError so the
  engine can catch and translate malformed envelopes into synthetic
  internal.* kinds rather than halting with a generic config error.
- coerce_envelope(raw) — validates on-the-wire input, normalizes
  details to {} when absent.
- make_script_error(exit_code, stderr_tail, command) — synthesizes
  the internal.script_error envelope.
- make_schema_violation(node_name, source, original_message,
  failed_field?) — synthesizes the internal.schema_violation envelope
  with rich details for the swallowed-by-catch-all diagnostics case.
- wrap_undeclared_kind(original, declared) — wraps an envelope whose
  kind isn't in the node's declared raises list. Preserves the
  original kind/message/details under details.original_* so an author
  handling internal.undeclared_kind can still recover the intent.

Adds two exceptions to src/conductor/exceptions.py:

- UnhandledNodeError — internal signal raised by the router when an
  error envelope reaches no on_error route at the current level. The
  engine catches this at the per-node dispatch site and re-raises as
  UnhandledWorkflowError. Not intended to surface to end users.
- UnhandledWorkflowError — workflow halted on a typed error envelope.
  Carries the envelope and a frame trail (single frame in Phase 1;
  Phase 2 will accumulate frames across sub-workflow boundaries).
  CLI maps this to a distinct exit code so callers can distinguish
  'workflow ran and halted on typed error' from generic failures.

Tests: tests/test_engine/test_errors.py — 18 cases covering envelope
coercion (including discriminator stripping and details normalization),
the three synthetic-kind constructors, and the exception classes
including the empty-frames defensive path.

Nothing yet emits these envelopes or exceptions; the next commits wire
them through the router, executors, and engine dispatch.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/engine/errors.py   | 206 +++++++++++++++++++++++++++++++
 src/conductor/exceptions.py      |  81 ++++++++++++
 tests/test_engine/test_errors.py | 165 +++++++++++++++++++++++++
 3 files changed, 452 insertions(+)
 create mode 100644 src/conductor/engine/errors.py
 create mode 100644 tests/test_engine/test_errors.py

diff --git a/src/conductor/engine/errors.py b/src/conductor/engine/errors.py
new file mode 100644
index 0000000..891bd54
--- /dev/null
+++ b/src/conductor/engine/errors.py
@@ -0,0 +1,206 @@
+"""Typed error envelopes and helpers for ``on_error`` routing.
+
+The "envelope" is the runtime representation of a node-level raise:
+
+.. code-block:: python
+
+    {
+        "kind":    "external.git.fetch_failed",
+        "message": "git fetch origin failed (exit 128)",
+        "details": {"exit_code": 128, "stderr_tail": "fatal: ..."},
+    }
+
+The on-the-wire shape (what scripts write to ``$CONDUCTOR_ERROR_OUT``
+and what agents emit as their JSON response) additionally carries a
+``conductor_error: true`` discriminator. :func:`coerce_envelope`
+strips the discriminator and validates the structure, producing the
+internal :class:`ErrorEnvelope` shape.
+
+Three reserved synthetic kinds are produced by the runtime:
+
+- :func:`make_script_error` → ``internal.script_error`` when a script
+  exits non-zero without writing an envelope (and the node opts in via
+  ``raises`` or any ``on_error`` route).
+- :func:`make_schema_violation` → ``internal.schema_violation`` when an
+  agent's output fails its declared ``output:`` schema.
+- :func:`wrap_undeclared_kind` → ``internal.undeclared_kind`` when a
+  node with ``raises:`` raises a kind not in its declared list. The
+  original kind is preserved under ``details.original_kind``.
+
+See ``docs/projects/error-routing/on-error-routing.brainstorm.md``
+(D1, D2) for the full design.
+"""
+
+from __future__ import annotations
+
+from typing import Any, TypedDict
+
+from conductor.error_kinds import KIND_PATTERN
+
+
+class ErrorEnvelope(TypedDict):
+    """Internal representation of a node-level raise.
+
+    All three fields are present on every envelope after coercion.
+    ``details`` may be an empty dict but is never absent — that keeps
+    template access like ``{{ failing_node.error.details.foo }}``
+    safe even when the author didn't include details.
+    """
+
+    kind: str
+    message: str
+    details: dict[str, Any]
+
+
+class EnvelopeValidationError(ValueError):
+    """Raised when raw envelope input fails structural validation.
+
+    Distinct from :class:`conductor.exceptions.ValidationError` so the
+    engine can catch and translate this into an
+    ``internal.schema_violation`` or ``internal.script_error``
+    synthetic envelope, rather than halting the workflow with a
+    generic configuration error.
+    """
+
+
+def coerce_envelope(raw: Any) -> ErrorEnvelope:
+    """Validate and normalize raw envelope input into an :class:`ErrorEnvelope`.
+
+    Accepts the on-the-wire shape (with or without the
+    ``conductor_error: true`` discriminator) and returns the internal
+    shape with the discriminator stripped and ``details`` defaulted to
+    ``{}``.
+
+    Args:
+        raw: A dict that should describe an envelope. Anything else
+            raises :class:`EnvelopeValidationError`.
+
+    Returns:
+        A clean :class:`ErrorEnvelope`.
+
+    Raises:
+        EnvelopeValidationError: If ``raw`` is not a dict, is missing
+            required fields, or has malformed values.
+    """
+    if not isinstance(raw, dict):
+        raise EnvelopeValidationError(f"envelope must be a JSON object, got {type(raw).__name__}")
+
+    kind = raw.get("kind")
+    if not isinstance(kind, str) or not kind:
+        raise EnvelopeValidationError("envelope.kind must be a non-empty string")
+    if not KIND_PATTERN.match(kind):
+        raise EnvelopeValidationError(
+            f"envelope.kind '{kind}' must be a dotted lowercase identifier "
+            "(e.g. 'external.git.fetch_failed')"
+        )
+
+    message = raw.get("message")
+    if not isinstance(message, str) or not message:
+        raise EnvelopeValidationError("envelope.message must be a non-empty string")
+
+    details = raw.get("details", {})
+    if details is None:
+        details = {}
+    if not isinstance(details, dict):
+        raise EnvelopeValidationError(
+            f"envelope.details must be a JSON object, got {type(details).__name__}"
+        )
+
+    return ErrorEnvelope(kind=kind, message=message, details=details)
+
+
+def make_script_error(
+    *,
+    exit_code: int,
+    stderr_tail: str,
+    command: str,
+) -> ErrorEnvelope:
+    """Synthesize an ``internal.script_error`` envelope.
+
+    Used when a script exits non-zero, does not write an envelope, AND
+    the node has opted into error routing (``raises`` or any
+    ``on_error`` route present). Without opt-in the engine preserves
+    legacy ``exit_code`` routing.
+
+    Args:
+        exit_code: The non-zero exit code.
+        stderr_tail: Last N characters of stderr (truncated for sanity).
+        command: The rendered script command for diagnostic context.
+    """
+    return ErrorEnvelope(
+        kind="internal.script_error",
+        message=stderr_tail.strip().splitlines()[-1]
+        if stderr_tail.strip()
+        else f"script exited with code {exit_code}",
+        details={
+            "exit_code": exit_code,
+            "stderr_tail": stderr_tail,
+            "command": command,
+        },
+    )
+
+
+def make_schema_violation(
+    *,
+    node_name: str,
+    source: str,
+    original_message: str,
+    failed_field: str | None = None,
+) -> ErrorEnvelope:
+    """Synthesize an ``internal.schema_violation`` envelope.
+
+    Used when an agent's output fails ``output:`` schema validation
+    (Phase 1) or a script's structured output fails its schema.
+
+    Args:
+        node_name: The name of the node whose output failed.
+        source: ``"agent"`` or ``"script"``.
+        original_message: Message from the underlying
+            :class:`conductor.exceptions.ValidationError`.
+        failed_field: Optional name of the offending field if known.
+    """
+    details: dict[str, Any] = {
+        "node": node_name,
+        "source": source,
+        "original_message": original_message,
+    }
+    if failed_field is not None:
+        details["failed_field"] = failed_field
+    return ErrorEnvelope(
+        kind="internal.schema_violation",
+        message=f"{source} '{node_name}' output failed schema validation: {original_message}",
+        details=details,
+    )
+
+
+def wrap_undeclared_kind(
+    original: ErrorEnvelope,
+    *,
+    declared: list[str],
+) -> ErrorEnvelope:
+    """Wrap an envelope whose ``kind`` isn't in the node's ``raises`` list.
+
+    Preserves the original kind under ``details.original_kind`` and the
+    original details under ``details.original_details`` so an author
+    handling ``internal.undeclared_kind`` can still recover the intent.
+
+    Args:
+        original: The envelope as raised by the node.
+        declared: The node's ``raises`` list, for diagnostics.
+
+    Returns:
+        A new envelope with kind ``internal.undeclared_kind``.
+    """
+    return ErrorEnvelope(
+        kind="internal.undeclared_kind",
+        message=(
+            f"node raised kind '{original['kind']}' which is not in its declared "
+            f"raises list ({', '.join(declared)})"
+        ),
+        details={
+            "original_kind": original["kind"],
+            "original_message": original["message"],
+            "original_details": original["details"],
+            "declared": list(declared),
+        },
+    )
diff --git a/src/conductor/exceptions.py b/src/conductor/exceptions.py
index d3bac88..15cce77 100644
--- a/src/conductor/exceptions.py
+++ b/src/conductor/exceptions.py
@@ -603,3 +603,84 @@ def __init__(
             suggestion = f"All {max_attempts} retry attempts have been exhausted"
 
         super().__init__(message, suggestion)
+
+
+class UnhandledNodeError(ConductorError):
+    """Internal signal raised by the router when a node raises an
+    error envelope and no ``on_error`` route at the same level matches.
+
+    Engine catches this at the per-node dispatch site and re-raises as
+    :class:`UnhandledWorkflowError` (with a frame trail) so workflow-
+    level halt handling can emit ``errors.jsonl`` and map to the
+    distinct CLI exit code.
+
+    This exception is not intended to surface to end users; if you see
+    it in a stack trace, the engine missed a catch.
+    """
+
+    def __init__(
+        self,
+        envelope: dict[str, object],
+        node_name: str,
+    ) -> None:
+        """Initialize an UnhandledNodeError.
+
+        Args:
+            envelope: The :class:`conductor.engine.errors.ErrorEnvelope`
+                that no route handled.
+            node_name: Name of the node that raised the envelope.
+        """
+        self.envelope = envelope
+        self.node_name = node_name
+        super().__init__(
+            f"node '{node_name}' raised '{envelope.get('kind')}' but no on_error "
+            "route at this level matched",
+            suggestion=(
+                "Add an on_error route matching this kind (or `on_error: true` to "
+                "catch any kind) at the same routing level as the node, or remove "
+                "the raise from the node."
+            ),
+        )
+
+
+class UnhandledWorkflowError(ConductorError):
+    """Workflow halted because a node raised an error envelope that no
+    ``on_error`` route handled.
+
+    Carries the original envelope and a frame trail describing where
+    the raise originated. In Phase 1 the trail has a single frame for
+    the failing leaf node; Phase 2 will accumulate frames as envelopes
+    propagate across sub-workflow boundaries.
+
+    Caught at the CLI layer and mapped to a distinct exit code so
+    callers can distinguish "workflow ran and halted on typed error"
+    from generic failures.
+    """
+
+    def __init__(
+        self,
+        envelope: dict[str, object],
+        frames: list[dict[str, object]],
+    ) -> None:
+        """Initialize an UnhandledWorkflowError.
+
+        Args:
+            envelope: The :class:`conductor.engine.errors.ErrorEnvelope`
+                that propagated unhandled to workflow root.
+            frames: Frame trail (innermost first). Phase 1 always has
+                exactly one frame; preserved as a list so the shape is
+                stable across Phase 2 propagation work.
+        """
+        self.envelope = envelope
+        self.frames = frames
+        kind = envelope.get("kind", "<unknown>")
+        message = envelope.get("message", "")
+        node = frames[0].get("node", "<unknown>") if frames else "<unknown>"
+        super().__init__(
+            f"workflow halted: node '{node}' raised '{kind}' with no handling route: {message}",
+            suggestion=(
+                "Add an on_error route at the workflow level that matches this kind "
+                "(or `on_error: true` for a catch-all), or fix the underlying "
+                "condition the node is reporting."
+            ),
+        )
diff --git a/tests/test_engine/test_errors.py b/tests/test_engine/test_errors.py
new file mode 100644
index 0000000..7d25fa4
--- /dev/null
+++ b/tests/test_engine/test_errors.py
@@ -0,0 +1,165 @@
+"""Tests for ``conductor.engine.errors`` and the new exception classes."""
+
+from __future__ import annotations
+
+import pytest
+
+from conductor.engine.errors import (
+    EnvelopeValidationError,
+    ErrorEnvelope,
+    coerce_envelope,
+    make_schema_violation,
+    make_script_error,
+    wrap_undeclared_kind,
+)
+from conductor.exceptions import (
+    ConductorError,
+    UnhandledNodeError,
+    UnhandledWorkflowError,
+)
+
+
+class TestCoerceEnvelope:
+    """Tests for :func:`coerce_envelope`."""
+
+    def test_minimal_envelope(self) -> None:
+        env = coerce_envelope({"kind": "external.git.fetch_failed", "message": "boom"})
+        assert env["kind"] == "external.git.fetch_failed"
+        assert env["message"] == "boom"
+        assert env["details"] == {}
+
+    def test_full_envelope(self) -> None:
+        env = coerce_envelope(
+            {
+                "kind": "external.git.fetch_failed",
+                "message": "boom",
+                "details": {"exit_code": 128},
+            }
+        )
+        assert env["details"] == {"exit_code": 128}
+
+    def test_strips_conductor_error_discriminator(self) -> None:
+        """The on-the-wire ``conductor_error: true`` discriminator does
+        not survive into the internal envelope shape."""
+        env = coerce_envelope(
+            {
+                "conductor_error": True,
+                "kind": "external.git.fetch_failed",
+                "message": "boom",
+            }
+        )
+        assert "conductor_error" not in env
+
+    def test_details_none_becomes_empty_dict(self) -> None:
+        env = coerce_envelope({"kind": "x.y", "message": "m", "details": None})
+        assert env["details"] == {}
+
+    def test_non_dict_raw_rejected(self) -> None:
+        with pytest.raises(EnvelopeValidationError) as exc:
+            coerce_envelope("not a dict")
+        assert "JSON object" in str(exc.value)
+
+    def test_missing_kind_rejected(self) -> None:
+        with pytest.raises(EnvelopeValidationError):
+            coerce_envelope({"message": "m"})
+
+    def test_malformed_kind_rejected(self) -> None:
+        with pytest.raises(EnvelopeValidationError):
+            coerce_envelope({"kind": "Oops", "message": "m"})
+
+    def test_missing_message_rejected(self) -> None:
+        with pytest.raises(EnvelopeValidationError):
+            coerce_envelope({"kind": "x.y"})
+
+    def test_empty_message_rejected(self) -> None:
+        with pytest.raises(EnvelopeValidationError):
+            coerce_envelope({"kind": "x.y", "message": ""})
+
+    def test_non_dict_details_rejected(self) -> None:
+        with pytest.raises(EnvelopeValidationError):
+            coerce_envelope({"kind": "x.y", "message": "m", "details": "not a dict"})
+
+
+class TestMakeScriptError:
+    """Tests for :func:`make_script_error`."""
+
+    def test_basic(self) -> None:
+        env = make_script_error(
+            exit_code=128,
+            stderr_tail="fatal: could not resolve host",
+            command="git",
+        )
+        assert env["kind"] == "internal.script_error"
+        assert "fatal: could not resolve host" in env["message"]
+        assert env["details"]["exit_code"] == 128
+        assert env["details"]["command"] == "git"
+
+    def test_empty_stderr_uses_exit_code(self) -> None:
+        env = make_script_error(exit_code=42, stderr_tail="", command="x")
+        assert "42" in env["message"]
+
+
+class TestMakeSchemaViolation:
+    def test_basic(self) -> None:
+        env = make_schema_violation(
+            node_name="extractor",
+            source="agent",
+            original_message="missing field 'answer'",
+        )
+        assert env["kind"] == "internal.schema_violation"
+        assert env["details"]["node"] == "extractor"
+        assert env["details"]["source"] == "agent"
+
+    def test_with_failed_field(self) -> None:
+        env = make_schema_violation(
+            node_name="extractor",
+            source="agent",
+            original_message="bad",
+            failed_field="answer",
+        )
+        assert env["details"]["failed_field"] == "answer"
+
+
+class TestWrapUndeclaredKind:
+    def test_preserves_original(self) -> None:
+        original = ErrorEnvelope(
+            kind="external.git.fetch_failed",
+            message="boom",
+            details={"exit_code": 128},
+        )
+        wrapped = wrap_undeclared_kind(original, declared=["external.git.push_failed"])
+        assert wrapped["kind"] == "internal.undeclared_kind"
+        assert wrapped["details"]["original_kind"] == "external.git.fetch_failed"
+        assert wrapped["details"]["original_message"] == "boom"
+        assert wrapped["details"]["original_details"] == {"exit_code": 128}
+        assert wrapped["details"]["declared"] == ["external.git.push_failed"]
+
+
+class TestExceptions:
+    """Tests for the new exception classes."""
+
+    def test_unhandled_node_error_carries_envelope(self) -> None:
+        env: ErrorEnvelope = {"kind": "x.y", "message": "m", "details": {}}
+        exc = UnhandledNodeError(env, node_name="step1")
+        assert exc.envelope is env
+        assert exc.node_name == "step1"
+        assert isinstance(exc, ConductorError)
+        assert "step1" in str(exc)
+        assert "x.y" in str(exc)
+
+    def test_unhandled_workflow_error_carries_envelope_and_frames(self) -> None:
+        env: ErrorEnvelope = {"kind": "x.y", "message": "m", "details": {}}
+        frames = [{"node": "step1", "workflow": "root", "type": "agent"}]
+        exc = UnhandledWorkflowError(env, frames=frames)
+        assert exc.envelope is env
+        assert exc.frames == frames
+        assert isinstance(exc, ConductorError)
+        assert "step1" in str(exc)
+        assert "x.y" in str(exc)
+        assert "m" in str(exc)
+
+    def test_unhandled_workflow_error_empty_frames(self) -> None:
+        """Empty frames shouldn't crash; should produce a defensible message."""
+        env: ErrorEnvelope = {"kind": "x.y", "message": "m", "details": {}}
+        exc = UnhandledWorkflowError(env, frames=[])
+        assert "<unknown>" in str(exc)

From ab31f8a61a1c5c855c7dace6c188cfe8c586cc3f Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 14:20:30 -0700
Subject: [PATCH 03/14] feat(router): split routes into success vs. error
 buckets

Route evaluation now partitions by RouteDef.on_error:

- on_error is None  -> success bucket, evaluated when error=None

- on_error is set   -> error bucket, evaluated when an envelope is

  passed; the route's on_error matcher (True | str | list[str])

  must match envelope[kind]

Behavior preserved on the success path: first matching when: wins, no

catch-all raises the existing ValueError. New: error-path exhaustion

raises UnhandledNodeError carrying the envelope so the engine can

translate it into UnhandledWorkflowError at the call site.

Error-route eval context exposes the envelope as `error` for both

Jinja2 ({{ error.kind }}) and simpleeval (kind == 'x.y' via flatten).

Adds 12 tests in TestRouterErrorBucket covering bucket isolation,

all three on_error matcher shapes, when: combined with on_error,

output: transforms, ordering within the bucket, the new

UnhandledNodeError path, and the legacy ValueError path.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/engine/router.py   | 132 ++++++++++++++++++++----
 tests/test_engine/test_router.py | 171 +++++++++++++++++++++++++++++++
 2 files changed, 283 insertions(+), 20 deletions(-)

diff --git a/src/conductor/engine/router.py b/src/conductor/engine/router.py
index 6dd2a15..0ddefab 100644
--- a/src/conductor/engine/router.py
+++ b/src/conductor/engine/router.py
@@ -3,6 +3,16 @@
 This module provides the Router class for evaluating routing rules to
 determine the next agent in a workflow, including support for Jinja2
 template conditions and simpleeval arithmetic expressions.
+
+Routes split into two buckets at evaluation time:
+
+- **Success routes** (``on_error`` is ``None``, the default): matched
+  only when the producing node completed successfully.
+- **Error routes** (``on_error`` is set): matched only when the
+  producing node raised a typed error envelope.
+
+Within each bucket, the first route whose ``when:`` evaluates truthy
+wins; a route with no ``when:`` always matches its bucket.
 """
 
 from __future__ import annotations
@@ -14,6 +24,7 @@
 
 if TYPE_CHECKING:
     from conductor.config.schema import RouteDef
+    from conductor.engine.errors import ErrorEnvelope
 
 
 @dataclass
@@ -43,7 +54,8 @@ class Router:
     1. Jinja2 template expressions: {{ output.approved }}
     2. Arithmetic expressions via simpleeval: score > 7, iteration < 5
 
-    Example:
+    Success-path example::
+
         >>> from conductor.config.schema import RouteDef
         >>> router = Router()
         >>> routes = [
@@ -53,6 +65,18 @@ class Router:
         >>> result = router.evaluate(routes, {"success": True}, {})
         >>> result.target
         'handler'
+
+    Error-path example::
+
+        >>> routes = [
+        ...     RouteDef(to="recover", on_error="external.git.fetch_failed"),
+        ...     RouteDef(to="$end"),  # success fallback, not taken on error
+        ... ]
+        >>> envelope = {"kind": "external.git.fetch_failed", "message": "boom",
+        ...             "details": {}}
+        >>> result = router.evaluate(routes, {}, {}, error=envelope)
+        >>> result.target
+        'recover'
     """
 
     def __init__(self) -> None:
@@ -64,51 +88,100 @@ def evaluate(
         routes: list[RouteDef],
         current_output: dict[str, Any],
         context: dict[str, Any],
+        error: ErrorEnvelope | None = None,
     ) -> RouteResult:
         """Evaluate routes and return the first matching target.
 
-        Routes are evaluated in order. First matching 'when' condition wins.
-        A route with no 'when' clause always matches.
+        When ``error`` is None, only success routes (``on_error is None``)
+        are considered. When ``error`` is provided, only error routes
+        (``on_error`` set) are considered, AND the route's ``on_error``
+        kind matcher must match the envelope's kind.
 
         Args:
             routes: Ordered list of route definitions.
-            current_output: Output from the just-executed agent.
+            current_output: Output from the just-executed agent. Pass an
+                empty dict if the agent raised (output is not meaningful
+                in that case).
             context: Full workflow context.
+            error: Error envelope from a node-level raise, or None for
+                the success path.
 
         Returns:
             RouteResult with target and optional output transform.
 
         Raises:
-            ValueError: If no routes match (shouldn't happen with proper config).
+            ValueError: On the success path, if no route matches —
+                indicates a configuration error (no success catch-all).
+            UnhandledNodeError: On the error path, if no error route
+                matches the envelope's kind — engine catches this and
+                re-raises as :class:`conductor.exceptions.UnhandledWorkflowError`.
         """
-        # Add current output to context for condition evaluation
-        eval_context = {
-            **context,
-            "output": current_output,
-        }
+        if error is None:
+            return self._evaluate_success(routes, current_output, context)
+        return self._evaluate_error(routes, context, error)
+
+    def _evaluate_success(
+        self,
+        routes: list[RouteDef],
+        current_output: dict[str, Any],
+        context: dict[str, Any],
+    ) -> RouteResult:
+        """Evaluate success routes (``on_error is None``)."""
+        eval_context = {**context, "output": current_output}
 
         for route in routes:
-            if route.when is None:
-                # No condition = always matches
+            if route.on_error is not None:
+                continue  # error routes don't compete on the success path
+
+            if route.when is None or self._evaluate_condition(route.when, eval_context):
                 return RouteResult(
                     target=route.to,
                     output_transform=self._render_output(route.output, eval_context),
                     matched_rule=route,
                 )
 
-            # Evaluate the condition
-            if self._evaluate_condition(route.when, eval_context):
+        raise ValueError(
+            "No matching route found. Ensure at least one route has no 'when' clause "
+            "or add a catch-all route at the end."
+        )
+
+    def _evaluate_error(
+        self,
+        routes: list[RouteDef],
+        context: dict[str, Any],
+        error: ErrorEnvelope,
+    ) -> RouteResult:
+        """Evaluate error routes against an envelope.
+
+        Raises :class:`conductor.exceptions.UnhandledNodeError` when no
+        error route matches. The exception is imported lazily to avoid
+        a hard import cycle through ``conductor.exceptions`` for the
+        success path (which doesn't need it).
+        """
+        # `error` exposed to Jinja and simpleeval; templates use
+        # `{{ error.kind }}`, simpleeval sees flattened `error_kind`.
+        eval_context = {**context, "error": error}
+
+        for route in routes:
+            if route.on_error is None:
+                continue  # success routes don't compete on the error path
+            if not _on_error_matches(route.on_error, error["kind"]):
+                continue
+            if route.when is None or self._evaluate_condition(route.when, eval_context):
                 return RouteResult(
                     target=route.to,
                     output_transform=self._render_output(route.output, eval_context),
                     matched_rule=route,
                 )
 
-        # No routes matched - this is a configuration error
-        raise ValueError(
-            "No matching route found. Ensure at least one route has no 'when' clause "
-            "or add a catch-all route at the end."
-        )
+        # Deferred import: success path must not depend on this.
+        from conductor.exceptions import UnhandledNodeError
+
+        # Best-effort node name from the matched-against frame; engine
+        # call sites pass the failing node's name via the envelope or
+        # in their own UnhandledWorkflowError wrap. Here we use a
+        # placeholder since the router doesn't track node identity.
+        raise UnhandledNodeError(dict(error), node_name=str(context.get("_current_node", "?")))
 
     def _evaluate_condition(self, when: str, context: dict[str, Any]) -> bool:
         """Evaluate a 'when' condition.
@@ -176,7 +249,7 @@ def _flatten_context(self, context: dict[str, Any]) -> dict[str, Any]:
                 for sub_key, sub_value in value.items():
                     flat[f"{key}_{sub_key}"] = sub_value
                     # Also add top-level access for common patterns
-                    if key == "output":
+                    if key in ("output", "error"):
                         flat[sub_key] = sub_value
             else:
                 flat[key] = value
@@ -203,3 +276,22 @@ def _render_output(
         for key, template in output.items():
             result[key] = self.renderer.render(template, context)
         return result
+
+
+def _on_error_matches(on_error: bool | str | list[str], kind: str) -> bool:
+    """Return True if a route's ``on_error`` matcher accepts ``kind``.
+
+    - ``True`` matches any kind (catch-all).
+    - ``str`` matches by exact equality.
+    - ``list[str]`` matches if the kind appears in the list.
+
+    ``False`` and ``None`` are filtered out at the bucket level and
+    should never reach this function.
+    """
+    if on_error is True:
+        return True
+    if isinstance(on_error, str):
+        return on_error == kind
+    if isinstance(on_error, list):
+        return kind in on_error
+    return False
diff --git a/tests/test_engine/test_router.py b/tests/test_engine/test_router.py
index 4e690e6..3901c35 100644
--- a/tests/test_engine/test_router.py
+++ b/tests/test_engine/test_router.py
@@ -431,3 +431,174 @@ def test_empty_string_is_falsy_in_jinja(self) -> None:
 
         result = router.evaluate(routes, {"value": ""}, {})
         assert result.target == "empty"
+
+
+class TestRouterErrorBucket:
+    """Tests for on_error routing.
+
+    Routes split into a success bucket (``on_error is None``) and an
+    error bucket (``on_error`` set). Only the bucket matching the
+    presence/absence of an envelope competes.
+    """
+
+    @staticmethod
+    def _envelope(kind: str, message: str = "boom") -> dict[str, object]:
+        """Build a minimal ErrorEnvelope-shaped dict for tests."""
+        return {"kind": kind, "message": message, "details": {}}
+
+    def test_error_path_success_routes_skipped(self) -> None:
+        """Success routes never match when an envelope is provided."""
+        router = Router()
+        routes = [
+            RouteDef(to="should_not_match"),  # success catch-all
+            RouteDef(to="handler", on_error=True),
+        ]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("any.kind"))
+        assert result.target == "handler"
+
+    def test_success_path_error_routes_skipped(self) -> None:
+        """Error routes never match on the success path."""
+        router = Router()
+        routes = [
+            RouteDef(to="error_handler", on_error=True),
+            RouteDef(to="next"),  # success catch-all
+        ]
+
+        result = router.evaluate(routes, {"ok": True}, {})
+        assert result.target == "next"
+
+    def test_on_error_true_catches_any_kind(self) -> None:
+        """``on_error: true`` matches any envelope kind."""
+        router = Router()
+        routes = [RouteDef(to="catch_all", on_error=True)]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("external.git.drift"))
+        assert result.target == "catch_all"
+
+    def test_on_error_string_exact_match(self) -> None:
+        """A string ``on_error`` matches only the exact kind."""
+        router = Router()
+        routes = [
+            RouteDef(to="git_handler", on_error="external.git.drift"),
+            RouteDef(to="fallback", on_error=True),
+        ]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("external.git.drift"))
+        assert result.target == "git_handler"
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("external.api.timeout"))
+        assert result.target == "fallback"
+
+    def test_on_error_list_membership(self) -> None:
+        """A list ``on_error`` matches if the kind appears in the list."""
+        router = Router()
+        routes = [
+            RouteDef(
+                to="external_handler",
+                on_error=["external.git.drift", "external.api.timeout"],
+            ),
+            RouteDef(to="fallback", on_error=True),
+        ]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("external.api.timeout"))
+        assert result.target == "external_handler"
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("policy.violation"))
+        assert result.target == "fallback"
+
+    def test_error_route_when_clause_applies(self) -> None:
+        """``when:`` still applies in the error bucket."""
+        router = Router()
+        routes = [
+            RouteDef(
+                to="retry",
+                on_error=True,
+                when="{{ error.details.retryable }}",
+            ),
+            RouteDef(to="give_up", on_error=True),
+        ]
+
+        retryable = {"kind": "external.x", "message": "m", "details": {"retryable": True}}
+        not_retryable = {"kind": "external.x", "message": "m", "details": {"retryable": False}}
+
+        assert router.evaluate(routes, {}, {}, error=retryable).target == "retry"
+        assert router.evaluate(routes, {}, {}, error=not_retryable).target == "give_up"
+
+    def test_error_eval_context_exposes_kind_via_jinja(self) -> None:
+        """Templates on error routes can reference ``error.kind`` etc."""
+        router = Router()
+        routes = [
+            RouteDef(
+                to="match",
+                on_error=True,
+                when="{{ error.kind == 'policy.violation' }}",
+            ),
+            RouteDef(to="other", on_error=True),
+        ]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("policy.violation"))
+        assert result.target == "match"
+
+    def test_error_route_output_transform_sees_error(self) -> None:
+        """``output:`` on an error route renders against the envelope."""
+        router = Router()
+        routes = [
+            RouteDef(
+                to="reporter",
+                on_error=True,
+                output={"failed_kind": "{{ error.kind }}"},
+            )
+        ]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("external.git.drift"))
+        assert result.output_transform == {"failed_kind": "external.git.drift"}
+
+    def test_no_matching_error_route_raises_unhandled_node_error(self) -> None:
+        """An envelope with no matching error route raises UnhandledNodeError."""
+        from conductor.exceptions import UnhandledNodeError
+
+        router = Router()
+        routes = [
+            RouteDef(to="git", on_error="external.git.drift"),
+            RouteDef(to="next"),  # success catch-all — must NOT swallow errors
+        ]
+
+        envelope = self._envelope("policy.violation")
+        with pytest.raises(UnhandledNodeError) as exc_info:
+            router.evaluate(routes, {}, {}, error=envelope)
+
+        # The envelope is preserved on the exception so the engine can
+        # wrap it in UnhandledWorkflowError.
+        assert exc_info.value.envelope["kind"] == "policy.violation"
+
+    def test_first_matching_error_route_wins(self) -> None:
+        """Order matters within the error bucket."""
+        router = Router()
+        routes = [
+            RouteDef(to="first", on_error=True),
+            RouteDef(to="second", on_error=True),
+        ]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("any.thing"))
+        assert result.target == "first"
+
+    def test_success_no_match_still_raises_value_error(self) -> None:
+        """Backwards-compat: success-path exhaustion still raises ValueError."""
+        router = Router()
+        # Only an error route, no success catch-all.
+        routes = [RouteDef(to="handler", on_error=True)]
+
+        with pytest.raises(ValueError, match="No matching route found"):
+            router.evaluate(routes, {"x": 1}, {})
+
+    def test_simpleeval_can_reference_flattened_error_fields(self) -> None:
+        """simpleeval flattening exposes ``error.kind`` as ``kind``/``error_kind``."""
+        router = Router()
+        routes = [
+            RouteDef(to="git", on_error=True, when="kind == 'external.git.drift'"),
+            RouteDef(to="other", on_error=True),
+        ]
+
+        result = router.evaluate(routes, {}, {}, error=self._envelope("external.git.drift"))
+        assert result.target == "git"

From 17c7cc778b320c0e3738cb4fb2532e1fd4234514 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 14:34:52 -0700
Subject: [PATCH 04/14] feat(context): add store_error API and .error access in
 all modes

Adds WorkflowContext.store_error(agent_name, envelope) that co-locates

error envelopes with their producing node's slot. The rendered context

shape for a node is now `{node: {output?, error?}}`.

All three context modes surface errors:

- accumulate: each failing node gets `{agent: {error: envelope}}`

- last_only:  failing last agent surfaces with `{error: ...}`

- explicit:   declarations of the form `agent.error[.field]` copy

  the whole envelope into ctx[agent]['error']. Envelopes are bounded

  and templates commonly need `error.details.*`, so the runtime

  never field-slices them.

Validator updates so existing semantic checks cover the new path:

- INPUT_REF_PATTERN gains an `error_agent`/`error_field` branch

  matching `<agent>.error[.field]`.

- _OUTPUT_ATTRS includes the singular `error` so Jinja AST analysis

  treats `{{ failing.error.kind }}` as a real output-class ref.

- TemplateRefs gains `agent_error_refs: set[str]` and the AST

  walker populates it.

- The per-agent template walk emits the same explicit-mode

  `undeclared input` warning for `.error` refs that `.output`

  and group `.errors` already get.

- Unknown-agent checks cover the `.error` ref path.

- Parallel-group internal-dependency check rejects intra-group

  `.error` refs too.

Checkpoint round-trip via to_dict/from_dict serializes `agent_errors`;

older checkpoints without the key restore as empty (backwards-compat).

Adds 14 tests in TestWorkflowContextStoreError, 5 INPUT_REF_PATTERN

shape tests, and 3 TemplateRefs error-extraction tests. Fixes the

test_empty_context dict-equality fixture to include the new field.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/config/validator.py             |  65 +++++++-
 src/conductor/engine/context.py               | 117 ++++++++++++--
 tests/test_config/test_validator.py           |  43 +++++
 tests/test_engine/test_context.py             | 149 ++++++++++++++++++
 .../test_engine/test_context_serialization.py |   1 +
 5 files changed, 363 insertions(+), 12 deletions(-)

diff --git a/src/conductor/config/validator.py b/src/conductor/config/validator.py
index 1b1f753..48cdd9b 100644
--- a/src/conductor/config/validator.py
+++ b/src/conductor/config/validator.py
@@ -65,8 +65,8 @@ def get(self, key: str, default: object = None) -> object:
 _BUILTIN_NAMES = frozenset({"workflow", "context", "item", "_index", "_key", "loop"})
 
 # Attribute names that mark a Getattr chain as an "output reference":
-#   agent.output.field, group.outputs.member, group.errors.member
-_OUTPUT_ATTRS = frozenset({"output", "outputs", "errors"})
+#   agent.output.field, agent.error.field, group.outputs.member, group.errors.member
+_OUTPUT_ATTRS = frozenset({"output", "outputs", "error", "errors"})
 
 # Attribute names that look like fields on an output but are actually built-in
 # dict methods. We avoid emitting field-precision warnings for these because
@@ -83,12 +83,14 @@ def get(self, key: str, default: object = None) -> object:
 
 # Pattern for input references:
 # - agent.output(.field)?
+# - agent.error(.field)?
 # - parallel_group.outputs.agent(.field)?
 # - workflow.input.param
 # All with optional ? suffix
 INPUT_REF_PATTERN = re.compile(
     r"^(?:"
     r"(?P<agent>[a-zA-Z_][a-zA-Z0-9_]*)\.output(?:\.(?P<field>[a-zA-Z_][a-zA-Z0-9_]*))?|"
+    r"(?P<error_agent>[a-zA-Z_][a-zA-Z0-9_]*)\.error(?:\.(?P<error_field>[a-zA-Z_][a-zA-Z0-9_]*))?|"
     r"(?P<parallel>[a-zA-Z_][a-zA-Z0-9_]*)\.(?P<pg_kind>outputs|errors)(?:\.(?P<pg_agent>[a-zA-Z_][a-zA-Z0-9_]*)(?:\.(?P<pg_field>[a-zA-Z_][a-zA-Z0-9_]*))?)?|"
     r"workflow\.input\.(?P<input>[a-zA-Z_][a-zA-Z0-9_]*)"
     r")(?P<optional>\?)?$"
@@ -329,6 +331,21 @@ def _validate_input_references(
                     f"Agent '{agent_name}' references unknown agent '{ref_agent}' in input"
                 )
 
+        # Check if referencing another agent's error envelope
+        ref_error_agent = match.group("error_agent")
+        if ref_error_agent and ref_error_agent not in agent_names:
+            is_optional = match.group("optional") == "?"
+            if is_optional:
+                warnings.append(
+                    f"Agent '{agent_name}' has optional reference to error envelope "
+                    f"from unknown agent '{ref_error_agent}'"
+                )
+            else:
+                errors.append(
+                    f"Agent '{agent_name}' references error envelope from unknown agent "
+                    f"'{ref_error_agent}' in input"
+                )
+
         # Check if referencing parallel/for-each group output
         ref_parallel = match.group("parallel")
         if ref_parallel and ref_parallel not in group_names:
@@ -518,7 +535,7 @@ def _validate_parallel_groups(config: WorkflowConfig) -> list[str]:
                 # Parse input reference to extract agent name
                 match = INPUT_REF_PATTERN.match(input_ref)
                 if match:
-                    ref_agent = match.group("agent")
+                    ref_agent = match.group("agent") or match.group("error_agent")
                     if ref_agent and ref_agent in pg_agents_set and ref_agent != agent_name:
                         errors.append(
                             f"Agent '{agent_name}' in parallel group '{pg.name}' references "
@@ -635,6 +652,12 @@ class TemplateRefs(NamedTuple):
             separate from output refs because the engine's runtime semantics
             for ``.errors`` always copy the whole errors dict and never field-
             slice, so field-precision checks must not be applied to them.
+        agent_error_refs: Agent names referenced via ``<name>.error`` (singular
+            error envelope from a failing leaf node). Kept separate from
+            output refs because envelopes are always copied whole — the
+            runtime never field-slices them — so field-precision checks
+            must not be applied. Used for explicit-mode undeclared-input
+            warnings on ``{{ failing_node.error.kind }}``-style references.
     """
 
     agent_refs: set[str]
@@ -642,6 +665,7 @@ class TemplateRefs(NamedTuple):
     agent_output_fields: dict[str, set[str | None]]
     group_member_fields: dict[tuple[str, str | None], set[str | None]]
     group_error_refs: set[str]
+    agent_error_refs: set[str]
 
 
 def _extract_template_refs(template: str) -> TemplateRefs:
@@ -693,6 +717,7 @@ def _extract_template_refs(template: str) -> TemplateRefs:
         agent_output_fields={},
         group_member_fields={},
         group_error_refs=set(),
+        agent_error_refs=set(),
     )
 
     if not template or ("{{" not in template and "{%" not in template):
@@ -733,6 +758,8 @@ def _extract_template_refs(template: str) -> TemplateRefs:
     workflow_inputs: set[str] = set()
     # group.errors chains; collected directly into the result.
     group_error_refs: set[str] = set()
+    # agent.error chains (singular envelope from a failing leaf).
+    agent_error_refs: set[str] = set()
     # Output / outputs chains, accumulated as the structured maps directly.
     agent_output_fields: dict[str, set[str | None]] = {}
     group_member_fields: dict[tuple[str, str | None], set[str | None]] = {}
@@ -790,6 +817,13 @@ def _extract_template_refs(template: str) -> TemplateRefs:
             agent_refs.add(root)
             continue
 
+        # Singular error envelope on a failing leaf node; whole envelope
+        # is copied into ctx, so no field-precision either.
+        if kind == "error":
+            agent_error_refs.add(root)
+            agent_refs.add(root)
+            continue
+
         agent_refs.add(root)
         if kind == "output":
             # attrs is ["output"] or ["output", "<field>", ...]
@@ -811,6 +845,7 @@ def _extract_template_refs(template: str) -> TemplateRefs:
         agent_output_fields=agent_output_fields,
         group_member_fields=group_member_fields,
         group_error_refs=group_error_refs,
+        agent_error_refs=agent_error_refs,
     )
 
 
@@ -1188,6 +1223,10 @@ def _validate_template_references(
         # (see ``_add_parallel_group_input`` errors branch), so no field-
         # precision tracking is needed for errors.
         declared_group_errors: set[str] = set()
+        # Set of agent names whose error envelope is declared via
+        # ``<agent>.error[.field]``. The engine always copies the whole
+        # envelope, so no field-precision tracking is needed here either.
+        declared_agent_error_refs: set[str] = set()
         for ref in agent.input:
             match = INPUT_REF_PATTERN.match(ref.rstrip("?"))
             if not match:
@@ -1197,6 +1236,9 @@ def _validate_template_references(
                 field = match.group("field")
                 # field is None for bare ``a.output`` (whole output declared).
                 declared_agent_output_fields.setdefault(ref_agent, set()).add(field)
+            ref_error_agent = match.group("error_agent")
+            if ref_error_agent:
+                declared_agent_error_refs.add(ref_error_agent)
             ref_parallel = match.group("parallel")
             if ref_parallel:
                 pg_kind = match.group("pg_kind")
@@ -1347,6 +1389,23 @@ def _validate_template_references(
                         f"in its input: list (explicit context mode)"
                     )
 
+            # --- Agent-error references (``a.error[.field]``) ---
+            # Mirror of the group-error block. Envelopes are bounded in
+            # size and always copied whole, so no field-precision check.
+            for failing in refs.agent_error_refs:
+                if failing not in valid_names:
+                    errors.append(
+                        f"{source} references unknown agent '{failing}'. "
+                        f"Available: {', '.join(sorted(valid_names))}"
+                    )
+                    continue
+                if agent_output_warning_allowed and failing not in declared_agent_error_refs:
+                    warnings.append(
+                        f"{source} references '{failing}.error' but "
+                        f"agent '{agent.name}' does not declare '{failing}.error' "
+                        f"in its input: list (explicit context mode)"
+                    )
+
             for input_name in refs.workflow_inputs:
                 if workflow_input_names and input_name not in workflow_input_names:
                     # Only error when inputs ARE declared — workflows without
diff --git a/src/conductor/engine/context.py b/src/conductor/engine/context.py
index 4b97547..34550fc 100644
--- a/src/conductor/engine/context.py
+++ b/src/conductor/engine/context.py
@@ -10,6 +10,7 @@
 from typing import TYPE_CHECKING, Any, Literal
 
 if TYPE_CHECKING:
+    from conductor.engine.errors import ErrorEnvelope
     from conductor.providers.base import AgentProvider
 
 # Token estimation constants
@@ -101,6 +102,16 @@ class WorkflowContext:
     agent_outputs: dict[str, dict[str, Any]] = field(default_factory=dict)
     """Outputs from executed agents, keyed by agent name."""
 
+    agent_errors: dict[str, ErrorEnvelope] = field(default_factory=dict)
+    """Error envelopes from failed agents, keyed by agent name.
+
+    Populated by :meth:`store_error`. Phase 1 leaf executors (agent,
+    script) either populate ``agent_outputs`` (success) or
+    ``agent_errors`` (raise), never both — but consumers should treat
+    them as a co-located pair: a node's slot in the rendered context
+    is ``{node: {output?, error?}}``.
+    """
+
     current_iteration: int = 0
     """Current execution iteration count."""
 
@@ -157,6 +168,23 @@ def store(self, agent_name: str, output: dict[str, Any]) -> None:
         self.execution_history.append(agent_name)
         self.current_iteration += 1
 
+    def store_error(self, agent_name: str, envelope: ErrorEnvelope) -> None:
+        """Store an error envelope from a failed agent in context.
+
+        Co-located with :meth:`store` so that the rendered context shape
+        is ``{agent_name: {output?, error?}}``. Phase 1 leaf executors
+        call exactly one of ``store`` / ``store_error`` per node; both
+        bump ``current_iteration`` and append to ``execution_history``
+        because the node ran to a definitive outcome either way.
+
+        Args:
+            agent_name: The name of the agent that raised the envelope.
+            envelope: The error envelope ``{kind, message, details}``.
+        """
+        self.agent_errors[agent_name] = envelope
+        self.execution_history.append(agent_name)
+        self.current_iteration += 1
+
     def build_for_agent(
         self,
         agent_name: str,
@@ -240,10 +268,21 @@ def build_for_agent(
                         # Regular agents wrap output in {"output": ...}
                         ctx[agent] = {"output": output}
 
+                # Surface error envelopes co-located with their node's
+                # slot. Failing nodes never produced an output in Phase 1,
+                # but if both ever coexist, the dict merges cleanly.
+                for agent, envelope in self.agent_errors.items():
+                    slot = ctx.get(agent)
+                    if isinstance(slot, dict):
+                        slot["error"] = envelope
+                    else:
+                        ctx[agent] = {"error": envelope}
+
             elif mode == "last_only" and self.execution_history:
-                # Only the most recent agent's output
+                # Only the most recent agent's output (and/or error)
                 last_agent = self.execution_history[-1]
-                last_output = self.agent_outputs.get(last_agent, {})
+                last_output = self.agent_outputs.get(last_agent)
+                last_error = self.agent_errors.get(last_agent)
 
                 # Check if this is a parallel group output
                 is_parallel_group = (
@@ -254,8 +293,18 @@ def build_for_agent(
 
                 if is_parallel_group:
                     ctx[last_agent] = last_output
-                else:
-                    ctx[last_agent] = {"output": last_output}
+                elif last_agent in self.agent_outputs:
+                    # Preserve legacy behavior: even an empty output dict
+                    # surfaces as ``{"output": {}}`` so templates that
+                    # reference the last agent don't KeyError.
+                    slot: dict[str, Any] = {
+                        "output": last_output if last_output is not None else {}
+                    }
+                    if last_error is not None:
+                        slot["error"] = last_error
+                    ctx[last_agent] = slot
+                elif last_error is not None:
+                    ctx[last_agent] = {"error": last_error}
 
         return ctx
 
@@ -312,7 +361,7 @@ def _add_explicit_input(self, ctx: dict[str, Any], input_ref: str) -> None:
                 else:
                     raise KeyError(f"Missing required workflow input: {param_name}")
         else:
-            # Could be agent_name.output or parallel_group.outputs
+            # Could be agent_name.output, agent_name.error, or parallel_group.outputs
             entity_name = parts[0]
 
             if entity_name in self.agent_outputs:
@@ -329,15 +378,27 @@ def _add_explicit_input(self, ctx: dict[str, Any], input_ref: str) -> None:
                     # Handle parallel group references
                     self._add_parallel_group_input(ctx, entity_name, parts[1:], is_optional)
                 else:
-                    # Handle regular agent references
+                    # Handle regular agent references (output and/or error)
                     self._add_agent_input(ctx, entity_name, parts[1:], is_optional)
+            elif entity_name in self.agent_errors:
+                # Failing-agent-only case: no output stored, only the
+                # envelope. ``agent_name.error[.field]`` is the only
+                # legal subpath here.
+                self._add_agent_input(ctx, entity_name, parts[1:], is_optional)
             elif not is_optional:
                 raise KeyError(f"Missing required agent output: {entity_name}")
 
     def _add_agent_input(
         self, ctx: dict[str, Any], agent_name: str, remaining_parts: list[str], is_optional: bool
     ) -> None:
-        """Add a regular agent output reference to context.
+        """Add a regular agent output or error reference to context.
+
+        Supports both success-path references (``agent.output(.field)?``)
+        and error-path references (``agent.error(.field)?``). Error
+        envelopes are bounded in size and templates often need
+        ``error.details.*`` access, so any ``.error*`` reference copies
+        the whole envelope into ``ctx[agent_name]["error"]`` rather than
+        selecting a single field.
 
         Args:
             ctx: The context dictionary to update.
@@ -348,14 +409,36 @@ def _add_agent_input(
         Raises:
             KeyError: If a required field is missing.
         """
+        # Error-path references: copy the whole envelope. Phase 1 leaf
+        # executors raise XOR succeed, so a node with ``.error`` declared
+        # is typically a failing producer the explicit-mode handler is
+        # consuming.
+        if remaining_parts and remaining_parts[0] == "error":
+            envelope = self.agent_errors.get(agent_name)
+            if envelope is None:
+                if not is_optional:
+                    raise KeyError(f"Missing error envelope from agent '{agent_name}'")
+                return
+            slot = ctx.get(agent_name)
+            if not isinstance(slot, dict):
+                slot = {}
+                ctx[agent_name] = slot
+            slot["error"] = envelope
+            return
+
+        # Success-path references: existing behavior.
+        agent_output = self.agent_outputs.get(agent_name)
+        if agent_output is None:
+            if not is_optional:
+                raise KeyError(f"Missing output from agent '{agent_name}'")
+            return
+
         # Ensure the agent context exists
         if agent_name not in ctx:
             ctx[agent_name] = {"output": {}}
         elif "output" not in ctx[agent_name]:
             ctx[agent_name]["output"] = {}
 
-        agent_output = self.agent_outputs[agent_name]
-
         if not remaining_parts:
             # Just agent_name - copy entire output
             ctx[agent_name]["output"] = agent_output.copy()
@@ -514,6 +597,7 @@ def to_dict(self) -> dict[str, Any]:
         return {
             "workflow_inputs": copy.deepcopy(self.workflow_inputs),
             "agent_outputs": copy.deepcopy(self.agent_outputs),
+            "agent_errors": copy.deepcopy(self.agent_errors),
             "current_iteration": self.current_iteration,
             "execution_history": list(self.execution_history),
             "user_guidance": list(self.user_guidance),
@@ -534,6 +618,9 @@ def from_dict(cls, data: dict[str, Any]) -> WorkflowContext:
         ctx = cls()
         ctx.workflow_inputs = copy.deepcopy(data.get("workflow_inputs", {}))
         ctx.agent_outputs = copy.deepcopy(data.get("agent_outputs", {}))
+        # ``agent_errors`` is new in Phase 1 error routing; older
+        # checkpoints simply have no errors to restore.
+        ctx.agent_errors = copy.deepcopy(data.get("agent_errors", {}))
         ctx.current_iteration = data.get("current_iteration", 0)
         ctx.execution_history = list(data.get("execution_history", []))
         ctx.user_guidance = list(data.get("user_guidance", []))
@@ -561,6 +648,18 @@ def get_latest_output(self) -> dict[str, Any] | None:
         last_agent = self.execution_history[-1]
         return self.agent_outputs.get(last_agent)
 
+    def get_latest_error(self) -> ErrorEnvelope | None:
+        """Get the error envelope from the most recently executed agent.
+
+        Returns ``None`` if the last node succeeded, was a non-leaf
+        node, or if no nodes have executed. Useful for the halt-on-
+        unhandled path which needs the envelope from the failing leaf.
+        """
+        if not self.execution_history:
+            return None
+        last_agent = self.execution_history[-1]
+        return self.agent_errors.get(last_agent)
+
     def estimate_context_tokens(self) -> int:
         """Estimate the total number of tokens in the current context.
 
diff --git a/tests/test_config/test_validator.py b/tests/test_config/test_validator.py
index 07c2d3c..a0c1fac 100644
--- a/tests/test_config/test_validator.py
+++ b/tests/test_config/test_validator.py
@@ -718,6 +718,30 @@ def test_unrelated_attrs_ignored(self) -> None:
         assert refs.agent_refs == set()
         assert refs.workflow_inputs == set()
 
+    def test_singular_error_ref_extracted(self) -> None:
+        """``agent.error[.field]`` populates ``agent_error_refs``."""
+        refs = _extract_template_refs("{{ failing.error.kind }}")
+        # Reflected in both flat agent_refs (for unknown-agent checks)
+        # AND in the dedicated agent_error_refs set (for explicit-mode
+        # undeclared-input warnings on the .error path).
+        assert refs.agent_refs == {"failing"}
+        assert refs.agent_error_refs == {"failing"}
+        # No spurious field-precision tracking for errors.
+        assert refs.agent_output_fields == {}
+
+    def test_bare_error_ref_extracted(self) -> None:
+        refs = _extract_template_refs("{% if failing.error %}boom{% endif %}")
+        assert refs.agent_error_refs == {"failing"}
+
+    def test_error_and_output_can_coexist_for_same_agent(self) -> None:
+        """A template referencing both ``a.output`` and ``a.error`` is legal."""
+        refs = _extract_template_refs(
+            "{{ a.output.text }}{% if a.error %}{{ a.error.kind }}{% endif %}"
+        )
+        assert refs.agent_refs == {"a"}
+        assert refs.agent_error_refs == {"a"}
+        assert refs.agent_output_fields == {"a": {"text"}}
+
     def test_no_template_tags(self) -> None:
         refs = _extract_template_refs("just plain text")
         assert refs.agent_refs == set() and refs.workflow_inputs == set()
@@ -854,6 +878,25 @@ def test_pattern_still_accepts_legacy_shapes(self, ref: str) -> None:
     def test_pattern_rejects_invalid_shapes(self, ref: str) -> None:
         assert INPUT_REF_PATTERN.match(ref) is None
 
+    @pytest.mark.parametrize(
+        "ref,expected_agent",
+        [
+            ("failing_node.error", "failing_node"),
+            ("failing_node.error.kind", "failing_node"),
+            ("failing_node.error.message", "failing_node"),
+            ("failing_node.error?", "failing_node"),
+            ("failing_node.error.kind?", "failing_node"),
+        ],
+    )
+    def test_pattern_accepts_agent_error_shapes(self, ref: str, expected_agent: str) -> None:
+        """``agent.error[.field]`` (singular) is the on_error envelope ref."""
+        match = INPUT_REF_PATTERN.match(ref)
+        assert match is not None, f"{ref!r} should match INPUT_REF_PATTERN"
+        assert match.group("error_agent") == expected_agent
+        # And it must not be misclassified as an output / parallel ref.
+        assert match.group("agent") is None
+        assert match.group("parallel") is None
+
 
 class TestTemplateReferenceValidation:
     """End-to-end tests for stale-reference detection in agent templates."""
diff --git a/tests/test_engine/test_context.py b/tests/test_engine/test_context.py
index dbc9709..7403d75 100644
--- a/tests/test_engine/test_context.py
+++ b/tests/test_engine/test_context.py
@@ -1265,3 +1265,152 @@ def test_guidance_section_starts_with_newlines(self) -> None:
 
         assert section is not None
         assert section.startswith("\n\n")
+
+
+class TestWorkflowContextStoreError:
+    """Tests for ``store_error`` and the ``.error`` access path."""
+
+    @staticmethod
+    def _envelope(kind: str = "external.git.drift") -> dict[str, object]:
+        return {"kind": kind, "message": "boom", "details": {"branch": "main"}}
+
+    def test_store_error_populates_state(self) -> None:
+        """store_error sets agent_errors, history, iteration counter."""
+        ctx = WorkflowContext()
+        env = self._envelope()
+        ctx.store_error("fetcher", env)
+
+        assert ctx.agent_errors == {"fetcher": env}
+        assert ctx.execution_history == ["fetcher"]
+        assert ctx.current_iteration == 1
+        assert ctx.agent_outputs == {}
+
+    def test_get_latest_error_returns_envelope(self) -> None:
+        ctx = WorkflowContext()
+        env = self._envelope("policy.violation")
+        ctx.store_error("policy_check", env)
+
+        assert ctx.get_latest_error() == env
+        assert ctx.get_latest_output() is None
+
+    def test_get_latest_error_none_when_last_succeeded(self) -> None:
+        ctx = WorkflowContext()
+        ctx.store_error("a", self._envelope())
+        ctx.store("b", {"result": "ok"})
+
+        assert ctx.get_latest_error() is None
+        assert ctx.get_latest_output() == {"result": "ok"}
+
+    def test_accumulate_mode_surfaces_error_envelope(self) -> None:
+        ctx = WorkflowContext()
+        ctx.store("good", {"x": 1})
+        ctx.store_error("bad", self._envelope())
+
+        agent_ctx = ctx.build_for_agent("handler", [], mode="accumulate")
+
+        assert agent_ctx["good"] == {"output": {"x": 1}}
+        assert agent_ctx["bad"] == {"error": self._envelope()}
+
+    def test_last_only_mode_with_failing_last(self) -> None:
+        ctx = WorkflowContext()
+        ctx.store("good", {"x": 1})
+        ctx.store_error("bad", self._envelope())
+
+        agent_ctx = ctx.build_for_agent("handler", [], mode="last_only")
+
+        # Only the last (failing) agent should be present.
+        assert "good" not in agent_ctx
+        assert agent_ctx["bad"] == {"error": self._envelope()}
+
+    def test_last_only_mode_with_successful_last_does_not_surface_old_error(self) -> None:
+        ctx = WorkflowContext()
+        ctx.store_error("bad", self._envelope())
+        ctx.store("good", {"x": 1})
+
+        agent_ctx = ctx.build_for_agent("handler", [], mode="last_only")
+
+        assert "bad" not in agent_ctx
+        assert agent_ctx["good"] == {"output": {"x": 1}}
+
+    def test_explicit_mode_declared_error_ref_copies_envelope(self) -> None:
+        ctx = WorkflowContext()
+        ctx.store_error("bad", self._envelope())
+
+        agent_ctx = ctx.build_for_agent("handler", ["bad.error"], mode="explicit")
+
+        assert agent_ctx["bad"]["error"] == self._envelope()
+
+    def test_explicit_mode_dotted_error_field_copies_whole_envelope(self) -> None:
+        """``agent.error.kind`` declaration copies the whole envelope.
+
+        Envelopes are bounded in size and templates commonly need
+        ``error.details.*`` access, so the runtime never field-slices
+        them — declaring a sub-path is treated like declaring the whole.
+        """
+        ctx = WorkflowContext()
+        env = self._envelope()
+        ctx.store_error("bad", env)
+
+        agent_ctx = ctx.build_for_agent("handler", ["bad.error.kind"], mode="explicit")
+
+        assert agent_ctx["bad"]["error"] == env
+
+    def test_explicit_mode_undeclared_error_ref_raises(self) -> None:
+        ctx = WorkflowContext()
+        ctx.store_error("bad", self._envelope())
+
+        with pytest.raises(KeyError, match="good"):
+            # Failing agent is "bad"; declaring "good.output" doesn't help —
+            # missing required output should raise.
+            ctx.build_for_agent("handler", ["good.output"], mode="explicit")
+
+    def test_explicit_mode_optional_error_ref_tolerates_missing(self) -> None:
+        ctx = WorkflowContext()
+        # No failing agent stored at all.
+        agent_ctx = ctx.build_for_agent("handler", ["missing.error?"], mode="explicit")
+
+        # Optional missing ref should not raise and should not populate.
+        assert "missing" not in agent_ctx
+
+    def test_explicit_mode_output_and_error_coexist(self) -> None:
+        """A handler may need a peer's output plus the failer's envelope."""
+        ctx = WorkflowContext()
+        ctx.store("planner", {"plan": "do x"})
+        ctx.store_error("executor", self._envelope())
+
+        agent_ctx = ctx.build_for_agent(
+            "handler",
+            ["planner.output", "executor.error"],
+            mode="explicit",
+        )
+
+        assert agent_ctx["planner"] == {"output": {"plan": "do x"}}
+        assert agent_ctx["executor"] == {"error": self._envelope()}
+
+    def test_to_dict_round_trips_agent_errors(self) -> None:
+        ctx = WorkflowContext()
+        env = self._envelope()
+        ctx.set_workflow_inputs({"q": "?"})
+        ctx.store("good", {"x": 1})
+        ctx.store_error("bad", env)
+
+        data = ctx.to_dict()
+        restored = WorkflowContext.from_dict(data)
+
+        assert restored.agent_errors == {"bad": env}
+        assert restored.agent_outputs == {"good": {"x": 1}}
+        assert restored.execution_history == ["good", "bad"]
+        assert restored.current_iteration == 2
+
+    def test_from_dict_missing_agent_errors_is_empty(self) -> None:
+        """Older checkpoints without agent_errors restore cleanly."""
+        ctx = WorkflowContext.from_dict(
+            {
+                "workflow_inputs": {},
+                "agent_outputs": {"a": {"x": 1}},
+                "current_iteration": 1,
+                "execution_history": ["a"],
+                "user_guidance": [],
+            }
+        )
+        assert ctx.agent_errors == {}
diff --git a/tests/test_engine/test_context_serialization.py b/tests/test_engine/test_context_serialization.py
index 999a0d3..be1b35d 100644
--- a/tests/test_engine/test_context_serialization.py
+++ b/tests/test_engine/test_context_serialization.py
@@ -32,6 +32,7 @@ def test_empty_context(self) -> None:
         assert d == {
             "workflow_inputs": {},
             "agent_outputs": {},
+            "agent_errors": {},
             "current_iteration": 0,
             "execution_history": [],
             "user_guidance": [],

From d38d43b37fba63765b4c8a88fb7966a03f00e2a6 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 15:24:33 -0700
Subject: [PATCH 05/14] feat(validator): cross-check on_error routes against
 raises and node type

Adds two new helpers in conductor.config.validator:

- _validate_on_error_routes(agent): hard-errors on_error routes on node types that don't raise envelopes in Phase 1 (human_gate, workflow); validates each kind matches KIND_PATTERN; if agent.raises is declared, every concrete kind in on_error must be in raises or RESERVED_ON_ERROR_ALLOWLIST (catch-all 	rue always legal).

- _validate_group_routes_no_on_error(): rejects on_error routes on parallel and for_each groups (group-level envelopes are Phase 2).

Both helpers are wired into validate_workflow_config(). 11 new tests cover plain agent + script (legal), human_gate + workflow + parallel + for_each (rejected), bad kind format, undeclared-kind cross-check, catch-all, reserved allowlist, and no-raises = no constraint.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/config/validator.py   | 150 ++++++++++++++++++++-
 tests/test_config/test_validator.py | 197 ++++++++++++++++++++++++++++
 2 files changed, 345 insertions(+), 2 deletions(-)

diff --git a/src/conductor/config/validator.py b/src/conductor/config/validator.py
index 48cdd9b..a11456a 100644
--- a/src/conductor/config/validator.py
+++ b/src/conductor/config/validator.py
@@ -9,15 +9,16 @@
 
 import re
 from pathlib import Path
-from typing import TYPE_CHECKING, NamedTuple
+from typing import TYPE_CHECKING, Any, NamedTuple
 
 import jinja2
 from jinja2 import Environment, meta, nodes
 
+from conductor.error_kinds import KIND_PATTERN, RESERVED_ON_ERROR_ALLOWLIST
 from conductor.exceptions import ConfigurationError
 
 if TYPE_CHECKING:
-    from conductor.config.schema import AgentDef, WorkflowConfig
+    from conductor.config.schema import AgentDef, WorkflowConfig  # noqa: F401
 
 
 # Shared Jinja2 environment used purely for AST parsing of template strings.
@@ -148,6 +149,10 @@ def validate_workflow_config(
         agent_errors = _validate_agent_routes(agent.name, agent.routes, all_names)
         errors.extend(agent_errors)
 
+        # Validate on_error route placement + cross-check against raises
+        on_error_errors = _validate_on_error_routes(agent)
+        errors.extend(on_error_errors)
+
         # Validate human_gate has options
         if agent.type == "human_gate":
             if not agent.options:
@@ -205,6 +210,10 @@ def validate_workflow_config(
     if config.parallel:
         parallel_errors = _validate_parallel_groups(config)
         errors.extend(parallel_errors)
+        # Phase 1: on_error routes on parallel groups are not supported
+        # (group-level error envelopes are deferred to Phase 2).
+        for pg in config.parallel:
+            errors.extend(_validate_group_routes_no_on_error("parallel group", pg.name, pg.routes))
 
     # Validate for_each groups: reject script steps as inline agents
     for for_each_group in config.for_each:
@@ -213,6 +222,12 @@ def validate_workflow_config(
                 f"For-each group '{for_each_group.name}' uses a script step as its "
                 "inline agent. Script steps cannot be used in for_each groups."
             )
+        # Phase 1: same as parallel — no group-level error envelopes yet.
+        errors.extend(
+            _validate_group_routes_no_on_error(
+                "for_each group", for_each_group.name, for_each_group.routes
+            )
+        )
 
     # Validate sub-workflow references (local paths and registry refs).
     # Skipped when workflow_path is not provided — relative paths cannot be
@@ -281,6 +296,137 @@ def _validate_agent_routes(
     return errors
 
 
+# Node types that DO emit error envelopes in Phase 1 and may therefore
+# carry ``on_error`` routes. Other types (human_gate, workflow,
+# notification) get a hard validator error if they try.
+_LEAF_TYPES_THAT_RAISE: frozenset[str | None] = frozenset({"agent", "script", None})
+
+
+def _validate_on_error_routes(agent: Any) -> list[str]:
+    """Validate ``on_error`` route shape, placement, and cross-check vs ``raises``.
+
+    The schema-level Pydantic validator already enforces the discriminator
+    shape (``bool | str | list[str]``), rejects ``False``, and forbids
+    reserved-prefix kinds in ``raises``. This function adds the
+    cross-cutting checks that need the full :class:`AgentDef` in view:
+
+    1. **Placement**: ``on_error`` routes are only legal on leaf node
+       types that actually raise envelopes in Phase 1 (``agent``,
+       ``script``, or untyped). Routes on ``human_gate`` / ``workflow``
+       /  ``notification`` agents that declare ``on_error`` get a hard
+       error so authors don't ship handlers that never fire.
+    2. **Kind shape**: any string kind in ``on_error`` must look like a
+       dotted lowercase identifier (the same ``KIND_PATTERN`` enforced on
+       ``raises``).
+    3. **Cross-check vs ``raises``**: if ``agent.raises`` is declared,
+       every concrete kind matched by an ``on_error`` route must appear
+       in ``raises`` or in :data:`RESERVED_ON_ERROR_ALLOWLIST` (the
+       runtime-synthesized kinds — ``internal.script_error``,
+       ``internal.schema_violation``, ``internal.undeclared_kind``).
+       Catch-all (``on_error: true``) is always allowed. Routes with no
+       ``on_error`` are unaffected.
+
+    Args:
+        agent: An :class:`~conductor.config.schema.AgentDef`.
+
+    Returns:
+        List of validation error messages (empty if all checks pass).
+    """
+    errors: list[str] = []
+    if not agent.routes:
+        return errors
+
+    # Collect routes with on_error set (the schema-level validator already
+    # filtered out False; None means "success route" and is skipped here).
+    error_routes = [(i, r) for i, r in enumerate(agent.routes) if r.on_error is not None]
+    if not error_routes:
+        return errors
+
+    # 1. Placement check — hard error for unsupported node types.
+    if agent.type not in _LEAF_TYPES_THAT_RAISE:
+        errors.append(
+            f"Agent '{agent.name}' has type '{agent.type}' but declares one or more "
+            f"'on_error' routes. In Phase 1, only 'agent' and 'script' nodes raise "
+            f"error envelopes. Remove the on_error routes or change the agent type."
+        )
+        # Continue with the remaining checks so a misconfigured node
+        # surfaces all of its on_error problems in one validator pass.
+
+    declared = set(agent.raises or [])
+
+    for idx, route in error_routes:
+        kinds = _collect_on_error_kinds(route.on_error)
+        for kind in kinds:
+            # 2. Kind shape — must look like a dotted identifier.
+            if not KIND_PATTERN.match(kind):
+                errors.append(
+                    f"Agent '{agent.name}' route {idx} on_error contains "
+                    f"invalid kind '{kind}'. Kinds must be dotted lowercase "
+                    f"identifiers (e.g., 'external.git.fetch_failed')."
+                )
+                continue
+            # 3. Cross-check vs raises. Only run when ``raises`` is
+            # declared — an undeclared producer accepts any kind in
+            # on_error (the discriminator is the contract).
+            if declared and kind not in declared and kind not in RESERVED_ON_ERROR_ALLOWLIST:
+                allowlist_hint = ", ".join(sorted(RESERVED_ON_ERROR_ALLOWLIST)) or "(none)"
+                errors.append(
+                    f"Agent '{agent.name}' route {idx} on_error references kind "
+                    f"'{kind}' but the agent declares raises={sorted(declared)}. "
+                    f"Add '{kind}' to the agent's raises list or remove the "
+                    f"route. Always-legal reserved kinds: {allowlist_hint}."
+                )
+
+    return errors
+
+
+def _collect_on_error_kinds(on_error: bool | str | list[str]) -> list[str]:
+    """Return the concrete kind strings inside an ``on_error`` matcher.
+
+    ``True`` (catch-all) contributes no concrete kinds and is always
+    legal at the cross-check stage. Strings and lists yield their
+    string contents.
+    """
+    if on_error is True:
+        return []
+    if isinstance(on_error, str):
+        return [on_error]
+    if isinstance(on_error, list):
+        return list(on_error)
+    return []
+
+
+def _validate_group_routes_no_on_error(group_kind: str, group_name: str, routes: list) -> list[str]:
+    """Reject ``on_error`` routes attached to a parallel or for_each group.
+
+    Phase 1 only emits envelopes from leaf agent/script nodes. Group-
+    level error envelopes (where any child failure surfaces as one
+    envelope on the group) are deferred to Phase 2. Until then, an
+    ``on_error`` route on a group is a footgun — it would never fire —
+    so we hard-error rather than silently warn.
+
+    Args:
+        group_kind: Human label for the group type, e.g. ``"parallel group"``.
+        group_name: The group's ``name`` field.
+        routes: The group's routes list.
+
+    Returns:
+        List of validation error messages.
+    """
+    errors: list[str] = []
+    if not routes:
+        return errors
+    for i, route in enumerate(routes):
+        if route.on_error is not None:
+            errors.append(
+                f"{group_kind.capitalize()} '{group_name}' route {i} declares "
+                f"'on_error', but Phase 1 does not emit envelopes from "
+                f"{group_kind}s. Remove the on_error field or wait for Phase 2 "
+                f"group-level error routing."
+            )
+    return errors
+
+
 def _validate_input_references(
     agent_name: str,
     inputs: list[str],
diff --git a/tests/test_config/test_validator.py b/tests/test_config/test_validator.py
index a0c1fac..4ba58f2 100644
--- a/tests/test_config/test_validator.py
+++ b/tests/test_config/test_validator.py
@@ -143,6 +143,203 @@ def test_valid_route_to_end(self) -> None:
         validate_workflow_config(config)
 
 
+class TestOnErrorRouteValidation:
+    """Tests for on_error route placement and cross-check against ``raises``."""
+
+    @staticmethod
+    def _wf(*agents: AgentDef, **extra: object) -> WorkflowConfig:
+        return WorkflowConfig(
+            workflow=WorkflowDef(name="t", entry_point=agents[0].name),
+            agents=list(agents),
+            **extra,  # type: ignore[arg-type]
+        )
+
+    def test_on_error_on_plain_agent_is_valid(self) -> None:
+        cfg = self._wf(
+            AgentDef(
+                name="leaf",
+                model="gpt-4",
+                prompt="x",
+                raises=["external.git.fetch_failed"],
+                routes=[
+                    RouteDef(to="handler", on_error="external.git.fetch_failed"),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(name="handler", model="gpt-4", prompt="recover", routes=[RouteDef(to="$end")]),
+        )
+        validate_workflow_config(cfg)  # should not raise
+
+    def test_on_error_on_script_is_valid(self) -> None:
+        cfg = self._wf(
+            AgentDef(
+                name="fetch",
+                type="script",
+                command="git fetch",
+                raises=["external.git.fetch_failed"],
+                routes=[
+                    RouteDef(to="recover", on_error=True),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(name="recover", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+        )
+        validate_workflow_config(cfg)
+
+    def test_on_error_on_human_gate_is_hard_error(self) -> None:
+        cfg = self._wf(
+            AgentDef(
+                name="gate",
+                type="human_gate",
+                prompt="pick one",
+                options=[GateOption(label="ok", value="ok", route="next")],
+                routes=[RouteDef(to="next", on_error=True)],
+            ),
+            AgentDef(name="next", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+        )
+        with pytest.raises(ConfigurationError, match="human_gate.*on_error"):
+            validate_workflow_config(cfg)
+
+    def test_on_error_on_workflow_node_is_hard_error(self) -> None:
+        cfg = self._wf(
+            AgentDef(
+                name="sub",
+                type="workflow",
+                workflow="./sub.yaml",
+                routes=[
+                    RouteDef(to="$end"),
+                    RouteDef(to="rescue", on_error="external.x"),
+                ],
+            ),
+            AgentDef(name="rescue", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+        )
+        with pytest.raises(ConfigurationError, match="workflow.*on_error"):
+            validate_workflow_config(cfg)
+
+    def test_on_error_kind_must_be_dotted(self) -> None:
+        """Schema-level validator rejects malformed kind strings at construction.
+
+        Defensive: the cross-field validator repeats this check so configs
+        built via ``model_construct`` (bypassing Pydantic validation) still
+        get caught.
+        """
+        with pytest.raises(Exception, match="dotted lowercase identifier"):
+            RouteDef(to="rescue", on_error="not_dotted")
+
+    def test_undeclared_kind_in_on_error_is_hard_error(self) -> None:
+        """If raises is set, on_error kinds must be declared (or reserved/catch-all)."""
+        cfg = self._wf(
+            AgentDef(
+                name="leaf",
+                model="gpt-4",
+                prompt="x",
+                raises=["external.git.fetch_failed"],
+                routes=[
+                    RouteDef(to="rescue", on_error="external.api.timeout"),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(name="rescue", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+        )
+        with pytest.raises(ConfigurationError, match="external.api.timeout"):
+            validate_workflow_config(cfg)
+
+    def test_catch_all_on_error_always_legal_even_with_raises(self) -> None:
+        cfg = self._wf(
+            AgentDef(
+                name="leaf",
+                model="gpt-4",
+                prompt="x",
+                raises=["external.git.fetch_failed"],
+                routes=[
+                    RouteDef(to="rescue", on_error=True),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(name="rescue", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+        )
+        validate_workflow_config(cfg)
+
+    def test_reserved_allowlist_kind_is_legal_in_on_error(self) -> None:
+        """``internal.schema_violation`` may be matched even though raises forbids it."""
+        cfg = self._wf(
+            AgentDef(
+                name="leaf",
+                model="gpt-4",
+                prompt="x",
+                raises=["external.git.fetch_failed"],
+                routes=[
+                    RouteDef(to="rescue", on_error="internal.schema_violation"),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(name="rescue", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+        )
+        validate_workflow_config(cfg)
+
+    def test_undeclared_raises_means_anything_goes(self) -> None:
+        """No raises declared = on_error accepts any well-formed kind."""
+        cfg = self._wf(
+            AgentDef(
+                name="leaf",
+                model="gpt-4",
+                prompt="x",
+                routes=[
+                    RouteDef(to="rescue", on_error=["a.b", "c.d"]),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(name="rescue", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+        )
+        validate_workflow_config(cfg)
+
+    def test_on_error_on_parallel_group_is_hard_error(self) -> None:
+        cfg = WorkflowConfig(
+            workflow=WorkflowDef(name="t", entry_point="group"),
+            agents=[
+                AgentDef(name="a", model="gpt-4", prompt="x"),
+                AgentDef(name="b", model="gpt-4", prompt="x"),
+                AgentDef(name="rescue", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")]),
+            ],
+            parallel=[
+                ParallelGroup(
+                    name="group",
+                    agents=["a", "b"],
+                    routes=[
+                        RouteDef(to="$end"),
+                        RouteDef(to="rescue", on_error=True),
+                    ],
+                )
+            ],
+        )
+        with pytest.raises(ConfigurationError, match="parallel group.*on_error"):
+            validate_workflow_config(cfg)
+
+    def test_on_error_on_for_each_group_is_hard_error(self) -> None:
+        cfg = WorkflowConfig(
+            workflow=WorkflowDef(
+                name="t",
+                entry_point="loop",
+                input={"items": InputDef(type="array")},
+            ),
+            agents=[
+                AgentDef(name="rescue", model="gpt-4", prompt="x", routes=[RouteDef(to="$end")])
+            ],
+            for_each=[
+                ForEachDef(
+                    name="loop",
+                    type="for_each",
+                    source="workflow.input.items",
+                    **{"as": "item"},  # 'as' is a reserved keyword
+                    agent=AgentDef(name="worker", model="gpt-4", prompt="{{ item }}"),
+                    routes=[RouteDef(to="rescue", on_error="x.y")],
+                )
+            ],
+        )
+        with pytest.raises(ConfigurationError, match="for_each group.*on_error"):
+            validate_workflow_config(cfg)
+
+
 class TestHumanGateValidation:
     """Tests for human gate validation."""
 

From f3b4202094363acfe1a888c2d429ddd766481767 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 15:27:31 -0700
Subject: [PATCH 06/14] feat(agent-exec): surface conductor_error envelopes and
 schema violations

AgentOutput grows an optional `error: dict | None` field that carries an ErrorEnvelope when the agent failed.

AgentExecutor.execute() now, after the provider call returns:

1. If the response is a dict with `conductor_error: true`, coerces the envelope (or synthesizes `internal.schema_violation` when the envelope itself is malformed) and attaches it to output.error WITHOUT running validate_output (the declared output schema doesn't apply to error envelopes).

2. Otherwise runs validate_output, and on ValidationError synthesizes an `internal.schema_violation` envelope on output.error instead of raising.

Partial outputs (from mid-agent interrupts) bypass both checks.

The error module is imported lazily inside execute() to avoid a circular import via conductor.engine.__init__.

Updated two existing tests to assert the new envelope contract instead of expecting ValidationError. Added three new tests covering well-formed envelopes, malformed envelopes, and the partial-output bypass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/executor/agent.py   |  74 +++++++++++++++++++++-
 src/conductor/providers/base.py   |  10 +++
 tests/test_executor/test_agent.py | 101 ++++++++++++++++++++++++++++--
 3 files changed, 176 insertions(+), 9 deletions(-)

diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py
index a0cc8cc..b5de3f2 100644
--- a/src/conductor/executor/agent.py
+++ b/src/conductor/executor/agent.py
@@ -229,9 +229,77 @@ async def execute(
                     model=output.model,
                 )
 
-        # Validate output against schema (skip for partial output from interrupts)
-        if agent.output and not output.partial:
-            validate_output(output.content, agent.output)
+        # Validate output against schema (skip for partial output from interrupts).
+        #
+        # Before validating, check for the ``conductor_error`` discriminator —
+        # if the agent's structured output says ``conductor_error: true``, the
+        # node has raised a typed error rather than produced a normal result.
+        # We coerce + attach the envelope to ``output.error`` and SKIP schema
+        # validation: the agent's declared output schema doesn't apply to
+        # error envelopes, and forcing it would mask the real failure with a
+        # confusing schema-violation message.
+        #
+        # A schema-violation on a non-error response is itself surfaced as a
+        # synthesized ``internal.schema_violation`` envelope so the engine can
+        # route on it like any other failure.
+        if output.partial:
+            return output
+
+        # Lazy import: ``conductor.engine`` package __init__ pulls in
+        # workflow.py, which imports AgentExecutor — going through the
+        # package here would deadlock. The errors module is a leaf.
+        from conductor.engine.errors import (  # noqa: PLC0415
+            EnvelopeValidationError,
+            coerce_envelope,
+            make_schema_violation,
+        )
+
+        if isinstance(output.content, dict) and output.content.get("conductor_error") is True:
+            try:
+                envelope = coerce_envelope(output.content)
+            except EnvelopeValidationError as exc:
+                # The agent claimed a failure but the envelope is malformed
+                # (missing kind, bad type, etc.). Surface as a schema violation
+                # of the envelope shape itself so the engine still halts cleanly.
+                envelope = make_schema_violation(
+                    node_name=agent.name,
+                    source="agent",
+                    original_message=f"Malformed conductor_error envelope: {exc}",
+                )
+            return AgentOutput(
+                content=output.content,
+                raw_response=output.raw_response,
+                tokens_used=output.tokens_used,
+                input_tokens=output.input_tokens,
+                output_tokens=output.output_tokens,
+                cache_read_tokens=output.cache_read_tokens,
+                cache_write_tokens=output.cache_write_tokens,
+                model=output.model,
+                partial=output.partial,
+                error=dict(envelope),
+            )
+
+        if agent.output:
+            try:
+                validate_output(output.content, agent.output)
+            except ValidationError as exc:
+                envelope = make_schema_violation(
+                    node_name=agent.name,
+                    source="agent",
+                    original_message=str(exc),
+                )
+                return AgentOutput(
+                    content=output.content,
+                    raw_response=output.raw_response,
+                    tokens_used=output.tokens_used,
+                    input_tokens=output.input_tokens,
+                    output_tokens=output.output_tokens,
+                    cache_read_tokens=output.cache_read_tokens,
+                    cache_write_tokens=output.cache_write_tokens,
+                    model=output.model,
+                    partial=output.partial,
+                    error=dict(envelope),
+                )
 
         return output
 
diff --git a/src/conductor/providers/base.py b/src/conductor/providers/base.py
index adfba7f..d1c38a4 100644
--- a/src/conductor/providers/base.py
+++ b/src/conductor/providers/base.py
@@ -106,6 +106,16 @@ class AgentOutput:
     partial: bool = False
     """Whether this output is partial (from a mid-agent interrupt)."""
 
+    error: dict[str, Any] | None = None
+    """Optional structured error envelope (see :mod:`conductor.engine.errors`).
+
+    When set, the agent produced a ``{conductor_error: true, ...}``
+    discriminator (or the engine synthesized one — e.g. for an output
+    schema violation). The engine inspects this field at the agent call
+    site to route on the failure rather than treating the output as
+    success. ``None`` means the agent succeeded.
+    """
+
 
 class AgentProvider(ABC):
     """Abstract base class for SDK providers.
diff --git a/tests/test_executor/test_agent.py b/tests/test_executor/test_agent.py
index 14260be..560054e 100644
--- a/tests/test_executor/test_agent.py
+++ b/tests/test_executor/test_agent.py
@@ -90,7 +90,12 @@ def mock_handler(agent, prompt, context):
 
     @pytest.mark.asyncio
     async def test_execute_validates_output(self, simple_agent: AgentDef) -> None:
-        """Test that execute validates output against schema."""
+        """Schema-violation surfaces as an ``internal.schema_violation`` envelope.
+
+        Previously this raised ``ValidationError`` directly. With error
+        routing, validation failures attach an envelope to
+        ``output.error`` so the engine can route on them.
+        """
 
         def mock_handler(agent, prompt, context):
             return {"answer": 42}  # Wrong type - should be string
@@ -100,8 +105,10 @@ def mock_handler(agent, prompt, context):
 
         context = {"workflow": {"input": {"question": "test"}}}
 
-        with pytest.raises(ValidationError, match="wrong type"):
-            await executor.execute(simple_agent, context)
+        output = await executor.execute(simple_agent, context)
+        assert output.error is not None
+        assert output.error["kind"] == "internal.schema_violation"
+        assert "wrong type" in output.error["message"]
 
     @pytest.mark.asyncio
     async def test_execute_without_schema_skips_validation(
@@ -527,7 +534,7 @@ class TestAgentExecutorOutputHandling:
 
     @pytest.mark.asyncio
     async def test_missing_output_field_raises(self) -> None:
-        """Test that missing required output field raises ValidationError."""
+        """Missing required output field surfaces as a schema-violation envelope."""
         agent = AgentDef(
             name="test",
             model="gpt-4",
@@ -544,8 +551,10 @@ def mock_handler(agent, prompt, context):
         provider = CopilotProvider(mock_handler=mock_handler)
         executor = AgentExecutor(provider)
 
-        with pytest.raises(ValidationError, match="Missing required output field"):
-            await executor.execute(agent, {})
+        output = await executor.execute(agent, {})
+        assert output.error is not None
+        assert output.error["kind"] == "internal.schema_violation"
+        assert "Missing required output field" in output.error["message"]
 
     @pytest.mark.asyncio
     async def test_output_with_multiple_types(self) -> None:
@@ -582,3 +591,83 @@ def mock_handler(agent, prompt, context):
         assert output.content["active"] is True
         assert output.content["items"] == [1, 2, 3]
         assert output.content["meta"] == {"key": "value"}
+
+
+class TestAgentExecutorConductorError:
+    """Tests for the ``conductor_error`` discriminator path."""
+
+    @pytest.mark.asyncio
+    async def test_well_formed_conductor_error_becomes_envelope(self) -> None:
+        """A ``conductor_error: true`` response attaches an envelope and skips schema."""
+        agent = AgentDef(
+            name="leaf",
+            model="gpt-4",
+            prompt="x",
+            output={"answer": OutputField(type="string")},
+        )
+
+        def mock_handler(agent, prompt, context):
+            return {
+                "conductor_error": True,
+                "kind": "external.git.fetch_failed",
+                "message": "remote rejected",
+                "details": {"remote": "origin"},
+            }
+
+        provider = CopilotProvider(mock_handler=mock_handler)
+        executor = AgentExecutor(provider)
+
+        output = await executor.execute(agent, {})
+        assert output.error is not None
+        assert output.error["kind"] == "external.git.fetch_failed"
+        assert output.error["message"] == "remote rejected"
+        assert output.error["details"] == {"remote": "origin"}
+        # Original content is preserved verbatim (engine may want it).
+        assert output.content["conductor_error"] is True
+
+    @pytest.mark.asyncio
+    async def test_malformed_conductor_error_becomes_schema_violation(self) -> None:
+        """A ``conductor_error: true`` with no ``kind`` is reported as a schema violation."""
+        agent = AgentDef(name="leaf", model="gpt-4", prompt="x")
+
+        def mock_handler(agent, prompt, context):
+            return {"conductor_error": True, "message": "missing kind"}
+
+        provider = CopilotProvider(mock_handler=mock_handler)
+        executor = AgentExecutor(provider)
+
+        output = await executor.execute(agent, {})
+        assert output.error is not None
+        assert output.error["kind"] == "internal.schema_violation"
+        assert "Malformed conductor_error envelope" in output.error["message"]
+
+    @pytest.mark.asyncio
+    async def test_partial_output_skips_error_checks(self) -> None:
+        """Partial output (mid-interrupt) bypasses both discriminator and schema."""
+        agent = AgentDef(
+            name="leaf",
+            model="gpt-4",
+            prompt="x",
+            output={"answer": OutputField(type="string")},
+        )
+
+        def mock_handler(agent, prompt, context):
+            return {"conductor_error": True, "kind": "external.x", "message": "y"}
+
+        provider = CopilotProvider(mock_handler=mock_handler)
+        executor = AgentExecutor(provider)
+
+        # Force partial by monkey-patching the provider's output post-call.
+        original_execute = provider.execute
+
+        async def patched_execute(*args, **kwargs):
+            out = await original_execute(*args, **kwargs)
+            out.partial = True
+            return out
+
+        provider.execute = patched_execute  # type: ignore[method-assign]
+
+        output = await executor.execute(agent, {})
+        # Partial path returns directly; no envelope coercion.
+        assert output.error is None
+        assert output.partial is True

From ea0e960655235850c3450e0a7b2b4554895b9d87 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 15:29:52 -0700
Subject: [PATCH 07/14] feat(script-exec): write CONDUCTOR_ERROR_OUT, read
 envelopes back, synthesize internal.script_error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ScriptOutput grows an optional `error: dict | None` field.

Each script run now allocates a tempfile via tempfile.mkstemp(), closes the fd immediately, and exposes the path in the env as CONDUCTOR_ERROR_OUT — set AFTER the agent.env merge so users cannot accidentally redirect or override it.

After process.communicate() the executor reads the file:

- empty / missing → no envelope

- valid JSON envelope → coerce_envelope, attach to output.error

- valid JSON but malformed envelope → internal.schema_violation

- invalid JSON → internal.schema_violation

If no envelope was written AND exit_code != 0 AND the node has opted into error routing (raises declared OR any on_error route present), the executor synthesizes an `internal.script_error` envelope. Legacy workflows that route on exit_code (no opt-in) keep their existing behavior.

Temp file is always removed in finally — even on timeout/command-not-found.

Added 7 tests covering: no envelope on success, well-formed envelope surfaces, user env cannot override, synthesized internal.script_error on opt-in, legacy non-zero with no opt-in keeps error=None, malformed envelope downgrades to schema_violation, temp file cleanup.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/executor/script.py   | 204 ++++++++++++++++++++++-------
 tests/test_executor/test_script.py | 150 +++++++++++++++++++++
 2 files changed, 306 insertions(+), 48 deletions(-)

diff --git a/src/conductor/executor/script.py b/src/conductor/executor/script.py
index a7fd0b6..c58fb56 100644
--- a/src/conductor/executor/script.py
+++ b/src/conductor/executor/script.py
@@ -7,8 +7,11 @@
 from __future__ import annotations
 
 import asyncio
+import contextlib
+import json
 import os
-from dataclasses import dataclass
+import tempfile
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any
 
 from conductor.exceptions import ExecutionError
@@ -38,11 +41,37 @@ class ScriptOutput:
         stdout: Captured standard output as text.
         stderr: Captured standard error as text.
         exit_code: Process exit code.
+        error: Optional structured error envelope.
+
+            Populated when the script wrote a well-formed envelope to the
+            file referenced by ``$CONDUCTOR_ERROR_OUT``, or when the engine
+            synthesized an ``internal.script_error`` envelope because the
+            script exited non-zero, opted into error routing (via ``raises``
+            or any ``on_error`` route), and produced no envelope of its own.
+            ``None`` for legacy success/exit-code routing.
     """
 
     stdout: str
     stderr: str
     exit_code: int
+    error: dict[str, Any] | None = field(default=None)
+
+
+def _node_uses_error_routing(agent: AgentDef) -> bool:
+    """True if the script node opts into error envelopes via ``raises``/``on_error``.
+
+    Backward compatibility: legacy workflows route on ``exit_code`` and
+    must NOT see a synthesized ``internal.script_error`` envelope when
+    they haven't opted in. Any presence of ``raises`` or an ``on_error``
+    field on any route counts as opting in.
+    """
+    if agent.raises:
+        return True
+    if agent.routes:
+        for route in agent.routes:
+            if route.on_error is not None:
+                return True
+    return False
 
 
 class ScriptExecutor:
@@ -98,55 +127,134 @@ async def execute(
         base_env = {**os.environ, "PYTHONUTF8": "1"}
         env = {**base_env, **agent.env} if agent.env else base_env
 
+        # Error envelope contract: allocate a temp file the script can write
+        # a typed error envelope to, and expose its path via the
+        # CONDUCTOR_ERROR_OUT env var. Set this AFTER agent.env merge so the
+        # user cannot accidentally (or deliberately) override it. Caller is
+        # responsible for reading + deleting in the finally block.
+        fd, error_path = tempfile.mkstemp(prefix="conductor-err-", suffix=".json")
+        os.close(fd)
+        env["CONDUCTOR_ERROR_OUT"] = error_path
+
         _verbose_log(f"  Script: {rendered_command} {' '.join(rendered_args)}")
 
-        # Create subprocess
+        envelope: dict[str, Any] | None = None
         try:
-            process = await asyncio.create_subprocess_exec(
-                rendered_command,
-                *rendered_args,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-                cwd=rendered_working_dir,
-                env=env,
-            )
-        except FileNotFoundError as exc:
-            raise ExecutionError(
-                f"Script '{agent.name}': command not found: '{rendered_command}'",
-                agent_name=agent.name,
-                suggestion=f"Ensure '{rendered_command}' is installed and in PATH",
-            ) from exc
-        except OSError as e:
-            raise ExecutionError(
-                f"Script '{agent.name}' failed to start: {e}",
-                agent_name=agent.name,
-            ) from e
-
-        # Wait with optional per-script timeout
-        timeout = agent.timeout
-        try:
-            stdout_bytes, stderr_bytes = await asyncio.wait_for(
-                process.communicate(), timeout=timeout
+            # Create subprocess
+            try:
+                process = await asyncio.create_subprocess_exec(
+                    rendered_command,
+                    *rendered_args,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
+                    cwd=rendered_working_dir,
+                    env=env,
+                )
+            except FileNotFoundError as exc:
+                raise ExecutionError(
+                    f"Script '{agent.name}': command not found: '{rendered_command}'",
+                    agent_name=agent.name,
+                    suggestion=f"Ensure '{rendered_command}' is installed and in PATH",
+                ) from exc
+            except OSError as e:
+                raise ExecutionError(
+                    f"Script '{agent.name}' failed to start: {e}",
+                    agent_name=agent.name,
+                ) from e
+
+            # Wait with optional per-script timeout
+            timeout = agent.timeout
+            try:
+                stdout_bytes, stderr_bytes = await asyncio.wait_for(
+                    process.communicate(), timeout=timeout
+                )
+            except TimeoutError:
+                process.kill()
+                await process.wait()
+                raise ExecutionError(
+                    f"Script '{agent.name}' timed out after {timeout}s",
+                    agent_name=agent.name,
+                ) from None
+
+            stdout_text = stdout_bytes.decode("utf-8", errors="replace")
+            stderr_text = stderr_bytes.decode("utf-8", errors="replace")
+
+            if stderr_text:
+                _verbose_log(f"  Script stderr: {stderr_text.strip()}")
+
+            # IMPORTANT: process.returncode is guaranteed non-None after communicate().
+            # Do NOT use `process.returncode or 0` — 0 is falsy in Python.
+            assert process.returncode is not None
+            exit_code = process.returncode
+
+            envelope = _read_error_envelope(error_path, agent.name)
+            if envelope is None and exit_code != 0 and _node_uses_error_routing(agent):
+                envelope = _synthesize_script_error(
+                    exit_code=exit_code,
+                    stderr_tail=stderr_text,
+                    command=rendered_command,
+                )
+
+            return ScriptOutput(
+                stdout=stdout_text,
+                stderr=stderr_text,
+                exit_code=exit_code,
+                error=envelope,
             )
-        except TimeoutError:
-            process.kill()
-            await process.wait()
-            raise ExecutionError(
-                f"Script '{agent.name}' timed out after {timeout}s",
-                agent_name=agent.name,
-            ) from None
-
-        stdout_text = stdout_bytes.decode("utf-8", errors="replace")
-        stderr_text = stderr_bytes.decode("utf-8", errors="replace")
-
-        if stderr_text:
-            _verbose_log(f"  Script stderr: {stderr_text.strip()}")
-
-        # IMPORTANT: process.returncode is guaranteed non-None after communicate().
-        # Do NOT use `process.returncode or 0` — 0 is falsy in Python.
-        assert process.returncode is not None
-        return ScriptOutput(
-            stdout=stdout_text,
-            stderr=stderr_text,
-            exit_code=process.returncode,
+        finally:
+            # Always remove the temp file — even on TimeoutError or
+            # FileNotFoundError above. The script's contract is to write
+            # once; lingering files would leak across runs.
+            with contextlib.suppress(OSError):
+                os.unlink(error_path)
+
+
+def _read_error_envelope(path: str, node_name: str) -> dict[str, Any] | None:
+    """Read and coerce the envelope file the script may have written.
+
+    Returns ``None`` if the file is missing, empty, unreadable, or
+    contains a malformed envelope. A malformed envelope is downgraded to
+    an ``internal.schema_violation`` envelope so the engine still sees
+    a typed failure (rather than silently dropping the script's signal).
+    """
+    # Lazy import: package __init__ pulls in workflow → circular.
+    from conductor.engine.errors import (  # noqa: PLC0415
+        EnvelopeValidationError,
+        coerce_envelope,
+        make_schema_violation,
+    )
+
+    try:
+        with open(path, encoding="utf-8") as f:
+            raw = f.read()
+    except OSError:
+        return None
+    if not raw.strip():
+        return None
+    try:
+        parsed = json.loads(raw)
+    except json.JSONDecodeError as exc:
+        envelope = make_schema_violation(
+            node_name=node_name,
+            source="script",
+            original_message=f"CONDUCTOR_ERROR_OUT file is not valid JSON: {exc}",
+        )
+        return dict(envelope)
+    try:
+        return dict(coerce_envelope(parsed))
+    except EnvelopeValidationError as exc:
+        envelope = make_schema_violation(
+            node_name=node_name,
+            source="script",
+            original_message=f"Malformed envelope in CONDUCTOR_ERROR_OUT: {exc}",
         )
+        return dict(envelope)
+
+
+def _synthesize_script_error(*, exit_code: int, stderr_tail: str, command: str) -> dict[str, Any]:
+    """Wrap the make_script_error helper with the same lazy-import dance."""
+    from conductor.engine.errors import make_script_error  # noqa: PLC0415
+
+    # Truncate stderr so we don't drag megabytes of compiler noise into the envelope.
+    tail = stderr_tail[-2000:] if len(stderr_tail) > 2000 else stderr_tail
+    return dict(make_script_error(exit_code=exit_code, stderr_tail=tail, command=command))
diff --git a/tests/test_executor/test_script.py b/tests/test_executor/test_script.py
index 9473e00..44ec088 100644
--- a/tests/test_executor/test_script.py
+++ b/tests/test_executor/test_script.py
@@ -281,3 +281,153 @@ async def test_specific_exit_code(self, executor: ScriptExecutor) -> None:
         )
         output = await executor.execute(agent, {})
         assert output.exit_code == 42
+
+
+class TestScriptErrorEnvelope:
+    """Tests for the ``CONDUCTOR_ERROR_OUT`` envelope contract."""
+
+    @pytest.mark.asyncio
+    async def test_no_envelope_when_file_empty(self, executor: ScriptExecutor) -> None:
+        """A script that exits 0 and writes nothing produces no envelope."""
+        agent = AgentDef(
+            name="quiet",
+            type="script",
+            command=sys.executable,
+            args=["-c", "print('ok')"],
+            raises=["external.x"],  # opted in, but didn't raise anything
+        )
+        output = await executor.execute(agent, {})
+        assert output.exit_code == 0
+        assert output.error is None
+
+    @pytest.mark.asyncio
+    async def test_well_formed_envelope_is_surfaced(self, executor: ScriptExecutor) -> None:
+        """A script writes a JSON envelope to ``$CONDUCTOR_ERROR_OUT``."""
+        script = (
+            "import json, os\n"
+            "with open(os.environ['CONDUCTOR_ERROR_OUT'], 'w', encoding='utf-8') as f:\n"
+            "    json.dump({"
+            "'conductor_error': True, "
+            "'kind': 'external.git.fetch_failed', "
+            "'message': 'remote unreachable', "
+            "'details': {'remote': 'origin'}"
+            "}, f)\n"
+        )
+        agent = AgentDef(
+            name="fetch",
+            type="script",
+            command=sys.executable,
+            args=["-c", script],
+            raises=["external.git.fetch_failed"],
+        )
+        output = await executor.execute(agent, {})
+        assert output.error is not None
+        assert output.error["kind"] == "external.git.fetch_failed"
+        assert output.error["message"] == "remote unreachable"
+        assert output.error["details"] == {"remote": "origin"}
+
+    @pytest.mark.asyncio
+    async def test_user_env_cannot_override_envelope_path(self, executor: ScriptExecutor) -> None:
+        """``CONDUCTOR_ERROR_OUT`` in ``agent.env`` is overridden by the executor."""
+        # The script reads $CONDUCTOR_ERROR_OUT and writes there. If the
+        # executor's value wins, the test passes (envelope shows up). If the
+        # user override won, the file would be created in a path the executor
+        # doesn't know about, so output.error stays None.
+        script = (
+            "import json, os\n"
+            "with open(os.environ['CONDUCTOR_ERROR_OUT'], 'w', encoding='utf-8') as f:\n"
+            "    json.dump({"
+            "'conductor_error': True, "
+            "'kind': 'external.x', "
+            "'message': 'm'"
+            "}, f)\n"
+        )
+        agent = AgentDef(
+            name="hijack",
+            type="script",
+            command=sys.executable,
+            args=["-c", script],
+            env={"CONDUCTOR_ERROR_OUT": "/this/path/does/not/exist/should/not/win"},
+            raises=["external.x"],
+        )
+        output = await executor.execute(agent, {})
+        assert output.error is not None
+        assert output.error["kind"] == "external.x"
+
+    @pytest.mark.asyncio
+    async def test_synthesize_script_error_when_opted_in(self, executor: ScriptExecutor) -> None:
+        """Non-zero exit + no envelope + opt-in → synthesized ``internal.script_error``."""
+        agent = AgentDef(
+            name="boom",
+            type="script",
+            command=sys.executable,
+            args=["-c", "import sys; sys.stderr.write('kaboom\\n'); sys.exit(7)"],
+            raises=["external.x"],
+        )
+        output = await executor.execute(agent, {})
+        assert output.exit_code == 7
+        assert output.error is not None
+        assert output.error["kind"] == "internal.script_error"
+        assert output.error["details"]["exit_code"] == 7
+
+    @pytest.mark.asyncio
+    async def test_legacy_nonzero_exit_without_optin_has_no_envelope(
+        self, executor: ScriptExecutor
+    ) -> None:
+        """Legacy workflows that route on ``exit_code`` are not surprised by an envelope."""
+        agent = AgentDef(
+            name="legacy",
+            type="script",
+            command=sys.executable,
+            args=["-c", "import sys; sys.exit(3)"],
+        )
+        output = await executor.execute(agent, {})
+        assert output.exit_code == 3
+        assert output.error is None
+
+    @pytest.mark.asyncio
+    async def test_malformed_envelope_downgraded_to_schema_violation(
+        self, executor: ScriptExecutor
+    ) -> None:
+        """An envelope missing ``kind`` becomes ``internal.schema_violation``."""
+        script = (
+            "import json, os\n"
+            "with open(os.environ['CONDUCTOR_ERROR_OUT'], 'w', encoding='utf-8') as f:\n"
+            "    json.dump({'conductor_error': True, 'message': 'no kind here'}, f)\n"
+        )
+        agent = AgentDef(
+            name="bad",
+            type="script",
+            command=sys.executable,
+            args=["-c", script],
+            raises=["external.x"],
+        )
+        output = await executor.execute(agent, {})
+        assert output.error is not None
+        assert output.error["kind"] == "internal.schema_violation"
+
+    @pytest.mark.asyncio
+    async def test_envelope_temp_file_is_cleaned_up(self, executor: ScriptExecutor) -> None:
+        """The temp file is removed after the script runs (success path)."""
+        # Capture the path the script saw and check it doesn't exist after.
+        capture_path = os.path.join(tempfile.gettempdir(), "conductor-test-capture.txt")
+        script = (
+            "import os\n"
+            f"with open(r'{capture_path}', 'w') as f:\n"
+            "    f.write(os.environ['CONDUCTOR_ERROR_OUT'])\n"
+        )
+        try:
+            agent = AgentDef(
+                name="capture",
+                type="script",
+                command=sys.executable,
+                args=["-c", script],
+            )
+            await executor.execute(agent, {})
+            with open(capture_path) as f:
+                error_path = f.read().strip()
+            assert error_path  # we got a path
+            assert not os.path.exists(error_path), "temp envelope file should be removed"
+        finally:
+            if os.path.exists(capture_path):
+                os.unlink(capture_path)

From 82b5fed3e46fc92cd0532641eb32c1fd1692ddf2 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 16:02:58 -0700
Subject: [PATCH 08/14] feat(engine-wire): leaf-error path, normalization,
 sub-workflow boundary, group failure carry envelope
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wires the on_error contract through the workflow engine end-to-end:

- _evaluate_routes() now accepts an optional error envelope; empty routes plus an unhandled envelope raises UnhandledNodeError instead of silently routing to \.

- New _normalize_envelope_for_node() applies undeclared-kind wrapping (raises declared + kind not in raises + not in allowlist → internal.undeclared_kind with original kind preserved under details.original_kind).

- New _handle_leaf_error() centralizes the leaf path: normalize, store_error, evaluate error routes, raise UnhandledWorkflowError with a single-leaf frame trail on no match.

- Agent call site (~2583) and script call site (~2359) both branch on output.error BEFORE storage and BEFORE schema validation. Script's success path runs full output-schema validation as before; error path skips it.

- Sub-workflow call sites catch UnhandledWorkflowError from child engines and re-raise as ExecutionError. Phase 1 invariant: envelopes do not propagate across the sub-workflow boundary.

- ParallelAgentError and ForEachError grow an optional envelope field. The parallel/for_each child execution helpers detect output.error, normalize it, raise an ExecutionError tagged with ._envelope, and the existing failure_mode machinery records it. Downstream group consumers can inspect the typed envelope.

- Tests: 6 new tests in tests/test_engine/test_error_routing.py covering agent envelope routing, unhandled-envelope halt, undeclared-kind normalization (with the rescue agent reading the original kind from context), success-path regression, script envelope routing, and legacy exit_code routing regression. Full suite (2887 tests) green; no regressions.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/engine/workflow.py        | 245 +++++++++++++++++++++-
 tests/test_engine/test_error_routing.py | 267 ++++++++++++++++++++++++
 2 files changed, 506 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_engine/test_error_routing.py

diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py
index eccc740..8a1a58d 100644
--- a/src/conductor/engine/workflow.py
+++ b/src/conductor/engine/workflow.py
@@ -20,10 +20,12 @@
 
 from conductor.engine.checkpoint import CheckpointManager
 from conductor.engine.context import WorkflowContext
+from conductor.engine.errors import wrap_undeclared_kind
 from conductor.engine.limits import LimitEnforcer
 from conductor.engine.pricing import ModelPricing
 from conductor.engine.router import Router, RouteResult
 from conductor.engine.usage import UsageTracker, WorkflowUsage
+from conductor.error_kinds import RESERVED_ON_ERROR_ALLOWLIST
 from conductor.events import WorkflowEvent, WorkflowEventEmitter
 from conductor.exceptions import (
     AgentTimeoutError,
@@ -31,6 +33,8 @@
     ExecutionError,
     InterruptError,
     MaxIterationsError,
+    UnhandledNodeError,
+    UnhandledWorkflowError,
     ValidationError,
 )
 from conductor.exceptions import (
@@ -102,6 +106,14 @@ class ParallelAgentError:
     exception_type: str
     message: str
     suggestion: str | None = None
+    envelope: dict[str, Any] | None = None
+    """Typed error envelope when the child raised via the on_error contract.
+
+    Populated when a child agent/script returned an
+    :class:`~conductor.engine.errors.ErrorEnvelope` instead of throwing.
+    Downstream consumers can inspect this to recover the typed kind/details.
+    ``None`` for failures that came through ordinary exception paths.
+    """
 
 
 @dataclass
@@ -147,6 +159,8 @@ class ForEachError:
     exception_type: str
     message: str
     suggestion: str | None = None
+    envelope: dict[str, Any] | None = None
+    """Typed error envelope when the iteration raised via the on_error contract."""
 
 
 @dataclass
@@ -1143,7 +1157,19 @@ async def _execute_subworkflow(
             instructions_preamble=child_preamble,
         )
 
-        return await child_engine.run(sub_inputs)
+        try:
+            return await child_engine.run(sub_inputs)
+        except UnhandledWorkflowError as exc:
+            # Phase 1 invariant: error envelopes do NOT propagate across
+            # sub-workflow boundaries. The child's halt is surfaced to the
+            # parent as a generic ExecutionError so the parent's success
+            # path treats it like any other sub-workflow failure. Phase 2
+            # will introduce envelope propagation with parent frames.
+            raise ExecutionError(
+                f"sub-workflow '{agent.name}' halted on unhandled error envelope "
+                f"({exc.envelope.get('kind')}): {exc.envelope.get('message')}",
+                agent_name=agent.name,
+            ) from exc
 
     async def _execute_subworkflow_with_inputs(
         self,
@@ -1255,7 +1281,14 @@ async def _execute_subworkflow_with_inputs(
 
         child_engine = WorkflowEngine(**child_engine_kwargs)
 
-        output = await child_engine.run(sub_inputs)
+        try:
+            output = await child_engine.run(sub_inputs)
+        except UnhandledWorkflowError as exc:
+            raise ExecutionError(
+                f"sub-workflow '{agent.name}' halted on unhandled error envelope "
+                f"({exc.envelope.get('kind')}): {exc.envelope.get('message')}",
+                agent_name=agent.name,
+            ) from exc
         usage = child_engine.usage_tracker.get_summary()
         return output, usage
 
@@ -2313,6 +2346,53 @@ async def _execute_loop(self, current_agent_name: str) -> dict[str, Any]:
                                     )
                                 output_content.update(parsed_json)
 
+                            # Error-bucket: script wrote a CONDUCTOR_ERROR_OUT
+                            # envelope, or the executor synthesized
+                            # internal.script_error. Skip declared-output schema
+                            # validation (the script's "output" contract doesn't
+                            # apply when it raised) and route via the error path.
+                            if script_output.error is not None:
+                                self._emit(
+                                    "script_completed",
+                                    {
+                                        "agent_name": agent.name,
+                                        "elapsed": _script_elapsed,
+                                        "stdout": script_output.stdout,
+                                        "stderr": script_output.stderr,
+                                        "exit_code": script_output.exit_code,
+                                    },
+                                )
+                                self.limits.record_execution(agent.name)
+                                self.limits.check_timeout()
+                                route_result = self._handle_leaf_error(
+                                    agent, script_output.error, output_content
+                                )
+                                self._emit(
+                                    "route_taken",
+                                    {
+                                        "from_agent": agent.name,
+                                        "to_agent": route_result.target,
+                                    },
+                                )
+                                if route_result.target == "$end":
+                                    result = self._build_final_output(route_result.output_transform)
+                                    self._emit(
+                                        "workflow_completed",
+                                        {
+                                            "elapsed": _time.time() - _workflow_start,
+                                            "output": result,
+                                        },
+                                    )
+                                    self._execute_hook("on_complete", result=result)
+                                    return result
+                                current_agent_name = route_result.target
+                                interrupt_result = await self._check_interrupt(current_agent_name)
+                                if interrupt_result is not None:
+                                    current_agent_name = await self._handle_interrupt_result(
+                                        interrupt_result, current_agent_name
+                                    )
+                                continue
+
                             # Validate against declared output schema (issue #118).
                             # `is not None` so an explicit `output: {}` opts
                             # into strict JSON-object mode with zero declared
@@ -2570,6 +2650,43 @@ async def _execute_loop(self, current_agent_name: str) -> dict[str, Any]:
                             },
                         )
 
+                        # Error-bucket: leaf raised a typed envelope. Normalize,
+                        # store via store_error (NOT store), evaluate error
+                        # routes, and let an unhandled envelope propagate as
+                        # UnhandledWorkflowError. The success path below is
+                        # skipped entirely.
+                        if output.error is not None:
+                            self.limits.record_execution(agent.name)
+                            self.limits.check_timeout()
+                            route_result = self._handle_leaf_error(
+                                agent, output.error, output.content
+                            )
+                            self._emit(
+                                "route_taken",
+                                {
+                                    "from_agent": agent.name,
+                                    "to_agent": route_result.target,
+                                },
+                            )
+                            if route_result.target == "$end":
+                                result = self._build_final_output(route_result.output_transform)
+                                self._emit(
+                                    "workflow_completed",
+                                    {
+                                        "elapsed": _time.time() - _workflow_start,
+                                        "output": result,
+                                    },
+                                )
+                                self._execute_hook("on_complete", result=result)
+                                return result
+                            current_agent_name = route_result.target
+                            interrupt_result = await self._check_interrupt(current_agent_name)
+                            if interrupt_result is not None:
+                                current_agent_name = await self._handle_interrupt_result(
+                                    interrupt_result, current_agent_name
+                                )
+                            continue
+
                         # Store output
                         self.context.store(agent.name, output.content)
 
@@ -3551,6 +3668,21 @@ async def execute_single_agent(agent: AgentDef) -> tuple[str, Any]:
                     },
                 )
 
+                # If the child raised a typed envelope, surface it as a
+                # child failure via the existing exception path so the
+                # group's failure_mode logic handles it uniformly. The
+                # envelope is preserved on the wrapped exception so the
+                # group can record it in its ``errors`` collection.
+                if output.error is not None:
+                    normalized = self._normalize_envelope_for_node(agent, output.error)
+                    exc = ExecutionError(
+                        f"agent '{agent.name}' raised "
+                        f"'{normalized.get('kind')}': {normalized.get('message')}",
+                        agent_name=agent.name,
+                    )
+                    exc._envelope = normalized  # type: ignore[attr-defined]
+                    raise exc
+
                 # Individual parallel agents are counted toward iteration limit
                 # at the parallel group level after all agents complete
                 return (agent.name, output.content)
@@ -3643,6 +3775,7 @@ async def execute_single_agent(agent: AgentDef) -> tuple[str, Any]:
                         exception_type=type(result).__name__,
                         message=str(result),
                         suggestion=getattr(result, "suggestion", None),
+                        envelope=getattr(result, "_envelope", None),
                     )
                 else:
                     # Agent succeeded - store output
@@ -3698,6 +3831,7 @@ async def execute_single_agent(agent: AgentDef) -> tuple[str, Any]:
                         exception_type=type(result).__name__,
                         message=str(result),
                         suggestion=getattr(result, "suggestion", None),
+                        envelope=getattr(result, "_envelope", None),
                     )
                 else:
                     # Agent succeeded - store output
@@ -3990,6 +4124,21 @@ def _item_callback(event_type: str, data: dict[str, Any]) -> None:
                     },
                 )
 
+                # Typed-envelope path mirrors the parallel group: surface
+                # as a child failure via the existing exception channel so
+                # the failure_mode policy handles it uniformly.
+                if output.error is not None:
+                    normalized = self._normalize_envelope_for_node(
+                        for_each_group.agent, output.error
+                    )
+                    exc = ExecutionError(
+                        f"for_each iteration '{key}' raised "
+                        f"'{normalized.get('kind')}': {normalized.get('message')}",
+                        agent_name=for_each_group.agent.name,
+                    )
+                    exc._envelope = normalized  # type: ignore[attr-defined]
+                    raise exc
+
                 return (key, output.content)
             except Exception as e:
                 _item_elapsed = _time.time() - _item_start
@@ -4083,6 +4232,7 @@ def _item_callback(event_type: str, data: dict[str, Any]) -> None:
                             exception_type=type(result).__name__,
                             message=str(result),
                             suggestion=getattr(result, "suggestion", None),
+                            envelope=getattr(result, "_envelope", None),
                         )
                     else:
                         # Item succeeded - store output
@@ -4115,6 +4265,7 @@ def _item_callback(event_type: str, data: dict[str, Any]) -> None:
                             exception_type=type(result).__name__,
                             message=str(result),
                             suggestion=getattr(result, "suggestion", None),
+                            envelope=getattr(result, "_envelope", None),
                         )
                     else:
                         # Item succeeded - store output
@@ -4197,7 +4348,81 @@ def _get_next_agent(self, agent: AgentDef, output: dict[str, Any]) -> str:
         result = self._evaluate_routes(agent, output)
         return result.target
 
-    def _evaluate_routes(self, agent: AgentDef, output: dict[str, Any]) -> RouteResult:
+    def _normalize_envelope_for_node(
+        self,
+        agent: AgentDef,
+        envelope: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Apply undeclared-kind normalization to a leaf envelope.
+
+        When a node has declared ``raises:``, any envelope whose ``kind`` is
+        not in that list (and not in the runtime-synthesized
+        :data:`RESERVED_ON_ERROR_ALLOWLIST`) is wrapped into
+        ``internal.undeclared_kind`` with the original kind preserved in
+        ``details.original_kind``. This is the engine's job (not the
+        executor's): the executor is agnostic to the workflow's contract.
+
+        Called once at the leaf node's call site before context storage,
+        route evaluation, and event emission so every downstream observer
+        sees the normalized form.
+        """
+        declared = agent.raises
+        if not declared:
+            return envelope
+        kind = envelope.get("kind")
+        if kind in declared or kind in RESERVED_ON_ERROR_ALLOWLIST:
+            return envelope
+        return dict(wrap_undeclared_kind(envelope, declared=list(declared)))  # type: ignore[arg-type]
+
+    def _handle_leaf_error(
+        self,
+        agent: AgentDef,
+        envelope: dict[str, Any],
+        output_for_route: dict[str, Any],
+    ) -> RouteResult:
+        """Apply normalization, store the envelope, and evaluate error routes.
+
+        Used by both the agent and script call sites after a leaf executor
+        returns an envelope. The returned :class:`RouteResult` is what the
+        caller should drive forward; if no error route matches, this method
+        raises :class:`UnhandledWorkflowError` wrapping the normalized
+        envelope and a single-leaf frame trail.
+
+        Args:
+            agent: The leaf node that raised.
+            envelope: The raw envelope (pre-normalization) from the executor.
+            output_for_route: The leaf node's output content as it would be
+                stored on the success path. The error router sees this as
+                ``current_output`` so templates that reference output fields
+                still work.
+
+        Returns:
+            The :class:`RouteResult` from the matching error route.
+
+        Raises:
+            UnhandledWorkflowError: if no error route at this level matched.
+        """
+        normalized = self._normalize_envelope_for_node(agent, envelope)
+        self.context.store_error(agent.name, normalized)
+
+        try:
+            return self._evaluate_routes(agent, output_for_route, error=normalized)
+        except UnhandledNodeError as exc:
+            frames = [
+                {
+                    "node": agent.name,
+                    "kind": normalized.get("kind"),
+                    "iteration": self.limits.get_agent_execution_count(agent.name),
+                }
+            ]
+            raise UnhandledWorkflowError(normalized, frames=frames) from exc
+
+    def _evaluate_routes(
+        self,
+        agent: AgentDef,
+        output: dict[str, Any],
+        error: dict[str, Any] | None = None,
+    ) -> RouteResult:
         """Evaluate routes using the Router.
 
         Uses the Router to evaluate routing rules and determine the next agent.
@@ -4205,19 +4430,27 @@ def _evaluate_routes(self, agent: AgentDef, output: dict[str, Any]) -> RouteResu
 
         Args:
             agent: The current agent definition.
-            output: The agent's output content.
+            output: The agent's output content. Pass ``{}`` on the error path
+                (the envelope is the meaningful signal).
+            error: If set, treated as the error envelope and only error-bucket
+                routes (those with ``on_error`` populated) are considered.
+                Empty routes plus a non-None error raises
+                :class:`UnhandledNodeError` rather than silently routing to ``$end``.
 
         Returns:
             RouteResult with target and optional output transform.
         """
         if not agent.routes:
-            # No routes defined - default to $end
+            if error is not None:
+                from conductor.exceptions import UnhandledNodeError  # noqa: PLC0415
+
+                raise UnhandledNodeError(error, node_name=agent.name)
             return RouteResult(target="$end")
 
         # Build context for condition evaluation
         eval_context = self.context.get_for_template()
 
-        return self.router.evaluate(agent.routes, output, eval_context)
+        return self.router.evaluate(agent.routes, output, eval_context, error=error)
 
     def _evaluate_parallel_routes(
         self, parallel_group: ParallelGroup, output: dict[str, Any]
diff --git a/tests/test_engine/test_error_routing.py b/tests/test_engine/test_error_routing.py
new file mode 100644
index 0000000..25911a1
--- /dev/null
+++ b/tests/test_engine/test_error_routing.py
@@ -0,0 +1,267 @@
+"""End-to-end engine tests for ``on_error`` routing.
+
+These tests exercise the agent and script call sites in
+:mod:`conductor.engine.workflow` to confirm that:
+
+- An agent that returns ``conductor_error: true`` routes to its
+  matching ``on_error`` route instead of the success route.
+- A script that writes a ``CONDUCTOR_ERROR_OUT`` envelope routes the
+  same way.
+- An undeclared kind is normalized to ``internal.undeclared_kind``
+  before route evaluation.
+- An unhandled envelope halts the workflow with
+  :class:`UnhandledWorkflowError` carrying the envelope and a frame
+  trail.
+- Phase 1 does NOT propagate envelopes across sub-workflow boundaries
+  (they surface to the parent as generic :class:`ExecutionError`).
+"""
+
+from __future__ import annotations
+
+import sys
+
+import pytest
+
+from conductor.config.schema import (
+    AgentDef,
+    ContextConfig,
+    LimitsConfig,
+    OutputField,
+    RouteDef,
+    RuntimeConfig,
+    WorkflowConfig,
+    WorkflowDef,
+)
+from conductor.engine.workflow import WorkflowEngine
+from conductor.exceptions import UnhandledWorkflowError
+from conductor.providers.copilot import CopilotProvider
+
+
+def _wf(*agents: AgentDef, output: dict[str, str] | None = None) -> WorkflowConfig:
+    return WorkflowConfig(
+        workflow=WorkflowDef(
+            name="t",
+            entry_point=agents[0].name,
+            runtime=RuntimeConfig(provider="copilot"),
+            context=ContextConfig(mode="accumulate"),
+            limits=LimitsConfig(max_iterations=10),
+        ),
+        agents=list(agents),
+        output=output or {},
+    )
+
+
+class TestAgentErrorRouting:
+    @pytest.mark.asyncio
+    async def test_envelope_routes_to_on_error_target(self) -> None:
+        """An agent envelope picks the on_error route, not the success route."""
+        config = _wf(
+            AgentDef(
+                name="probe",
+                model="gpt-4",
+                prompt="x",
+                raises=["external.git.fetch_failed"],
+                routes=[
+                    RouteDef(to="rescue", on_error="external.git.fetch_failed"),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(
+                name="rescue",
+                model="gpt-4",
+                prompt="recover",
+                routes=[RouteDef(to="$end")],
+                output={"status": OutputField(type="string")},
+            ),
+            output={"status": "{{ rescue.output.status }}"},
+        )
+
+        def handler(agent, prompt, context):
+            if agent.name == "probe":
+                return {
+                    "conductor_error": True,
+                    "kind": "external.git.fetch_failed",
+                    "message": "remote rejected",
+                }
+            return {"status": "recovered"}
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider)
+
+        result = await engine.run({})
+        assert result == {"status": "recovered"}
+
+    @pytest.mark.asyncio
+    async def test_unhandled_envelope_halts_with_typed_error(self) -> None:
+        """No matching on_error route → UnhandledWorkflowError with envelope + frames."""
+        config = _wf(
+            AgentDef(
+                name="probe",
+                model="gpt-4",
+                prompt="x",
+                # No on_error route — workflow must halt.
+                routes=[RouteDef(to="$end")],
+            ),
+        )
+
+        def handler(agent, prompt, context):
+            return {
+                "conductor_error": True,
+                "kind": "external.api.timeout",
+                "message": "took too long",
+            }
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider)
+
+        with pytest.raises(UnhandledWorkflowError) as exc_info:
+            await engine.run({})
+
+        assert exc_info.value.envelope["kind"] == "external.api.timeout"
+        assert exc_info.value.envelope["message"] == "took too long"
+        assert len(exc_info.value.frames) == 1
+        assert exc_info.value.frames[0]["node"] == "probe"
+        assert exc_info.value.frames[0]["kind"] == "external.api.timeout"
+
+    @pytest.mark.asyncio
+    async def test_undeclared_kind_is_normalized_then_routes(self) -> None:
+        """If ``raises`` declares X but agent raises Y, kind becomes ``internal.undeclared_kind``."""
+        config = _wf(
+            AgentDef(
+                name="probe",
+                model="gpt-4",
+                prompt="x",
+                raises=["external.git.fetch_failed"],  # declared, but agent raises something else
+                routes=[
+                    RouteDef(to="rescue", on_error="internal.undeclared_kind"),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(
+                name="rescue",
+                model="gpt-4",
+                prompt="x",
+                routes=[RouteDef(to="$end")],
+                output={"original_kind": OutputField(type="string")},
+            ),
+            output={"recovered_from": "{{ rescue.output.original_kind }}"},
+        )
+
+        def handler(agent, prompt, context):
+            if agent.name == "probe":
+                return {
+                    "conductor_error": True,
+                    "kind": "external.unexpected.thing",
+                    "message": "boom",
+                }
+            # rescue agent reads probe.error.details.original_kind from context
+            err = context["probe"]["error"]
+            return {"original_kind": err["details"]["original_kind"]}
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider)
+
+        result = await engine.run({})
+        assert result == {"recovered_from": "external.unexpected.thing"}
+
+    @pytest.mark.asyncio
+    async def test_success_path_unchanged(self) -> None:
+        """Regression: agents without ``raises``/``on_error`` behave exactly as before."""
+        config = _wf(
+            AgentDef(
+                name="happy",
+                model="gpt-4",
+                prompt="x",
+                output={"answer": OutputField(type="string")},
+                routes=[RouteDef(to="$end")],
+            ),
+            output={"answer": "{{ happy.output.answer }}"},
+        )
+
+        def handler(agent, prompt, context):
+            return {"answer": "ok"}
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider)
+
+        assert await engine.run({}) == {"answer": "ok"}
+
+
+class TestScriptErrorRouting:
+    @pytest.mark.asyncio
+    async def test_script_envelope_routes_to_on_error(self, tmp_path) -> None:
+        """A script writes an envelope and the engine routes via on_error."""
+        # Script that writes a typed envelope and exits 1.
+        script_body = (
+            "import json, os, sys\n"
+            "with open(os.environ['CONDUCTOR_ERROR_OUT'], 'w') as f:\n"
+            "    json.dump({"
+            "'conductor_error': True, "
+            "'kind': 'external.git.fetch_failed', "
+            "'message': 'remote down'"
+            "}, f)\n"
+            "sys.exit(1)\n"
+        )
+        config = _wf(
+            AgentDef(
+                name="fetch",
+                type="script",
+                command=sys.executable,
+                args=["-c", script_body],
+                raises=["external.git.fetch_failed"],
+                routes=[
+                    RouteDef(to="rescue", on_error="external.git.fetch_failed"),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(
+                name="rescue",
+                model="gpt-4",
+                prompt="recover",
+                routes=[RouteDef(to="$end")],
+                output={"status": OutputField(type="string")},
+            ),
+            output={"status": "{{ rescue.output.status }}"},
+        )
+
+        def handler(agent, prompt, context):
+            return {"status": "recovered"}
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider)
+
+        result = await engine.run({})
+        assert result == {"status": "recovered"}
+
+    @pytest.mark.asyncio
+    async def test_legacy_script_exit_code_routing_unchanged(self) -> None:
+        """Scripts without ``raises``/``on_error`` keep their legacy exit_code routing."""
+        config = _wf(
+            AgentDef(
+                name="legacy_fail",
+                type="script",
+                command=sys.executable,
+                args=["-c", "import sys; sys.exit(3)"],
+                routes=[
+                    RouteDef(when="{{ legacy_fail.output.exit_code != 0 }}", to="fallback"),
+                    RouteDef(to="$end"),
+                ],
+            ),
+            AgentDef(
+                name="fallback",
+                model="gpt-4",
+                prompt="x",
+                routes=[RouteDef(to="$end")],
+                output={"v": OutputField(type="string")},
+            ),
+            output={"v": "{{ fallback.output.v }}"},
+        )
+
+        def handler(agent, prompt, context):
+            return {"v": "fallback-ran"}
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider)
+
+        result = await engine.run({})
+        assert result == {"v": "fallback-ran"}

From 4121525e8b1d4972eb873fcab9a2c548d2d85914 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 16:09:03 -0700
Subject: [PATCH 09/14] feat(halt-jsonl): write errors.jsonl and typed
 workflow_failed event on UnhandledWorkflowError
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 Step 9. When the engine raises UnhandledWorkflowError (a leaf
node returned an error envelope that no on_error route matched), the
new `except UnhandledWorkflowError` arm in `_execute_loop`:

* writes a single-line `errors.jsonl` record under \\\/conductor/\\
  using the same naming convention as the event log
  (\\conductor-<workflow>-<ts>-<run_id>.errors.jsonl\\), carrying the
  envelope, frame trail, and leaf node name;
* emits a typed \\workflow_failed\\ event with
  \\rror_type='UnhandledWorkflowError'\\ plus the envelope, frames
  and errors-jsonl path so dashboard/log subscribers can render the
  typed halt distinctly from a generic ConductorError;
* runs the \\on_error\\ lifecycle hook and saves a checkpoint, for
  parity with the other failure paths;
* re-raises so the CLI (Step 10) can map it to its distinct exit code.

The new arm is placed *before* the existing \\xcept ConductorError\\
so the typed halt is caught first. Generic ConductorError handling is
unchanged.

Adds two integration tests covering the jsonl artefact and the typed
event. Also tightens a docstring that was 101 chars (pre-existing from
Step 8 — caught now by ruff format check).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/engine/workflow.py        |  72 +++++++++++++++++
 tests/test_engine/test_error_routing.py | 100 +++++++++++++++++++++++-
 2 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py
index 8a1a58d..1c3bac8 100644
--- a/src/conductor/engine/workflow.py
+++ b/src/conductor/engine/workflow.py
@@ -424,6 +424,12 @@ def __init__(
         # Checkpoint tracking
         self._current_agent_name: str | None = None
         self._last_checkpoint_path: Path | None = None
+        self._errors_jsonl_path: Path | None = None
+        """Path to ``errors.jsonl`` if this run halted on an ``UnhandledWorkflowError``.
+
+        Populated by the ``_execute_loop`` exception arm; consumed by the
+        CLI to surface the path in the end-of-run summary.
+        """
 
         # Sub-workflow depth tracking
         self._subworkflow_depth = _subworkflow_depth
@@ -1447,6 +1453,50 @@ def set_limits(self, limits: LimitEnforcer) -> None:
         """
         self.limits = limits
 
+    def _write_errors_jsonl(self, exc: UnhandledWorkflowError) -> Path | None:
+        """Write the unhandled error envelope to ``errors.jsonl``.
+
+        Each unhandled-error halt produces one record (single line) under
+        ``$TMPDIR/conductor/`` with a filename that pairs with the
+        corresponding ``.events.jsonl`` log so external tooling can join
+        them. The record carries the envelope, the frame trail, and the
+        leaf node name so a reader doesn't need to scan the event log to
+        learn what happened.
+
+        Best-effort: I/O failures are logged but never raised — the
+        original :class:`UnhandledWorkflowError` must reach the CLI.
+        """
+        import secrets  # noqa: PLC0415
+        import tempfile  # noqa: PLC0415
+        import time  # noqa: PLC0415
+
+        workflow_name = self.config.workflow.name
+        ts = time.strftime("%Y%m%d-%H%M%S")
+        run_id = self._run_context.run_id if self._run_context else secrets.token_hex(4)
+        path = (
+            Path(tempfile.gettempdir())
+            / "conductor"
+            / f"conductor-{workflow_name}-{ts}-{run_id}.errors.jsonl"
+        )
+        record = {
+            "type": "unhandled_workflow_error",
+            "timestamp": time.time(),
+            "workflow": workflow_name,
+            "run_id": run_id,
+            "envelope": exc.envelope,
+            "frames": exc.frames,
+            "leaf_node": (exc.frames[0].get("node") if exc.frames else None),
+        }
+        try:
+            path.parent.mkdir(parents=True, exist_ok=True)
+            with open(path, "w", encoding="utf-8") as f:
+                f.write(json.dumps(record, default=str))
+                f.write("\n")
+        except OSError as e:
+            logger.warning("Failed to write errors.jsonl at %s: %s", path, e)
+            return None
+        return path
+
     def _save_checkpoint_on_failure(self, error: BaseException) -> None:
         """Attempt to save a checkpoint after a failure.
 
@@ -2731,6 +2781,28 @@ async def _execute_loop(self, current_agent_name: str) -> dict[str, Any]:
         except KeyboardInterrupt:
             self._save_checkpoint_on_failure(KeyboardInterrupt("Workflow interrupted by user"))
             raise
+        except UnhandledWorkflowError as e:
+            # Typed-error halt: the workflow ran to a node that raised an
+            # error envelope and no on_error route at any reachable level
+            # matched. Write an errors.jsonl describing the halt (separate
+            # file from the event log so external tooling can pick it up
+            # without parsing the whole stream), emit a typed
+            # workflow_failed event, then re-raise so the CLI maps to its
+            # distinct exit code in Step 10.
+            errors_path = self._write_errors_jsonl(e)
+            self._errors_jsonl_path = errors_path
+            fail_data: dict[str, Any] = {
+                "error_type": "UnhandledWorkflowError",
+                "message": str(e),
+                "agent_name": self._current_agent_name,
+                "envelope": e.envelope,
+                "frames": e.frames,
+                "errors_jsonl_path": str(errors_path) if errors_path else None,
+            }
+            self._emit("workflow_failed", fail_data)
+            self._execute_hook("on_error", error=e)
+            self._save_checkpoint_on_failure(e)
+            raise
         except ConductorError as e:
             fail_data: dict[str, Any] = {
                 "error_type": type(e).__name__,
diff --git a/tests/test_engine/test_error_routing.py b/tests/test_engine/test_error_routing.py
index 25911a1..39995d1 100644
--- a/tests/test_engine/test_error_routing.py
+++ b/tests/test_engine/test_error_routing.py
@@ -125,7 +125,7 @@ def handler(agent, prompt, context):
 
     @pytest.mark.asyncio
     async def test_undeclared_kind_is_normalized_then_routes(self) -> None:
-        """If ``raises`` declares X but agent raises Y, kind becomes ``internal.undeclared_kind``."""
+        """If ``raises`` lists X but agent raises Y, kind becomes ``internal.undeclared_kind``."""
         config = _wf(
             AgentDef(
                 name="probe",
@@ -265,3 +265,101 @@ def handler(agent, prompt, context):
 
         result = await engine.run({})
         assert result == {"v": "fallback-ran"}
+
+
+class TestUnhandledHaltArtifacts:
+    """Step 9: errors.jsonl + ``workflow_failed`` event on unhandled halt."""
+
+    @pytest.mark.asyncio
+    async def test_writes_errors_jsonl_with_envelope_and_frame(self) -> None:
+        """An unhandled envelope produces a one-line errors.jsonl record."""
+        import json as _json
+        from pathlib import Path as _Path
+
+        config = _wf(
+            AgentDef(
+                name="probe",
+                model="gpt-4",
+                prompt="x",
+                routes=[RouteDef(to="$end")],
+            ),
+        )
+
+        def handler(agent, prompt, context):
+            return {
+                "conductor_error": True,
+                "kind": "external.api.timeout",
+                "message": "took too long",
+                "details": {"endpoint": "/v1/things"},
+            }
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider)
+
+        with pytest.raises(UnhandledWorkflowError):
+            await engine.run({})
+
+        path = engine._errors_jsonl_path
+        assert path is not None, "errors.jsonl path was not recorded on the engine"
+        assert _Path(path).exists(), f"errors.jsonl was not written at {path}"
+
+        lines = _Path(path).read_text(encoding="utf-8").splitlines()
+        assert len(lines) == 1, "errors.jsonl must contain exactly one record"
+        record = _json.loads(lines[0])
+
+        assert record["type"] == "unhandled_workflow_error"
+        assert record["workflow"] == "t"
+        assert record["leaf_node"] == "probe"
+        assert record["envelope"]["kind"] == "external.api.timeout"
+        assert record["envelope"]["message"] == "took too long"
+        assert record["envelope"]["details"] == {"endpoint": "/v1/things"}
+        assert record["frames"] == [
+            {"node": "probe", "kind": "external.api.timeout", "iteration": 1}
+        ]
+        assert "timestamp" in record
+        assert "run_id" in record
+
+    @pytest.mark.asyncio
+    async def test_emits_typed_workflow_failed_event_with_envelope(self) -> None:
+        """A ``workflow_failed`` event is emitted with ``error_type='UnhandledWorkflowError'``."""
+        from conductor.events import WorkflowEventEmitter
+
+        captured: list[tuple[str, dict]] = []
+
+        def _capture(event):
+            captured.append((event.type, dict(event.data)))
+
+        emitter = WorkflowEventEmitter()
+        emitter.subscribe(_capture)
+
+        config = _wf(
+            AgentDef(
+                name="probe",
+                model="gpt-4",
+                prompt="x",
+                routes=[RouteDef(to="$end")],
+            ),
+        )
+
+        def handler(agent, prompt, context):
+            return {
+                "conductor_error": True,
+                "kind": "external.api.timeout",
+                "message": "took too long",
+            }
+
+        provider = CopilotProvider(mock_handler=handler)
+        engine = WorkflowEngine(config=config, provider=provider, event_emitter=emitter)
+
+        with pytest.raises(UnhandledWorkflowError):
+            await engine.run({})
+
+        failed = [d for t, d in captured if t == "workflow_failed"]
+        assert len(failed) == 1, f"expected one workflow_failed event, got {len(failed)}"
+        data = failed[0]
+        assert data["error_type"] == "UnhandledWorkflowError"
+        assert data["agent_name"] == "probe"
+        assert data["envelope"]["kind"] == "external.api.timeout"
+        assert data["envelope"]["message"] == "took too long"
+        assert data["frames"][0]["node"] == "probe"
+        assert data["errors_jsonl_path"] is not None

From 7d3c094ea19e16dba1ac8f43a60746ad9477b6f3 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 16:11:56 -0700
Subject: [PATCH 10/14] feat(cli-exit): map UnhandledWorkflowError to exit code
 3 with typed stderr summary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 Step 10. Both \\un\\ and \\esume\\ now catch
\\UnhandledWorkflowError\\ specifically before the generic
\\xcept Exception\\, render a typed panel to stderr via the new
\\print_unhandled_workflow_error\\ helper, and exit with code 3 so
callers (CI, polyphony, shell scripts) can distinguish 'workflow ran
to completion and halted on a typed error' from a generic failure
(code 1).

The summary panel surfaces the leaf node, kind, message, optional
\\details\\, and the path to the \\rrors.jsonl\\ artefact. To make
the path reachable from the CLI without holding a reference to the
engine, \\_execute_loop\\ now also attaches the path to the exception
instance (\\xc.errors_jsonl_path\\) before re-raising.

Adds two CLI tests:

* unhandled typed halt exits 3,
* generic \\RuntimeError\\ still exits 1 (regression guard).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/cli/app.py         | 56 +++++++++++++++++++++++++
 src/conductor/engine/workflow.py |  3 ++
 tests/test_cli/test_run.py       | 71 ++++++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+)

diff --git a/src/conductor/cli/app.py b/src/conductor/cli/app.py
index 93f5bc6..05bcc75 100644
--- a/src/conductor/cli/app.py
+++ b/src/conductor/cli/app.py
@@ -155,6 +155,52 @@ def print_error(error: Exception) -> None:
         console.print(panel)
 
 
+def print_unhandled_workflow_error(error: Any) -> None:
+    """Print an :class:`UnhandledWorkflowError` summary to stderr.
+
+    Renders the typed halt distinctly from generic failures so operators
+    and CI tooling can recognise it at a glance: shows the failing leaf
+    node, the kind, the message, and the path to the ``errors.jsonl``
+    artefact (read off the engine attribute set in
+    :meth:`WorkflowEngine._execute_loop`'s ``UnhandledWorkflowError``
+    arm) when available.
+    """
+    envelope = error.envelope or {}
+    frames = error.frames or []
+    node = frames[0].get("node", "<unknown>") if frames else "<unknown>"
+    kind = envelope.get("kind", "<unknown>")
+    message = envelope.get("message", "")
+
+    content = Text()
+    content.append("Node:    ", style="bold")
+    content.append(f"{node}\n", style="cyan")
+    content.append("Kind:    ", style="bold")
+    content.append(f"{kind}\n", style="yellow")
+    content.append("Message: ", style="bold")
+    content.append(f"{message}\n", style="white")
+
+    details = envelope.get("details")
+    if details:
+        import json as _json
+
+        content.append("Details: ", style="bold")
+        content.append(_json.dumps(details, default=str), style="dim")
+        content.append("\n")
+
+    errors_path = getattr(error, "errors_jsonl_path", None)
+    if errors_path:
+        content.append("Halt log: ", style="bold")
+        content.append(f"{errors_path}\n", style="dim")
+
+    panel = Panel(
+        content,
+        title="[bold red]❌ Workflow halted: unhandled typed error[/bold red]",
+        border_style="red",
+        padding=(1, 2),
+    )
+    console.print(panel)
+
+
 def version_callback(value: bool) -> None:
     """Display version information and exit."""
     if value:
@@ -469,6 +515,11 @@ def run(
         output_console.print_json(json.dumps(result))
 
     except Exception as e:
+        from conductor.exceptions import UnhandledWorkflowError
+
+        if isinstance(e, UnhandledWorkflowError):
+            print_unhandled_workflow_error(e)
+            raise typer.Exit(code=3) from None
         print_error(e)
         raise typer.Exit(code=1) from None
 
@@ -877,6 +928,11 @@ def resume(
         output_console.print_json(json.dumps(result))
 
     except Exception as e:
+        from conductor.exceptions import UnhandledWorkflowError
+
+        if isinstance(e, UnhandledWorkflowError):
+            print_unhandled_workflow_error(e)
+            raise typer.Exit(code=3) from None
         print_error(e)
         raise typer.Exit(code=1) from None
 
diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py
index 1c3bac8..80eab65 100644
--- a/src/conductor/engine/workflow.py
+++ b/src/conductor/engine/workflow.py
@@ -2791,6 +2791,9 @@ async def _execute_loop(self, current_agent_name: str) -> dict[str, Any]:
             # distinct exit code in Step 10.
             errors_path = self._write_errors_jsonl(e)
             self._errors_jsonl_path = errors_path
+            # Attach the path to the exception itself so the CLI handler
+            # can render it without needing a reference to the engine.
+            e.errors_jsonl_path = errors_path  # type: ignore[attr-defined]
             fail_data: dict[str, Any] = {
                 "error_type": "UnhandledWorkflowError",
                 "message": str(e),
diff --git a/tests/test_cli/test_run.py b/tests/test_cli/test_run.py
index 7a22789..6f10596 100644
--- a/tests/test_cli/test_run.py
+++ b/tests/test_cli/test_run.py
@@ -980,3 +980,74 @@ def test_build_plan_detects_loop(self, tmp_path: Path) -> None:
         # agent1 should be marked as a loop target
         agent1_step = next(s for s in plan.steps if s.agent_name == "agent1")
         assert agent1_step.is_loop_target is True
+
+
+class TestUnhandledWorkflowErrorExitCode:
+    """Step 10: ``run`` exits 3 (not 1) when an UnhandledWorkflowError escapes."""
+
+    def test_exit_code_3_on_unhandled_workflow_error(self, tmp_path: Path) -> None:
+        """A workflow that halts on an unhandled envelope exits with code 3."""
+        from conductor.exceptions import UnhandledWorkflowError
+
+        workflow_file = tmp_path / "halt.yaml"
+        workflow_file.write_text("""\
+workflow:
+  name: halting
+  entry_point: probe
+
+agents:
+  - name: probe
+    model: gpt-4
+    prompt: "x"
+    routes:
+      - to: $end
+
+output:
+  v: "x"
+""")
+
+        envelope = {
+            "kind": "external.api.timeout",
+            "message": "took too long",
+        }
+        frames = [{"node": "probe", "kind": "external.api.timeout", "iteration": 1}]
+
+        async def _raiser(*_args, **_kwargs):
+            exc = UnhandledWorkflowError(envelope, frames)
+            exc.errors_jsonl_path = tmp_path / "fake-errors.jsonl"  # type: ignore[attr-defined]
+            raise exc
+
+        with patch("conductor.cli.run.run_workflow_async", side_effect=_raiser):
+            result = runner.invoke(app, ["run", str(workflow_file)])
+
+        assert result.exit_code == 3, (
+            f"expected exit code 3 (unhandled typed halt), got {result.exit_code}; "
+            f"output={result.output!r}"
+        )
+
+    def test_generic_exception_still_exits_1(self, tmp_path: Path) -> None:
+        """Regression: a generic failure still maps to exit code 1, not 3."""
+        workflow_file = tmp_path / "boom.yaml"
+        workflow_file.write_text("""\
+workflow:
+  name: boom
+  entry_point: agent1
+
+agents:
+  - name: agent1
+    model: gpt-4
+    prompt: "x"
+    routes:
+      - to: $end
+
+output:
+  v: "x"
+""")
+
+        async def _raiser(*_args, **_kwargs):
+            raise RuntimeError("kaboom")
+
+        with patch("conductor.cli.run.run_workflow_async", side_effect=_raiser):
+            result = runner.invoke(app, ["run", str(workflow_file)])
+
+        assert result.exit_code == 1

From 9a37a75db66b7b1c5fb54212b827a4ca3236e858 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 16:15:27 -0700
Subject: [PATCH 11/14] feat(helpers): ship engine-agnostic error helpers for
 pwsh/bash/python/node/dotnet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 Step 11. New \\src/conductor/helpers/error/\\ directory with
one-file convenience modules for raising typed Conductor error
envelopes from script-type nodes. All helpers implement the same
contract: read \\CONDUCTOR_ERROR_OUT\\, write the
\\{conductor_error: true, kind, message, details?}\\ JSON envelope to
that path, and return — leaving exit-code management to the caller.

* \\Conductor.Error.psm1\\ — PowerShell, \\Write-ConductorError\\ cmdlet
* \\conductor-error.sh\\ — Bash/sh, sourced \\conductor_error\\ function
* \\conductor_error.py\\ — Python, \\aise_kind\\ function
* \\conductor-error.mjs\\ — Node, exported \\aiseError\\
* \\ConductorError.cs\\ — .NET, static \\ConductorError.Raise\\
* \\README.md\\ — quick reference + usage examples per engine

Helpers ship under \\src/conductor/\\ so hatchling rolls them into
the wheel; verified by inspecting the built artefact. Nothing is
auto-loaded, on PATH, or on PYTHONPATH — script authors must
explicitly Import-Module / source / import to use them, and authors
who don't want them write the JSON themselves (it's three lines per
engine).

Adds \\	ests/test_helpers/test_error_helpers.py\\ with 6 cases
covering the Python helper's envelope shape, env-var contract, no-
sys.exit guarantee, and round-trip through \\coerce_envelope\\. The
non-Python helpers are exercised by the cross-engine integration test
landing in Step 13.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/helpers/__init__.py             |  8 ++
 .../helpers/error/Conductor.Error.psm1        | 40 ++++++++
 src/conductor/helpers/error/ConductorError.cs | 50 ++++++++++
 src/conductor/helpers/error/README.md         | 96 ++++++++++++++++++
 src/conductor/helpers/error/__init__.py       | 23 +++++
 .../helpers/error/conductor-error.mjs         | 53 ++++++++++
 .../helpers/error/conductor-error.sh          | 57 +++++++++++
 .../helpers/error/conductor_error.py          | 70 +++++++++++++
 tests/test_helpers/__init__.py                |  0
 tests/test_helpers/test_error_helpers.py      | 99 +++++++++++++++++++
 10 files changed, 496 insertions(+)
 create mode 100644 src/conductor/helpers/__init__.py
 create mode 100644 src/conductor/helpers/error/Conductor.Error.psm1
 create mode 100644 src/conductor/helpers/error/ConductorError.cs
 create mode 100644 src/conductor/helpers/error/README.md
 create mode 100644 src/conductor/helpers/error/__init__.py
 create mode 100644 src/conductor/helpers/error/conductor-error.mjs
 create mode 100644 src/conductor/helpers/error/conductor-error.sh
 create mode 100644 src/conductor/helpers/error/conductor_error.py
 create mode 100644 tests/test_helpers/__init__.py
 create mode 100644 tests/test_helpers/test_error_helpers.py

diff --git a/src/conductor/helpers/__init__.py b/src/conductor/helpers/__init__.py
new file mode 100644
index 0000000..2c6c8ac
--- /dev/null
+++ b/src/conductor/helpers/__init__.py
@@ -0,0 +1,8 @@
+"""Helpers package for engine-agnostic Conductor utilities.
+
+Currently exposes the :mod:`conductor.helpers.error` subpackage. The
+``helpers`` namespace is reserved for future additions (logging,
+retry, metrics) that follow the same engine-agnostic pattern.
+"""
+
+from __future__ import annotations
diff --git a/src/conductor/helpers/error/Conductor.Error.psm1 b/src/conductor/helpers/error/Conductor.Error.psm1
new file mode 100644
index 0000000..d82ac32
--- /dev/null
+++ b/src/conductor/helpers/error/Conductor.Error.psm1
@@ -0,0 +1,40 @@
+# Conductor.Error.psm1 — PowerShell helper for raising typed Conductor
+# error envelopes from script-type workflow nodes.
+#
+# Contract: write a single JSON object to $env:CONDUCTOR_ERROR_OUT and
+# exit 0. Conductor reads the file, treats the node as raised, and
+# evaluates on_error routes against the envelope.
+#
+# Usage:
+#   Import-Module ./Conductor.Error.psm1
+#   Write-ConductorError -Kind "external.git.fetch_failed" `
+#                        -Message "remote rejected push" `
+#                        -Details @{ remote = "origin"; exit = 128 }
+#   exit 0
+
+function Write-ConductorError {
+    [CmdletBinding()]
+    param(
+        [Parameter(Mandatory = $true)][string]$Kind,
+        [Parameter(Mandatory = $true)][string]$Message,
+        [Parameter(Mandatory = $false)][hashtable]$Details
+    )
+
+    if (-not $env:CONDUCTOR_ERROR_OUT) {
+        throw "CONDUCTOR_ERROR_OUT is not set; this script must be run by Conductor as a script-type node."
+    }
+
+    $envelope = [ordered]@{
+        conductor_error = $true
+        kind            = $Kind
+        message         = $Message
+    }
+    if ($PSBoundParameters.ContainsKey('Details') -and $null -ne $Details) {
+        $envelope['details'] = $Details
+    }
+
+    $json = $envelope | ConvertTo-Json -Depth 16 -Compress
+    Set-Content -Path $env:CONDUCTOR_ERROR_OUT -Value $json -Encoding utf8 -NoNewline
+}
+
+Export-ModuleMember -Function Write-ConductorError
diff --git a/src/conductor/helpers/error/ConductorError.cs b/src/conductor/helpers/error/ConductorError.cs
new file mode 100644
index 0000000..2c839d9
--- /dev/null
+++ b/src/conductor/helpers/error/ConductorError.cs
@@ -0,0 +1,50 @@
+// ConductorError.cs — .NET helper for raising typed Conductor error
+// envelopes from script-type workflow nodes.
+//
+// Contract: write a single JSON object to the CONDUCTOR_ERROR_OUT env
+// variable's file path and exit 0. Conductor reads the file, treats
+// the node as raised, and evaluates on_error routes against the
+// envelope.
+//
+// Usage (drop into a script-type node's project):
+//   ConductorError.Raise(
+//       kind: "external.git.fetch_failed",
+//       message: "remote rejected push",
+//       details: new { remote = "origin", exit = 128 });
+//   return 0;
+//
+// Targets net6.0+ for System.Text.Json. Raise does NOT call
+// Environment.Exit on its own; callers stay in charge of process exit.
+
+using System;
+using System.IO;
+using System.Text.Json;
+
+public static class ConductorError
+{
+    public static string Raise(string kind, string message, object? details = null)
+    {
+        if (string.IsNullOrEmpty(kind))
+        {
+            throw new ArgumentException("kind is required and must be non-empty", nameof(kind));
+        }
+        if (message is null)
+        {
+            throw new ArgumentNullException(nameof(message));
+        }
+
+        var path = Environment.GetEnvironmentVariable("CONDUCTOR_ERROR_OUT");
+        if (string.IsNullOrEmpty(path))
+        {
+            throw new InvalidOperationException(
+                "CONDUCTOR_ERROR_OUT is not set; this script must be run by Conductor as a script-type node.");
+        }
+
+        object envelope = details is null
+            ? new { conductor_error = true, kind, message }
+            : new { conductor_error = true, kind, message, details };
+
+        File.WriteAllText(path, JsonSerializer.Serialize(envelope));
+        return path;
+    }
+}
diff --git a/src/conductor/helpers/error/README.md b/src/conductor/helpers/error/README.md
new file mode 100644
index 0000000..4b77cde
--- /dev/null
+++ b/src/conductor/helpers/error/README.md
@@ -0,0 +1,96 @@
+# Conductor error helpers
+
+These are **optional** convenience modules for raising typed
+[`on_error`](../../../../docs/projects/error-routing/on-error-routing.brainstorm.md)
+envelopes from `type: script` workflow nodes. Each file is ~15 lines
+of language-native code that:
+
+1. Reads `CONDUCTOR_ERROR_OUT` from the environment (which Conductor
+   sets to a per-invocation file path before running the script).
+2. Writes a JSON object of the shape
+   `{ "conductor_error": true, "kind": "...", "message": "...", "details"?: {...} }`
+   to that path.
+3. Returns — leaving exit-code management to the caller (helpers
+   never call `exit` / `Environment.Exit` themselves).
+
+Conductor then reads the file after the script exits, treats the node
+as having raised, and evaluates `on_error` routes against the
+envelope. The script can exit `0` after writing the envelope; the
+non-zero / fallback rules apply *only* when no envelope was written.
+
+Nothing here is auto-loaded. None of these files are on `PATH`, on
+`PYTHONPATH`, or otherwise injected into the script's environment.
+Script authors that want them must copy or reference them explicitly
+(`Import-Module`, `source`, `import`, etc.). Script authors that don't
+want them write the JSON themselves — it's three lines in every
+supported engine.
+
+## Files
+
+| Engine          | File                       | Surface                                                                          |
+| --------------- | -------------------------- | -------------------------------------------------------------------------------- |
+| PowerShell      | `Conductor.Error.psm1`     | `Write-ConductorError -Kind x.y -Message m [-Details @{...}]`                    |
+| Bash / sh       | `conductor-error.sh`       | `conductor_error x.y "message" '{"k":"v"}'` (source first)                       |
+| Python          | `conductor_error.py`       | `conductor_error.raise_kind("x.y", "message", details={...})`                    |
+| Node            | `conductor-error.mjs`      | `raiseError({ kind: "x.y", message: "m", details: {} })`                         |
+| .NET (net6.0+)  | `ConductorError.cs`        | `ConductorError.Raise("x.y", "message", new { ... })`                            |
+
+## Example (PowerShell)
+
+```powershell
+Import-Module ./Conductor.Error.psm1
+
+git fetch origin 2>&1 | Tee-Object -Variable gitOut | Out-Null
+if ($LASTEXITCODE -ne 0) {
+    Write-ConductorError -Kind 'external.git.fetch_failed' `
+                         -Message "git fetch failed: $($gitOut[-1])" `
+                         -Details @{ remote = 'origin'; exit = $LASTEXITCODE }
+    exit 0
+}
+```
+
+## Example (Bash)
+
+```bash
+. ./conductor-error.sh
+
+if ! git fetch origin 2>/tmp/err; then
+    conductor_error \
+        "external.git.fetch_failed" \
+        "git fetch failed: $(head -1 /tmp/err)" \
+        '{"remote":"origin"}'
+    exit 0
+fi
+```
+
+## Example (Python)
+
+```python
+import conductor_error
+import subprocess
+import sys
+
+result = subprocess.run(["git", "fetch", "origin"], capture_output=True, text=True)
+if result.returncode != 0:
+    conductor_error.raise_kind(
+        "external.git.fetch_failed",
+        f"git fetch failed: {result.stderr.splitlines()[0] if result.stderr else ''}",
+        details={"remote": "origin", "exit": result.returncode},
+    )
+    sys.exit(0)
+```
+
+## Notes
+
+- The contract is `kind`, `message`, optional `details`. Conductor
+  validates the shape of the envelope itself — see
+  `conductor.engine.errors.coerce_envelope`.
+- Helpers do not validate the *value* of `kind` beyond requiring a
+  non-empty string. Whether `kind` is allowed at runtime is governed
+  by the node's `raises:` list in the workflow YAML; an undeclared
+  kind is rewritten to `internal.undeclared_kind` by the engine.
+- Helpers do not call `exit`. Callers stay in charge of process exit
+  so they can do their own teardown (close handles, flush logs)
+  before returning control to Conductor.
+- New engines do not need a helper to use the contract — write the
+  JSON yourself. The contract is the API.
diff --git a/src/conductor/helpers/error/__init__.py b/src/conductor/helpers/error/__init__.py
new file mode 100644
index 0000000..50209c9
--- /dev/null
+++ b/src/conductor/helpers/error/__init__.py
@@ -0,0 +1,23 @@
+"""Engine-agnostic helpers for raising typed Conductor error envelopes.
+
+This subpackage ships *optional* convenience modules for the five
+script engines Conductor most commonly executes:
+
+* PowerShell (``Conductor.Error.psm1``)
+* Bash / sh  (``conductor-error.sh``)
+* Python     (``conductor_error.py``)
+* Node       (``conductor-error.mjs``)
+* .NET       (``ConductorError.cs``)
+
+All of them write the same JSON envelope to ``$CONDUCTOR_ERROR_OUT``
+and exit ``0``. They exist so the common path reads naturally; nothing
+in Conductor *requires* them — a script that wants to opt out of the
+helper layer can write the JSON itself.
+
+The Python helper is also importable directly (``from
+conductor.helpers.error import conductor_error``) for in-process use
+from agent prompts that shell out, but its primary purpose is to be
+shipped to user scripts as a reference implementation.
+"""
+
+from __future__ import annotations
diff --git a/src/conductor/helpers/error/conductor-error.mjs b/src/conductor/helpers/error/conductor-error.mjs
new file mode 100644
index 0000000..cd1b295
--- /dev/null
+++ b/src/conductor/helpers/error/conductor-error.mjs
@@ -0,0 +1,53 @@
+// conductor-error.mjs — Node.js helper for raising typed Conductor
+// error envelopes from script-type workflow nodes.
+//
+// Contract: write a single JSON object to process.env.CONDUCTOR_ERROR_OUT
+// and exit 0. Conductor reads the file, treats the node as raised, and
+// evaluates on_error routes against the envelope.
+//
+// Usage:
+//   import { raiseError } from "./conductor-error.mjs";
+//   raiseError({
+//     kind: "external.git.fetch_failed",
+//     message: "remote rejected push",
+//     details: { remote: "origin", exit: 128 },
+//   });
+//   process.exit(0);
+//
+// raiseError does NOT call process.exit on its own; callers stay in
+// charge of process exit so they can do their own teardown first.
+
+import { writeFileSync } from "node:fs";
+
+/**
+ * @param {{ kind: string, message: string, details?: Record<string, unknown> }} envelope
+ * @returns {string} the path the envelope was written to
+ */
+export function raiseError({ kind, message, details } = {}) {
+  if (typeof kind !== "string" || kind.length === 0) {
+    throw new TypeError("raiseError: 'kind' is required and must be a non-empty string");
+  }
+  if (typeof message !== "string") {
+    throw new TypeError("raiseError: 'message' is required and must be a string");
+  }
+
+  const out = process.env.CONDUCTOR_ERROR_OUT;
+  if (!out) {
+    throw new Error(
+      "CONDUCTOR_ERROR_OUT is not set; this script must be run by Conductor as a script-type node."
+    );
+  }
+
+  /** @type {Record<string, unknown>} */
+  const payload = {
+    conductor_error: true,
+    kind,
+    message,
+  };
+  if (details !== undefined) {
+    payload.details = details;
+  }
+
+  writeFileSync(out, JSON.stringify(payload), { encoding: "utf8" });
+  return out;
+}
diff --git a/src/conductor/helpers/error/conductor-error.sh b/src/conductor/helpers/error/conductor-error.sh
new file mode 100644
index 0000000..14b5b67
--- /dev/null
+++ b/src/conductor/helpers/error/conductor-error.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# conductor-error.sh — Bash/sh helper for raising typed Conductor error
+# envelopes from script-type workflow nodes.
+#
+# Contract: write a single JSON object to $CONDUCTOR_ERROR_OUT and exit
+# 0. Conductor reads the file, treats the node as raised, and evaluates
+# on_error routes against the envelope.
+#
+# Usage (source then call):
+#   . ./conductor-error.sh
+#   conductor_error "external.git.fetch_failed" \
+#                   "remote rejected push" \
+#                   '{"remote":"origin","exit":128}'
+#   exit 0
+#
+# Arguments:
+#   $1 = kind     (required, dotted-namespace string e.g. "external.git.drift")
+#   $2 = message  (required, human-readable)
+#   $3 = details  (optional, raw JSON object; defaults to omitted)
+#
+# Notes:
+#   - $3 is inlined verbatim. The caller is responsible for valid JSON.
+#   - We use a tiny python one-liner to escape kind/message safely.
+#     Python is assumed available; if not, fall back to jq or printf.
+
+conductor_error() {
+    if [ -z "${CONDUCTOR_ERROR_OUT:-}" ]; then
+        echo "conductor-error: CONDUCTOR_ERROR_OUT is not set; this script must be run by Conductor as a script-type node." >&2
+        return 1
+    fi
+    if [ $# -lt 2 ]; then
+        echo "conductor-error: usage: conductor_error <kind> <message> [<details-json>]" >&2
+        return 1
+    fi
+
+    _ce_kind="$1"
+    _ce_msg="$2"
+    _ce_details="${3:-}"
+
+    if [ -n "$_ce_details" ]; then
+        python3 - "$_ce_kind" "$_ce_msg" "$_ce_details" "$CONDUCTOR_ERROR_OUT" <<'PY'
+import json, sys
+kind, msg, details_raw, out = sys.argv[1:5]
+envelope = {"conductor_error": True, "kind": kind, "message": msg, "details": json.loads(details_raw)}
+with open(out, "w", encoding="utf-8") as f:
+    json.dump(envelope, f)
+PY
+    else
+        python3 - "$_ce_kind" "$_ce_msg" "$CONDUCTOR_ERROR_OUT" <<'PY'
+import json, sys
+kind, msg, out = sys.argv[1:4]
+envelope = {"conductor_error": True, "kind": kind, "message": msg}
+with open(out, "w", encoding="utf-8") as f:
+    json.dump(envelope, f)
+PY
+    fi
+}
diff --git a/src/conductor/helpers/error/conductor_error.py b/src/conductor/helpers/error/conductor_error.py
new file mode 100644
index 0000000..7330830
--- /dev/null
+++ b/src/conductor/helpers/error/conductor_error.py
@@ -0,0 +1,70 @@
+"""Python helper for raising typed Conductor error envelopes.
+
+Contract: write a single JSON object to ``$CONDUCTOR_ERROR_OUT`` and
+exit ``0``. Conductor reads the file, treats the node as raised, and
+evaluates ``on_error`` routes against the envelope.
+
+Usage::
+
+    import conductor_error
+    conductor_error.raise_kind(
+        "external.git.fetch_failed",
+        "remote rejected push",
+        details={"remote": "origin", "exit": 128},
+    )
+    raise SystemExit(0)
+
+The helper deliberately does NOT call :func:`sys.exit`; callers stay
+in charge of process exit so they can do their own teardown before
+returning control to Conductor.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+
+def raise_kind(
+    kind: str,
+    message: str,
+    *,
+    details: dict[str, Any] | None = None,
+) -> Path:
+    """Write a typed error envelope to ``$CONDUCTOR_ERROR_OUT``.
+
+    Args:
+        kind: Dotted-namespace error identifier (for example
+            ``"external.git.fetch_failed"``). Must match the contract
+            documented in the workflow's ``raises`` declaration.
+        message: Human-readable description of what went wrong.
+        details: Optional structured context. Must be JSON-serialisable.
+
+    Returns:
+        The path the envelope was written to.
+
+    Raises:
+        RuntimeError: If ``$CONDUCTOR_ERROR_OUT`` is not set, which
+            indicates the script is being run outside of a
+            Conductor script-type node.
+    """
+    out = os.environ.get("CONDUCTOR_ERROR_OUT")
+    if not out:
+        raise RuntimeError(
+            "CONDUCTOR_ERROR_OUT is not set; "
+            "this script must be run by Conductor as a script-type node."
+        )
+
+    envelope: dict[str, Any] = {
+        "conductor_error": True,
+        "kind": kind,
+        "message": message,
+    }
+    if details is not None:
+        envelope["details"] = details
+
+    path = Path(out)
+    path.write_text(json.dumps(envelope), encoding="utf-8")
+    return path
diff --git a/tests/test_helpers/__init__.py b/tests/test_helpers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_helpers/test_error_helpers.py b/tests/test_helpers/test_error_helpers.py
new file mode 100644
index 0000000..6646923
--- /dev/null
+++ b/tests/test_helpers/test_error_helpers.py
@@ -0,0 +1,99 @@
+"""Tests for the Python flavour of the engine-agnostic error helpers.
+
+These confirm that ``conductor.helpers.error.conductor_error.raise_kind``:
+
+* writes the expected envelope shape to ``$CONDUCTOR_ERROR_OUT``;
+* refuses to run when the env var is unset (so a script accidentally
+  executed outside of a Conductor script-node fails loudly instead of
+  silently dropping the envelope);
+* omits the ``details`` key when no details are passed (so the
+  envelope round-trips cleanly through
+  :func:`conductor.engine.errors.coerce_envelope`).
+
+The other-language helpers (psm1, sh, mjs, cs) are exercised by the
+cross-engine integration test in Step 13. They don't get unit tests
+here because they're not Python.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+import pytest
+
+from conductor.engine.errors import coerce_envelope
+from conductor.helpers.error import conductor_error
+
+
+class TestRaiseKind:
+    def test_writes_envelope_with_required_fields(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        out = tmp_path / "envelope.json"
+        monkeypatch.setenv("CONDUCTOR_ERROR_OUT", str(out))
+
+        returned = conductor_error.raise_kind("external.git.fetch_failed", "remote rejected")
+
+        assert returned == out
+        loaded = json.loads(out.read_text(encoding="utf-8"))
+        assert loaded == {
+            "conductor_error": True,
+            "kind": "external.git.fetch_failed",
+            "message": "remote rejected",
+        }
+        envelope = coerce_envelope(loaded)
+        assert envelope["kind"] == "external.git.fetch_failed"
+        assert envelope["message"] == "remote rejected"
+
+    def test_includes_details_when_provided(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        out = tmp_path / "envelope.json"
+        monkeypatch.setenv("CONDUCTOR_ERROR_OUT", str(out))
+
+        conductor_error.raise_kind(
+            "external.git.drift",
+            "SHA mismatch",
+            details={"expected": "abc", "actual": "def"},
+        )
+
+        loaded = json.loads(out.read_text(encoding="utf-8"))
+        assert loaded["details"] == {"expected": "abc", "actual": "def"}
+
+    def test_raises_when_env_var_unset(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        monkeypatch.delenv("CONDUCTOR_ERROR_OUT", raising=False)
+
+        with pytest.raises(RuntimeError, match="CONDUCTOR_ERROR_OUT is not set"):
+            conductor_error.raise_kind("x.y", "msg")
+
+    def test_does_not_call_sys_exit(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+        """Helper must not exit the process itself — callers stay in charge."""
+        out = tmp_path / "envelope.json"
+        monkeypatch.setenv("CONDUCTOR_ERROR_OUT", str(out))
+
+        sentinel = 0
+        conductor_error.raise_kind("x.y", "msg")
+        sentinel = 1
+        assert sentinel == 1
+
+    def test_helper_files_are_packaged(self) -> None:
+        """All five language helpers + README ship under the wheel package dir."""
+        pkg = Path(conductor_error.__file__).parent
+        for name in (
+            "Conductor.Error.psm1",
+            "conductor-error.sh",
+            "conductor_error.py",
+            "conductor-error.mjs",
+            "ConductorError.cs",
+            "README.md",
+        ):
+            assert (pkg / name).is_file(), f"missing helper file: {name}"
+
+    def test_env_var_is_read_per_call(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """Regression: the helper reads the env var each call, not at import time."""
+        monkeypatch.setenv("CONDUCTOR_ERROR_OUT", os.path.join(os.path.sep, "tmp", "first"))
+        monkeypatch.delenv("CONDUCTOR_ERROR_OUT", raising=False)
+        with pytest.raises(RuntimeError):
+            conductor_error.raise_kind("x.y", "msg")

From ede35788286e9fb1dbbe51858dccbcae1c9e1c1a Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 16:18:12 -0700
Subject: [PATCH 12/14] feat(examples): add error-routing.yaml and
 error-routing-helpers.yaml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 Step 12. Two new example workflows under \\xamples/\\ plus
an Error Routing section in \\xamples/README.md\\.

\\rror-routing.yaml\\:

* A \\	ype: script\\ probe that writes \\{conductor_error: true,
  kind, message, details}\\ to \\\\\ and exits 0
  via the language-neutral contract (no helper required).
* The probe declares \\aises: [external.git.drift,
  external.api.rate_limited]\\ so undeclared kinds get normalised to
  \\internal.undeclared_kind\\.
* Routes select by \\on_error: <kind>\\ to demonstrate that an
  envelope picks the typed arm over a generic exit_code fallback.
* Handlers read \\{{ probe.error.kind }}\\,
  \\{{ probe.error.message }}\\, and \\{{ probe.error.details.* }}\\.
* A \\simulated_failure\\ workflow input toggles between
  \\ok\\ / \\drift\\ / \\ate_limited\\ so the same YAML exercises
  all three arms.

\\rror-routing-helpers.yaml\\:

* Same flow, but raises via the shipped Python helper
  (\\conductor.helpers.error.conductor_error.raise_kind\\) instead of
  hand-rolled JSON, so authors see the ergonomic version.

Both examples validate with \\conductor validate\\ and the full
\\make validate-examples\\ sweep (17/17 pass). Caught one writing
issue along the way: the input schema field is \\input:\\ (singular),
not \\inputs:\\ — fixed before committing.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 examples/README.md                  |  36 +++++++
 examples/error-routing-helpers.yaml |  83 +++++++++++++++++
 examples/error-routing.yaml         | 139 ++++++++++++++++++++++++++++
 3 files changed, 258 insertions(+)
 create mode 100644 examples/error-routing-helpers.yaml
 create mode 100644 examples/error-routing.yaml

diff --git a/examples/README.md b/examples/README.md
index 254a9cc..dc86b99 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -89,6 +89,42 @@ conductor run examples/reasoning-effort.yaml \
 
 See [Reasoning Effort](../docs/configuration.md#reasoning-effort) for the per-provider translation, supported models, and validation rules.
 
+## Error Routing
+
+### error-routing.yaml
+
+A script-driven probe that emits a typed Conductor error envelope via
+the language-neutral `CONDUCTOR_ERROR_OUT` contract, then routes by
+`on_error: <kind>` instead of a fragile `exit_code` check. Demonstrates:
+
+- Writing `{conductor_error: true, kind, message, details}` from a
+  script and exiting 0
+- Declaring `raises:` on the node so undeclared kinds are normalised
+  to `internal.undeclared_kind` instead of leaking through
+- Routing by `on_error: <kind>` (`external.git.drift` →
+  rescue agent, `external.api.rate_limited` → backoff agent)
+- Reading `{{ probe.error.kind }}`, `{{ probe.error.message }}`,
+  `{{ probe.error.details.* }}` from the routed-to handler
+
+```bash
+conductor run examples/error-routing.yaml --input simulated_failure=drift
+conductor run examples/error-routing.yaml --input simulated_failure=rate_limited
+conductor run examples/error-routing.yaml --input simulated_failure=ok
+```
+
+### error-routing-helpers.yaml
+
+Companion example showing the same routing flow but raising the
+envelope via the shipped Python helper
+(`conductor.helpers.error.conductor_error.raise_kind`). PowerShell,
+Bash, Node, and .NET helpers are all available under
+[`src/conductor/helpers/error/`](../src/conductor/helpers/error/) and
+follow the same shape.
+
+```bash
+conductor run examples/error-routing-helpers.yaml
+```
+
 ## Multi-Agent Workflows
 
 ### research-assistant.yaml
diff --git a/examples/error-routing-helpers.yaml b/examples/error-routing-helpers.yaml
new file mode 100644
index 0000000..6e7054f
--- /dev/null
+++ b/examples/error-routing-helpers.yaml
@@ -0,0 +1,83 @@
+# Error Routing with Helpers
+#
+# Companion to `error-routing.yaml`. Demonstrates the same `on_error`
+# flow but uses the engine-agnostic helper modules shipped under
+# `src/conductor/helpers/error/` instead of hand-writing the envelope.
+#
+# Each helper is ~15 lines of language-native code that:
+#
+#   1. Reads `$CONDUCTOR_ERROR_OUT` from the environment.
+#   2. Writes `{conductor_error: true, kind, message, details?}` to it.
+#   3. Returns — leaving exit-code management to the caller.
+#
+# Nothing here is auto-loaded. Scripts must explicitly source / import
+# the helper they want (PowerShell `Import-Module`, Bash `source`,
+# Python `import`, Node `import`, .NET `using`). Scripts that don't
+# want a helper write the JSON themselves — it's three lines.
+#
+# This example uses the Python and PowerShell helpers because they're
+# the two engines most commonly invoked from conductor on developer
+# machines. Bash, Node, and .NET equivalents follow the same shape.
+#
+# Usage:
+#   conductor run examples/error-routing-helpers.yaml
+
+workflow:
+  name: error-routing-helpers-demo
+  description: |
+    Same typed-error routing as error-routing.yaml, but raising via
+    the per-engine helpers shipped under conductor.helpers.error.
+  entry_point: probe_python
+
+  runtime:
+    provider: copilot
+
+  context:
+    mode: accumulate
+
+  limits:
+    max_iterations: 20
+
+agents:
+  - name: probe_python
+    type: script
+    command: python
+    args:
+      - -c
+      - |
+        import sys
+        # The helper is shipped with the conductor wheel and is
+        # importable when conductor is installed. Adjust the import
+        # path if you're running from a source checkout.
+        from conductor.helpers.error import conductor_error
+
+        conductor_error.raise_kind(
+            "external.git.fetch_failed",
+            "git fetch origin returned 128",
+            details={"remote": "origin", "exit": 128},
+        )
+        sys.exit(0)
+    raises:
+      - external.git.fetch_failed
+    routes:
+      - to: rescue
+        on_error: external.git.fetch_failed
+      - to: $end
+
+  - name: rescue
+    model: gpt-4o-mini
+    prompt: |
+      A git fetch failure was raised via the Python helper.
+        kind:    {{ probe_python.error.kind }}
+        message: {{ probe_python.error.message }}
+        remote:  {{ probe_python.error.details.remote }}
+        exit:    {{ probe_python.error.details.exit }}
+      Suggest a single recovery step.
+    routes:
+      - to: $end
+    output:
+      next_step:
+        type: string
+
+output:
+  next_step: "{{ rescue.output.next_step }}"
diff --git a/examples/error-routing.yaml b/examples/error-routing.yaml
new file mode 100644
index 0000000..b6e9bbb
--- /dev/null
+++ b/examples/error-routing.yaml
@@ -0,0 +1,139 @@
+# Error Routing Workflow
+#
+# This example demonstrates Phase 1 of typed `on_error` routing
+# (issue #227). It shows how a `type: script` node can write a typed
+# error envelope to $CONDUCTOR_ERROR_OUT and have the engine pick a
+# matching route by `on_error: <kind>` instead of falling through to a
+# generic exit_code branch.
+#
+# The shape of the contract is intentionally simple:
+#
+#   1. Conductor sets CONDUCTOR_ERROR_OUT to a per-invocation temp file
+#      path in the script's environment.
+#   2. The script writes
+#      `{"conductor_error": true, "kind": "<dotted>", "message": "..."}`
+#      to that path and exits 0.
+#   3. Conductor reads the file, treats the node as raised, and
+#      evaluates routes by matching `on_error` against the envelope's
+#      `kind`. The first matching route wins (same rule as `when:`).
+#
+# A node that declares `raises:` is opting in to typed errors — at
+# runtime, any kind not in the declared list is rewritten to
+# `internal.undeclared_kind` (with the original kind preserved under
+# `details.original_kind`) so authors who care about the contract get
+# loud feedback when a script "lies".
+#
+# Usage:
+#   conductor run examples/error-routing.yaml
+#
+# Try toggling the `simulated_failure` workflow input between
+# "drift", "rate_limited", and "ok" to see different route arms fire.
+
+workflow:
+  name: error-routing-demo
+  description: |
+    Demonstrates typed on_error routing: a probe step either succeeds
+    or writes a typed envelope, and downstream agents react based on
+    the kind without using fragile exit_code checks.
+  entry_point: probe
+
+  input:
+    simulated_failure:
+      type: string
+      description: One of "ok", "drift", "rate_limited"
+      default: drift
+
+  runtime:
+    provider: copilot
+
+  context:
+    mode: accumulate
+
+  limits:
+    max_iterations: 20
+
+agents:
+  - name: probe
+    type: script
+    command: python
+    args:
+      - -c
+      - |
+        import json, os, sys
+        mode = os.environ.get("MODE", "ok")
+        if mode == "ok":
+            print(json.dumps({"sha": "abc123"}))
+            sys.exit(0)
+        envelope = {
+            "ok": {},
+            "drift": {
+                "conductor_error": True,
+                "kind": "external.git.drift",
+                "message": "remote SHA does not match expected",
+                "details": {"expected": "abc123", "actual": "def456"},
+            },
+            "rate_limited": {
+                "conductor_error": True,
+                "kind": "external.api.rate_limited",
+                "message": "GitHub returned 429",
+                "details": {"retry_after_s": 30},
+            },
+        }[mode]
+        with open(os.environ["CONDUCTOR_ERROR_OUT"], "w") as f:
+            json.dump(envelope, f)
+        sys.exit(0)
+    env:
+      MODE: "{{ workflow.input.simulated_failure }}"
+    raises:
+      - external.git.drift
+      - external.api.rate_limited
+    routes:
+      - to: rescue_drift
+        on_error: external.git.drift
+      - to: backoff_and_retry
+        on_error: external.api.rate_limited
+      - to: continue_happy
+    output:
+      sha:
+        type: string
+
+  - name: continue_happy
+    model: gpt-4o-mini
+    prompt: |
+      The git probe reported sha={{ probe.output.sha }}. Acknowledge.
+    routes:
+      - to: $end
+    output:
+      acknowledgment:
+        type: string
+
+  - name: rescue_drift
+    model: gpt-4o-mini
+    prompt: |
+      A git drift was detected.
+        kind:    {{ probe.error.kind }}
+        message: {{ probe.error.message }}
+        details: {{ probe.error.details }}
+      Recommend a recovery plan in two sentences.
+    routes:
+      - to: $end
+    output:
+      recovery_plan:
+        type: string
+
+  - name: backoff_and_retry
+    model: gpt-4o-mini
+    prompt: |
+      A rate-limit hit was detected.
+        retry_after_s: {{ probe.error.details.retry_after_s }}
+      Describe a sensible backoff strategy in one sentence.
+    routes:
+      - to: $end
+    output:
+      strategy:
+        type: string
+
+output:
+  acknowledgment: "{{ continue_happy.output.acknowledgment | default('') }}"
+  recovery_plan: "{{ rescue_drift.output.recovery_plan | default('') }}"
+  strategy: "{{ backoff_and_retry.output.strategy | default('') }}"

From 5f4714ed5d33ff16cf9569b9090f22cded0c1ad2 Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 16:20:06 -0700
Subject: [PATCH 13/14] test(xeng): cross-engine envelope contract for
 python/pwsh/bash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 Step 13 (acceptance #1). New
\\	ests/test_integration/test_error_routing_cross_engine.py\\
exercises the \\CONDUCTOR_ERROR_OUT\\ contract end-to-end through the
real \\WorkflowEngine\\ (no executor mocking) with three writer
scripts in different languages: Python, PowerShell (\\pwsh\\), and
bash. All three share the same workflow YAML shape and the same
expected outcome: the probe writes a typed envelope, the engine routes
by \\on_error: external.git.drift\\, and a rescue agent reads the
kind back from the routed-to scope.

Each test is skipped when the corresponding interpreter is missing
from PATH (and bash is also skipped on Windows, where \\shutil.which\\
typically returns the WSL relay shim that fails with an opaque
\\xecvpe(/bin/bash) failed: No such file or directory\\ — outside
the scope of this contract test; the brief calls for bash-on-Linux
specifically).

Locally on Windows: 2 pass (python + pwsh), 1 skipped (bash). On a
Linux CI runner with pwsh installed, all three execute. The contract
is the same string of bytes in every engine, so identical assertions
hold across them — which is the whole point.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../test_error_routing_cross_engine.py        | 182 ++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 tests/test_integration/test_error_routing_cross_engine.py

diff --git a/tests/test_integration/test_error_routing_cross_engine.py b/tests/test_integration/test_error_routing_cross_engine.py
new file mode 100644
index 0000000..7308b69
--- /dev/null
+++ b/tests/test_integration/test_error_routing_cross_engine.py
@@ -0,0 +1,182 @@
+"""Cross-engine integration test for the ``CONDUCTOR_ERROR_OUT`` contract.
+
+Phase 1 acceptance #1: a script-type node that writes the typed error
+envelope to ``$CONDUCTOR_ERROR_OUT`` and exits 0 causes the node to be
+marked errored, regardless of *which* script engine produced the
+envelope. The brief calls for at least pwsh-on-Windows,
+bash-on-Linux, and python on both.
+
+This module exercises the contract through the real ``WorkflowEngine``
+(no mocking the script executor) with three small writer scripts in
+different languages, sharing one workflow YAML shape. Each test runs
+only if the corresponding interpreter is on ``PATH``; CI matrices that
+provide pwsh and bash on every OS will execute all three.
+"""
+
+from __future__ import annotations
+
+import shutil
+import sys
+import textwrap
+
+import pytest
+
+from conductor.config.schema import (
+    AgentDef,
+    ContextConfig,
+    LimitsConfig,
+    OutputField,
+    RouteDef,
+    RuntimeConfig,
+    WorkflowConfig,
+    WorkflowDef,
+)
+from conductor.engine.workflow import WorkflowEngine
+from conductor.providers.copilot import CopilotProvider
+
+
+def _build_workflow(probe: AgentDef) -> WorkflowConfig:
+    """Wrap a probe script node in a minimal workflow with one rescue agent.
+
+    Every engine variant shares this shape so the only thing under test
+    is the script's envelope writing — not the surrounding workflow.
+    """
+    rescue = AgentDef(
+        name="rescue",
+        model="gpt-4",
+        prompt="rescue from {{ probe.error.kind }}",
+        routes=[RouteDef(to="$end")],
+        output={"recovered_kind": OutputField(type="string")},
+    )
+    return WorkflowConfig(
+        workflow=WorkflowDef(
+            name="xeng",
+            entry_point="probe",
+            runtime=RuntimeConfig(provider="copilot"),
+            context=ContextConfig(mode="accumulate"),
+            limits=LimitsConfig(max_iterations=10),
+        ),
+        agents=[probe, rescue],
+        output={"recovered_kind": "{{ rescue.output.recovered_kind }}"},
+    )
+
+
+def _make_handler():
+    def handler(agent, prompt, context):
+        if agent.name == "rescue":
+            err = context["probe"]["error"]
+            return {"recovered_kind": err["kind"]}
+        return {}
+
+    return handler
+
+
+class TestCrossEngineEnvelope:
+    """One test per script engine; same expected behaviour from the workflow."""
+
+    @pytest.mark.asyncio
+    async def test_python_writes_envelope_and_routes(self) -> None:
+        """Python writer: the contract works without any helper at all."""
+        probe = AgentDef(
+            name="probe",
+            type="script",
+            command=sys.executable,
+            args=[
+                "-c",
+                "import json, os, sys; "
+                "open(os.environ['CONDUCTOR_ERROR_OUT'], 'w').write("
+                "json.dumps({'conductor_error': True, "
+                "'kind': 'external.git.drift', "
+                "'message': 'sha mismatch'})); "
+                "sys.exit(0)",
+            ],
+            raises=["external.git.drift"],
+            routes=[
+                RouteDef(to="rescue", on_error="external.git.drift"),
+                RouteDef(to="$end"),
+            ],
+        )
+        engine = WorkflowEngine(
+            config=_build_workflow(probe),
+            provider=CopilotProvider(mock_handler=_make_handler()),
+        )
+        result = await engine.run({})
+        assert result == {"recovered_kind": "external.git.drift"}
+
+    @pytest.mark.asyncio
+    @pytest.mark.skipif(shutil.which("pwsh") is None, reason="pwsh not on PATH")
+    async def test_pwsh_writes_envelope_and_routes(self, tmp_path) -> None:
+        """PowerShell writer using Set-Content with utf8 (no BOM)."""
+        script = tmp_path / "raise.ps1"
+        # PowerShell here-string assembles the envelope inline; we avoid
+        # the shipped helper to confirm the bare contract works.
+        script.write_text(
+            textwrap.dedent(
+                """\
+                $envelope = @{
+                    conductor_error = $true
+                    kind            = 'external.git.drift'
+                    message         = 'sha mismatch'
+                } | ConvertTo-Json -Compress
+                Set-Content -Path $env:CONDUCTOR_ERROR_OUT `
+                    -Value $envelope -Encoding utf8 -NoNewline
+                exit 0
+                """
+            ),
+            encoding="utf-8",
+        )
+        probe = AgentDef(
+            name="probe",
+            type="script",
+            command="pwsh",
+            args=["-NoProfile", "-File", str(script)],
+            raises=["external.git.drift"],
+            routes=[
+                RouteDef(to="rescue", on_error="external.git.drift"),
+                RouteDef(to="$end"),
+            ],
+        )
+        engine = WorkflowEngine(
+            config=_build_workflow(probe),
+            provider=CopilotProvider(mock_handler=_make_handler()),
+        )
+        result = await engine.run({})
+        assert result == {"recovered_kind": "external.git.drift"}
+
+    @pytest.mark.asyncio
+    @pytest.mark.skipif(
+        shutil.which("bash") is None or sys.platform == "win32",
+        reason="bash on Windows is typically a broken WSL shim; brief requires bash-on-Linux only",
+    )
+    async def test_bash_writes_envelope_and_routes(self, tmp_path) -> None:
+        """Bash writer using a heredoc."""
+        script = tmp_path / "raise.sh"
+        script.write_text(
+            textwrap.dedent(
+                """\
+                #!/usr/bin/env bash
+                cat > "$CONDUCTOR_ERROR_OUT" <<'JSON'
+                {"conductor_error":true,"kind":"external.git.drift","message":"sha mismatch"}
+                JSON
+                exit 0
+                """
+            ),
+            encoding="utf-8",
+        )
+        probe = AgentDef(
+            name="probe",
+            type="script",
+            command="bash",
+            args=[str(script)],
+            raises=["external.git.drift"],
+            routes=[
+                RouteDef(to="rescue", on_error="external.git.drift"),
+                RouteDef(to="$end"),
+            ],
+        )
+        engine = WorkflowEngine(
+            config=_build_workflow(probe),
+            provider=CopilotProvider(mock_handler=_make_handler()),
+        )
+        result = await engine.run({})
+        assert result == {"recovered_kind": "external.git.drift"}

From f13791be59195deb7567edba243d0cdab507619c Mon Sep 17 00:00:00 2001
From: Daniel Green <dangreen@microsoft.com>
Date: Thu, 21 May 2026 16:36:13 -0700
Subject: [PATCH 14/14] phase-1(checks): cast TypedDict envelopes for ty parity

ty's TypedDict to dict() conversion picks the wrong overload

for our ErrorEnvelope, surfacing 8 spurious diagnostics.

Use cast() at the leaf sites where TypedDict envelopes cross

into APIs typed as dict[str, Any] (script/agent executor returns;

workflow.py store_error and router.evaluate call sites).

Brings ty count back to the 12-diagnostic baseline (all pre-existing

Windows termios/tty noise). Lint, format, examples-validation, and

full test suite (2943 pass, 12 baseline failures) all green.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/conductor/engine/workflow.py |  6 +++---
 src/conductor/executor/agent.py  |  6 +++---
 src/conductor/executor/script.py | 13 ++++++++-----
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/conductor/engine/workflow.py b/src/conductor/engine/workflow.py
index 80eab65..7d1cf35 100644
--- a/src/conductor/engine/workflow.py
+++ b/src/conductor/engine/workflow.py
@@ -16,7 +16,7 @@
 import uuid
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from conductor.engine.checkpoint import CheckpointManager
 from conductor.engine.context import WorkflowContext
@@ -4478,7 +4478,7 @@ def _handle_leaf_error(
             UnhandledWorkflowError: if no error route at this level matched.
         """
         normalized = self._normalize_envelope_for_node(agent, envelope)
-        self.context.store_error(agent.name, normalized)
+        self.context.store_error(agent.name, cast("Any", normalized))
 
         try:
             return self._evaluate_routes(agent, output_for_route, error=normalized)
@@ -4525,7 +4525,7 @@ def _evaluate_routes(
         # Build context for condition evaluation
         eval_context = self.context.get_for_template()
 
-        return self.router.evaluate(agent.routes, output, eval_context, error=error)
+        return self.router.evaluate(agent.routes, output, eval_context, error=cast("Any", error))
 
     def _evaluate_parallel_routes(
         self, parallel_group: ParallelGroup, output: dict[str, Any]
diff --git a/src/conductor/executor/agent.py b/src/conductor/executor/agent.py
index b5de3f2..612c57b 100644
--- a/src/conductor/executor/agent.py
+++ b/src/conductor/executor/agent.py
@@ -8,7 +8,7 @@
 
 import asyncio
 import contextlib
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from conductor.exceptions import ValidationError
 from conductor.executor.output import parse_json_output, validate_output
@@ -276,7 +276,7 @@ async def execute(
                 cache_write_tokens=output.cache_write_tokens,
                 model=output.model,
                 partial=output.partial,
-                error=dict(envelope),
+                error=cast("dict[str, Any]", envelope),
             )
 
         if agent.output:
@@ -298,7 +298,7 @@ async def execute(
                     cache_write_tokens=output.cache_write_tokens,
                     model=output.model,
                     partial=output.partial,
-                    error=dict(envelope),
+                    error=cast("dict[str, Any]", envelope),
                 )
 
         return output
diff --git a/src/conductor/executor/script.py b/src/conductor/executor/script.py
index c58fb56..7a183e3 100644
--- a/src/conductor/executor/script.py
+++ b/src/conductor/executor/script.py
@@ -12,7 +12,7 @@
 import os
 import tempfile
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from conductor.exceptions import ExecutionError
 from conductor.executor.template import TemplateRenderer
@@ -239,16 +239,16 @@ def _read_error_envelope(path: str, node_name: str) -> dict[str, Any] | None:
             source="script",
             original_message=f"CONDUCTOR_ERROR_OUT file is not valid JSON: {exc}",
         )
-        return dict(envelope)
+        return cast("dict[str, Any]", envelope)
     try:
-        return dict(coerce_envelope(parsed))
+        return cast("dict[str, Any]", coerce_envelope(parsed))
     except EnvelopeValidationError as exc:
         envelope = make_schema_violation(
             node_name=node_name,
             source="script",
             original_message=f"Malformed envelope in CONDUCTOR_ERROR_OUT: {exc}",
         )
-        return dict(envelope)
+        return cast("dict[str, Any]", envelope)
 
 
 def _synthesize_script_error(*, exit_code: int, stderr_tail: str, command: str) -> dict[str, Any]:
@@ -257,4 +257,7 @@ def _synthesize_script_error(*, exit_code: int, stderr_tail: str, command: str)
 
     # Truncate stderr so we don't drag megabytes of compiler noise into the envelope.
     tail = stderr_tail[-2000:] if len(stderr_tail) > 2000 else stderr_tail
-    return dict(make_script_error(exit_code=exit_code, stderr_tail=tail, command=command))
+    return cast(
+        "dict[str, Any]",
+        make_script_error(exit_code=exit_code, stderr_tail=tail, command=command),
+    )