From b9e62f99aa2fe7665bc4ddff0dd821ebae3d53bd Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Fri, 19 Jun 2026 18:27:53 +0200
Subject: [PATCH 1/4] feat(sdk): agent runtime ports, adapters, tool
 resolution, and messages protocol

---
 sdks/python/agenta/__init__.py                |  17 +
 sdks/python/agenta/sdk/agents/__init__.py     | 183 +++++
 .../agenta/sdk/agents/adapters/__init__.py    |  24 +
 .../sdk/agents/adapters/agenta_builtins.py    |  90 +++
 .../agenta/sdk/agents/adapters/harnesses.py   | 150 ++++
 .../agenta/sdk/agents/adapters/in_process.py  | 170 +++++
 .../agenta/sdk/agents/adapters/local.py       |  48 ++
 .../agenta/sdk/agents/adapters/rivet.py       | 186 +++++
 .../sdk/agents/adapters/vercel/__init__.py    |  43 ++
 .../sdk/agents/adapters/vercel/messages.py    | 219 ++++++
 .../sdk/agents/adapters/vercel/routing.py     | 209 ++++++
 .../agenta/sdk/agents/adapters/vercel/sse.py  |  25 +
 .../sdk/agents/adapters/vercel/stream.py      | 216 ++++++
 sdks/python/agenta/sdk/agents/dtos.py         | 698 ++++++++++++++++++
 sdks/python/agenta/sdk/agents/errors.py       |  26 +
 sdks/python/agenta/sdk/agents/interfaces.py   | 317 ++++++++
 sdks/python/agenta/sdk/agents/mcp/__init__.py |  22 +
 sdks/python/agenta/sdk/agents/mcp/errors.py   |  33 +
 .../agenta/sdk/agents/mcp/interfaces.py       |  10 +
 sdks/python/agenta/sdk/agents/mcp/models.py   |  57 ++
 sdks/python/agenta/sdk/agents/mcp/parsing.py  |  39 +
 sdks/python/agenta/sdk/agents/mcp/resolver.py |  68 ++
 sdks/python/agenta/sdk/agents/mcp/wire.py     |  17 +
 sdks/python/agenta/sdk/agents/streaming.py    |  91 +++
 .../agenta/sdk/agents/tools/__init__.py       |  75 ++
 sdks/python/agenta/sdk/agents/tools/compat.py | 132 ++++
 sdks/python/agenta/sdk/agents/tools/errors.py |  82 ++
 .../agenta/sdk/agents/tools/interfaces.py     |  20 +
 sdks/python/agenta/sdk/agents/tools/models.py | 221 ++++++
 .../python/agenta/sdk/agents/tools/parsing.py |  39 +
 .../agenta/sdk/agents/tools/resolver.py       | 177 +++++
 sdks/python/agenta/sdk/agents/tools/wire.py   |  15 +
 sdks/python/agenta/sdk/agents/ui_messages.py  |  18 +
 .../agenta/sdk/agents/utils/__init__.py       |  19 +
 .../agenta/sdk/agents/utils/ts_runner.py      | 163 ++++
 sdks/python/agenta/sdk/agents/utils/wire.py   |  91 +++
 sdks/python/agenta/sdk/decorators/routing.py  |  62 +-
 .../agenta/sdk/engines/running/interfaces.py  |  43 ++
 .../agenta/sdk/engines/running/utils.py       |  25 +-
 .../sdk/middlewares/running/normalizer.py     |  10 +-
 sdks/python/agenta/sdk/models/workflows.py    |  33 +
 sdks/python/agenta/sdk/utils/types.py         |  80 ++
 .../agenta/tests/agents/test_streaming.py     | 167 +++++
 .../pytest/integration/agents/__init__.py     |   1 +
 .../agents/test_transport_roundtrip.py        | 113 +++
 .../oss/tests/pytest/unit/agents/__init__.py  |   1 +
 .../oss/tests/pytest/unit/agents/conftest.py  | 198 +++++
 .../agents/golden/run_request.claude.json     |  28 +
 .../unit/agents/golden/run_request.pi.json    |  36 +
 .../unit/agents/golden/run_result.error.json  |   4 +
 .../unit/agents/golden/run_result.ok.json     |  31 +
 .../tests/pytest/unit/agents/mcp/__init__.py  |   1 +
 .../pytest/unit/agents/mcp/test_resolver.py   |  76 ++
 .../unit/agents/test_dtos_agent_config.py     | 155 ++++
 .../agents/test_dtos_capabilities_events.py   |  81 ++
 .../unit/agents/test_dtos_content_blocks.py   |  90 +++
 .../unit/agents/test_dtos_harness_configs.py  |  99 +++
 .../unit/agents/test_environment_lifecycle.py | 127 ++++
 .../unit/agents/test_harness_adapters.py      | 273 +++++++
 .../pytest/unit/agents/test_ui_messages.py    | 430 +++++++++++
 .../pytest/unit/agents/test_wire_contract.py  | 301 ++++++++
 .../pytest/unit/agents/tools/__init__.py      |   1 +
 .../pytest/unit/agents/tools/test_models.py   |  63 ++
 .../pytest/unit/agents/tools/test_parsing.py  |  60 ++
 .../pytest/unit/agents/tools/test_resolver.py | 131 ++++
 .../unit/test_normalizer_passthrough.py       |  30 +
 .../pytest/utils/test_messages_endpoint.py    | 284 +++++++
 .../oss/tests/pytest/utils/test_routing.py    | 121 +++
 68 files changed, 7154 insertions(+), 11 deletions(-)
 create mode 100644 sdks/python/agenta/sdk/agents/__init__.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/__init__.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/harnesses.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/in_process.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/local.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/rivet.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/messages.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/routing.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/sse.py
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/stream.py
 create mode 100644 sdks/python/agenta/sdk/agents/dtos.py
 create mode 100644 sdks/python/agenta/sdk/agents/errors.py
 create mode 100644 sdks/python/agenta/sdk/agents/interfaces.py
 create mode 100644 sdks/python/agenta/sdk/agents/mcp/__init__.py
 create mode 100644 sdks/python/agenta/sdk/agents/mcp/errors.py
 create mode 100644 sdks/python/agenta/sdk/agents/mcp/interfaces.py
 create mode 100644 sdks/python/agenta/sdk/agents/mcp/models.py
 create mode 100644 sdks/python/agenta/sdk/agents/mcp/parsing.py
 create mode 100644 sdks/python/agenta/sdk/agents/mcp/resolver.py
 create mode 100644 sdks/python/agenta/sdk/agents/mcp/wire.py
 create mode 100644 sdks/python/agenta/sdk/agents/streaming.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/__init__.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/compat.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/errors.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/interfaces.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/models.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/parsing.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/resolver.py
 create mode 100644 sdks/python/agenta/sdk/agents/tools/wire.py
 create mode 100644 sdks/python/agenta/sdk/agents/ui_messages.py
 create mode 100644 sdks/python/agenta/sdk/agents/utils/__init__.py
 create mode 100644 sdks/python/agenta/sdk/agents/utils/ts_runner.py
 create mode 100644 sdks/python/agenta/sdk/agents/utils/wire.py
 create mode 100644 sdks/python/agenta/tests/agents/test_streaming.py
 create mode 100644 sdks/python/oss/tests/pytest/integration/agents/__init__.py
 create mode 100644 sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/__init__.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/conftest.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py
 create mode 100644 sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py

diff --git a/sdks/python/agenta/__init__.py b/sdks/python/agenta/__init__.py
index df014c4e00..dc01c3396a 100644
--- a/sdks/python/agenta/__init__.py
+++ b/sdks/python/agenta/__init__.py
@@ -52,6 +52,23 @@
 from .sdk.utils.logging import get_module_logger  # noqa: F401
 from .sdk.utils.preinit import PreInitObject  # noqa: F401
 
+# Agent runtime (the agents subsystem). `Message` is intentionally not re-exported here:
+# `agenta.Message` already names the prompt message type; import the agents one from
+# `agenta.sdk.agents` when needed.
+from .sdk.agents import (  # noqa: F401
+    AgentaHarness,
+    AgentConfig,
+    ClaudeHarness,
+    Environment,
+    InProcessPiBackend,
+    LocalBackend,
+    PiHarness,
+    RivetBackend,
+    RunSelection,
+    SessionConfig,
+    make_harness,
+)
+
 DEFAULT_AGENTA_SINGLETON_INSTANCE = AgentaSingleton()
 
 types = client_types
diff --git a/sdks/python/agenta/sdk/agents/__init__.py b/sdks/python/agenta/sdk/agents/__init__.py
new file mode 100644
index 0000000000..b1cd4370d2
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/__init__.py
@@ -0,0 +1,183 @@
+"""Agenta agent runtime: run a coding harness (Pi, Claude, ...) as a swappable port.
+
+Layers (Agenta's hexagonal vocabulary):
+
+- ``dtos.py`` — data contracts (``AgentConfig``, ``SessionConfig``, ``Message``, ...).
+- ``interfaces.py`` — the ports (ABCs): ``Backend``, ``Environment``, ``Sandbox``,
+  ``Session``, ``Harness``.
+- ``adapters/`` — implementations: ``RivetBackend`` / ``InProcessPiBackend`` / ``LocalBackend``
+  and ``PiHarness`` / ``ClaudeHarness`` / ``AgentaHarness``.
+- ``utils/`` — shared plumbing (the ``/run`` wire and the transports to the TS runner).
+
+Standalone usage::
+
+    import agenta as ag
+    from agenta.sdk.agents import Message
+
+    cfg = ag.ConfigManager.get_from_registry(app_slug="my-agent")
+    agent = ag.AgentConfig.from_params(cfg)
+    harness = ag.PiHarness(ag.Environment(ag.RivetBackend()))
+    result = await harness.prompt(ag.SessionConfig(agent=agent), [Message(role="user", content="hi")])
+"""
+
+from .adapters import (
+    AgentaHarness,
+    ClaudeHarness,
+    InProcessPiBackend,
+    LocalBackend,
+    PiHarness,
+    RivetBackend,
+    make_harness,
+)
+from .dtos import (
+    AgentaAgentConfig,
+    AgentConfig,
+    AgentEvent,
+    AgentResult,
+    ClaudeAgentConfig,
+    ContentBlock,
+    HarnessAgentConfig,
+    HarnessCapabilities,
+    HarnessType,
+    Message,
+    PermissionPolicy,
+    PiAgentConfig,
+    RunSelection,
+    SessionConfig,
+    ToolCallback,
+    TraceContext,
+    to_messages,
+)
+from .errors import ToolResolutionError, UnsupportedHarnessError
+from .interfaces import (
+    Backend,
+    Environment,
+    Harness,
+    NoopSessionStore,
+    Sandbox,
+    Session,
+    SessionStore,
+)
+from .mcp import (
+    MCPConfigurationError,
+    MCPError,
+    MCPResolver,
+    MCPServerConfig,
+    MissingMCPSecretError,
+    ResolvedMCPServer,
+)
+from .streaming import AgentRun
+from .tools import (
+    BuiltinToolConfig,
+    CallbackToolSpec,
+    ClientToolConfig,
+    ClientToolSpec,
+    CodeToolConfig,
+    CodeToolSpec,
+    DuplicateToolNameError,
+    EnvironmentToolSecretProvider,
+    GatewayToolResolver,
+    GatewayToolConfig,
+    GatewayToolResolution,
+    GatewayToolResolutionError,
+    MissingSecretPolicy,
+    MissingToolSecretError,
+    ResolvedToolSet,
+    ToolConfig,
+    ToolConfigError,
+    ToolConfigurationError,
+    ToolError,
+    ToolResolver,
+    ToolSecretProvider,
+    ToolSpec,
+    UnsupportedToolProviderError,
+    coerce_tool_config,
+    coerce_tool_configs,
+    parse_tool_config,
+    parse_tool_configs,
+)
+from .adapters.vercel import (
+    from_ui_messages,
+    to_ui_message,
+    ui_message_stream,
+)
+
+__all__ = [
+    # DTOs
+    "AgentConfig",
+    "RunSelection",
+    "SessionConfig",
+    "HarnessAgentConfig",
+    "PiAgentConfig",
+    "ClaudeAgentConfig",
+    "AgentaAgentConfig",
+    "HarnessType",
+    "HarnessCapabilities",
+    "ContentBlock",
+    "Message",
+    "to_messages",
+    "AgentEvent",
+    "AgentResult",
+    "AgentRun",
+    # Former flat Vercel adapter names (compatibility; new code uses adapters.vercel)
+    "from_ui_messages",
+    "to_ui_message",
+    "ui_message_stream",
+    "TraceContext",
+    "ToolCallback",
+    "PermissionPolicy",
+    # Canonical tools API
+    "ToolConfig",
+    "BuiltinToolConfig",
+    "GatewayToolConfig",
+    "CodeToolConfig",
+    "ClientToolConfig",
+    "ToolSpec",
+    "CallbackToolSpec",
+    "CodeToolSpec",
+    "ClientToolSpec",
+    "ResolvedToolSet",
+    "GatewayToolResolution",
+    "ToolResolver",
+    "ToolSecretProvider",
+    "GatewayToolResolver",
+    "EnvironmentToolSecretProvider",
+    "MissingSecretPolicy",
+    "parse_tool_config",
+    "parse_tool_configs",
+    "coerce_tool_config",
+    "coerce_tool_configs",
+    "ToolError",
+    "ToolConfigError",
+    "ToolConfigurationError",
+    "GatewayToolResolutionError",
+    "UnsupportedToolProviderError",
+    "MissingToolSecretError",
+    "DuplicateToolNameError",
+    # MCP is a sibling subsystem
+    "MCPServerConfig",
+    "ResolvedMCPServer",
+    "MCPResolver",
+    "MCPError",
+    "MCPConfigurationError",
+    "MissingMCPSecretError",
+    # Interfaces (ports)
+    "Backend",
+    "Sandbox",
+    "Session",
+    "SessionStore",
+    "NoopSessionStore",
+    "Environment",
+    "Harness",
+    # Errors
+    "UnsupportedHarnessError",
+    "ToolResolutionError",
+    # Adapters
+    "RivetBackend",
+    "InProcessPiBackend",
+    "LocalBackend",
+    "PiHarness",
+    "ClaudeHarness",
+    "AgentaHarness",
+    "make_harness",
+]
diff --git a/sdks/python/agenta/sdk/agents/adapters/__init__.py b/sdks/python/agenta/sdk/agents/adapters/__init__.py
new file mode 100644
index 0000000000..30e555d82b
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/__init__.py
@@ -0,0 +1,24 @@
+"""Adapters: concrete implementations of the agent runtime ports.
+
+- Backend adapters: ``RivetBackend`` (rivet over ACP), ``InProcessPiBackend`` (in-process Pi,
+  the reference backend), ``LocalBackend`` (standalone SDK runs; not yet implemented).
+- Harness adapters: ``PiHarness``, ``ClaudeHarness``, ``AgentaHarness`` (+ ``make_harness``).
+- HTTP/browser protocol adapters live in subpackages, e.g. ``adapters.vercel``.
+
+Shared plumbing for the runner-backed adapters lives in ``agents/utils``.
+"""
+
+from .harnesses import AgentaHarness, ClaudeHarness, PiHarness, make_harness
+from .in_process import InProcessPiBackend
+from .local import LocalBackend
+from .rivet import RivetBackend
+
+__all__ = [
+    "RivetBackend",
+    "InProcessPiBackend",
+    "LocalBackend",
+    "PiHarness",
+    "ClaudeHarness",
+    "AgentaHarness",
+    "make_harness",
+]
diff --git a/sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py b/sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py
new file mode 100644
index 0000000000..b5fae23bd2
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py
@@ -0,0 +1,90 @@
+"""The Agenta harness's forced defaults: the things ``AgentaHarness`` always applies.
+
+``AgentaHarness`` is Pi with an opinion. It is the same engine as :class:`PiHarness`, but
+every run carries a fixed set of Agenta-shipped extras the author cannot turn off:
+
+- a base **persona** appended to Pi's system prompt (``AGENTA_FORCED_APPEND_SYSTEM``),
+- a base **AGENTS.md preamble** the author's instructions are appended to (``AGENTA_PREAMBLE``),
+- a set of **forced tools** (``AGENTA_FORCED_TOOLS``), and
+- a set of **forced skills** (``AGENTA_FORCED_SKILLS``).
+
+The forced *policy* lives here (harness knowledge). The forced skill *files* live with the
+runner that runs Pi, under ``services/agent/skills/<name>/``; the contract between the two is
+the skill directory **name**, so each entry in ``AGENTA_FORCED_SKILLS`` must match a committed
+directory there.
+
+Two layers, kept distinct on purpose (matching Pi's own split, see :class:`PiAgentConfig`):
+the *persona* is an ``append_system`` (changes Pi's base prompt), while *project conventions*
+belong in ``AGENTS.md``. ``AGENTA_PREAMBLE`` is the AGENTS.md layer; ``AGENTA_FORCED_APPEND_SYSTEM``
+is the persona layer.
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+# The base AGENTS.md preamble. The author's own ``instructions`` are appended after this, so
+# the final AGENTS.md is ``AGENTA_PREAMBLE`` + the author's project conventions.
+#
+# TODO(product): replace this placeholder with the real Agenta AGENTS.md preamble.
+AGENTA_PREAMBLE = """\
+# Agenta agent
+
+You are an agent running on the Agenta platform. The instructions below are Agenta's
+baseline; the user's own instructions follow and take precedence where they are more
+specific.
+
+- Prefer the tools and skills provided to you over guessing.
+- When a skill matches the task, read its SKILL.md fully before acting.
+- Keep answers grounded in what the tools and skills actually return."""
+
+# The base persona, always appended to Pi's built-in system prompt (never replaces it). This
+# is the "who the agent is" layer, distinct from the AGENTS.md project-context layer above.
+#
+# TODO(product): replace this placeholder with the real Agenta persona framing.
+AGENTA_FORCED_APPEND_SYSTEM = """\
+You are an Agenta agent. Be precise, cite what your tools and skills return, and do not
+fabricate results."""
+
+# Built-in tools every Agenta run forces on, unioned with the agent's resolved tools.
+# ``read`` is mandatory: Pi only renders the skills section into the system prompt when the
+# ``read`` tool is available. ``bash`` lets skills run their helper scripts.
+AGENTA_FORCED_TOOLS: List[str] = ["read", "bash"]
+
+# Built-in skills every Agenta run forces on. Each name must match a committed directory under
+# the runner's ``services/agent/skills/<name>/`` (the runner resolves names to those dirs).
+#
+# TODO(product): grow this with the real Agenta skill set.
+AGENTA_FORCED_SKILLS: List[str] = ["agenta-getting-started"]
+
+
+def _join(*parts: Optional[str]) -> Optional[str]:
+    """Strip every part, drop the ones left empty, and join the rest with a blank line.
+
+    Returns ``None`` when no part survives (all were ``None``, empty, or whitespace)."""
+    stripped = (part.strip() for part in parts if part)
+    return "\n\n".join(text for text in stripped if text) or None
+
+
+def compose_instructions(user: Optional[str]) -> Optional[str]:
+    """Return the AGENTS.md text the harness ships: ``AGENTA_PREAMBLE`` first, then the
+    author's ``user`` instructions after a blank line (omitted when empty or ``None``)."""
+    return _join(AGENTA_PREAMBLE, user)
+
+
+def compose_append_system(user: Optional[str]) -> Optional[str]:
+    """Return the ``append_system`` the harness ships: ``AGENTA_FORCED_APPEND_SYSTEM``
+    first, then the author's own ``append_system`` after a blank line (omitted when empty)."""
+    return _join(AGENTA_FORCED_APPEND_SYSTEM, user)
+
+
+def force_tools(builtin_tools: List[str]) -> List[str]:
+    """Return ``builtin_tools`` followed by every forced tool it does not already list.
+
+    The first occurrence of a name wins, so the caller's order is preserved and
+    duplicates are dropped; falsy names (such as ``""``) are skipped.
+    """
+    combined = [*builtin_tools, *AGENTA_FORCED_TOOLS]
+    # Dict keys are unique and keep insertion order, which gives an order-stable
+    # de-duplication in a single pass.
+    return list(dict.fromkeys(name for name in combined if name))
diff --git a/sdks/python/agenta/sdk/agents/adapters/harnesses.py b/sdks/python/agenta/sdk/agents/adapters/harnesses.py
new file mode 100644
index 0000000000..e718c1db2b
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/harnesses.py
@@ -0,0 +1,150 @@
+"""Adapters of the :class:`~agenta.sdk.agents.interfaces.Harness` port: one per harness type.
+
+This is where the per-harness adaptation lives (the logic that used to sit in the TS runner):
+turning the neutral :class:`SessionConfig` into the harness's own config, especially the
+*tools*. The harnesses genuinely differ, so the three adapters do different work:
+
+- **Pi** takes built-in tools by name *and* resolved tool specs, delivered natively (Pi has
+  no MCP). Pi does not gate tool use, so the permission policy does not apply.
+- **Claude** has no built-in tools (they are a Pi concept), delivers tools over MCP, and
+  gates tool use, so the permission policy applies.
+- **Agenta** is Pi with an opinion: the same engine and config shape, plus a fixed set of
+  forced tools, skills, a base AGENTS.md preamble, and a persona (see :mod:`.agenta_builtins`).
+
+The backend below stays pure plumbing; this layer owns the harness knowledge.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Type
+
+from agenta.sdk.utils.logging import get_module_logger
+
+from ..dtos import (
+    AgentaAgentConfig,
+    ClaudeAgentConfig,
+    HarnessType,
+    PiAgentConfig,
+    SessionConfig,
+)
+from ..interfaces import Environment, Harness
+from ..tools.models import ToolSpec, coerce_tool_spec
+from .agenta_builtins import (
+    AGENTA_FORCED_SKILLS,
+    compose_append_system,
+    compose_instructions,
+    force_tools,
+)
+
+log = get_module_logger(__name__)
+
+
+def _opt_str(value: Any) -> Any:
+    """Return ``value`` unchanged when it is a string with non-whitespace content.
+
+    Anything else (a non-string, ``""``, a blank string) becomes ``None`` so an empty or
+    malformed harness option never reaches the wire as a real override."""
+    return value if isinstance(value, str) and value.strip() else None
+
+
+def _normalize_tool_specs(specs: List[Dict[str, Any]]) -> List[ToolSpec]:
+    """Coerce legacy runner-dict tool specs to ``ToolSpec``; ``None``/empty yields ``[]``."""
+    return [coerce_tool_spec(spec) for spec in specs or []]
+
+
+class PiHarness(Harness):
+    harness_type = HarnessType.PI
+
+    def _to_harness_config(self, config: SessionConfig) -> PiAgentConfig:
+        # Pi delivers tools natively: built-in names plus resolved specs registered through
+        # the Pi extension; it does not gate tool use, so the permission policy is dropped.
+        # Its `pi` slice of harness_options: `system` replaces the base prompt, `append_system`
+        # extends it (AGENTS.md is untouched either way). A null slice means "no options".
+        pi_options = config.agent.harness_options.get(HarnessType.PI.value) or {}
+        return PiAgentConfig(
+            agents_md=config.agent.instructions,
+            model=config.agent.model,
+            builtin_names=list(config.builtin_names),
+            tool_specs=list(config.tool_specs),
+            tool_callback=config.tool_callback,
+            mcp_servers=list(config.mcp_servers),
+            system=_opt_str(pi_options.get("system")),
+            append_system=_opt_str(pi_options.get("append_system")),
+        )
+
+
+class ClaudeHarness(Harness):
+    harness_type = HarnessType.CLAUDE
+
+    def _to_harness_config(self, config: SessionConfig) -> ClaudeAgentConfig:
+        # Claude has no Pi built-in tools, so they are dropped (with a warning that reports
+        # how many) rather than shipped as names Claude cannot honor. Tools go over MCP, and
+        # Claude gates tool use, so the permission policy is carried through.
+        if config.builtin_names:
+            log.warning(
+                "ClaudeHarness ignores %d built-in tool(s); built-ins are a Pi concept",
+                len(config.builtin_names),
+            )
+        return ClaudeAgentConfig(
+            agents_md=config.agent.instructions,
+            model=config.agent.model,
+            tool_specs=list(config.tool_specs),
+            tool_callback=config.tool_callback,
+            mcp_servers=list(config.mcp_servers),
+            permission_policy=config.permission_policy,
+        )
+
+
+class AgentaHarness(Harness):
+    """Pi with an Agenta opinion. Same engine as :class:`PiHarness`, but every run carries the
+    forced Agenta extras (see :mod:`.agenta_builtins`): a base AGENTS.md preamble the author's
+    instructions are appended to, a forced persona ``append_system``, forced tools, and forced
+    skills. The author's own Pi ``harness_options`` (``system`` / ``append_system``) still
+    apply, layered after the forced bits."""
+
+    harness_type = HarnessType.AGENTA
+
+    def _to_harness_config(self, config: SessionConfig) -> AgentaAgentConfig:
+        # The author's Pi options still apply: this harness drives Pi, so it reads the same
+        # `pi` slice as PiHarness (a missing or null slice means no options) and layers on top.
+        pi_options = config.agent.harness_options.get(HarnessType.PI.value) or {}
+        return AgentaAgentConfig(
+            agents_md=compose_instructions(config.agent.instructions),
+            model=config.agent.model,
+            builtin_names=force_tools(list(config.builtin_names)),
+            tool_specs=list(config.tool_specs),
+            tool_callback=config.tool_callback,
+            mcp_servers=list(config.mcp_servers),
+            system=_opt_str(pi_options.get("system")),
+            append_system=compose_append_system(
+                _opt_str(pi_options.get("append_system"))
+            ),
+            skills=list(AGENTA_FORCED_SKILLS),
+        )
+
+
+_HARNESSES: Dict[HarnessType, Type[Harness]] = {
+    HarnessType.PI: PiHarness,
+    HarnessType.CLAUDE: ClaudeHarness,
+    HarnessType.AGENTA: AgentaHarness,
+}
+
+
+def make_harness(
+    harness_type: "HarnessType | str", environment: Environment
+) -> Harness:
+    """Construct the Harness for a harness type (enum or config string) over an environment.
+
+    Raises ``ValueError`` when no adapter is registered for the coerced type, and
+    :class:`~agenta.sdk.agents.errors.UnsupportedHarnessError` if the environment's backend
+    cannot drive it.
+    """
+    resolved = HarnessType.coerce(harness_type)
+    try:
+        cls = _HARNESSES[resolved]
+    except KeyError as exc:
+        known = ", ".join(sorted(h.value for h in _HARNESSES))
+        raise ValueError(
+            f"unknown harness '{resolved.value}'; known harnesses: {known}"
+        ) from exc
+    return cls(environment)
diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py
new file mode 100644
index 0000000000..bfd1528bd7
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py
@@ -0,0 +1,170 @@
+"""InProcessPiBackend: drive Pi in-process through the TS runner, no rivet daemon.
+
+This was the first backend implementation and stays as the simplest one: a single harness
+(Pi), a single place (local), the legacy in-process Pi engine (``engines/pi.ts``). It is the
+reference to read when writing a new backend.
+
+It is its own class and hard-codes its differences (the ``pi`` engine, Pi-only support,
+local-only). It is deliberately NOT a subclass of ``RivetBackend``; the two are different
+engines that happen to share the ``utils`` wire and transport helpers.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any, AsyncIterator, Dict, List, Mapping, Optional, Sequence
+
+from ..dtos import (
+    AgentResult,
+    EventSink,
+    HarnessAgentConfig,
+    HarnessType,
+    Message,
+    TraceContext,
+)
+from ..interfaces import Backend, Sandbox, Session
+from ..streaming import AgentRun
+from ..utils import (
+    deliver_http,
+    deliver_http_stream,
+    deliver_subprocess,
+    deliver_subprocess_stream,
+    request_to_wire,
+    result_from_wire,
+)
+
+_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"]
+
+
+class InProcessSandbox(Sandbox):
+    """The local host: in-process Pi runs here directly. ``add_files`` only buffers files in
+    ``self.files`` (same-key entries are overwritten); AGENTS.md rides the wire today."""
+
+    def __init__(self) -> None:
+        self.files: Dict[str, bytes] = {}
+
+    async def add_files(self, files: Mapping[str, bytes]) -> None:
+        self.files.update(files)
+
+
+class InProcessPiSession(Session):
+    """One turn-per-prompt Pi session; each turn is one ``/run`` request sent via the backend."""
+
+    def __init__(
+        self,
+        backend: "InProcessPiBackend",
+        config: HarnessAgentConfig,
+        *,
+        secrets: Optional[Mapping[str, str]],
+        trace: Optional[TraceContext],
+        session_id: Optional[str],
+    ) -> None:
+        self._backend = backend
+        self._config = config
+        self._secrets = dict(secrets or {})  # private copy; ``None`` becomes ``{}``
+        self._trace = trace
+        self._session_id = session_id
+
+    @property
+    def id(self) -> Optional[str]:
+        return self._session_id
+
+    def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]:
+        """The ``/run`` request JSON for this turn (shared by ``prompt`` and ``stream``)."""
+        return request_to_wire(
+            engine=InProcessPiBackend._ENGINE,
+            harness=HarnessType.PI,  # NOTE(review): fixed to PI even for AGENTA runs; confirm
+            sandbox="local",
+            config=self._config,
+            messages=messages,
+            secrets=self._secrets,
+            trace=self._trace,
+            session_id=self._session_id,
+        )
+
+    def _absorb_result(self, result: AgentResult) -> None:
+        """Adopt the result's session id, when it has one, as the id sent on later turns."""
+        if result.session_id:
+            self._session_id = result.session_id
+
+    async def prompt(
+        self,
+        messages: Sequence[Message],
+        *,
+        on_event: Optional[EventSink] = None,
+    ) -> AgentResult:
+        data = await self._backend._deliver(self._wire_payload(messages))
+        result = result_from_wire(data)
+        self._absorb_result(result)
+        if on_event:  # events are replayed to the sink only after the whole turn finished
+            for event in result.events:
+                try:
+                    on_event(event)
+                except Exception:  # pylint: disable=broad-except
+                    pass  # a failing sink is ignored; the result is still returned
+        return result
+
+    def stream(self, messages: Sequence[Message]) -> AgentRun:
+        """Run one turn over the streaming transport, yielding events live (see AgentRun)."""
+        records = self._backend._deliver_stream(self._wire_payload(messages))
+        return AgentRun(records).on_result(self._absorb_result)
+
+
+class InProcessPiBackend(Backend):
+    """The in-process Pi engine: drives the Pi SDK directly in the TS runner. Local only, no
+    rivet daemon; serves the Pi and Agenta harnesses (Agenta is Pi with an opinion, so the
+    engine is the same). ``timeout`` defaults to the ``AGENTA_AGENT_TIMEOUT`` environment
+    variable (else ``180``), read when the backend is constructed."""
+
+    supported_harnesses = frozenset({HarnessType.PI, HarnessType.AGENTA})
+    _ENGINE = "pi"  # hard-coded engine identity
+
+    def __init__(
+        self,
+        *,
+        url: Optional[str] = None,
+        command: Optional[Sequence[str]] = None,
+        cwd: Optional[str] = None,
+        timeout: Optional[float] = None,
+    ) -> None:
+        self._url = url
+        self._command: List[str] = list(command or _DEFAULT_COMMAND)
+        self._cwd = cwd
+        # Resolve the default here, not in the signature: a signature default is evaluated
+        # once at import, so a malformed env value would break importing the package.
+        env_timeout = os.getenv("AGENTA_AGENT_TIMEOUT", "180")
+        self._timeout = float(env_timeout) if timeout is None else timeout
+
+    async def create_sandbox(self) -> InProcessSandbox:
+        return InProcessSandbox()
+
+    async def create_session(
+        self,
+        sandbox: Sandbox,
+        config: HarnessAgentConfig,
+        *,
+        harness: HarnessType,
+        secrets: Optional[Mapping[str, str]] = None,
+        trace: Optional[TraceContext] = None,
+        session_id: Optional[str] = None,
+    ) -> InProcessPiSession:
+        return InProcessPiSession(
+            self, config, secrets=secrets, trace=trace, session_id=session_id
+        )
+
+    async def _deliver(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        if self._url:
+            return await deliver_http(self._url, payload, timeout=self._timeout)
+        env = {**os.environ, "AGENT_BACKEND": self._ENGINE}
+        return await deliver_subprocess(
+            self._command, payload, cwd=self._cwd, env=env, timeout=self._timeout
+        )
+
+    def _deliver_stream(self, payload: Dict[str, Any]) -> AsyncIterator[Dict[str, Any]]:
+        """The live counterpart of ``_deliver``: an NDJSON record stream from the runner."""
+        if self._url:
+            return deliver_http_stream(self._url, payload, timeout=self._timeout)
+        env = {**os.environ, "AGENT_BACKEND": self._ENGINE}
+        return deliver_subprocess_stream(
+            self._command, payload, cwd=self._cwd, env=env, timeout=self._timeout
+        )
diff --git a/sdks/python/agenta/sdk/agents/adapters/local.py b/sdks/python/agenta/sdk/agents/adapters/local.py
new file mode 100644
index 0000000000..5435ea4751
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/local.py
@@ -0,0 +1,48 @@
+"""LocalBackend: run a harness on this machine, no rivet daemon and no Agenta sidecar.
+
+This is the backend a standalone SDK user gets. It is two mechanisms, one per harness, which
+is exactly a backend's "plumbing per harness" job:
+
+- Pi   -> the bundled JS runner (the in-process Pi engine), shipped inside the wheel, run
+          with ``node``.
+- Claude -> the pure-Python ``claude-agent-sdk``, in-process, no TS bridge.
+
+NOT YET IMPLEMENTED. Tracked as Phase 3 (Pi) and Phase 4 (Claude) in
+``docs/design/agent-workflows/scratch/sdk-local-backend/plan.md``. The class is present so
+the adapter layout is complete and the port shape is visible; the methods raise until the
+bundling build step and the ``claude-agent-sdk`` wiring land.
+"""
+
+from __future__ import annotations
+
+from typing import Mapping, Optional
+
+from ..dtos import HarnessAgentConfig, HarnessType, TraceContext
+from ..interfaces import Backend, Sandbox, Session
+
+
+class LocalBackend(Backend):
+    """Stub for running Pi (bundled JS) or Claude (``claude-agent-sdk``) locally."""
+
+    supported_harnesses = frozenset({HarnessType.PI, HarnessType.CLAUDE})
+    _NOT_IMPLEMENTED = (
+        "LocalBackend is not implemented yet (Phase 3: Pi via bundled JS, "
+        "Phase 4: Claude via claude-agent-sdk)."
+    )
+
+    async def create_sandbox(self) -> Sandbox:
+        """Always raises ``NotImplementedError``."""
+        raise NotImplementedError(self._NOT_IMPLEMENTED)
+
+    async def create_session(
+        self,
+        sandbox: Sandbox,
+        config: HarnessAgentConfig,
+        *,
+        harness: HarnessType,
+        secrets: Optional[Mapping[str, str]] = None,
+        trace: Optional[TraceContext] = None,
+        session_id: Optional[str] = None,
+    ) -> Session:
+        """Always raises ``NotImplementedError``."""
+        raise NotImplementedError(self._NOT_IMPLEMENTED)
diff --git a/sdks/python/agenta/sdk/agents/adapters/rivet.py b/sdks/python/agenta/sdk/agents/adapters/rivet.py
new file mode 100644
index 0000000000..2316eb0dea
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/rivet.py
@@ -0,0 +1,186 @@
+"""RivetBackend: drive a harness over ACP via the TypeScript rivet runner.
+
+This backend hard-codes that it is the rivet engine. It reaches the same runner the deployed
+sidecar runs (HTTP when a ``url`` is set, otherwise a subprocess CLI), and the runner starts
+the rivet daemon, the ACP adapter, and the harness. Supports Pi and Claude. The ``sandbox``
+axis (``local`` / ``daytona``) is a real runtime choice, so it stays a constructor arg.
+
+It is its own class, not a subclass of any other backend; it shares only the ``utils`` wire
+and transport helpers.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any, AsyncIterator, Dict, List, Mapping, Optional, Sequence
+
+from ..dtos import (
+    AgentResult,
+    EventSink,
+    HarnessAgentConfig,
+    HarnessType,
+    Message,
+    TraceContext,
+)
+from ..interfaces import Backend, Sandbox, Session
+from ..streaming import AgentRun
+from ..utils import (
+    deliver_http,
+    deliver_http_stream,
+    deliver_subprocess,
+    deliver_subprocess_stream,
+    request_to_wire,
+    result_from_wire,
+)
+
+_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"]
+
+
+class RivetSandbox(Sandbox):
+    """Holds the sandbox axis id plus a buffer of provisioning files. The real sandbox
+    is created inside the TS runner, so the buffered files are informational today."""
+
+    def __init__(self, sandbox_id: str) -> None:
+        self.files: Dict[str, bytes] = {}
+        self.sandbox_id = sandbox_id
+
+    async def add_files(self, files: Mapping[str, bytes]) -> None:
+        for path, content in files.items():
+            self.files[path] = content
+
+
+class RivetSession(Session):
+    """A turn-per-prompt session: every prompt is one ``/run`` call (cold + replay)."""
+
+    def __init__(
+        self,
+        backend: "RivetBackend",
+        sandbox: RivetSandbox,
+        config: HarnessAgentConfig,
+        *,
+        harness: HarnessType,
+        secrets: Optional[Mapping[str, str]],
+        trace: Optional[TraceContext],
+        session_id: Optional[str],
+    ) -> None:
+        self._session_id = session_id
+        self._trace = trace
+        self._secrets = dict(secrets or {})  # private copy; ``None`` becomes ``{}``
+        self._harness = harness
+        self._config = config
+        self._sandbox = sandbox
+        self._backend = backend
+
+    @property
+    def id(self) -> Optional[str]:
+        return self._session_id  # updated whenever a result reports a session id
+
+    def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]:
+        """Build this turn's ``/run`` request JSON; ``prompt`` and ``stream`` share it."""
+        return request_to_wire(
+            messages=messages,
+            config=self._config,
+            harness=self._harness,
+            engine=RivetBackend._ENGINE,
+            sandbox=self._sandbox.sandbox_id,
+            session_id=self._session_id,
+            trace=self._trace,
+            secrets=self._secrets,
+        )
+
+    def _absorb_result(self, result: AgentResult) -> None:
+        """Remember the session id a run reports so a follow-up turn resumes it."""
+        # A falsy id (none reported) leaves the current id untouched.
+        self._session_id = result.session_id or self._session_id
+
+    async def prompt(
+        self,
+        messages: Sequence[Message],
+        *,
+        on_event: Optional[EventSink] = None,
+    ) -> AgentResult:
+        payload = self._wire_payload(messages)
+        result = result_from_wire(await self._backend._deliver(payload))
+        self._absorb_result(result)
+        _emit_events(result, on_event)  # replayed only after the whole turn returned
+        return result
+
+    def stream(self, messages: Sequence[Message]) -> AgentRun:
+        """Start one turn on the streaming transport and return its ``AgentRun``."""
+        run = AgentRun(self._backend._deliver_stream(self._wire_payload(messages)))
+        return run.on_result(self._absorb_result)
+
+
+class RivetBackend(Backend):
+    """Backend for the rivet engine: Pi or Claude over ACP, via the TS runner."""
+
+    _ENGINE = "rivet"  # fixed engine identity; deliberately not a constructor arg
+    supported_harnesses = frozenset({HarnessType.PI, HarnessType.CLAUDE})
+
+    def __init__(
+        self,
+        *,
+        sandbox: str = "local",
+        url: Optional[str] = None,
+        command: Optional[Sequence[str]] = None,
+        cwd: Optional[str] = None,
+        timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")),
+    ) -> None:
+        self._timeout = timeout  # NOTE: default is read from the env at import time
+        self._cwd = cwd
+        self._command: List[str] = list(command or _DEFAULT_COMMAND)
+        self._url = url
+        self._sandbox = sandbox
+
+    async def create_sandbox(self) -> RivetSandbox:
+        return RivetSandbox(self._sandbox)
+
+    async def create_session(
+        self,
+        sandbox: Sandbox,
+        config: HarnessAgentConfig,
+        *,
+        harness: HarnessType,
+        secrets: Optional[Mapping[str, str]] = None,
+        trace: Optional[TraceContext] = None,
+        session_id: Optional[str] = None,
+    ) -> RivetSession:
+        if isinstance(sandbox, RivetSandbox):
+            return RivetSession(
+                self,
+                sandbox,
+                config,
+                harness=harness,
+                secrets=secrets,
+                trace=trace,
+                session_id=session_id,
+            )
+        raise TypeError("RivetBackend.create_session requires a RivetSandbox")
+
+    async def _deliver(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        if not self._url:  # no URL configured: run the runner command as a subprocess
+            env = dict(os.environ, AGENT_BACKEND=self._ENGINE)
+            return await deliver_subprocess(
+                self._command, payload, cwd=self._cwd, env=env, timeout=self._timeout
+            )
+        return await deliver_http(self._url, payload, timeout=self._timeout)
+
+    def _deliver_stream(self, payload: Dict[str, Any]) -> AsyncIterator[Dict[str, Any]]:
+        """Streaming twin of ``_deliver``: iterates the runner's NDJSON records."""
+        if not self._url:
+            env = dict(os.environ, AGENT_BACKEND=self._ENGINE)
+            return deliver_subprocess_stream(
+                self._command, payload, cwd=self._cwd, env=env, timeout=self._timeout
+            )
+        return deliver_http_stream(self._url, payload, timeout=self._timeout)
+
+
+def _emit_events(result: AgentResult, on_event: Optional[EventSink]) -> None:
+    """Feed every event of a finished result to ``on_event``, when a sink is given."""
+    if on_event:
+        # Best-effort delivery: an exception raised by the sink is swallowed.
+        for event in result.events:
+            try:
+                on_event(event)
+            except Exception:  # pylint: disable=broad-except
+                continue
diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py b/sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py
new file mode 100644
index 0000000000..a8ad63761a
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py
@@ -0,0 +1,43 @@
+"""Vercel AI SDK adapters for the agent runtime.
+
+The neutral agent runtime speaks ``Message``, ``AgentEvent``, and ``AgentRun``. This package
+is the browser protocol adapter: Vercel ``UIMessage`` request bodies, UI Message Stream parts,
+SSE framing, and the ``/messages`` route helpers.
+"""
+
+from .messages import (
+    from_ui_messages,
+    message_to_vercel_ui_message,
+    to_ui_message,
+    vercel_ui_messages_to_messages,
+)
+from .routing import (
+    VERCEL_MESSAGE_PROTOCOL,
+    VERCEL_MESSAGE_PROTOCOL_HEADERS,
+    VERCEL_MESSAGE_PROTOCOL_VERSION,
+    inject_stream_session_id,
+    register_agent_message_routes,
+    resolve_session_id,
+    set_vercel_message_protocol_headers,
+)
+from .sse import VERCEL_UI_MESSAGE_STREAM_HEADERS, vercel_sse_stream
+from .stream import agent_run_to_vercel_parts, ui_message_stream
+
+__all__ = [
+    "vercel_ui_messages_to_messages",
+    "message_to_vercel_ui_message",
+    "agent_run_to_vercel_parts",
+    "VERCEL_UI_MESSAGE_STREAM_HEADERS",
+    "vercel_sse_stream",
+    "resolve_session_id",
+    "inject_stream_session_id",
+    "VERCEL_MESSAGE_PROTOCOL",
+    "VERCEL_MESSAGE_PROTOCOL_VERSION",
+    "VERCEL_MESSAGE_PROTOCOL_HEADERS",
+    "set_vercel_message_protocol_headers",
+    "register_agent_message_routes",
+    # Former flat-module names.
+    "from_ui_messages",
+    "to_ui_message",
+    "ui_message_stream",
+]
diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/messages.py b/sdks/python/agenta/sdk/agents/adapters/vercel/messages.py
new file mode 100644
index 0000000000..7f718b9032
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/vercel/messages.py
@@ -0,0 +1,219 @@
+"""Vercel ``UIMessage`` conversion at the agent HTTP edge.
+
+This adapter translates between the Vercel AI SDK ``UIMessage`` parts shape and the
+neutral agent runtime ``Message`` / ``ContentBlock`` types. The neutral DTOs stay the port;
+Vercel-specific part names live here.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from ...dtos import AgentResult, ContentBlock, Message
+
+TOOL_APPROVAL_REQUEST = "tool-approval-request"
+TOOL_APPROVAL_RESPONSE = "tool-approval-response"
+TOOL_OUTPUT_AVAILABLE = "tool-output-available"
+
+
+def vercel_ui_messages_to_messages(raw: Optional[List[Any]]) -> List[Message]:
+    """Convert inbound Vercel ``UIMessage`` items to neutral messages.
+
+    Items that convert to ``None`` are dropped; a ``None`` input yields ``[]``.
+    """
+    # Lazy pipeline: convert each item, then keep only the usable results.
+    converted = (_ui_message_to_message(item) for item in raw or [])
+    return [message for message in converted if message is not None]
+
+
+def _ui_message_to_message(raw: Any) -> Optional[Message]:
+    """Convert one inbound item to a ``Message``; ``None`` when it is unusable."""
+    if isinstance(raw, Message):
+        return raw
+    if not (isinstance(raw, dict) and "role" in raw):
+        return None
+
+    role = str(raw["role"])
+    parts = raw.get("parts")
+    if parts is None:
+        return Message.from_raw(raw)
+
+    blocks: List[ContentBlock] = [
+        block for part in parts or [] for block in _part_to_blocks(part)
+    ]
+    if not blocks:
+        return Message(role=role, content="")
+    if any(block.type != "text" for block in blocks):
+        return Message(role=role, content=blocks)
+    return Message(role=role, content="".join(block.text or "" for block in blocks))
+
+
+def _part_to_blocks(part: Any) -> List[ContentBlock]:
+    """Map one Vercel ``UIMessage`` part to neutral content blocks.
+
+    Non-dict parts and part types not handled below map to an empty list.
+    """
+    if not isinstance(part, dict):
+        return []
+    ptype = str(part.get("type", ""))
+
+    if ptype == "text":
+        text = part.get("text")
+        if text is None:
+            return []
+        return [ContentBlock(type="text", text=text)]
+
+    if ptype == "file":
+        media = part.get("mediaType") or part.get("mimeType")
+        is_image = isinstance(media, str) and media.startswith("image/")
+        return [
+            ContentBlock(
+                type="image" if is_image else "resource",
+                uri=part.get("url") or part.get("uri"),
+                data=part.get("data"),
+                mime_type=media,
+            )
+        ]
+
+    # An approval request contributes no blocks.
+    if ptype == TOOL_APPROVAL_REQUEST:
+        return []
+
+    if ptype == TOOL_APPROVAL_RESPONSE:
+        return _approval_response_blocks(part)
+
+    # Every remaining tool-shaped part type goes through the tool converter.
+    if ptype in (TOOL_OUTPUT_AVAILABLE, "dynamic-tool") or ptype.startswith("tool-"):
+        return _tool_part_blocks(part, ptype)
+
+    return []
+
+
+def _tool_part_blocks(part: Dict[str, Any], ptype: str) -> List[ContentBlock]:
+    """Turn a Vercel tool part into neutral tool-call/result content blocks.
+
+    A ``tool_call`` block is emitted unless the part is a
+    ``tool-output-available`` chunk without ``input``. A ``tool_result`` block
+    follows when the part carries ``errorText``/``output`` or a matching
+    ``state`` (``output-error`` / ``output-available``).
+    """
+    call_id = part.get("toolCallId") or part.get("tool_call_id")
+    name = part.get("toolName") or part.get("tool_name")
+    is_output_chunk = ptype == TOOL_OUTPUT_AVAILABLE
+    if name is None and ptype.startswith("tool-") and not is_output_chunk:
+        # Fall back to the name embedded in the part type: ``tool-<name>``.
+        name = ptype[len("tool-") :]
+
+    def result_block(output: Any, is_error: bool) -> ContentBlock:
+        """Build the ``tool_result`` block for this call id and tool name."""
+        return ContentBlock(
+            type="tool_result",
+            tool_call_id=call_id,
+            tool_name=name,
+            output=output,
+            is_error=is_error,
+        )
+
+    blocks: List[ContentBlock] = []
+    if not is_output_chunk or "input" in part:
+        blocks.append(
+            ContentBlock(
+                type="tool_call",
+                tool_call_id=call_id,
+                tool_name=name,
+                input=part.get("input"),
+            )
+        )
+
+    # At most one result block; the error form takes precedence.
+    state = part.get("state")
+    error_text = part.get("errorText")
+    if error_text is not None or state == "output-error":
+        failure = error_text if error_text is not None else part.get("output")
+        blocks.append(result_block(failure, True))
+    elif "output" in part or state == "output-available":
+        blocks.append(result_block(part.get("output"), False))
+    return blocks
+
+
+def _approval_response_blocks(part: Dict[str, Any]) -> List[ContentBlock]:
+    """Turn a cross-turn approval reply into one ``tool_result`` block."""
+    # The id falls back from ``toolCallId`` to ``tool_call_id`` to ``approvalId``.
+    tc_id = part.get("toolCallId") or part.get("tool_call_id") or part.get("approvalId")
+    result = part.get("output")
+    if result is None:
+        # No explicit output: use the approval decision, else the reason.
+        decision = part.get("approved")
+        result = part.get("reason") if decision is None else {"approved": decision}
+    return [ContentBlock(type="tool_result", tool_call_id=tc_id, output=result)]
+
+
+def message_to_vercel_ui_message(
+    source: Any,
+    *,
+    message_id: str = "msg-1",
+) -> Dict[str, Any]:
+    """Render an ``AgentResult`` or neutral ``Message`` as one Vercel ``UIMessage``.
+
+    Raises ``TypeError`` for any other ``source`` type.
+    """
+    if isinstance(source, AgentResult):
+        role = "assistant"
+        parts: List[Dict[str, Any]] = [{"type": "text", "text": source.output or ""}]
+    elif isinstance(source, Message):
+        role = source.role
+        parts = _content_to_parts(source.content)
+    else:
+        raise TypeError(
+            "message_to_vercel_ui_message expects an AgentResult or Message, "
+            f"got {type(source).__name__!r}"
+        )
+
+    return {"id": message_id, "role": role, "parts": parts}
+
+
+def _content_to_parts(content: Any) -> List[Dict[str, Any]]:
+    """Flatten message content (plain text or a block list) into Vercel parts."""
+    if isinstance(content, str):
+        if not content:
+            return []
+        return [{"type": "text", "text": content}]
+    return [part for block in content or [] for part in _block_to_parts(block)]
+
+
+def _block_to_parts(block: ContentBlock) -> List[Dict[str, Any]]:
+    """Render one neutral content block as Vercel ``UIMessage`` parts.
+
+    A failed ``tool_result`` also carries ``errorText``, the field the inbound
+    converter and Vercel's ``output-error`` state read; ``output`` is kept too.
+    """
+    if block.type == "text":
+        return [{"type": "text", "text": block.text or ""}]
+    if block.type in ("image", "resource"):
+        part: Dict[str, Any] = {"type": "file"}
+        if block.uri is not None:
+            part["url"] = block.uri
+        if block.mime_type is not None:
+            part["mediaType"] = block.mime_type
+        if block.data is not None:
+            part["data"] = block.data
+        return [part]
+    if block.type not in ("tool_call", "tool_result"):
+        return []
+    tool: Dict[str, Any] = {
+        "type": f"tool-{block.tool_name or 'tool'}",
+        "toolCallId": block.tool_call_id,
+    }
+    if block.type == "tool_call":
+        tool.update(state="input-available", input=block.input)
+    else:
+        tool["state"] = "output-error" if block.is_error else "output-available"
+        tool["output"] = block.output
+        if block.is_error:
+            tool["errorText"] = "" if block.output is None else str(block.output)
+    return [tool]
+
+
+# Back-compat aliases for the former flat module API.
+from_ui_messages = vercel_ui_messages_to_messages
+to_ui_message = message_to_vercel_ui_message
diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py
new file mode 100644
index 0000000000..a854ca0460
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py
@@ -0,0 +1,209 @@
+"""FastAPI route wiring for the agent ``/messages`` Vercel adapter."""
+
+from __future__ import annotations
+
+import re
+from typing import Any, Callable, Collection, Optional
+from uuid import uuid4
+
+from fastapi import Request
+from fastapi.responses import JSONResponse, Response
+
+from agenta.sdk.contexts.tracing import tracing_context_manager
+from agenta.sdk.models.workflows import (
+    LoadSessionRequest,
+    LoadSessionResponse,
+    WorkflowBatchResponse,
+    WorkflowInvokeRequest,
+    WorkflowRequestData,
+    WorkflowStreamingResponse,
+)
+
+from ...interfaces import NoopSessionStore, SessionStore
+from .messages import message_to_vercel_ui_message, vercel_ui_messages_to_messages
+
+# An opaque, project-scoped session id (RFC §4.1): bounded length, restricted charset.
+_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._:-]{1,128}\Z")  # \Z: no trailing "\n"
+
+VERCEL_MESSAGE_PROTOCOL = "vercel"
+VERCEL_MESSAGE_PROTOCOL_VERSION = "v1"
+VERCEL_MESSAGE_PROTOCOL_HEADERS = {
+    "x-ag-messages-format": VERCEL_MESSAGE_PROTOCOL,
+    "x-ag-messages-version": VERCEL_MESSAGE_PROTOCOL_VERSION,
+}
+
+
+def set_vercel_message_protocol_headers(response: Response) -> Response:
+    """Add the ``/messages`` protocol identity headers that ``response`` lacks."""
+    for header_name, header_value in VERCEL_MESSAGE_PROTOCOL_HEADERS.items():
+        response.headers.setdefault(header_name, header_value)
+    return response
+
+
+def resolve_session_id(session_id: Optional[str]) -> Optional[str]:
+    """Mint a new id when absent, echo a valid one, or return ``None`` when invalid."""
+    if session_id is None:
+        return "sess_" + uuid4().hex
+    return session_id if _SESSION_ID_RE.fullmatch(session_id) else None  # whole string
+
+
+def inject_stream_session_id(
+    response: WorkflowStreamingResponse,
+    session_id: str,
+) -> None:
+    """Wrap the generator so the first Vercel ``start`` part carries ``sessionId``."""
+    upstream = response.generator
+
+    async def stamping_generator():
+        pending = True  # flips to False once a ``start`` part has been stamped
+        async for part in upstream():
+            if pending and isinstance(part, dict) and part.get("type") == "start":
+                part.setdefault("messageMetadata", {})["sessionId"] = session_id
+                pending = False
+            yield part
+
+    response.generator = stamping_generator
+
+
+def make_messages_endpoint(
+    *,
+    wf: Any,
+    get_request_tracing_context: Callable[[Request], Any],
+    parse_accept: Callable[[Request], Optional[str]],
+    stream_media_types: Collection[str],
+    make_json_response: Callable[[WorkflowBatchResponse], Response],
+    make_not_acceptable_response: Callable[[str, Any], Response],
+    make_stream_response: Callable[[WorkflowStreamingResponse, str], Response],
+    handle_failure: Callable[[Exception], Any],
+):
+    """Build the ``POST /messages`` endpoint for one routed agent workflow."""
+
+    async def messages_endpoint(req: Request, request: WorkflowInvokeRequest):
+        credentials = req.state.auth.get("credentials")
+
+        session_id = resolve_session_id(request.session_id)
+        if session_id is None:  # the supplied id failed validation
+            return set_vercel_message_protocol_headers(
+                JSONResponse(
+                    status_code=400,
+                    content={
+                        "detail": "session_id violates the allowed charset/length"
+                    },
+                )
+            )
+
+        try:
+            request.session_id = session_id  # the validated or freshly minted id
+            if request.data is None:
+                request.data = WorkflowRequestData()
+
+            request.data.messages = [
+                message.to_wire()
+                for message in vercel_ui_messages_to_messages(request.data.messages)
+            ]
+
+            requested = parse_accept(req)
+            want_stream = requested in stream_media_types
+            request.data.stream = want_stream
+
+            with tracing_context_manager(get_request_tracing_context(req)):
+                response = await wf.invoke(
+                    request=request,
+                    secrets=None,  # no secrets are passed from this endpoint
+                    credentials=credentials,
+                )
+
+            if isinstance(response, (WorkflowBatchResponse, WorkflowStreamingResponse)):
+                response.session_id = session_id
+
+            if (  # error batch responses go out as JSON, stream or not
+                isinstance(response, WorkflowBatchResponse)
+                and response.status
+                and response.status.code is not None
+                and response.status.code >= 400
+            ):
+                return set_vercel_message_protocol_headers(make_json_response(response))
+
+            if want_stream:
+                if not isinstance(response, WorkflowStreamingResponse):
+                    return set_vercel_message_protocol_headers(
+                        make_not_acceptable_response(str(requested), response)
+                    )
+                inject_stream_session_id(response, session_id)
+                return set_vercel_message_protocol_headers(
+                    make_stream_response(response, "vercel")
+                )
+
+            if not isinstance(response, WorkflowBatchResponse):
+                return set_vercel_message_protocol_headers(
+                    make_not_acceptable_response(
+                        requested or "application/json", response
+                    )
+                )
+            return set_vercel_message_protocol_headers(make_json_response(response))
+
+        except Exception as exception:  # any failure becomes a handled response
+            return set_vercel_message_protocol_headers(await handle_failure(exception))
+
+    return messages_endpoint
+
+
+def make_load_session_endpoint(
+    *,
+    session_store: Optional[SessionStore] = None,
+):
+    """Build the v1 ``POST /load-session`` endpoint over the session-store port.
+
+    Falls back to a ``NoopSessionStore`` when ``session_store`` is falsy.
+    """
+    store = session_store or NoopSessionStore()
+
+    async def load_session_endpoint(req: Request, request: LoadSessionRequest):
+        stored = await store.load(request.session_id)
+        ui_messages = [
+            message_to_vercel_ui_message(message, message_id=f"msg-{position}")
+            for position, message in enumerate(stored, start=1)
+        ]
+        body = LoadSessionResponse(session_id=request.session_id, messages=ui_messages)
+        payload = body.model_dump(mode="json")
+        return set_vercel_message_protocol_headers(JSONResponse(content=payload))
+
+    return load_session_endpoint
+
+
+def register_agent_message_routes(
+    target: Any,
+    prefix: str,
+    *,
+    wf: Any,
+    invoke_responses: dict,
+    get_request_tracing_context: Callable[[Request], Any],
+    parse_accept: Callable[[Request], Optional[str]],
+    stream_media_types: Collection[str],
+    make_json_response: Callable[[WorkflowBatchResponse], Response],
+    make_not_acceptable_response: Callable[[str, Any], Response],
+    make_stream_response: Callable[[WorkflowStreamingResponse, str], Response],
+    handle_failure: Callable[[Exception], Any],
+    session_store: Optional[SessionStore] = None,
+) -> None:
+    """Add the ``/messages`` and ``/load-session`` POST routes under ``prefix``."""
+    messages_endpoint = make_messages_endpoint(
+        wf=wf,
+        get_request_tracing_context=get_request_tracing_context,
+        parse_accept=parse_accept,
+        stream_media_types=stream_media_types,
+        make_json_response=make_json_response,
+        make_not_acceptable_response=make_not_acceptable_response,
+        make_stream_response=make_stream_response,
+        handle_failure=handle_failure,
+    )
+    target.add_api_route(
+        prefix + "/messages",
+        messages_endpoint,
+        methods=["POST"],
+        responses=invoke_responses,
+    )
+    load_session_endpoint = make_load_session_endpoint(session_store=session_store)
+    target.add_api_route(
+        prefix + "/load-session", load_session_endpoint, methods=["POST"]
+    )
diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/sse.py b/sdks/python/agenta/sdk/agents/adapters/vercel/sse.py
new file mode 100644
index 0000000000..cd60023916
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/vercel/sse.py
@@ -0,0 +1,25 @@
+"""SSE framing for the Vercel AI SDK UI Message Stream."""
+
+from __future__ import annotations
+
+from json import dumps
+from typing import Any, AsyncGenerator
+
+# Headers the Vercel AI SDK client and intermediaries require for a UI Message Stream.
+# ``x-accel-buffering: no`` stops a proxy from re-buffering the SSE so parts flush live.
+VERCEL_UI_MESSAGE_STREAM_HEADERS = {
+    "x-vercel-ai-ui-message-stream": "v1",
+    "cache-control": "no-cache",
+    "x-accel-buffering": "no",
+}
+
+
+def vercel_sse_stream(aiter: AsyncGenerator[Any, None]):
+    """Wrap each stream part in an SSE ``data:`` frame, then emit a final ``[DONE]``."""
+
+    async def frames():
+        async for part in aiter:
+            yield f"data: {dumps(part, ensure_ascii=False)}\n\n"
+        yield "data: [DONE]\n\n"
+
+    return frames()
diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/stream.py b/sdks/python/agenta/sdk/agents/adapters/vercel/stream.py
new file mode 100644
index 0000000000..6d0e1526b2
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/vercel/stream.py
@@ -0,0 +1,216 @@
+"""Encode neutral agent run events as Vercel UI Message Stream parts."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator, Dict, Optional
+
+from ...dtos import AgentResult
+from ...streaming import AgentRun
+from .messages import TOOL_APPROVAL_REQUEST
+
+
+async def agent_run_to_vercel_parts(
+    run: AgentRun,
+    *,
+    session_id: Optional[str] = None,
+    message_id: str = "msg-1",
+    trace_id: Optional[str] = None,
+) -> AsyncIterator[Dict[str, Any]]:
+    """Project a live ``AgentRun`` into Vercel UI Message Stream part dictionaries."""
+    start: Dict[str, Any] = {"type": "start", "messageId": message_id}
+    if session_id is not None:
+        start["messageMetadata"] = {"sessionId": session_id}
+    yield start
+    yield {"type": "start-step"}
+
+    text_seq = 0
+    reasoning_seq = 0
+    usage: Optional[Dict[str, Any]] = None
+    stop_reason: Optional[str] = None
+
+    try:
+        async for event in run:
+            etype = event.type
+            data = event.data
+
+            if etype == "message":  # a whole message: start + one delta + end
+                text_seq += 1
+                tid = f"text-{text_seq}"
+                yield {"type": "text-start", "id": tid}
+                yield {"type": "text-delta", "id": tid, "delta": data.get("text", "")}
+                yield {"type": "text-end", "id": tid}
+            elif etype == "message_start":
+                yield {"type": "text-start", "id": data.get("id")}
+            elif etype == "message_delta":
+                yield {
+                    "type": "text-delta",
+                    "id": data.get("id"),
+                    "delta": data.get("delta", ""),
+                }
+            elif etype == "message_end":
+                yield {"type": "text-end", "id": data.get("id")}
+            elif etype == "thought":  # a whole thought: start + one delta + end
+                reasoning_seq += 1
+                rid = f"reasoning-{reasoning_seq}"
+                yield {"type": "reasoning-start", "id": rid}
+                yield {
+                    "type": "reasoning-delta",
+                    "id": rid,
+                    "delta": data.get("text", ""),
+                }
+                yield {"type": "reasoning-end", "id": rid}
+            elif etype == "reasoning_start":
+                yield {"type": "reasoning-start", "id": data.get("id")}
+            elif etype == "reasoning_delta":
+                yield {
+                    "type": "reasoning-delta",
+                    "id": data.get("id"),
+                    "delta": data.get("delta", ""),
+                }
+            elif etype == "reasoning_end":
+                yield {"type": "reasoning-end", "id": data.get("id")}
+            elif etype == "tool_call":
+                tool_call_id = data.get("id")
+                tool_name = data.get("name")
+                yield {
+                    "type": "tool-input-start",
+                    "toolCallId": tool_call_id,
+                    "toolName": tool_name,
+                }
+                available: Dict[str, Any] = {
+                    "type": "tool-input-available",
+                    "toolCallId": tool_call_id,
+                    "toolName": tool_name,
+                    "input": data.get("input"),
+                }
+                if data.get("render") is not None:
+                    available["render"] = data["render"]
+                yield available
+            elif etype == "tool_result":
+                tool_call_id = data.get("id")
+                if data.get("denied"):
+                    yield {
+                        "type": "tool-output-denied",
+                        "toolCallId": tool_call_id,
+                    }
+                elif data.get("isError"):
+                    yield {
+                        "type": "tool-output-error",
+                        "toolCallId": tool_call_id,
+                        "errorText": _as_text(data.get("output")),
+                    }
+                else:
+                    structured = data.get("data")  # preferred over ``output`` when set
+                    out = structured if structured is not None else data.get("output")
+                    available = {
+                        "type": "tool-output-available",
+                        "toolCallId": tool_call_id,
+                        "output": out,
+                    }
+                    if data.get("render") is not None:
+                        available["render"] = data["render"]
+                    yield available
+            elif etype == "interaction_request":
+                yield _interaction_part(data)
+            elif etype == "data":
+                part: Dict[str, Any] = {
+                    "type": f"data-{data.get('name', 'data')}",
+                    "data": data.get("data"),
+                }
+                if data.get("transient"):
+                    part["transient"] = True
+                yield part
+            elif etype == "file":
+                yield {
+                    "type": "file",
+                    "url": data.get("url"),
+                    "mediaType": data.get("mediaType"),
+                }
+            elif etype == "usage":
+                usage = _usage_metadata(data)  # held back for the finish metadata
+            elif etype == "error":
+                yield {"type": "error", "errorText": data.get("message", "")}
+            elif etype == "done":
+                stop_reason = data.get("stopReason")  # reported on the finish part
+    except Exception as exc:  # surface the failure as a stream error part
+        yield {"type": "error", "errorText": str(exc)}
+        return  # no finish-step / finish parts after a failure
+
+    if usage is None or stop_reason is None or trace_id is None:
+        result = _safe_result(run)
+        if result is not None:  # fill each missing field independently
+            if usage is None:
+                usage = _usage_metadata(result.usage or {})
+            if stop_reason is None:
+                stop_reason = result.stop_reason
+            if trace_id is None:
+                trace_id = result.trace_id
+
+    yield {"type": "finish-step"}
+    finish: Dict[str, Any] = {"type": "finish"}
+    if stop_reason is not None:
+        finish["finishReason"] = stop_reason
+    metadata: Dict[str, Any] = {}
+    if usage:
+        metadata["usage"] = usage
+    if trace_id is not None:
+        metadata["traceId"] = trace_id
+    if metadata:
+        finish["messageMetadata"] = metadata
+    yield finish
+
+
+def _interaction_part(data: Dict[str, Any]) -> Dict[str, Any]:
+    """Render a neutral ``interaction_request`` event as one Vercel stream part."""
+    kind = data.get("kind")
+    payload = data.get("payload") or {}
+    if kind == "input":
+        return {"type": "data-input-request", "id": data.get("id"), "data": payload}
+    if kind != "permission":
+        return {
+            "type": "data-interaction",
+            "id": data.get("id"),
+            "data": {"kind": kind, "payload": payload},
+        }
+    return {
+        "type": TOOL_APPROVAL_REQUEST,
+        "approvalId": data.get("id"),
+        "toolCallId": _approval_tool_call_id(payload),
+        "availableReplies": payload.get("availableReplies"),
+        "toolCall": payload.get("toolCall"),
+    }
+
+
+def _approval_tool_call_id(payload: Dict[str, Any]) -> Optional[Any]:
+    """Read the tool call id from ``toolCallId``, else from a ``toolCall`` dict."""
+    direct = payload.get("toolCallId")
+    if direct is None:
+        nested = payload.get("toolCall")
+        if isinstance(nested, dict):
+            return nested.get("id") or nested.get("toolCallId")
+    return direct
+
+
+def _usage_metadata(data: Dict[str, Any]) -> Dict[str, Any]:
+    """Keep only the ``input``/``output``/``total``/``cost`` entries that are set."""
+    wanted = ("input", "output", "total", "cost")
+    # Pair each wanted key with its value, then drop the missing/null ones.
+    pairs = ((key, data.get(key)) for key in wanted)
+    return {key: value for key, value in pairs if value is not None}
+
+
+def _as_text(value: Any) -> str:
+    """Stringify ``value``: ``None`` becomes ``""``, strings pass through."""
+    text = "" if value is None else value
+    return text if isinstance(text, str) else str(text)
+
+
+def _safe_result(run: AgentRun) -> Optional[AgentResult]:
+    try:
+        return run.result()
+    except Exception:  # any failure reading the result is treated as "no result"
+        return None
+
+
+# Back-compat alias for the former flat module API.
+ui_message_stream = agent_run_to_vercel_parts
diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py
new file mode 100644
index 0000000000..0a050b4cb1
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/dtos.py
@@ -0,0 +1,698 @@
+"""Data contracts for the agent runtime (the DTO layer).
+
+Everything the ports and adapters pass around: harness identity, capabilities, content
+blocks, messages, run events, the run result, trace/tool-callback plumbing, the neutral
+``AgentConfig``, the per-harness configs a backend plumbs, and the ``SessionConfig`` bundle.
+
+These are Pydantic models (the SDK already depends on Pydantic), kept neutral: an adapter
+translates them to and from its engine's own shapes at its edge.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any, Callable, ClassVar, Dict, List, Optional, Tuple, Union
+
+from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator
+
+from .mcp import (
+    MCPServerConfig,
+    ResolvedMCPServer,
+    mcp_servers_to_wire,
+    parse_mcp_server_configs,
+)
+from .tools import ToolCallback, ToolConfig, ToolSpec, coerce_tool_configs
+from .tools.models import coerce_tool_spec
+
+
+# ---------------------------------------------------------------------------
+# Harness identity
+# ---------------------------------------------------------------------------
+
+
+class HarnessType(str, Enum):
+    """The coding agent program a run drives. A backend declares which it supports."""
+
+    PI = "pi"
+    CLAUDE = "claude"
+    AGENTA = "agenta"
+
+    @classmethod
+    def coerce(cls, value: "HarnessType | str") -> "HarnessType":
+        """Return ``value`` as a member; loose strings (playground) match any case."""
+        if not isinstance(value, cls):
+            value = cls(str(value).lower())
+        return value
+
+
+# Permission policy for harness tool use in a headless run. ``auto`` approves (tools are
+# backend-resolved and trusted, no human to prompt); ``deny`` rejects.
+PermissionPolicy = str  # "auto" | "deny"
+
+
+# ---------------------------------------------------------------------------
+# Capabilities
+# ---------------------------------------------------------------------------
+
+
+class HarnessCapabilities(BaseModel):
+    """What a harness can do, probed by the backend (rivet ``AgentCapabilities``).
+
+    Adapters branch on these flags rather than the harness name (no ``if pi``): deliver
+    tools over MCP only when ``mcp_tools`` is set, skip image blocks without ``images``.
+    """
+
+    text_messages: bool = True  # the only capability that defaults to on
+    images: bool = False
+    file_attachments: bool = False
+    mcp_tools: bool = False
+    tool_calls: bool = False
+    reasoning: bool = False
+    plan_mode: bool = False
+    permissions: bool = False
+    usage: bool = False
+    streaming_deltas: bool = False
+    session_lifecycle: bool = False
+
+    @classmethod
+    def from_wire(
+        cls, data: Optional[Dict[str, Any]]
+    ) -> Optional["HarnessCapabilities"]:
+        """Parse the camelCase capability object an adapter returns. Non-dicts yield ``None``."""
+        if not isinstance(data, dict):
+            return None
+        return cls(
+            text_messages=bool(data.get("textMessages", True)),  # absent key -> True
+            images=bool(data.get("images", False)),
+            file_attachments=bool(data.get("fileAttachments", False)),
+            mcp_tools=bool(data.get("mcpTools", False)),
+            tool_calls=bool(data.get("toolCalls", False)),
+            reasoning=bool(data.get("reasoning", False)),
+            plan_mode=bool(data.get("planMode", False)),
+            permissions=bool(data.get("permissions", False)),
+            usage=bool(data.get("usage", False)),
+            streaming_deltas=bool(data.get("streamingDeltas", False)),
+            session_lifecycle=bool(data.get("sessionLifecycle", False)),
+        )
+
+
+# ---------------------------------------------------------------------------
+# Turn input: content blocks and messages
+# ---------------------------------------------------------------------------
+
+
+class ContentBlock(BaseModel):
+    """One piece of a message, mirroring the ACP content-block kinds.
+
+    ``text`` is the only kind callers send today; ``image`` and ``resource`` are plumbed so
+    an image-capable harness can take them. A bare string normalizes to a single ``text``
+    block on the wire.
+
+    ``tool_call`` / ``tool_result`` carriers (``tool_call_id``/``tool_name``/``input``/
+    ``output``/``is_error``) hold a resolved tool turn for structured-message continuation:
+    the ``/messages`` egress folds inbound UIMessage tool/approval parts into these so a
+    cross-turn HITL reply replays as a real tool call plus its result, and the model resumes
+    from the result instead of re-asking. Mirrors ``ContentBlock`` in
+    ``services/agent/src/protocol.ts``.
+    """
+
+    type: str  # "text" | "image" | "resource" | "tool_call" | "tool_result"
+    text: Optional[str] = None
+    data: Optional[str] = None  # base64 payload, used when type != "text"
+    mime_type: Optional[str] = None
+    uri: Optional[str] = None
+    # Tool-turn carriers (used by tool_call / tool_result blocks).
+    tool_call_id: Optional[str] = None
+    tool_name: Optional[str] = None
+    input: Optional[Any] = None
+    output: Optional[Any] = None
+    is_error: Optional[bool] = None
+
+    def to_wire(self) -> Dict[str, Any]:
+        """Serialize to the camelCase wire shape.
+
+        ``type`` is always emitted; every other field only when it is not ``None``,
+        so a plain text block stays ``{"type": "text", "text": ...}``.
+        """
+        # (wire key, value) pairs in the order they appear on the wire.
+        wire_fields = (
+            ("text", self.text),
+            ("data", self.data),
+            ("mimeType", self.mime_type),
+            ("uri", self.uri),
+            ("toolCallId", self.tool_call_id),
+            ("toolName", self.tool_name),
+            ("input", self.input),
+            ("output", self.output),
+            ("isError", self.is_error),
+        )
+        block: Dict[str, Any] = {"type": self.type}
+        block.update((key, value) for key, value in wire_fields if value is not None)
+        return block
+
+    @classmethod
+    def from_raw(cls, raw: Any) -> "ContentBlock":
+        """Normalize a loose block: instances pass through, dicts are read by key
+        (camelCase first, then snake_case), anything else becomes a ``text`` block."""
+        if isinstance(raw, ContentBlock):
+            return raw
+        if not isinstance(raw, dict):
+            return cls(type="text", text=raw if isinstance(raw, str) else str(raw))
+        error_flag = raw.get("isError")
+        if error_flag is None:
+            error_flag = raw.get("is_error")
+        return cls(
+            type=str(raw.get("type", "text")),
+            text=raw.get("text"),
+            data=raw.get("data"),
+            mime_type=raw.get("mimeType") or raw.get("mime_type"),
+            uri=raw.get("uri"),
+            tool_call_id=raw.get("toolCallId") or raw.get("tool_call_id"),
+            tool_name=raw.get("toolName") or raw.get("tool_name"),
+            input=raw.get("input"),
+            output=raw.get("output"),
+            is_error=error_flag,
+        )
+
+
+# A message's content is either a plain string or a list of content blocks.
+MessageContent = Union[str, List[ContentBlock]]
+
+
+class Message(BaseModel):
+    """A chat message in the conversation. ``content`` is text or content blocks.
+
+    This is the runtime's own message type, distinct from the SDK's prompt ``Message``
+    (``agenta.Message``); the two serve different layers.
+    """
+
+    role: str
+    content: MessageContent = ""
+
+    def to_wire(self) -> Dict[str, Any]:
+        if isinstance(self.content, str):
+            content: Any = self.content
+        else:
+            content = [block.to_wire() for block in self.content]
+        return {"role": self.role, "content": content}
+
+    @classmethod
+    def from_raw(cls, raw: Any) -> Optional["Message"]:
+        """Coerce a loose dict into a Message; ``None`` unless it is a dict with a ``role``."""
+        if isinstance(raw, Message):
+            return raw
+        if not isinstance(raw, dict) or "role" not in raw:
+            return None
+        content = "" if raw.get("content") is None else raw["content"]  # null -> ""
+        if isinstance(content, list):
+            content = [ContentBlock.from_raw(block) for block in content]
+        return cls(role=str(raw["role"]), content=content)
+
+
+def to_messages(raw: Optional[List[Any]]) -> List[Message]:
+    """Coerce a list of loose message dicts into :class:`Message` objects.
+
+    Items that :meth:`Message.from_raw` cannot coerce (it returns ``None``) are
+    dropped; ``None`` or an empty list yields an empty list.
+    """
+    coerced = (Message.from_raw(item) for item in raw or [])
+    return [message for message in coerced if message is not None]
+
+
+# ---------------------------------------------------------------------------
+# Run events
+# ---------------------------------------------------------------------------
+
+
+class AgentEvent(BaseModel):
+    """One structured event from a run, mapped from an ACP ``session/update``.
+
+    ``type`` is one of ``message``, ``thought``, ``tool_call``, ``tool_result``, ``usage``,
+    ``error``, ``done``. ``from_wire`` keeps the whole raw dict (``type`` included) as ``data``.
+    """
+
+    type: str
+    data: Dict[str, Any] = Field(default_factory=dict)
+
+    @classmethod
+    def from_wire(cls, raw: Any) -> Optional["AgentEvent"]:
+        if not isinstance(raw, dict) or not raw.get("type"):
+            return None  # not a dict, or no usable ``type``: nothing to build
+        return cls(type=str(raw["type"]), data=raw)
+
+
+# A live event sink. Synchronous: adapters invoke it as events arrive (or as a batch).
+EventSink = Callable[[AgentEvent], None]
+
+
+# ---------------------------------------------------------------------------
+# Cross-boundary plumbing
+# ---------------------------------------------------------------------------
+
+
+class TraceContext(BaseModel):
+    """Agenta trace context threaded into a harness run, so it nests under the caller's
+    workflow span. All fields optional; with none set the run traces standalone (or not at
+    all), the standalone-SDK case."""
+
+    traceparent: Optional[str] = None
+    baggage: Optional[str] = None
+    endpoint: Optional[str] = None  # OTLP traces URL
+    authorization: Optional[str] = None  # full Authorization header value
+    capture_content: bool = True
+
+    def to_wire(self) -> Dict[str, Any]:
+        """Serialize for the wire. Unset optional fields are sent as ``None`` rather than
+        dropped."""
+        passthrough = ("traceparent", "baggage", "endpoint", "authorization")
+        wire: Dict[str, Any] = {name: getattr(self, name) for name in passthrough}
+        # Only this flag changes spelling on the wire (snake_case -> camelCase).
+        wire["captureContent"] = self.capture_content
+        return wire
+
+
+# ---------------------------------------------------------------------------
+# Run result
+# ---------------------------------------------------------------------------
+
+
+class AgentResult(BaseModel):
+    """A run's reply plus structured metadata. ``output`` is the final assistant text;
+    ``usage`` rolls token/cost onto a workflow span; ``capabilities`` is what the harness
+    was probed to support."""
+
+    output: str = ""  # final assistant text
+    messages: List[Message] = Field(default_factory=list)
+    events: List[AgentEvent] = Field(default_factory=list)
+    usage: Optional[Dict[str, Any]] = None  # token/cost figures for the workflow span
+    stop_reason: Optional[str] = None
+    capabilities: Optional[HarnessCapabilities] = None  # probed harness support
+    session_id: Optional[str] = None
+    model: Optional[str] = None
+    trace_id: Optional[str] = None
+
+
+# ---------------------------------------------------------------------------
+# The neutral agent definition + run selection
+# ---------------------------------------------------------------------------
+
+
+class AgentConfig(BaseModel):
+    """What an agent IS, independent of where or how it runs. ``instructions`` becomes
+    ``AGENTS.md``. ``tools`` are provider-agnostic references; resolving them into runnable
+    specs is the caller's job (the Agenta service does it server-side).
+
+    ``harness_options`` is the neutral config's one escape hatch: a map keyed by harness
+    name (``"pi"``, ``"claude"``) whose value is a free-form bag of knobs only that harness
+    understands, for example Pi's ``system`` / ``append_system`` prompt overrides. The
+    config stays harness-agnostic because each Harness adapter reads only its own slice and
+    ignores the rest; a key for a harness that is not running is simply never looked at.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    instructions: Optional[str] = None
+    model: Optional[str] = None
+    tools: List[ToolConfig] = Field(default_factory=list)
+    mcp_servers: List[MCPServerConfig] = Field(default_factory=list)
+    harness_options: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
+
+    @field_validator("tools", mode="before")
+    @classmethod
+    def _coerce_tools(cls, value: Any) -> List[ToolConfig]:
+        return coerce_tool_configs(_as_list(value)).tool_configs  # lone dict -> [dict]
+
+    @field_validator("mcp_servers", mode="before")
+    @classmethod
+    def _coerce_mcp_servers(cls, value: Any) -> List[MCPServerConfig]:
+        return parse_mcp_server_configs(_as_list(value))  # lone dict -> [dict]
+
+    @classmethod
+    def from_params(
+        cls,
+        params: Dict[str, Any],
+        *,
+        defaults: Optional["AgentConfig"] = None,
+    ) -> "AgentConfig":
+        """Build an :class:`AgentConfig` from a request/config dict.
+
+        Accepts three shapes, in priority order: the dedicated ``agent`` element, the
+        playground ``prompt`` prompt-template (system message -> instructions, ``llm_config``
+        -> model + tools), and a flat ``{model, agents_md, tools}``. Unset fields fall back
+        to ``defaults``. ``harness_options`` is read from the ``agent`` element (or the flat
+        request) when present.
+        """
+        base = defaults or cls()  # no defaults given: fall back to an empty config
+        instructions, model, tools = _parse_agent_fields(params, base)
+        return cls(
+            instructions=instructions,
+            model=model,
+            tools=_as_list(tools),  # normalized again by the ``tools`` validator
+            mcp_servers=_parse_mcp_servers_raw(params, base),
+            harness_options=_parse_harness_options(params, base),
+        )
+
+
+class RunSelection(BaseModel):
+    """The run-time choices stored next to the agent config: which harness, which sandbox,
+    the permission policy. Read by the caller to pick a backend and harness class;
+    deliberately not part of the neutral :class:`AgentConfig`."""
+
+    harness: str = "pi"
+    sandbox: str = "local"
+    permission_policy: PermissionPolicy = "auto"  # plain str alias: "auto" | "deny"
+
+    @classmethod
+    def from_params(
+        cls,
+        params: Dict[str, Any],
+        *,
+        default_harness: str = "pi",
+        default_sandbox: str = "local",
+    ) -> "RunSelection":
+        agent = params.get("agent")  # the nested element, when the request has one
+        source = agent if isinstance(agent, dict) else params
+        return cls(  # values are lower-cased strings; none is validated here
+            harness=str(source.get("harness") or default_harness).lower(),
+            sandbox=str(source.get("sandbox") or default_sandbox).lower(),
+            permission_policy=str(source.get("permission_policy") or "auto").lower(),
+        )
+
+
+# ---------------------------------------------------------------------------
+# Per-harness configs (what an adapter consumes)
+# ---------------------------------------------------------------------------
+
+
+class HarnessAgentConfig(BaseModel):
+    """Base for a harness-specific config. A Harness produces one of these from the neutral
+    config; a backend plumbs it as-is, with no business logic about how the harness works.
+
+    The two subclasses differ in their *shape*, not just their identity, because the
+    harnesses differ: Pi takes built-in tool names plus native tool specs and never gates
+    tool use; Claude has no built-ins, delivers tools over MCP, and gates tool use behind a
+    permission policy. ``wire_tools`` is where each config emits its own tool/permission
+    fields for the ``/run`` payload.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    harness: ClassVar[HarnessType]  # assigned by each concrete subclass
+
+    agents_md: Optional[str] = None
+    model: Optional[str] = None
+    tool_callback: Optional[ToolCallback] = None
+    mcp_servers: List[ResolvedMCPServer] = Field(default_factory=list)
+
+    @field_validator("mcp_servers", mode="before")
+    @classmethod
+    def _coerce_resolved_mcp_servers(cls, value: Any) -> List[ResolvedMCPServer]:
+        return [
+            item
+            if isinstance(item, ResolvedMCPServer)
+            else ResolvedMCPServer.model_validate(item)
+            for item in value or []  # ``None`` validates as an empty list
+        ]
+
+    def wire_tools(self) -> Dict[str, Any]:
+        """The tool + permission fields this harness contributes to the ``/run`` payload."""
+        raise NotImplementedError  # concrete configs override this
+
+    def wire_prompt(self) -> Dict[str, Any]:
+        """The system-prompt fields this harness contributes to the ``/run`` payload. Empty
+        by default; a harness that exposes prompt overrides (Pi) emits them here."""
+        return {}
+
+    def wire_mcp(self) -> Dict[str, Any]:
+        """The ``mcpServers`` field for the ``/run`` payload. Omitted when none are declared so
+        a tool-free run's payload is unchanged (the golden wire contract)."""
+        if not self.mcp_servers:
+            return {}
+        return {"mcpServers": mcp_servers_to_wire(self.mcp_servers)}
+
+
+class PiAgentConfig(HarnessAgentConfig):
+    """Pi's config. Built-in tools by name plus resolved specs delivered natively (Pi has no
+    MCP; the runner registers them through the Pi extension). Pi does not gate tool use, so
+    no permission policy applies.
+
+    ``system`` and ``append_system`` are Pi's two system-prompt layers, distinct from
+    ``agents_md``. ``system`` *replaces* Pi's built-in base prompt outright (Pi's ``SYSTEM.md``
+    / ``--system-prompt``); ``append_system`` *adds* to the base prompt without replacing it
+    (Pi's ``APPEND_SYSTEM.md`` / ``--append-system-prompt``). Both are independent of
+    ``agents_md``: Pi still appends the AGENTS.md project context after the system prompt
+    either way, so AGENTS.md remains the right home for project conventions and these are
+    only for changing or extending Pi's base persona."""
+
+    harness: ClassVar[HarnessType] = HarnessType.PI
+
+    builtin_names: List[str] = Field(
+        default_factory=list,
+        validation_alias=AliasChoices("builtin_names", "builtin_tools"),
+    )
+    tool_specs: List[ToolSpec] = Field(
+        default_factory=list,
+        validation_alias=AliasChoices("tool_specs", "custom_tools"),
+    )
+    system: Optional[str] = None
+    append_system: Optional[str] = None
+
+    @field_validator("tool_specs", mode="before")
+    @classmethod
+    def _coerce_tool_specs(cls, value: Any) -> List[ToolSpec]:
+        return list(map(coerce_tool_spec, value or []))
+
+    @property
+    def builtin_tools(self) -> List[str]:
+        return [*self.builtin_names]
+
+    @property
+    def custom_tools(self) -> List[Dict[str, Any]]:
+        return [spec.to_wire() for spec in self.tool_specs]
+
+    def wire_tools(self) -> Dict[str, Any]:
+        """Pi's ``/run`` tool fields: built-in names, native specs, the optional callback,
+        and a fixed permission policy."""
+        wire: Dict[str, Any] = {"tools": self.builtin_tools}
+        wire["customTools"] = self.custom_tools
+        callback = self.tool_callback
+        wire["toolCallback"] = callback.to_wire() if callback else None
+        wire["permissionPolicy"] = "auto"  # Pi never gates tool use
+        return wire
+
+    def wire_prompt(self) -> Dict[str, Any]:
+        """Emit only the system-prompt layers that are set; ``None`` ones are omitted."""
+        layers = (
+            ("systemPrompt", self.system),
+            ("appendSystemPrompt", self.append_system),
+        )
+        return {key: text for key, text in layers if text is not None}
+
+
+class ClaudeAgentConfig(HarnessAgentConfig):
+    """Claude's config. No Pi built-ins; tools are delivered over MCP, and
+    ``permission_policy`` answers Claude's tool-use prompts in a headless run."""
+
+    harness: ClassVar[HarnessType] = HarnessType.CLAUDE
+
+    tool_specs: List[ToolSpec] = Field(
+        default_factory=list,
+        validation_alias=AliasChoices("tool_specs", "custom_tools"),
+    )
+    permission_policy: PermissionPolicy = "auto"
+
+    @field_validator("tool_specs", mode="before")
+    @classmethod
+    def _coerce_tool_specs(cls, value: Any) -> List[ToolSpec]:
+        return list(map(coerce_tool_spec, value or []))
+
+    @property
+    def custom_tools(self) -> List[Dict[str, Any]]:
+        return [spec.to_wire() for spec in self.tool_specs]
+
+    def wire_tools(self) -> Dict[str, Any]:
+        """Claude's ``/run`` tool fields: no built-ins, the resolved specs, the optional
+        callback, and the configured permission policy."""
+        wire: Dict[str, Any] = {"tools": []}  # Claude has no Pi built-in tools
+        wire["customTools"] = self.custom_tools
+        callback = self.tool_callback
+        wire["toolCallback"] = callback.to_wire() if callback else None
+        wire["permissionPolicy"] = self.permission_policy
+        return wire
+
+
+class AgentaAgentConfig(PiAgentConfig):
+    """The Agenta harness's config. It *is* a Pi config (same engine, same tool delivery and
+    system-prompt layers), plus the forced ``skills`` the Agenta harness always ships.
+
+    ``skills`` are skill directory names the runner resolves against its bundled
+    ``services/agent/skills/`` root and loads into Pi's resource loader, so they appear in the
+    system prompt on every run."""
+
+    harness: ClassVar[HarnessType] = HarnessType.AGENTA
+
+    skills: List[str] = Field(default_factory=list)
+
+    def wire_tools(self) -> Dict[str, Any]:
+        """Pi's tool fields plus ``skills``, the forced skill names the runner loads."""
+        return dict(super().wire_tools(), skills=list(self.skills))
+
+
+# ---------------------------------------------------------------------------
+# The session bundle
+# ---------------------------------------------------------------------------
+
+
+class SessionConfig(BaseModel):
+    """Everything one run needs except where it runs.
+
+    ``agent`` is the neutral definition. ``secrets`` are provider keys injected as harness
+    env, never written to the agent filesystem. The ``builtin_tools`` / ``custom_tools`` /
+    ``tool_callback`` triple is the resolved tool delivery (Agenta produces it server-side;
+    empty for a bare standalone run). Sandbox is intentionally absent: it is a
+    backend/environment concern."""
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    agent: AgentConfig
+    secrets: Dict[str, str] = Field(default_factory=dict)
+    permission_policy: PermissionPolicy = "auto"  # plain str alias: "auto" | "deny"
+    trace: Optional[TraceContext] = None
+    session_id: Optional[str] = None
+    builtin_names: List[str] = Field(
+        default_factory=list,
+        validation_alias=AliasChoices("builtin_names", "builtin_tools"),
+    )
+    tool_specs: List[ToolSpec] = Field(
+        default_factory=list,
+        validation_alias=AliasChoices("tool_specs", "custom_tools"),
+    )
+    tool_callback: Optional[ToolCallback] = None
+    mcp_servers: List[ResolvedMCPServer] = Field(default_factory=list)
+
+    @field_validator("tool_specs", mode="before")
+    @classmethod
+    def _coerce_tool_specs(cls, value: Any) -> List[ToolSpec]:
+        return [coerce_tool_spec(item) for item in value or []]
+
+    @field_validator("mcp_servers", mode="before")
+    @classmethod
+    def _coerce_resolved_mcp_servers(cls, value: Any) -> List[ResolvedMCPServer]:
+        return [
+            item
+            if isinstance(item, ResolvedMCPServer)
+            else ResolvedMCPServer.model_validate(item)
+            for item in value or []  # ``None`` validates as an empty list
+        ]
+
+    @property
+    def builtin_tools(self) -> List[str]:
+        return list(self.builtin_names)  # a fresh list, not the field itself
+
+    @property
+    def custom_tools(self) -> List[Dict[str, Any]]:
+        return [tool_spec.to_wire() for tool_spec in self.tool_specs]
+
+
+# ---------------------------------------------------------------------------
+# Parsing helpers (ported from the agent service's inputs.py)
+# ---------------------------------------------------------------------------
+
+
+def _as_list(raw: Any) -> List[Any]:
+    """Wrap a lone dict in a list, pass a list through, map anything else to ``[]``."""
+    if isinstance(raw, list):
+        return raw  # returned as-is, not copied
+    wrapped: List[Any] = [raw] if isinstance(raw, dict) else []
+    return wrapped
+
+
+def _parse_mcp_servers_raw(
+    params: Dict[str, Any],
+    defaults: AgentConfig,
+) -> List[Any]:
+    """Fetch the still-unvalidated ``mcp_servers`` entries, using defaults when absent.
+
+    The ``agent`` element is consulted when the request has one, the flat request otherwise;
+    :class:`AgentConfig` construction performs the canonical validation afterwards."""
+    container = params.get("agent")
+    if not isinstance(container, dict):
+        container = params
+    declared = container.get("mcp_servers")
+    # ``None`` (absent or null) means "not specified"; an explicit [] is kept.
+    return list(defaults.mcp_servers) if declared is None else _as_list(declared)
+
+
+def _parse_harness_options(
+    params: Dict[str, Any],
+    defaults: AgentConfig,
+) -> Dict[str, Dict[str, Any]]:
+    """Resolve the per-harness options bag for a request/config dict.
+
+    Looks for ``harness_options`` on the ``agent`` element when there is one, otherwise on
+    the flat request. Entries whose value is not a dict are discarded and harness names are
+    lower-cased to line up with :class:`HarnessType` values; no usable entry -> defaults.
+    """
+    container = params.get("agent")
+    if not isinstance(container, dict):
+        container = params
+    raw = container.get("harness_options")
+    entries = raw.items() if isinstance(raw, dict) else ()
+    # Shallow-copy each bag so the parsed config does not alias the request's dicts.
+    options: Dict[str, Dict[str, Any]] = {
+        str(name).lower(): dict(opts) for name, opts in entries if isinstance(opts, dict)
+    }
+    return options if options else dict(defaults.harness_options)
+
+
+def _system_text(messages: Optional[List[Any]]) -> str:
+    """Concatenate every system message of a prompt-template into AGENTS.md text.
+
+    String content is taken whole; block content contributes its ``text`` blocks.
+    Empty pieces are skipped and the rest are joined with blank lines.
+    """
+    pieces: List[Any] = []
+    for entry in messages or []:
+        is_system = isinstance(entry, dict) and entry.get("role") == "system"
+        body = entry.get("content") if is_system else None
+        if isinstance(body, str):
+            pieces.append(body)
+        for block in body if isinstance(body, list) else ():
+            if isinstance(block, dict) and block.get("type") == "text":
+                pieces.append(block.get("text", ""))
+    return "\n\n".join(filter(None, pieces))
+
+
+def _parse_agent_fields(
+    params: Dict[str, Any],
+    defaults: AgentConfig,
+) -> Tuple[Optional[str], Optional[str], Any]:
+    """Pull (instructions, model, tools) from a request/config dict, with fallbacks.
+
+    The ``agent`` element wins over the ``prompt`` template, which wins over the flat
+    request. Every field left unset falls back to ``defaults``, whichever shape won.
+    """
+    agent = params.get("agent")
+    prompt_cfg = params.get("prompt")
+    if isinstance(agent, dict):
+        # ``agents_md`` is the field the playground/catalog schema exposes; ``instructions`` is
+        # the legacy key kept as a fallback so already-stored agent configs still resolve.
+        instructions = agent.get("agents_md") or agent.get("instructions")
+        model = agent.get("model")
+        raw_tools = agent.get("tools")
+    elif isinstance(prompt_cfg, dict):
+        llm_config = prompt_cfg.get("llm_config") or {}
+        model = llm_config.get("model")
+        instructions = _system_text(prompt_cfg.get("messages"))
+        raw_tools = llm_config.get("tools")
+        if raw_tools is None:
+            raw_tools = prompt_cfg.get("tools")
+    else:
+        model = params.get("model")
+        instructions = params.get("agents_md")
+        raw_tools = params.get("tools")
+
+    # An absent/null tools entry inherits the defaults; an explicit [] clears them.
+    if raw_tools is None:
+        raw_tools = defaults.tools
+    return instructions or defaults.instructions, model or defaults.model, raw_tools
diff --git a/sdks/python/agenta/sdk/agents/errors.py b/sdks/python/agenta/sdk/agents/errors.py
new file mode 100644
index 0000000000..b9f136a472
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/errors.py
@@ -0,0 +1,26 @@
+"""Typed errors for the agent runtime."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from .dtos import HarnessType
+from .tools.errors import ToolResolutionError
+
+__all__ = ["UnsupportedHarnessError", "ToolResolutionError"]
+
+if TYPE_CHECKING:
+    from .interfaces import Backend
+
+
+class UnsupportedHarnessError(RuntimeError):
+    """Signals that the chosen backend is unable to drive the requested harness."""
+
+    def __init__(self, harness: HarnessType, backend: "Backend") -> None:
+        values = sorted(member.value for member in backend.supported_harnesses)
+        super().__init__(
+            f"{type(backend).__name__} cannot drive harness '{harness.value}'; "
+            f"it supports: {', '.join(values) or '(none)'}"
+        )
+        self.harness = harness
+        self.backend = backend
diff --git a/sdks/python/agenta/sdk/agents/interfaces.py b/sdks/python/agenta/sdk/agents/interfaces.py
new file mode 100644
index 0000000000..a7df7280d5
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/interfaces.py
@@ -0,0 +1,317 @@
+"""The ports of the agent runtime: the abstract contracts (Agenta calls these interfaces).
+
+Three layers, lowest to highest:
+
+- ``Backend`` is the engine. It declares which harnesses it can drive
+  (``supported_harnesses``), owns sandbox + session lifecycle, and is pure plumbing: it
+  takes an already-harness-shaped config and launches it. Adapters: ``RivetBackend``,
+  ``InProcessPiBackend``, ``LocalBackend``.
+- ``Sandbox`` is where a session's process tree lives, plus the provisioning verb
+  (``add_files``).
+- ``Session`` is one conversation (``prompt``, ``destroy``).
+- ``Environment`` sits above a backend and owns the sandbox policy.
+
+The ``Harness`` port (with its ``PiHarness`` / ``ClaudeHarness`` adapters) sits above an
+``Environment`` and validates against ``Backend.supported_harnesses``.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import ClassVar, FrozenSet, Mapping, Optional, Sequence
+
+from .dtos import (
+    AgentResult,
+    EventSink,
+    HarnessAgentConfig,
+    HarnessType,
+    Message,
+    SessionConfig,
+    TraceContext,
+)
+from .errors import UnsupportedHarnessError
+from .streaming import AgentRun
+
+
+# ---------------------------------------------------------------------------
+# Sandbox and Session
+# ---------------------------------------------------------------------------
+
+
+class Sandbox(ABC):
+    """Where a session's process tree runs. Holds the provisioning verb and teardown.
+
+    ``add_files`` lays files into the sandbox before the session prompts (AGENTS.md, a
+    bundled extension, an uploaded login). It is a provisioning hook used by the runtime
+    and is never exposed to the agent-config author. Neither method is abstract.
+    """
+
+    async def add_files(self, files: Mapping[str, bytes]) -> None:
+        """Write ``files`` (a name-to-bytes mapping) into the sandbox. No-op by default."""
+        return None
+
+    async def destroy(self) -> None:
+        """Tear the sandbox down. No-op by default."""
+        return None
+
+
+class Session(ABC):
+    """One conversation over a harness running in a sandbox."""
+
+    @property
+    @abstractmethod
+    def id(self) -> Optional[str]:
+        """The engine's session id, carried forward so a follow-up turn can resume it."""
+
+    @abstractmethod
+    async def prompt(
+        self,
+        messages: Sequence[Message],
+        *,
+        on_event: Optional[EventSink] = None,
+    ) -> AgentResult:
+        """Run one turn (the one-shot path); ``on_event`` is an optional ``EventSink``."""
+
+    @abstractmethod
+    def stream(self, messages: Sequence[Message]) -> AgentRun:
+        """Run one turn, yielding events live across the boundary.
+
+        Returns an :class:`~agenta.sdk.agents.streaming.AgentRun`: an async-iterable of
+        ``AgentEvent`` that also carries the terminal ``AgentResult`` once consumed. This is
+        the live counterpart of :meth:`prompt`.
+        """
+
+    async def destroy(self) -> None:
+        """Drop the session's resources. The default is a no-op (cold + replay model)."""
+        return None
+
+
+class SessionStore(ABC):
+    """Durable conversation history behind the agent session id.
+
+    The cold runtime still receives the full message history on every turn. This port is the
+    place a platform-backed or file-backed store attaches when the server owns that history.
+    """
+
+    @abstractmethod
+    async def load(self, session_id: str) -> Sequence[Message]:
+        """Return the neutral message history for ``session_id``."""
+
+    @abstractmethod
+    async def save_turn(
+        self,
+        session_id: str,
+        *,
+        messages: Sequence[Message],
+        result: Optional[AgentResult] = None,
+    ) -> None:
+        """Persist one completed cold turn: its ``messages`` and, when given, its ``result``."""
+
+
+class NoopSessionStore(SessionStore):
+    """Placeholder store until server-owned history lands: loads nothing, saves nothing."""
+
+    async def load(self, session_id: str) -> Sequence[Message]:
+        return ()
+
+    async def save_turn(
+        self,
+        session_id: str,
+        *,
+        messages: Sequence[Message],
+        result: Optional[AgentResult] = None,
+    ) -> None:
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Backend (the engine)
+# ---------------------------------------------------------------------------
+
+
+class Backend(ABC):
+    """The engine. Declares supported harnesses; owns sandbox + session lifecycle.
+
+    Each concrete backend is its own thing and hard-codes what makes it that engine (its
+    engine id, its supported harnesses). They do not share a base beyond this ABC.
+    """
+
+    #: Harness types this engine can run (empty here); ``supports`` tests membership in it.
+    supported_harnesses: ClassVar[FrozenSet[HarnessType]] = frozenset()
+
+    def supports(self, harness: HarnessType) -> bool:
+        return harness in self.supported_harnesses
+
+    async def setup(self) -> None:
+        """Bring the backend up. No-op by default."""
+        return None
+
+    async def shutdown(self) -> None:
+        """Release backend resources. No-op by default."""
+        return None
+
+    @abstractmethod
+    async def create_sandbox(self) -> Sandbox:
+        """Create a sandbox this backend can run a session in."""
+
+    @abstractmethod
+    async def create_session(
+        self,
+        sandbox: Sandbox,
+        config: HarnessAgentConfig,
+        *,
+        harness: HarnessType,
+        secrets: Optional[Mapping[str, str]] = None,
+        trace: Optional[TraceContext] = None,
+        session_id: Optional[str] = None,
+    ) -> Session:
+        """Open a session in ``sandbox`` for an already-harness-shaped ``config``."""
+
+
+# ---------------------------------------------------------------------------
+# Environment (sandbox policy over a backend)
+# ---------------------------------------------------------------------------
+
+
+class Environment:
+    """A layer above a backend that owns the sandbox policy.
+
+    Each session gets a fresh sandbox by default; ``sandbox_per_session=False`` shares one.
+    """
+
+    def __init__(self, backend: Backend, *, sandbox_per_session: bool = True) -> None:
+        self._backend = backend
+        self._sandbox_per_session = sandbox_per_session
+        self._shared: Optional[Sandbox] = None  # the reused sandbox when not per-session
+
+    @property
+    def backend(self) -> Backend:
+        return self._backend
+
+    async def setup(self) -> None:
+        await self._backend.setup()
+
+    async def shutdown(self) -> None:
+        shared, self._shared = self._shared, None  # forget it even if teardown fails
+        try:
+            if shared is not None:
+                await shared.destroy()
+        finally:  # the backend must be released even when sandbox teardown raises
+            await self._backend.shutdown()
+
+    async def _sandbox(self) -> Sandbox:
+        if self._sandbox_per_session:
+            return await self._backend.create_sandbox()
+        if self._shared is None:
+            self._shared = await self._backend.create_sandbox()
+        return self._shared
+
+    async def create_session(
+        self,
+        config: HarnessAgentConfig,
+        *,
+        harness: HarnessType,
+        session_config: SessionConfig,
+        provisioning: Optional[Mapping[str, bytes]] = None,
+    ) -> Session:
+        """Provision a sandbox per policy, then open a session in it."""
+        sandbox = await self._sandbox()
+        if provisioning:
+            await sandbox.add_files(provisioning)
+        return await self._backend.create_session(
+            sandbox,
+            config,
+            harness=harness,
+            secrets=session_config.secrets,
+            trace=session_config.trace,
+            session_id=session_config.session_id,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Harness (the port; adapters live in adapters/harnesses.py)
+# ---------------------------------------------------------------------------
+
+
+class Harness(ABC):
+    """A harness-type-specific wrapper over an :class:`Environment`.
+
+    Holds the mapping from the neutral :class:`~agenta.sdk.agents.dtos.SessionConfig` to this
+    harness's config, and validates at construction that the environment's backend can drive
+    it (raising :class:`UnsupportedHarnessError` otherwise). The backend stays pure plumbing;
+    the per-harness knowledge lives here.
+    """
+
+    harness_type: ClassVar[HarnessType]  # declared only; concrete harnesses assign it
+
+    def __init__(self, environment: Environment) -> None:
+        if not environment.backend.supports(self.harness_type):
+            raise UnsupportedHarnessError(self.harness_type, environment.backend)
+        self._env = environment
+
+    @property
+    def environment(self) -> Environment:
+        return self._env
+
+    async def setup(self) -> None:
+        await self._env.setup()
+
+    async def cleanup(self) -> None:
+        await self._env.shutdown()
+
+    @abstractmethod
+    def _to_harness_config(self, config: SessionConfig) -> HarnessAgentConfig:
+        """Map the neutral config into this harness's own config (the mapping logic)."""
+
+    def _provisioning(self, config: SessionConfig) -> Mapping[str, bytes]:
+        """Files to lay into the sandbox: ``AGENTS.md`` when instructions are non-blank."""
+        files: dict[str, bytes] = {}
+        instructions = config.agent.instructions
+        if instructions and instructions.strip():
+            files["AGENTS.md"] = instructions.encode("utf-8")
+        return files
+
+    async def create_session(self, config: SessionConfig) -> Session:
+        return await self._env.create_session(
+            self._to_harness_config(config),
+            harness=self.harness_type,
+            session_config=config,
+            provisioning=self._provisioning(config),
+        )
+
+    async def prompt(
+        self,
+        config: SessionConfig,
+        messages: Sequence[Message],
+        *,
+        on_event: Optional[EventSink] = None,
+    ) -> AgentResult:
+        """Convenience: open a session, run one turn, and destroy it (the cold path)."""
+        session = await self.create_session(config)
+        try:
+            result = await session.prompt(messages, on_event=on_event)
+            if result.session_id:
+                config.session_id = result.session_id
+            return result
+        finally:
+            await session.destroy()
+
+    async def stream(
+        self, config: SessionConfig, messages: Sequence[Message]
+    ) -> AgentRun:
+        """Convenience: open a cold session and stream one turn (live :meth:`prompt`).
+
+        Carries the session id onto ``config``; destroys the session when the stream ends.
+        """
+        session = await self.create_session(config)
+
+        def _absorb(result: AgentResult) -> None:
+            if result.session_id:
+                config.session_id = result.session_id
+
+        try:
+            run = session.stream(messages)
+        except BaseException:  # no run owns the session yet, so release it here
+            await session.destroy()
+            raise
+        return run.on_result(_absorb).on_cleanup(session.destroy)
diff --git a/sdks/python/agenta/sdk/agents/mcp/__init__.py b/sdks/python/agenta/sdk/agents/mcp/__init__.py
new file mode 100644
index 0000000000..4881f30d52
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/mcp/__init__.py
@@ -0,0 +1,22 @@
+"""Public MCP configuration and resolution API."""
+
+from .errors import MCPConfigurationError, MCPError, MissingMCPSecretError
+from .interfaces import MCPSecretProvider
+from .models import MCPServerConfig, ResolvedMCPServer
+from .parsing import parse_mcp_server_config, parse_mcp_server_configs
+from .resolver import MCPResolver
+from .wire import mcp_server_to_wire, mcp_servers_to_wire
+
+__all__ = [
+    "MCPServerConfig",
+    "ResolvedMCPServer",
+    "MCPSecretProvider",
+    "MCPResolver",
+    "parse_mcp_server_config",
+    "parse_mcp_server_configs",
+    "mcp_server_to_wire",
+    "mcp_servers_to_wire",
+    "MCPError",
+    "MCPConfigurationError",
+    "MissingMCPSecretError",
+]
diff --git a/sdks/python/agenta/sdk/agents/mcp/errors.py b/sdks/python/agenta/sdk/agents/mcp/errors.py
new file mode 100644
index 0000000000..2d2ab05193
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/mcp/errors.py
@@ -0,0 +1,33 @@
+"""Errors raised while parsing and resolving MCP server configuration."""
+
+from __future__ import annotations
+
+from typing import Any, Optional, Sequence
+
+
+class MCPError(RuntimeError):
+    """Base error for the agent MCP subsystem (configuration and secret failures)."""
+
+
+class MCPConfigurationError(MCPError):
+    """Raised when an MCP server declaration fails validation.
+
+    ``index`` is the entry's position in its list (when known); ``value`` is the raw input.
+    """
+
+    def __init__(
+        self, message: str, *, index: Optional[int] = None, value: Any = None
+    ) -> None:
+        super().__init__(message)
+        self.index, self.value = index, value
+
+
+class MissingMCPSecretError(MCPError):
+    """Raised when secrets required by an MCP server are not available."""
+
+    def __init__(self, *, server_name: str, secret_names: Sequence[str]) -> None:
+        self.server_name = server_name
+        self.secret_names = tuple(secret_names)  # frozen copy of the caller's sequence
+        listing = ", ".join(self.secret_names)
+        prefix = f"MCP server '{server_name}' is missing required secret(s): "
+        super().__init__(prefix + listing)
diff --git a/sdks/python/agenta/sdk/agents/mcp/interfaces.py b/sdks/python/agenta/sdk/agents/mcp/interfaces.py
new file mode 100644
index 0000000000..23c5c91522
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/mcp/interfaces.py
@@ -0,0 +1,10 @@
+"""Injected dependencies used by MCP resolution."""
+
+from __future__ import annotations
+
+from typing import Mapping, Protocol, Sequence
+
+
+class MCPSecretProvider(Protocol):
+    async def get_many(self, names: Sequence[str]) -> Mapping[str, str]:
+        """Return a name-to-value mapping for the requested secrets that are available."""
diff --git a/sdks/python/agenta/sdk/agents/mcp/models.py b/sdks/python/agenta/sdk/agents/mcp/models.py
new file mode 100644
index 0000000000..e4df7f87e5
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/mcp/models.py
@@ -0,0 +1,57 @@
+"""Canonical MCP server declarations and resolved runner configuration."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Literal, Optional
+
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+
+
+class MCPServerConfig(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+    # Declaration of one MCP server; keys outside the fields below are rejected.
+    name: str = Field(min_length=1)
+    transport: Literal["stdio", "http"] = "stdio"
+    command: Optional[str] = None  # required when transport is "stdio"
+    args: List[str] = Field(default_factory=list)
+    env: Dict[str, str] = Field(default_factory=dict, repr=False)
+    url: Optional[str] = None  # required when transport is "http"
+    secrets: Dict[str, str] = Field(default_factory=dict)  # env var name -> secret name
+    tools: List[str] = Field(default_factory=list)
+
+    @model_validator(mode="after")
+    def _validate_transport(self) -> "MCPServerConfig":
+        if self.transport == "stdio" and not self.command:
+            raise ValueError("stdio MCP server requires command")
+        if self.transport == "http" and not self.url:
+            raise ValueError("http MCP server requires url")
+        return self
+
+
+class ResolvedMCPServer(BaseModel):
+    """Frozen description of one MCP server, serializable with :meth:`to_wire`."""
+
+    model_config = ConfigDict(extra="forbid", frozen=True)
+
+    name: str
+    transport: Literal["stdio", "http"] = "stdio"
+    command: Optional[str] = None
+    args: List[str] = Field(default_factory=list)
+    env: Dict[str, str] = Field(default_factory=dict, repr=False)
+    url: Optional[str] = None
+    tools: List[str] = Field(default_factory=list)
+
+    def to_wire(self) -> Dict[str, Any]:
+        """Serialize to the runner wire mapping, omitting falsy optional fields."""
+        # Candidates are listed in wire order; lists and dicts are copied so the
+        # returned mapping never aliases this model's own containers.
+        optional: Dict[str, Any] = {
+            "command": self.command,
+            "args": list(self.args),
+            "env": dict(self.env),
+            "url": self.url,
+            "tools": list(self.tools),
+        }
+        wire: Dict[str, Any] = {"name": self.name, "transport": self.transport}
+        wire.update((key, value) for key, value in optional.items() if value)
+        return wire
diff --git a/sdks/python/agenta/sdk/agents/mcp/parsing.py b/sdks/python/agenta/sdk/agents/mcp/parsing.py
new file mode 100644
index 0000000000..dfb5f169a6
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/mcp/parsing.py
@@ -0,0 +1,39 @@
+"""Strict parsing of MCP server configuration."""
+
+from __future__ import annotations
+
+from typing import Any, Mapping, Sequence
+
+from pydantic import ValidationError
+
+from .errors import MCPConfigurationError
+from .models import MCPServerConfig
+
+
+def parse_mcp_server_config(
+    value: MCPServerConfig | Mapping[str, Any],
+) -> MCPServerConfig:
+    """Validate one MCP server declaration; failures become ``MCPConfigurationError``."""
+    try:
+        parsed = MCPServerConfig.model_validate(value)
+    except ValidationError as exc:
+        details = exc.errors(include_url=False, include_input=False)
+        message = f"Invalid MCP server configuration: {details}"
+        raise MCPConfigurationError(message, value=value) from exc
+    return parsed
+
+
+def parse_mcp_server_configs(
+    values: Sequence[MCPServerConfig | Mapping[str, Any]],
+) -> list[MCPServerConfig]:
+    """Validate declarations in order; the first failure is re-raised with its index."""
+    results: list[MCPServerConfig] = []
+    for position, entry in enumerate(values):
+        try:
+            server = parse_mcp_server_config(entry)
+        except MCPConfigurationError as exc:
+            message = str(exc)
+            raise MCPConfigurationError(message, index=position, value=entry) from exc
+        else:
+            results.append(server)
+    return results
diff --git a/sdks/python/agenta/sdk/agents/mcp/resolver.py b/sdks/python/agenta/sdk/agents/mcp/resolver.py
new file mode 100644
index 0000000000..6ce78162dd
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/mcp/resolver.py
@@ -0,0 +1,68 @@
+"""Resolution of MCP server declarations into runner configuration."""
+
+from __future__ import annotations
+
+from typing import Mapping, Sequence
+
+from agenta.sdk.agents.tools.models import MissingSecretPolicy
+
+from .errors import MissingMCPSecretError
+from .interfaces import MCPSecretProvider
+from .models import MCPServerConfig, ResolvedMCPServer
+
+
+class MCPResolver:
+    """Turns ``MCPServerConfig`` declarations into ``ResolvedMCPServer`` values."""
+
+    def __init__(
+        self,
+        *,
+        secret_provider: MCPSecretProvider,
+        missing_secret_policy: MissingSecretPolicy = MissingSecretPolicy.ERROR,
+    ) -> None:
+        self._secret_provider = secret_provider
+        self._missing_secret_policy = missing_secret_policy
+
+    async def resolve(
+        self,
+        server_configs: Sequence[MCPServerConfig],
+    ) -> list[ResolvedMCPServer]:
+        """Fetch all referenced secrets in one call and copy their values into each ``env``.
+
+        Raises ``MissingMCPSecretError`` for unavailable secrets under the ``ERROR`` policy.
+        """
+        secret_names = sorted(
+            {name for config in server_configs for name in config.secrets.values()}
+        )
+        secret_values: Mapping[str, str] = (
+            await self._secret_provider.get_many(secret_names) if secret_names else {}
+        )
+
+        resolved: list[ResolvedMCPServer] = []
+        for server_config in server_configs:
+            # De-duplicate (order kept) so a secret shared by several env vars is listed once.
+            declared = dict.fromkeys(server_config.secrets.values())
+            missing = [name for name in declared if name not in secret_values]
+            if missing and self._missing_secret_policy == MissingSecretPolicy.ERROR:
+                raise MissingMCPSecretError(
+                    server_name=server_config.name,
+                    secret_names=missing,
+                )
+
+            env = dict(server_config.env)
+            for env_var, secret_name in server_config.secrets.items():
+                if secret_name in secret_values:
+                    env[env_var] = secret_values[secret_name]
+
+            resolved.append(
+                ResolvedMCPServer(
+                    name=server_config.name,
+                    transport=server_config.transport,
+                    command=server_config.command,
+                    args=list(server_config.args),
+                    env=env,
+                    url=server_config.url,
+                    tools=list(server_config.tools),
+                )
+            )
+        return resolved
diff --git a/sdks/python/agenta/sdk/agents/mcp/wire.py b/sdks/python/agenta/sdk/agents/mcp/wire.py
new file mode 100644
index 0000000000..f9c1a7cb68
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/mcp/wire.py
@@ -0,0 +1,17 @@
+"""Serialization of resolved MCP servers to the runner contract."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, Sequence
+
+from .models import ResolvedMCPServer
+
+
+def mcp_server_to_wire(server: ResolvedMCPServer) -> Dict[str, Any]:
+    """Wire form of one resolved server (delegates to ``ResolvedMCPServer.to_wire``)."""
+    return server.to_wire()
+
+
+def mcp_servers_to_wire(servers: Sequence[ResolvedMCPServer]) -> list[Dict[str, Any]]:
+    """Wire forms of ``servers``, in the same order."""
+    return list(map(mcp_server_to_wire, servers))
diff --git a/sdks/python/agenta/sdk/agents/streaming.py b/sdks/python/agenta/sdk/agents/streaming.py
new file mode 100644
index 0000000000..e631d0ecdc
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/streaming.py
@@ -0,0 +1,91 @@
+"""Live streaming surface: ``AgentRun`` turns the runner's NDJSON record stream into a live
+``AgentEvent`` async-iterable plus the one terminal ``AgentResult``.
+
+A streaming transport (``utils.deliver_*_stream``) yields the runner's ``StreamRecord`` lines:
+``{"kind":"event", ...}`` for every event the moment it is built, then exactly one
+``{"kind":"result", ...}`` terminal record. ``AgentRun`` wraps that source so a caller can::
+
+    run = session.stream(messages)
+    async for event in run:
+        ...               # event is an AgentEvent, flushed live
+    result = run.result()  # the terminal AgentResult (session_id, usage, stop_reason, ...)
+
+This lives in its own module (not ``dtos``) because parsing the terminal record reuses
+``utils.wire.result_from_wire``, which imports the DTOs — keeping ``AgentRun`` above both
+avoids an import cycle.
+"""
+
+from __future__ import annotations
+
+from typing import (
+    Any,
+    AsyncIterator,
+    Awaitable,
+    Callable,
+    Dict,
+    List,
+    Optional,
+)
+
+from .dtos import AgentEvent, AgentResult
+from .utils import result_from_wire
+
+# Hooks: a result hook sees the terminal result once; a cleanup runs when iteration ends
+# (drain, break, or cancel).
+ResultHook = Callable[[AgentResult], None]
+Cleanup = Callable[[], Awaitable[None]]
+
+
+class AgentRun:
+    """Async-iterable of a run's live ``AgentEvent``s plus its terminal ``AgentResult``.
+
+    Iterate it once. Each ``{"kind":"event"}`` record is yielded as an ``AgentEvent``; the
+    ``{"kind":"result"}`` record is parsed (raising the run's error when ``ok`` is false,
+    just like the one-shot path) and ends iteration. ``result()`` returns it afterwards.
+    """
+
+    def __init__(self, records: AsyncIterator[Dict[str, Any]]) -> None:
+        self._records = records
+        self._result: Optional[AgentResult] = None
+        self._result_hooks: List[ResultHook] = []
+        self._cleanups: List[Cleanup] = []
+
+    def on_result(self, hook: ResultHook) -> "AgentRun":
+        """Register a callback to run when the terminal result arrives (chainable)."""
+        self._result_hooks.append(hook)
+        return self
+
+    def on_cleanup(self, cleanup: Cleanup) -> "AgentRun":
+        """Register an async cleanup to run when iteration ends, any way it ends (chainable)."""
+        self._cleanups.append(cleanup)
+        return self
+
+    async def __aiter__(self) -> AsyncIterator[AgentEvent]:
+        try:
+            async for record in self._records:
+                kind = record.get("kind")
+                if kind == "event":
+                    event = AgentEvent.from_wire(record.get("event"))
+                    if event is not None:
+                        yield event
+                elif kind == "result":
+                    # result_from_wire raises on ok=false — surface it to the consumer.
+                    self._result = result_from_wire(record.get("result") or {})
+                    for hook in self._result_hooks:
+                        hook(self._result)
+                    return
+        finally:
+            for cleanup in self._cleanups:
+                try:
+                    await cleanup()
+                except Exception:  # pylint: disable=broad-except
+                    import logging  # best-effort: record it, then run the other cleanups
+                    logging.getLogger(__name__).exception("AgentRun cleanup failed")
+
+    def result(self) -> AgentResult:
+        """The terminal result. Available only after the stream is fully consumed."""
+        if self._result is None:
+            raise RuntimeError(
+                "AgentRun result is not available until the stream is fully consumed"
+            )
+        return self._result
diff --git a/sdks/python/agenta/sdk/agents/tools/__init__.py b/sdks/python/agenta/sdk/agents/tools/__init__.py
new file mode 100644
index 0000000000..2b40dc082e
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/__init__.py
@@ -0,0 +1,75 @@
+"""Public agent-tool configuration and resolution API."""
+
+from .compat import (
+    ToolConfigDiagnostic,
+    ToolConfigParseResult,
+    coerce_tool_config,
+    coerce_tool_configs,
+)
+from .errors import (
+    DuplicateToolNameError,
+    GatewayToolResolutionError,
+    MissingToolSecretError,
+    ToolConfigError,
+    ToolConfigurationError,
+    ToolError,
+    ToolResolutionError,
+    UnsupportedToolProviderError,
+)
+from .interfaces import GatewayToolResolver, ToolSecretProvider
+from .models import (
+    BuiltinToolConfig,
+    CallbackToolSpec,
+    ClientToolConfig,
+    ClientToolSpec,
+    CodeToolConfig,
+    CodeToolSpec,
+    GatewayToolConfig,
+    GatewayToolResolution,
+    MissingSecretPolicy,
+    ResolvedToolSet,
+    ToolCallback,
+    ToolConfig,
+    ToolConfigBase,
+    ToolSpec,
+)
+from .parsing import parse_tool_config, parse_tool_configs
+from .resolver import EnvironmentToolSecretProvider, ToolResolver
+from .wire import tool_spec_to_wire, tool_specs_to_wire
+
+__all__ = [
+    "ToolConfigBase",
+    "ToolConfig",
+    "BuiltinToolConfig",
+    "GatewayToolConfig",
+    "CodeToolConfig",
+    "ClientToolConfig",
+    "ToolSpec",
+    "CallbackToolSpec",
+    "CodeToolSpec",
+    "ClientToolSpec",
+    "ToolCallback",
+    "ResolvedToolSet",
+    "GatewayToolResolution",
+    "MissingSecretPolicy",
+    "ToolResolver",
+    "ToolSecretProvider",
+    "GatewayToolResolver",
+    "EnvironmentToolSecretProvider",
+    "parse_tool_config",
+    "parse_tool_configs",
+    "coerce_tool_config",
+    "coerce_tool_configs",
+    "ToolConfigDiagnostic",
+    "ToolConfigParseResult",
+    "tool_spec_to_wire",
+    "tool_specs_to_wire",
+    "ToolError",
+    "ToolConfigError",
+    "ToolConfigurationError",
+    "ToolResolutionError",
+    "GatewayToolResolutionError",
+    "UnsupportedToolProviderError",
+    "MissingToolSecretError",
+    "DuplicateToolNameError",
+]
diff --git a/sdks/python/agenta/sdk/agents/tools/compat.py b/sdks/python/agenta/sdk/agents/tools/compat.py
new file mode 100644
index 0000000000..e356abfdde
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/compat.py
@@ -0,0 +1,132 @@
+"""Compatibility conversion for legacy playground and persisted tool shapes."""
+
+from __future__ import annotations
+
+from typing import Any, Literal, Optional, Sequence
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from .errors import ToolConfigurationError
+from .models import (
+    BuiltinToolConfig,
+    ClientToolConfig,
+    CodeToolConfig,
+    GatewayToolConfig,
+    ToolConfig,
+)
+from .parsing import parse_tool_config
+
+
+class ToolConfigDiagnostic(BaseModel):
+    model_config = ConfigDict(frozen=True)
+    # One rejected entry: its position in the input and the error text raised for it.
+    index: int
+    message: str
+
+
+class ToolConfigParseResult(BaseModel):
+    model_config = ConfigDict(frozen=True)
+    # Outcome of a batch coercion: converted entries plus one diagnostic per rejected entry.
+    tool_configs: list[ToolConfig] = Field(default_factory=list)
+    diagnostics: list[ToolConfigDiagnostic] = Field(default_factory=list)
+
+
+def _parse_gateway_slug(slug: Any) -> Optional[dict[str, Any]]:
+    """Parse a ``tools.<provider>.<integration>.<action>.<connection>`` slug.
+
+    ``__`` is accepted in place of ``.``; anything else that does not fit yields ``None``.
+    """
+    if not isinstance(slug, str):
+        return None
+    # Normalise the double-underscore separator to dots before splitting.
+    segments = slug.replace("__", ".").split(".")
+    if len(segments) == 5 and segments[0] == "tools":
+        keys = ("provider", "integration", "action", "connection")
+        return {"type": "gateway", **dict(zip(keys, segments[1:]))}
+    return None
+
+
+def _copy_tool_metadata(
+    source: dict[str, Any], target: dict[str, Any]
+) -> dict[str, Any]:
+    merged = {**target}  # work on a copy; ``target`` itself is never mutated
+    if "needs_approval" in source:
+        merged["needs_approval"] = bool(source["needs_approval"])
+    if isinstance(render := source.get("render"), dict):
+        merged["render"] = {**render}  # only a dict ``render`` is carried over, as a copy
+    return merged
+
+
+def coerce_tool_config(value: Any) -> ToolConfig:
+    """Convert one supported legacy shape into canonical tool configuration.
+
+    Canonical models pass through; a bare string names a builtin tool; a mapping is
+    normalised (``composio`` becomes a ``gateway`` type), parsed when its ``type`` is
+    canonical, matched against the gateway slug form, or treated as a builtin by ``name``.
+    """
+    canonical = (BuiltinToolConfig, GatewayToolConfig, CodeToolConfig, ClientToolConfig)
+    if isinstance(value, canonical):
+        return value
+    if isinstance(value, str):
+        return BuiltinToolConfig(name=value)
+    if not isinstance(value, dict):
+        raise ToolConfigurationError(
+            "Tool configuration must be a string or mapping",
+            value=value,
+        )
+
+    data = {**value}
+    if data.get("type") == "composio":
+        # Legacy alias: composio entries become gateway tools with a default provider.
+        data["type"] = "gateway"
+        data.setdefault("provider", "composio")
+    if data.get("type") in {"builtin", "gateway", "code", "client"}:
+        return parse_tool_config(data)
+
+    nested = data.get("function")
+    slug = (nested if isinstance(nested, dict) else {}).get("name") or data.get("name")
+    gateway = _parse_gateway_slug(slug)
+    if gateway:
+        return parse_tool_config(_copy_tool_metadata(data, gateway))
+
+    name = data.get("name")
+    if isinstance(name, str) and "type" not in data:
+        return BuiltinToolConfig(name=name)
+
+    raise ToolConfigurationError("Unsupported tool configuration shape", value=value)
+
+
+def coerce_tool_configs(
+    values: Optional[Sequence[Any]],
+    *,
+    on_error: Literal["raise", "collect"] = "raise",
+) -> ToolConfigParseResult:
+    """Convert legacy values, either raising or returning structured diagnostics.
+
+    A ``None`` ``values`` means no entries. ``on_error="raise"`` raises on the first bad
+    entry; ``"collect"`` records each bad entry as a diagnostic and keeps going.
+    """
+    tool_configs: list[ToolConfig] = []
+    diagnostics: list[ToolConfigDiagnostic] = []
+    for index, value in enumerate(values or []):
+        if value is None:
+            error = ToolConfigurationError(
+                "Tool configuration cannot be null",
+                index=index,
+                value=value,
+            )
+        else:
+            try:
+                tool_configs.append(coerce_tool_config(value))
+                continue
+            except ToolConfigurationError as exc:
+                # Re-issue with the entry's position; chain the original so its traceback
+                # is not lost when the error is raised outside this ``except`` block.
+                error = ToolConfigurationError(str(exc), index=index, value=value)
+                error.__cause__ = exc
+
+        if on_error == "raise":
+            raise error
+        diagnostics.append(ToolConfigDiagnostic(index=index, message=str(error)))
+
+    return ToolConfigParseResult(tool_configs=tool_configs, diagnostics=diagnostics)
diff --git a/sdks/python/agenta/sdk/agents/tools/errors.py b/sdks/python/agenta/sdk/agents/tools/errors.py
new file mode 100644
index 0000000000..24d62614c4
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/errors.py
@@ -0,0 +1,82 @@
+"""Errors raised while parsing and resolving agent tools."""
+
+from __future__ import annotations
+
+from typing import Any, Optional, Sequence
+
+
+class ToolError(RuntimeError):
+    """Base error for the agent tools domain."""
+
+
+class ToolConfigurationError(ToolError):
+    """Raised when tool configuration cannot be converted to a canonical model."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        index: Optional[int] = None,
+        value: Any = None,
+    ) -> None:
+        super().__init__(message)
+        self.index = index  # position of the offending item in its list, when known
+        self.value = value  # the raw input that failed to convert
+
+
+ToolConfigError = ToolConfigurationError
+
+
+class ToolResolutionError(ToolError):
+    """Raised when tool configuration cannot become runnable specifications."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        status: Optional[int] = None,
+        ref_count: Optional[int] = None,
+        spec_count: Optional[int] = None,
+        provider: Optional[str] = None,
+        reference: Optional[str] = None,
+    ) -> None:
+        super().__init__(message)
+        self.status = status  # NOTE(review): presumably a gateway HTTP status - confirm
+        self.ref_count = ref_count  # NOTE(review): presumably refs requested - confirm
+        self.spec_count = spec_count  # NOTE(review): presumably specs returned - confirm
+        self.provider = provider  # provider name, e.g. set by UnsupportedToolProviderError
+        self.reference = reference  # NOTE(review): presumably a tool reference - confirm
+
+
+class GatewayToolResolutionError(ToolResolutionError):
+    """Raised when a gateway adapter cannot resolve a configured tool."""
+
+
+class UnsupportedToolProviderError(ToolResolutionError):
+    """Raised when no resolver is available for a configured gateway provider."""
+
+    def __init__(self, provider: str) -> None:
+        super().__init__(
+            f"Unsupported tool provider: {provider}",
+            provider=provider,
+        )
+
+
+class MissingToolSecretError(ToolResolutionError):
+    """Raised when a tool declares required secrets that a provider cannot supply."""
+
+    def __init__(self, *, tool_name: str, secret_names: Sequence[str]) -> None:
+        names = tuple(secret_names)  # materialize once: joined below, then stored
+        super().__init__(
+            f"Tool '{tool_name}' is missing required secret(s): {', '.join(names)}"
+        )
+        self.tool_name = tool_name
+        self.secret_names = names  # always a tuple, whatever sequence was passed in
+
+
+class DuplicateToolNameError(ToolResolutionError):
+    """Raised when two configured tools resolve to the same model-visible name."""
+
+    def __init__(self, name: str) -> None:
+        super().__init__(f"Duplicate tool name: {name}")
+        self.name = name
diff --git a/sdks/python/agenta/sdk/agents/tools/interfaces.py b/sdks/python/agenta/sdk/agents/tools/interfaces.py
new file mode 100644
index 0000000000..3ccc4c767c
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/interfaces.py
@@ -0,0 +1,20 @@
+"""Injected dependencies used by the tool resolver."""
+
+from __future__ import annotations
+
+from typing import Mapping, Protocol, Sequence
+
+from .models import GatewayToolConfig, GatewayToolResolution
+
+
+class ToolSecretProvider(Protocol):
+    async def get_many(self, names: Sequence[str]) -> Mapping[str, str]:
+        """Return values for the requested secret names, omitting unavailable ones."""
+
+
+class GatewayToolResolver(Protocol):
+    async def resolve(
+        self,
+        tools: Sequence[GatewayToolConfig],
+    ) -> GatewayToolResolution:
+        """Resolve gateway declarations into callback specifications."""
diff --git a/sdks/python/agenta/sdk/agents/tools/models.py b/sdks/python/agenta/sdk/agents/tools/models.py
new file mode 100644
index 0000000000..6e467f51dd
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/models.py
@@ -0,0 +1,221 @@
+"""Canonical tool configuration and resolved runtime specifications."""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+
+from pydantic import (
+    AliasChoices,
+    BaseModel,
+    ConfigDict,
+    Field,
+    TypeAdapter,
+    field_validator,
+)
+
+
+def _empty_object_schema() -> Dict[str, Any]:
+    return dict(type="object", properties={})  # a fresh dict on every call
+
+
+class ToolConfigBase(BaseModel):
+    """Fields shared by every persisted tool declaration."""
+
+    model_config = ConfigDict(extra="forbid")  # unknown keys fail validation
+
+    needs_approval: bool = False
+    render: Optional[Dict[str, Any]] = None
+
+
+class BuiltinToolConfig(ToolConfigBase):
+    type: Literal["builtin"] = "builtin"
+    name: str = Field(min_length=1)  # surfaced via ResolvedToolSet.builtin_names
+
+
+class GatewayToolConfig(ToolConfigBase):
+    type: Literal["gateway"] = "gateway"
+    provider: str = Field(default="composio", min_length=1)
+    integration: str = Field(min_length=1)
+    action: str = Field(min_length=1)
+    connection: str = Field(min_length=1)
+    name: Optional[str] = Field(default=None, min_length=1)
+
+    @property
+    def reference(self) -> str:  # tools.<provider>.<integration>.<action>.<connection>
+        return (
+            f"tools.{self.provider}.{self.integration}.{self.action}.{self.connection}"
+        )
+
+
+class CodeToolConfig(ToolConfigBase):
+    type: Literal["code"] = "code"
+    name: str = Field(min_length=1)
+    description: Optional[str] = None  # the resolver falls back to ``name``
+    runtime: Literal["python", "node"] = "python"
+    script: str = Field(min_length=1)  # source text; becomes ``CodeToolSpec.code``
+    input_schema: Dict[str, Any] = Field(default_factory=_empty_object_schema)
+    secrets: List[str] = Field(default_factory=list)  # secret names, not values
+
+
+class ClientToolConfig(ToolConfigBase):
+    type: Literal["client"] = "client"
+    name: str = Field(min_length=1)
+    description: Optional[str] = None  # the resolver falls back to ``name``
+    input_schema: Dict[str, Any] = Field(default_factory=_empty_object_schema)
+
+
+ToolConfig = Annotated[
+    Union[
+        BuiltinToolConfig,
+        GatewayToolConfig,
+        CodeToolConfig,
+        ClientToolConfig,
+    ],
+    Field(discriminator="type"),
+]
+TOOL_CONFIG_ADAPTER: TypeAdapter[ToolConfig] = TypeAdapter(ToolConfig)
+
+
+class ToolCallback(BaseModel):
+    """Where callback tool calls are sent."""
+
+    model_config = ConfigDict(frozen=True)
+
+    endpoint: str
+    authorization: Optional[str] = Field(default=None, repr=False)  # kept out of repr
+
+    def to_wire(self) -> Dict[str, Any]:  # authorization is emitted even when None
+        return {
+            "endpoint": self.endpoint,
+            "authorization": self.authorization,
+        }
+
+
+class ToolSpecBase(BaseModel):
+    """Fields shared by every resolved, runner-ready tool specification."""
+
+    model_config = ConfigDict(
+        extra="forbid",
+        frozen=True,
+        populate_by_name=True,
+    )
+    # Input accepts snake_case or camelCase keys; serialization by alias is camelCase.
+    name: str
+    description: str
+    input_schema: Dict[str, Any] = Field(
+        default_factory=_empty_object_schema,
+        validation_alias=AliasChoices("input_schema", "inputSchema"),
+        serialization_alias="inputSchema",
+    )
+    needs_approval: bool = Field(
+        default=False,
+        validation_alias=AliasChoices("needs_approval", "needsApproval"),
+        serialization_alias="needsApproval",
+    )
+    render: Optional[Dict[str, Any]] = None
+
+    def to_wire(self) -> Dict[str, Any]:  # alias-keyed JSON dict, None fields dropped
+        wire = self.model_dump(
+            mode="json",
+            by_alias=True,
+            exclude_none=True,
+        )
+        if not self.needs_approval:  # the flag is only sent when approval is required
+            wire.pop("needsApproval", None)
+        if not wire.get("env"):  # drop an empty env (only some subclasses define one)
+            wire.pop("env", None)
+        return wire
+
+
+class CallbackToolSpec(ToolSpecBase):
+    kind: Literal["callback"] = "callback"
+    call_ref: str = Field(
+        validation_alias=AliasChoices("call_ref", "callRef"),
+        serialization_alias="callRef",
+    )
+
+
+class CodeToolSpec(ToolSpecBase):
+    kind: Literal["code"] = "code"
+    runtime: Literal["python", "node"] = "python"
+    code: str
+    env: Dict[str, str] = Field(default_factory=dict, repr=False)  # kept out of repr
+
+
+class ClientToolSpec(ToolSpecBase):
+    kind: Literal["client"] = "client"
+
+
+ToolSpec = Annotated[
+    Union[CallbackToolSpec, CodeToolSpec, ClientToolSpec],
+    Field(discriminator="kind"),
+]
+TOOL_SPEC_ADAPTER: TypeAdapter[ToolSpec] = TypeAdapter(ToolSpec)
+
+
+def coerce_tool_spec(value: Any) -> ToolSpec:
+    """Return a typed spec, inferring ``kind`` and defaults for plain dicts."""
+    if isinstance(value, (CallbackToolSpec, CodeToolSpec, ClientToolSpec)):
+        return value
+    if not isinstance(value, dict):
+        raise TypeError("tool spec must be a mapping")
+    data = dict(value)
+    if not data.get("kind"):
+        if data.get("callRef") or data.get("call_ref"):
+            data["kind"] = "callback"
+        else:
+            data["kind"] = "code" if data.get("code") is not None else "client"
+    data.setdefault("description", data.get("name"))
+    # Adding the camelCase alias next to a snake_case key would trip extra="forbid".
+    if "input_schema" not in data:
+        data.setdefault("inputSchema", _empty_object_schema())
+    return TOOL_SPEC_ADAPTER.validate_python(data)
+
+
+class MissingSecretPolicy(str, Enum):
+    ERROR = "error"  # ToolResolver raises MissingToolSecretError
+    OMIT = "omit"  # the tool is still built, with only the secrets that were found
+
+
+class ResolvedToolSet(BaseModel):
+    """Resolved tools ready to attach to a session."""
+
+    model_config = ConfigDict(
+        frozen=True,
+        populate_by_name=True,
+    )
+
+    builtin_names: List[str] = Field(
+        default_factory=list,
+        validation_alias=AliasChoices("builtin_names", "builtin_tools"),
+    )
+    tool_specs: List[ToolSpec] = Field(
+        default_factory=list,
+        validation_alias=AliasChoices("tool_specs", "custom_tools"),
+    )
+    tool_callback: Optional[ToolCallback] = None
+    # Runs before field validation so plain dicts (or None) become typed specs.
+    @field_validator("tool_specs", mode="before")
+    @classmethod
+    def _coerce_specs(cls, value: Any) -> List[ToolSpec]:
+        return [coerce_tool_spec(item) for item in value or []]
+
+    @property
+    def builtin_tools(self) -> List[str]:
+        """Compatibility alias for the previous field name."""
+        return list(self.builtin_names)  # a copy of the stored list
+
+    @property
+    def custom_tools(self) -> List[Dict[str, Any]]:
+        """Compatibility wire dictionaries for callers not yet using typed specs."""
+        return [spec.to_wire() for spec in self.tool_specs]
+
+
+class GatewayToolResolution(BaseModel):
+    """Result returned by an injected gateway adapter."""
+
+    model_config = ConfigDict(frozen=True)
+
+    tool_specs: List[CallbackToolSpec] = Field(default_factory=list)
+    tool_callback: ToolCallback
diff --git a/sdks/python/agenta/sdk/agents/tools/parsing.py b/sdks/python/agenta/sdk/agents/tools/parsing.py
new file mode 100644
index 0000000000..b5779caa19
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/parsing.py
@@ -0,0 +1,39 @@
+"""Strict parsing of canonical tool configuration."""
+
+from __future__ import annotations
+
+from typing import Any, Mapping, Sequence
+
+from pydantic import ValidationError
+
+from .errors import ToolConfigurationError
+from .models import TOOL_CONFIG_ADAPTER, ToolConfig
+
+
+def parse_tool_config(value: ToolConfig | Mapping[str, Any]) -> ToolConfig:
+    """Validate a single canonical tool mapping; legacy or unknown fields are errors."""
+    try:
+        tool_config = TOOL_CONFIG_ADAPTER.validate_python(value)
+    except ValidationError as exc:
+        details = exc.errors(include_url=False, include_input=False)
+        raise ToolConfigurationError(
+            f"Invalid tool configuration: {details}", value=value
+        ) from exc
+    return tool_config
+
+
+def parse_tool_configs(
+    values: Sequence[ToolConfig | Mapping[str, Any]],
+) -> list[ToolConfig]:
+    """Validate canonical tool mappings in order, tagging a failure with its index."""
+    tool_configs: list[ToolConfig] = []
+    for position, candidate in enumerate(values):
+        try:
+            tool_config = parse_tool_config(candidate)
+        except ToolConfigurationError as exc:
+            # Same message, re-raised so the caller learns which item was rejected.
+            raise ToolConfigurationError(
+                str(exc), index=position, value=candidate
+            ) from exc
+        tool_configs.append(tool_config)
+    return tool_configs
diff --git a/sdks/python/agenta/sdk/agents/tools/resolver.py b/sdks/python/agenta/sdk/agents/tools/resolver.py
new file mode 100644
index 0000000000..54f4c8b03f
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/resolver.py
@@ -0,0 +1,177 @@
+"""Resolution of canonical tool configuration into runnable specifications."""
+
+from __future__ import annotations
+
+import os
+from typing import Mapping, Optional, Sequence
+
+from .errors import (
+    DuplicateToolNameError,
+    MissingToolSecretError,
+    UnsupportedToolProviderError,
+)
+from .interfaces import GatewayToolResolver, ToolSecretProvider
+from .models import (
+    BuiltinToolConfig,
+    ClientToolConfig,
+    ClientToolSpec,
+    CodeToolConfig,
+    CodeToolSpec,
+    GatewayToolConfig,
+    MissingSecretPolicy,
+    ResolvedToolSet,
+    ToolConfig,
+    ToolSpec,
+)
+
+
+class EnvironmentToolSecretProvider:
+    """Look up declared tool secrets in this process's environment variables."""
+
+    async def get_many(self, names: Sequence[str]) -> Mapping[str, str]:
+        environ = os.environ
+        # Names without a matching variable are left out of the result.
+        return {name: environ[name] for name in names if name in environ}
+
+
+def _apply_tool_metadata(tool_spec: ToolSpec, tool_config: ToolConfig) -> ToolSpec:
+    """Copy the config's approval flag and render hints onto a new spec instance."""
+    overrides = {
+        "needs_approval": tool_config.needs_approval,
+        "render": tool_config.render,
+    }
+    # model_copy leaves the (frozen) input spec untouched and returns the copy.
+    return tool_spec.model_copy(update=overrides)
+
+
+def _build_code_tool_spec(
+    *,
+    tool_config: CodeToolConfig,
+    env: Mapping[str, str],
+) -> CodeToolSpec:
+    """Build the runnable spec for a code tool, carrying its resolved secrets as env."""
+    spec = CodeToolSpec(
+        name=tool_config.name,
+        description=tool_config.description or tool_config.name,
+        input_schema=tool_config.input_schema,
+        runtime=tool_config.runtime,
+        code=tool_config.script,
+        env=dict(env),
+    )
+    # Approval and render metadata come from the config and are applied on a copy.
+    return _apply_tool_metadata(spec, tool_config)
+
+
+def _build_client_tool_spec(*, tool_config: ClientToolConfig) -> ClientToolSpec:
+    """Build the spec for a client tool; the description falls back to its name."""
+    spec = ClientToolSpec(
+        name=tool_config.name,
+        description=tool_config.description or tool_config.name,
+        input_schema=tool_config.input_schema,
+    )
+    # Approval and render metadata come from the config and are applied on a copy.
+    return _apply_tool_metadata(spec, tool_config)
+
+
+def _validate_unique_names(
+    *, builtin_names: Sequence[str], tool_specs: Sequence[ToolSpec]
+) -> None:
+    """Raise ``DuplicateToolNameError`` on the first repeated model-visible name."""
+    spec_names = [tool_spec.name for tool_spec in tool_specs]
+    seen: set[str] = set()
+    for name in (*builtin_names, *spec_names):
+        if name in seen:
+            raise DuplicateToolNameError(name)
+        seen.add(name)
+
+
+class ToolResolver:
+    """Resolve canonical tool configuration through injected secret and gateway adapters."""
+
+    def __init__(
+        self,
+        *,
+        secret_provider: Optional[ToolSecretProvider] = None,
+        gateway_resolver: Optional[GatewayToolResolver] = None,
+        missing_secret_policy: MissingSecretPolicy = MissingSecretPolicy.ERROR,
+    ) -> None:
+        self._secret_provider = secret_provider or EnvironmentToolSecretProvider()
+        self._gateway_resolver = gateway_resolver
+        self._missing_secret_policy = missing_secret_policy
+
+    async def resolve(self, tool_configs: Sequence[ToolConfig]) -> ResolvedToolSet:
+        builtin_names = [
+            tool_config.name
+            for tool_config in tool_configs
+            if isinstance(tool_config, BuiltinToolConfig)
+        ]
+        code_configs = [
+            tool_config
+            for tool_config in tool_configs
+            if isinstance(tool_config, CodeToolConfig)
+        ]
+        client_configs = [
+            tool_config
+            for tool_config in tool_configs
+            if isinstance(tool_config, ClientToolConfig)
+        ]
+        gateway_configs = [
+            tool_config
+            for tool_config in tool_configs
+            if isinstance(tool_config, GatewayToolConfig)
+        ]
+        # Fetch each declared secret once: names are de-duplicated and sorted.
+        secret_names = sorted(
+            {
+                secret_name
+                for tool_config in code_configs
+                for secret_name in tool_config.secrets
+            }
+        )
+        secret_values = (
+            dict(await self._secret_provider.get_many(secret_names))
+            if secret_names
+            else {}
+        )
+        # Code tools: a missing secret raises under ERROR, otherwise it is left out.
+        tool_specs: list[ToolSpec] = []
+        for tool_config in code_configs:
+            missing = [
+                secret_name
+                for secret_name in tool_config.secrets
+                if secret_name not in secret_values
+            ]
+            if missing and self._missing_secret_policy == MissingSecretPolicy.ERROR:
+                raise MissingToolSecretError(
+                    tool_name=tool_config.name,
+                    secret_names=missing,
+                )
+            env = {
+                secret_name: secret_values[secret_name]
+                for secret_name in tool_config.secrets
+                if secret_name in secret_values
+            }
+            tool_specs.append(_build_code_tool_spec(tool_config=tool_config, env=env))
+        # Client tools need no secrets and are appended after the code tools.
+        tool_specs.extend(
+            _build_client_tool_spec(tool_config=tool_config)
+            for tool_config in client_configs
+        )
+        # Gateway tools need an injected resolver; its specs go ahead of the others.
+        tool_callback = None
+        if gateway_configs:
+            if self._gateway_resolver is None:
+                raise UnsupportedToolProviderError(gateway_configs[0].provider)
+            gateway_resolution = await self._gateway_resolver.resolve(gateway_configs)
+            tool_specs = [*gateway_resolution.tool_specs, *tool_specs]
+            tool_callback = gateway_resolution.tool_callback
+        # Built-in names and spec names share one namespace and must not collide.
+        _validate_unique_names(
+            builtin_names=builtin_names,
+            tool_specs=tool_specs,
+        )
+        return ResolvedToolSet(
+            builtin_names=builtin_names,
+            tool_specs=tool_specs,
+            tool_callback=tool_callback,
+        )
diff --git a/sdks/python/agenta/sdk/agents/tools/wire.py b/sdks/python/agenta/sdk/agents/tools/wire.py
new file mode 100644
index 0000000000..1f716b503d
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/tools/wire.py
@@ -0,0 +1,15 @@
+"""Serialization of resolved tool specifications to the runner contract."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, Sequence
+
+from .models import ToolSpec
+
+
+def tool_spec_to_wire(tool_spec: ToolSpec) -> Dict[str, Any]:
+    return tool_spec.to_wire()
+
+
+def tool_specs_to_wire(tool_specs: Sequence[ToolSpec]) -> list[Dict[str, Any]]:
+    return [tool_spec_to_wire(tool_spec) for tool_spec in tool_specs]
diff --git a/sdks/python/agenta/sdk/agents/ui_messages.py b/sdks/python/agenta/sdk/agents/ui_messages.py
new file mode 100644
index 0000000000..2dc1f5e39b
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/ui_messages.py
@@ -0,0 +1,18 @@
+"""Compatibility imports for the Vercel UI Message adapter.
+
+New code should import from :mod:`agenta.sdk.agents.adapters.vercel`.
+"""
+
+from __future__ import annotations
+
+from .adapters.vercel import (
+    from_ui_messages,
+    to_ui_message,
+    ui_message_stream,
+)
+
+__all__ = [
+    "from_ui_messages",
+    "to_ui_message",
+    "ui_message_stream",
+]
diff --git a/sdks/python/agenta/sdk/agents/utils/__init__.py b/sdks/python/agenta/sdk/agents/utils/__init__.py
new file mode 100644
index 0000000000..620e3b1b7e
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/utils/__init__.py
@@ -0,0 +1,19 @@
+"""Shared plumbing for the runner-backed adapters: the ``/run`` wire shape and the two
+transports to the TypeScript runner."""
+
+from .ts_runner import (
+    deliver_http,
+    deliver_http_stream,
+    deliver_subprocess,
+    deliver_subprocess_stream,
+)
+from .wire import request_to_wire, result_from_wire
+
+__all__ = [
+    "request_to_wire",
+    "result_from_wire",
+    "deliver_http",
+    "deliver_subprocess",
+    "deliver_http_stream",
+    "deliver_subprocess_stream",
+]
diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
new file mode 100644
index 0000000000..f7a5497d1c
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
@@ -0,0 +1,163 @@
+"""Transports to the TypeScript runner: HTTP (a running sidecar) or subprocess (a CLI).
+
+Shared by the runner-backed adapters. Each adapter chooses a transport and hard-codes its
+own engine id on the payload (via ``utils.wire``); this module only delivers the JSON.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+from typing import Any, AsyncIterator, Dict, Optional, Sequence
+
+_DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180"))
+
+
+async def deliver_http(
+    base_url: str,
+    payload: Dict[str, Any],
+    *,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> Dict[str, Any]:
+    """Send the payload to the runner's ``/run`` endpoint and return its JSON body."""
+    import httpx  # local import: only the HTTP transport needs it
+
+    endpoint = f"{base_url.rstrip('/')}/run"
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        response = await client.post(endpoint, json=payload)
+    status = response.status_code
+    if status >= 500:
+        # Only server errors raise; any other status is parsed as JSON below.
+        raise RuntimeError(f"Agent runner HTTP {status}: {response.text[:1000]}")
+    return response.json()
+
+
+async def deliver_subprocess(
+    command: Sequence[str],
+    payload: Dict[str, Any],
+    *,
+    cwd: Optional[str] = None,
+    env: Optional[Dict[str, str]] = None,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> Dict[str, Any]:
+    """Spawn the runner CLI, feed the request on stdin, and parse the JSON on stdout."""
+    proc = await asyncio.create_subprocess_exec(
+        *command,
+        cwd=cwd,
+        env=env,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    try:
+        exchange = proc.communicate(input=json.dumps(payload).encode("utf-8"))
+        stdout, stderr = await asyncio.wait_for(exchange, timeout=timeout)
+    except asyncio.TimeoutError as exc:
+        raise RuntimeError(
+            f"Agent runner timed out after {timeout}s: {' '.join(command)}"
+        ) from exc
+    finally:
+        if proc.returncode is None:  # timeout, cancellation, or an error before exit
+            proc.kill()
+            await proc.wait()
+
+    out = stdout.decode("utf-8", "replace")
+    err = stderr.decode("utf-8", "replace")
+    if not out.strip():
+        raise RuntimeError(
+            f"Agent runner returned no output. exit={proc.returncode} stderr={err[-2000:]}"
+        )
+    try:
+        return json.loads(out)
+    except json.JSONDecodeError as exc:
+        raise RuntimeError(
+            f"Agent runner returned invalid JSON. stdout={out[:500]} stderr={err[-1000:]}"
+        ) from exc
+
+
+# ---------------------------------------------------------------------------
+# Streaming transports (NDJSON): one parsed record per line, live.
+#
+# Each yields the runner's ``StreamRecord`` lines as they arrive — ``{"kind":"event",...}``
+# for every event the moment it is built, then exactly one ``{"kind":"result",...}`` terminal
+# record. The caller (a ``Session.stream``) turns these into live ``AgentEvent``s and the
+# terminal ``AgentResult``. Cancellation closes the underlying connection / kills the child.
+# ---------------------------------------------------------------------------
+
+
+async def deliver_http_stream(
+    base_url: str,
+    payload: Dict[str, Any],
+    *,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> AsyncIterator[Dict[str, Any]]:
+    """Stream a ``/run`` request as NDJSON, yielding one parsed record per line.
+
+    Closing or cancelling the generator leaves the ``async with`` blocks, which closes
+    the connection; the runner observes the disconnect and cancels the run.
+    """
+    import httpx  # local import: only the HTTP transport needs it
+
+    endpoint = f"{base_url.rstrip('/')}/run"
+    ndjson = {"Accept": "application/x-ndjson"}
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        request = client.stream("POST", endpoint, json=payload, headers=ndjson)
+        async with request as response:
+            status = response.status_code
+            # Server errors carry a body rather than a stream: read it for the message.
+            if status >= 500:
+                detail = (await response.aread())[:1000]
+                raise RuntimeError(f"Agent runner HTTP {status}: {detail!r}")
+            async for raw_line in response.aiter_lines():
+                record = raw_line.strip()
+                if not record:
+                    continue  # blank lines carry no record
+                yield json.loads(record)
+
+
+async def deliver_subprocess_stream(
+    command: Sequence[str],
+    payload: Dict[str, Any],
+    *,
+    cwd: Optional[str] = None,
+    env: Optional[Dict[str, str]] = None,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> AsyncIterator[Dict[str, Any]]:
+    """Spawn the runner CLI in ``--stream`` mode and yield each NDJSON record from stdout.
+
+    Exceeding ``timeout`` raises ``RuntimeError``; the child is killed on any early exit.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        *command,
+        "--stream",
+        cwd=cwd,
+        env=env,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.DEVNULL,  # never read; a full pipe would stall the child
+    )
+    assert proc.stdin is not None and proc.stdout is not None
+    proc.stdin.write(json.dumps(payload).encode("utf-8"))
+    proc.stdin.close()
+    loop = asyncio.get_running_loop()
+    deadline = loop.time() + timeout
+    try:
+        while True:
+            # The budget covers the whole stream; once spent, wait_for times out at once.
+            remaining = max(deadline - loop.time(), 0.0)
+            raw = await asyncio.wait_for(proc.stdout.readline(), timeout=remaining)
+            if not raw:  # EOF
+                break
+            line = raw.decode("utf-8", "replace").strip()
+            if line:
+                yield json.loads(line)
+        await proc.wait()
+    except asyncio.TimeoutError as exc:
+        raise RuntimeError(
+            f"Agent runner stream timed out after {timeout}s: {' '.join(command)}"
+        ) from exc
+    finally:
+        if proc.returncode is None:
+            proc.kill()
+            await proc.wait()
diff --git a/sdks/python/agenta/sdk/agents/utils/wire.py b/sdks/python/agenta/sdk/agents/utils/wire.py
new file mode 100644
index 0000000000..b7558a4530
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/utils/wire.py
@@ -0,0 +1,91 @@
+"""The ``/run`` wire contract: our DTOs <-> the runner's camelCase JSON.
+
+Shared by the runner-backed adapters (rivet, in-process Pi). The TS side mirrors these names
+in ``services/agent/src/protocol.ts``, and the contract is pinned by shared golden fixtures
+under ``sdks/python/oss/tests/pytest/unit/agents/golden/`` (see ``test_wire_contract.py``).
+The caller passes the engine id explicitly, since each adapter hard-codes its own.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional, Sequence
+
+from ..dtos import (
+    AgentEvent,
+    AgentResult,
+    HarnessAgentConfig,
+    HarnessCapabilities,
+    HarnessType,
+    Message,
+    TraceContext,
+)
+
+
+def request_to_wire(
+    *,
+    engine: str,
+    harness: HarnessType,
+    sandbox: str,
+    config: HarnessAgentConfig,
+    messages: Sequence[Message],
+    secrets: Optional[Dict[str, str]] = None,
+    trace: Optional[TraceContext] = None,
+    session_id: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Build the ``/run`` request JSON for a single turn.
+
+    The fixed fields are written first. The harness config then contributes its own
+    entries through ``wire_tools()``, ``wire_prompt()`` and ``wire_mcp()``; they are
+    merged in that order, so a later mapping overrides an earlier key of the same
+    name. ``secrets`` is copied into a new dict, and a missing ``trace`` is sent as
+    ``None``.
+    """
+    wire: Dict[str, Any] = {
+        "backend": engine,
+        "harness": harness.value,
+        "sandbox": sandbox,
+        "sessionId": session_id,
+        "agentsMd": config.agents_md,
+        "model": config.model,
+        "messages": [message.to_wire() for message in messages],
+        "secrets": dict(secrets or {}),
+        "trace": trace.to_wire() if trace else None,
+    }
+    wire.update(config.wire_tools())
+    wire.update(config.wire_prompt())
+    wire.update(config.wire_mcp())
+    return wire
+
+
+def result_from_wire(data: Dict[str, Any]) -> AgentResult:
+    """Turn the runner's ``/run`` result JSON into an :class:`AgentResult`.
+
+    A payload whose ``ok`` flag is falsy raises ``RuntimeError`` with the runner's
+    error text; message/event entries for which the parser returns ``None`` are dropped.
+    """
+    if not data.get("ok"):
+        raise RuntimeError(f"Agent run failed: {data.get('error')}")
+
+    messages: List[Message] = [
+        message
+        for message in map(Message.from_raw, data.get("messages") or [])
+        if message is not None
+    ]
+
+    events: List[AgentEvent] = [
+        event
+        for event in map(AgentEvent.from_wire, data.get("events") or [])
+        if event is not None
+    ]
+
+    return AgentResult(
+        output=data.get("output", "") or "",
+        messages=messages,
+        events=events,
+        usage=data.get("usage"),
+        stop_reason=data.get("stopReason"),
+        capabilities=HarnessCapabilities.from_wire(data.get("capabilities")),
+        session_id=data.get("sessionId"),
+        model=data.get("model"),
+        trace_id=data.get("traceId"),
+    )
diff --git a/sdks/python/agenta/sdk/decorators/routing.py b/sdks/python/agenta/sdk/decorators/routing.py
index 4a57846d6e..04cb88c3a0 100644
--- a/sdks/python/agenta/sdk/decorators/routing.py
+++ b/sdks/python/agenta/sdk/decorators/routing.py
@@ -20,6 +20,11 @@
     WorkflowBaseResponse,
     WorkflowServiceResponseData,
 )
+from agenta.sdk.agents.adapters.vercel.routing import register_agent_message_routes
+from agenta.sdk.agents.adapters.vercel.sse import (
+    VERCEL_UI_MESSAGE_STREAM_HEADERS as _VERCEL_UI_MESSAGE_STREAM_HEADERS,
+    vercel_sse_stream as _vercel_sse_stream,
+)
 from agenta.sdk.middlewares.routing.cors import CORSMiddleware
 from agenta.sdk.middlewares.routing.auth import AuthMiddleware
 from agenta.sdk.middlewares.routing.otel import OTelMiddleware
@@ -34,7 +39,7 @@
 # These names are used by the per-route namespace triple itself.
 # ---------------------------------------------------------------------------
 
-_RESERVED_PATHS = {"invoke", "inspect"}
+_RESERVED_PATHS = {"invoke", "inspect", "messages", "load-session"}
 
 
 def _validate_path(path: str) -> None:
@@ -195,15 +200,27 @@ def _make_stream_response(
 ) -> StreamingResponse:
     aiter = response.iterator()
 
-    if wire_format == "sse":
-        media_type = "text/event-stream"
-        res = StreamingResponse(_sse_stream(aiter), media_type=media_type)
+    if wire_format == "vercel":
+        # The Vercel UI Message Stream: SSE framing terminated by `data: [DONE]`, plus the
+        # headers the AI SDK client and proxies require. Endpoint-selected (the agent
+        # `/messages` route passes "vercel"), not derived from Accept — a Vercel UI message
+        # stream and a plain SSE stream share the `text/event-stream` media type, so the
+        # choice cannot come from the Accept header alone.
+        res = StreamingResponse(
+            _vercel_sse_stream(aiter), media_type="text/event-stream"
+        )
+        for key, value in _VERCEL_UI_MESSAGE_STREAM_HEADERS.items():
+            res.headers.setdefault(key, value)
+    elif wire_format == "sse":
+        res = StreamingResponse(_sse_stream(aiter), media_type="text/event-stream")
     elif wire_format == "ndjson":
-        media_type = "application/x-ndjson"
-        res = StreamingResponse(_ndjson_stream(aiter), media_type=media_type)
+        res = StreamingResponse(
+            _ndjson_stream(aiter), media_type="application/x-ndjson"
+        )
     else:
-        media_type = "application/x-ndjson"
-        res = StreamingResponse(_ndjson_stream(aiter), media_type=media_type)
+        res = StreamingResponse(
+            _ndjson_stream(aiter), media_type="application/x-ndjson"
+        )
 
     return _set_common_headers(res, response)  # type: ignore
 
@@ -451,6 +468,10 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest):
             except Exception as exception:
                 return await handle_inspect_failure(exception)
 
+        # Agent-only endpoints are Vercel/browser-protocol adapters. Keep their request
+        # folding, session id handling, and UI Message Stream details out of the generic
+        # workflow route decorator.
+
         invoke_responses: dict = {
             200: {
                 "description": "Negotiated response — format determined by Accept header",
@@ -489,6 +510,25 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest):
             },
         }
 
+        agent_enabled = bool(self.flags and self.flags.get("is_agent"))
+
+        def _add_agent_routes(target: Any, prefix: str) -> None:
+            """Register the agent-only /messages + /load-session routes on a target
+            (sub-app / router / mount root), mirroring how /invoke + /inspect are added."""
+            register_agent_message_routes(
+                target,
+                prefix,  # self.path on the legacy router; "" on the mount root / isolated sub-app
+                wf=wf,
+                invoke_responses=invoke_responses,
+                get_request_tracing_context=_get_request_tracing_context,
+                parse_accept=_parse_accept,
+                stream_media_types=STREAM_MEDIA_TYPES,
+                make_json_response=_make_json_response,
+                make_not_acceptable_response=_make_not_acceptable_response,
+                make_stream_response=_make_stream_response,
+                handle_failure=handle_invoke_failure,
+            )
+
         # ------------------------------------------------------------------
         # Legacy path: router= was provided.
         # Registers prefixed routes on the APIRouter without isolation.
@@ -506,6 +546,8 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest):
                 methods=["POST"],
                 response_model=WorkflowInvokeRequest,
             )
+            if agent_enabled:
+                _add_agent_routes(self.router_fallback, self.path)
             return foo
 
         # ------------------------------------------------------------------
@@ -528,6 +570,8 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest):
                 methods=["POST"],
                 response_model=WorkflowInvokeRequest,
             )
+            if agent_enabled:
+                _add_agent_routes(self.mount_root, "")
 
             return foo
 
@@ -545,6 +589,8 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest):
             methods=["POST"],
             response_model=WorkflowInvokeRequest,
         )
+        if agent_enabled:
+            _add_agent_routes(sub_app, "")
 
         self.mount_root.mount(self.path, sub_app)
 
diff --git a/sdks/python/agenta/sdk/engines/running/interfaces.py b/sdks/python/agenta/sdk/engines/running/interfaces.py
index d84908a164..514135c90b 100644
--- a/sdks/python/agenta/sdk/engines/running/interfaces.py
+++ b/sdks/python/agenta/sdk/engines/running/interfaces.py
@@ -524,6 +524,49 @@ def llm_inputs_schema(
     ),
 )
 
+agent_v0_interface = WorkflowRevisionData(
+    uri="agenta:builtin:agent:v0",
+    schemas=dict(  # type: ignore
+        parameters=obj(
+            properties={
+                # One composite control for the whole agent config. The field shape lives in
+                # `AgentConfigSchema` (agenta.sdk.utils.types), registered as the `agent_config`
+                # catalog type; the playground resolves this ref and renders the AgentConfigControl.
+                "agent": semantic_field(
+                    x_ag_type_ref="agent_config",
+                    jtype="object",
+                    description="The agent's instructions, model, tools, MCP servers, and runtime.",
+                    default={  # NOTE: same literals as AgentConfigSchema's field defaults; keep the two in sync
+                        "agents_md": (
+                            "You are a friendly hello-world agent running on the "
+                            "Agenta agent service.\n\n- Greet the user warmly.\n- "
+                            "Answer the user's message in one or two short sentences."
+                        ),
+                        "model": "gpt-5.5",
+                        "tools": [],
+                        "mcp_servers": [],
+                        "harness": "pi",
+                        "sandbox": "local",
+                        "permission_policy": "auto",
+                    },
+                ),
+            },
+            additional_properties=True,
+        ),
+        inputs=llm_inputs_schema(
+            include_messages=True,
+        ),
+        outputs={
+            "$schema": "https://json-schema.org/draft/2020-12/schema",
+            **semantic_field(
+                x_ag_type_ref="message",
+                jtype="object",
+                description="Final assistant message returned by the agent.",
+            ),
+        },
+    ),
+)
+
 completion_v0_interface = WorkflowRevisionData(
     uri="agenta:builtin:completion:v0",
     schemas=dict(  # type: ignore
diff --git a/sdks/python/agenta/sdk/engines/running/utils.py b/sdks/python/agenta/sdk/engines/running/utils.py
index da84036e5a..a55a7069ca 100644
--- a/sdks/python/agenta/sdk/engines/running/utils.py
+++ b/sdks/python/agenta/sdk/engines/running/utils.py
@@ -51,6 +51,7 @@
     # --- OLD URI
     chat_v0_interface,
     completion_v0_interface,
+    agent_v0_interface,
     echo_v0_interface,
     auto_exact_match_v0_interface,
     auto_regex_test_v0_interface,
@@ -88,6 +89,7 @@
             # --- OLD URI
             chat=dict(v0=chat_v0_interface),
             completion=dict(v0=completion_v0_interface),
+            agent=dict(v0=agent_v0_interface),
             echo=dict(v0=echo_v0_interface),
             auto_exact_match=dict(v0=auto_exact_match_v0_interface),
             auto_regex_test=dict(v0=auto_regex_test_v0_interface),
@@ -243,6 +245,15 @@ def _catalog_entry() -> dict:
                     presets=[],
                 )
             ),
+            agent=dict(
+                v0=dict(
+                    name="agent",
+                    description="Agent that runs tools over multiple turns on the Pi harness.",
+                    categories=None,
+                    flags=None,
+                    presets=[],
+                )
+            ),
             #
             echo=dict(v0=_catalog_entry()),
             auto_exact_match=dict(v0=_catalog_entry()),
@@ -282,6 +293,18 @@ def _catalog_entry() -> dict:
             # --- OLD URI
             chat=dict(v0=WorkflowRevisionData()),
             completion=dict(v0=WorkflowRevisionData()),
+            agent=dict(
+                v0=WorkflowRevisionData(
+                    parameters={
+                        "model": "gpt-5.5",
+                        "agents_md": (
+                            "You are a friendly hello-world agent running on the "
+                            "Agenta agent service.\n\n- Greet the user warmly.\n- "
+                            "Answer the user's message in one or two short sentences."
+                        ),
+                    }
+                )
+            ),
             echo=dict(v0=WorkflowRevisionData()),
             auto_exact_match=dict(v0=WorkflowRevisionData()),
             auto_regex_test=dict(v0=WorkflowRevisionData()),
@@ -543,12 +566,12 @@ def infer_url_from_uri(uri: Optional[str]) -> Optional[str]:
     # agenta:builtin:* — application-only (not evaluators)
     ("builtin", "chat"): (True, False, False),
     ("builtin", "completion"): (True, False, False),
+    ("builtin", "agent"): (True, False, False),
     # agenta:builtin:* — both evaluator and application
     ("builtin", "llm"): (True, True, False),
     # agenta:builtin:* — evaluator-only
     ("builtin", "match"): (False, True, False),
     ("builtin", "prompt"): (False, True, False),
-    ("builtin", "agent"): (False, True, False),
     ("builtin", "echo"): (False, True, False),
     ("builtin", "human"): (False, True, False),
     ("builtin", "auto_exact_match"): (False, True, False),
diff --git a/sdks/python/agenta/sdk/middlewares/running/normalizer.py b/sdks/python/agenta/sdk/middlewares/running/normalizer.py
index cdbe389b33..44c5b791e4 100644
--- a/sdks/python/agenta/sdk/middlewares/running/normalizer.py
+++ b/sdks/python/agenta/sdk/middlewares/running/normalizer.py
@@ -66,8 +66,10 @@ async def _normalize_request(
         1. If parameter name is 'request': passes the entire WorkflowServiceRequest
         2. If parameter name matches DATA_FIELDS (like 'inputs', 'outputs', 'parameters'):
            extracts that field from request.data
-        3. If parameter is **kwargs: includes all unconsumed DATA_FIELDS
-        4. Otherwise: looks up the parameter name in request.data.inputs dict
+        3. If parameter name is a supported top-level request field like 'session_id':
+           extracts that field from the request envelope
+        4. If parameter is **kwargs: includes all unconsumed DATA_FIELDS
+        5. Otherwise: looks up the parameter name in request.data.inputs dict
 
         Args:
             request: The workflow service request containing inputs and data
@@ -95,6 +97,10 @@ async def _normalize_request(
                 )
                 consumed.add(name)
 
+            elif name == "session_id":
+                normalized[name] = request.session_id
+                consumed.add(name)
+
             elif param.kind == inspect.Parameter.VAR_KEYWORD:
                 if request.data:
                     for f in self.DATA_FIELDS - consumed:
diff --git a/sdks/python/agenta/sdk/models/workflows.py b/sdks/python/agenta/sdk/models/workflows.py
index a9437342df..0cb751e9dc 100644
--- a/sdks/python/agenta/sdk/models/workflows.py
+++ b/sdks/python/agenta/sdk/models/workflows.py
@@ -79,6 +79,7 @@ class WorkflowFlags(BaseModel):
     # interface-derived
     ## schema
     is_chat: bool = False
+    is_agent: bool = False
     ## hook
     has_url: bool = False
     ## code
@@ -106,6 +107,7 @@ class WorkflowQueryFlags(BaseModel):
     # interface-derived
     ## schema
     is_chat: Optional[bool] = None
+    is_agent: Optional[bool] = None
     ## hook
     has_url: Optional[bool] = None
     ## code
@@ -209,6 +211,15 @@ class WorkflowRequestData(BaseModel):
     #
     testcase: Optional[dict] = None
     inputs: Optional[dict] = None
+    # The agent ``/messages`` egress lifts the conversation out of ``inputs`` to this
+    # first-class member, in the Vercel ``UIMessage`` shape; ``/invoke`` ignores it.
+    messages: Optional[list] = None
+    # Transport mode for the agent ``/messages`` route: the endpoint sets this from the Accept
+    # negotiation so the shared agent handler streams (returns an async generator) instead of
+    # returning a batch dict. A sibling of ``messages`` / ``inputs`` / ``parameters`` on purpose
+    # — it must not live in ``parameters``, where it would leak into agent config / revision
+    # state / trace inputs. ``/invoke`` leaves it unset (batch).
+    stream: Optional[bool] = None
     #
     trace: Optional[dict] = None
     outputs: Optional[Any] = None
@@ -233,6 +244,10 @@ class WorkflowBaseRequest(Metadata):
     secrets: Optional[Dict[str, Any]] = None
     credentials: Optional[str] = None
 
+    # The agent ``/messages`` session this turn belongs to (opaque, project-scoped). Optional;
+    # absent on ``/invoke`` and on the first turn of a server-minted session.
+    session_id: Optional[str] = None
+
     @model_validator(mode="before")
     def _coerce_nested_models(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         if "references" in values and isinstance(values["references"], dict):
@@ -291,6 +306,10 @@ class WorkflowBaseResponse(TraceID, SpanID):
 
     status: Optional[WorkflowServiceStatus] = WorkflowServiceStatus()
 
+    # The resolved agent session id (minted or echoed) on the ``/messages`` response, alongside
+    # ``trace_id`` / ``span_id``. ``None`` for plain ``/invoke`` responses.
+    session_id: Optional[str] = None
+
 
 # back-compat alias
 WorkflowServiceBaseResponse = WorkflowBaseResponse
@@ -324,6 +343,20 @@ async def iterator(self):
 ]
 
 
+class LoadSessionRequest(BaseModel):
+    """``POST /load-session`` body. The session id is required (RFC §7.1)."""
+
+    session_id: str  # no default, so a body without it fails validation
+
+
+class LoadSessionResponse(BaseModel):
+    """``POST /load-session`` response: a session's history as Vercel ``UIMessage`` objects,
+    the shape ``useChat`` takes as its initial ``messages``."""
+
+    session_id: str  # required on every response
+    messages: List[Dict[str, Any]] = Field(default_factory=list)  # fresh empty list when omitted
+
+
 # aliases ----------------------------------------------------------------------
 
 
diff --git a/sdks/python/agenta/sdk/utils/types.py b/sdks/python/agenta/sdk/utils/types.py
index 8e629b92fb..994c781aa4 100644
--- a/sdks/python/agenta/sdk/utils/types.py
+++ b/sdks/python/agenta/sdk/utils/types.py
@@ -8,6 +8,8 @@
 from pydantic import Field, model_validator, AliasChoices
 
 
+from agenta.sdk.agents.mcp import MCPServerConfig
+from agenta.sdk.agents.tools import ToolConfig
 from agenta.sdk.utils.assets import supported_llm_models, model_metadata
 from agenta.sdk.utils.helpers import _PLACEHOLDER_RE
 from agenta.sdk.utils.rendering import (
@@ -1052,6 +1054,81 @@ def _model_catalog_type() -> dict:
     }
 
 
+_DEFAULT_AGENT_MODEL = "gpt-5.5"  # NOTE: also hard-coded in agent_v0_interface and the agent v0 default parameters
+_DEFAULT_AGENTS_MD = (  # NOTE: the same text is repeated as a literal in those two places; keep in sync
+    "You are a friendly hello-world agent running on the Agenta agent service.\n\n"
+    "- Greet the user warmly.\n"
+    "- Answer the user's message in one or two short sentences."
+)
+
+
+class AgentConfigSchema(AgSchemaMixin):
+    """The playground's editable agent config (the ``agent`` element), as one semantic type.
+
+    This is the schema-generation counterpart to the runtime :class:`agenta.sdk.agents.AgentConfig`
+    parser: it exists only to emit a rich JSON Schema for the ``agent_config`` control, so the
+    field shapes live in Pydantic (single source of truth) instead of a hand-written literal.
+    It deliberately composes the editable fields the control surfaces — the neutral config
+    (``agents_md``/``model``/``tools``/``mcp_servers``) plus the run selection
+    (``harness``/``sandbox``/``permission_policy``) — and types ``tools``/``mcp_servers`` with the
+    real tool-def models so the playground gets typed editors. The runtime ``AgentConfig`` stays
+    permissive (``List[Any]``) because its job is to coerce the loose shapes the playground emits;
+    this model is strict because its job is to describe them.
+    """
+
+    __ag_type__ = "agent_config"  # same string as the x_ag_type_ref on agent_v0_interface's "agent" parameter
+
+    agents_md: str = Field(
+        default=_DEFAULT_AGENTS_MD,
+        title="Instructions",
+        description="The agent's system prompt (its AGENTS.md).",
+        json_schema_extra={"x-ag-type": "textarea"},
+    )
+    model: str = Field(
+        default=_DEFAULT_AGENT_MODEL,
+        title="Model",
+        description="Model the agent runs on.",
+        json_schema_extra={"x-parameter": "grouped_choice"},
+    )
+    tools: List[ToolConfig] = Field(
+        default_factory=list,  # a new empty list per instance, never a shared default
+        title="Tools",
+        description=(
+            "Runnable tools the agent can call: harness built-ins, server-side gateway "
+            "actions (e.g. Composio), sandboxed code, or client-fulfilled tools."
+        ),
+    )
+    mcp_servers: List[MCPServerConfig] = Field(
+        default_factory=list,  # a new empty list per instance, never a shared default
+        title="MCP servers",
+        description=(
+            "Declared MCP servers exposed to the agent. The backend resolves each server's "
+            "secret env from the vault at run time; tokens never live in the config."
+        ),
+    )
+    harness: Literal["pi", "claude", "agenta"] = Field(
+        default="pi",
+        title="Harness",
+        description=(
+            "Coding agent to drive: pi, claude, or agenta (pi with Agenta's forced "
+            "skills, tools, and base instructions)."
+        ),
+    )
+    sandbox: Literal["local", "daytona"] = Field(
+        default="local",
+        title="Sandbox",
+        description="Where the agent runs: local daemon or a Daytona sandbox.",
+    )
+    permission_policy: Literal["auto", "deny"] = Field(
+        default="auto",
+        title="Permission policy",
+        description=(
+            "How a permission-gating harness (e.g. Claude Code) handles tool-use prompts "
+            "in this headless run: auto-approve or deny."
+        ),
+    )
+
+
 CATALOG_TYPES = {
     Message.ag_type(): _dereference_schema(Message.model_json_schema()),
     Messages.ag_type(): _dereference_schema(Messages.model_json_schema()),
@@ -1065,4 +1142,7 @@ def _model_catalog_type() -> dict:
     AgPermissions.ag_type(): _dereference_schema(AgPermissions.model_json_schema()),
     AgResponse.ag_type(): _dereference_schema(AgResponse.model_json_schema()),
     PromptTemplate.ag_type(): _dereference_schema(PromptTemplate.model_json_schema()),
+    AgentConfigSchema.ag_type(): _dereference_schema(
+        AgentConfigSchema.model_json_schema()
+    ),
 }
diff --git a/sdks/python/agenta/tests/agents/test_streaming.py b/sdks/python/agenta/tests/agents/test_streaming.py
new file mode 100644
index 0000000000..bd378a2ece
--- /dev/null
+++ b/sdks/python/agenta/tests/agents/test_streaming.py
@@ -0,0 +1,167 @@
+"""Tests for the live streaming boundary: ``AgentRun`` and the NDJSON subprocess transport.
+
+Two layers:
+
+- ``AgentRun`` over a fake record source — pure, fast: events are yielded live, the terminal
+  result is captured, hooks/cleanup fire, and an ``ok:false`` terminal raises.
+- ``deliver_subprocess_stream`` against a fake NDJSON emitter — proves records arrive
+  incrementally (not buffered then dumped) and that closing the stream kills the child.
+
+A final integration test drives the real ``cli.ts --stream`` when ``pnpm`` is available.
+
+Run: ``uv run pytest agenta/tests/agents/test_streaming.py`` from ``sdks/python``.
+"""
+
+from __future__ import annotations
+
+import shutil
+import sys
+import time
+from pathlib import Path
+from typing import Any, Dict, List
+
+import pytest
+
+from agenta.sdk.agents import AgentRun
+from agenta.sdk.agents.utils import deliver_subprocess_stream
+
+
+async def _from_list(records: List[Dict[str, Any]]):  # async generator used as the fake record source
+    for record in records:
+        yield record  # records come out in list order, one per iteration
+
+
+# --- AgentRun ---------------------------------------------------------------
+
+
+async def test_agentrun_yields_events_then_captures_result() -> None:
+    seen_result: Dict[str, Any] = {}  # filled by the on_result hook below
+    cleaned: List[bool] = []  # one entry appended per cleanup call
+
+    async def _cleanup() -> None:
+        cleaned.append(True)
+
+    records = [  # three events, then a single ok:true terminal result
+        {"kind": "event", "event": {"type": "message_start", "id": "m0"}},
+        {
+            "kind": "event",
+            "event": {"type": "message_delta", "id": "m0", "delta": "Hi"},
+        },
+        {"kind": "event", "event": {"type": "message_end", "id": "m0"}},
+        {
+            "kind": "result",
+            "result": {
+                "ok": True,
+                "output": "Hi",
+                "sessionId": "s1",
+                "stopReason": "end_turn",
+            },
+        },
+    ]
+    run = AgentRun(_from_list(records))
+    run.on_result(lambda r: seen_result.update({"id": r.session_id}))
+    run.on_cleanup(_cleanup)
+
+    events = [event async for event in run]  # drain the run; only "event" records are expected here
+
+    assert [e.type for e in events] == ["message_start", "message_delta", "message_end"]
+    assert run.result().output == "Hi"
+    assert run.result().session_id == "s1"
+    assert run.result().stop_reason == "end_turn"
+    assert seen_result == {"id": "s1"}  # on_result fired with the terminal result
+    assert cleaned == [True]  # cleanup ran when iteration ended
+
+
+async def test_agentrun_raises_on_error_terminal() -> None:
+    records = [
+        {"kind": "event", "event": {"type": "message_start", "id": "m0"}},
+        {"kind": "result", "result": {"ok": False, "error": "boom"}},  # ok:false terminal; its error text is matched below
+    ]
+    run = AgentRun(_from_list(records))
+    with pytest.raises(RuntimeError, match="boom"):
+        async for _ in run:  # the failure is expected to surface while iterating
+            pass
+
+
+async def test_agentrun_result_unavailable_before_drain() -> None:
+    run = AgentRun(_from_list([{"kind": "event", "event": {"type": "done"}}]))
+    with pytest.raises(RuntimeError, match="not available"):
+        run.result()  # called without iterating the run first
+
+
+# --- deliver_subprocess_stream (fake NDJSON emitter) ------------------------
+
+# Emits 3 event lines, sleeping 0.05 s after each, then one terminal result line. `-u` + flush
+# so the parent observes each line as it is written, not at process exit.
+_EMITTER = r"""
+import sys, time, json
+for i in range(3):
+    sys.stdout.write(json.dumps({"kind":"event","event":{"type":"message_delta","id":"m","delta":"d%d"%i}})+"\n")
+    sys.stdout.flush()
+    time.sleep(0.05)
+sys.stdout.write(json.dumps({"kind":"result","result":{"ok":True,"output":"d0d1d2","sessionId":"s1"}})+"\n")
+sys.stdout.flush()
+"""
+
+
+async def test_subprocess_stream_is_incremental() -> None:
+    cmd = [sys.executable, "-u", "-c", _EMITTER]
+    stamped = []  # (arrival time, record) pairs in arrival order
+    async for record in deliver_subprocess_stream(cmd, {}):
+        stamped.append((time.monotonic(), record))
+
+    kinds = [r["kind"] for _, r in stamped]
+    assert kinds == ["event", "event", "event", "result"], (
+        "events precede the single terminal result"
+    )
+    assert kinds.count("result") == 1, "exactly one terminal record"
+    # Incremental, not buffered-then-dumped: the first event lands well before the result.
+    first_event_t = stamped[0][0]
+    result_t = stamped[-1][0]
+    assert result_t - first_event_t >= 0.1, (  # the emitter sleeps 3 x 0.05 s between first event and result
+        "records were spread out over time, not delivered in one batch"
+    )
+
+
+# Emits one event, then sleeps for 60 s. Closing the stream must kill it promptly.
+_HANGING_EMITTER = r"""
+import sys, time, json
+sys.stdout.write(json.dumps({"kind":"event","event":{"type":"message_delta","id":"m","delta":"x"}})+"\n")
+sys.stdout.flush()
+time.sleep(60)
+"""
+
+
+async def test_subprocess_stream_cancellation_kills_child() -> None:
+    cmd = [sys.executable, "-u", "-c", _HANGING_EMITTER]
+    agen = deliver_subprocess_stream(cmd, {})
+    first = await agen.__anext__()  # pull exactly one record; the child is now inside its 60 s sleep
+    assert first["kind"] == "event"
+
+    started = time.monotonic()
+    await agen.aclose()  # runs the finally: proc.kill() + await proc.wait()
+    elapsed = time.monotonic() - started  # wall time spent closing the stream
+    assert elapsed < 5, "aclose() killed the child instead of waiting out its 60s sleep"
+
+
+# --- Real cli.ts --stream boundary (integration) ----------------------------
+
+
+@pytest.mark.skipif(shutil.which("pnpm") is None, reason="pnpm not available")
+async def test_cli_stream_terminal_only_on_empty_request() -> None:
+    agent_dir = Path(__file__).resolve().parents[5] / "services" / "agent"  # parents[5] = repo root (file is sdks/python/agenta/tests/agents/)
+    cmd = ["pnpm", "exec", "tsx", "src/cli.ts"]  # NOTE(review): no --stream flag, though the module docstring says "cli.ts --stream" — confirm
+    records = []
+    async for record in deliver_subprocess_stream(cmd, {}, cwd=str(agent_dir)):
+        records.append(record)
+
+    # An empty request fails before any event, so the stream is exactly one result record.
+    assert len(records) == 1, records
+    assert records[0]["kind"] == "result"
+    assert records[0]["result"]["ok"] is False
+
+    # AgentRun surfaces that failure as a RuntimeError, just like the one-shot path.
+    run = AgentRun(deliver_subprocess_stream(cmd, {}, cwd=str(agent_dir)))  # second, fresh subprocess run
+    with pytest.raises(RuntimeError):
+        async for _ in run:
+            pass
diff --git a/sdks/python/oss/tests/pytest/integration/agents/__init__.py b/sdks/python/oss/tests/pytest/integration/agents/__init__.py
new file mode 100644
index 0000000000..de6d92eeaf
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/integration/agents/__init__.py
@@ -0,0 +1 @@
+# Integration tests for the agent runtime: the real wire + transport against a fake runner.
diff --git a/sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py b/sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py
new file mode 100644
index 0000000000..a73c30eecc
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py
@@ -0,0 +1,113 @@
+"""End-to-end through the real wire and transport, against a fake runner.
+
+This is the Python-only stand-in for a live ``/invoke``: a tiny script plays the runner,
+echoing the latest turn. The whole runtime path is real -- harness translation, the cold
+environment lifecycle, ``request_to_wire``, the subprocess transport, and ``result_from_wire``
+-- only the runner program (which would be the TS + Pi + LLM stack) is faked. So it catches
+serialization or transport drift that per-side unit tests cannot, with no TS and no LLM.
+"""
+
+from __future__ import annotations
+
+import sys
+
+import pytest
+
+from agenta.sdk.agents import (
+    AgentConfig,
+    Environment,
+    InProcessPiBackend,
+    Message,
+    PiHarness,
+    SessionConfig,
+)
+
+pytestmark = pytest.mark.integration
+
+
+# A runner that reads the /run request on stdin and echoes the latest user turn as a full
+# AgentRunResult on stdout (the camelCase wire shape result_from_wire parses).
+_ECHO_RUNNER = """
+import sys, json
+
+req = json.load(sys.stdin)
+text = ""
+for message in reversed(req.get("messages") or []):
+    if message.get("role") == "user":
+        content = message.get("content")
+        if isinstance(content, str):
+            text = content
+        else:
+            text = "".join(
+                block.get("text", "")
+                for block in content
+                if isinstance(block, dict) and block.get("type") == "text"
+            )
+        if text:
+            break
+
+out = {
+    "ok": True,
+    "output": "echo: " + text,
+    "messages": [{"role": "assistant", "content": "echo: " + text}],
+    "events": [
+        {"type": "message", "text": "echo: " + text},
+        {"type": "done", "stopReason": "end_turn"},
+    ],
+    "usage": {"input": 1, "output": 1, "total": 2, "cost": 0.0},
+    "stopReason": "end_turn",
+    "capabilities": {"textMessages": True, "mcpTools": False},
+    "sessionId": "sess-fake",
+    "model": req.get("model"),
+}
+sys.stdout.write(json.dumps(out))
+"""  # the script prints one JSON document and skips user turns whose text is empty
+
+_FAIL_RUNNER = """
+import sys, json
+json.load(sys.stdin)
+sys.stdout.write(json.dumps({"ok": False, "error": "model exploded"}))
+"""  # consumes the request, then reports an ok:false result with a fixed error string
+
+_SILENT_RUNNER = """
+import sys, json
+json.load(sys.stdin)
+"""  # consumes the request and exits without writing anything to stdout
+
+
+def _backend(tmp_path, body: str) -> InProcessPiBackend:  # `body` is the fake runner's Python source
+    runner = tmp_path / "fake_runner.py"
+    runner.write_text(body, encoding="utf-8")
+    return InProcessPiBackend(command=[sys.executable, str(runner)], cwd=str(tmp_path))  # run the script with this interpreter
+
+
+async def test_prompt_round_trips_through_the_real_transport(tmp_path):
+    harness = PiHarness(Environment(_backend(tmp_path, _ECHO_RUNNER)))
+    config = SessionConfig(agent=AgentConfig(instructions="hi", model="gpt-5.5"))  # no session id set up front
+
+    result = await harness.prompt(config, [Message(role="user", content="ping")])
+
+    # The runner saw the wired turn and model, and the result parsed back cleanly.
+    assert result.output == "echo: ping"
+    assert result.model == "gpt-5.5"
+    assert [e.type for e in result.events] == ["message", "done"]
+    assert result.capabilities is not None and result.capabilities.mcp_tools is False
+    # The session id is parsed and carried forward for a follow-up turn.
+    assert result.session_id == "sess-fake"
+    assert config.session_id == "sess-fake"  # the id now appears on the config object passed in
+
+
+async def test_runner_failure_surfaces_as_runtime_error(tmp_path):
+    harness = PiHarness(Environment(_backend(tmp_path, _FAIL_RUNNER)))
+    config = SessionConfig(agent=AgentConfig(instructions="hi"))
+
+    with pytest.raises(RuntimeError, match="model exploded"):  # the runner's error text must appear in the message
+        await harness.prompt(config, [Message(role="user", content="hi")])
+
+
+async def test_runner_empty_output_raises(tmp_path):
+    harness = PiHarness(Environment(_backend(tmp_path, _SILENT_RUNNER)))  # this runner writes nothing to stdout
+    config = SessionConfig(agent=AgentConfig(instructions="hi"))
+
+    with pytest.raises(RuntimeError, match="no output"):
+        await harness.prompt(config, [Message(role="user", content="hi")])
diff --git a/sdks/python/oss/tests/pytest/unit/agents/__init__.py b/sdks/python/oss/tests/pytest/unit/agents/__init__.py
new file mode 100644
index 0000000000..4db23c7442
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/__init__.py
@@ -0,0 +1 @@
+# Unit tests for the agent runtime (agenta.sdk.agents).
diff --git a/sdks/python/oss/tests/pytest/unit/agents/conftest.py b/sdks/python/oss/tests/pytest/unit/agents/conftest.py
new file mode 100644
index 0000000000..a434fdacc5
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/conftest.py
@@ -0,0 +1,198 @@
+"""Shared fakes and fixtures for the agent-runtime unit tests.
+
+The fakes implement the real ports (``Backend`` / ``Sandbox`` / ``Session`` from
+``agenta.sdk.agents.interfaces``) so the port contract keeps them honest: if a port grows an
+abstract method, the fake fails to instantiate and these tests flag that the fake needs
+updating. They record what they receive so a test can assert on lifecycle and translation
+without a runner, a sandbox, an LLM, or the network.
+
+Everything is exposed through fixtures because pytest's prepend import mode makes a plain
+``from .fakes import ...`` brittle across components; a fixture factory sidesteps that.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Mapping, Optional, Sequence
+
+import pytest
+
+from agenta.sdk.agents import (
+    AgentResult,
+    Environment,
+    HarnessType,
+)
+from agenta.sdk.agents.interfaces import Backend, Sandbox, Session
+from agenta.sdk.agents.streaming import AgentRun
+
+
+class FakeSandbox(Sandbox):
+    """In-memory ``Sandbox`` fake that records provisioned files and teardown."""
+
+    def __init__(self) -> None:
+        self.files: Dict[str, bytes] = {}  # path -> content, merged across calls
+        self.destroyed = False  # set by destroy(); never reset
+
+    async def add_files(self, files: Mapping[str, bytes]) -> None:
+        self.files.update(files)  # a repeated path overwrites its earlier content
+
+    async def destroy(self) -> None:
+        self.destroyed = True
+
+
+class FakeSession(Session):
+    """Returns a canned result, records prompts, and tracks teardown. Can be told to raise."""
+
+    def __init__(
+        self,
+        *,
+        result: AgentResult,
+        session_id: Optional[str] = None,
+        raise_on_prompt: bool = False,
+    ) -> None:
+        self._result = result
+        self._session_id = session_id
+        self._raise = raise_on_prompt
+        self.prompts: List[List[Any]] = []
+        self.destroyed = False
+
+    @property
+    def id(self) -> Optional[str]:
+        return self._session_id
+
+    async def prompt(self, messages, *, on_event=None) -> AgentResult:
+        self.prompts.append(list(messages))
+        if self._raise:
+            raise RuntimeError("boom from fake session")
+        if on_event:
+            for event in self._result.events:
+                on_event(event)
+        return self._result
+
+    def stream(self, messages) -> AgentRun:
+        # Mirror the runner's NDJSON stream: an event record per event, then one terminal
+        # result record (the shape `result_from_wire`/`AgentRun` expect).
+        self.prompts.append(list(messages))
+        result = self._result
+        raising = self._raise
+
+        async def _records():
+            if raising:
+                yield {
+                    "kind": "result",
+                    "result": {"ok": False, "error": "boom from fake session"},
+                }
+                return
+            for event in result.events:
+                yield {"kind": "event", "event": event.data}
+            yield {
+                "kind": "result",
+                "result": {
+                    "ok": True,
+                    "output": result.output,
+                    "sessionId": result.session_id,
+                },
+            }
+
+        return AgentRun(_records())
+
+    async def destroy(self) -> None:
+        self.destroyed = True
+
+
+class FakeBackend(Backend):
+    """A backend that hands out fakes and records every lifecycle call."""
+
+    def __init__(
+        self,
+        *,
+        supported: Sequence[HarnessType] = (HarnessType.PI, HarnessType.CLAUDE),
+        result: Optional[AgentResult] = None,
+        result_session_id: Optional[str] = None,
+        raise_on_prompt: bool = False,
+    ) -> None:
+        # Instance attribute shadows the ClassVar so `supports()` reflects this fake.
+        self.supported_harnesses = frozenset(supported)
+        self._result = result if result is not None else AgentResult(output="ok")
+        self._result_session_id = result_session_id
+        self._raise = raise_on_prompt
+        self.sandboxes: List[FakeSandbox] = []  # every sandbox handed out, in order
+        self.sessions: List[FakeSession] = []  # every session handed out, in order
+        self.created_sessions: List[Dict[str, Any]] = []  # create_session arguments
+        self.setup_calls = 0
+        self.shutdown_calls = 0
+
+    async def setup(self) -> None:
+        self.setup_calls += 1
+
+    async def shutdown(self) -> None:
+        self.shutdown_calls += 1
+
+    async def create_sandbox(self) -> FakeSandbox:
+        sandbox = FakeSandbox()
+        self.sandboxes.append(sandbox)
+        return sandbox
+
+    async def create_session(
+        self,
+        sandbox,
+        config,
+        *,
+        harness,
+        secrets=None,
+        trace=None,
+        session_id=None,
+    ) -> FakeSession:
+        self.created_sessions.append(  # kept verbatim so tests can assert on them
+            {
+                "sandbox": sandbox,
+                "config": config,
+                "harness": harness,
+                "secrets": secrets,
+                "trace": trace,
+                "session_id": session_id,  # only recorded; not given to the session
+            }
+        )
+        session = FakeSession(
+            result=self._result,  # the same canned result object for every session
+            session_id=self._result_session_id,  # the id this fake session reports
+            raise_on_prompt=self._raise,
+        )
+        self.sessions.append(session)
+        return session
+
+
+@pytest.fixture
+def make_backend():
+    """Factory fixture: call it with ``FakeBackend`` keyword arguments to build one."""
+
+    def _make(**kwargs) -> FakeBackend:
+        return FakeBackend(**kwargs)  # each call builds a new backend
+
+    return _make
+
+
+@pytest.fixture
+def make_env(make_backend):
+    """Factory returning an :class:`Environment` over a fresh :class:`FakeBackend`.
+
+    Returns the Environment; reach its backend via ``env.backend`` to assert on recordings.
+    """
+
+    def _make(*, sandbox_per_session: bool = True, **backend_kwargs) -> Environment:
+        backend = make_backend(**backend_kwargs)
+        return Environment(backend, sandbox_per_session=sandbox_per_session)
+
+    return _make
+
+
+@pytest.fixture
+def golden():
+    """Load a checked-in golden ``/run`` fixture (the cross-language wire contract anchor)."""
+    base = Path(__file__).parent / "golden"
+
+    def _load(name: str) -> Dict[str, Any]:
+        return json.loads((base / name).read_text(encoding="utf-8"))
+
+    return _load
diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json
new file mode 100644
index 0000000000..318722efe5
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json
@@ -0,0 +1,28 @@
+{
+  "backend": "rivet",
+  "harness": "claude",
+  "sandbox": "local",
+  "sessionId": null,
+  "agentsMd": "You are a helpful assistant.",
+  "model": "claude-sonnet-4-6",
+  "messages": [
+    {"role": "user", "content": "hi"}
+  ],
+  "secrets": {"ANTHROPIC_API_KEY": "sk-ant"},
+  "trace": null,
+  "tools": [],
+  "customTools": [
+    {
+      "name": "get_user",
+      "description": "Get a user",
+      "inputSchema": {"type": "object", "properties": {}},
+      "callRef": "tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn",
+      "kind": "callback"
+    }
+  ],
+  "toolCallback": {
+    "endpoint": "https://api.example/tools/call",
+    "authorization": "Access tok-123"
+  },
+  "permissionPolicy": "deny"
+}
diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json
new file mode 100644
index 0000000000..ebfb966479
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json
@@ -0,0 +1,36 @@
+{
+  "backend": "pi",
+  "harness": "pi",
+  "sandbox": "local",
+  "sessionId": "sess-1",
+  "agentsMd": "You are a helpful assistant.",
+  "model": "openai-codex/gpt-5.5",
+  "messages": [
+    {"role": "user", "content": "hi"}
+  ],
+  "secrets": {"OPENAI_API_KEY": "sk-test"},
+  "trace": {
+    "traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
+    "baggage": null,
+    "endpoint": "https://otlp.example/v1/traces",
+    "authorization": "Access tok-123",
+    "captureContent": true
+  },
+  "tools": ["read", "write"],
+  "customTools": [
+    {
+      "name": "get_user",
+      "description": "Get a user",
+      "inputSchema": {"type": "object", "properties": {}},
+      "callRef": "tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn",
+      "kind": "callback"
+    }
+  ],
+  "toolCallback": {
+    "endpoint": "https://api.example/tools/call",
+    "authorization": "Access tok-123"
+  },
+  "permissionPolicy": "auto",
+  "systemPrompt": "You are Pi.",
+  "appendSystemPrompt": "Be terse."
+}
diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json
new file mode 100644
index 0000000000..9791d5a4ea
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json
@@ -0,0 +1,4 @@
+{
+  "ok": false,
+  "error": "model exploded"
+}
diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json
new file mode 100644
index 0000000000..0943d2d047
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json
@@ -0,0 +1,31 @@
+{
+  "ok": true,
+  "output": "Hello!",
+  "messages": [
+    {"role": "assistant", "content": "Hello!"}
+  ],
+  "events": [
+    {"type": "message", "text": "Hello!"},
+    {"type": "usage", "input": 10, "output": 5, "total": 15, "cost": 0.001},
+    {"type": "done", "stopReason": "end_turn"},
+    {"text": "an event with no type, dropped on parse"}
+  ],
+  "usage": {"input": 10, "output": 5, "total": 15, "cost": 0.001},
+  "stopReason": "end_turn",
+  "capabilities": {
+    "textMessages": true,
+    "images": false,
+    "fileAttachments": false,
+    "mcpTools": true,
+    "toolCalls": true,
+    "reasoning": true,
+    "planMode": false,
+    "permissions": false,
+    "usage": true,
+    "streamingDeltas": false,
+    "sessionLifecycle": false
+  },
+  "sessionId": "sess-42",
+  "model": "gpt-5.5",
+  "traceId": "trace-abc"
+}
diff --git a/sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py b/sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py
@@ -0,0 +1 @@
+
diff --git a/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py b/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py
new file mode 100644
index 0000000000..a8a97ab6f0
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from typing import Mapping, Sequence
+
+import pytest
+from pydantic import ValidationError
+
+from agenta.sdk.agents.mcp import (
+    MCPResolver,
+    MCPServerConfig,
+    MissingMCPSecretError,
+)
+from agenta.sdk.agents.tools import MissingSecretPolicy
+
+
+class DictSecretProvider:
+    def __init__(self, values: Mapping[str, str]):
+        self.values = values  # secret name -> secret value
+
+    async def get_many(self, names: Sequence[str]) -> Mapping[str, str]:
+        return {name: self.values[name] for name in names if name in self.values}
+
+
+def test_transport_specific_fields_are_required():
+    with pytest.raises(ValidationError, match="requires command"):
+        MCPServerConfig(name="stdio")  # no transport argument, no command
+    with pytest.raises(ValidationError, match="requires url"):
+        MCPServerConfig(name="remote", transport="http")  # http without a url
+
+
+async def test_resolves_mcp_environment_in_sibling_subsystem():
+    servers = await MCPResolver(
+        secret_provider=DictSecretProvider({"github_pat": "ghp"})
+    ).resolve(
+        [
+            MCPServerConfig(
+                name="github",
+                command="npx",
+                env={"LOG": "info"},
+                secrets={"GITHUB_TOKEN": "github_pat"},
+            )
+        ]
+    )
+    assert servers[0].to_wire()["env"] == {
+        "LOG": "info",
+        "GITHUB_TOKEN": "ghp",
+    }
+
+
+async def test_missing_mcp_secret_is_explicit():
+    with pytest.raises(MissingMCPSecretError):
+        await MCPResolver(secret_provider=DictSecretProvider({})).resolve(
+            [
+                MCPServerConfig(
+                    name="github",
+                    command="npx",
+                    secrets={"GITHUB_TOKEN": "missing"},
+                )
+            ]
+        )
+
+
+async def test_mcp_compatibility_policy_can_omit_missing_secret():
+    servers = await MCPResolver(
+        secret_provider=DictSecretProvider({}),
+        missing_secret_policy=MissingSecretPolicy.OMIT,
+    ).resolve(
+        [
+            MCPServerConfig(
+                name="github",
+                command="npx",
+                secrets={"GITHUB_TOKEN": "missing"},
+            )
+        ]
+    )
+    assert "env" not in servers[0].to_wire()
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py
new file mode 100644
index 0000000000..f4bacd92d4
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py
@@ -0,0 +1,155 @@
+"""``AgentConfig.from_params`` (the three request shapes) and ``RunSelection.from_params``.
+
+The handler parses whatever the playground or a stored config sends into a neutral
+``AgentConfig`` plus a ``RunSelection``. This file locks the three accepted shapes, the
+defaults fall-through, the ``harness_options`` escape hatch, and the run-selection parsing.
+"""
+
+from __future__ import annotations
+
+from agenta.sdk.agents import (
+    AgentConfig,
+    BuiltinToolConfig,
+    RunSelection,
+)
+
+_DEFAULTS = AgentConfig(instructions="default-md", model="default-model", tools=["d"])
+
+
+# ----------------------------------------------------------- AgentConfig shapes
+
+
+def test_from_params_agent_element_shape():
+    config = AgentConfig.from_params(
+        {
+            "agent": {
+                "instructions": "I",
+                "model": "M",
+                "tools": [{"type": "builtin", "name": "read"}],
+                "harness_options": {"pi": {"system": "S"}},
+            }
+        },
+        defaults=_DEFAULTS,
+    )
+    assert config.instructions == "I"
+    assert config.model == "M"
+    assert config.tools == [BuiltinToolConfig(name="read")]
+    assert config.harness_options == {"pi": {"system": "S"}}
+
+
+def test_from_params_prompt_template_shape():
+    config = AgentConfig.from_params(
+        {
+            "prompt": {
+                "messages": [
+                    {"role": "system", "content": "You are helpful."},
+                    {"role": "user", "content": "ignored for instructions"},
+                ],
+                "llm_config": {"model": "M", "tools": ["t"]},
+            }
+        },
+        defaults=_DEFAULTS,
+    )
+    assert config.instructions == "You are helpful."  # system message -> instructions
+    assert config.model == "M"
+    assert config.tools == [BuiltinToolConfig(name="t")]
+
+
+def test_from_params_prompt_template_joins_multiple_system_messages():
+    config = AgentConfig.from_params(
+        {
+            "prompt": {
+                "messages": [
+                    {"role": "system", "content": "First."},
+                    {
+                        "role": "system",
+                        "content": [{"type": "text", "text": "Second."}],
+                    },
+                ],
+                "llm_config": {"model": "M"},
+            }
+        }
+    )
+    assert config.instructions == "First.\n\nSecond."
+
+
+def test_from_params_flat_shape():
+    config = AgentConfig.from_params(
+        {"model": "M", "agents_md": "A", "tools": [{"name": "x"}]},
+        defaults=_DEFAULTS,
+    )
+    assert config.instructions == "A"
+    assert config.model == "M"
+    assert config.tools == [BuiltinToolConfig(name="x")]
+
+
+def test_from_params_falls_back_to_defaults():
+    config = AgentConfig.from_params({}, defaults=_DEFAULTS)
+    assert config.instructions == "default-md"
+    assert config.model == "default-model"
+    assert config.tools == [BuiltinToolConfig(name="d")]
+
+
+def test_from_params_coerces_single_tool_dict_to_list():
+    config = AgentConfig.from_params({"agent": {"tools": {"name": "solo"}}})
+    assert config.tools == [BuiltinToolConfig(name="solo")]  # wrapped into a list
+
+
+def test_harness_options_drops_malformed_and_lowercases_keys():
+    config = AgentConfig.from_params(
+        {
+            "agent": {
+                "harness_options": {
+                    "PI": {"system": "S"},  # key lower-cased
+                    "claude": "not a dict",  # dropped
+                }
+            }
+        }
+    )
+    assert config.harness_options == {"pi": {"system": "S"}}
+
+
+def test_harness_options_falls_back_to_defaults_when_absent():
+    defaults = AgentConfig(harness_options={"pi": {"system": "D"}})
+    config = AgentConfig.from_params(
+        {"agent": {"instructions": "I"}}, defaults=defaults
+    )
+    assert config.harness_options == {"pi": {"system": "D"}}
+
+
+# -------------------------------------------------------------- RunSelection
+
+
+def test_run_selection_defaults():
+    sel = RunSelection.from_params({})  # empty request: built-in defaults apply
+    assert (sel.harness, sel.sandbox, sel.permission_policy) == ("pi", "local", "auto")
+
+
+def test_run_selection_reads_agent_subdict_and_lowercases():
+    sel = RunSelection.from_params(
+        {
+            "agent": {
+                "harness": "Claude",
+                "sandbox": "Daytona",
+                "permission_policy": "Deny",
+            }
+        }
+    )
+    assert (sel.harness, sel.sandbox, sel.permission_policy) == (
+        "claude",
+        "daytona",
+        "deny",
+    )
+
+
+def test_run_selection_honors_custom_defaults():
+    sel = RunSelection.from_params(
+        {}, default_harness="claude", default_sandbox="daytona"
+    )
+    assert sel.harness == "claude"
+    assert sel.sandbox == "daytona"
+
+
+def test_run_selection_reads_flat_request():
+    sel = RunSelection.from_params({"harness": "claude"})  # no "agent" wrapper
+    assert sel.harness == "claude"
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py
new file mode 100644
index 0000000000..5d6ce90e8c
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py
@@ -0,0 +1,81 @@
+"""Capabilities, events, and the small cross-boundary DTOs.
+
+Capabilities are what lets adapters branch on a flag instead of the harness name, so their
+camelCase parsing is contract-critical. Events feed tracing; the trace/tool-callback DTOs
+plumb the run into Agenta.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agenta.sdk.agents import (
+    AgentEvent,
+    HarnessCapabilities,
+    HarnessType,
+    ToolCallback,
+    TraceContext,
+)
+
+
+def test_capabilities_none_and_non_dict_pass_through_as_none():
+    assert HarnessCapabilities.from_wire(None) is None  # nothing reported
+    assert HarnessCapabilities.from_wire("nope") is None  # wrong type, not a dict
+
+
+def test_capabilities_defaults_text_messages_true():
+    caps = HarnessCapabilities.from_wire({})
+    assert caps is not None
+    assert caps.text_messages is True  # the one flag that defaults on
+    assert caps.mcp_tools is False
+    assert caps.images is False
+
+
+def test_capabilities_map_camelcase_flags():
+    caps = HarnessCapabilities.from_wire(
+        {"mcpTools": True, "fileAttachments": True, "sessionLifecycle": True}
+    )
+    assert caps.mcp_tools is True
+    assert caps.file_attachments is True
+    assert caps.session_lifecycle is True
+
+
+def test_agent_event_requires_type():
+    assert AgentEvent.from_wire({"text": "no type"}) is None  # type key missing
+    assert AgentEvent.from_wire({"type": ""}) is None  # falsy type
+    assert AgentEvent.from_wire("not a dict") is None  # not a dict at all
+
+
+def test_agent_event_keeps_full_payload_in_data():
+    event = AgentEvent.from_wire(
+        {"type": "tool_call", "name": "search", "input": {"q": "x"}}
+    )
+    assert event.type == "tool_call"
+    # `data` carries the rest verbatim, including the type key.
+    assert event.data == {"type": "tool_call", "name": "search", "input": {"q": "x"}}
+
+
+def test_trace_context_to_wire_emits_all_keys_camelcase():
+    wire = TraceContext(traceparent="tp", endpoint="ep").to_wire()
+    assert wire == {
+        "traceparent": "tp",
+        "baggage": None,
+        "endpoint": "ep",
+        "authorization": None,
+        "captureContent": True,  # defaults on, camelCase
+    }
+
+
+def test_tool_callback_to_wire():
+    assert ToolCallback(endpoint="e", authorization="a").to_wire() == {
+        "endpoint": "e",  # both field names already match their wire keys
+        "authorization": "a",
+    }
+
+
+def test_harness_type_coerce():
+    assert HarnessType.coerce(HarnessType.PI) is HarnessType.PI  # already a member
+    assert HarnessType.coerce("PI") is HarnessType.PI  # case-insensitive
+    assert HarnessType.coerce("claude") is HarnessType.CLAUDE
+    with pytest.raises(ValueError):
+        HarnessType.coerce("bogus")  # unknown names are rejected, not defaulted
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py
new file mode 100644
index 0000000000..5c8ba74ade
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py
@@ -0,0 +1,90 @@
+"""Content blocks and messages: loose-input coercion and wire serialization.
+
+The playground sends loose dicts and bare strings; the runtime coerces them and emits
+camelCase on the wire. These round-trips lock that coercion.
+"""
+
+from __future__ import annotations
+
+from agenta.sdk.agents import ContentBlock, Message, to_messages
+
+
+def test_content_block_from_string():
+    block = ContentBlock.from_raw("hello")
+    assert block.type == "text"
+    assert block.text == "hello"
+
+
+def test_content_block_from_dict_accepts_both_mime_spellings():
+    camel = ContentBlock.from_raw(
+        {"type": "image", "data": "b64", "mimeType": "image/png"}
+    )
+    snake = ContentBlock.from_raw(
+        {"type": "image", "data": "b64", "mime_type": "image/png"}
+    )
+    assert camel.mime_type == "image/png"
+    assert snake.mime_type == "image/png"
+
+
+def test_content_block_passthrough_and_fallback():
+    existing = ContentBlock(type="text", text="x")
+    assert ContentBlock.from_raw(existing) is existing
+    # A non-string, non-dict value stringifies into a text block.
+    assert ContentBlock.from_raw(42).text == "42"
+
+
+def test_content_block_to_wire_omits_none_and_uses_camelcase():
+    block = ContentBlock(type="image", data="b64", mime_type="image/png")
+    wire = block.to_wire()
+    assert wire == {"type": "image", "data": "b64", "mimeType": "image/png"}
+    assert "text" not in wire  # None fields are omitted
+
+
+def test_text_block_round_trips():
+    assert ContentBlock(type="text", text="hi").to_wire() == {  # to_wire side only
+        "type": "text",
+        "text": "hi",
+    }
+
+
+def test_message_from_raw_requires_role():
+    assert Message.from_raw({"content": "no role"}) is None  # role key missing
+    assert Message.from_raw("not a dict") is None  # not a dict at all
+    msg = Message.from_raw({"role": "user", "content": "hi"})  # well-formed
+    assert msg is not None and msg.role == "user" and msg.content == "hi"
+
+
+def test_message_from_raw_coerces_block_list():
+    msg = Message.from_raw(
+        {"role": "user", "content": [{"type": "text", "text": "a"}, "b"]}
+    )
+    assert isinstance(msg.content, list)
+    assert [b.text for b in msg.content] == ["a", "b"]
+
+
+def test_message_to_wire_string_and_blocks():
+    assert Message(role="user", content="hi").to_wire() == {
+        "role": "user",
+        "content": "hi",
+    }
+    blocks = Message(role="user", content=[ContentBlock(type="text", text="a")])
+    assert blocks.to_wire() == {
+        "role": "user",
+        "content": [{"type": "text", "text": "a"}],
+    }
+
+
+def test_to_messages_filters_invalid_entries():
+    messages = to_messages(
+        [
+            {"role": "user", "content": "hi"},
+            {"content": "no role"},  # dropped
+            None,  # dropped
+            {"role": "assistant", "content": "yo"},
+        ]
+    )
+    assert [m.role for m in messages] == ["user", "assistant"]
+
+
+def test_to_messages_handles_none():
+    assert to_messages(None) == []  # None yields an empty list, not an error
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py
new file mode 100644
index 0000000000..1d53c8f469
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py
@@ -0,0 +1,99 @@
+"""Per-harness configs: how each shapes its own tool/prompt fields for the ``/run`` payload.
+
+These are the per-harness halves of the wire contract. ``test_wire_contract`` checks the full
+payload against the golden; this file pins each config's contribution in isolation so a failure
+points straight at the harness whose shape changed.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agenta.sdk.agents import (
+    ClaudeAgentConfig,
+    ClientToolSpec,
+    HarnessAgentConfig,
+    PiAgentConfig,
+    ToolCallback,
+)
+
+_CALLBACK = ToolCallback(endpoint="https://api.example/tools/call", authorization="A")
+
+
+def test_pi_wire_tools_is_native_and_never_gates():
+    config = PiAgentConfig(
+        builtin_tools=["read"],
+        tool_specs=[
+            ClientToolSpec(
+                name="t",
+                description="t",
+            )
+        ],
+        tool_callback=_CALLBACK,
+    )
+    assert config.wire_tools() == {
+        "tools": ["read"],
+        "customTools": [
+            {
+                "name": "t",
+                "description": "t",
+                "inputSchema": {"type": "object", "properties": {}},
+                "kind": "client",
+            }
+        ],
+        "toolCallback": {
+            "endpoint": "https://api.example/tools/call",
+            "authorization": "A",
+        },
+        "permissionPolicy": "auto",  # Pi never gates tool use
+    }
+
+
+def test_pi_wire_tools_without_callback():
+    assert PiAgentConfig().wire_tools()["toolCallback"] is None  # key kept, value None
+
+
+def test_pi_wire_prompt_emits_only_set_overrides():
+    assert PiAgentConfig().wire_prompt() == {}  # nothing set, nothing emitted
+    assert PiAgentConfig(system="s").wire_prompt() == {"systemPrompt": "s"}
+    assert PiAgentConfig(append_system="a").wire_prompt() == {"appendSystemPrompt": "a"}
+    assert PiAgentConfig(system="", append_system="a").wire_prompt() == {
+        "systemPrompt": "",  # an explicit empty string is still an override here
+        "appendSystemPrompt": "a",
+    }
+
+
+def test_claude_wire_tools_has_no_builtins_and_carries_policy():
+    config = ClaudeAgentConfig(
+        tool_specs=[
+            ClientToolSpec(
+                name="t",
+                description="t",
+            )
+        ],
+        tool_callback=_CALLBACK,
+        permission_policy="deny",
+    )
+    wire = config.wire_tools()
+    assert wire["tools"] == []  # Claude has no Pi built-ins
+    assert wire["customTools"] == [
+        {
+            "name": "t",
+            "description": "t",
+            "inputSchema": {"type": "object", "properties": {}},
+            "kind": "client",
+        }
+    ]
+    assert wire["permissionPolicy"] == "deny"
+
+
+def test_claude_defaults_to_auto_policy_and_empty_prompt():
+    assert ClaudeAgentConfig().wire_tools()["permissionPolicy"] == "auto"  # default
+    assert ClaudeAgentConfig().wire_prompt() == {}  # Claude exposes no prompt overrides
+
+
+def test_base_config_wire_tools_is_abstract():
+    # The base class does not know any engine's tool shape.
+    with pytest.raises(NotImplementedError):
+        HarnessAgentConfig().wire_tools()  # instantiable; it is the call that raises
+    assert HarnessAgentConfig().wire_prompt() == {}  # prompt hook defaults to empty
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py b/sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py
new file mode 100644
index 0000000000..c84761885f
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py
@@ -0,0 +1,127 @@
+"""Environment sandbox policy and the cold ``Harness.prompt`` lifecycle.
+
+These lock the isolation guarantees the design docs promise: a fresh sandbox per session
+under the cold model, the session torn down in a ``finally`` even when the turn raises, the
+session id carried forward, and AGENTS.md provisioned only when there are instructions.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agenta.sdk.agents import (
+    AgentConfig,
+    AgentResult,
+    HarnessType,
+    Message,
+    PiHarness,
+    SessionConfig,
+)
+
+
+def _config(instructions: str | None = "hi") -> SessionConfig:
+    return SessionConfig(agent=AgentConfig(instructions=instructions, model="m"))
+
+
+# ------------------------------------------------------------- Environment policy
+
+
+async def test_fresh_sandbox_per_session(make_env):
+    env = make_env(sandbox_per_session=True)
+    config = _config()
+
+    await env.create_session(
+        PiHarness(env)._to_harness_config(config),
+        harness=HarnessType.PI,
+        session_config=config,
+    )
+    await env.create_session(
+        PiHarness(env)._to_harness_config(config),
+        harness=HarnessType.PI,
+        session_config=config,
+    )
+
+    assert len(env.backend.sandboxes) == 2  # a new sandbox each time (cold model)
+
+
+async def test_shared_sandbox_when_not_per_session(make_env):
+    env = make_env(sandbox_per_session=False)
+    config = _config()
+
+    for _ in range(2):
+        await env.create_session(
+            PiHarness(env)._to_harness_config(config),
+            harness=HarnessType.PI,
+            session_config=config,
+        )
+
+    assert len(env.backend.sandboxes) == 1  # one sandbox reused
+    await env.shutdown()
+    assert env.backend.sandboxes[0].destroyed is True  # shutdown tears it down
+    assert env.backend.shutdown_calls == 1
+
+
+async def test_provisioning_writes_agents_md_only_when_present(make_env):
+    env = make_env()
+    harness = PiHarness(env)
+
+    assert harness._provisioning(_config("hello")) == {"AGENTS.md": b"hello"}
+    assert harness._provisioning(_config("")) == {}
+    assert harness._provisioning(_config("   ")) == {}
+    assert harness._provisioning(_config(None)) == {}
+
+
+async def test_create_session_adds_files_when_provisioned(make_env):
+    env = make_env()
+    config = _config("project conventions")
+
+    await PiHarness(env).create_session(config)
+
+    assert env.backend.sandboxes[0].files == {"AGENTS.md": b"project conventions"}
+
+
+# ------------------------------------------------------- Cold Harness.prompt path
+
+
+async def test_prompt_runs_and_tears_down(make_env):
+    env = make_env(result=AgentResult(output="done"))
+    harness = PiHarness(env)
+
+    result = await harness.prompt(_config(), [Message(role="user", content="hi")])
+
+    assert result.output == "done"
+    assert env.backend.sessions[0].destroyed is True  # torn down on the happy path
+
+
+async def test_prompt_destroys_session_even_when_it_raises(make_env):
+    env = make_env(raise_on_prompt=True)
+    harness = PiHarness(env)
+
+    with pytest.raises(RuntimeError, match="boom"):
+        await harness.prompt(_config(), [Message(role="user", content="hi")])
+
+    assert env.backend.sessions[0].destroyed is True  # finally still runs
+
+
+async def test_prompt_carries_session_id_forward(make_env):
+    env = make_env(
+        result=AgentResult(output="x", session_id="sess-new"),
+        result_session_id="sess-new",
+    )
+    harness = PiHarness(env)
+    config = _config()
+
+    await harness.prompt(config, [Message(role="user", content="hi")])
+
+    assert config.session_id == "sess-new"  # next turn can resume it
+
+
+async def test_prompt_leaves_session_id_when_result_has_none(make_env):
+    env = make_env(result=AgentResult(output="x", session_id=None))
+    harness = PiHarness(env)
+    config = _config()
+    config.session_id = "prior"
+
+    await harness.prompt(config, [Message(role="user", content="hi")])
+
+    assert config.session_id == "prior"  # unchanged
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
new file mode 100644
index 0000000000..7e68d3af93
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
@@ -0,0 +1,273 @@
+"""Harness adapters: the neutral ``SessionConfig`` -> per-harness config translation.
+
+Pi and Claude genuinely differ (Pi takes built-ins and never gates tool use; Claude has no
+built-ins, delivers tools over MCP, and gates on a permission policy). Agenta is Pi with a
+fixed opinion: a forced preamble, persona, tools, and skills. These tests pin down that the
+translation honors those differences and that ``make_harness`` validates support.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agenta.sdk.agents import (
+    AgentaAgentConfig,
+    AgentaHarness,
+    AgentConfig,
+    ClaudeAgentConfig,
+    ClaudeHarness,
+    ClientToolSpec,
+    HarnessType,
+    PiAgentConfig,
+    PiHarness,
+    SessionConfig,
+    ToolCallback,
+    UnsupportedHarnessError,
+    make_harness,
+)
+from agenta.sdk.agents.adapters import harnesses
+from agenta.sdk.agents.adapters.agenta_builtins import (
+    AGENTA_FORCED_APPEND_SYSTEM,
+    AGENTA_FORCED_SKILLS,
+    AGENTA_FORCED_TOOLS,
+    AGENTA_PREAMBLE,
+)
+from agenta.sdk.agents.adapters.harnesses import _normalize_tool_specs, _opt_str
+
+_CALLBACK = ToolCallback(endpoint="https://api.example/tools/call", authorization=None)
+
+
+def _session_config(**kwargs) -> SessionConfig:
+    merged = {"agent": AgentConfig(instructions="hi", model="m"), **kwargs}
+    return SessionConfig(**merged)
+
+
+# --------------------------------------------------------------------------- Pi
+
+
+def test_pi_keeps_builtins_and_native_tools(make_env):
+    harness = PiHarness(make_env(supported=[HarnessType.PI]))
+    config = _session_config(
+        builtin_tools=["read", "write"],
+        custom_tools=[{"name": "t", "callRef": "ref"}],
+        tool_callback=_CALLBACK,
+    )
+
+    result = harness._to_harness_config(config)
+
+    assert isinstance(result, PiAgentConfig)
+    assert result.builtin_tools == ["read", "write"]
+    assert result.custom_tools[0]["name"] == "t"
+    assert result.tool_callback is _CALLBACK
+    assert result.agents_md == "hi"
+    assert result.model == "m"
+
+
+def test_pi_reads_its_harness_options_slice(make_env):
+    harness = PiHarness(make_env(supported=[HarnessType.PI]))
+    agent = AgentConfig(
+        instructions="hi",
+        harness_options={
+            "pi": {"system": "You are Pi.", "append_system": "Be terse."},
+            "claude": {"system": "ignored for Pi"},
+        },
+    )
+    config = _session_config(agent=agent)
+
+    result = harness._to_harness_config(config)
+
+    assert result.system == "You are Pi."
+    assert result.append_system == "Be terse."
+    # The Pi prompt overrides reach the wire.
+    assert result.wire_prompt() == {
+        "systemPrompt": "You are Pi.",
+        "appendSystemPrompt": "Be terse.",
+    }
+
+
+def test_pi_drops_blank_harness_options(make_env):
+    harness = PiHarness(make_env(supported=[HarnessType.PI]))
+    agent = AgentConfig(
+        instructions="hi",
+        harness_options={"pi": {"system": "   ", "append_system": ""}},
+    )
+
+    result = harness._to_harness_config(_session_config(agent=agent))
+
+    assert result.system is None
+    assert result.append_system is None
+    assert result.wire_prompt() == {}
+
+
+# ------------------------------------------------------------------------- Agenta
+
+
+def test_agenta_forces_skills_tools_preamble_and_persona(make_env):
+    harness = AgentaHarness(make_env(supported=[HarnessType.AGENTA]))
+    config = _session_config(
+        agent=AgentConfig(instructions="My project rules.", model="m"),
+        builtin_tools=["web_search"],
+        custom_tools=[{"name": "t", "callRef": "ref"}],
+        tool_callback=_CALLBACK,
+    )
+
+    result = harness._to_harness_config(config)
+
+    assert isinstance(result, AgentaAgentConfig)
+    # AGENTS.md is the base preamble with the author's instructions appended after it.
+    assert result.agents_md.startswith(AGENTA_PREAMBLE)
+    assert result.agents_md.endswith("My project rules.")
+    # Forced tools are unioned in (and `read` is present so Pi renders the skills section).
+    for forced in AGENTA_FORCED_TOOLS:
+        assert forced in result.builtin_tools
+    assert "web_search" in result.builtin_tools
+    assert "read" in result.builtin_tools
+    # Forced skills ride the config and reach the wire.
+    assert result.skills == list(AGENTA_FORCED_SKILLS)
+    assert result.wire_tools()["skills"] == list(AGENTA_FORCED_SKILLS)
+    # The persona is forced onto append_system; custom tools and callback pass through.
+    assert result.append_system.startswith(AGENTA_FORCED_APPEND_SYSTEM)
+    assert result.custom_tools[0]["name"] == "t"
+    assert result.tool_callback is _CALLBACK
+
+
+def test_agenta_forces_tools_without_duplicates(make_env):
+    """A built-in the author already listed is not added a second time when forced."""
+    env = make_env(supported=[HarnessType.AGENTA])
+    already_has_read = _session_config(builtin_tools=["read"])
+
+    translated = AgentaHarness(env)._to_harness_config(already_has_read)
+
+    assert translated.builtin_tools.count("read") == 1
+
+
+def test_agenta_passes_through_user_pi_options(make_env):
+    harness = AgentaHarness(make_env(supported=[HarnessType.AGENTA]))
+    agent = AgentConfig(
+        instructions="hi",
+        harness_options={"pi": {"system": "You are Pi.", "append_system": "Be terse."}},
+    )
+
+    result = harness._to_harness_config(_session_config(agent=agent))
+
+    # `system` passes through; the author's `append_system` is appended after the forced persona.
+    assert result.system == "You are Pi."
+    assert result.append_system.startswith(AGENTA_FORCED_APPEND_SYSTEM)
+    assert result.append_system.endswith("Be terse.")
+
+
+def test_agenta_is_in_process_pi_supported():
+    from agenta.sdk.agents import InProcessPiBackend
+
+    assert InProcessPiBackend().supports(HarnessType.AGENTA)
+
+
+# ------------------------------------------------------------------------- Claude
+
+
+def test_claude_drops_builtins_and_warns(make_env, monkeypatch):
+    recorded = []
+    monkeypatch.setattr(
+        harnesses,
+        "log",
+        type("L", (), {"warning": lambda self, *a, **k: recorded.append(a)})(),
+    )
+    harness = ClaudeHarness(make_env(supported=[HarnessType.CLAUDE]))
+    config = _session_config(
+        builtin_tools=["read"],
+        custom_tools=[{"name": "t", "callRef": "ref"}],
+        permission_policy="deny",
+    )
+
+    result = harness._to_harness_config(config)
+
+    assert isinstance(result, ClaudeAgentConfig)
+    assert not hasattr(result, "builtin_tools")  # Claude has no built-in tools at all
+    assert result.custom_tools[0]["name"] == "t"
+    assert result.permission_policy == "deny"  # Claude carries the policy
+    assert recorded, "expected a warning when built-ins are dropped"
+
+
+def test_claude_no_warning_without_builtins(make_env, monkeypatch):
+    recorded = []
+    monkeypatch.setattr(
+        harnesses,
+        "log",
+        type("L", (), {"warning": lambda self, *a, **k: recorded.append(a)})(),
+    )
+    harness = ClaudeHarness(make_env(supported=[HarnessType.CLAUDE]))
+
+    harness._to_harness_config(_session_config(permission_policy="auto"))
+
+    assert recorded == []
+
+
+# --------------------------------------------------------------- _normalize_tool_specs
+
+
+def test_compat_normalize_tool_specs_returns_typed_specs():
+    specs = [
+        {"name": "keep", "callRef": "r1"},  # missing description + inputSchema
+        {
+            "name": "full",
+            "description": "d",
+            "inputSchema": {"type": "object", "properties": {"x": {}}},
+            "callRef": "r2",
+        },
+    ]
+
+    out = _normalize_tool_specs(specs)
+
+    assert [spec.name for spec in out] == ["keep", "full"]
+    # description falls back to the name; inputSchema falls back to an empty object schema.
+    assert out[0].description == "keep"
+    assert out[0].input_schema == {"type": "object", "properties": {}}
+    assert out[0].call_ref == "r1"
+    # provided values are preserved.
+    assert out[1].description == "d"
+    assert out[1].input_schema["properties"] == {"x": {}}
+
+
+def test_harness_accepts_typed_tool_specs_without_normalizing_dicts(make_env):
+    harness = PiHarness(make_env(supported=[HarnessType.PI]))
+    spec = ClientToolSpec(name="pick", description="Pick")
+    result = harness._to_harness_config(_session_config(tool_specs=[spec]))
+    assert result.tool_specs == [spec]
+
+
+def test_normalize_tool_specs_empty():
+    for nothing in ([], None):
+        assert _normalize_tool_specs(nothing) == []
+
+
+def test_opt_str_keeps_only_nonempty_strings():
+    """``_opt_str`` passes real text through and maps everything else to ``None``."""
+    assert _opt_str("hi") == "hi"
+    rejected = ("  ", "", None, 123)  # blank, empty, missing, non-string
+    for value in rejected:
+        assert _opt_str(value) is None
+
+
+# -------------------------------------------------------------------- make_harness
+
+
+def test_make_harness_maps_string_to_class(make_env):
+    env = make_env(supported=[HarnessType.PI, HarnessType.CLAUDE, HarnessType.AGENTA])
+    pi_names = ("pi", "PI")  # string names are coerced case-insensitively
+    claude_names = ("claude", HarnessType.CLAUDE)
+    agenta_names = ("agenta", HarnessType.AGENTA)
+    assert all(isinstance(make_harness(n, env), PiHarness) for n in pi_names)
+    assert all(isinstance(make_harness(n, env), ClaudeHarness) for n in claude_names)
+    assert all(isinstance(make_harness(n, env), AgentaHarness) for n in agenta_names)
+
+
+def test_make_harness_unsupported_backend_raises(make_env):
+    pi_only = make_env(supported=[HarnessType.PI])  # backend cannot drive Claude
+    with pytest.raises(UnsupportedHarnessError):
+        make_harness("claude", pi_only)
+
+
+def test_make_harness_unknown_name_raises(make_env):
+    env = make_env(supported=[HarnessType.PI])
+    with pytest.raises(ValueError):
+        make_harness("bogus", env)
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py b/sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py
new file mode 100644
index 0000000000..f7cce7d31c
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py
@@ -0,0 +1,430 @@
+"""Tests for the Vercel UI message adapter, the ``/messages`` egress adapter between the
+Vercel ``UIMessage`` shape and the neutral runtime types.
+
+Three conversions:
+
+- ``vercel_ui_messages_to_messages`` — inbound parts -> ``Message``; tool/approval parts are
+  preserved as structured ``tool_call`` / ``tool_result`` content blocks.
+- ``message_to_vercel_ui_message`` — outbound ``AgentResult`` / ``Message`` -> one
+  ``UIMessage`` dict.
+- ``agent_run_to_vercel_parts`` — a live ``AgentRun`` -> Vercel UI Message Stream parts.
+
+The stream tests fabricate an ``AgentRun`` from a fixed record list (the same trick
+``test_streaming.py`` uses), so they are pure and need no backend.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List
+
+from agenta.sdk.agents import AgentRun, AgentResult, Message
+from agenta.sdk.agents.adapters.vercel import (
+    agent_run_to_vercel_parts,
+    message_to_vercel_ui_message,
+    vercel_ui_messages_to_messages,
+)
+
+
+async def _from_list(records: List[Dict[str, Any]]):
+    for record in records:
+        yield record
+
+
+def _run(events: List[Dict[str, Any]], result: Dict[str, Any]) -> AgentRun:
+    """Wrap fabricated events and one closing result record into an ``AgentRun``."""
+    terminal = {"kind": "result", "result": {"ok": True, **result}}
+    stream = [*({"kind": "event", "event": event} for event in events), terminal]
+    return AgentRun(_from_list(stream))
+
+
+async def _collect(run: AgentRun, **kwargs) -> List[Dict[str, Any]]:
+    return [part async for part in agent_run_to_vercel_parts(run, **kwargs)]
+
+
+# ---------------------------------------------------------------------------
+# vercel_ui_messages_to_messages
+# ---------------------------------------------------------------------------
+
+
+class TestFromUIMessages:
+    def test_all_text_message_collapses_to_string(self):
+        msgs = vercel_ui_messages_to_messages(
+            [{"id": "m1", "role": "user", "parts": [{"type": "text", "text": "hi"}]}]
+        )
+        assert len(msgs) == 1
+        assert msgs[0].role == "user"
+        assert msgs[0].content == "hi"
+
+    def test_file_part_becomes_image_or_resource_block(self):
+        msgs = vercel_ui_messages_to_messages(
+            [
+                {
+                    "id": "m1",
+                    "role": "user",
+                    "parts": [
+                        {"type": "text", "text": "look:"},
+                        {"type": "file", "url": "data:...", "mediaType": "image/png"},
+                    ],
+                }
+            ]
+        )
+        blocks = msgs[0].content
+        assert [b.type for b in blocks] == ["text", "image"]
+        assert blocks[1].uri == "data:..."
+        assert blocks[1].mime_type == "image/png"
+
+    def test_tool_part_is_preserved_as_structured_blocks(self):
+        # A resolved tool part -> a tool_call block plus a tool_result block, keyed by
+        # toolCallId, with the field names the runner transcript renders.
+        msgs = vercel_ui_messages_to_messages(
+            [
+                {
+                    "id": "m2",
+                    "role": "assistant",
+                    "parts": [
+                        {
+                            "type": "tool-getWeather",
+                            "toolCallId": "call_1",
+                            "state": "output-available",
+                            "input": {"city": "Paris"},
+                            "output": {"weather": "sunny"},
+                        }
+                    ],
+                }
+            ]
+        )
+        wire = [b.to_wire() for b in msgs[0].content]
+        assert wire == [
+            {
+                "type": "tool_call",
+                "toolCallId": "call_1",
+                "toolName": "getWeather",
+                "input": {"city": "Paris"},
+            },
+            {
+                "type": "tool_result",
+                "toolCallId": "call_1",
+                "toolName": "getWeather",
+                "output": {"weather": "sunny"},
+                "isError": False,
+            },
+        ]
+
+    def test_tool_error_part_sets_is_error(self):
+        msgs = vercel_ui_messages_to_messages(
+            [
+                {
+                    "id": "m2",
+                    "role": "assistant",
+                    "parts": [
+                        {
+                            "type": "tool-getWeather",
+                            "toolCallId": "call_1",
+                            "state": "output-error",
+                            "input": {"city": "Paris"},
+                            "errorText": "boom",
+                        }
+                    ],
+                }
+            ]
+        )
+        result_block = msgs[0].content[1]
+        assert result_block.type == "tool_result"
+        assert result_block.is_error is True
+        assert result_block.output == "boom"
+
+    def test_approval_response_becomes_tool_result_keyed_by_call_id(self):
+        # The cross-turn HITL reply: a tool_result keyed by toolCallId so the runtime resumes.
+        msgs = vercel_ui_messages_to_messages(
+            [
+                {
+                    "id": "m3",
+                    "role": "user",
+                    "parts": [
+                        {
+                            "type": "tool-approval-response",
+                            "toolCallId": "call_1",
+                            "approved": True,
+                        }
+                    ],
+                }
+            ]
+        )
+        block = msgs[0].content[0]
+        assert block.type == "tool_result"
+        assert block.tool_call_id == "call_1"
+        assert block.output == {"approved": True}
+
+    def test_approval_request_part_is_dropped_on_replay(self):
+        # The server's own request, echoed back; regenerated on replay, not model input.
+        msgs = vercel_ui_messages_to_messages(
+            [
+                {
+                    "id": "m4",
+                    "role": "assistant",
+                    "parts": [
+                        {"type": "tool-approval-request", "approvalId": "p1"},
+                        {"type": "text", "text": "thinking"},
+                    ],
+                }
+            ]
+        )
+        assert msgs[0].content == "thinking"
+
+    def test_plain_role_content_message_still_parses(self):
+        # A non-parts {role, content} message in a mixed history falls back cleanly.
+        msgs = vercel_ui_messages_to_messages([{"role": "user", "content": "hello"}])
+        assert msgs[0].content == "hello"
+
+
+# ---------------------------------------------------------------------------
+# message_to_vercel_ui_message
+# ---------------------------------------------------------------------------
+
+
+class TestToUIMessage:
+    def test_agent_result_becomes_assistant_text_message(self):
+        ui = message_to_vercel_ui_message(AgentResult(output="Paris."), message_id="m9")
+        assert ui == {
+            "id": "m9",
+            "role": "assistant",
+            "parts": [{"type": "text", "text": "Paris."}],
+        }
+
+    def test_message_with_tool_blocks_round_trips_to_parts(self):
+        from agenta.sdk.agents import ContentBlock
+
+        msg = Message(
+            role="assistant",
+            content=[
+                ContentBlock(
+                    type="tool_call",
+                    tool_call_id="c1",
+                    tool_name="getWeather",
+                    input={"city": "Paris"},
+                ),
+            ],
+        )
+        ui = message_to_vercel_ui_message(msg)
+        assert ui["role"] == "assistant"
+        assert ui["parts"][0]["type"] == "tool-getWeather"
+        assert ui["parts"][0]["toolCallId"] == "c1"
+
+
+# ---------------------------------------------------------------------------
+# agent_run_to_vercel_parts
+# ---------------------------------------------------------------------------
+
+
+class TestUIMessageStream:
+    async def test_full_turn_part_order(self):
+        run = _run(
+            events=[
+                {
+                    "type": "tool_call",
+                    "id": "call_1",
+                    "name": "getWeather",
+                    "input": {"city": "Paris"},
+                },
+                {
+                    "type": "tool_result",
+                    "id": "call_1",
+                    "output": "sunny",
+                    "data": {"w": "sunny"},
+                },
+                {"type": "message_start", "id": "t1"},
+                {"type": "message_delta", "id": "t1", "delta": "It is sunny."},
+                {"type": "message_end", "id": "t1"},
+                {"type": "usage", "input": 820, "output": 36, "cost": 0.004},
+                {"type": "done", "stopReason": "end_turn"},
+            ],
+            result={"output": "It is sunny.", "sessionId": "sess_123"},
+        )
+        parts = await _collect(run, session_id="sess_123")
+
+        assert [p["type"] for p in parts] == [
+            "start",
+            "start-step",
+            "tool-input-start",
+            "tool-input-available",
+            "tool-output-available",
+            "text-start",
+            "text-delta",
+            "text-end",
+            "finish-step",
+            "finish",
+        ]
+        # start carries the session id; tool output prefers the structured `data`.
+        assert parts[0]["messageMetadata"] == {"sessionId": "sess_123"}
+        assert parts[4]["output"] == {"w": "sunny"}
+        # finish carries the usage and the stop reason.
+        assert parts[-1]["finishReason"] == "end_turn"
+        assert parts[-1]["messageMetadata"]["usage"] == {
+            "input": 820,
+            "output": 36,
+            "cost": 0.004,
+        }
+
+    async def test_usage_falls_back_to_terminal_result(self):
+        run = _run(
+            events=[
+                {"type": "message", "text": "hi"},
+                {"type": "done", "stopReason": "end_turn"},
+            ],
+            result={"output": "hi", "usage": {"input": 10, "output": 2}},
+        )
+        parts = await _collect(run, session_id="s1")
+        assert parts[-1]["messageMetadata"]["usage"] == {"input": 10, "output": 2}
+
+    async def test_coalesced_message_emits_text_block(self):
+        run = _run(
+            events=[{"type": "message", "text": "Paris."}, {"type": "done"}],
+            result={"output": "Paris."},
+        )
+        parts = await _collect(run, session_id="s1")
+        types = [p["type"] for p in parts]
+        assert "text-start" in types and "text-delta" in types and "text-end" in types
+        delta = next(p for p in parts if p["type"] == "text-delta")
+        assert delta["delta"] == "Paris."
+
+    async def test_permission_interaction_becomes_approval_request(self):
+        run = _run(
+            events=[
+                {
+                    "type": "interaction_request",
+                    "id": "perm_1",
+                    "kind": "permission",
+                    "payload": {
+                        "toolCallId": "call_1",
+                        "availableReplies": ["once", "always", "reject"],
+                        "toolCall": {"toolCallId": "call_1", "name": "deleteFile"},
+                    },
+                },
+                {"type": "done"},
+            ],
+            result={"output": ""},
+        )
+        parts = await _collect(run, session_id="s1")
+        approval = next(p for p in parts if p["type"] == "tool-approval-request")
+        assert approval["approvalId"] == "perm_1"
+        # REQUIRED top-level toolCallId binds the approval to its tool part (RFC / AI SDK).
+        assert approval["toolCallId"] == "call_1"
+        assert approval["availableReplies"] == ["once", "always", "reject"]
+        assert approval["toolCall"] == {"toolCallId": "call_1", "name": "deleteFile"}
+
+    async def test_permission_tool_call_id_falls_back_to_nested_tool_call(self):
+        # No top-level toolCallId on the payload: dig it out of the nested ACP toolCall detail.
+        run = _run(
+            events=[
+                {
+                    "type": "interaction_request",
+                    "id": "perm_2",
+                    "kind": "permission",
+                    "payload": {
+                        "availableReplies": ["once", "reject"],
+                        "toolCall": {"id": "call_9", "name": "deleteFile"},
+                    },
+                },
+                {"type": "done"},
+            ],
+            result={"output": ""},
+        )
+        parts = await _collect(run, session_id="s1")
+        approval = next(p for p in parts if p["type"] == "tool-approval-request")
+        assert approval["toolCallId"] == "call_9"
+
+    async def test_tool_denial_becomes_output_denied(self):
+        # A human denied the tool: it never ran, so emit tool-output-denied (not -available).
+        run = _run(
+            events=[
+                {"type": "tool_call", "id": "c1", "name": "deleteFile", "input": {}},
+                {"type": "tool_result", "id": "c1", "denied": True},
+                {"type": "done"},
+            ],
+            result={"output": ""},
+        )
+        parts = await _collect(run, session_id="s1")
+        denied = next(p for p in parts if p["type"] == "tool-output-denied")
+        assert denied["toolCallId"] == "c1"
+        # A denied result is neither output-available nor output-error.
+        types = [p["type"] for p in parts]
+        assert "tool-output-available" not in types
+        assert "tool-output-error" not in types
+
+    async def test_finish_carries_trace_id_from_param(self):
+        run = _run(
+            events=[
+                {"type": "message", "text": "hi"},
+                {"type": "done", "stopReason": "end_turn"},
+            ],
+            result={"output": "hi", "usage": {"input": 10, "output": 2}},
+        )
+        parts = await _collect(run, session_id="s1", trace_id="abc123")
+        # traceId and usage coexist under the finish messageMetadata.
+        assert parts[-1]["messageMetadata"]["traceId"] == "abc123"
+        assert parts[-1]["messageMetadata"]["usage"] == {"input": 10, "output": 2}
+
+    async def test_finish_trace_id_falls_back_to_terminal_result(self):
+        run = _run(
+            events=[
+                {"type": "message", "text": "hi"},
+                {"type": "done", "stopReason": "end_turn"},
+            ],
+            result={"output": "hi", "traceId": "trace_from_result"},
+        )
+        parts = await _collect(run, session_id="s1")
+        assert parts[-1]["messageMetadata"]["traceId"] == "trace_from_result"
+
+    async def test_render_hint_passes_through_tool_parts(self):
+        render = {"kind": "component", "component": "WeatherCard"}
+        run = _run(
+            events=[
+                {
+                    "type": "tool_call",
+                    "id": "c1",
+                    "name": "w",
+                    "input": {},
+                    "render": render,
+                },
+                {
+                    "type": "tool_result",
+                    "id": "c1",
+                    "data": {"w": "sunny"},
+                    "render": render,
+                },
+                {"type": "done"},
+            ],
+            result={"output": ""},
+        )
+        parts = await _collect(run, session_id="s1")
+        available = next(p for p in parts if p["type"] == "tool-input-available")
+        output = next(p for p in parts if p["type"] == "tool-output-available")
+        assert available["render"] == render
+        assert output["render"] == render
+
+    async def test_tool_error_becomes_output_error(self):
+        run = _run(
+            events=[
+                {"type": "tool_call", "id": "c1", "name": "w", "input": {}},
+                {"type": "tool_result", "id": "c1", "output": "boom", "isError": True},
+                {"type": "done"},
+            ],
+            result={"output": ""},
+        )
+        parts = await _collect(run, session_id="s1")
+        err = next(p for p in parts if p["type"] == "tool-output-error")
+        assert err["toolCallId"] == "c1"
+        assert err["errorText"] == "boom"
+
+    async def test_terminal_failure_emits_error_part_and_no_finish(self):
+        # Hand-built records (not ``_run``) so the terminal result is ``ok: False``.
+        records = [
+            {"kind": "event", "event": {"type": "message", "text": "partial"}},
+            {"kind": "result", "result": {"ok": False, "error": "kaboom"}},
+        ]
+        parts = await _collect(AgentRun(_from_list(records)), session_id="s1")
+        types = [p["type"] for p in parts]
+        assert types[0] == "start"
+        assert "finish" not in types
+        error = next(p for p in parts if p["type"] == "error")
+        assert "kaboom" in error["errorText"]
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
new file mode 100644
index 0000000000..4aa24a86b1
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
@@ -0,0 +1,301 @@
+"""The ``/run`` wire contract: ``request_to_wire`` / ``result_from_wire``.
+
+This is the highest-value regression guard in the agent runtime. ``wire.py`` (the Python
+producer) and ``services/agent/src/protocol.ts`` (the TS consumer) are hand-mirrored, so the
+two can drift silently. The golden fixtures in ``golden/`` are the shared anchor: this file
+asserts the Python side against them; the TS side will check the same files in a later PR.
+
+If a field is added, renamed, or removed on the wire, a golden assertion here fails on
+purpose. Regenerate the golden deliberately, and update ``protocol.ts`` and ``KNOWN_REQUEST_KEYS``
+to match.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agenta.sdk.agents import (
+    AgentaAgentConfig,
+    ClaudeAgentConfig,
+    HarnessType,
+    Message,
+    PiAgentConfig,
+    ToolCallback,
+    TraceContext,
+)
+from agenta.sdk.agents.utils.wire import request_to_wire, result_from_wire
+
+# The full set of top-level keys ``request_to_wire`` may emit. The TS ``AgentRunRequest``
+# interface must declare a superset of these. An emitted key missing from this set fails
+# the subset checks below; when adding one here, add it to protocol.ts in the same change.
+KNOWN_REQUEST_KEYS = {
+    "backend",
+    "harness",
+    "sandbox",
+    "sessionId",
+    "agentsMd",
+    "model",
+    "messages",
+    "secrets",
+    "trace",
+    "tools",
+    "customTools",
+    "mcpServers",
+    "toolCallback",
+    "permissionPolicy",
+    "systemPrompt",
+    "appendSystemPrompt",
+    "skills",
+}
+
+_CUSTOM_TOOL = {
+    "name": "get_user",
+    "description": "Get a user",
+    "inputSchema": {"type": "object", "properties": {}},
+    "callRef": "tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn",
+    "kind": "callback",
+}
+_CALLBACK = ToolCallback(
+    endpoint="https://api.example/tools/call", authorization="Access tok-123"
+)
+
+
+def _pi_payload():
+    config = PiAgentConfig(
+        agents_md="You are a helpful assistant.",
+        model="openai-codex/gpt-5.5",
+        builtin_tools=["read", "write"],
+        custom_tools=[dict(_CUSTOM_TOOL)],
+        tool_callback=_CALLBACK,
+        system="You are Pi.",
+        append_system="Be terse.",
+    )
+    return request_to_wire(
+        engine="pi",
+        harness=HarnessType.PI,
+        sandbox="local",
+        config=config,
+        messages=[Message(role="user", content="hi")],
+        secrets={"OPENAI_API_KEY": "sk-test"},
+        trace=TraceContext(
+            traceparent="00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
+            endpoint="https://otlp.example/v1/traces",
+            authorization="Access tok-123",
+            capture_content=True,
+        ),
+        session_id="sess-1",
+    )
+
+
+def _claude_payload():
+    config = ClaudeAgentConfig(
+        agents_md="You are a helpful assistant.",
+        model="claude-sonnet-4-6",
+        custom_tools=[dict(_CUSTOM_TOOL)],
+        tool_callback=_CALLBACK,
+        permission_policy="deny",
+    )
+    return request_to_wire(
+        engine="rivet",
+        harness=HarnessType.CLAUDE,
+        sandbox="local",
+        config=config,
+        messages=[Message(role="user", content="hi")],
+        secrets={"ANTHROPIC_API_KEY": "sk-ant"},
+        trace=None,
+        session_id=None,
+    )
+
+
+def _agenta_payload():
+    config = AgentaAgentConfig(
+        agents_md="Agenta preamble + project rules.",
+        model="gpt-5.5",
+        builtin_tools=["read", "bash"],
+        custom_tools=[dict(_CUSTOM_TOOL)],
+        tool_callback=_CALLBACK,
+        append_system="You are an Agenta agent.",
+        skills=["agenta-getting-started"],
+    )
+    return request_to_wire(
+        engine="pi",
+        harness=HarnessType.AGENTA,
+        sandbox="local",
+        config=config,
+        messages=[Message(role="user", content="hi")],
+    )
+
+
+def test_request_to_wire_agenta_carries_skills_and_pi_shape():
+    payload = _agenta_payload()
+    assert set(payload) <= KNOWN_REQUEST_KEYS
+    # Agenta is a Pi config: same tool shape, never gates, exposes the prompt overrides...
+    assert payload["permissionPolicy"] == "auto"
+    assert payload["tools"] == ["read", "bash"]
+    assert payload["appendSystemPrompt"] == "You are an Agenta agent."
+    # ...plus the forced skills the runner loads.
+    assert payload["skills"] == ["agenta-getting-started"]
+
+
+def test_request_to_wire_pi_has_no_skills_key():
+    # Only the Agenta config emits `skills`; the plain Pi config must not.
+    assert "skills" not in _pi_payload()
+
+
+def test_request_to_wire_pi_matches_golden(golden):
+    assert _pi_payload() == golden("run_request.pi.json")
+
+
+def test_request_to_wire_claude_matches_golden(golden):
+    payload = _claude_payload()
+    assert payload == golden("run_request.claude.json")
+    # Claude-specific invariants the golden encodes, asserted explicitly so a failure reads clearly.
+    assert payload["tools"] == []  # Claude has no Pi built-ins
+    assert payload["permissionPolicy"] == "deny"  # Claude gates tool use
+    assert "systemPrompt" not in payload  # Claude exposes no prompt overrides
+    assert "appendSystemPrompt" not in payload
+
+
+def test_request_to_wire_has_no_prompt_key():
+    # The serializer emits `messages` only; the TS side derives the latest turn with
+    # `resolvePromptText`. This asymmetry is intentional and easy to break, so lock it.
+    payload = request_to_wire(
+        engine="pi",
+        harness=HarnessType.PI,
+        sandbox="local",
+        config=PiAgentConfig(),
+        messages=[Message(role="user", content="hi")],
+    )
+    assert "prompt" not in payload
+
+
+def test_request_to_wire_emits_only_known_keys():
+    pi = _pi_payload()
+    claude = _claude_payload()
+    assert set(pi) <= KNOWN_REQUEST_KEYS
+    assert set(claude) <= KNOWN_REQUEST_KEYS
+    # The Pi case must actually exercise the prompt-override keys, otherwise this guard would
+    # silently stop covering them.
+    assert {"systemPrompt", "appendSystemPrompt"} <= set(pi)
+
+
+def test_pi_permission_policy_is_always_auto():
+    # Pi never gates tool use; note this checks the default config only, not an override.
+    payload = request_to_wire(
+        engine="pi",
+        harness=HarnessType.PI,
+        sandbox="local",
+        config=PiAgentConfig(),
+        messages=[Message(role="user", content="hi")],
+    )
+    assert payload["permissionPolicy"] == "auto"
+
+
+def test_result_from_wire_parses_ok(golden):
+    result = result_from_wire(golden("run_result.ok.json"))
+
+    assert result.output == "Hello!"
+    assert [m.role for m in result.messages] == ["assistant"]
+    # The event with no `type` is dropped on parse; the other three survive.
+    assert [e.type for e in result.events] == ["message", "usage", "done"]
+    assert result.events[0].data == {"type": "message", "text": "Hello!"}
+    assert result.usage == {"input": 10, "output": 5, "total": 15, "cost": 0.001}
+    assert result.stop_reason == "end_turn"
+    assert result.session_id == "sess-42"
+    assert result.model == "gpt-5.5"
+    assert result.trace_id == "trace-abc"
+    # Capabilities come back camelCase and map onto snake_case flags.
+    assert result.capabilities is not None
+    assert result.capabilities.mcp_tools is True
+    assert result.capabilities.images is False
+    assert result.capabilities.text_messages is True
+
+
+def test_result_from_wire_raises_on_failure(golden):
+    with pytest.raises(RuntimeError, match="model exploded"):
+        result_from_wire(golden("run_result.error.json"))
+
+
+def test_result_from_wire_minimal_ok():
+    # A bare success: empty output, empty collections, no capabilities.
+    result = result_from_wire({"ok": True})
+    assert result.output == ""
+    assert result.messages == []
+    assert result.events == []
+    assert result.capabilities is None
+    assert result.session_id is None
+
+
+def test_request_to_wire_carries_code_client_and_mcp_specs():
+    # The three-axes surface reaches the wire intact: a code spec keeps its executor fields
+    # (kind/runtime/code/env) and the orthogonal axes (needsApproval/render); a client spec
+    # has no callRef; user MCP servers ride `mcpServers`.
+    config = PiAgentConfig(
+        custom_tools=[
+            {
+                "name": "calc",
+                "description": "calc",
+                "inputSchema": {"type": "object", "properties": {}},
+                "kind": "code",
+                "runtime": "python",
+                "code": "def main(): return 1",
+                "env": {"STRIPE_API_KEY": "sk"},
+                "needsApproval": True,
+                "render": {"kind": "component", "component": "Calc"},
+            },
+            {
+                "name": "pick",
+                "description": "pick",
+                "inputSchema": {"type": "object", "properties": {}},
+                "kind": "client",
+            },
+        ],
+        mcp_servers=[
+            {
+                "name": "github",
+                "transport": "stdio",
+                "command": "npx",
+                "env": {"GITHUB_TOKEN": "ghp"},
+                "tools": ["create_issue"],
+            }
+        ],
+    )
+    payload = request_to_wire(
+        engine="pi",
+        harness=HarnessType.PI,
+        sandbox="local",
+        config=config,
+        messages=[Message(role="user", content="hi")],
+    )
+    assert set(payload) <= KNOWN_REQUEST_KEYS
+    code = next(t for t in payload["customTools"] if t["name"] == "calc")
+    assert code["kind"] == "code"
+    assert code["runtime"] == "python"
+    assert code["code"] == "def main(): return 1"
+    assert code["env"] == {"STRIPE_API_KEY": "sk"}
+    assert code["needsApproval"] is True
+    assert code["render"] == {"kind": "component", "component": "Calc"}
+    client = next(t for t in payload["customTools"] if t["name"] == "pick")
+    assert client["kind"] == "client"
+    assert "callRef" not in client
+    assert payload["mcpServers"] == [
+        {
+            "name": "github",
+            "transport": "stdio",
+            "command": "npx",
+            "env": {"GITHUB_TOKEN": "ghp"},
+            "tools": ["create_issue"],
+        }
+    ]
+
+
+def test_request_to_wire_omits_mcp_servers_when_none():
+    # No declared servers -> no `mcpServers` key (keeps a tool-free payload byte-identical).
+    payload = request_to_wire(
+        engine="pi",
+        harness=HarnessType.PI,
+        sandbox="local",
+        config=PiAgentConfig(),
+        messages=[Message(role="user", content="hi")],
+    )
+    assert "mcpServers" not in payload
diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py b/sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py
@@ -0,0 +1 @@
+
diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py
new file mode 100644
index 0000000000..f823b4f32c
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py
@@ -0,0 +1,69 @@
+"""Tests for the agent tool models: validation, wire serialization, and secret-safe repr."""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from agenta.sdk.agents.tools import (
+    CallbackToolSpec,
+    CodeToolConfig,
+    CodeToolSpec,
+)
+
+
+def test_canonical_config_forbids_unexpected_fields():
+    """An undeclared keyword passed to ``CodeToolConfig`` raises ``ValidationError``."""
+    fields = {"name": "calc", "script": "def main(): return 1", "unexpected": True}
+    with pytest.raises(ValidationError):
+        CodeToolConfig(**fields)
+
+
+def test_code_spec_serializes_only_runner_fields():
+    """``CodeToolSpec.to_wire()`` returns exactly the camelCase mapping spelled out here."""
+    expected_wire = {
+        "name": "calc",
+        "description": "Calculate",
+        "inputSchema": {"type": "object", "properties": {}},
+        "kind": "code",
+        "runtime": "python",
+        "code": "def main(): return 1",
+        "env": {"TOKEN": "secret"},
+        "needsApproval": True,
+        "render": {"kind": "component", "component": "Calculator"},
+    }
+    code_spec = CodeToolSpec(
+        name="calc",
+        description="Calculate",
+        input_schema={"type": "object", "properties": {}},
+        runtime="python",
+        code="def main(): return 1",
+        env={"TOKEN": "secret"},
+        needs_approval=True,
+        render={"kind": "component", "component": "Calculator"},
+    )
+    assert code_spec.to_wire() == expected_wire
+
+
+def test_callback_spec_has_stable_typed_contract():
+    """A ``CallbackToolSpec`` serializes with kind ``callback`` and its ``callRef``."""
+    call_ref = "tools.composio.github.GET_USER.c1"
+    callback_spec = CallbackToolSpec(
+        name="get_user",
+        description="Get user",
+        call_ref=call_ref,
+    )
+    assert callback_spec.to_wire()["kind"] == "callback"
+    assert callback_spec.to_wire()["callRef"] == call_ref
+
+
+def test_secret_values_are_hidden_from_repr():
+    """The env value given to ``CodeToolSpec`` does not appear in ``repr(spec)``."""
+    secret_value = "do-not-print"
+    private_spec = CodeToolSpec(
+        name="private",
+        description="private",
+        code="...",
+        env={"TOKEN": secret_value},
+    )
+    assert secret_value not in repr(private_spec)
diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py
new file mode 100644
index 0000000000..ff6f212f9f
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py
@@ -0,0 +1,66 @@
+"""Tests for tool-config parsing: the strict parser and the legacy-tolerant coercers."""
+
+from __future__ import annotations
+
+import pytest
+
+from agenta.sdk.agents.tools import (
+    BuiltinToolConfig,
+    GatewayToolConfig,
+    ToolConfigurationError,
+    coerce_tool_config,
+    coerce_tool_configs,
+    parse_tool_config,
+)
+
+
+def test_strict_parser_accepts_only_canonical_mapping():
+    """``parse_tool_config`` accepts a typed mapping and rejects one without ``type``."""
+    parsed = parse_tool_config({"type": "builtin", "name": "read"})
+    assert isinstance(parsed, BuiltinToolConfig)
+    with pytest.raises(ToolConfigurationError):
+        parse_tool_config({"name": "read"})
+
+
+def test_compat_parser_accepts_legacy_shapes():
+    """``coerce_tool_config`` accepts a bare name and a ``type: composio`` mapping."""
+    assert coerce_tool_config("bash") == BuiltinToolConfig(name="bash")
+    legacy_gateway = {
+        "type": "composio",
+        "integration": "github",
+        "action": "GET_USER",
+        "connection": "c1",
+    }
+    coerced = coerce_tool_config(legacy_gateway)
+    assert isinstance(coerced, GatewayToolConfig)
+    assert coerced.provider == "composio"
+
+
+def test_compat_parser_accepts_playground_gateway_slug_and_metadata():
+    """A ``function.name`` slug entry keeps its ``needs_approval`` and ``render`` values."""
+    playground_entry = {
+        "function": {"name": "tools__composio__github__GET_USER__c1"},
+        "needs_approval": True,
+        "render": {"kind": "component", "component": "User"},
+    }
+    coerced = coerce_tool_config(playground_entry)
+    assert coerced.needs_approval is True
+    assert coerced.render == {"kind": "component", "component": "User"}
+
+
+def test_collect_mode_reports_invalid_entries():
+    """With ``on_error="collect"`` valid entries are kept and bad ones reported by index."""
+    outcome = coerce_tool_configs(
+        ["read", {"invalid": True}, None],
+        on_error="collect",
+    )
+    assert outcome.tool_configs == [BuiltinToolConfig(name="read")]
+    reported = [diagnostic.index for diagnostic in outcome.diagnostics]
+    assert reported == [1, 2]
+
+
+def test_default_compat_mode_raises_with_index():
+    """By default an invalid entry raises ``ToolConfigurationError`` carrying its index."""
+    with pytest.raises(ToolConfigurationError) as excinfo:
+        coerce_tool_configs(["read", {"invalid": True}])
+    assert excinfo.value.index == 1
diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py
new file mode 100644
index 0000000000..7c7ef58b46
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py
@@ -0,0 +1,150 @@
+"""Tests for ``ToolResolver``: secret scoping, gateway delegation, duplicate names."""
+
+from __future__ import annotations
+
+from typing import Mapping, Sequence
+
+import pytest
+
+from agenta.sdk.agents.tools import (
+    BuiltinToolConfig,
+    CallbackToolSpec,
+    ClientToolConfig,
+    CodeToolConfig,
+    DuplicateToolNameError,
+    GatewayToolConfig,
+    GatewayToolResolution,
+    MissingSecretPolicy,
+    MissingToolSecretError,
+    ToolCallback,
+    ToolResolver,
+    UnsupportedToolProviderError,
+)
+
+
+class DictSecretProvider:
+    """In-memory secret provider that records every ``get_many`` request."""
+
+    def __init__(self, values: Mapping[str, str]):
+        self.values = values
+        self.requests: list[list[str]] = []
+
+    async def get_many(self, names: Sequence[str]) -> Mapping[str, str]:
+        """Log the requested names and return only those present in ``values``."""
+        self.requests.append(list(names))
+        found = {}
+        for name in names:
+            if name in self.values:
+                found[name] = self.values[name]
+        return found
+
+
+class FakeGatewayResolver:
+    """Gateway stand-in that turns each config into a ``CallbackToolSpec``."""
+
+    async def resolve(
+        self,
+        tools: Sequence[GatewayToolConfig],
+    ) -> GatewayToolResolution:
+        """Build one callback spec per config, copying ``needs_approval`` and ``render``."""
+        specs = []
+        for tool in tools:
+            specs.append(
+                CallbackToolSpec(
+                    name=tool.name or f"{tool.integration}__{tool.action}",
+                    description=tool.name or tool.action,
+                    call_ref=tool.reference,
+                    needs_approval=tool.needs_approval,
+                    render=tool.render,
+                )
+            )
+        return GatewayToolResolution(
+            tool_specs=specs,
+            tool_callback=ToolCallback(endpoint="https://example/tools/call"),
+        )
+
+
+async def test_resolves_builtin_code_client_and_scopes_secrets():
+    """Secrets are fetched in one request and each code tool gets only those it declared."""
+    provider = DictSecretProvider({"A": "a", "B": "b"})
+    resolver = ToolResolver(secret_provider=provider)
+    resolved = await resolver.resolve(
+        [
+            BuiltinToolConfig(name="read"),
+            CodeToolConfig(name="one", script="...", secrets=["A"]),
+            CodeToolConfig(name="two", script="...", secrets=["B"]),
+            ClientToolConfig(name="pick"),
+        ]
+    )
+    assert resolved.builtin_names == ["read"]
+    assert provider.requests == [["A", "B"]]
+    specs = {spec.name: spec for spec in resolved.tool_specs}
+    assert specs["one"].env == {"A": "a"}
+    assert specs["two"].env == {"B": "b"}
+    assert specs["pick"].kind == "client"
+
+
+async def test_missing_declared_secret_fails_by_default():
+    """A declared secret the provider cannot supply raises ``MissingToolSecretError``."""
+    strict = ToolResolver(secret_provider=DictSecretProvider({}))
+    with pytest.raises(MissingToolSecretError) as excinfo:
+        await strict.resolve(
+            [CodeToolConfig(name="charge", script="...", secrets=["TOKEN"])]
+        )
+    assert excinfo.value.secret_names == ("TOKEN",)
+
+
+async def test_missing_secret_can_be_explicitly_omitted_for_compatibility():
+    """Under ``MissingSecretPolicy.OMIT`` the unresolved secret yields an empty ``env``."""
+    lenient = ToolResolver(
+        secret_provider=DictSecretProvider({}),
+        missing_secret_policy=MissingSecretPolicy.OMIT,
+    )
+    charge = CodeToolConfig(name="charge", script="...", secrets=["TOKEN"])
+    resolved = await lenient.resolve([charge])
+    assert resolved.tool_specs[0].env == {}
+
+
+async def test_gateway_requires_injected_adapter():
+    """A gateway tool with no injected resolver raises ``UnsupportedToolProviderError``."""
+    with pytest.raises(UnsupportedToolProviderError):
+        bare_resolver = ToolResolver()
+        gateway = GatewayToolConfig(
+            integration="github",
+            action="GET_USER",
+            connection="c1",
+        )
+        await bare_resolver.resolve([gateway])
+
+
+async def test_gateway_metadata_survives_resolution():
+    """``needs_approval`` and ``render`` set on the config appear on the resolved spec."""
+    resolver = ToolResolver(gateway_resolver=FakeGatewayResolver())
+    gateway = GatewayToolConfig(
+        integration="github",
+        action="GET_USER",
+        connection="c1",
+        needs_approval=True,
+        render={"kind": "component", "component": "User"},
+    )
+    resolved = await resolver.resolve([gateway])
+    resolved_spec = resolved.tool_specs[0]
+    assert resolved_spec.needs_approval is True
+    assert resolved_spec.render == {"kind": "component", "component": "User"}
+
+
+@pytest.mark.parametrize(
+    "configs",
+    [
+        [BuiltinToolConfig(name="read"), BuiltinToolConfig(name="read")],
+        [
+            BuiltinToolConfig(name="same"),
+            ClientToolConfig(name="same"),
+        ],
+        [ClientToolConfig(name="same"), ClientToolConfig(name="same")],
+    ],
+)
+async def test_duplicate_model_visible_names_are_rejected(configs):
+    """Two tools sharing a name, within or across kinds, raise ``DuplicateToolNameError``."""
+    with pytest.raises(DuplicateToolNameError):
+        await ToolResolver().resolve(configs)
diff --git a/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py b/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py
index b796680685..94d99e0fdf 100644
--- a/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py
+++ b/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py
@@ -79,6 +79,36 @@ def handler(parameters):
 
         assert kwargs["parameters"] == {"correct_answer_key": "answer"}
 
+    @pytest.mark.asyncio
+    async def test_session_id_is_passed_to_explicit_handler_argument(self):
+        def handler(session_id):
+            return session_id
+
+        request = WorkflowServiceRequest(
+            session_id="sess_request",
+            data=WorkflowRequestData(),
+        )
+
+        mw = NormalizerMiddleware()
+        kwargs = await mw._normalize_request(request, handler)
+
+        assert kwargs["session_id"] == "sess_request"
+
+    @pytest.mark.asyncio
+    async def test_session_id_is_not_added_to_var_kwargs(self):
+        def handler(**kwargs):
+            return kwargs
+
+        request = WorkflowServiceRequest(
+            session_id="sess_request",
+            data=WorkflowRequestData(inputs={"prompt": "hi"}),
+        )
+
+        mw = NormalizerMiddleware()
+        kwargs = await mw._normalize_request(request, handler)
+
+        assert "session_id" not in kwargs
+
 
 class TestAsyncGenerator:
     @pytest.mark.asyncio
diff --git a/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py
new file mode 100644
index 0000000000..89a06d6783
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py
@@ -0,0 +1,284 @@
+"""Tests for the agent ``/messages`` + ``/load-session`` endpoints.
+
+Two layers:
+
+- Direct unit tests of the two pure Vercel routing helpers (``resolve_session_id``,
+  ``inject_stream_session_id``).
+- HTTP tests over a Starlette ``TestClient`` driving the real ``route(flags={"is_agent":
+  True})`` wiring with a fake agent handler (no harness/runner). Registering on a bare
+  ``FastAPI`` app keeps the auth middleware out; a stand-in sets ``request.state.auth``. The
+  offline tracing mock (mirroring ``test_negotiation_integration``) lets ``wf.invoke`` run
+  without ``ag.init()``.
+"""
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from agenta.sdk.agents import Message
+from agenta.sdk.agents.adapters.vercel.routing import (
+    VERCEL_MESSAGE_PROTOCOL,
+    VERCEL_MESSAGE_PROTOCOL_VERSION,
+    inject_stream_session_id,
+    make_load_session_endpoint,
+    resolve_session_id,
+)
+from agenta.sdk.decorators.routing import route
+from agenta.sdk.models.workflows import (
+    LoadSessionRequest,
+    WorkflowBatchResponse,
+    WorkflowServiceStatus,
+    WorkflowStreamingResponse,
+)
+
+
+# ---------------------------------------------------------------------------
+# Pure helpers
+# ---------------------------------------------------------------------------
+
+
+def test_resolve_session_id_mints_echoes_and_validates():
+    assert resolve_session_id("sess_ok") == "sess_ok"
+    assert resolve_session_id(None).startswith("sess_")
+    assert resolve_session_id("bad id!") is None  # space + '!' are out of charset
+    assert resolve_session_id("x" * 200) is None  # over the length bound
+
+
+@pytest.mark.asyncio
+async def test_inject_stream_session_id_stamps_first_start_part():
+    async def base():
+        yield {"type": "start", "messageId": "m1"}
+        yield {"type": "text-delta", "id": "t1", "delta": "x"}
+
+    resp = WorkflowStreamingResponse(generator=base)
+    inject_stream_session_id(resp, "sess_z")
+
+    parts = [p async for p in resp.iterator()]
+    assert parts[0]["messageMetadata"]["sessionId"] == "sess_z"
+    assert parts[1] == {"type": "text-delta", "id": "t1", "delta": "x"}
+
+
+# ---------------------------------------------------------------------------
+# HTTP wiring
+# ---------------------------------------------------------------------------
+
+
+_UI_MESSAGE = {"role": "user", "parts": [{"type": "text", "text": "hello"}]}
+
+
+def _assert_vercel_message_protocol(response):
+    assert response.headers["x-ag-messages-format"] == VERCEL_MESSAGE_PROTOCOL
+    assert response.headers["x-ag-messages-version"] == VERCEL_MESSAGE_PROTOCOL_VERSION
+
+
+def _build_client() -> TestClient:
+    app = FastAPI()
+
+    # Stand in for AuthMiddleware (omitted by using a bare app): the endpoints read
+    # ``request.state.auth``. No credentials needed — the fake handler runs locally.
+    @app.middleware("http")
+    async def _fake_auth(request, call_next):
+        request.state.auth = {}
+        return await call_next(request)
+
+    @route("/", app=app, flags={"is_agent": True})
+    async def agent(
+        messages=None,
+        inputs=None,
+        parameters=None,
+        stream=None,
+        session_id=None,
+    ):
+        if stream:
+
+            async def gen():
+                yield {"type": "start", "messageId": "m1"}
+                yield {"type": "text-start", "id": "t1"}
+                yield {"type": "text-delta", "id": "t1", "delta": "hi"}
+                yield {"type": "text-end", "id": "t1"}
+                yield {"type": "finish"}
+
+            return gen()
+        return {
+            "role": "assistant",
+            "content": "hi",
+            "echoed": messages,
+            "session_id": session_id,
+        }
+
+    return TestClient(app)
+
+
+def _build_failing_client() -> TestClient:
+    app = FastAPI()
+
+    @app.middleware("http")
+    async def _fake_auth(request, call_next):
+        request.state.auth = {}
+        return await call_next(request)
+
+    @route("/", app=app, flags={"is_agent": True})
+    async def failing_agent(messages=None, inputs=None, parameters=None, stream=None):
+        return WorkflowBatchResponse(
+            status=WorkflowServiceStatus(
+                code=500,
+                message="tool resolution failed before stream",
+                type="https://agenta.ai/docs/errors#v1:sdk:tool-resolution-error",
+            )
+        )
+
+    return TestClient(app)
+
+
+@pytest.fixture()
+def client():
+    """A TestClient with the offline tracing mock active so ``wf.invoke`` runs without
+    ``ag.init()`` (same approach as ``test_negotiation_integration``)."""
+    with (
+        patch("agenta.sdk.decorators.tracing.ag") as mock_ag,
+        patch("agenta.sdk.decorators.running.ag") as mock_run_ag,
+    ):
+        mock_span = MagicMock()
+        mock_span.is_recording.return_value = False
+        mock_span.get_span_context.return_value = MagicMock(trace_id=0, span_id=0)
+        mock_ag.tracing = MagicMock()
+        mock_ag.tracing.get_current_span.return_value = mock_span
+        mock_ag.tracing.redact = None
+        mock_tracer = MagicMock()
+        mock_tracer.start_as_current_span.return_value.__enter__ = MagicMock(
+            return_value=mock_span
+        )
+        mock_tracer.start_as_current_span.return_value.__exit__ = MagicMock(
+            return_value=None
+        )
+        mock_ag.tracer = mock_tracer
+        mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE = MagicMock()
+        mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE.api_key = None
+        yield _build_client()
+
+
+def test_messages_json_mints_session_and_folds_conversation(client):
+    res = client.post("/messages", json={"data": {"messages": [_UI_MESSAGE]}})
+    assert res.status_code == 200
+    _assert_vercel_message_protocol(res)
+    body = res.json()
+    assert body["session_id"].startswith("sess_")
+    assert body["data"]["outputs"]["content"] == "hi"
+    assert body["data"]["outputs"]["session_id"] == body["session_id"]
+    # The Vercel UIMessage was folded to a neutral {role, content} message for the handler.
+    assert body["data"]["outputs"]["echoed"] == [{"role": "user", "content": "hello"}]
+
+
+def test_messages_echoes_supplied_session_id(client):
+    res = client.post(
+        "/messages",
+        json={"session_id": "sess_keep", "data": {"messages": [_UI_MESSAGE]}},
+    )
+    assert res.status_code == 200
+    _assert_vercel_message_protocol(res)
+    assert res.json()["session_id"] == "sess_keep"
+    assert res.json()["data"]["outputs"]["session_id"] == "sess_keep"
+
+
+def test_messages_sse_streams_with_done_and_session_in_start(client):
+    res = client.post(
+        "/messages",
+        headers={"accept": "text/event-stream"},
+        json={"session_id": "sess_abc", "data": {"messages": [_UI_MESSAGE]}},
+    )
+    assert res.status_code == 200
+    _assert_vercel_message_protocol(res)
+    assert res.headers["x-vercel-ai-ui-message-stream"] == "v1"
+    text = res.text
+    assert '"sessionId": "sess_abc"' in text  # stamped onto the start part
+    assert '"type": "text-delta"' in text
+    assert "data: [DONE]" in text
+
+
+def test_messages_sse_preserves_json_error_before_stream():
+    with (
+        patch("agenta.sdk.decorators.tracing.ag") as mock_ag,
+        patch("agenta.sdk.decorators.running.ag") as mock_run_ag,
+    ):
+        mock_span = MagicMock()
+        mock_span.is_recording.return_value = False
+        mock_span.get_span_context.return_value = MagicMock(trace_id=0, span_id=0)
+        mock_ag.tracing = MagicMock()
+        mock_ag.tracing.get_current_span.return_value = mock_span
+        mock_ag.tracing.redact = None
+        mock_tracer = MagicMock()
+        mock_tracer.start_as_current_span.return_value.__enter__ = MagicMock(
+            return_value=mock_span
+        )
+        mock_tracer.start_as_current_span.return_value.__exit__ = MagicMock(
+            return_value=None
+        )
+        mock_ag.tracer = mock_tracer
+        mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE = MagicMock()
+        mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE.api_key = None
+        client = _build_failing_client()
+
+        response = client.post(
+            "/messages",
+            headers={"accept": "text/event-stream"},
+            json={
+                "session_id": "sess_error",
+                "data": {"messages": [_UI_MESSAGE]},
+            },
+        )
+
+    assert response.status_code == 500
+    _assert_vercel_message_protocol(response)
+    assert response.headers["content-type"].startswith("application/json")
+    assert "x-vercel-ai-ui-message-stream" not in response.headers
+    body = response.json()
+    assert body["status"]["code"] == 500
+    assert "tool resolution failed before stream" in body["status"]["message"]
+    assert body["session_id"] == "sess_error"
+    assert "[DONE]" not in response.text
+
+
+def test_messages_rejects_invalid_session_id(client):
+    res = client.post(
+        "/messages", json={"session_id": "bad id!", "data": {"messages": []}}
+    )
+    assert res.status_code == 400
+    _assert_vercel_message_protocol(res)
+
+
+def test_load_session_returns_stub_history(client):
+    res = client.post("/load-session", json={"session_id": "sess_abc"})
+    assert res.status_code == 200
+    _assert_vercel_message_protocol(res)
+    assert res.json() == {"session_id": "sess_abc", "messages": []}
+
+
+@pytest.mark.asyncio
+async def test_load_session_uses_session_store_port():
+    class _Store:
+        async def load(self, session_id):
+            assert session_id == "sess_abc"
+            return [Message(role="user", content="hello")]
+
+        async def save_turn(self, session_id, *, messages, result=None):
+            raise AssertionError("load-session should only load")
+
+    endpoint = make_load_session_endpoint(session_store=_Store())
+    response = await endpoint(None, LoadSessionRequest(session_id="sess_abc"))
+
+    assert response.status_code == 200
+    assert response.headers["x-ag-messages-format"] == VERCEL_MESSAGE_PROTOCOL
+    assert response.headers["x-ag-messages-version"] == VERCEL_MESSAGE_PROTOCOL_VERSION
+    assert json.loads(response.body) == {
+        "session_id": "sess_abc",
+        "messages": [
+            {
+                "id": "msg-1",
+                "role": "user",
+                "parts": [{"type": "text", "text": "hello"}],
+            }
+        ],
+    }
diff --git a/sdks/python/oss/tests/pytest/utils/test_routing.py b/sdks/python/oss/tests/pytest/utils/test_routing.py
index 0bed6e7922..a1851e5907 100644
--- a/sdks/python/oss/tests/pytest/utils/test_routing.py
+++ b/sdks/python/oss/tests/pytest/utils/test_routing.py
@@ -7,6 +7,8 @@
 3. router= param   — issues DeprecationWarning, falls back to prefixed registration
 """
 
+import asyncio
+import json
 import warnings
 
 import pytest
@@ -15,10 +17,13 @@
 
 from agenta.sdk.decorators.routing import (
     _RESERVED_PATHS,
+    _make_stream_response,
     _validate_path,
     create_app,
     route,
 )
+from agenta.sdk.agents.adapters.vercel.sse import vercel_sse_stream
+from agenta.sdk.models.workflows import WorkflowStreamingResponse
 
 
 # ---------------------------------------------------------------------------
@@ -127,6 +132,49 @@ async def foo():
         assert "/foo/invoke" not in parent_schema.get("paths", {})
 
 
+# ---------------------------------------------------------------------------
+# 2b. Agent-only endpoints (/messages + /load-session), gated on is_agent
+# ---------------------------------------------------------------------------
+
+
+class TestAgentEndpoints:
+    def test_is_agent_sub_app_has_messages_and_load_session(self):
+        app = create_app()
+
+        @route("/chat", app=app, flags={"is_agent": True})
+        async def chat():
+            return {"role": "assistant", "content": "hi"}
+
+        schema = _mounts(app)["/chat"].app.openapi()
+        assert "/messages" in schema["paths"]
+        assert "/load-session" in schema["paths"]
+        assert "/invoke" in schema["paths"]  # the base routes are still present
+
+    def test_non_agent_route_has_no_agent_endpoints(self):
+        app = create_app()
+
+        @route("/qa", app=app)
+        async def qa():
+            return "answer"
+
+        schema = _mounts(app)["/qa"].app.openapi()
+        assert "/messages" not in schema["paths"]
+        assert "/load-session" not in schema["paths"]
+
+    def test_root_agent_route_registers_on_mount_root(self):
+        # The agent app uses route("/", app=app, flags={"is_agent": True}); the endpoints
+        # land on the app itself, not a mounted sub-app.
+        app = create_app()
+
+        @route("/", app=app, flags={"is_agent": True})
+        async def agent():
+            return {"role": "assistant", "content": "hi"}
+
+        schema = app.openapi()
+        assert "/messages" in schema["paths"]
+        assert "/load-session" in schema["paths"]
+
+
 # ---------------------------------------------------------------------------
 # 3. router= deprecation warning
 # ---------------------------------------------------------------------------
@@ -179,3 +227,76 @@ async def noisy():
         mounts_after = set(_mount_paths(default_app))
         # No new mounts should have appeared on default_app
         assert mounts_after == mounts_before
+
+
+# ---------------------------------------------------------------------------
+# 4. Reserved agent paths (/messages, /load-session)
+# ---------------------------------------------------------------------------
+
+
+class TestReservedAgentPaths:
+    def test_agent_endpoint_names_are_reserved(self):
+        assert {"messages", "load-session"} <= _RESERVED_PATHS
+
+    @pytest.mark.parametrize("reserved", ["messages", "load-session"])
+    def test_route_rejects_reserved_agent_path(self, reserved):
+        with pytest.raises(ValueError, match=reserved):
+            route(f"/{reserved}")
+
+
+# ---------------------------------------------------------------------------
+# 5. Vercel UI Message Stream framing
+# ---------------------------------------------------------------------------
+
+
+async def _collect(aiter):
+    return [chunk async for chunk in aiter]
+
+
+def _sse_payload(chunk: str) -> str:
+    """The JSON body of one `data: <json>\\n\\n` SSE event."""
+    assert chunk.startswith("data: ") and chunk.endswith("\n\n")
+    return chunk[len("data: ") : -2]
+
+
+class TestVercelUIMessageStream:
+    def test_framing_wraps_each_part_and_appends_done(self):
+        async def parts():
+            yield {"type": "start", "messageMetadata": {"sessionId": "sess_1"}}
+            yield {"type": "text-delta", "id": "t1", "delta": "hi"}
+            yield {"type": "finish"}
+
+        chunks = asyncio.run(_collect(vercel_sse_stream(parts())))
+
+        # one SSE event per part, plus the terminal [DONE]
+        assert len(chunks) == 4
+        assert json.loads(_sse_payload(chunks[0])) == {
+            "type": "start",
+            "messageMetadata": {"sessionId": "sess_1"},
+        }
+        assert json.loads(_sse_payload(chunks[1])) == {
+            "type": "text-delta",
+            "id": "t1",
+            "delta": "hi",
+        }
+        assert chunks[-1] == "data: [DONE]\n\n"
+
+    def test_done_is_emitted_for_an_empty_stream(self):
+        async def parts():
+            return
+            yield  # pragma: no cover — makes this an async generator
+
+        chunks = asyncio.run(_collect(vercel_sse_stream(parts())))
+        assert chunks == ["data: [DONE]\n\n"]
+
+    def test_make_stream_response_vercel_sets_headers_and_media_type(self):
+        async def parts():
+            yield {"type": "start"}
+
+        response = WorkflowStreamingResponse(generator=lambda: parts())
+        res = _make_stream_response(response, "vercel")
+
+        assert res.media_type == "text/event-stream"
+        assert res.headers["x-vercel-ai-ui-message-stream"] == "v1"
+        assert res.headers["cache-control"] == "no-cache"
+        assert res.headers["x-accel-buffering"] == "no"

From 741fc7365b6ea450e658dc8ebce52205ecb213a3 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Fri, 19 Jun 2026 22:10:54 +0200
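Subject: [PATCH 2/4] fix(sdk): validate agent runner configuration

Runner-backed backends now raise AgentRunnerConfigurationError at
construction time unless a url, a command, or a cwd containing src/cli.ts
is given. Also makes AgentConfig.from_params fall back to defaults.tools
when the agent element has no "tools" key (an explicit empty list still
clears them).
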
---
 sdks/python/agenta/sdk/agents/__init__.py     |  7 ++-
 .../sdk/agents/adapters/_runner_config.py     | 40 +++++++++++++
 .../agenta/sdk/agents/adapters/in_process.py  | 10 +++-
 .../agenta/sdk/agents/adapters/rivet.py       | 10 +++-
 sdks/python/agenta/sdk/agents/dtos.py         |  2 +-
 sdks/python/agenta/sdk/agents/errors.py       | 10 +++-
 .../unit/agents/test_dtos_agent_config.py     | 20 +++++++
 .../unit/agents/test_harness_adapters.py      |  2 +-
 .../unit/agents/test_runner_adapter_config.py | 60 +++++++++++++++++++
 9 files changed, 151 insertions(+), 10 deletions(-)
 create mode 100644 sdks/python/agenta/sdk/agents/adapters/_runner_config.py
 create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py

diff --git a/sdks/python/agenta/sdk/agents/__init__.py b/sdks/python/agenta/sdk/agents/__init__.py
index b1cd4370d2..6dc3bd3196 100644
--- a/sdks/python/agenta/sdk/agents/__init__.py
+++ b/sdks/python/agenta/sdk/agents/__init__.py
@@ -48,7 +48,11 @@
     TraceContext,
     to_messages,
 )
-from .errors import ToolResolutionError, UnsupportedHarnessError
+from .errors import (
+    AgentRunnerConfigurationError,
+    ToolResolutionError,
+    UnsupportedHarnessError,
+)
 from .interfaces import (
     Backend,
     Environment,
@@ -170,6 +174,7 @@
     "Environment",
     "Harness",
     # Errors
+    "AgentRunnerConfigurationError",
     "UnsupportedHarnessError",
     "ToolResolutionError",
     # Adapters
diff --git a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
new file mode 100644
index 0000000000..94398ae3f8
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
@@ -0,0 +1,40 @@
+"""Shared constructor validation for runner-backed adapters."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import List, Optional, Sequence
+
+from ..errors import AgentRunnerConfigurationError
+
+DEFAULT_RUNNER_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"]
+RUNNER_CLI_PATH = Path("src") / "cli.ts"
+
+
+def resolve_runner_command(
+    *,
+    backend_name: str,
+    url: Optional[str],
+    command: Optional[Sequence[str]],
+    cwd: Optional[str],
+) -> List[str]:
+    if url:
+        return list(command) if command is not None else list(DEFAULT_RUNNER_COMMAND)
+    if command is not None:
+        return list(command)
+    if not cwd:
+        raise AgentRunnerConfigurationError(
+            f"{backend_name} requires a runner transport: pass url for an HTTP runner, "
+            "pass command for a custom subprocess runner, or pass cwd pointing to a "
+            f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}."
+        )
+
+    cli_path = Path(cwd) / RUNNER_CLI_PATH
+    if not cli_path.is_file():
+        raise AgentRunnerConfigurationError(
+            f"{backend_name} could not find runner CLI at {cli_path}. Pass url for an "
+            "HTTP runner, pass command for a custom subprocess runner, or set cwd to a "
+            f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}."
+        )
+
+    return list(DEFAULT_RUNNER_COMMAND)
diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py
index bfd1528bd7..aef8d7bc64 100644
--- a/sdks/python/agenta/sdk/agents/adapters/in_process.py
+++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py
@@ -32,8 +32,7 @@
     request_to_wire,
     result_from_wire,
 )
-
-_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"]
+from ._runner_config import resolve_runner_command
 
 
 class InProcessSandbox(Sandbox):
@@ -127,7 +126,12 @@ def __init__(
         timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")),
     ) -> None:
         self._url = url
-        self._command: List[str] = list(command or _DEFAULT_COMMAND)
+        self._command: List[str] = resolve_runner_command(
+            backend_name=type(self).__name__,
+            url=url,
+            command=command,
+            cwd=cwd,
+        )
         self._cwd = cwd
         self._timeout = timeout
 
diff --git a/sdks/python/agenta/sdk/agents/adapters/rivet.py b/sdks/python/agenta/sdk/agents/adapters/rivet.py
index 2316eb0dea..78dbee0635 100644
--- a/sdks/python/agenta/sdk/agents/adapters/rivet.py
+++ b/sdks/python/agenta/sdk/agents/adapters/rivet.py
@@ -32,8 +32,7 @@
     request_to_wire,
     result_from_wire,
 )
-
-_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"]
+from ._runner_config import resolve_runner_command
 
 
 class RivetSandbox(Sandbox):
@@ -128,7 +127,12 @@ def __init__(
     ) -> None:
         self._sandbox = sandbox
         self._url = url
-        self._command: List[str] = list(command or _DEFAULT_COMMAND)
+        self._command: List[str] = resolve_runner_command(
+            backend_name=type(self).__name__,
+            url=url,
+            command=command,
+            cwd=cwd,
+        )
         self._cwd = cwd
         self._timeout = timeout
 
diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py
index 0a050b4cb1..d066eee132 100644
--- a/sdks/python/agenta/sdk/agents/dtos.py
+++ b/sdks/python/agenta/sdk/agents/dtos.py
@@ -677,7 +677,7 @@ def _parse_agent_fields(
             or agent.get("instructions")
             or defaults.instructions,
             agent.get("model") or defaults.model,
-            agent.get("tools"),
+            agent.get("tools") if agent.get("tools") is not None else defaults.tools,
         )
 
     prompt_cfg = params.get("prompt")
diff --git a/sdks/python/agenta/sdk/agents/errors.py b/sdks/python/agenta/sdk/agents/errors.py
index b9f136a472..7517df9061 100644
--- a/sdks/python/agenta/sdk/agents/errors.py
+++ b/sdks/python/agenta/sdk/agents/errors.py
@@ -7,7 +7,11 @@
 from .dtos import HarnessType
 from .tools.errors import ToolResolutionError
 
-__all__ = ["UnsupportedHarnessError", "ToolResolutionError"]
+__all__ = [
+    "AgentRunnerConfigurationError",
+    "UnsupportedHarnessError",
+    "ToolResolutionError",
+]
 
 if TYPE_CHECKING:
     from .interfaces import Backend
@@ -24,3 +28,7 @@ def __init__(self, harness: HarnessType, backend: "Backend") -> None:
         )
         self.harness = harness
         self.backend = backend
+
+
+class AgentRunnerConfigurationError(RuntimeError):
+    """Raised when a runner-backed adapter lacks a usable transport configuration."""
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py
index f4bacd92d4..e4cf65716e 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py
@@ -90,6 +90,26 @@ def test_from_params_falls_back_to_defaults():
     assert config.tools == [BuiltinToolConfig(name="d")]
 
 
+def test_from_params_agent_element_preserves_default_tools_when_absent():
+    config = AgentConfig.from_params(
+        {"agent": {"instructions": "I", "model": "M"}},
+        defaults=_DEFAULTS,
+    )
+
+    assert config.instructions == "I"
+    assert config.model == "M"
+    assert config.tools == [BuiltinToolConfig(name="d")]
+
+
+def test_from_params_agent_element_empty_tools_clears_defaults():
+    config = AgentConfig.from_params(
+        {"agent": {"tools": []}},
+        defaults=_DEFAULTS,
+    )
+
+    assert config.tools == []
+
+
 def test_from_params_coerces_single_tool_dict_to_list():
     config = AgentConfig.from_params({"agent": {"tools": {"name": "solo"}}})
     assert config.tools == [BuiltinToolConfig(name="solo")]
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
index 7e68d3af93..fe0eb52fbe 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
@@ -159,7 +159,7 @@ def test_agenta_passes_through_user_pi_options(make_env):
 def test_agenta_is_in_process_pi_supported():
     from agenta.sdk.agents import InProcessPiBackend
 
-    assert InProcessPiBackend().supports(HarnessType.AGENTA)
+    assert InProcessPiBackend(url="http://runner").supports(HarnessType.AGENTA)
 
 
 # ------------------------------------------------------------------------- Claude
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py
new file mode 100644
index 0000000000..f71863915e
--- /dev/null
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py
@@ -0,0 +1,60 @@
+"""Constructor validation for runner-backed backend adapters."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+from agenta.sdk.agents import (
+    AgentRunnerConfigurationError,
+    InProcessPiBackend,
+    RivetBackend,
+)
+
+
+@pytest.fixture
+def runner_dir(tmp_path: Path) -> Path:
+    cli = tmp_path / "src" / "cli.ts"
+    cli.parent.mkdir()
+    cli.write_text("console.log('runner')\n", encoding="utf-8")
+    return tmp_path
+
+
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+def test_default_subprocess_requires_cwd(backend_cls):
+    with pytest.raises(AgentRunnerConfigurationError, match="pass cwd"):
+        backend_cls()
+
+
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+def test_default_subprocess_requires_runner_cli(backend_cls, tmp_path: Path):
+    with pytest.raises(AgentRunnerConfigurationError, match="src/cli.ts"):
+        backend_cls(cwd=str(tmp_path))
+
+
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+def test_default_subprocess_accepts_runner_wrapper_cwd(backend_cls, runner_dir: Path):
+    backend = backend_cls(cwd=str(runner_dir))
+
+    assert backend._cwd == str(runner_dir)
+    assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"]
+
+
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+def test_http_transport_does_not_require_runner_wrapper(backend_cls):
+    backend = backend_cls(url="http://agent-pi:8765")
+
+    assert backend._url == "http://agent-pi:8765"
+    assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"]
+
+
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+def test_custom_command_does_not_require_runner_wrapper(backend_cls):
+    command = [sys.executable, "-m", "runner"]
+
+    backend = backend_cls(command=command)
+
+    assert backend._command == command
+    assert backend._cwd is None

From 2a7c1299b265d0248362c23c9ae91beb11a7c034 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Mon, 22 Jun 2026 12:32:03 +0200
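Subject: [PATCH 3/4] refactor(sdk): rename rivet adapter/backend to
 sandbox-agent

Besides the rename, this patch also:
* adds HarnessType.AGENTA to SandboxAgentBackend.supported_harnesses;
* changes the engine id sent on the /run wire from "rivet" to
  "sandbox-agent";
* renames the timeout environment variable AGENTA_AGENT_TIMEOUT to
  AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS (the old name is no longer read).
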
---
 sdks/python/agenta/__init__.py                |  2 +-
 sdks/python/agenta/sdk/agents/__init__.py     |  8 ++--
 .../agenta/sdk/agents/adapters/__init__.py    |  6 +--
 .../sdk/agents/adapters/_runner_config.py     |  4 +-
 .../agenta/sdk/agents/adapters/in_process.py  |  8 ++--
 .../agenta/sdk/agents/adapters/local.py       | 13 ++++--
 .../adapters/{rivet.py => sandbox_agent.py}   | 46 +++++++++++--------
 sdks/python/agenta/sdk/agents/dtos.py         |  2 +-
 sdks/python/agenta/sdk/agents/interfaces.py   |  2 +-
 .../agenta/sdk/agents/utils/ts_runner.py      |  2 +-
 sdks/python/agenta/sdk/agents/utils/wire.py   |  2 +-
 .../agents/golden/run_request.claude.json     |  2 +-
 .../unit/agents/test_harness_adapters.py      |  9 ++++
 .../unit/agents/test_runner_adapter_config.py | 16 +++----
 .../pytest/unit/agents/test_wire_contract.py  |  2 +-
 15 files changed, 72 insertions(+), 52 deletions(-)
 rename sdks/python/agenta/sdk/agents/adapters/{rivet.py => sandbox_agent.py} (78%)

diff --git a/sdks/python/agenta/__init__.py b/sdks/python/agenta/__init__.py
index dc01c3396a..15d1af84a4 100644
--- a/sdks/python/agenta/__init__.py
+++ b/sdks/python/agenta/__init__.py
@@ -63,7 +63,7 @@
     InProcessPiBackend,
     LocalBackend,
     PiHarness,
-    RivetBackend,
+    SandboxAgentBackend,
     RunSelection,
     SessionConfig,
     make_harness,
diff --git a/sdks/python/agenta/sdk/agents/__init__.py b/sdks/python/agenta/sdk/agents/__init__.py
index 6dc3bd3196..534ca0f650 100644
--- a/sdks/python/agenta/sdk/agents/__init__.py
+++ b/sdks/python/agenta/sdk/agents/__init__.py
@@ -5,7 +5,7 @@
 - ``dtos.py`` — data contracts (``AgentConfig``, ``SessionConfig``, ``Message``, ...).
 - ``interfaces.py`` — the ports (ABCs): ``Backend``, ``Environment``, ``Sandbox``,
   ``Session``, ``Harness``.
-- ``adapters/`` — implementations: ``RivetBackend`` / ``InProcessPiBackend`` / ``LocalBackend``
+- ``adapters/`` — implementations: ``SandboxAgentBackend`` / ``InProcessPiBackend`` / ``LocalBackend``
   and ``PiHarness`` / ``ClaudeHarness``.
 - ``utils/`` — shared plumbing (the ``/run`` wire and the transports to the TS runner).
 
@@ -16,7 +16,7 @@
 
     cfg = ag.ConfigManager.get_from_registry(app_slug="my-agent")
     agent = ag.AgentConfig.from_params(cfg)
-    harness = ag.PiHarness(ag.Environment(ag.RivetBackend()))
+    harness = ag.PiHarness(ag.Environment(ag.SandboxAgentBackend(url="http://agent-pi:8765")))
     result = await harness.prompt(ag.SessionConfig(agent=agent), [Message(role="user", content="hi")])
 """
 
@@ -26,7 +26,7 @@
     InProcessPiBackend,
     LocalBackend,
     PiHarness,
-    RivetBackend,
+    SandboxAgentBackend,
     make_harness,
 )
 from .dtos import (
@@ -178,7 +178,7 @@
     "UnsupportedHarnessError",
     "ToolResolutionError",
     # Adapters
-    "RivetBackend",
+    "SandboxAgentBackend",
     "InProcessPiBackend",
     "LocalBackend",
     "PiHarness",
diff --git a/sdks/python/agenta/sdk/agents/adapters/__init__.py b/sdks/python/agenta/sdk/agents/adapters/__init__.py
index 30e555d82b..9cce3f7240 100644
--- a/sdks/python/agenta/sdk/agents/adapters/__init__.py
+++ b/sdks/python/agenta/sdk/agents/adapters/__init__.py
@@ -1,6 +1,6 @@
 """Adapters: concrete implementations of the agent runtime ports.
 
-- Backend adapters: ``RivetBackend`` (rivet over ACP), ``InProcessPiBackend`` (in-process Pi,
+- Backend adapters: ``SandboxAgentBackend`` (sandbox-agent over ACP), ``InProcessPiBackend`` (in-process Pi,
   the reference backend), ``LocalBackend`` (standalone SDK runs; not yet implemented).
 - Harness adapters: ``PiHarness``, ``ClaudeHarness``, ``AgentaHarness`` (+ ``make_harness``).
 - HTTP/browser protocol adapters live in subpackages, e.g. ``adapters.vercel``.
@@ -11,10 +11,10 @@
 from .harnesses import AgentaHarness, ClaudeHarness, PiHarness, make_harness
 from .in_process import InProcessPiBackend
 from .local import LocalBackend
-from .rivet import RivetBackend
+from .sandbox_agent import SandboxAgentBackend
 
 __all__ = [
-    "RivetBackend",
+    "SandboxAgentBackend",
     "InProcessPiBackend",
     "LocalBackend",
     "PiHarness",
diff --git a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
index 94398ae3f8..b3daab6e2e 100644
--- a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
+++ b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
@@ -26,7 +26,7 @@ def resolve_runner_command(
         raise AgentRunnerConfigurationError(
             f"{backend_name} requires a runner transport: pass url for an HTTP runner, "
             "pass command for a custom subprocess runner, or pass cwd pointing to a "
-            f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}."
+            f"runner directory containing {RUNNER_CLI_PATH.as_posix()}."
         )
 
     cli_path = Path(cwd) / RUNNER_CLI_PATH
@@ -34,7 +34,7 @@ def resolve_runner_command(
         raise AgentRunnerConfigurationError(
             f"{backend_name} could not find runner CLI at {cli_path}. Pass url for an "
             "HTTP runner, pass command for a custom subprocess runner, or set cwd to a "
-            f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}."
+            f"runner directory containing {RUNNER_CLI_PATH.as_posix()}."
         )
 
     return list(DEFAULT_RUNNER_COMMAND)
diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py
index aef8d7bc64..3a7b1a9110 100644
--- a/sdks/python/agenta/sdk/agents/adapters/in_process.py
+++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py
@@ -1,11 +1,11 @@
-"""InProcessPiBackend: drive Pi in-process through the TS runner, no rivet daemon.
+"""InProcessPiBackend: drive Pi in-process through the TS runner, no sandbox-agent daemon.
 
 This was the first backend implementation and stays as the simplest one: a single harness
 (Pi), a single place (local), the legacy in-process Pi engine (``engines/pi.ts``). It is the
 reference to read when writing a new backend.
 
 It is its own class and hard-codes its differences (the ``pi`` engine, Pi-only support,
-local-only). It is deliberately NOT a subclass of ``RivetBackend``; the two are different
+local-only). It is deliberately NOT a subclass of ``SandboxAgentBackend``; the two are different
 engines that happen to share the ``utils`` wire and transport helpers.
 """
 
@@ -111,7 +111,7 @@ def stream(self, messages: Sequence[Message]) -> AgentRun:
 
 class InProcessPiBackend(Backend):
     """The in-process Pi engine: drives the Pi SDK directly in the TS runner. Pi only, local
-    only, no rivet daemon."""
+    only, no sandbox-agent daemon."""
 
     # Agenta is Pi with an opinion: same in-process engine, so this backend drives it too.
     supported_harnesses = frozenset({HarnessType.PI, HarnessType.AGENTA})
@@ -123,7 +123,7 @@ def __init__(
         url: Optional[str] = None,
         command: Optional[Sequence[str]] = None,
         cwd: Optional[str] = None,
-        timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")),
+        timeout: float = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180")),
     ) -> None:
         self._url = url
         self._command: List[str] = resolve_runner_command(
diff --git a/sdks/python/agenta/sdk/agents/adapters/local.py b/sdks/python/agenta/sdk/agents/adapters/local.py
index 5435ea4751..d0c304c793 100644
--- a/sdks/python/agenta/sdk/agents/adapters/local.py
+++ b/sdks/python/agenta/sdk/agents/adapters/local.py
@@ -1,16 +1,21 @@
-"""LocalBackend: run a harness on this machine, no rivet daemon and no Agenta sidecar.
+"""LocalBackend: run a harness on this machine, no sandbox-agent daemon and no Agenta runner.
 
 This is the backend a standalone SDK user gets. It is two mechanisms, one per harness, which
 is exactly a backend's "plumbing per harness" job:
 
-- Pi   -> the bundled JS runner (the in-process Pi engine), shipped inside the wheel, run
-          with ``node``.
+- Pi   -> the Node agent runner (``services/agent``), driven over the subprocess transport.
 - Claude -> the pure-Python ``claude-agent-sdk``, in-process, no TS bridge.
 
+NOTE on packaging: the Node runner is NOT part of this Python wheel (``pip install agenta``
+stays pure Python; the wheel contains zero ``.ts``/``.js``). How a standalone Pi user obtains
+the runner -- an ``npx`` npm package, a local checkout, or a Docker sidecar over HTTP -- is an
+open distribution decision; see ``docs/design/agent-workflows/typescript-structure/``. Do NOT
+silently bundle a JS runner into the wheel.
+
 NOT YET IMPLEMENTED. Tracked as Phase 3 (Pi) and Phase 4 (Claude) in
 ``docs/design/agent-workflows/scratch/sdk-local-backend/plan.md``. The class is present so
 the adapter layout is complete and the port shape is visible; the methods raise until the
-bundling build step and the ``claude-agent-sdk`` wiring land.
+runner-delivery decision and the ``claude-agent-sdk`` wiring land.
 """
 
 from __future__ import annotations
diff --git a/sdks/python/agenta/sdk/agents/adapters/rivet.py b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py
similarity index 78%
rename from sdks/python/agenta/sdk/agents/adapters/rivet.py
rename to sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py
index 78dbee0635..5fbd7898eb 100644
--- a/sdks/python/agenta/sdk/agents/adapters/rivet.py
+++ b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py
@@ -1,9 +1,11 @@
-"""RivetBackend: drive a harness over ACP via the TypeScript rivet runner.
+"""SandboxAgentBackend: drive a harness over ACP via the TypeScript sandbox-agent runner.
 
-This backend hard-codes that it is the rivet engine. It reaches the same runner the deployed
+This backend hard-codes that it is the sandbox-agent engine. It reaches the same runner the deployed
 sidecar runs (HTTP when a ``url`` is set, otherwise a subprocess CLI), and the runner starts
-the rivet daemon, the ACP adapter, and the harness. Supports Pi and Claude. The ``sandbox``
-axis (``local`` / ``daytona``) is a real runtime choice, so it stays a constructor arg.
+the sandbox-agent daemon, the ACP adapter, and the harness. Supports Pi, Claude, and Agenta (Pi with
+an opinion, which the runner drives on the same ``pi`` ACP agent plus forced skills). The
+``sandbox`` axis (``local`` / ``daytona``) is a real runtime choice, so it stays a constructor
+arg.
 
 It is its own class, not a subclass of any other backend; it shares only the ``utils`` wire
 and transport helpers.
@@ -35,7 +37,7 @@
 from ._runner_config import resolve_runner_command
 
 
-class RivetSandbox(Sandbox):
+class SandboxAgentSandbox(Sandbox):
     """Carries the sandbox axis for the run. The real sandbox (a local daemon or a Daytona
     VM) is created inside the TS runner; here we hold the axis and buffer provisioning files
     (today AGENTS.md rides the wire, so this is informational)."""
@@ -48,13 +50,13 @@ async def add_files(self, files: Mapping[str, bytes]) -> None:
         self.files.update(files)
 
 
-class RivetSession(Session):
+class SandboxAgentSession(Session):
     """One turn-per-prompt session. Each prompt sends one ``/run`` (cold + replay)."""
 
     def __init__(
         self,
-        backend: "RivetBackend",
-        sandbox: RivetSandbox,
+        backend: "SandboxAgentBackend",
+        sandbox: SandboxAgentSandbox,
         config: HarnessAgentConfig,
         *,
         harness: HarnessType,
@@ -77,7 +79,7 @@ def id(self) -> Optional[str]:
     def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]:
         """The ``/run`` request JSON for this turn (shared by ``prompt`` and ``stream``)."""
         return request_to_wire(
-            engine=RivetBackend._ENGINE,
+            engine=SandboxAgentBackend._ENGINE,
             harness=self._harness,
             sandbox=self._sandbox.sandbox_id,
             config=self._config,
@@ -110,11 +112,13 @@ def stream(self, messages: Sequence[Message]) -> AgentRun:
         return AgentRun(records).on_result(self._absorb_result)
 
 
-class RivetBackend(Backend):
-    """The rivet engine: a harness over ACP through the TS runner. Pi and Claude."""
+class SandboxAgentBackend(Backend):
+    """The sandbox-agent engine: a harness over ACP through the TS runner. Pi, Claude, and Agenta."""
 
-    supported_harnesses = frozenset({HarnessType.PI, HarnessType.CLAUDE})
-    _ENGINE = "rivet"  # hard-coded engine identity, not a constructor arg
+    supported_harnesses = frozenset(
+        {HarnessType.PI, HarnessType.CLAUDE, HarnessType.AGENTA}
+    )
+    _ENGINE = "sandbox-agent"  # hard-coded engine identity, not a constructor arg
 
     def __init__(
         self,
@@ -123,7 +127,7 @@ def __init__(
         url: Optional[str] = None,
         command: Optional[Sequence[str]] = None,
         cwd: Optional[str] = None,
-        timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")),
+        timeout: float = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180")),
     ) -> None:
         self._sandbox = sandbox
         self._url = url
@@ -136,8 +140,8 @@ def __init__(
         self._cwd = cwd
         self._timeout = timeout
 
-    async def create_sandbox(self) -> RivetSandbox:
-        return RivetSandbox(self._sandbox)
+    async def create_sandbox(self) -> SandboxAgentSandbox:
+        return SandboxAgentSandbox(self._sandbox)
 
     async def create_session(
         self,
@@ -148,10 +152,12 @@ async def create_session(
         secrets: Optional[Mapping[str, str]] = None,
         trace: Optional[TraceContext] = None,
         session_id: Optional[str] = None,
-    ) -> RivetSession:
-        if not isinstance(sandbox, RivetSandbox):
-            raise TypeError("RivetBackend.create_session requires a RivetSandbox")
-        return RivetSession(
+    ) -> SandboxAgentSession:
+        if not isinstance(sandbox, SandboxAgentSandbox):
+            raise TypeError(
+                "SandboxAgentBackend.create_session requires a SandboxAgentSandbox"
+            )
+        return SandboxAgentSession(
             self,
             sandbox,
             config,
diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py
index d066eee132..db089eec67 100644
--- a/sdks/python/agenta/sdk/agents/dtos.py
+++ b/sdks/python/agenta/sdk/agents/dtos.py
@@ -56,7 +56,7 @@ def coerce(cls, value: "HarnessType | str") -> "HarnessType":
 
 
 class HarnessCapabilities(BaseModel):
-    """What a harness can do, probed by the backend (rivet ``AgentCapabilities``).
+    """What a harness can do, probed by the sandbox-agent backend.
 
     Adapters branch on these flags rather than the harness name (no ``if pi``): deliver
     tools over MCP only when ``mcp_tools`` is set, skip image blocks without ``images``.
diff --git a/sdks/python/agenta/sdk/agents/interfaces.py b/sdks/python/agenta/sdk/agents/interfaces.py
index a7df7280d5..75c9858d22 100644
--- a/sdks/python/agenta/sdk/agents/interfaces.py
+++ b/sdks/python/agenta/sdk/agents/interfaces.py
@@ -4,7 +4,7 @@
 
 - ``Backend`` is the engine. It declares which harnesses it can drive
   (``supported_harnesses``), owns sandbox + session lifecycle, and is pure plumbing: it
-  takes an already-harness-shaped config and launches it. Adapters: ``RivetBackend``,
+  takes an already-harness-shaped config and launches it. Adapters: ``SandboxAgentBackend``,
   ``InProcessPiBackend``, ``LocalBackend``.
 - ``Sandbox`` is where a session's process tree lives, plus the provisioning verb
   (``add_files``).
diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
index f7a5497d1c..b95f708ba6 100644
--- a/sdks/python/agenta/sdk/agents/utils/ts_runner.py
+++ b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
@@ -11,7 +11,7 @@
 import os
 from typing import Any, AsyncIterator, Dict, Optional, Sequence
 
-_DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180"))
+_DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180"))
 
 
 async def deliver_http(
diff --git a/sdks/python/agenta/sdk/agents/utils/wire.py b/sdks/python/agenta/sdk/agents/utils/wire.py
index b7558a4530..1b203ed287 100644
--- a/sdks/python/agenta/sdk/agents/utils/wire.py
+++ b/sdks/python/agenta/sdk/agents/utils/wire.py
@@ -1,6 +1,6 @@
 """The ``/run`` wire contract: our DTOs <-> the runner's camelCase JSON.
 
-Shared by the runner-backed adapters (rivet, in-process Pi). The TS side mirrors these names
+Shared by the runner-backed adapters (sandbox-agent, in-process Pi). The TS side mirrors these names
 in ``services/agent/src/protocol.ts``, and the contract is pinned by shared golden fixtures
 under ``sdks/python/oss/tests/pytest/unit/agents/golden/`` (see ``test_wire_contract.py``).
 The caller passes the engine id explicitly, since each adapter hard-codes its own.
diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json
index 318722efe5..14944896fb 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json
+++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json
@@ -1,5 +1,5 @@
 {
-  "backend": "rivet",
+  "backend": "sandbox-agent",
   "harness": "claude",
   "sandbox": "local",
   "sessionId": null,
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
index fe0eb52fbe..0b3b64ad43 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py
@@ -162,6 +162,15 @@ def test_agenta_is_in_process_pi_supported():
     assert InProcessPiBackend(url="http://runner").supports(HarnessType.AGENTA)
 
 
+def test_agenta_is_sandbox_agent_supported():
+    # Agenta is Pi with an opinion, so the sandbox-agent backend drives it too (on the `pi` ACP
+    # agent, with the runner laying the forced skills into the sandbox). This is what lets
+    # `agenta` run on a non-local sandbox (e.g. daytona) instead of raising.
+    from agenta.sdk.agents import SandboxAgentBackend
+
+    assert SandboxAgentBackend(url="http://runner").supports(HarnessType.AGENTA)
+
+
 # ------------------------------------------------------------------------- Claude
 
 
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py
index f71863915e..b60575fc8c 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py
@@ -10,7 +10,7 @@
 from agenta.sdk.agents import (
     AgentRunnerConfigurationError,
     InProcessPiBackend,
-    RivetBackend,
+    SandboxAgentBackend,
 )
 
 
@@ -22,19 +22,19 @@ def runner_dir(tmp_path: Path) -> Path:
     return tmp_path
 
 
-@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend])
 def test_default_subprocess_requires_cwd(backend_cls):
     with pytest.raises(AgentRunnerConfigurationError, match="pass cwd"):
         backend_cls()
 
 
-@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend])
 def test_default_subprocess_requires_runner_cli(backend_cls, tmp_path: Path):
     with pytest.raises(AgentRunnerConfigurationError, match="src/cli.ts"):
         backend_cls(cwd=str(tmp_path))
 
 
-@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend])
 def test_default_subprocess_accepts_runner_wrapper_cwd(backend_cls, runner_dir: Path):
     backend = backend_cls(cwd=str(runner_dir))
 
@@ -42,15 +42,15 @@ def test_default_subprocess_accepts_runner_wrapper_cwd(backend_cls, runner_dir:
     assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"]
 
 
-@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend])
 def test_http_transport_does_not_require_runner_wrapper(backend_cls):
-    backend = backend_cls(url="http://agent-pi:8765")
+    backend = backend_cls(url="http://sandbox-agent:8765")
 
-    assert backend._url == "http://agent-pi:8765"
+    assert backend._url == "http://sandbox-agent:8765"
     assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"]
 
 
-@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend])
+@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend])
 def test_custom_command_does_not_require_runner_wrapper(backend_cls):
     command = [sys.executable, "-m", "runner"]
 
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
index 4aa24a86b1..c7f9497495 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
@@ -96,7 +96,7 @@ def _claude_payload():
         permission_policy="deny",
     )
     return request_to_wire(
-        engine="rivet",
+        engine="sandbox-agent",
         harness=HarnessType.CLAUDE,
         sandbox="local",
         config=config,

From 0beb1207f7b5e8837dac38b74feb58abe54fc068 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Mon, 22 Jun 2026 14:16:26 +0200
Subject: [PATCH 4/4] fix(sdk): address review feedback (locking, input
 validation, stream/error handling)

---
 .../sdk/agents/adapters/_runner_config.py     | 18 +++++++++--
 .../agenta/sdk/agents/adapters/in_process.py  |  5 +++-
 .../sdk/agents/adapters/sandbox_agent.py      |  7 ++++-
 .../sdk/agents/adapters/vercel/routing.py     | 12 ++++++++
 sdks/python/agenta/sdk/agents/dtos.py         | 18 +++++++----
 sdks/python/agenta/sdk/agents/interfaces.py   |  6 +++-
 sdks/python/agenta/sdk/agents/mcp/models.py   |  8 +++++
 sdks/python/agenta/sdk/agents/streaming.py    |  8 +++++
 sdks/python/agenta/sdk/agents/tools/compat.py |  7 ++++-
 .../agenta/sdk/agents/utils/ts_runner.py      | 30 ++++++++++++++++---
 .../pytest/unit/agents/tools/test_parsing.py  | 25 ++++++++++++++++
 .../pytest/utils/test_messages_endpoint.py    | 14 +++++++--
 12 files changed, 141 insertions(+), 17 deletions(-)

diff --git a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
index b3daab6e2e..a8b01531e6 100644
--- a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
+++ b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py
@@ -18,10 +18,24 @@ def resolve_runner_command(
     command: Optional[Sequence[str]],
     cwd: Optional[str],
 ) -> List[str]:
+    def _validated_command(raw: Sequence[str]) -> List[str]:
+        cmd = list(raw)
+        if not cmd:
+            raise AgentRunnerConfigurationError(
+                f"{backend_name} received an empty command. Pass a non-empty command, "
+                "pass url for an HTTP runner, or set cwd to a runner directory containing "
+                f"{RUNNER_CLI_PATH.as_posix()}."
+            )
+        return cmd
+
     if url:
-        return list(command) if command is not None else list(DEFAULT_RUNNER_COMMAND)
+        return (
+            _validated_command(command)
+            if command is not None
+            else list(DEFAULT_RUNNER_COMMAND)
+        )
     if command is not None:
-        return list(command)
+        return _validated_command(command)
     if not cwd:
         raise AgentRunnerConfigurationError(
             f"{backend_name} requires a runner transport: pass url for an HTTP runner, "
diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py
index 3a7b1a9110..114d0aa79f 100644
--- a/sdks/python/agenta/sdk/agents/adapters/in_process.py
+++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py
@@ -54,12 +54,14 @@ def __init__(
         backend: "InProcessPiBackend",
         config: HarnessAgentConfig,
         *,
+        harness: HarnessType,
         secrets: Optional[Mapping[str, str]],
         trace: Optional[TraceContext],
         session_id: Optional[str],
     ) -> None:
         self._backend = backend
         self._config = config
+        self._harness = harness
         self._secrets = dict(secrets or {})
         self._trace = trace
         self._session_id = session_id
@@ -72,7 +74,7 @@ def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]:
         """The ``/run`` request JSON for this turn (shared by ``prompt`` and ``stream``)."""
         return request_to_wire(
             engine=InProcessPiBackend._ENGINE,
-            harness=HarnessType.PI,
+            harness=self._harness,
             sandbox="local",
             config=self._config,
             messages=messages,
@@ -151,6 +153,7 @@ async def create_session(
         return InProcessPiSession(
             self,
             config,
+            harness=harness,
             secrets=secrets,
             trace=trace,
             session_id=session_id,
diff --git a/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py
index 5fbd7898eb..24f0f84781 100644
--- a/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py
+++ b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py
@@ -13,6 +13,7 @@
 
 from __future__ import annotations
 
+import logging
 import os
 from typing import Any, AsyncIterator, Dict, List, Mapping, Optional, Sequence
 
@@ -36,6 +37,8 @@
 )
 from ._runner_config import resolve_runner_command
 
+_log = logging.getLogger(__name__)
+
 
 class SandboxAgentSandbox(Sandbox):
     """Carries the sandbox axis for the run. The real sandbox (a local daemon or a Daytona
@@ -193,4 +196,6 @@ def _emit_events(result: AgentResult, on_event: Optional[EventSink]) -> None:
         try:
             on_event(event)
         except Exception:  # pylint: disable=broad-except
-            pass
+            # The sink is caller-provided; don't let it crash the result. Log at debug so a
+            # misbehaving sink is still diagnosable.
+            _log.debug("event sink raised; suppressing", exc_info=True)
diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py
index a854ca0460..5f7c2cc37f 100644
--- a/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py
+++ b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py
@@ -156,6 +156,18 @@ def make_load_session_endpoint(
     store = session_store or NoopSessionStore()
 
     async def load_session_endpoint(req: Request, request: LoadSessionRequest):
+        # Gate the id with the same charset/length bound as ``/messages`` before it reaches
+        # the store, so both endpoints share one trust boundary. Unlike ``/messages`` we never
+        # mint here: loading needs an existing id, so an absent/invalid one is a 400.
+        if not _SESSION_ID_RE.match(request.session_id or ""):
+            return set_vercel_message_protocol_headers(
+                JSONResponse(
+                    status_code=400,
+                    content={
+                        "detail": "session_id violates the allowed charset/length"
+                    },
+                )
+            )
         messages = await store.load(request.session_id)
         response = LoadSessionResponse(
             session_id=request.session_id,
diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py
index db089eec67..44629c3bb9 100644
--- a/sdks/python/agenta/sdk/agents/dtos.py
+++ b/sdks/python/agenta/sdk/agents/dtos.py
@@ -11,7 +11,7 @@
 from __future__ import annotations
 
 from enum import Enum
-from typing import Any, Callable, ClassVar, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, ClassVar, Dict, List, Literal, Optional, Tuple, Union
 
 from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator
 
@@ -47,7 +47,7 @@ def coerce(cls, value: "HarnessType | str") -> "HarnessType":
 
 # Permission policy for harness tool use in a headless run. ``auto`` approves (tools are
 # backend-resolved and trusted, no human to prompt); ``deny`` rejects.
-PermissionPolicy = str  # "auto" | "deny"
+PermissionPolicy = Literal["auto", "deny"]
 
 
 # ---------------------------------------------------------------------------
@@ -180,10 +180,18 @@ def from_raw(cls, raw: Any) -> "ContentBlock":
 
 
 class Message(BaseModel):
-    """A chat message in the conversation. ``content`` is text or content blocks.
+    """A chat message in an agent-runtime conversation. ``content`` is text or content blocks.
 
-    This is the runtime's own message type, distinct from the SDK's prompt ``Message``
-    (``agenta.Message``); the two serve different layers.
+    Two unrelated types share the name ``Message`` in this SDK, on purpose, for two layers:
+
+    - this one — the agent runtime's conversation message, imported from
+      ``agenta.sdk.agents`` (it is deliberately *not* re-exported as ``agenta.Message``);
+    - the prompt-template message ``agenta.Message`` (``agenta.sdk.utils.types.Message``),
+      used by the prompt/completion layer.
+
+    They never appear together in the same call, so the namespacing (top-level vs.
+    ``agenta.sdk.agents``) is what keeps them apart. When one module needs both, import
+    this one under an alias so it does not shadow ``agenta.Message``.
     """
 
     role: str
diff --git a/sdks/python/agenta/sdk/agents/interfaces.py b/sdks/python/agenta/sdk/agents/interfaces.py
index 75c9858d22..e03fb646a6 100644
--- a/sdks/python/agenta/sdk/agents/interfaces.py
+++ b/sdks/python/agenta/sdk/agents/interfaces.py
@@ -17,6 +17,7 @@
 
 from __future__ import annotations
 
+import asyncio
 from abc import ABC, abstractmethod
 from typing import ClassVar, FrozenSet, Mapping, Optional, Sequence
 
@@ -185,6 +186,7 @@ def __init__(self, backend: Backend, *, sandbox_per_session: bool = True) -> Non
         self._backend = backend
         self._sandbox_per_session = sandbox_per_session
         self._shared: Optional[Sandbox] = None
+        self._shared_lock = asyncio.Lock()
 
     @property
     def backend(self) -> Backend:
@@ -203,7 +205,9 @@ async def _sandbox(self) -> Sandbox:
         if self._sandbox_per_session:
             return await self._backend.create_sandbox()
         if self._shared is None:
-            self._shared = await self._backend.create_sandbox()
+            async with self._shared_lock:
+                if self._shared is None:
+                    self._shared = await self._backend.create_sandbox()
         return self._shared
 
     async def create_session(
diff --git a/sdks/python/agenta/sdk/agents/mcp/models.py b/sdks/python/agenta/sdk/agents/mcp/models.py
index e4df7f87e5..37c3f6806b 100644
--- a/sdks/python/agenta/sdk/agents/mcp/models.py
+++ b/sdks/python/agenta/sdk/agents/mcp/models.py
@@ -39,6 +39,14 @@ class ResolvedMCPServer(BaseModel):
     url: Optional[str] = None
     tools: List[str] = Field(default_factory=list)
 
+    @model_validator(mode="after")
+    def _validate_transport(self) -> "ResolvedMCPServer":
+        if self.transport == "stdio" and not self.command:
+            raise ValueError("stdio MCP server requires command")
+        if self.transport == "http" and not self.url:
+            raise ValueError("http MCP server requires url")
+        return self
+
     def to_wire(self) -> Dict[str, Any]:
         wire: Dict[str, Any] = {
             "name": self.name,
diff --git a/sdks/python/agenta/sdk/agents/streaming.py b/sdks/python/agenta/sdk/agents/streaming.py
index e631d0ecdc..2ae86ea6fc 100644
--- a/sdks/python/agenta/sdk/agents/streaming.py
+++ b/sdks/python/agenta/sdk/agents/streaming.py
@@ -62,6 +62,7 @@ def on_cleanup(self, cleanup: Cleanup) -> "AgentRun":
         return self
 
     async def __aiter__(self) -> AsyncIterator[AgentEvent]:
+        saw_terminal = False
         try:
             async for record in self._records:
                 kind = record.get("kind")
@@ -74,7 +75,14 @@ async def __aiter__(self) -> AsyncIterator[AgentEvent]:
                     self._result = result_from_wire(record.get("result") or {})
                     for hook in self._result_hooks:
                         hook(self._result)
+                    saw_terminal = True
                     return
+            if not saw_terminal:
+                # A truncated stream (runner disconnect/early exit) would otherwise leave
+                # ``result()`` raising an opaque "not available" later; fail loud here instead.
+                raise RuntimeError(
+                    "AgentRun stream ended without a terminal result record"
+                )
         finally:
             for cleanup in self._cleanups:
                 try:
diff --git a/sdks/python/agenta/sdk/agents/tools/compat.py b/sdks/python/agenta/sdk/agents/tools/compat.py
index e356abfdde..d2ddf16b4b 100644
--- a/sdks/python/agenta/sdk/agents/tools/compat.py
+++ b/sdks/python/agenta/sdk/agents/tools/compat.py
@@ -51,7 +51,9 @@ def _copy_tool_metadata(
 ) -> dict[str, Any]:
     result = dict(target)
     if "needs_approval" in source:
-        result["needs_approval"] = bool(source["needs_approval"])
+        # Pass the raw value through; the model's bool field coerces it correctly. Using
+        # ``bool(...)`` here would flip legacy string payloads (``"false"`` -> ``True``).
+        result["needs_approval"] = source["needs_approval"]
     if isinstance(source.get("render"), dict):
         result["render"] = dict(source["render"])
     return result
@@ -102,6 +104,9 @@ def coerce_tool_configs(
     on_error: Literal["raise", "collect"] = "raise",
 ) -> ToolConfigParseResult:
     """Convert legacy values, either raising or returning structured diagnostics."""
+    if on_error not in {"raise", "collect"}:
+        raise ValueError("on_error must be 'raise' or 'collect'")
+
     tool_configs: list[ToolConfig] = []
     diagnostics: list[ToolConfigDiagnostic] = []
     for index, value in enumerate(values or []):
diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
index b95f708ba6..590f47cd1c 100644
--- a/sdks/python/agenta/sdk/agents/utils/ts_runner.py
+++ b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
@@ -26,7 +26,9 @@ async def deliver_http(
     url = base_url.rstrip("/") + "/run"
     async with httpx.AsyncClient(timeout=timeout) as client:
         response = await client.post(url, json=payload)
-    if response.status_code >= 500:
+    # Treat every 4xx/5xx as a runner failure; a 4xx that falls through would otherwise
+    # surface as an opaque JSON parse error instead of a clear runner failure.
+    if response.status_code >= 400:
         raise RuntimeError(
             f"Agent runner HTTP {response.status_code}: {response.text[:1000]}"
         )
@@ -101,11 +103,12 @@ async def deliver_http_stream(
 
     url = base_url.rstrip("/") + "/run"
     headers = {"Accept": "application/x-ndjson"}
+    saw_result = False
     async with httpx.AsyncClient(timeout=timeout) as client:
         async with client.stream(
             "POST", url, json=payload, headers=headers
         ) as response:
-            if response.status_code >= 500:
+            if response.status_code >= 400:
                 body = await response.aread()
                 raise RuntimeError(
                     f"Agent runner HTTP {response.status_code}: {body[:1000]!r}"
@@ -113,7 +116,12 @@ async def deliver_http_stream(
             async for line in response.aiter_lines():
                 line = line.strip()
                 if line:
-                    yield json.loads(line)
+                    record = json.loads(line)
+                    if record.get("kind") == "result":
+                        saw_result = True
+                    yield record
+    if not saw_result:
+        raise RuntimeError("Agent runner stream ended without a terminal result record")
 
 
 async def deliver_subprocess_stream(
@@ -143,6 +151,7 @@ async def deliver_subprocess_stream(
     proc.stdin.close()
     loop = asyncio.get_event_loop()
     deadline = loop.time() + timeout
+    saw_result = False
     try:
         while True:
             remaining = deadline - loop.time()
@@ -155,8 +164,21 @@ async def deliver_subprocess_stream(
                 break
             line = raw.decode("utf-8", "replace").strip()
             if line:
-                yield json.loads(line)
+                record = json.loads(line)
+                if record.get("kind") == "result":
+                    saw_result = True
+                yield record
         await proc.wait()
+        # A clean drain that never produced a terminal result means the runner exited or
+        # disconnected early; fail loud rather than leaving the consumer without a result.
+        if not saw_result:
+            err = b""
+            if proc.stderr is not None:
+                err = await proc.stderr.read()
+            raise RuntimeError(
+                "Agent runner stream ended without a terminal result record. "
+                f"exit={proc.returncode} stderr={err.decode('utf-8', 'replace')[-2000:]}"
+            )
     finally:
         if proc.returncode is None:
             proc.kill()
diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py
index ff6f212f9f..2f707a7ab5 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py
@@ -45,6 +45,31 @@ def test_compat_parser_accepts_playground_gateway_slug_and_metadata():
     assert gateway.render == {"kind": "component", "component": "User"}
 
 
+def test_compat_parser_does_not_flip_string_false_needs_approval():
+    # Legacy payloads may carry the flag as the string "false"; it must not coerce to True
+    # (a plain ``bool("false")`` would).
+    gateway = coerce_tool_config(
+        {
+            "function": {"name": "tools__composio__github__GET_USER__c1"},
+            "needs_approval": "false",
+        }
+    )
+    assert gateway.needs_approval is False
+
+    approved = coerce_tool_config(
+        {
+            "function": {"name": "tools__composio__github__GET_USER__c1"},
+            "needs_approval": "true",
+        }
+    )
+    assert approved.needs_approval is True
+
+
+def test_coerce_tool_configs_rejects_invalid_on_error():
+    with pytest.raises(ValueError):
+        coerce_tool_configs(["read"], on_error="bogus")  # type: ignore[arg-type]
+
+
 def test_collect_mode_reports_invalid_entries():
     result = coerce_tool_configs(
         ["read", {"invalid": True}, None],
diff --git a/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py
index 89a06d6783..4ade145a1c 100644
--- a/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py
+++ b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py
@@ -193,8 +193,18 @@ def test_messages_sse_streams_with_done_and_session_in_start(client):
     _assert_vercel_message_protocol(res)
     assert res.headers["x-vercel-ai-ui-message-stream"] == "v1"
     text = res.text
-    assert '"sessionId": "sess_abc"' in text  # stamped onto the start part
-    assert '"type": "text-delta"' in text
+    # Parse the SSE payloads so the check survives serializer formatting changes (whitespace,
+    # key order) rather than matching a literal JSON substring.
+    payloads = [
+        json.loads(line.removeprefix("data: "))
+        for line in text.splitlines()
+        if line.startswith("data: ") and line != "data: [DONE]"
+    ]
+    start = next(p for p in payloads if p.get("type") == "start")
+    assert (
+        start["messageMetadata"]["sessionId"] == "sess_abc"
+    )  # stamped onto the start part
+    assert any(p.get("type") == "text-delta" for p in payloads)
     assert "data: [DONE]" in text