From b9e62f99aa2fe7665bc4ddff0dd821ebae3d53bd Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Fri, 19 Jun 2026 18:27:53 +0200 Subject: [PATCH 1/4] feat(sdk): agent runtime ports, adapters, tool resolution, and messages protocol --- sdks/python/agenta/__init__.py | 17 + sdks/python/agenta/sdk/agents/__init__.py | 183 +++++ .../agenta/sdk/agents/adapters/__init__.py | 24 + .../sdk/agents/adapters/agenta_builtins.py | 90 +++ .../agenta/sdk/agents/adapters/harnesses.py | 150 ++++ .../agenta/sdk/agents/adapters/in_process.py | 170 +++++ .../agenta/sdk/agents/adapters/local.py | 48 ++ .../agenta/sdk/agents/adapters/rivet.py | 186 +++++ .../sdk/agents/adapters/vercel/__init__.py | 43 ++ .../sdk/agents/adapters/vercel/messages.py | 219 ++++++ .../sdk/agents/adapters/vercel/routing.py | 209 ++++++ .../agenta/sdk/agents/adapters/vercel/sse.py | 25 + .../sdk/agents/adapters/vercel/stream.py | 216 ++++++ sdks/python/agenta/sdk/agents/dtos.py | 698 ++++++++++++++++++ sdks/python/agenta/sdk/agents/errors.py | 26 + sdks/python/agenta/sdk/agents/interfaces.py | 317 ++++++++ sdks/python/agenta/sdk/agents/mcp/__init__.py | 22 + sdks/python/agenta/sdk/agents/mcp/errors.py | 33 + .../agenta/sdk/agents/mcp/interfaces.py | 10 + sdks/python/agenta/sdk/agents/mcp/models.py | 57 ++ sdks/python/agenta/sdk/agents/mcp/parsing.py | 39 + sdks/python/agenta/sdk/agents/mcp/resolver.py | 68 ++ sdks/python/agenta/sdk/agents/mcp/wire.py | 17 + sdks/python/agenta/sdk/agents/streaming.py | 91 +++ .../agenta/sdk/agents/tools/__init__.py | 75 ++ sdks/python/agenta/sdk/agents/tools/compat.py | 132 ++++ sdks/python/agenta/sdk/agents/tools/errors.py | 82 ++ .../agenta/sdk/agents/tools/interfaces.py | 20 + sdks/python/agenta/sdk/agents/tools/models.py | 221 ++++++ .../python/agenta/sdk/agents/tools/parsing.py | 39 + .../agenta/sdk/agents/tools/resolver.py | 177 +++++ sdks/python/agenta/sdk/agents/tools/wire.py | 15 + sdks/python/agenta/sdk/agents/ui_messages.py | 18 + 
.../agenta/sdk/agents/utils/__init__.py | 19 + .../agenta/sdk/agents/utils/ts_runner.py | 163 ++++ sdks/python/agenta/sdk/agents/utils/wire.py | 91 +++ sdks/python/agenta/sdk/decorators/routing.py | 62 +- .../agenta/sdk/engines/running/interfaces.py | 43 ++ .../agenta/sdk/engines/running/utils.py | 25 +- .../sdk/middlewares/running/normalizer.py | 10 +- sdks/python/agenta/sdk/models/workflows.py | 33 + sdks/python/agenta/sdk/utils/types.py | 80 ++ .../agenta/tests/agents/test_streaming.py | 167 +++++ .../pytest/integration/agents/__init__.py | 1 + .../agents/test_transport_roundtrip.py | 113 +++ .../oss/tests/pytest/unit/agents/__init__.py | 1 + .../oss/tests/pytest/unit/agents/conftest.py | 198 +++++ .../agents/golden/run_request.claude.json | 28 + .../unit/agents/golden/run_request.pi.json | 36 + .../unit/agents/golden/run_result.error.json | 4 + .../unit/agents/golden/run_result.ok.json | 31 + .../tests/pytest/unit/agents/mcp/__init__.py | 1 + .../pytest/unit/agents/mcp/test_resolver.py | 76 ++ .../unit/agents/test_dtos_agent_config.py | 155 ++++ .../agents/test_dtos_capabilities_events.py | 81 ++ .../unit/agents/test_dtos_content_blocks.py | 90 +++ .../unit/agents/test_dtos_harness_configs.py | 99 +++ .../unit/agents/test_environment_lifecycle.py | 127 ++++ .../unit/agents/test_harness_adapters.py | 273 +++++++ .../pytest/unit/agents/test_ui_messages.py | 430 +++++++++++ .../pytest/unit/agents/test_wire_contract.py | 301 ++++++++ .../pytest/unit/agents/tools/__init__.py | 1 + .../pytest/unit/agents/tools/test_models.py | 63 ++ .../pytest/unit/agents/tools/test_parsing.py | 60 ++ .../pytest/unit/agents/tools/test_resolver.py | 131 ++++ .../unit/test_normalizer_passthrough.py | 30 + .../pytest/utils/test_messages_endpoint.py | 284 +++++++ .../oss/tests/pytest/utils/test_routing.py | 121 +++ 68 files changed, 7154 insertions(+), 11 deletions(-) create mode 100644 sdks/python/agenta/sdk/agents/__init__.py create mode 100644 
sdks/python/agenta/sdk/agents/adapters/__init__.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/harnesses.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/in_process.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/local.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/rivet.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/messages.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/routing.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/sse.py create mode 100644 sdks/python/agenta/sdk/agents/adapters/vercel/stream.py create mode 100644 sdks/python/agenta/sdk/agents/dtos.py create mode 100644 sdks/python/agenta/sdk/agents/errors.py create mode 100644 sdks/python/agenta/sdk/agents/interfaces.py create mode 100644 sdks/python/agenta/sdk/agents/mcp/__init__.py create mode 100644 sdks/python/agenta/sdk/agents/mcp/errors.py create mode 100644 sdks/python/agenta/sdk/agents/mcp/interfaces.py create mode 100644 sdks/python/agenta/sdk/agents/mcp/models.py create mode 100644 sdks/python/agenta/sdk/agents/mcp/parsing.py create mode 100644 sdks/python/agenta/sdk/agents/mcp/resolver.py create mode 100644 sdks/python/agenta/sdk/agents/mcp/wire.py create mode 100644 sdks/python/agenta/sdk/agents/streaming.py create mode 100644 sdks/python/agenta/sdk/agents/tools/__init__.py create mode 100644 sdks/python/agenta/sdk/agents/tools/compat.py create mode 100644 sdks/python/agenta/sdk/agents/tools/errors.py create mode 100644 sdks/python/agenta/sdk/agents/tools/interfaces.py create mode 100644 sdks/python/agenta/sdk/agents/tools/models.py create mode 100644 sdks/python/agenta/sdk/agents/tools/parsing.py create mode 100644 sdks/python/agenta/sdk/agents/tools/resolver.py create mode 100644 sdks/python/agenta/sdk/agents/tools/wire.py 
create mode 100644 sdks/python/agenta/sdk/agents/ui_messages.py create mode 100644 sdks/python/agenta/sdk/agents/utils/__init__.py create mode 100644 sdks/python/agenta/sdk/agents/utils/ts_runner.py create mode 100644 sdks/python/agenta/sdk/agents/utils/wire.py create mode 100644 sdks/python/agenta/tests/agents/test_streaming.py create mode 100644 sdks/python/oss/tests/pytest/integration/agents/__init__.py create mode 100644 sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/__init__.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/conftest.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json create mode 100644 sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json create mode 100644 sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py create mode 100644 
sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py create mode 100644 sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py diff --git a/sdks/python/agenta/__init__.py b/sdks/python/agenta/__init__.py index df014c4e00..dc01c3396a 100644 --- a/sdks/python/agenta/__init__.py +++ b/sdks/python/agenta/__init__.py @@ -52,6 +52,23 @@ from .sdk.utils.logging import get_module_logger # noqa: F401 from .sdk.utils.preinit import PreInitObject # noqa: F401 +# Agent runtime (the agents subsystem). `Message` is intentionally not re-exported here: +# `agenta.Message` already names the prompt message type; import the agents one from +# `agenta.sdk.agents` when needed. +from .sdk.agents import ( # noqa: F401 + AgentaHarness, + AgentConfig, + ClaudeHarness, + Environment, + InProcessPiBackend, + LocalBackend, + PiHarness, + RivetBackend, + RunSelection, + SessionConfig, + make_harness, +) + DEFAULT_AGENTA_SINGLETON_INSTANCE = AgentaSingleton() types = client_types diff --git a/sdks/python/agenta/sdk/agents/__init__.py b/sdks/python/agenta/sdk/agents/__init__.py new file mode 100644 index 0000000000..b1cd4370d2 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/__init__.py @@ -0,0 +1,183 @@ +"""Agenta agent runtime: run a coding harness (Pi, Claude, ...) as a swappable port. + +Layers (Agenta's hexagonal vocabulary): + +- ``dtos.py`` — data contracts (``AgentConfig``, ``SessionConfig``, ``Message``, ...). +- ``interfaces.py`` — the ports (ABCs): ``Backend``, ``Environment``, ``Sandbox``, + ``Session``, ``Harness``. +- ``adapters/`` — implementations: ``RivetBackend`` / ``InProcessPiBackend`` / ``LocalBackend`` + and ``PiHarness`` / ``ClaudeHarness``. +- ``utils/`` — shared plumbing (the ``/run`` wire and the transports to the TS runner). 
+ +Standalone usage:: + + import agenta as ag + from agenta.sdk.agents import Message + + cfg = ag.ConfigManager.get_from_registry(app_slug="my-agent") + agent = ag.AgentConfig.from_params(cfg) + harness = ag.PiHarness(ag.Environment(ag.RivetBackend())) + result = await harness.prompt(ag.SessionConfig(agent=agent), [Message(role="user", content="hi")]) +""" + +from .adapters import ( + AgentaHarness, + ClaudeHarness, + InProcessPiBackend, + LocalBackend, + PiHarness, + RivetBackend, + make_harness, +) +from .dtos import ( + AgentaAgentConfig, + AgentConfig, + AgentEvent, + AgentResult, + ClaudeAgentConfig, + ContentBlock, + HarnessAgentConfig, + HarnessCapabilities, + HarnessType, + Message, + PermissionPolicy, + PiAgentConfig, + RunSelection, + SessionConfig, + ToolCallback, + TraceContext, + to_messages, +) +from .errors import ToolResolutionError, UnsupportedHarnessError +from .interfaces import ( + Backend, + Environment, + Harness, + NoopSessionStore, + Sandbox, + Session, + SessionStore, +) +from .mcp import ( + MCPConfigurationError, + MCPError, + MCPResolver, + MCPServerConfig, + MissingMCPSecretError, + ResolvedMCPServer, +) +from .streaming import AgentRun +from .tools import ( + BuiltinToolConfig, + CallbackToolSpec, + ClientToolConfig, + ClientToolSpec, + CodeToolConfig, + CodeToolSpec, + DuplicateToolNameError, + EnvironmentToolSecretProvider, + GatewayToolResolver, + GatewayToolConfig, + GatewayToolResolution, + GatewayToolResolutionError, + MissingSecretPolicy, + MissingToolSecretError, + ResolvedToolSet, + ToolConfig, + ToolConfigError, + ToolConfigurationError, + ToolError, + ToolResolver, + ToolSecretProvider, + ToolSpec, + UnsupportedToolProviderError, + coerce_tool_config, + coerce_tool_configs, + parse_tool_config, + parse_tool_configs, +) +from .adapters.vercel import ( + from_ui_messages, + to_ui_message, + ui_message_stream, +) + +__all__ = [ + # DTOs + "AgentConfig", + "RunSelection", + "SessionConfig", + "HarnessAgentConfig", + 
"PiAgentConfig", + "ClaudeAgentConfig", + "AgentaAgentConfig", + "HarnessType", + "HarnessCapabilities", + "ContentBlock", + "Message", + "to_messages", + "AgentEvent", + "AgentResult", + "AgentRun", + # Former flat Vercel adapter names (compatibility; new code uses adapters.vercel) + "from_ui_messages", + "to_ui_message", + "ui_message_stream", + "TraceContext", + "ToolCallback", + "PermissionPolicy", + # Canonical tools API + "ToolConfig", + "BuiltinToolConfig", + "GatewayToolConfig", + "CodeToolConfig", + "ClientToolConfig", + "ToolSpec", + "CallbackToolSpec", + "CodeToolSpec", + "ClientToolSpec", + "ResolvedToolSet", + "GatewayToolResolution", + "ToolResolver", + "ToolSecretProvider", + "GatewayToolResolver", + "EnvironmentToolSecretProvider", + "MissingSecretPolicy", + "parse_tool_config", + "parse_tool_configs", + "coerce_tool_config", + "coerce_tool_configs", + "ToolError", + "ToolConfigError", + "ToolConfigurationError", + "GatewayToolResolutionError", + "UnsupportedToolProviderError", + "MissingToolSecretError", + "DuplicateToolNameError", + # MCP is a sibling subsystem + "MCPServerConfig", + "ResolvedMCPServer", + "MCPResolver", + "MCPError", + "MCPConfigurationError", + "MissingMCPSecretError", + # Interfaces (ports) + "Backend", + "Sandbox", + "Session", + "SessionStore", + "NoopSessionStore", + "Environment", + "Harness", + # Errors + "UnsupportedHarnessError", + "ToolResolutionError", + # Adapters + "RivetBackend", + "InProcessPiBackend", + "LocalBackend", + "PiHarness", + "ClaudeHarness", + "AgentaHarness", + "make_harness", +] diff --git a/sdks/python/agenta/sdk/agents/adapters/__init__.py b/sdks/python/agenta/sdk/agents/adapters/__init__.py new file mode 100644 index 0000000000..30e555d82b --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/__init__.py @@ -0,0 +1,24 @@ +"""Adapters: concrete implementations of the agent runtime ports. 
+ +- Backend adapters: ``RivetBackend`` (rivet over ACP), ``InProcessPiBackend`` (in-process Pi, + the reference backend), ``LocalBackend`` (standalone SDK runs; not yet implemented). +- Harness adapters: ``PiHarness``, ``ClaudeHarness``, ``AgentaHarness`` (+ ``make_harness``). +- HTTP/browser protocol adapters live in subpackages, e.g. ``adapters.vercel``. + +Shared plumbing for the runner-backed adapters lives in ``agents/utils``. +""" + +from .harnesses import AgentaHarness, ClaudeHarness, PiHarness, make_harness +from .in_process import InProcessPiBackend +from .local import LocalBackend +from .rivet import RivetBackend + +__all__ = [ + "RivetBackend", + "InProcessPiBackend", + "LocalBackend", + "PiHarness", + "ClaudeHarness", + "AgentaHarness", + "make_harness", +] diff --git a/sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py b/sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py new file mode 100644 index 0000000000..b5fae23bd2 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/agenta_builtins.py @@ -0,0 +1,90 @@ +"""The Agenta harness's forced defaults: the things ``AgentaHarness`` always applies. + +``AgentaHarness`` is Pi with an opinion. It is the same engine as :class:`PiHarness`, but +every run carries a fixed set of Agenta-shipped extras the author cannot turn off: + +- a base **persona** appended to Pi's system prompt (``AGENTA_FORCED_APPEND_SYSTEM``), +- a base **AGENTS.md preamble** the author's instructions are appended to (``AGENTA_PREAMBLE``), +- a set of **forced tools** (``AGENTA_FORCED_TOOLS``), and +- a set of **forced skills** (``AGENTA_FORCED_SKILLS``). + +The forced *policy* lives here (harness knowledge). The forced skill *files* live with the +runner that runs Pi, under ``services/agent/skills/<name>/``; the contract between the two is +the skill directory **name**, so each entry in ``AGENTA_FORCED_SKILLS`` must match a committed +directory there.
+ +Two layers, kept distinct on purpose (matching Pi's own split, see :class:`PiAgentConfig`): +the *persona* is an ``append_system`` (changes Pi's base prompt), while *project conventions* +belong in ``AGENTS.md``. ``AGENTA_PREAMBLE`` is the AGENTS.md layer; ``AGENTA_FORCED_APPEND_SYSTEM`` +is the persona layer. +""" + +from __future__ import annotations + +from typing import List, Optional + +# The base AGENTS.md preamble. The author's own ``instructions`` are appended after this, so +# the final AGENTS.md is ``AGENTA_PREAMBLE`` + the author's project conventions. +# +# TODO(product): replace this placeholder with the real Agenta AGENTS.md preamble. +AGENTA_PREAMBLE = """\ +# Agenta agent + +You are an agent running on the Agenta platform. The instructions below are Agenta's +baseline; the user's own instructions follow and take precedence where they are more +specific. + +- Prefer the tools and skills provided to you over guessing. +- When a skill matches the task, read its SKILL.md fully before acting. +- Keep answers grounded in what the tools and skills actually return.""" + +# The base persona, always appended to Pi's built-in system prompt (never replaces it). This +# is the "who the agent is" layer, distinct from the AGENTS.md project-context layer above. +# +# TODO(product): replace this placeholder with the real Agenta persona framing. +AGENTA_FORCED_APPEND_SYSTEM = """\ +You are an Agenta agent. Be precise, cite what your tools and skills return, and do not +fabricate results.""" + +# Built-in tools every Agenta run forces on, unioned with the agent's resolved tools. +# ``read`` is mandatory: Pi only renders the skills section into the system prompt when the +# ``read`` tool is available. ``bash`` lets skills run their helper scripts. +AGENTA_FORCED_TOOLS: List[str] = ["read", "bash"] + +# Built-in skills every Agenta run forces on. 
Each name must match a committed directory under +# the runner's ``services/agent/skills/<name>/`` (the runner resolves names to those dirs). +# +# TODO(product): grow this with the real Agenta skill set. +AGENTA_FORCED_SKILLS: List[str] = ["agenta-getting-started"] + + +def _join(*parts: Optional[str]) -> Optional[str]: + """Join the non-empty parts with a blank line, or ``None`` when nothing remains.""" + kept = [part.strip() for part in parts if part and part.strip()] + if not kept: + return None + return "\n\n".join(kept) + + +def compose_instructions(user: Optional[str]) -> Optional[str]: + """The AGENTS.md the harness ships: the base preamble with the author's instructions + appended after it.""" + return _join(AGENTA_PREAMBLE, user) + + +def compose_append_system(user: Optional[str]) -> Optional[str]: + """The ``append_system`` the harness ships: the forced base persona with the author's own + ``append_system`` appended after it.""" + return _join(AGENTA_FORCED_APPEND_SYSTEM, user) + + +def force_tools(builtin_tools: List[str]) -> List[str]: + """Union the resolved built-in tools with the forced set, order-stable and de-duplicated + (resolved tools first, then any forced tools not already present).""" + seen = set() + out: List[str] = [] + for name in list(builtin_tools) + AGENTA_FORCED_TOOLS: + if name and name not in seen: + seen.add(name) + out.append(name) + return out diff --git a/sdks/python/agenta/sdk/agents/adapters/harnesses.py b/sdks/python/agenta/sdk/agents/adapters/harnesses.py new file mode 100644 index 0000000000..e718c1db2b --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/harnesses.py @@ -0,0 +1,150 @@ +"""Adapters of the :class:`~agenta.sdk.agents.interfaces.Harness` port: one per harness type. + +This is where the per-harness adaptation lives (the logic that used to sit in the TS runner): +turning the neutral :class:`SessionConfig` into the harness's own config, especially the +*tools*.
The harnesses genuinely differ, so the two adapters do different work: + +- **Pi** takes built-in tools by name *and* resolved tool specs, delivered natively (Pi has + no MCP). Pi does not gate tool use, so the permission policy does not apply. +- **Claude** has no built-in tools (they are a Pi concept), delivers tools over MCP, and + gates tool use, so the permission policy applies. +- **Agenta** is Pi with an opinion: the same engine and config shape, plus a fixed set of + forced tools, skills, a base AGENTS.md preamble, and a persona (see :mod:`.agenta_builtins`). + +The backend below stays pure plumbing; this layer owns the harness knowledge. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Type + +from agenta.sdk.utils.logging import get_module_logger + +from ..dtos import ( + AgentaAgentConfig, + ClaudeAgentConfig, + HarnessType, + PiAgentConfig, + SessionConfig, +) +from ..interfaces import Environment, Harness +from ..tools.models import ToolSpec, coerce_tool_spec +from .agenta_builtins import ( + AGENTA_FORCED_SKILLS, + compose_append_system, + compose_instructions, + force_tools, +) + +log = get_module_logger(__name__) + + +def _opt_str(value: Any) -> Any: + """Keep a harness option only if it is a non-empty string; otherwise drop it to ``None`` + so an empty or malformed value never reaches the wire as a real override.""" + if isinstance(value, str) and value.strip(): + return value + return None + + +def _normalize_tool_specs(specs: List[Dict[str, Any]]) -> List[ToolSpec]: + """Compatibility helper for old tests/callers still supplying runner dictionaries.""" + return [coerce_tool_spec(spec) for spec in specs or []] + + +class PiHarness(Harness): + harness_type = HarnessType.PI + + def _to_harness_config(self, config: SessionConfig) -> PiAgentConfig: + # Pi delivers tools natively: built-in names plus resolved specs registered through + # the Pi extension. Pi does not gate tool use, so the permission policy is dropped. 
+ # Pi reads its own slice of the neutral harness_options bag: `system` replaces Pi's + # base prompt, `append_system` extends it (both leave AGENTS.md untouched). + pi_options = config.agent.harness_options.get(HarnessType.PI.value, {}) + return PiAgentConfig( + agents_md=config.agent.instructions, + model=config.agent.model, + builtin_names=list(config.builtin_names), + tool_specs=list(config.tool_specs), + tool_callback=config.tool_callback, + mcp_servers=list(config.mcp_servers), + system=_opt_str(pi_options.get("system")), + append_system=_opt_str(pi_options.get("append_system")), + ) + + +class ClaudeHarness(Harness): + harness_type = HarnessType.CLAUDE + + def _to_harness_config(self, config: SessionConfig) -> ClaudeAgentConfig: + # Claude has no Pi built-in tools; drop them rather than ship a name Claude cannot + # honor. Tools go over MCP, and Claude gates tool use, so the permission policy is + # carried through. + if config.builtin_names: + log.warning( + "ClaudeHarness ignores %d built-in tool(s); built-ins are a Pi concept", + len(config.builtin_names), + ) + return ClaudeAgentConfig( + agents_md=config.agent.instructions, + model=config.agent.model, + tool_specs=list(config.tool_specs), + tool_callback=config.tool_callback, + mcp_servers=list(config.mcp_servers), + permission_policy=config.permission_policy, + ) + + +class AgentaHarness(Harness): + """Pi with an Agenta opinion. Same engine as :class:`PiHarness`, but every run carries the + forced Agenta extras (see :mod:`.agenta_builtins`): a base AGENTS.md preamble the author's + instructions are appended to, a forced persona ``append_system``, forced tools, and forced + skills. 
The author's own Pi ``harness_options`` (``system`` / ``append_system``) still + apply, layered after the forced bits.""" + + harness_type = HarnessType.AGENTA + + def _to_harness_config(self, config: SessionConfig) -> AgentaAgentConfig: + # The author's Pi options still apply; the Agenta harness reads the same `pi` slice as + # PiHarness (it drives Pi) and layers its forced extras on top. + pi_options = config.agent.harness_options.get(HarnessType.PI.value, {}) + return AgentaAgentConfig( + agents_md=compose_instructions(config.agent.instructions), + model=config.agent.model, + builtin_names=force_tools(list(config.builtin_names)), + tool_specs=list(config.tool_specs), + tool_callback=config.tool_callback, + mcp_servers=list(config.mcp_servers), + system=_opt_str(pi_options.get("system")), + append_system=compose_append_system( + _opt_str(pi_options.get("append_system")) + ), + skills=list(AGENTA_FORCED_SKILLS), + ) + + +_HARNESSES: Dict[HarnessType, Type[Harness]] = { + HarnessType.PI: PiHarness, + HarnessType.CLAUDE: ClaudeHarness, + HarnessType.AGENTA: AgentaHarness, +} + + +def make_harness( + harness_type: "HarnessType | str", environment: Environment +) -> Harness: + """Construct the Harness for a harness type over an environment. + + Maps the playground/config string to the right class. Raises + :class:`~agenta.sdk.agents.errors.UnsupportedHarnessError` if the environment's backend + cannot drive it. 
+ """ + resolved = HarnessType.coerce(harness_type) + try: + cls = _HARNESSES[resolved] + except KeyError as exc: + known = ", ".join(sorted(h.value for h in _HARNESSES)) + raise ValueError( + f"unknown harness '{resolved.value}'; known harnesses: {known}" + ) from exc + return cls(environment) diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py new file mode 100644 index 0000000000..bfd1528bd7 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py @@ -0,0 +1,170 @@ +"""InProcessPiBackend: drive Pi in-process through the TS runner, no rivet daemon. + +This was the first backend implementation and stays as the simplest one: a single harness +(Pi), a single place (local), the legacy in-process Pi engine (``engines/pi.ts``). It is the +reference to read when writing a new backend. + +It is its own class and hard-codes its differences (the ``pi`` engine, Pi-only support, +local-only). It is deliberately NOT a subclass of ``RivetBackend``; the two are different +engines that happen to share the ``utils`` wire and transport helpers. +""" + +from __future__ import annotations + +import os +from typing import Any, AsyncIterator, Dict, List, Mapping, Optional, Sequence + +from ..dtos import ( + AgentResult, + EventSink, + HarnessAgentConfig, + HarnessType, + Message, + TraceContext, +) +from ..interfaces import Backend, Sandbox, Session +from ..streaming import AgentRun +from ..utils import ( + deliver_http, + deliver_http_stream, + deliver_subprocess, + deliver_subprocess_stream, + request_to_wire, + result_from_wire, +) + +_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"] + + +class InProcessSandbox(Sandbox): + """The local host. 
In-process Pi runs here directly; provisioning files are buffered + (AGENTS.md rides the wire today).""" + + def __init__(self) -> None: + self.files: Dict[str, bytes] = {} + + async def add_files(self, files: Mapping[str, bytes]) -> None: + self.files.update(files) + + +class InProcessPiSession(Session): + """One turn-per-prompt Pi session driven in-process by the TS runner.""" + + def __init__( + self, + backend: "InProcessPiBackend", + config: HarnessAgentConfig, + *, + secrets: Optional[Mapping[str, str]], + trace: Optional[TraceContext], + session_id: Optional[str], + ) -> None: + self._backend = backend + self._config = config + self._secrets = dict(secrets or {}) + self._trace = trace + self._session_id = session_id + + @property + def id(self) -> Optional[str]: + return self._session_id + + def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]: + """The ``/run`` request JSON for this turn (shared by ``prompt`` and ``stream``).""" + return request_to_wire( + engine=InProcessPiBackend._ENGINE, + harness=HarnessType.PI, + sandbox="local", + config=self._config, + messages=messages, + secrets=self._secrets, + trace=self._trace, + session_id=self._session_id, + ) + + def _absorb_result(self, result: AgentResult) -> None: + """Carry the run's session id forward so a follow-up turn resumes it.""" + if result.session_id: + self._session_id = result.session_id + + async def prompt( + self, + messages: Sequence[Message], + *, + on_event: Optional[EventSink] = None, + ) -> AgentResult: + data = await self._backend._deliver(self._wire_payload(messages)) + result = result_from_wire(data) + self._absorb_result(result) + if on_event: + for event in result.events: + try: + on_event(event) + except Exception: # pylint: disable=broad-except + pass + return result + + def stream(self, messages: Sequence[Message]) -> AgentRun: + """Run one turn over the streaming transport, yielding events live (see AgentRun).""" + records = 
self._backend._deliver_stream(self._wire_payload(messages)) + return AgentRun(records).on_result(self._absorb_result) + + +class InProcessPiBackend(Backend): + """The in-process Pi engine: drives the Pi SDK directly in the TS runner. Pi only, local + only, no rivet daemon.""" + + # Agenta is Pi with an opinion: same in-process engine, so this backend drives it too. + supported_harnesses = frozenset({HarnessType.PI, HarnessType.AGENTA}) + _ENGINE = "pi" # hard-coded engine identity + + def __init__( + self, + *, + url: Optional[str] = None, + command: Optional[Sequence[str]] = None, + cwd: Optional[str] = None, + timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")), + ) -> None: + self._url = url + self._command: List[str] = list(command or _DEFAULT_COMMAND) + self._cwd = cwd + self._timeout = timeout + + async def create_sandbox(self) -> InProcessSandbox: + return InProcessSandbox() + + async def create_session( + self, + sandbox: Sandbox, + config: HarnessAgentConfig, + *, + harness: HarnessType, + secrets: Optional[Mapping[str, str]] = None, + trace: Optional[TraceContext] = None, + session_id: Optional[str] = None, + ) -> InProcessPiSession: + return InProcessPiSession( + self, + config, + secrets=secrets, + trace=trace, + session_id=session_id, + ) + + async def _deliver(self, payload: Dict[str, Any]) -> Dict[str, Any]: + if self._url: + return await deliver_http(self._url, payload, timeout=self._timeout) + env = {**os.environ, "AGENT_BACKEND": self._ENGINE} + return await deliver_subprocess( + self._command, payload, cwd=self._cwd, env=env, timeout=self._timeout + ) + + def _deliver_stream(self, payload: Dict[str, Any]) -> AsyncIterator[Dict[str, Any]]: + """The live counterpart of ``_deliver``: an NDJSON record stream from the runner.""" + if self._url: + return deliver_http_stream(self._url, payload, timeout=self._timeout) + env = {**os.environ, "AGENT_BACKEND": self._ENGINE} + return deliver_subprocess_stream( + self._command, payload, 
cwd=self._cwd, env=env, timeout=self._timeout + ) diff --git a/sdks/python/agenta/sdk/agents/adapters/local.py b/sdks/python/agenta/sdk/agents/adapters/local.py new file mode 100644 index 0000000000..5435ea4751 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/local.py @@ -0,0 +1,48 @@ +"""LocalBackend: run a harness on this machine, no rivet daemon and no Agenta sidecar. + +This is the backend a standalone SDK user gets. It is two mechanisms, one per harness, which +is exactly a backend's "plumbing per harness" job: + +- Pi -> the bundled JS runner (the in-process Pi engine), shipped inside the wheel, run + with ``node``. +- Claude -> the pure-Python ``claude-agent-sdk``, in-process, no TS bridge. + +NOT YET IMPLEMENTED. Tracked as Phase 3 (Pi) and Phase 4 (Claude) in +``docs/design/agent-workflows/scratch/sdk-local-backend/plan.md``. The class is present so +the adapter layout is complete and the port shape is visible; the methods raise until the +bundling build step and the ``claude-agent-sdk`` wiring land. +""" + +from __future__ import annotations + +from typing import Mapping, Optional + +from ..dtos import HarnessAgentConfig, HarnessType, TraceContext +from ..interfaces import Backend, Sandbox, Session + + +class LocalBackend(Backend): + """Run Pi (bundled JS) or Claude (``claude-agent-sdk``) on this machine.""" + + supported_harnesses = frozenset({HarnessType.PI, HarnessType.CLAUDE}) + + async def create_sandbox(self) -> Sandbox: + raise NotImplementedError( + "LocalBackend is not implemented yet (Phase 3: Pi via bundled JS, " + "Phase 4: Claude via claude-agent-sdk)." 
+ ) + + async def create_session( + self, + sandbox: Sandbox, + config: HarnessAgentConfig, + *, + harness: HarnessType, + secrets: Optional[Mapping[str, str]] = None, + trace: Optional[TraceContext] = None, + session_id: Optional[str] = None, + ) -> Session: + raise NotImplementedError( + "LocalBackend is not implemented yet (Phase 3: Pi via bundled JS, " + "Phase 4: Claude via claude-agent-sdk)." + ) diff --git a/sdks/python/agenta/sdk/agents/adapters/rivet.py b/sdks/python/agenta/sdk/agents/adapters/rivet.py new file mode 100644 index 0000000000..2316eb0dea --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/rivet.py @@ -0,0 +1,186 @@ +"""RivetBackend: drive a harness over ACP via the TypeScript rivet runner. + +This backend hard-codes that it is the rivet engine. It reaches the same runner the deployed +sidecar runs (HTTP when a ``url`` is set, otherwise a subprocess CLI), and the runner starts +the rivet daemon, the ACP adapter, and the harness. Supports Pi and Claude. The ``sandbox`` +axis (``local`` / ``daytona``) is a real runtime choice, so it stays a constructor arg. + +It is its own class, not a subclass of any other backend; it shares only the ``utils`` wire +and transport helpers. +""" + +from __future__ import annotations + +import os +from typing import Any, AsyncIterator, Dict, List, Mapping, Optional, Sequence + +from ..dtos import ( + AgentResult, + EventSink, + HarnessAgentConfig, + HarnessType, + Message, + TraceContext, +) +from ..interfaces import Backend, Sandbox, Session +from ..streaming import AgentRun +from ..utils import ( + deliver_http, + deliver_http_stream, + deliver_subprocess, + deliver_subprocess_stream, + request_to_wire, + result_from_wire, +) + +_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"] + + +class RivetSandbox(Sandbox): + """Carries the sandbox axis for the run. 
The real sandbox (a local daemon or a Daytona + VM) is created inside the TS runner; here we hold the axis and buffer provisioning files + (today AGENTS.md rides the wire, so this is informational).""" + + def __init__(self, sandbox_id: str) -> None: + self.sandbox_id = sandbox_id + self.files: Dict[str, bytes] = {} + + async def add_files(self, files: Mapping[str, bytes]) -> None: + self.files.update(files) + + +class RivetSession(Session): + """One turn-per-prompt session. Each prompt sends one ``/run`` (cold + replay).""" + + def __init__( + self, + backend: "RivetBackend", + sandbox: RivetSandbox, + config: HarnessAgentConfig, + *, + harness: HarnessType, + secrets: Optional[Mapping[str, str]], + trace: Optional[TraceContext], + session_id: Optional[str], + ) -> None: + self._backend = backend + self._sandbox = sandbox + self._config = config + self._harness = harness + self._secrets = dict(secrets or {}) + self._trace = trace + self._session_id = session_id + + @property + def id(self) -> Optional[str]: + return self._session_id + + def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]: + """The ``/run`` request JSON for this turn (shared by ``prompt`` and ``stream``).""" + return request_to_wire( + engine=RivetBackend._ENGINE, + harness=self._harness, + sandbox=self._sandbox.sandbox_id, + config=self._config, + messages=messages, + secrets=self._secrets, + trace=self._trace, + session_id=self._session_id, + ) + + def _absorb_result(self, result: AgentResult) -> None: + """Carry the run's session id forward so a follow-up turn resumes it.""" + if result.session_id: + self._session_id = result.session_id + + async def prompt( + self, + messages: Sequence[Message], + *, + on_event: Optional[EventSink] = None, + ) -> AgentResult: + data = await self._backend._deliver(self._wire_payload(messages)) + result = result_from_wire(data) + self._absorb_result(result) + _emit_events(result, on_event) + return result + + def stream(self, messages: 
Sequence[Message]) -> AgentRun: + """Run one turn over the streaming transport, yielding events live (see AgentRun).""" + records = self._backend._deliver_stream(self._wire_payload(messages)) + return AgentRun(records).on_result(self._absorb_result) + + +class RivetBackend(Backend): + """The rivet engine: a harness over ACP through the TS runner. Pi and Claude.""" + + supported_harnesses = frozenset({HarnessType.PI, HarnessType.CLAUDE}) + _ENGINE = "rivet" # hard-coded engine identity, not a constructor arg + + def __init__( + self, + *, + sandbox: str = "local", + url: Optional[str] = None, + command: Optional[Sequence[str]] = None, + cwd: Optional[str] = None, + timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")), + ) -> None: + self._sandbox = sandbox + self._url = url + self._command: List[str] = list(command or _DEFAULT_COMMAND) + self._cwd = cwd + self._timeout = timeout + + async def create_sandbox(self) -> RivetSandbox: + return RivetSandbox(self._sandbox) + + async def create_session( + self, + sandbox: Sandbox, + config: HarnessAgentConfig, + *, + harness: HarnessType, + secrets: Optional[Mapping[str, str]] = None, + trace: Optional[TraceContext] = None, + session_id: Optional[str] = None, + ) -> RivetSession: + if not isinstance(sandbox, RivetSandbox): + raise TypeError("RivetBackend.create_session requires a RivetSandbox") + return RivetSession( + self, + sandbox, + config, + harness=harness, + secrets=secrets, + trace=trace, + session_id=session_id, + ) + + async def _deliver(self, payload: Dict[str, Any]) -> Dict[str, Any]: + if self._url: + return await deliver_http(self._url, payload, timeout=self._timeout) + env = {**os.environ, "AGENT_BACKEND": self._ENGINE} + return await deliver_subprocess( + self._command, payload, cwd=self._cwd, env=env, timeout=self._timeout + ) + + def _deliver_stream(self, payload: Dict[str, Any]) -> AsyncIterator[Dict[str, Any]]: + """The live counterpart of ``_deliver``: an NDJSON record stream from the 
runner.""" + if self._url: + return deliver_http_stream(self._url, payload, timeout=self._timeout) + env = {**os.environ, "AGENT_BACKEND": self._ENGINE} + return deliver_subprocess_stream( + self._command, payload, cwd=self._cwd, env=env, timeout=self._timeout + ) + + +def _emit_events(result: AgentResult, on_event: Optional[EventSink]) -> None: + """Replay the result's event log to a live sink (the one-shot transports batch it).""" + if not on_event: + return + for event in result.events: + try: + on_event(event) + except Exception: # pylint: disable=broad-except + pass diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py b/sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py new file mode 100644 index 0000000000..a8ad63761a --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/vercel/__init__.py @@ -0,0 +1,43 @@ +"""Vercel AI SDK adapters for the agent runtime. + +The neutral agent runtime speaks ``Message``, ``AgentEvent``, and ``AgentRun``. This package +is the browser protocol adapter: Vercel ``UIMessage`` request bodies, UI Message Stream parts, +SSE framing, and the ``/messages`` route helpers. 
+""" + +from .messages import ( + from_ui_messages, + message_to_vercel_ui_message, + to_ui_message, + vercel_ui_messages_to_messages, +) +from .routing import ( + VERCEL_MESSAGE_PROTOCOL, + VERCEL_MESSAGE_PROTOCOL_HEADERS, + VERCEL_MESSAGE_PROTOCOL_VERSION, + inject_stream_session_id, + register_agent_message_routes, + resolve_session_id, + set_vercel_message_protocol_headers, +) +from .sse import VERCEL_UI_MESSAGE_STREAM_HEADERS, vercel_sse_stream +from .stream import agent_run_to_vercel_parts, ui_message_stream + +__all__ = [ + "vercel_ui_messages_to_messages", + "message_to_vercel_ui_message", + "agent_run_to_vercel_parts", + "VERCEL_UI_MESSAGE_STREAM_HEADERS", + "vercel_sse_stream", + "resolve_session_id", + "inject_stream_session_id", + "VERCEL_MESSAGE_PROTOCOL", + "VERCEL_MESSAGE_PROTOCOL_VERSION", + "VERCEL_MESSAGE_PROTOCOL_HEADERS", + "set_vercel_message_protocol_headers", + "register_agent_message_routes", + # Former flat-module names. + "from_ui_messages", + "to_ui_message", + "ui_message_stream", +] diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/messages.py b/sdks/python/agenta/sdk/agents/adapters/vercel/messages.py new file mode 100644 index 0000000000..7f718b9032 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/vercel/messages.py @@ -0,0 +1,219 @@ +"""Vercel ``UIMessage`` conversion at the agent HTTP edge. + +This adapter translates between the Vercel AI SDK ``UIMessage`` parts shape and the +neutral agent runtime ``Message`` / ``ContentBlock`` types. The neutral DTOs stay the port; +Vercel-specific part names live here. 
def _ui_message_to_message(raw: Any) -> Optional[Message]:
    """Convert one inbound item into a neutral ``Message``, or ``None`` to drop it.

    A ``Message`` instance passes through unchanged. Anything that is not a dict
    with a ``role`` key is dropped. A dict without ``parts`` is handed to
    ``Message.from_raw``; otherwise its parts are flattened into content blocks,
    and a message made only of text blocks collapses to one plain string.
    """
    if isinstance(raw, Message):
        return raw
    if not (isinstance(raw, dict) and "role" in raw):
        return None

    role = str(raw["role"])
    parts = raw.get("parts")
    if parts is None:
        return Message.from_raw(raw)

    blocks: List[ContentBlock] = [
        block for part in (parts or []) for block in _part_to_blocks(part)
    ]
    # An empty block list is trivially "text only" and joins to "".
    text_only = all(block.type == "text" for block in blocks)
    content: Any = (
        "".join(block.text or "" for block in blocks) if text_only else blocks
    )
    return Message(role=role, content=content)
def _tool_part_blocks(part: Dict[str, Any], ptype: str) -> List[ContentBlock]:
    """Translate one Vercel tool part into neutral tool-call / tool-result blocks.

    The tool name comes from the part itself, or from the ``tool-<name>`` part
    type when the part does not carry one. A ``tool_call`` block is emitted
    unless the part is a bare ``tool-output-available`` without an ``input``
    key. A ``tool_result`` block follows when the part reports an error or an
    output.
    """
    output_only = ptype == TOOL_OUTPUT_AVAILABLE
    call_id = part.get("toolCallId") or part.get("tool_call_id")
    name = part.get("toolName") or part.get("tool_name")
    if name is None and not output_only and ptype.startswith("tool-"):
        name = ptype[len("tool-") :]

    def result_block(output: Any, is_error: bool) -> ContentBlock:
        # Both result flavours share the call id and tool name.
        return ContentBlock(
            type="tool_result",
            tool_call_id=call_id,
            tool_name=name,
            output=output,
            is_error=is_error,
        )

    blocks: List[ContentBlock] = []
    if not output_only or "input" in part:
        blocks.append(
            ContentBlock(
                type="tool_call",
                tool_call_id=call_id,
                tool_name=name,
                input=part.get("input"),
            )
        )

    error_text = part.get("errorText")
    state = part.get("state")
    if error_text is not None or state == "output-error":
        # Prefer the explicit error text; fall back to whatever output exists.
        failure = error_text if error_text is not None else part.get("output")
        blocks.append(result_block(failure, True))
    elif "output" in part or state == "output-available":
        blocks.append(result_block(part.get("output"), False))
    return blocks
``AgentResult`` or neutral ``Message`` as one Vercel ``UIMessage``.""" + if isinstance(source, AgentResult): + return { + "id": message_id, + "role": "assistant", + "parts": [{"type": "text", "text": source.output or ""}], + } + if isinstance(source, Message): + return { + "id": message_id, + "role": source.role, + "parts": _content_to_parts(source.content), + } + raise TypeError( + "message_to_vercel_ui_message expects an AgentResult or Message, " + f"got {type(source).__name__!r}" + ) + + +def _content_to_parts(content: Any) -> List[Dict[str, Any]]: + if isinstance(content, str): + return [{"type": "text", "text": content}] if content else [] + parts: List[Dict[str, Any]] = [] + for block in content or []: + parts.extend(_block_to_parts(block)) + return parts + + +def _block_to_parts(block: ContentBlock) -> List[Dict[str, Any]]: + if block.type == "text": + return [{"type": "text", "text": block.text or ""}] + if block.type in ("image", "resource"): + part: Dict[str, Any] = {"type": "file"} + if block.uri is not None: + part["url"] = block.uri + if block.mime_type is not None: + part["mediaType"] = block.mime_type + if block.data is not None: + part["data"] = block.data + return [part] + if block.type == "tool_call": + return [ + { + "type": f"tool-{block.tool_name or 'tool'}", + "toolCallId": block.tool_call_id, + "state": "input-available", + "input": block.input, + } + ] + if block.type == "tool_result": + return [ + { + "type": f"tool-{block.tool_name or 'tool'}", + "toolCallId": block.tool_call_id, + "state": "output-error" if block.is_error else "output-available", + "output": block.output, + } + ] + return [] + + +# Back-compat aliases for the former flat module API. 
+from_ui_messages = vercel_ui_messages_to_messages +to_ui_message = message_to_vercel_ui_message diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py new file mode 100644 index 0000000000..a854ca0460 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py @@ -0,0 +1,209 @@ +"""FastAPI route wiring for the agent ``/messages`` Vercel adapter.""" + +from __future__ import annotations + +import re +from typing import Any, Callable, Collection, Optional +from uuid import uuid4 + +from fastapi import Request +from fastapi.responses import JSONResponse, Response + +from agenta.sdk.contexts.tracing import tracing_context_manager +from agenta.sdk.models.workflows import ( + LoadSessionRequest, + LoadSessionResponse, + WorkflowBatchResponse, + WorkflowInvokeRequest, + WorkflowRequestData, + WorkflowStreamingResponse, +) + +from ...interfaces import NoopSessionStore, SessionStore +from .messages import message_to_vercel_ui_message, vercel_ui_messages_to_messages + +# An opaque, project-scoped session id (RFC §4.1): bounded length, restricted charset. 
+_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9._:-]{1,128}$") + +VERCEL_MESSAGE_PROTOCOL = "vercel" +VERCEL_MESSAGE_PROTOCOL_VERSION = "v1" +VERCEL_MESSAGE_PROTOCOL_HEADERS = { + "x-ag-messages-format": VERCEL_MESSAGE_PROTOCOL, + "x-ag-messages-version": VERCEL_MESSAGE_PROTOCOL_VERSION, +} + + +def set_vercel_message_protocol_headers(response: Response) -> Response: + """Stamp the default agent ``/messages`` protocol identity on an HTTP response.""" + for key, value in VERCEL_MESSAGE_PROTOCOL_HEADERS.items(): + response.headers.setdefault(key, value) + return response + + +def resolve_session_id(session_id: Optional[str]) -> Optional[str]: + """Mint a new id when absent, echo a valid one, or return ``None`` when invalid.""" + if session_id is None: + return "sess_" + uuid4().hex + return session_id if _SESSION_ID_RE.match(session_id) else None + + +def inject_stream_session_id( + response: WorkflowStreamingResponse, + session_id: str, +) -> None: + """Stamp ``messageMetadata.sessionId`` onto the first Vercel ``start`` part.""" + original = response.generator + + async def generator(): + stamped = False + async for part in original(): + if not stamped and isinstance(part, dict) and part.get("type") == "start": + part.setdefault("messageMetadata", {})["sessionId"] = session_id + stamped = True + yield part + + response.generator = generator + + +def make_messages_endpoint( + *, + wf: Any, + get_request_tracing_context: Callable[[Request], Any], + parse_accept: Callable[[Request], Optional[str]], + stream_media_types: Collection[str], + make_json_response: Callable[[WorkflowBatchResponse], Response], + make_not_acceptable_response: Callable[[str, Any], Response], + make_stream_response: Callable[[WorkflowStreamingResponse, str], Response], + handle_failure: Callable[[Exception], Any], +): + """Build the ``POST /messages`` endpoint for one routed agent workflow.""" + + async def messages_endpoint(req: Request, request: WorkflowInvokeRequest): + credentials = 
def make_load_session_endpoint(
    *,
    session_store: Optional[SessionStore] = None,
):
    """Build the v1 ``POST /load-session`` endpoint over the session-store port.

    Args:
        session_store: The store to read sessions from. When omitted, a
            ``NoopSessionStore`` is used.

    Returns:
        An async FastAPI endpoint that loads the stored messages for the
        requested session id, renders each as a Vercel ``UIMessage`` with ids
        ``msg-1``, ``msg-2``, ..., and returns them as JSON stamped with the
        message-protocol headers.
    """
    # Compare against None rather than truthiness: a store that defines
    # ``__len__`` or ``__bool__`` can be falsy and would otherwise be silently
    # swapped for the no-op store.
    store = NoopSessionStore() if session_store is None else session_store

    async def load_session_endpoint(req: Request, request: LoadSessionRequest):
        messages = await store.load(request.session_id)
        response = LoadSessionResponse(
            session_id=request.session_id,
            messages=[
                message_to_vercel_ui_message(message, message_id=f"msg-{idx}")
                for idx, message in enumerate(messages, start=1)
            ],
        )
        return set_vercel_message_protocol_headers(
            JSONResponse(content=response.model_dump(mode="json"))
        )

    return load_session_endpoint
async def agent_run_to_vercel_parts(
    run: AgentRun,
    *,
    session_id: Optional[str] = None,
    message_id: str = "msg-1",
    trace_id: Optional[str] = None,
) -> AsyncIterator[Dict[str, Any]]:
    """Project a live ``AgentRun`` into Vercel UI Message Stream part dictionaries.

    The stream opens with ``start`` / ``start-step``, maps each run event to its
    stream part(s), and closes with ``finish-step`` / ``finish``. Usage and the
    stop reason are collected along the way and reported on ``finish``.

    Args:
        run: The run to iterate; each event exposes ``type`` and ``data``.
        session_id: When given, stamped as ``messageMetadata.sessionId`` on the
            ``start`` part.
        message_id: The ``messageId`` of the ``start`` part.
        trace_id: Trace id for the ``finish`` metadata; when ``None`` it is
            taken from the run result if one is available.

    Yields:
        One dictionary per stream part. If iterating the run raises, a single
        ``error`` part is yielded and the stream ends without finish parts.
    """
    start: Dict[str, Any] = {"type": "start", "messageId": message_id}
    if session_id is not None:
        start["messageMetadata"] = {"sessionId": session_id}
    yield start
    yield {"type": "start-step"}

    # Whole-message events carry no id of their own, so ids are minted locally.
    text_seq = 0
    reasoning_seq = 0
    usage: Optional[Dict[str, Any]] = None
    stop_reason: Optional[str] = None

    try:
        async for event in run:
            etype = event.type
            data = event.data

            if etype == "message":
                text_seq += 1
                tid = f"text-{text_seq}"
                yield {"type": "text-start", "id": tid}
                yield {"type": "text-delta", "id": tid, "delta": data.get("text", "")}
                yield {"type": "text-end", "id": tid}
            elif etype == "message_start":
                yield {"type": "text-start", "id": data.get("id")}
            elif etype == "message_delta":
                yield {
                    "type": "text-delta",
                    "id": data.get("id"),
                    "delta": data.get("delta", ""),
                }
            elif etype == "message_end":
                yield {"type": "text-end", "id": data.get("id")}
            elif etype == "thought":
                reasoning_seq += 1
                rid = f"reasoning-{reasoning_seq}"
                yield {"type": "reasoning-start", "id": rid}
                yield {
                    "type": "reasoning-delta",
                    "id": rid,
                    "delta": data.get("text", ""),
                }
                yield {"type": "reasoning-end", "id": rid}
            elif etype == "reasoning_start":
                yield {"type": "reasoning-start", "id": data.get("id")}
            elif etype == "reasoning_delta":
                yield {
                    "type": "reasoning-delta",
                    "id": data.get("id"),
                    "delta": data.get("delta", ""),
                }
            elif etype == "reasoning_end":
                yield {"type": "reasoning-end", "id": data.get("id")}
            elif etype == "tool_call":
                tool_call_id = data.get("id")
                tool_name = data.get("name")
                yield {
                    "type": "tool-input-start",
                    "toolCallId": tool_call_id,
                    "toolName": tool_name,
                }
                available: Dict[str, Any] = {
                    "type": "tool-input-available",
                    "toolCallId": tool_call_id,
                    "toolName": tool_name,
                    "input": data.get("input"),
                }
                if data.get("render") is not None:
                    available["render"] = data["render"]
                yield available
            elif etype == "tool_result":
                tool_call_id = data.get("id")
                if data.get("denied"):
                    yield {
                        "type": "tool-output-denied",
                        "toolCallId": tool_call_id,
                    }
                elif data.get("isError"):
                    yield {
                        "type": "tool-output-error",
                        "toolCallId": tool_call_id,
                        "errorText": _as_text(data.get("output")),
                    }
                else:
                    # Prefer the structured payload over the plain output.
                    structured = data.get("data")
                    out = structured if structured is not None else data.get("output")
                    available = {
                        "type": "tool-output-available",
                        "toolCallId": tool_call_id,
                        "output": out,
                    }
                    if data.get("render") is not None:
                        available["render"] = data["render"]
                    yield available
            elif etype == "interaction_request":
                yield _interaction_part(data)
            elif etype == "data":
                part: Dict[str, Any] = {
                    "type": f"data-{data.get('name', 'data')}",
                    "data": data.get("data"),
                }
                if data.get("transient"):
                    part["transient"] = True
                yield part
            elif etype == "file":
                yield {
                    "type": "file",
                    "url": data.get("url"),
                    "mediaType": data.get("mediaType"),
                }
            elif etype == "usage":
                usage = _usage_metadata(data)
            elif etype == "error":
                yield {"type": "error", "errorText": data.get("message", "")}
            elif etype == "done":
                stop_reason = data.get("stopReason")
    except Exception as exc:
        yield {"type": "error", "errorText": str(exc)}
        return

    # Backfill whatever the event stream did not supply from the final result.
    # ``stop_reason`` belongs in this guard too: without it a run that reported
    # usage and was given a trace id, but emitted no ``done`` stop reason, would
    # never pick up ``result.stop_reason``.
    if usage is None or stop_reason is None or trace_id is None:
        result = _safe_result(run)
        if result is not None:
            if usage is None:
                usage = _usage_metadata(result.usage or {})
            if stop_reason is None:
                stop_reason = result.stop_reason
            if trace_id is None:
                trace_id = result.trace_id

    yield {"type": "finish-step"}
    finish: Dict[str, Any] = {"type": "finish"}
    if stop_reason is not None:
        finish["finishReason"] = stop_reason
    metadata: Dict[str, Any] = {}
    if usage:
        metadata["usage"] = usage
    if trace_id is not None:
        metadata["traceId"] = trace_id
    if metadata:
        finish["messageMetadata"] = metadata
    yield finish
"toolCall": payload.get("toolCall"), + } + if kind == "input": + return {"type": "data-input-request", "id": data.get("id"), "data": payload} + return { + "type": "data-interaction", + "id": data.get("id"), + "data": {"kind": kind, "payload": payload}, + } + + +def _approval_tool_call_id(payload: Dict[str, Any]) -> Optional[Any]: + tool_call_id = payload.get("toolCallId") + if tool_call_id is not None: + return tool_call_id + tool_call = payload.get("toolCall") + if isinstance(tool_call, dict): + return tool_call.get("id") or tool_call.get("toolCallId") + return None + + +def _usage_metadata(data: Dict[str, Any]) -> Dict[str, Any]: + return { + key: data[key] + for key in ("input", "output", "total", "cost") + if data.get(key) is not None + } + + +def _as_text(value: Any) -> str: + if value is None: + return "" + return value if isinstance(value, str) else str(value) + + +def _safe_result(run: AgentRun) -> Optional[AgentResult]: + try: + return run.result() + except Exception: + return None + + +# Back-compat alias for the former flat module API. +ui_message_stream = agent_run_to_vercel_parts diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py new file mode 100644 index 0000000000..0a050b4cb1 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/dtos.py @@ -0,0 +1,698 @@ +"""Data contracts for the agent runtime (the DTO layer). + +Everything the ports and adapters pass around: harness identity, capabilities, content +blocks, messages, run events, the run result, trace/tool-callback plumbing, the neutral +``AgentConfig``, the per-harness configs a backend plumbs, and the ``SessionConfig`` bundle. + +These are Pydantic models (the SDK already depends on Pydantic), kept neutral: an adapter +translates them to and from its engine's own shapes at its edge. 
class HarnessType(str, Enum):
    """Identifies the coding agent program a run drives.

    Backends declare which of these members they support.
    """

    PI = "pi"
    CLAUDE = "claude"
    AGENTA = "agenta"

    @classmethod
    def coerce(cls, value: "HarnessType | str") -> "HarnessType":
        """Return ``value`` as a member, lower-casing a loose string first.

        An existing member is returned as-is; anything else is stringified,
        lower-cased and looked up by value, which raises ``ValueError`` for an
        unknown name.
        """
        return value if isinstance(value, cls) else cls(str(value).lower())
+ """ + + text_messages: bool = True + images: bool = False + file_attachments: bool = False + mcp_tools: bool = False + tool_calls: bool = False + reasoning: bool = False + plan_mode: bool = False + permissions: bool = False + usage: bool = False + streaming_deltas: bool = False + session_lifecycle: bool = False + + @classmethod + def from_wire( + cls, data: Optional[Dict[str, Any]] + ) -> Optional["HarnessCapabilities"]: + """Parse the camelCase capability object an adapter returns. ``None`` passes through.""" + if not isinstance(data, dict): + return None + return cls( + text_messages=bool(data.get("textMessages", True)), + images=bool(data.get("images", False)), + file_attachments=bool(data.get("fileAttachments", False)), + mcp_tools=bool(data.get("mcpTools", False)), + tool_calls=bool(data.get("toolCalls", False)), + reasoning=bool(data.get("reasoning", False)), + plan_mode=bool(data.get("planMode", False)), + permissions=bool(data.get("permissions", False)), + usage=bool(data.get("usage", False)), + streaming_deltas=bool(data.get("streamingDeltas", False)), + session_lifecycle=bool(data.get("sessionLifecycle", False)), + ) + + +# --------------------------------------------------------------------------- +# Turn input: content blocks and messages +# --------------------------------------------------------------------------- + + +class ContentBlock(BaseModel): + """One piece of a message, mirroring the ACP content-block kinds. + + ``text`` is the only kind callers send today; ``image`` and ``resource`` are plumbed so + an image-capable harness can take them. A bare string normalizes to a single ``text`` + block on the wire. 
+ + ``tool_call`` / ``tool_result`` carriers (``tool_call_id``/``tool_name``/``input``/ + ``output``/``is_error``) hold a resolved tool turn for structured-message continuation: + the ``/messages`` egress folds inbound UIMessage tool/approval parts into these so a + cross-turn HITL reply replays as a real tool call plus its result, and the model resumes + from the result instead of re-asking. Mirrors ``ContentBlock`` in + ``services/agent/src/protocol.ts``. + """ + + type: str # "text" | "image" | "resource" | "tool_call" | "tool_result" + text: Optional[str] = None + data: Optional[str] = None # base64 payload, used when type != "text" + mime_type: Optional[str] = None + uri: Optional[str] = None + # Tool-turn carriers (used by tool_call / tool_result blocks). + tool_call_id: Optional[str] = None + tool_name: Optional[str] = None + input: Optional[Any] = None + output: Optional[Any] = None + is_error: Optional[bool] = None + + def to_wire(self) -> Dict[str, Any]: + block: Dict[str, Any] = {"type": self.type} + if self.text is not None: + block["text"] = self.text + if self.data is not None: + block["data"] = self.data + if self.mime_type is not None: + block["mimeType"] = self.mime_type + if self.uri is not None: + block["uri"] = self.uri + if self.tool_call_id is not None: + block["toolCallId"] = self.tool_call_id + if self.tool_name is not None: + block["toolName"] = self.tool_name + if self.input is not None: + block["input"] = self.input + if self.output is not None: + block["output"] = self.output + if self.is_error is not None: + block["isError"] = self.is_error + return block + + @classmethod + def from_raw(cls, raw: Any) -> "ContentBlock": + """Coerce a loose block (string or dict) into a ContentBlock.""" + if isinstance(raw, ContentBlock): + return raw + if isinstance(raw, str): + return cls(type="text", text=raw) + if isinstance(raw, dict): + return cls( + type=str(raw.get("type", "text")), + text=raw.get("text"), + data=raw.get("data"), + 
mime_type=raw.get("mimeType") or raw.get("mime_type"),
+                uri=raw.get("uri"),
+                tool_call_id=raw.get("toolCallId") or raw.get("tool_call_id"),
+                tool_name=raw.get("toolName") or raw.get("tool_name"),
+                input=raw.get("input"),
+                output=raw.get("output"),
+                is_error=raw.get("isError")
+                if raw.get("isError") is not None
+                else raw.get("is_error"),
+            )
+        return cls(type="text", text=str(raw))
+
+
+# A message's content is either a plain string or a list of content blocks.
+MessageContent = Union[str, List[ContentBlock]]
+
+
+class Message(BaseModel):
+    """A chat message in the conversation. ``content`` is text or content blocks.
+
+    This is the runtime's own message type, distinct from the SDK's prompt ``Message``
+    (``agenta.Message``); the two serve different layers.
+    """
+
+    role: str
+    content: MessageContent = ""
+
+    def to_wire(self) -> Dict[str, Any]:
+        """Serialize to ``{"role", "content"}``; block lists go through
+        ``ContentBlock.to_wire`` and plain-string content is emitted unchanged."""
+        if isinstance(self.content, str):
+            content: Any = self.content
+        else:
+            content = [block.to_wire() for block in self.content]
+        return {"role": self.role, "content": content}
+
+    @classmethod
+    def from_raw(cls, raw: Any) -> Optional["Message"]:
+        """Coerce a loose dict (the playground's message shape) into a Message.
+
+        Returns ``raw`` unchanged when it already is a ``Message`` and ``None`` when
+        it is not a dict carrying a ``role``. A list ``content`` is coerced block by
+        block through ``ContentBlock.from_raw``. A missing or explicitly null
+        ``content`` becomes empty text, so such a message is kept instead of being
+        rejected by field validation.
+        """
+        if isinstance(raw, Message):
+            return raw
+        if not isinstance(raw, dict) or "role" not in raw:
+            return None
+        content = raw.get("content")
+        if content is None:
+            # ``.get("content", "")`` only covered a missing key, not an explicit null.
+            content = ""
+        elif isinstance(content, list):
+            content = [ContentBlock.from_raw(block) for block in content]
+        return cls(role=str(raw["role"]), content=content)
+
+
+def to_messages(raw: Optional[List[Any]]) -> List[Message]:
+    """Coerce a list of loose message dicts into :class:`Message` objects."""
+    messages: List[Message] = []
+    for item in raw or []:
+        message = Message.from_raw(item)
+        if message is not None:
+            messages.append(message)
+    return messages
+
+
+# ---------------------------------------------------------------------------
+# Run events
+# ---------------------------------------------------------------------------
+
+
+class
AgentEvent(BaseModel): + """One structured event from a run, mapped from an ACP ``session/update``. + + ``type`` is one of ``message``, ``thought``, ``tool_call``, ``tool_result``, ``usage``, + ``error``, ``done``. ``data`` carries the rest verbatim. + """ + + type: str + data: Dict[str, Any] = Field(default_factory=dict) + + @classmethod + def from_wire(cls, raw: Any) -> Optional["AgentEvent"]: + if not isinstance(raw, dict) or not raw.get("type"): + return None + return cls(type=str(raw["type"]), data=raw) + + +# A live event sink. Synchronous: adapters invoke it as events arrive (or as a batch). +EventSink = Callable[[AgentEvent], None] + + +# --------------------------------------------------------------------------- +# Cross-boundary plumbing +# --------------------------------------------------------------------------- + + +class TraceContext(BaseModel): + """Agenta trace context threaded into a harness run, so it nests under the caller's + workflow span. All fields optional; with none set the run traces standalone (or not at + all), the standalone-SDK case.""" + + traceparent: Optional[str] = None + baggage: Optional[str] = None + endpoint: Optional[str] = None # OTLP traces URL + authorization: Optional[str] = None # full Authorization header value + capture_content: bool = True + + def to_wire(self) -> Dict[str, Any]: + return { + "traceparent": self.traceparent, + "baggage": self.baggage, + "endpoint": self.endpoint, + "authorization": self.authorization, + "captureContent": self.capture_content, + } + + +# --------------------------------------------------------------------------- +# Run result +# --------------------------------------------------------------------------- + + +class AgentResult(BaseModel): + """A run's reply plus structured metadata. 
``output`` is the final assistant text; + ``usage`` rolls token/cost onto a workflow span; ``capabilities`` is what the harness + was probed to support.""" + + output: str = "" + messages: List[Message] = Field(default_factory=list) + events: List[AgentEvent] = Field(default_factory=list) + usage: Optional[Dict[str, Any]] = None + stop_reason: Optional[str] = None + capabilities: Optional[HarnessCapabilities] = None + session_id: Optional[str] = None + model: Optional[str] = None + trace_id: Optional[str] = None + + +# --------------------------------------------------------------------------- +# The neutral agent definition + run selection +# --------------------------------------------------------------------------- + + +class AgentConfig(BaseModel): + """What an agent IS, independent of where or how it runs. ``instructions`` becomes + ``AGENTS.md``. ``tools`` are provider-agnostic references; resolving them into runnable + specs is the caller's job (the Agenta service does it server-side). + + ``harness_options`` is the neutral config's one escape hatch: a map keyed by harness + name (``"pi"``, ``"claude"``) whose value is a free-form bag of knobs only that harness + understands, for example Pi's ``system`` / ``append_system`` prompt overrides. The + config stays harness-agnostic because each Harness adapter reads only its own slice and + ignores the rest; a key for a harness that is not running is simply never looked at. 
+ """ + + model_config = ConfigDict(populate_by_name=True) + + instructions: Optional[str] = None + model: Optional[str] = None + tools: List[ToolConfig] = Field(default_factory=list) + mcp_servers: List[MCPServerConfig] = Field(default_factory=list) + harness_options: Dict[str, Dict[str, Any]] = Field(default_factory=dict) + + @field_validator("tools", mode="before") + @classmethod + def _coerce_tools(cls, value: Any) -> List[ToolConfig]: + return coerce_tool_configs(_as_list(value)).tool_configs + + @field_validator("mcp_servers", mode="before") + @classmethod + def _coerce_mcp_servers(cls, value: Any) -> List[MCPServerConfig]: + return parse_mcp_server_configs(_as_list(value)) + + @classmethod + def from_params( + cls, + params: Dict[str, Any], + *, + defaults: Optional["AgentConfig"] = None, + ) -> "AgentConfig": + """Build an :class:`AgentConfig` from a request/config dict. + + Accepts three shapes, in priority order: the dedicated ``agent`` element, the + playground ``prompt`` prompt-template (system message -> instructions, ``llm_config`` + -> model + tools), and a flat ``{model, agents_md, tools}``. Unset fields fall back + to ``defaults``. ``harness_options`` is read from the ``agent`` element (or the flat + request) when present. + """ + base = defaults or cls() + instructions, model, tools = _parse_agent_fields(params, base) + return cls( + instructions=instructions, + model=model, + tools=_as_list(tools), + mcp_servers=_parse_mcp_servers_raw(params, base), + harness_options=_parse_harness_options(params, base), + ) + + +class RunSelection(BaseModel): + """The run-time choices stored next to the agent config: which harness, which sandbox, + the permission policy. 
Read by the caller to pick a backend and harness class; + deliberately not part of the neutral :class:`AgentConfig`.""" + + harness: str = "pi" + sandbox: str = "local" + permission_policy: PermissionPolicy = "auto" + + @classmethod + def from_params( + cls, + params: Dict[str, Any], + *, + default_harness: str = "pi", + default_sandbox: str = "local", + ) -> "RunSelection": + agent = params.get("agent") + source = agent if isinstance(agent, dict) else params + return cls( + harness=str(source.get("harness") or default_harness).lower(), + sandbox=str(source.get("sandbox") or default_sandbox).lower(), + permission_policy=str(source.get("permission_policy") or "auto").lower(), + ) + + +# --------------------------------------------------------------------------- +# Per-harness configs (what an adapter consumes) +# --------------------------------------------------------------------------- + + +class HarnessAgentConfig(BaseModel): + """Base for a harness-specific config. A Harness produces one of these from the neutral + config; a backend plumbs it as-is, with no business logic about how the harness works. + + The two subclasses differ in their *shape*, not just their identity, because the + harnesses differ: Pi takes built-in tool names plus native tool specs and never gates + tool use; Claude has no built-ins, delivers tools over MCP, and gates tool use behind a + permission policy. ``wire_tools`` is where each config emits its own tool/permission + fields for the ``/run`` payload. 
+ """ + + model_config = ConfigDict(populate_by_name=True) + + harness: ClassVar[HarnessType] + + agents_md: Optional[str] = None + model: Optional[str] = None + tool_callback: Optional[ToolCallback] = None + mcp_servers: List[ResolvedMCPServer] = Field(default_factory=list) + + @field_validator("mcp_servers", mode="before") + @classmethod + def _coerce_resolved_mcp_servers(cls, value: Any) -> List[ResolvedMCPServer]: + return [ + item + if isinstance(item, ResolvedMCPServer) + else ResolvedMCPServer.model_validate(item) + for item in value or [] + ] + + def wire_tools(self) -> Dict[str, Any]: + """The tool + permission fields this harness contributes to the ``/run`` payload.""" + raise NotImplementedError + + def wire_prompt(self) -> Dict[str, Any]: + """The system-prompt fields this harness contributes to the ``/run`` payload. Empty + by default; a harness that exposes prompt overrides (Pi) emits them here.""" + return {} + + def wire_mcp(self) -> Dict[str, Any]: + """The ``mcpServers`` field for the ``/run`` payload. Omitted when none are declared so + a tool-free run's payload is unchanged (the golden wire contract).""" + if not self.mcp_servers: + return {} + return {"mcpServers": mcp_servers_to_wire(self.mcp_servers)} + + +class PiAgentConfig(HarnessAgentConfig): + """Pi's config. Built-in tools by name plus resolved specs delivered natively (Pi has no + MCP; the runner registers them through the Pi extension). Pi does not gate tool use, so + no permission policy applies. + + ``system`` and ``append_system`` are Pi's two system-prompt layers, distinct from + ``agents_md``. ``system`` *replaces* Pi's built-in base prompt outright (Pi's ``SYSTEM.md`` + / ``--system-prompt``); ``append_system`` *adds* to the base prompt without replacing it + (Pi's ``APPEND_SYSTEM.md`` / ``--append-system-prompt``). 
Both are independent of + ``agents_md``: Pi still appends the AGENTS.md project context after the system prompt + either way, so AGENTS.md remains the right home for project conventions and these are + only for changing or extending Pi's base persona.""" + + harness: ClassVar[HarnessType] = HarnessType.PI + + builtin_names: List[str] = Field( + default_factory=list, + validation_alias=AliasChoices("builtin_names", "builtin_tools"), + ) + tool_specs: List[ToolSpec] = Field( + default_factory=list, + validation_alias=AliasChoices("tool_specs", "custom_tools"), + ) + system: Optional[str] = None + append_system: Optional[str] = None + + @field_validator("tool_specs", mode="before") + @classmethod + def _coerce_tool_specs(cls, value: Any) -> List[ToolSpec]: + return [coerce_tool_spec(item) for item in value or []] + + @property + def builtin_tools(self) -> List[str]: + return list(self.builtin_names) + + @property + def custom_tools(self) -> List[Dict[str, Any]]: + return [tool_spec.to_wire() for tool_spec in self.tool_specs] + + def wire_tools(self) -> Dict[str, Any]: + return { + "tools": list(self.builtin_names), + "customTools": [tool_spec.to_wire() for tool_spec in self.tool_specs], + "toolCallback": self.tool_callback.to_wire() + if self.tool_callback + else None, + "permissionPolicy": "auto", # Pi never gates tool use + } + + def wire_prompt(self) -> Dict[str, Any]: + out: Dict[str, Any] = {} + if self.system is not None: + out["systemPrompt"] = self.system + if self.append_system is not None: + out["appendSystemPrompt"] = self.append_system + return out + + +class ClaudeAgentConfig(HarnessAgentConfig): + """Claude's config. 
No Pi built-ins; tools are delivered over MCP, and + ``permission_policy`` answers Claude's tool-use prompts in a headless run.""" + + harness: ClassVar[HarnessType] = HarnessType.CLAUDE + + tool_specs: List[ToolSpec] = Field( + default_factory=list, + validation_alias=AliasChoices("tool_specs", "custom_tools"), + ) + permission_policy: PermissionPolicy = "auto" + + @field_validator("tool_specs", mode="before") + @classmethod + def _coerce_tool_specs(cls, value: Any) -> List[ToolSpec]: + return [coerce_tool_spec(item) for item in value or []] + + @property + def custom_tools(self) -> List[Dict[str, Any]]: + return [tool_spec.to_wire() for tool_spec in self.tool_specs] + + def wire_tools(self) -> Dict[str, Any]: + return { + "tools": [], # Claude has no Pi built-in tools + "customTools": [tool_spec.to_wire() for tool_spec in self.tool_specs], + "toolCallback": self.tool_callback.to_wire() + if self.tool_callback + else None, + "permissionPolicy": self.permission_policy, + } + + +class AgentaAgentConfig(PiAgentConfig): + """The Agenta harness's config. It *is* a Pi config (same engine, same tool delivery and + system-prompt layers), plus the forced ``skills`` the Agenta harness always ships. + + ``skills`` are skill directory names the runner resolves against its bundled + ``services/agent/skills/`` root and loads into Pi's resource loader, so they appear in the + system prompt on every run.""" + + harness: ClassVar[HarnessType] = HarnessType.AGENTA + + skills: List[str] = Field(default_factory=list) + + def wire_tools(self) -> Dict[str, Any]: + # Same tool fields as Pi, plus the forced skill names the runner loads. + return {**super().wire_tools(), "skills": list(self.skills)} + + +# --------------------------------------------------------------------------- +# The session bundle +# --------------------------------------------------------------------------- + + +class SessionConfig(BaseModel): + """Everything one run needs except where it runs. 
+ + ``agent`` is the neutral definition. ``secrets`` are provider keys injected as harness + env, never written to the agent filesystem. The ``builtin_tools`` / ``custom_tools`` / + ``tool_callback`` triple is the resolved tool delivery (Agenta produces it server-side; + empty for a bare standalone run). Sandbox is intentionally absent: it is a + backend/environment concern.""" + + model_config = ConfigDict(populate_by_name=True) + + agent: AgentConfig + secrets: Dict[str, str] = Field(default_factory=dict) + permission_policy: PermissionPolicy = "auto" + trace: Optional[TraceContext] = None + session_id: Optional[str] = None + builtin_names: List[str] = Field( + default_factory=list, + validation_alias=AliasChoices("builtin_names", "builtin_tools"), + ) + tool_specs: List[ToolSpec] = Field( + default_factory=list, + validation_alias=AliasChoices("tool_specs", "custom_tools"), + ) + tool_callback: Optional[ToolCallback] = None + mcp_servers: List[ResolvedMCPServer] = Field(default_factory=list) + + @field_validator("tool_specs", mode="before") + @classmethod + def _coerce_tool_specs(cls, value: Any) -> List[ToolSpec]: + return [coerce_tool_spec(item) for item in value or []] + + @field_validator("mcp_servers", mode="before") + @classmethod + def _coerce_resolved_mcp_servers(cls, value: Any) -> List[ResolvedMCPServer]: + return [ + item + if isinstance(item, ResolvedMCPServer) + else ResolvedMCPServer.model_validate(item) + for item in value or [] + ] + + @property + def builtin_tools(self) -> List[str]: + return list(self.builtin_names) + + @property + def custom_tools(self) -> List[Dict[str, Any]]: + return [tool_spec.to_wire() for tool_spec in self.tool_specs] + + +# --------------------------------------------------------------------------- +# Parsing helpers (ported from the agent service's inputs.py) +# --------------------------------------------------------------------------- + + +def _as_list(raw: Any) -> List[Any]: + if isinstance(raw, dict): + return 
[raw] + if isinstance(raw, list): + return raw + return [] + + +def _parse_mcp_servers_raw( + params: Dict[str, Any], + defaults: AgentConfig, +) -> List[Any]: + """Pull the raw ``mcp_servers`` list from a request/config dict, falling back to defaults. + + Reads ``mcp_servers`` from the ``agent`` element when present, else the flat request. + Canonical validation happens on :class:`AgentConfig` construction.""" + agent = params.get("agent") + source = agent if isinstance(agent, dict) else params + raw = source.get("mcp_servers") + if raw is None: + return list(defaults.mcp_servers) + return _as_list(raw) + + +def _parse_harness_options( + params: Dict[str, Any], + defaults: AgentConfig, +) -> Dict[str, Dict[str, Any]]: + """Pull the per-harness options bag from a request/config dict, falling back to defaults. + + Reads ``harness_options`` from the ``agent`` element when present, else from the flat + request. Keeps only well-formed entries (a harness name mapping to an options dict) and + lower-cases the harness key so it matches :class:`HarnessType` values. 
+    """
+    agent = params.get("agent")
+    source = agent if isinstance(agent, dict) else params
+    raw = source.get("harness_options")
+    if not isinstance(raw, dict):
+        return dict(defaults.harness_options)
+    options: Dict[str, Dict[str, Any]] = {}
+    for name, opts in raw.items():
+        if isinstance(opts, dict):
+            options[str(name).lower()] = dict(opts)
+    return options or dict(defaults.harness_options)
+
+
+def _system_text(messages: Optional[List[Any]]) -> str:
+    """Join the system-message content of a prompt-template into AGENTS.md text."""
+    parts: List[str] = []
+    for message in messages or []:
+        if not isinstance(message, dict) or message.get("role") != "system":
+            continue
+        content = message.get("content")
+        if isinstance(content, str):
+            parts.append(content)
+        elif isinstance(content, list):
+            parts.extend(
+                block.get("text", "")
+                for block in content
+                if isinstance(block, dict) and block.get("type") == "text"
+            )
+    return "\n\n".join(part for part in parts if part)
+
+
+def _parse_agent_fields(
+    params: Dict[str, Any],
+    defaults: AgentConfig,
+) -> Tuple[Optional[str], Optional[str], Any]:
+    """Pull (instructions, model, tools) from a request/config dict, with fallbacks.
+
+    Reads, in priority order, the ``agent`` element, the ``prompt`` prompt-template,
+    then the flat request. In every shape an unset field falls back to ``defaults``.
+    For tools only a missing/null value falls back, so an explicit (even empty) list
+    still overrides the default tools. The tools are returned raw; the caller
+    normalizes them.
+    """
+    agent = params.get("agent")
+    if isinstance(agent, dict):
+        # Unset tools inherit the defaults, the same rule the other two shapes apply.
+        agent_tools = agent.get("tools")
+        # ``agents_md`` is the field the playground/catalog schema exposes; ``instructions`` is
+        # the legacy key kept as a fallback so already-stored agent configs still resolve.
+        return (
+            agent.get("agents_md")
+            or agent.get("instructions")
+            or defaults.instructions,
+            agent.get("model") or defaults.model,
+            defaults.tools if agent_tools is None else agent_tools,
+        )
+
+    prompt_cfg = params.get("prompt")
+    if isinstance(prompt_cfg, dict):
+        llm_config = prompt_cfg.get("llm_config") or {}
+        model = llm_config.get("model") or defaults.model
+        instructions = _system_text(prompt_cfg.get("messages")) or defaults.instructions
+        raw_tools = llm_config.get("tools")
+        if raw_tools is None:
+            raw_tools = prompt_cfg.get("tools")
+    else:
+        model = params.get("model") or defaults.model
+        instructions = params.get("agents_md") or defaults.instructions
+        raw_tools = params.get("tools")
+
+    if raw_tools is None:
+        raw_tools = defaults.tools
+    return instructions, model, raw_tools
diff --git a/sdks/python/agenta/sdk/agents/errors.py b/sdks/python/agenta/sdk/agents/errors.py
new file mode 100644
index 0000000000..b9f136a472
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/errors.py
@@ -0,0 +1,26 @@
+"""Typed errors for the agent runtime."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from .dtos import HarnessType
+from .tools.errors import ToolResolutionError
+
+__all__ = ["UnsupportedHarnessError", "ToolResolutionError"]
+
+if TYPE_CHECKING:
+    from .interfaces import Backend
+
+
+class UnsupportedHarnessError(RuntimeError):
+    """Raised when a harness is asked to run on a backend that cannot drive it."""
+
+    def __init__(self, harness: HarnessType, backend: "Backend") -> None:
+        supported = ", ".join(sorted(h.value for h in backend.supported_harnesses))
+        super().__init__(
+            f"{type(backend).__name__} cannot drive harness '{harness.value}'; "
+            f"it supports: {supported or '(none)'}"
+        )
+        self.harness = harness
+        self.backend = backend
diff --git a/sdks/python/agenta/sdk/agents/interfaces.py b/sdks/python/agenta/sdk/agents/interfaces.py
new file mode 100644
index 0000000000..a7df7280d5
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/interfaces.py
@@
-0,0 +1,317 @@ +"""The ports of the agent runtime: the abstract contracts (Agenta calls these interfaces). + +Three layers, lowest to highest: + +- ``Backend`` is the engine. It declares which harnesses it can drive + (``supported_harnesses``), owns sandbox + session lifecycle, and is pure plumbing: it + takes an already-harness-shaped config and launches it. Adapters: ``RivetBackend``, + ``InProcessPiBackend``, ``LocalBackend``. +- ``Sandbox`` is where a session's process tree lives, plus the provisioning verb + (``add_files``). +- ``Session`` is one conversation (``prompt``, ``destroy``). +- ``Environment`` sits above a backend and owns the sandbox policy. + +The ``Harness`` port (with its ``PiHarness`` / ``ClaudeHarness`` adapters) sits above an +``Environment`` and validates against ``Backend.supported_harnesses``. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import ClassVar, FrozenSet, Mapping, Optional, Sequence + +from .dtos import ( + AgentResult, + EventSink, + HarnessAgentConfig, + HarnessType, + Message, + SessionConfig, + TraceContext, +) +from .errors import UnsupportedHarnessError +from .streaming import AgentRun + + +# --------------------------------------------------------------------------- +# Sandbox and Session +# --------------------------------------------------------------------------- + + +class Sandbox(ABC): + """Where a session's process tree runs. Holds the provisioning verb and teardown. + + ``add_files`` lays files into the sandbox before the session prompts (AGENTS.md, a + bundled extension, an uploaded login). Provisioning, used by the runtime, never exposed + to the agent-config author. + """ + + async def add_files(self, files: Mapping[str, bytes]) -> None: + """Write files into the sandbox. No-op by default (an adapter may need nothing).""" + return None + + async def destroy(self) -> None: + """Tear the sandbox down. 
No-op by default.""" + return None + + +class Session(ABC): + """One conversation over a harness running in a sandbox.""" + + @property + @abstractmethod + def id(self) -> Optional[str]: + """The engine's session id, carried forward so a follow-up turn can resume it.""" + + @abstractmethod + async def prompt( + self, + messages: Sequence[Message], + *, + on_event: Optional[EventSink] = None, + ) -> AgentResult: + """Run one turn and return the structured result (the one-shot path).""" + + @abstractmethod + def stream(self, messages: Sequence[Message]) -> AgentRun: + """Run one turn, yielding events live across the boundary. + + Returns an :class:`~agenta.sdk.agents.streaming.AgentRun`: an async-iterable of + ``AgentEvent`` that also carries the terminal ``AgentResult`` once consumed. This is + the live counterpart of :meth:`prompt`. + """ + + async def destroy(self) -> None: + """Drop the session's resources. A no-op under cold + replay.""" + return None + + +class SessionStore(ABC): + """Durable conversation history behind the agent session id. + + The cold runtime still receives the full message history on every turn. This port is the + place a platform-backed or file-backed store attaches when the server owns that history. 
+ """ + + @abstractmethod + async def load(self, session_id: str) -> Sequence[Message]: + """Return the neutral message history for ``session_id``.""" + + @abstractmethod + async def save_turn( + self, + session_id: str, + *, + messages: Sequence[Message], + result: Optional[AgentResult] = None, + ) -> None: + """Persist one completed cold turn.""" + + +class NoopSessionStore(SessionStore): + """Session store adapter used until server-owned history persistence lands.""" + + async def load(self, session_id: str) -> Sequence[Message]: + return () + + async def save_turn( + self, + session_id: str, + *, + messages: Sequence[Message], + result: Optional[AgentResult] = None, + ) -> None: + return None + + +# --------------------------------------------------------------------------- +# Backend (the engine) +# --------------------------------------------------------------------------- + + +class Backend(ABC): + """The engine. Declares supported harnesses; owns sandbox + session lifecycle. + + Each concrete backend is its own thing and hard-codes what makes it that engine (its + engine id, its supported harnesses). They do not share a base beyond this ABC. + """ + + #: The single source of truth for what this engine can run. + supported_harnesses: ClassVar[FrozenSet[HarnessType]] = frozenset() + + def supports(self, harness: HarnessType) -> bool: + return harness in self.supported_harnesses + + async def setup(self) -> None: + """Bring the backend up. No-op by default.""" + return None + + async def shutdown(self) -> None: + """Release backend resources. 
No-op by default.""" + return None + + @abstractmethod + async def create_sandbox(self) -> Sandbox: + """Create a sandbox this backend can run a session in.""" + + @abstractmethod + async def create_session( + self, + sandbox: Sandbox, + config: HarnessAgentConfig, + *, + harness: HarnessType, + secrets: Optional[Mapping[str, str]] = None, + trace: Optional[TraceContext] = None, + session_id: Optional[str] = None, + ) -> Session: + """Open a session in ``sandbox`` for an already-harness-shaped ``config``.""" + + +# --------------------------------------------------------------------------- +# Environment (sandbox policy over a backend) +# --------------------------------------------------------------------------- + + +class Environment: + """A layer above a backend that owns the sandbox policy. + + Default ``sandbox_per_session=True`` gives each session a fresh sandbox (the cold model, + strong isolation). Pass ``False`` to keep one sandbox and run many sessions in it; share + a single ``Environment`` across harnesses to share that sandbox. 
+    """
+
+    def __init__(self, backend: Backend, *, sandbox_per_session: bool = True) -> None:
+        self._backend = backend
+        self._sandbox_per_session = sandbox_per_session
+        self._shared: Optional[Sandbox] = None
+
+    @property
+    def backend(self) -> Backend:
+        return self._backend
+
+    async def setup(self) -> None:
+        await self._backend.setup()
+
+    async def shutdown(self) -> None:
+        """Destroy the shared sandbox (if one was created), then shut the backend down.
+
+        The backend is shut down even when destroying the sandbox raises; the sandbox
+        error still propagates afterwards.
+        """
+        try:
+            if self._shared is not None:
+                await self._shared.destroy()
+                self._shared = None
+        finally:
+            await self._backend.shutdown()
+
+    async def _sandbox(self) -> Sandbox:
+        """Return a fresh sandbox per call, or the lazily created shared one."""
+        if self._sandbox_per_session:
+            return await self._backend.create_sandbox()
+        if self._shared is None:
+            self._shared = await self._backend.create_sandbox()
+        return self._shared
+
+    async def create_session(
+        self,
+        config: HarnessAgentConfig,
+        *,
+        harness: HarnessType,
+        session_config: SessionConfig,
+        provisioning: Optional[Mapping[str, bytes]] = None,
+    ) -> Session:
+        """Provision a sandbox per policy, then open a session in it.
+
+        ``provisioning`` files are written with ``Sandbox.add_files`` before the
+        backend opens the session; secrets, trace context and session id are taken
+        from ``session_config``. If provisioning or session creation fails under the
+        per-session policy, the sandbox created for this call is destroyed before the
+        error propagates, since no session exists to own it. A shared sandbox is left
+        in place for the other sessions using it.
+        """
+        sandbox = await self._sandbox()
+        try:
+            if provisioning:
+                await sandbox.add_files(provisioning)
+            return await self._backend.create_session(
+                sandbox,
+                config,
+                harness=harness,
+                secrets=session_config.secrets,
+                trace=session_config.trace,
+                session_id=session_config.session_id,
+            )
+        except BaseException:
+            # BaseException so a cancelled task cleans up too; always re-raised below.
+            if self._sandbox_per_session:
+                await sandbox.destroy()
+            raise
+
+
+# ---------------------------------------------------------------------------
+# Harness (the port; adapters live in adapters/harnesses.py)
+# ---------------------------------------------------------------------------
+
+
+class Harness(ABC):
+    """A harness-type-specific wrapper over an :class:`Environment`.
+
+    Holds the mapping from the neutral :class:`~agenta.sdk.agents.dtos.SessionConfig` to this
+    harness's config, and validates at construction that the environment's backend can drive
+    it (raising :class:`UnsupportedHarnessError` otherwise). The backend stays pure plumbing;
+    the per-harness knowledge lives here.
+ """ + + harness_type: ClassVar[HarnessType] + + def __init__(self, environment: Environment) -> None: + if not environment.backend.supports(self.harness_type): + raise UnsupportedHarnessError(self.harness_type, environment.backend) + self._env = environment + + @property + def environment(self) -> Environment: + return self._env + + async def setup(self) -> None: + await self._env.setup() + + async def cleanup(self) -> None: + await self._env.shutdown() + + @abstractmethod + def _to_harness_config(self, config: SessionConfig) -> HarnessAgentConfig: + """Map the neutral config into this harness's own config (the mapping logic).""" + + def _provisioning(self, config: SessionConfig) -> Mapping[str, bytes]: + """Files this harness needs laid into the sandbox before the run.""" + files: dict[str, bytes] = {} + instructions = config.agent.instructions + if instructions and instructions.strip(): + files["AGENTS.md"] = instructions.encode("utf-8") + return files + + async def create_session(self, config: SessionConfig) -> Session: + return await self._env.create_session( + self._to_harness_config(config), + harness=self.harness_type, + session_config=config, + provisioning=self._provisioning(config), + ) + + async def prompt( + self, + config: SessionConfig, + messages: Sequence[Message], + *, + on_event: Optional[EventSink] = None, + ) -> AgentResult: + """Convenience: open a session, run one turn, and destroy it (the cold path).""" + session = await self.create_session(config) + try: + result = await session.prompt(messages, on_event=on_event) + if result.session_id: + config.session_id = result.session_id + return result + finally: + await session.destroy() + + async def stream( + self, + config: SessionConfig, + messages: Sequence[Message], + ) -> AgentRun: + """Convenience: open a cold session and stream one turn (the live counterpart of + :meth:`prompt`). 
+ + The session id is carried onto ``config`` when the terminal result arrives, and the + session is destroyed when the stream ends — by drain, ``break``, or cancellation — + via the run's cleanup hook. + """ + session = await self.create_session(config) + + def _absorb(result: AgentResult) -> None: + if result.session_id: + config.session_id = result.session_id + + return session.stream(messages).on_result(_absorb).on_cleanup(session.destroy) diff --git a/sdks/python/agenta/sdk/agents/mcp/__init__.py b/sdks/python/agenta/sdk/agents/mcp/__init__.py new file mode 100644 index 0000000000..4881f30d52 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/mcp/__init__.py @@ -0,0 +1,22 @@ +"""Public MCP configuration and resolution API.""" + +from .errors import MCPConfigurationError, MCPError, MissingMCPSecretError +from .interfaces import MCPSecretProvider +from .models import MCPServerConfig, ResolvedMCPServer +from .parsing import parse_mcp_server_config, parse_mcp_server_configs +from .resolver import MCPResolver +from .wire import mcp_server_to_wire, mcp_servers_to_wire + +__all__ = [ + "MCPServerConfig", + "ResolvedMCPServer", + "MCPSecretProvider", + "MCPResolver", + "parse_mcp_server_config", + "parse_mcp_server_configs", + "mcp_server_to_wire", + "mcp_servers_to_wire", + "MCPError", + "MCPConfigurationError", + "MissingMCPSecretError", +] diff --git a/sdks/python/agenta/sdk/agents/mcp/errors.py b/sdks/python/agenta/sdk/agents/mcp/errors.py new file mode 100644 index 0000000000..2d2ab05193 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/mcp/errors.py @@ -0,0 +1,33 @@ +"""Errors raised while parsing and resolving MCP server configuration.""" + +from __future__ import annotations + +from typing import Any, Optional, Sequence + + +class MCPError(RuntimeError): + """Base error for the agent MCP subsystem.""" + + +class MCPConfigurationError(MCPError): + def __init__( + self, + message: str, + *, + index: Optional[int] = None, + value: Any = None, + ) -> None: + 
super().__init__(message) + self.index = index + self.value = value + + +class MissingMCPSecretError(MCPError): + def __init__(self, *, server_name: str, secret_names: Sequence[str]) -> None: + names = tuple(secret_names) + super().__init__( + f"MCP server '{server_name}' is missing required secret(s): " + f"{', '.join(names)}" + ) + self.server_name = server_name + self.secret_names = names diff --git a/sdks/python/agenta/sdk/agents/mcp/interfaces.py b/sdks/python/agenta/sdk/agents/mcp/interfaces.py new file mode 100644 index 0000000000..23c5c91522 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/mcp/interfaces.py @@ -0,0 +1,10 @@ +"""Injected dependencies used by MCP resolution.""" + +from __future__ import annotations + +from typing import Mapping, Protocol, Sequence + + +class MCPSecretProvider(Protocol): + async def get_many(self, names: Sequence[str]) -> Mapping[str, str]: + """Return available values for the requested MCP secret names.""" diff --git a/sdks/python/agenta/sdk/agents/mcp/models.py b/sdks/python/agenta/sdk/agents/mcp/models.py new file mode 100644 index 0000000000..e4df7f87e5 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/mcp/models.py @@ -0,0 +1,57 @@ +"""Canonical MCP server declarations and resolved runner configuration.""" + +from __future__ import annotations + +from typing import Any, Dict, List, Literal, Optional + +from pydantic import BaseModel, ConfigDict, Field, model_validator + + +class MCPServerConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + name: str = Field(min_length=1) + transport: Literal["stdio", "http"] = "stdio" + command: Optional[str] = None + args: List[str] = Field(default_factory=list) + env: Dict[str, str] = Field(default_factory=dict, repr=False) + url: Optional[str] = None + secrets: Dict[str, str] = Field(default_factory=dict) + tools: List[str] = Field(default_factory=list) + + @model_validator(mode="after") + def _validate_transport(self) -> "MCPServerConfig": + if self.transport == "stdio" 
and not self.command: + raise ValueError("stdio MCP server requires command") + if self.transport == "http" and not self.url: + raise ValueError("http MCP server requires url") + return self + + +class ResolvedMCPServer(BaseModel): + model_config = ConfigDict(extra="forbid", frozen=True) + + name: str + transport: Literal["stdio", "http"] = "stdio" + command: Optional[str] = None + args: List[str] = Field(default_factory=list) + env: Dict[str, str] = Field(default_factory=dict, repr=False) + url: Optional[str] = None + tools: List[str] = Field(default_factory=list) + + def to_wire(self) -> Dict[str, Any]: + wire: Dict[str, Any] = { + "name": self.name, + "transport": self.transport, + } + if self.command: + wire["command"] = self.command + if self.args: + wire["args"] = list(self.args) + if self.env: + wire["env"] = dict(self.env) + if self.url: + wire["url"] = self.url + if self.tools: + wire["tools"] = list(self.tools) + return wire diff --git a/sdks/python/agenta/sdk/agents/mcp/parsing.py b/sdks/python/agenta/sdk/agents/mcp/parsing.py new file mode 100644 index 0000000000..dfb5f169a6 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/mcp/parsing.py @@ -0,0 +1,39 @@ +"""Strict parsing of MCP server configuration.""" + +from __future__ import annotations + +from typing import Any, Mapping, Sequence + +from pydantic import ValidationError + +from .errors import MCPConfigurationError +from .models import MCPServerConfig + + +def parse_mcp_server_config( + value: MCPServerConfig | Mapping[str, Any], +) -> MCPServerConfig: + try: + return MCPServerConfig.model_validate(value) + except ValidationError as exc: + raise MCPConfigurationError( + "Invalid MCP server configuration: " + f"{exc.errors(include_url=False, include_input=False)}", + value=value, + ) from exc + + +def parse_mcp_server_configs( + values: Sequence[MCPServerConfig | Mapping[str, Any]], +) -> list[MCPServerConfig]: + parsed: list[MCPServerConfig] = [] + for index, value in enumerate(values): + try: + 
parsed.append(parse_mcp_server_config(value)) + except MCPConfigurationError as exc: + raise MCPConfigurationError( + str(exc), + index=index, + value=value, + ) from exc + return parsed diff --git a/sdks/python/agenta/sdk/agents/mcp/resolver.py b/sdks/python/agenta/sdk/agents/mcp/resolver.py new file mode 100644 index 0000000000..6ce78162dd --- /dev/null +++ b/sdks/python/agenta/sdk/agents/mcp/resolver.py @@ -0,0 +1,68 @@ +"""Resolution of MCP server declarations into runner configuration.""" + +from __future__ import annotations + +from typing import Mapping, Sequence + +from agenta.sdk.agents.tools.models import MissingSecretPolicy + +from .errors import MissingMCPSecretError +from .interfaces import MCPSecretProvider +from .models import MCPServerConfig, ResolvedMCPServer + + +class MCPResolver: + def __init__( + self, + *, + secret_provider: MCPSecretProvider, + missing_secret_policy: MissingSecretPolicy = MissingSecretPolicy.ERROR, + ) -> None: + self._secret_provider = secret_provider + self._missing_secret_policy = missing_secret_policy + + async def resolve( + self, + server_configs: Sequence[MCPServerConfig], + ) -> list[ResolvedMCPServer]: + secret_names = sorted( + { + secret_name + for server_config in server_configs + for secret_name in server_config.secrets.values() + } + ) + secret_values: Mapping[str, str] = ( + await self._secret_provider.get_many(secret_names) if secret_names else {} + ) + + resolved: list[ResolvedMCPServer] = [] + for server_config in server_configs: + missing = [ + secret_name + for secret_name in server_config.secrets.values() + if secret_name not in secret_values + ] + if missing and self._missing_secret_policy == MissingSecretPolicy.ERROR: + raise MissingMCPSecretError( + server_name=server_config.name, + secret_names=missing, + ) + + env = dict(server_config.env) + for env_var, secret_name in server_config.secrets.items(): + if secret_name in secret_values: + env[env_var] = secret_values[secret_name] + + resolved.append( + 
ResolvedMCPServer( + name=server_config.name, + transport=server_config.transport, + command=server_config.command, + args=list(server_config.args), + env=env, + url=server_config.url, + tools=list(server_config.tools), + ) + ) + return resolved diff --git a/sdks/python/agenta/sdk/agents/mcp/wire.py b/sdks/python/agenta/sdk/agents/mcp/wire.py new file mode 100644 index 0000000000..f9c1a7cb68 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/mcp/wire.py @@ -0,0 +1,17 @@ +"""Serialization of resolved MCP servers to the runner contract.""" + +from __future__ import annotations + +from typing import Any, Dict, Sequence + +from .models import ResolvedMCPServer + + +def mcp_server_to_wire(server: ResolvedMCPServer) -> Dict[str, Any]: + return server.to_wire() + + +def mcp_servers_to_wire( + servers: Sequence[ResolvedMCPServer], +) -> list[Dict[str, Any]]: + return [mcp_server_to_wire(server) for server in servers] diff --git a/sdks/python/agenta/sdk/agents/streaming.py b/sdks/python/agenta/sdk/agents/streaming.py new file mode 100644 index 0000000000..e631d0ecdc --- /dev/null +++ b/sdks/python/agenta/sdk/agents/streaming.py @@ -0,0 +1,91 @@ +"""Live streaming surface: ``AgentRun`` turns the runner's NDJSON record stream into a live +``AgentEvent`` async-iterable plus the one terminal ``AgentResult``. + +A streaming transport (``utils.deliver_*_stream``) yields the runner's ``StreamRecord`` lines: +``{"kind":"event", ...}`` for every event the moment it is built, then exactly one +``{"kind":"result", ...}`` terminal record. ``AgentRun`` wraps that source so a caller can:: + + run = session.stream(messages) + async for event in run: + ... # event is an AgentEvent, flushed live + result = run.result() # the terminal AgentResult (session_id, usage, stop_reason, ...) + +This lives in its own module (not ``dtos``) because parsing the terminal record reuses +``utils.wire.result_from_wire``, which imports the DTOs — keeping ``AgentRun`` above both +avoids an import cycle. 
+""" + +from __future__ import annotations + +from typing import ( + Any, + AsyncIterator, + Awaitable, + Callable, + Dict, + List, + Optional, +) + +from .dtos import AgentEvent, AgentResult +from .utils import result_from_wire + +# Hooks: a result hook sees the terminal result once; a cleanup runs when iteration ends +# (drain, break, or cancel). +ResultHook = Callable[[AgentResult], None] +Cleanup = Callable[[], Awaitable[None]] + + +class AgentRun: + """An async-iterable over a run's live ``AgentEvent``s that also carries the terminal + ``AgentResult``. + + Iterate it once. Each ``{"kind":"event"}`` record is yielded as an ``AgentEvent``; the + ``{"kind":"result"}`` record is parsed (raising the run's error when ``ok`` is false, + just like the one-shot path) and ends iteration. ``result()`` returns it afterwards. + """ + + def __init__(self, records: AsyncIterator[Dict[str, Any]]) -> None: + self._records = records + self._result: Optional[AgentResult] = None + self._result_hooks: List[ResultHook] = [] + self._cleanups: List[Cleanup] = [] + + def on_result(self, hook: ResultHook) -> "AgentRun": + """Register a callback to run when the terminal result arrives (chainable).""" + self._result_hooks.append(hook) + return self + + def on_cleanup(self, cleanup: Cleanup) -> "AgentRun": + """Register an async cleanup to run when iteration ends, any way it ends (chainable).""" + self._cleanups.append(cleanup) + return self + + async def __aiter__(self) -> AsyncIterator[AgentEvent]: + try: + async for record in self._records: + kind = record.get("kind") + if kind == "event": + event = AgentEvent.from_wire(record.get("event")) + if event is not None: + yield event + elif kind == "result": + # result_from_wire raises on ok=false — surface it to the consumer. 
+ self._result = result_from_wire(record.get("result") or {}) + for hook in self._result_hooks: + hook(self._result) + return + finally: + for cleanup in self._cleanups: + try: + await cleanup() + except Exception: # pylint: disable=broad-except + pass + + def result(self) -> AgentResult: + """The terminal result. Available only after the stream is fully consumed.""" + if self._result is None: + raise RuntimeError( + "AgentRun result is not available until the stream is fully consumed" + ) + return self._result diff --git a/sdks/python/agenta/sdk/agents/tools/__init__.py b/sdks/python/agenta/sdk/agents/tools/__init__.py new file mode 100644 index 0000000000..2b40dc082e --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/__init__.py @@ -0,0 +1,75 @@ +"""Public agent-tool configuration and resolution API.""" + +from .compat import ( + ToolConfigDiagnostic, + ToolConfigParseResult, + coerce_tool_config, + coerce_tool_configs, +) +from .errors import ( + DuplicateToolNameError, + GatewayToolResolutionError, + MissingToolSecretError, + ToolConfigError, + ToolConfigurationError, + ToolError, + ToolResolutionError, + UnsupportedToolProviderError, +) +from .interfaces import GatewayToolResolver, ToolSecretProvider +from .models import ( + BuiltinToolConfig, + CallbackToolSpec, + ClientToolConfig, + ClientToolSpec, + CodeToolConfig, + CodeToolSpec, + GatewayToolConfig, + GatewayToolResolution, + MissingSecretPolicy, + ResolvedToolSet, + ToolCallback, + ToolConfig, + ToolConfigBase, + ToolSpec, +) +from .parsing import parse_tool_config, parse_tool_configs +from .resolver import EnvironmentToolSecretProvider, ToolResolver +from .wire import tool_spec_to_wire, tool_specs_to_wire + +__all__ = [ + "ToolConfigBase", + "ToolConfig", + "BuiltinToolConfig", + "GatewayToolConfig", + "CodeToolConfig", + "ClientToolConfig", + "ToolSpec", + "CallbackToolSpec", + "CodeToolSpec", + "ClientToolSpec", + "ToolCallback", + "ResolvedToolSet", + "GatewayToolResolution", + 
"MissingSecretPolicy", + "ToolResolver", + "ToolSecretProvider", + "GatewayToolResolver", + "EnvironmentToolSecretProvider", + "parse_tool_config", + "parse_tool_configs", + "coerce_tool_config", + "coerce_tool_configs", + "ToolConfigDiagnostic", + "ToolConfigParseResult", + "tool_spec_to_wire", + "tool_specs_to_wire", + "ToolError", + "ToolConfigError", + "ToolConfigurationError", + "ToolResolutionError", + "GatewayToolResolutionError", + "UnsupportedToolProviderError", + "MissingToolSecretError", + "DuplicateToolNameError", +] diff --git a/sdks/python/agenta/sdk/agents/tools/compat.py b/sdks/python/agenta/sdk/agents/tools/compat.py new file mode 100644 index 0000000000..e356abfdde --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/compat.py @@ -0,0 +1,132 @@ +"""Compatibility conversion for legacy playground and persisted tool shapes.""" + +from __future__ import annotations + +from typing import Any, Literal, Optional, Sequence + +from pydantic import BaseModel, ConfigDict, Field + +from .errors import ToolConfigurationError +from .models import ( + BuiltinToolConfig, + ClientToolConfig, + CodeToolConfig, + GatewayToolConfig, + ToolConfig, +) +from .parsing import parse_tool_config + + +class ToolConfigDiagnostic(BaseModel): + model_config = ConfigDict(frozen=True) + + index: int + message: str + + +class ToolConfigParseResult(BaseModel): + model_config = ConfigDict(frozen=True) + + tool_configs: list[ToolConfig] = Field(default_factory=list) + diagnostics: list[ToolConfigDiagnostic] = Field(default_factory=list) + + +def _parse_gateway_slug(slug: Any) -> Optional[dict[str, Any]]: + if not isinstance(slug, str): + return None + parts = slug.replace("__", ".").split(".") + if len(parts) != 5 or parts[0] != "tools": + return None + return { + "type": "gateway", + "provider": parts[1], + "integration": parts[2], + "action": parts[3], + "connection": parts[4], + } + + +def _copy_tool_metadata( + source: dict[str, Any], target: dict[str, Any] +) -> dict[str, 
Any]: + result = dict(target) + if "needs_approval" in source: + result["needs_approval"] = bool(source["needs_approval"]) + if isinstance(source.get("render"), dict): + result["render"] = dict(source["render"]) + return result + + +def coerce_tool_config(value: Any) -> ToolConfig: + """Convert one supported legacy shape into canonical tool configuration.""" + if isinstance( + value, + ( + BuiltinToolConfig, + GatewayToolConfig, + CodeToolConfig, + ClientToolConfig, + ), + ): + return value + if isinstance(value, str): + return BuiltinToolConfig(name=value) + if not isinstance(value, dict): + raise ToolConfigurationError( + "Tool configuration must be a string or mapping", + value=value, + ) + + data = dict(value) + if data.get("type") == "composio": + data["type"] = "gateway" + data.setdefault("provider", "composio") + + if data.get("type") in {"builtin", "gateway", "code", "client"}: + return parse_tool_config(data) + + function = data.get("function") if isinstance(data.get("function"), dict) else {} + gateway = _parse_gateway_slug(function.get("name") or data.get("name")) + if gateway: + return parse_tool_config(_copy_tool_metadata(data, gateway)) + + if isinstance(data.get("name"), str) and "type" not in data: + return BuiltinToolConfig(name=data["name"]) + + raise ToolConfigurationError("Unsupported tool configuration shape", value=value) + + +def coerce_tool_configs( + values: Optional[Sequence[Any]], + *, + on_error: Literal["raise", "collect"] = "raise", +) -> ToolConfigParseResult: + """Convert legacy values, either raising or returning structured diagnostics.""" + tool_configs: list[ToolConfig] = [] + diagnostics: list[ToolConfigDiagnostic] = [] + for index, value in enumerate(values or []): + if value is None: + error = ToolConfigurationError( + "Tool configuration cannot be null", + index=index, + value=value, + ) + else: + try: + tool_configs.append(coerce_tool_config(value)) + continue + except ToolConfigurationError as exc: + error = 
ToolConfigurationError( + str(exc), + index=index, + value=value, + ) + + if on_error == "raise": + raise error + diagnostics.append(ToolConfigDiagnostic(index=index, message=str(error))) + + return ToolConfigParseResult( + tool_configs=tool_configs, + diagnostics=diagnostics, + ) diff --git a/sdks/python/agenta/sdk/agents/tools/errors.py b/sdks/python/agenta/sdk/agents/tools/errors.py new file mode 100644 index 0000000000..24d62614c4 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/errors.py @@ -0,0 +1,82 @@ +"""Errors raised while parsing and resolving agent tools.""" + +from __future__ import annotations + +from typing import Any, Optional, Sequence + + +class ToolError(RuntimeError): + """Base error for the agent tools domain.""" + + +class ToolConfigurationError(ToolError): + """Raised when tool configuration cannot be converted to a canonical model.""" + + def __init__( + self, + message: str, + *, + index: Optional[int] = None, + value: Any = None, + ) -> None: + super().__init__(message) + self.index = index + self.value = value + + +ToolConfigError = ToolConfigurationError + + +class ToolResolutionError(ToolError): + """Raised when tool configuration cannot become runnable specifications.""" + + def __init__( + self, + message: str, + *, + status: Optional[int] = None, + ref_count: Optional[int] = None, + spec_count: Optional[int] = None, + provider: Optional[str] = None, + reference: Optional[str] = None, + ) -> None: + super().__init__(message) + self.status = status + self.ref_count = ref_count + self.spec_count = spec_count + self.provider = provider + self.reference = reference + + +class GatewayToolResolutionError(ToolResolutionError): + """Raised when a gateway adapter cannot resolve a configured tool.""" + + +class UnsupportedToolProviderError(ToolResolutionError): + """Raised when no resolver is available for a configured gateway provider.""" + + def __init__(self, provider: str) -> None: + super().__init__( + f"Unsupported tool provider: 
{provider}", + provider=provider, + ) + + +class MissingToolSecretError(ToolResolutionError): + """Raised when a tool declares required secrets that a provider cannot supply.""" + + def __init__(self, *, tool_name: str, secret_names: Sequence[str]) -> None: + names = tuple(secret_names) + super().__init__( + f"Tool '{tool_name}' is missing required secret(s): {', '.join(names)}" + ) + self.tool_name = tool_name + self.secret_names = names + + +class DuplicateToolNameError(ToolResolutionError): + """Raised when two configured tools resolve to the same model-visible name.""" + + def __init__(self, name: str) -> None: + super().__init__(f"Duplicate tool name: {name}") + self.name = name diff --git a/sdks/python/agenta/sdk/agents/tools/interfaces.py b/sdks/python/agenta/sdk/agents/tools/interfaces.py new file mode 100644 index 0000000000..3ccc4c767c --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/interfaces.py @@ -0,0 +1,20 @@ +"""Injected dependencies used by the tool resolver.""" + +from __future__ import annotations + +from typing import Mapping, Protocol, Sequence + +from .models import GatewayToolConfig, GatewayToolResolution + + +class ToolSecretProvider(Protocol): + async def get_many(self, names: Sequence[str]) -> Mapping[str, str]: + """Return available values for the requested secret names.""" + + +class GatewayToolResolver(Protocol): + async def resolve( + self, + tools: Sequence[GatewayToolConfig], + ) -> GatewayToolResolution: + """Resolve gateway declarations into callback specifications.""" diff --git a/sdks/python/agenta/sdk/agents/tools/models.py b/sdks/python/agenta/sdk/agents/tools/models.py new file mode 100644 index 0000000000..6e467f51dd --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/models.py @@ -0,0 +1,221 @@ +"""Canonical tool configuration and resolved runtime specifications.""" + +from __future__ import annotations + +from enum import Enum +from typing import Annotated, Any, Dict, List, Literal, Optional, Union + +from 
pydantic import ( + AliasChoices, + BaseModel, + ConfigDict, + Field, + TypeAdapter, + field_validator, +) + + +def _empty_object_schema() -> Dict[str, Any]: + return {"type": "object", "properties": {}} + + +class ToolConfigBase(BaseModel): + """Fields shared by every persisted tool declaration.""" + + model_config = ConfigDict(extra="forbid") + + needs_approval: bool = False + render: Optional[Dict[str, Any]] = None + + +class BuiltinToolConfig(ToolConfigBase): + type: Literal["builtin"] = "builtin" + name: str = Field(min_length=1) + + +class GatewayToolConfig(ToolConfigBase): + type: Literal["gateway"] = "gateway" + provider: str = Field(default="composio", min_length=1) + integration: str = Field(min_length=1) + action: str = Field(min_length=1) + connection: str = Field(min_length=1) + name: Optional[str] = Field(default=None, min_length=1) + + @property + def reference(self) -> str: + return ( + f"tools.{self.provider}.{self.integration}.{self.action}.{self.connection}" + ) + + +class CodeToolConfig(ToolConfigBase): + type: Literal["code"] = "code" + name: str = Field(min_length=1) + description: Optional[str] = None + runtime: Literal["python", "node"] = "python" + script: str = Field(min_length=1) + input_schema: Dict[str, Any] = Field(default_factory=_empty_object_schema) + secrets: List[str] = Field(default_factory=list) + + +class ClientToolConfig(ToolConfigBase): + type: Literal["client"] = "client" + name: str = Field(min_length=1) + description: Optional[str] = None + input_schema: Dict[str, Any] = Field(default_factory=_empty_object_schema) + + +ToolConfig = Annotated[ + Union[ + BuiltinToolConfig, + GatewayToolConfig, + CodeToolConfig, + ClientToolConfig, + ], + Field(discriminator="type"), +] +TOOL_CONFIG_ADAPTER: TypeAdapter[ToolConfig] = TypeAdapter(ToolConfig) + + +class ToolCallback(BaseModel): + """Where callback tool calls are sent.""" + + model_config = ConfigDict(frozen=True) + + endpoint: str + authorization: Optional[str] = 
Field(default=None, repr=False) + + def to_wire(self) -> Dict[str, Any]: + return { + "endpoint": self.endpoint, + "authorization": self.authorization, + } + + +class ToolSpecBase(BaseModel): + """Fields shared by every resolved, runner-ready tool specification.""" + + model_config = ConfigDict( + extra="forbid", + frozen=True, + populate_by_name=True, + ) + + name: str + description: str + input_schema: Dict[str, Any] = Field( + default_factory=_empty_object_schema, + validation_alias=AliasChoices("input_schema", "inputSchema"), + serialization_alias="inputSchema", + ) + needs_approval: bool = Field( + default=False, + validation_alias=AliasChoices("needs_approval", "needsApproval"), + serialization_alias="needsApproval", + ) + render: Optional[Dict[str, Any]] = None + + def to_wire(self) -> Dict[str, Any]: + wire = self.model_dump( + mode="json", + by_alias=True, + exclude_none=True, + ) + if not self.needs_approval: + wire.pop("needsApproval", None) + if not wire.get("env"): + wire.pop("env", None) + return wire + + +class CallbackToolSpec(ToolSpecBase): + kind: Literal["callback"] = "callback" + call_ref: str = Field( + validation_alias=AliasChoices("call_ref", "callRef"), + serialization_alias="callRef", + ) + + +class CodeToolSpec(ToolSpecBase): + kind: Literal["code"] = "code" + runtime: Literal["python", "node"] = "python" + code: str + env: Dict[str, str] = Field(default_factory=dict, repr=False) + + +class ClientToolSpec(ToolSpecBase): + kind: Literal["client"] = "client" + + +ToolSpec = Annotated[ + Union[CallbackToolSpec, CodeToolSpec, ClientToolSpec], + Field(discriminator="kind"), +] +TOOL_SPEC_ADAPTER: TypeAdapter[ToolSpec] = TypeAdapter(ToolSpec) + + +def coerce_tool_spec(value: Any) -> ToolSpec: + if isinstance(value, (CallbackToolSpec, CodeToolSpec, ClientToolSpec)): + return value + if not isinstance(value, dict): + raise TypeError("tool spec must be a mapping") + data = dict(value) + if not data.get("kind"): + if data.get("callRef") or 
data.get("call_ref"): + data["kind"] = "callback" + elif data.get("code") is not None: + data["kind"] = "code" + else: + data["kind"] = "client" + name = data.get("name") + data.setdefault("description", name) + data.setdefault("inputSchema", _empty_object_schema()) + return TOOL_SPEC_ADAPTER.validate_python(data) + + +class MissingSecretPolicy(str, Enum): + ERROR = "error" + OMIT = "omit" + + +class ResolvedToolSet(BaseModel): + """Resolved tools ready to attach to a session.""" + + model_config = ConfigDict( + frozen=True, + populate_by_name=True, + ) + + builtin_names: List[str] = Field( + default_factory=list, + validation_alias=AliasChoices("builtin_names", "builtin_tools"), + ) + tool_specs: List[ToolSpec] = Field( + default_factory=list, + validation_alias=AliasChoices("tool_specs", "custom_tools"), + ) + tool_callback: Optional[ToolCallback] = None + + @field_validator("tool_specs", mode="before") + @classmethod + def _coerce_specs(cls, value: Any) -> List[ToolSpec]: + return [coerce_tool_spec(item) for item in value or []] + + @property + def builtin_tools(self) -> List[str]: + """Compatibility alias for the previous field name.""" + return list(self.builtin_names) + + @property + def custom_tools(self) -> List[Dict[str, Any]]: + """Compatibility wire dictionaries for callers not yet using typed specs.""" + return [spec.to_wire() for spec in self.tool_specs] + + +class GatewayToolResolution(BaseModel): + """Result returned by an injected gateway adapter.""" + + model_config = ConfigDict(frozen=True) + + tool_specs: List[CallbackToolSpec] = Field(default_factory=list) + tool_callback: ToolCallback diff --git a/sdks/python/agenta/sdk/agents/tools/parsing.py b/sdks/python/agenta/sdk/agents/tools/parsing.py new file mode 100644 index 0000000000..b5779caa19 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/parsing.py @@ -0,0 +1,39 @@ +"""Strict parsing of canonical tool configuration.""" + +from __future__ import annotations + +from typing import Any, 
Mapping, Sequence + +from pydantic import ValidationError + +from .errors import ToolConfigurationError +from .models import TOOL_CONFIG_ADAPTER, ToolConfig + + +def parse_tool_config(value: ToolConfig | Mapping[str, Any]) -> ToolConfig: + """Parse one canonical tool mapping, rejecting legacy and unexpected fields.""" + try: + return TOOL_CONFIG_ADAPTER.validate_python(value) + except ValidationError as exc: + raise ToolConfigurationError( + "Invalid tool configuration: " + f"{exc.errors(include_url=False, include_input=False)}", + value=value, + ) from exc + + +def parse_tool_configs( + values: Sequence[ToolConfig | Mapping[str, Any]], +) -> list[ToolConfig]: + """Parse canonical tool mappings and report the failing item index.""" + parsed: list[ToolConfig] = [] + for index, value in enumerate(values): + try: + parsed.append(parse_tool_config(value)) + except ToolConfigurationError as exc: + raise ToolConfigurationError( + str(exc), + index=index, + value=value, + ) from exc + return parsed diff --git a/sdks/python/agenta/sdk/agents/tools/resolver.py b/sdks/python/agenta/sdk/agents/tools/resolver.py new file mode 100644 index 0000000000..54f4c8b03f --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/resolver.py @@ -0,0 +1,177 @@ +"""Resolution of canonical tool configuration into runnable specifications.""" + +from __future__ import annotations + +import os +from typing import Mapping, Optional, Sequence + +from .errors import ( + DuplicateToolNameError, + MissingToolSecretError, + UnsupportedToolProviderError, +) +from .interfaces import GatewayToolResolver, ToolSecretProvider +from .models import ( + BuiltinToolConfig, + ClientToolConfig, + ClientToolSpec, + CodeToolConfig, + CodeToolSpec, + GatewayToolConfig, + MissingSecretPolicy, + ResolvedToolSet, + ToolConfig, + ToolSpec, +) + + +class EnvironmentToolSecretProvider: + """Read declared tool secrets from the current process environment.""" + + async def get_many(self, names: Sequence[str]) -> Mapping[str, 
str]: + return { + name: value for name in names if (value := os.environ.get(name)) is not None + } + + +def _apply_tool_metadata(tool_spec: ToolSpec, tool_config: ToolConfig) -> ToolSpec: + """Return a new spec carrying the config's approval and rendering metadata.""" + return tool_spec.model_copy( + update={ + "needs_approval": tool_config.needs_approval, + "render": tool_config.render, + } + ) + + +def _build_code_tool_spec( + *, + tool_config: CodeToolConfig, + env: Mapping[str, str], +) -> CodeToolSpec: + return _apply_tool_metadata( + CodeToolSpec( + name=tool_config.name, + description=tool_config.description or tool_config.name, + input_schema=tool_config.input_schema, + runtime=tool_config.runtime, + code=tool_config.script, + env=dict(env), + ), + tool_config, + ) + + +def _build_client_tool_spec(*, tool_config: ClientToolConfig) -> ClientToolSpec: + return _apply_tool_metadata( + ClientToolSpec( + name=tool_config.name, + description=tool_config.description or tool_config.name, + input_schema=tool_config.input_schema, + ), + tool_config, + ) + + +def _validate_unique_names( + *, + builtin_names: Sequence[str], + tool_specs: Sequence[ToolSpec], +) -> None: + seen: set[str] = set() + for name in [*builtin_names, *(tool_spec.name for tool_spec in tool_specs)]: + if name in seen: + raise DuplicateToolNameError(name) + seen.add(name) + + +class ToolResolver: + """Resolve canonical tool configuration through injected secret and gateway adapters.""" + + def __init__( + self, + *, + secret_provider: Optional[ToolSecretProvider] = None, + gateway_resolver: Optional[GatewayToolResolver] = None, + missing_secret_policy: MissingSecretPolicy = MissingSecretPolicy.ERROR, + ) -> None: + self._secret_provider = secret_provider or EnvironmentToolSecretProvider() + self._gateway_resolver = gateway_resolver + self._missing_secret_policy = missing_secret_policy + + async def resolve(self, tool_configs: Sequence[ToolConfig]) -> ResolvedToolSet: + builtin_names = [ + 
tool_config.name + for tool_config in tool_configs + if isinstance(tool_config, BuiltinToolConfig) + ] + code_configs = [ + tool_config + for tool_config in tool_configs + if isinstance(tool_config, CodeToolConfig) + ] + client_configs = [ + tool_config + for tool_config in tool_configs + if isinstance(tool_config, ClientToolConfig) + ] + gateway_configs = [ + tool_config + for tool_config in tool_configs + if isinstance(tool_config, GatewayToolConfig) + ] + + secret_names = sorted( + { + secret_name + for tool_config in code_configs + for secret_name in tool_config.secrets + } + ) + secret_values = ( + dict(await self._secret_provider.get_many(secret_names)) + if secret_names + else {} + ) + + tool_specs: list[ToolSpec] = [] + for tool_config in code_configs: + missing = [ + secret_name + for secret_name in tool_config.secrets + if secret_name not in secret_values + ] + if missing and self._missing_secret_policy == MissingSecretPolicy.ERROR: + raise MissingToolSecretError( + tool_name=tool_config.name, + secret_names=missing, + ) + env = { + secret_name: secret_values[secret_name] + for secret_name in tool_config.secrets + if secret_name in secret_values + } + tool_specs.append(_build_code_tool_spec(tool_config=tool_config, env=env)) + + tool_specs.extend( + _build_client_tool_spec(tool_config=tool_config) + for tool_config in client_configs + ) + + tool_callback = None + if gateway_configs: + if self._gateway_resolver is None: + raise UnsupportedToolProviderError(gateway_configs[0].provider) + gateway_resolution = await self._gateway_resolver.resolve(gateway_configs) + tool_specs = [*gateway_resolution.tool_specs, *tool_specs] + tool_callback = gateway_resolution.tool_callback + + _validate_unique_names( + builtin_names=builtin_names, + tool_specs=tool_specs, + ) + return ResolvedToolSet( + builtin_names=builtin_names, + tool_specs=tool_specs, + tool_callback=tool_callback, + ) diff --git a/sdks/python/agenta/sdk/agents/tools/wire.py 
b/sdks/python/agenta/sdk/agents/tools/wire.py new file mode 100644 index 0000000000..1f716b503d --- /dev/null +++ b/sdks/python/agenta/sdk/agents/tools/wire.py @@ -0,0 +1,15 @@ +"""Serialization of resolved tool specifications to the runner contract.""" + +from __future__ import annotations + +from typing import Any, Dict, Sequence + +from .models import ToolSpec + + +def tool_spec_to_wire(tool_spec: ToolSpec) -> Dict[str, Any]: + return tool_spec.to_wire() + + +def tool_specs_to_wire(tool_specs: Sequence[ToolSpec]) -> list[Dict[str, Any]]: + return [tool_spec_to_wire(tool_spec) for tool_spec in tool_specs] diff --git a/sdks/python/agenta/sdk/agents/ui_messages.py b/sdks/python/agenta/sdk/agents/ui_messages.py new file mode 100644 index 0000000000..2dc1f5e39b --- /dev/null +++ b/sdks/python/agenta/sdk/agents/ui_messages.py @@ -0,0 +1,18 @@ +"""Compatibility imports for the Vercel UI Message adapter. + +New code should import from :mod:`agenta.sdk.agents.adapters.vercel`. +""" + +from __future__ import annotations + +from .adapters.vercel import ( + from_ui_messages, + to_ui_message, + ui_message_stream, +) + +__all__ = [ + "from_ui_messages", + "to_ui_message", + "ui_message_stream", +] diff --git a/sdks/python/agenta/sdk/agents/utils/__init__.py b/sdks/python/agenta/sdk/agents/utils/__init__.py new file mode 100644 index 0000000000..620e3b1b7e --- /dev/null +++ b/sdks/python/agenta/sdk/agents/utils/__init__.py @@ -0,0 +1,19 @@ +"""Shared plumbing for the runner-backed adapters: the ``/run`` wire shape and the two +transports to the TypeScript runner.""" + +from .ts_runner import ( + deliver_http, + deliver_http_stream, + deliver_subprocess, + deliver_subprocess_stream, +) +from .wire import request_to_wire, result_from_wire + +__all__ = [ + "request_to_wire", + "result_from_wire", + "deliver_http", + "deliver_subprocess", + "deliver_http_stream", + "deliver_subprocess_stream", +] diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py 
b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
new file mode 100644
index 0000000000..f7a5497d1c
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
@@ -0,0 +1,212 @@
+"""Transports to the TypeScript runner: HTTP (a running sidecar) or subprocess (a CLI).
+
+Shared by the runner-backed adapters. Each adapter chooses a transport and hard-codes its
+own engine id on the payload (via ``utils.wire``); this module only delivers the JSON.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+from typing import Any, AsyncIterator, Dict, Optional, Sequence
+
+# Default per-call timeout in seconds, overridable through ``AGENTA_AGENT_TIMEOUT``.
+_DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180"))
+
+# Largest NDJSON line accepted from the child's stdout. asyncio's default stream limit is
+# 64 KiB and ``readline()`` raises ``ValueError`` beyond it, which a single large record
+# (for example a terminal result embedding messages and events) can exceed.
+_STREAM_LINE_LIMIT = 16 * 1024 * 1024
+
+
+async def deliver_http(
+    base_url: str,
+    payload: Dict[str, Any],
+    *,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> Dict[str, Any]:
+    """POST ``/run`` to a running runner and return the parsed JSON body.
+
+    Args:
+        base_url: Runner base URL; a trailing slash is tolerated.
+        payload: The ``/run`` request, sent as the JSON body.
+        timeout: Timeout in seconds handed to the ``httpx`` client.
+
+    Returns:
+        The decoded JSON response body.
+
+    Raises:
+        RuntimeError: The runner answered with a 5xx status.
+    """
+    import httpx  # local import: only the HTTP transport needs it
+
+    url = base_url.rstrip("/") + "/run"
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        response = await client.post(url, json=payload)
+        # Only 5xx is treated as a transport failure; any other status is decoded as JSON.
+        if response.status_code >= 500:
+            raise RuntimeError(
+                f"Agent runner HTTP {response.status_code}: {response.text[:1000]}"
+            )
+        return response.json()
+
+
+async def deliver_subprocess(
+    command: Sequence[str],
+    payload: Dict[str, Any],
+    *,
+    cwd: Optional[str] = None,
+    env: Optional[Dict[str, str]] = None,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> Dict[str, Any]:
+    """Spawn the runner CLI, feed the request on stdin, and parse the JSON on stdout.
+
+    Args:
+        command: Program and arguments of the runner CLI.
+        payload: The ``/run`` request, written to the child's stdin as JSON.
+        cwd: Working directory for the child, or ``None`` to inherit.
+        env: Environment for the child, or ``None`` to inherit.
+        timeout: Seconds to wait for the child to finish before killing it.
+
+    Returns:
+        The JSON document the child printed on stdout.
+
+    Raises:
+        RuntimeError: The child timed out, printed nothing, or printed invalid JSON.
+    """
+    # Serialize before spawning so an unserializable payload cannot orphan a child.
+    body = json.dumps(payload).encode("utf-8")
+    proc = await asyncio.create_subprocess_exec(
+        *command,
+        cwd=cwd,
+        env=env,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    try:
+        stdout, stderr = await asyncio.wait_for(
+            proc.communicate(input=body), timeout=timeout
+        )
+    except asyncio.TimeoutError as exc:
+        # The child may have exited on its own in the meantime; only kill a live one.
+        if proc.returncode is None:
+            proc.kill()
+        await proc.wait()
+        raise RuntimeError(
+            f"Agent runner timed out after {timeout}s: {' '.join(command)}"
+        ) from exc
+
+    out = stdout.decode("utf-8", "replace")
+    err = stderr.decode("utf-8", "replace")
+    if not out.strip():
+        raise RuntimeError(
+            f"Agent runner returned no output. exit={proc.returncode} stderr={err[-2000:]}"
+        )
+    try:
+        return json.loads(out)
+    except json.JSONDecodeError as exc:
+        raise RuntimeError(
+            f"Agent runner returned invalid JSON. stdout={out[:500]} stderr={err[-1000:]}"
+        ) from exc
+
+
+# ---------------------------------------------------------------------------
+# Streaming transports (NDJSON): one parsed record per line, live.
+#
+# Each yields the runner's ``StreamRecord`` lines as they arrive — ``{"kind":"event",...}``
+# for every event the moment it is built, then exactly one ``{"kind":"result",...}`` terminal
+# record. The caller (a ``Session.stream``) turns these into live ``AgentEvent``s and the
+# terminal ``AgentResult``. Cancellation closes the underlying connection / kills the child.
+# ---------------------------------------------------------------------------
+
+
+async def deliver_http_stream(
+    base_url: str,
+    payload: Dict[str, Any],
+    *,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> AsyncIterator[Dict[str, Any]]:
+    """POST ``/run`` asking for NDJSON and yield each parsed record as it arrives.
+
+    The ``async with`` closes the connection when the generator is closed or cancelled, which
+    the runner observes as a client disconnect and turns into run cancellation.
+
+    Raises:
+        RuntimeError: The runner answered with a 5xx status.
+    """
+    import httpx  # local import: only the HTTP transport needs it
+
+    url = base_url.rstrip("/") + "/run"
+    headers = {"Accept": "application/x-ndjson"}
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        async with client.stream(
+            "POST", url, json=payload, headers=headers
+        ) as response:
+            if response.status_code >= 500:
+                body = await response.aread()
+                raise RuntimeError(
+                    f"Agent runner HTTP {response.status_code}: {body[:1000]!r}"
+                )
+            async for line in response.aiter_lines():
+                line = line.strip()
+                if line:  # skip blank lines
+                    yield json.loads(line)
+
+
+async def deliver_subprocess_stream(
+    command: Sequence[str],
+    payload: Dict[str, Any],
+    *,
+    cwd: Optional[str] = None,
+    env: Optional[Dict[str, str]] = None,
+    timeout: float = _DEFAULT_TIMEOUT,
+) -> AsyncIterator[Dict[str, Any]]:
+    """Spawn the runner CLI in ``--stream`` mode and yield each NDJSON record from stdout.
+
+    The ``finally`` kills the child if the consumer stops early (break/cancel), so a dropped
+    stream does not leave a runner process behind.
+
+    ``timeout`` bounds the whole stream, not each record. It surfaces as ``RuntimeError``
+    when the deadline has already passed before a read, and as ``asyncio.TimeoutError`` when
+    a read is still pending at the deadline.
+    """
+    # Serialize before spawning so an unserializable payload cannot orphan a child.
+    body = json.dumps(payload).encode("utf-8")
+    proc = await asyncio.create_subprocess_exec(
+        *command,
+        "--stream",
+        cwd=cwd,
+        env=env,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        # stderr is never read here; an undrained pipe would block the child once the
+        # OS pipe buffer fills, so discard it instead.
+        stderr=asyncio.subprocess.DEVNULL,
+        limit=_STREAM_LINE_LIMIT,
+    )
+    assert proc.stdin is not None and proc.stdout is not None
+    try:
+        # No ``drain()``: the write is flushed in the background while stdout is read.
+        proc.stdin.write(body)
+        proc.stdin.close()
+        loop = asyncio.get_running_loop()
+        deadline = loop.time() + timeout
+        while True:
+            remaining = deadline - loop.time()
+            if remaining <= 0:
+                raise RuntimeError(
+                    f"Agent runner stream timed out after {timeout}s: {' '.join(command)}"
+                )
+            raw = await asyncio.wait_for(proc.stdout.readline(), timeout=remaining)
+            if not raw:  # EOF
+                break
+            line = raw.decode("utf-8", "replace").strip()
+            if line:
+                yield json.loads(line)
+        await proc.wait()
+    finally:
+        if proc.returncode is None:
+            proc.kill()
+            await proc.wait()
diff --git a/sdks/python/agenta/sdk/agents/utils/wire.py b/sdks/python/agenta/sdk/agents/utils/wire.py
new file mode 100644
index 0000000000..b7558a4530
--- /dev/null
+++ b/sdks/python/agenta/sdk/agents/utils/wire.py
@@ -0,0 +1,91 @@
+"""The ``/run`` wire contract: our DTOs <-> the runner's camelCase JSON.
+
+Shared by the runner-backed adapters (rivet, in-process Pi). The TS side mirrors these names
+in ``services/agent/src/protocol.ts``, and the contract is pinned by shared golden fixtures
+under ``sdks/python/oss/tests/pytest/unit/agents/golden/`` (see ``test_wire_contract.py``).
+The caller passes the engine id explicitly, since each adapter hard-codes its own.
+""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Sequence + +from ..dtos import ( + AgentEvent, + AgentResult, + HarnessAgentConfig, + HarnessCapabilities, + HarnessType, + Message, + TraceContext, +) + + +def request_to_wire( + *, + engine: str, + harness: HarnessType, + sandbox: str, + config: HarnessAgentConfig, + messages: Sequence[Message], + secrets: Optional[Dict[str, str]] = None, + trace: Optional[TraceContext] = None, + session_id: Optional[str] = None, +) -> Dict[str, Any]: + """Serialize one turn into the ``/run`` request JSON. + + The tool + permission fields come from ``config.wire_tools()`` so each harness shapes its + own (Pi: built-ins + native specs, no gating; Claude: MCP specs + permission policy). + ``config.wire_prompt()`` adds any system-prompt overrides the harness exposes (Pi's + ``systemPrompt`` / ``appendSystemPrompt``); it is empty for harnesses that have none. + ``config.wire_mcp()`` adds user-declared MCP servers, omitted when there are none so a + tool-free run's payload is unchanged. + """ + return { + "backend": engine, + "harness": harness.value, + "sandbox": sandbox, + "sessionId": session_id, + "agentsMd": config.agents_md, + "model": config.model, + "messages": [message.to_wire() for message in messages], + "secrets": dict(secrets or {}), + "trace": trace.to_wire() if trace else None, + **config.wire_tools(), + **config.wire_prompt(), + **config.wire_mcp(), + } + + +def result_from_wire(data: Dict[str, Any]) -> AgentResult: + """Parse a ``/run`` result JSON into an :class:`AgentResult`. + + Raises ``RuntimeError`` when the runner reported a failure, so the caller surfaces a + clear message rather than handing the model an empty reply. 
+ """ + if not data.get("ok"): + raise RuntimeError(f"Agent run failed: {data.get('error')}") + + messages: List[Message] = [] + for raw in data.get("messages") or []: + message = Message.from_raw(raw) + if message is not None: + messages.append(message) + + events: List[AgentEvent] = [] + for raw in data.get("events") or []: + event = AgentEvent.from_wire(raw) + if event is not None: + events.append(event) + + return AgentResult( + output=data.get("output", "") or "", + messages=messages, + events=events, + usage=data.get("usage"), + stop_reason=data.get("stopReason"), + capabilities=HarnessCapabilities.from_wire(data.get("capabilities")), + session_id=data.get("sessionId"), + model=data.get("model"), + trace_id=data.get("traceId"), + ) diff --git a/sdks/python/agenta/sdk/decorators/routing.py b/sdks/python/agenta/sdk/decorators/routing.py index 4a57846d6e..04cb88c3a0 100644 --- a/sdks/python/agenta/sdk/decorators/routing.py +++ b/sdks/python/agenta/sdk/decorators/routing.py @@ -20,6 +20,11 @@ WorkflowBaseResponse, WorkflowServiceResponseData, ) +from agenta.sdk.agents.adapters.vercel.routing import register_agent_message_routes +from agenta.sdk.agents.adapters.vercel.sse import ( + VERCEL_UI_MESSAGE_STREAM_HEADERS as _VERCEL_UI_MESSAGE_STREAM_HEADERS, + vercel_sse_stream as _vercel_sse_stream, +) from agenta.sdk.middlewares.routing.cors import CORSMiddleware from agenta.sdk.middlewares.routing.auth import AuthMiddleware from agenta.sdk.middlewares.routing.otel import OTelMiddleware @@ -34,7 +39,7 @@ # These names are used by the per-route namespace triple itself. 
# --------------------------------------------------------------------------- -_RESERVED_PATHS = {"invoke", "inspect"} +_RESERVED_PATHS = {"invoke", "inspect", "messages", "load-session"} def _validate_path(path: str) -> None: @@ -195,15 +200,27 @@ def _make_stream_response( ) -> StreamingResponse: aiter = response.iterator() - if wire_format == "sse": - media_type = "text/event-stream" - res = StreamingResponse(_sse_stream(aiter), media_type=media_type) + if wire_format == "vercel": + # The Vercel UI Message Stream: SSE framing terminated by `data: [DONE]`, plus the + # headers the AI SDK client and proxies require. Endpoint-selected (the agent + # `/messages` route passes "vercel"), not derived from Accept — a Vercel UI message + # stream and a plain SSE stream share the `text/event-stream` media type, so the + # choice cannot come from the Accept header alone. + res = StreamingResponse( + _vercel_sse_stream(aiter), media_type="text/event-stream" + ) + for key, value in _VERCEL_UI_MESSAGE_STREAM_HEADERS.items(): + res.headers.setdefault(key, value) + elif wire_format == "sse": + res = StreamingResponse(_sse_stream(aiter), media_type="text/event-stream") elif wire_format == "ndjson": - media_type = "application/x-ndjson" - res = StreamingResponse(_ndjson_stream(aiter), media_type=media_type) + res = StreamingResponse( + _ndjson_stream(aiter), media_type="application/x-ndjson" + ) else: - media_type = "application/x-ndjson" - res = StreamingResponse(_ndjson_stream(aiter), media_type=media_type) + res = StreamingResponse( + _ndjson_stream(aiter), media_type="application/x-ndjson" + ) return _set_common_headers(res, response) # type: ignore @@ -451,6 +468,10 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest): except Exception as exception: return await handle_inspect_failure(exception) + # Agent-only endpoints are Vercel/browser-protocol adapters. 
Keep their request + # folding, session id handling, and UI Message Stream details out of the generic + # workflow route decorator. + invoke_responses: dict = { 200: { "description": "Negotiated response — format determined by Accept header", @@ -489,6 +510,25 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest): }, } + agent_enabled = bool(self.flags and self.flags.get("is_agent")) + + def _add_agent_routes(target: Any, prefix: str) -> None: + """Register the agent-only /messages + /load-session routes on a target + (sub-app / router / mount root), mirroring how /invoke + /inspect are added.""" + register_agent_message_routes( + target, + prefix, + wf=wf, + invoke_responses=invoke_responses, + get_request_tracing_context=_get_request_tracing_context, + parse_accept=_parse_accept, + stream_media_types=STREAM_MEDIA_TYPES, + make_json_response=_make_json_response, + make_not_acceptable_response=_make_not_acceptable_response, + make_stream_response=_make_stream_response, + handle_failure=handle_invoke_failure, + ) + # ------------------------------------------------------------------ # Legacy path: router= was provided. # Registers prefixed routes on the APIRouter without isolation. 
@@ -506,6 +546,8 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest): methods=["POST"], response_model=WorkflowInvokeRequest, ) + if agent_enabled: + _add_agent_routes(self.router_fallback, self.path) return foo # ------------------------------------------------------------------ @@ -528,6 +570,8 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest): methods=["POST"], response_model=WorkflowInvokeRequest, ) + if agent_enabled: + _add_agent_routes(self.mount_root, "") return foo @@ -545,6 +589,8 @@ async def inspect_endpoint(req: Request, request: WorkflowInspectRequest): methods=["POST"], response_model=WorkflowInvokeRequest, ) + if agent_enabled: + _add_agent_routes(sub_app, "") self.mount_root.mount(self.path, sub_app) diff --git a/sdks/python/agenta/sdk/engines/running/interfaces.py b/sdks/python/agenta/sdk/engines/running/interfaces.py index d84908a164..514135c90b 100644 --- a/sdks/python/agenta/sdk/engines/running/interfaces.py +++ b/sdks/python/agenta/sdk/engines/running/interfaces.py @@ -524,6 +524,49 @@ def llm_inputs_schema( ), ) +agent_v0_interface = WorkflowRevisionData( + uri="agenta:builtin:agent:v0", + schemas=dict( # type: ignore + parameters=obj( + properties={ + # One composite control for the whole agent config. The field shape lives in + # `AgentConfigSchema` (agenta.sdk.utils.types), registered as the `agent_config` + # catalog type; the playground resolves this ref and renders the AgentConfigControl. + "agent": semantic_field( + x_ag_type_ref="agent_config", + jtype="object", + description="The agent's instructions, model, tools, MCP servers, and runtime.", + default={ + "agents_md": ( + "You are a friendly hello-world agent running on the " + "Agenta agent service.\n\n- Greet the user warmly.\n- " + "Answer the user's message in one or two short sentences." 
+ ), + "model": "gpt-5.5", + "tools": [], + "mcp_servers": [], + "harness": "pi", + "sandbox": "local", + "permission_policy": "auto", + }, + ), + }, + additional_properties=True, + ), + inputs=llm_inputs_schema( + include_messages=True, + ), + outputs={ + "$schema": "https://json-schema.org/draft/2020-12/schema", + **semantic_field( + x_ag_type_ref="message", + jtype="object", + description="Final assistant message returned by the agent.", + ), + }, + ), +) + completion_v0_interface = WorkflowRevisionData( uri="agenta:builtin:completion:v0", schemas=dict( # type: ignore diff --git a/sdks/python/agenta/sdk/engines/running/utils.py b/sdks/python/agenta/sdk/engines/running/utils.py index da84036e5a..a55a7069ca 100644 --- a/sdks/python/agenta/sdk/engines/running/utils.py +++ b/sdks/python/agenta/sdk/engines/running/utils.py @@ -51,6 +51,7 @@ # --- OLD URI chat_v0_interface, completion_v0_interface, + agent_v0_interface, echo_v0_interface, auto_exact_match_v0_interface, auto_regex_test_v0_interface, @@ -88,6 +89,7 @@ # --- OLD URI chat=dict(v0=chat_v0_interface), completion=dict(v0=completion_v0_interface), + agent=dict(v0=agent_v0_interface), echo=dict(v0=echo_v0_interface), auto_exact_match=dict(v0=auto_exact_match_v0_interface), auto_regex_test=dict(v0=auto_regex_test_v0_interface), @@ -243,6 +245,15 @@ def _catalog_entry() -> dict: presets=[], ) ), + agent=dict( + v0=dict( + name="agent", + description="Agent that runs tools over multiple turns on the Pi harness.", + categories=None, + flags=None, + presets=[], + ) + ), # echo=dict(v0=_catalog_entry()), auto_exact_match=dict(v0=_catalog_entry()), @@ -282,6 +293,18 @@ def _catalog_entry() -> dict: # --- OLD URI chat=dict(v0=WorkflowRevisionData()), completion=dict(v0=WorkflowRevisionData()), + agent=dict( + v0=WorkflowRevisionData( + parameters={ + "model": "gpt-5.5", + "agents_md": ( + "You are a friendly hello-world agent running on the " + "Agenta agent service.\n\n- Greet the user warmly.\n- " + "Answer the 
user's message in one or two short sentences." + ), + } + ) + ), echo=dict(v0=WorkflowRevisionData()), auto_exact_match=dict(v0=WorkflowRevisionData()), auto_regex_test=dict(v0=WorkflowRevisionData()), @@ -543,12 +566,12 @@ def infer_url_from_uri(uri: Optional[str]) -> Optional[str]: # agenta:builtin:* — application-only (not evaluators) ("builtin", "chat"): (True, False, False), ("builtin", "completion"): (True, False, False), + ("builtin", "agent"): (True, False, False), # agenta:builtin:* — both evaluator and application ("builtin", "llm"): (True, True, False), # agenta:builtin:* — evaluator-only ("builtin", "match"): (False, True, False), ("builtin", "prompt"): (False, True, False), - ("builtin", "agent"): (False, True, False), ("builtin", "echo"): (False, True, False), ("builtin", "human"): (False, True, False), ("builtin", "auto_exact_match"): (False, True, False), diff --git a/sdks/python/agenta/sdk/middlewares/running/normalizer.py b/sdks/python/agenta/sdk/middlewares/running/normalizer.py index cdbe389b33..44c5b791e4 100644 --- a/sdks/python/agenta/sdk/middlewares/running/normalizer.py +++ b/sdks/python/agenta/sdk/middlewares/running/normalizer.py @@ -66,8 +66,10 @@ async def _normalize_request( 1. If parameter name is 'request': passes the entire WorkflowServiceRequest 2. If parameter name matches DATA_FIELDS (like 'inputs', 'outputs', 'parameters'): extracts that field from request.data - 3. If parameter is **kwargs: includes all unconsumed DATA_FIELDS - 4. Otherwise: looks up the parameter name in request.data.inputs dict + 3. If parameter name is a supported top-level request field like 'session_id': + extracts that field from the request envelope + 4. If parameter is **kwargs: includes all unconsumed DATA_FIELDS + 5. 
Otherwise: looks up the parameter name in request.data.inputs dict Args: request: The workflow service request containing inputs and data @@ -95,6 +97,10 @@ async def _normalize_request( ) consumed.add(name) + elif name == "session_id": + normalized[name] = request.session_id + consumed.add(name) + elif param.kind == inspect.Parameter.VAR_KEYWORD: if request.data: for f in self.DATA_FIELDS - consumed: diff --git a/sdks/python/agenta/sdk/models/workflows.py b/sdks/python/agenta/sdk/models/workflows.py index a9437342df..0cb751e9dc 100644 --- a/sdks/python/agenta/sdk/models/workflows.py +++ b/sdks/python/agenta/sdk/models/workflows.py @@ -79,6 +79,7 @@ class WorkflowFlags(BaseModel): # interface-derived ## schema is_chat: bool = False + is_agent: bool = False ## hook has_url: bool = False ## code @@ -106,6 +107,7 @@ class WorkflowQueryFlags(BaseModel): # interface-derived ## schema is_chat: Optional[bool] = None + is_agent: Optional[bool] = None ## hook has_url: Optional[bool] = None ## code @@ -209,6 +211,15 @@ class WorkflowRequestData(BaseModel): # testcase: Optional[dict] = None inputs: Optional[dict] = None + # The agent ``/messages`` egress lifts the conversation out of ``inputs`` to this + # first-class member, in the Vercel ``UIMessage`` shape; ``/invoke`` ignores it. + messages: Optional[list] = None + # Transport mode for the agent ``/messages`` route: the endpoint sets this from the Accept + # negotiation so the shared agent handler streams (returns an async generator) instead of + # returning a batch dict. A sibling of ``messages`` / ``inputs`` / ``parameters`` on purpose + # — it must not live in ``parameters``, where it would leak into agent config / revision + # state / trace inputs. ``/invoke`` leaves it unset (batch). 
+ stream: Optional[bool] = None # trace: Optional[dict] = None outputs: Optional[Any] = None @@ -233,6 +244,10 @@ class WorkflowBaseRequest(Metadata): secrets: Optional[Dict[str, Any]] = None credentials: Optional[str] = None + # The agent ``/messages`` session this turn belongs to (opaque, project-scoped). Optional; + # absent on ``/invoke`` and on the first turn of a server-minted session. + session_id: Optional[str] = None + @model_validator(mode="before") def _coerce_nested_models(cls, values: Dict[str, Any]) -> Dict[str, Any]: if "references" in values and isinstance(values["references"], dict): @@ -291,6 +306,10 @@ class WorkflowBaseResponse(TraceID, SpanID): status: Optional[WorkflowServiceStatus] = WorkflowServiceStatus() + # The resolved agent session id (minted or echoed) on the ``/messages`` response, alongside + # ``trace_id`` / ``span_id``. ``None`` for plain ``/invoke`` responses. + session_id: Optional[str] = None + # back-compat alias WorkflowServiceBaseResponse = WorkflowBaseResponse @@ -324,6 +343,20 @@ async def iterator(self): ] +class LoadSessionRequest(BaseModel): + """``POST /load-session`` body. 
The session id is required (RFC §7.1).""" + + session_id: str + + +class LoadSessionResponse(BaseModel): + """``POST /load-session`` response: a session's history as Vercel ``UIMessage`` objects, + the shape ``useChat`` takes as its initial ``messages``.""" + + session_id: str + messages: List[Dict[str, Any]] = Field(default_factory=list) + + # aliases ---------------------------------------------------------------------- diff --git a/sdks/python/agenta/sdk/utils/types.py b/sdks/python/agenta/sdk/utils/types.py index 8e629b92fb..994c781aa4 100644 --- a/sdks/python/agenta/sdk/utils/types.py +++ b/sdks/python/agenta/sdk/utils/types.py @@ -8,6 +8,8 @@ from pydantic import Field, model_validator, AliasChoices +from agenta.sdk.agents.mcp import MCPServerConfig +from agenta.sdk.agents.tools import ToolConfig from agenta.sdk.utils.assets import supported_llm_models, model_metadata from agenta.sdk.utils.helpers import _PLACEHOLDER_RE from agenta.sdk.utils.rendering import ( @@ -1052,6 +1054,81 @@ def _model_catalog_type() -> dict: } +_DEFAULT_AGENT_MODEL = "gpt-5.5" +_DEFAULT_AGENTS_MD = ( + "You are a friendly hello-world agent running on the Agenta agent service.\n\n" + "- Greet the user warmly.\n" + "- Answer the user's message in one or two short sentences." +) + + +class AgentConfigSchema(AgSchemaMixin): + """The playground's editable agent config (the ``agent`` element), as one semantic type. + + This is the schema-generation counterpart to the runtime :class:`agenta.sdk.agents.AgentConfig` + parser: it exists only to emit a rich JSON Schema for the ``agent_config`` control, so the + field shapes live in Pydantic (single source of truth) instead of a hand-written literal. 
+ It deliberately composes the editable fields the control surfaces — the neutral config + (``agents_md``/``model``/``tools``/``mcp_servers``) plus the run selection + (``harness``/``sandbox``/``permission_policy``) — and types ``tools``/``mcp_servers`` with the + real tool-def models so the playground gets typed editors. The runtime ``AgentConfig`` stays + permissive (``List[Any]``) because its job is to coerce the loose shapes the playground emits; + this model is strict because its job is to describe them. + """ + + __ag_type__ = "agent_config" + + agents_md: str = Field( + default=_DEFAULT_AGENTS_MD, + title="Instructions", + description="The agent's system prompt (its AGENTS.md).", + json_schema_extra={"x-ag-type": "textarea"}, + ) + model: str = Field( + default=_DEFAULT_AGENT_MODEL, + title="Model", + description="Model the agent runs on.", + json_schema_extra={"x-parameter": "grouped_choice"}, + ) + tools: List[ToolConfig] = Field( + default_factory=list, + title="Tools", + description=( + "Runnable tools the agent can call: harness built-ins, server-side gateway " + "actions (e.g. Composio), sandboxed code, or client-fulfilled tools." + ), + ) + mcp_servers: List[MCPServerConfig] = Field( + default_factory=list, + title="MCP servers", + description=( + "Declared MCP servers exposed to the agent. The backend resolves each server's " + "secret env from the vault at run time; tokens never live in the config." + ), + ) + harness: Literal["pi", "claude", "agenta"] = Field( + default="pi", + title="Harness", + description=( + "Coding agent to drive: pi, claude, or agenta (pi with Agenta's forced " + "skills, tools, and base instructions)." 
+ ), + ) + sandbox: Literal["local", "daytona"] = Field( + default="local", + title="Sandbox", + description="Where the agent runs: local daemon or a Daytona sandbox.", + ) + permission_policy: Literal["auto", "deny"] = Field( + default="auto", + title="Permission policy", + description=( + "How a permission-gating harness (e.g. Claude Code) handles tool-use prompts " + "in this headless run: auto-approve or deny." + ), + ) + + CATALOG_TYPES = { Message.ag_type(): _dereference_schema(Message.model_json_schema()), Messages.ag_type(): _dereference_schema(Messages.model_json_schema()), @@ -1065,4 +1142,7 @@ def _model_catalog_type() -> dict: AgPermissions.ag_type(): _dereference_schema(AgPermissions.model_json_schema()), AgResponse.ag_type(): _dereference_schema(AgResponse.model_json_schema()), PromptTemplate.ag_type(): _dereference_schema(PromptTemplate.model_json_schema()), + AgentConfigSchema.ag_type(): _dereference_schema( + AgentConfigSchema.model_json_schema() + ), } diff --git a/sdks/python/agenta/tests/agents/test_streaming.py b/sdks/python/agenta/tests/agents/test_streaming.py new file mode 100644 index 0000000000..bd378a2ece --- /dev/null +++ b/sdks/python/agenta/tests/agents/test_streaming.py @@ -0,0 +1,167 @@ +"""Tests for the live streaming boundary: ``AgentRun`` and the NDJSON subprocess transport. + +Two layers: + +- ``AgentRun`` over a fake record source — pure, fast: events are yielded live, the terminal + result is captured, hooks/cleanup fire, and an ``ok:false`` terminal raises. +- ``deliver_subprocess_stream`` against a fake NDJSON emitter — proves records arrive + incrementally (not buffered then dumped) and that closing the stream kills the child. + +A final integration test drives the real ``cli.ts --stream`` when ``pnpm`` is available. + +Run: ``uv run pytest agenta/tests/agents/test_streaming.py`` from ``sdks/python``. 
+""" + +from __future__ import annotations + +import shutil +import sys +import time +from pathlib import Path +from typing import Any, Dict, List + +import pytest + +from agenta.sdk.agents import AgentRun +from agenta.sdk.agents.utils import deliver_subprocess_stream + + +async def _from_list(records: List[Dict[str, Any]]): + for record in records: + yield record + + +# --- AgentRun --------------------------------------------------------------- + + +async def test_agentrun_yields_events_then_captures_result() -> None: + seen_result: Dict[str, Any] = {} + cleaned: List[bool] = [] + + async def _cleanup() -> None: + cleaned.append(True) + + records = [ + {"kind": "event", "event": {"type": "message_start", "id": "m0"}}, + { + "kind": "event", + "event": {"type": "message_delta", "id": "m0", "delta": "Hi"}, + }, + {"kind": "event", "event": {"type": "message_end", "id": "m0"}}, + { + "kind": "result", + "result": { + "ok": True, + "output": "Hi", + "sessionId": "s1", + "stopReason": "end_turn", + }, + }, + ] + run = AgentRun(_from_list(records)) + run.on_result(lambda r: seen_result.update({"id": r.session_id})) + run.on_cleanup(_cleanup) + + events = [event async for event in run] + + assert [e.type for e in events] == ["message_start", "message_delta", "message_end"] + assert run.result().output == "Hi" + assert run.result().session_id == "s1" + assert run.result().stop_reason == "end_turn" + assert seen_result == {"id": "s1"} # on_result fired with the terminal result + assert cleaned == [True] # cleanup ran when iteration ended + + +async def test_agentrun_raises_on_error_terminal() -> None: + records = [ + {"kind": "event", "event": {"type": "message_start", "id": "m0"}}, + {"kind": "result", "result": {"ok": False, "error": "boom"}}, + ] + run = AgentRun(_from_list(records)) + with pytest.raises(RuntimeError, match="boom"): + async for _ in run: + pass + + +async def test_agentrun_result_unavailable_before_drain() -> None: + run = AgentRun(_from_list([{"kind": 
"event", "event": {"type": "done"}}])) + with pytest.raises(RuntimeError, match="not available"): + run.result() + + +# --- deliver_subprocess_stream (fake NDJSON emitter) ------------------------ + +# Emits 3 event lines with a small gap, then one terminal result line. `-u` + flush so the +# parent observes each line as it is written, not at process exit. +_EMITTER = r""" +import sys, time, json +for i in range(3): + sys.stdout.write(json.dumps({"kind":"event","event":{"type":"message_delta","id":"m","delta":"d%d"%i}})+"\n") + sys.stdout.flush() + time.sleep(0.05) +sys.stdout.write(json.dumps({"kind":"result","result":{"ok":True,"output":"d0d1d2","sessionId":"s1"}})+"\n") +sys.stdout.flush() +""" + + +async def test_subprocess_stream_is_incremental() -> None: + cmd = [sys.executable, "-u", "-c", _EMITTER] + stamped = [] + async for record in deliver_subprocess_stream(cmd, {}): + stamped.append((time.monotonic(), record)) + + kinds = [r["kind"] for _, r in stamped] + assert kinds == ["event", "event", "event", "result"], ( + "events precede the single terminal result" + ) + assert kinds.count("result") == 1, "exactly one terminal record" + # Incremental, not buffered-then-dumped: the first event lands well before the result. + first_event_t = stamped[0][0] + result_t = stamped[-1][0] + assert result_t - first_event_t >= 0.1, ( + "records were spread out over time, not delivered in one batch" + ) + + +# Emits one event, then blocks for a long time. Closing the stream must kill it promptly. 
+_HANGING_EMITTER = r""" +import sys, time, json +sys.stdout.write(json.dumps({"kind":"event","event":{"type":"message_delta","id":"m","delta":"x"}})+"\n") +sys.stdout.flush() +time.sleep(60) +""" + + +async def test_subprocess_stream_cancellation_kills_child() -> None: + cmd = [sys.executable, "-u", "-c", _HANGING_EMITTER] + agen = deliver_subprocess_stream(cmd, {}) + first = await agen.__anext__() + assert first["kind"] == "event" + + started = time.monotonic() + await agen.aclose() # runs the finally: proc.kill() + await proc.wait() + elapsed = time.monotonic() - started + assert elapsed < 5, "aclose() killed the child instead of waiting out its 60s sleep" + + +# --- Real cli.ts --stream boundary (integration) ---------------------------- + + +@pytest.mark.skipif(shutil.which("pnpm") is None, reason="pnpm not available") +async def test_cli_stream_terminal_only_on_empty_request() -> None: + agent_dir = Path(__file__).resolve().parents[5] / "services" / "agent" + cmd = ["pnpm", "exec", "tsx", "src/cli.ts"] + records = [] + async for record in deliver_subprocess_stream(cmd, {}, cwd=str(agent_dir)): + records.append(record) + + # An empty request fails before any event, so the stream is exactly one result record. + assert len(records) == 1, records + assert records[0]["kind"] == "result" + assert records[0]["result"]["ok"] is False + + # AgentRun surfaces that failure as a RuntimeError, just like the one-shot path. + run = AgentRun(deliver_subprocess_stream(cmd, {}, cwd=str(agent_dir))) + with pytest.raises(RuntimeError): + async for _ in run: + pass diff --git a/sdks/python/oss/tests/pytest/integration/agents/__init__.py b/sdks/python/oss/tests/pytest/integration/agents/__init__.py new file mode 100644 index 0000000000..de6d92eeaf --- /dev/null +++ b/sdks/python/oss/tests/pytest/integration/agents/__init__.py @@ -0,0 +1 @@ +# Integration tests for the agent runtime: the real wire + transport against a fake runner. 
diff --git a/sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py b/sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py new file mode 100644 index 0000000000..a73c30eecc --- /dev/null +++ b/sdks/python/oss/tests/pytest/integration/agents/test_transport_roundtrip.py @@ -0,0 +1,113 @@ +"""End-to-end through the real wire and transport, against a fake runner. + +This is the Python-only stand-in for a live ``/invoke``: a tiny script plays the runner, +echoing the latest turn. The whole runtime path is real -- harness translation, the cold +environment lifecycle, ``request_to_wire``, the subprocess transport, and ``result_from_wire`` +-- only the runner program (which would be the TS + Pi + LLM stack) is faked. So it catches +serialization or transport drift that per-side unit tests cannot, with no TS and no LLM. +""" + +from __future__ import annotations + +import sys + +import pytest + +from agenta.sdk.agents import ( + AgentConfig, + Environment, + InProcessPiBackend, + Message, + PiHarness, + SessionConfig, +) + +pytestmark = pytest.mark.integration + + +# A runner that reads the /run request on stdin and echoes the latest user turn as a full +# AgentRunResult on stdout (the camelCase wire shape result_from_wire parses). 
+_ECHO_RUNNER = """ +import sys, json + +req = json.load(sys.stdin) +text = "" +for message in reversed(req.get("messages") or []): + if message.get("role") == "user": + content = message.get("content") + if isinstance(content, str): + text = content + else: + text = "".join( + block.get("text", "") + for block in content + if isinstance(block, dict) and block.get("type") == "text" + ) + if text: + break + +out = { + "ok": True, + "output": "echo: " + text, + "messages": [{"role": "assistant", "content": "echo: " + text}], + "events": [ + {"type": "message", "text": "echo: " + text}, + {"type": "done", "stopReason": "end_turn"}, + ], + "usage": {"input": 1, "output": 1, "total": 2, "cost": 0.0}, + "stopReason": "end_turn", + "capabilities": {"textMessages": True, "mcpTools": False}, + "sessionId": "sess-fake", + "model": req.get("model"), +} +sys.stdout.write(json.dumps(out)) +""" + +_FAIL_RUNNER = """ +import sys, json +json.load(sys.stdin) +sys.stdout.write(json.dumps({"ok": False, "error": "model exploded"})) +""" + +_SILENT_RUNNER = """ +import sys, json +json.load(sys.stdin) +""" + + +def _backend(tmp_path, body: str) -> InProcessPiBackend: + runner = tmp_path / "fake_runner.py" + runner.write_text(body, encoding="utf-8") + return InProcessPiBackend(command=[sys.executable, str(runner)], cwd=str(tmp_path)) + + +async def test_prompt_round_trips_through_the_real_transport(tmp_path): + harness = PiHarness(Environment(_backend(tmp_path, _ECHO_RUNNER))) + config = SessionConfig(agent=AgentConfig(instructions="hi", model="gpt-5.5")) + + result = await harness.prompt(config, [Message(role="user", content="ping")]) + + # The runner saw the wired turn and model, and the result parsed back cleanly. 
+ assert result.output == "echo: ping" + assert result.model == "gpt-5.5" + assert [e.type for e in result.events] == ["message", "done"] + assert result.capabilities is not None and result.capabilities.mcp_tools is False + # The session id is parsed and carried forward for a follow-up turn. + assert result.session_id == "sess-fake" + assert config.session_id == "sess-fake" + + +async def test_runner_failure_surfaces_as_runtime_error(tmp_path): + harness = PiHarness(Environment(_backend(tmp_path, _FAIL_RUNNER))) + config = SessionConfig(agent=AgentConfig(instructions="hi")) + + with pytest.raises(RuntimeError, match="model exploded"): + await harness.prompt(config, [Message(role="user", content="hi")]) + + +async def test_runner_empty_output_raises(tmp_path): + harness = PiHarness(Environment(_backend(tmp_path, _SILENT_RUNNER))) + config = SessionConfig(agent=AgentConfig(instructions="hi")) + + with pytest.raises(RuntimeError, match="no output"): + await harness.prompt(config, [Message(role="user", content="hi")]) diff --git a/sdks/python/oss/tests/pytest/unit/agents/__init__.py b/sdks/python/oss/tests/pytest/unit/agents/__init__.py new file mode 100644 index 0000000000..4db23c7442 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/__init__.py @@ -0,0 +1 @@ +# Unit tests for the agent runtime (agenta.sdk.agents). diff --git a/sdks/python/oss/tests/pytest/unit/agents/conftest.py b/sdks/python/oss/tests/pytest/unit/agents/conftest.py new file mode 100644 index 0000000000..a434fdacc5 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/conftest.py @@ -0,0 +1,198 @@ +"""Shared fakes and fixtures for the agent-runtime unit tests. + +The fakes implement the real ports (``Backend`` / ``Sandbox`` / ``Session`` from +``agenta.sdk.agents.interfaces``) so the port contract keeps them honest: if a port grows an +abstract method, the fake fails to instantiate and these tests flag that the fake needs +updating. 
They record what they receive so a test can assert on lifecycle and translation +without a runner, a sandbox, an LLM, or the network. + +Everything is exposed through fixtures because pytest's prepend import mode makes a plain +``from .fakes import ...`` brittle across components; a fixture factory sidesteps that. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Sequence + +import pytest + +from agenta.sdk.agents import ( + AgentResult, + Environment, + HarnessType, +) +from agenta.sdk.agents.interfaces import Backend, Sandbox, Session +from agenta.sdk.agents.streaming import AgentRun + + +class FakeSandbox(Sandbox): + """Records provisioning and teardown.""" + + def __init__(self) -> None: + self.files: Dict[str, bytes] = {} + self.destroyed = False + + async def add_files(self, files: Mapping[str, bytes]) -> None: + self.files.update(files) + + async def destroy(self) -> None: + self.destroyed = True + + +class FakeSession(Session): + """Returns a canned result, records prompts, and tracks teardown. Can be told to raise.""" + + def __init__( + self, + *, + result: AgentResult, + session_id: Optional[str] = None, + raise_on_prompt: bool = False, + ) -> None: + self._result = result + self._session_id = session_id + self._raise = raise_on_prompt + self.prompts: List[List[Any]] = [] + self.destroyed = False + + @property + def id(self) -> Optional[str]: + return self._session_id + + async def prompt(self, messages, *, on_event=None) -> AgentResult: + self.prompts.append(list(messages)) + if self._raise: + raise RuntimeError("boom from fake session") + if on_event: + for event in self._result.events: + on_event(event) + return self._result + + def stream(self, messages) -> AgentRun: + # Mirror the runner's NDJSON stream: an event record per event, then one terminal + # result record (the shape `result_from_wire`/`AgentRun` expect). 
+ self.prompts.append(list(messages)) + result = self._result + raising = self._raise + + async def _records(): + if raising: + yield { + "kind": "result", + "result": {"ok": False, "error": "boom from fake session"}, + } + return + for event in result.events: + yield {"kind": "event", "event": event.data} + yield { + "kind": "result", + "result": { + "ok": True, + "output": result.output, + "sessionId": result.session_id, + }, + } + + return AgentRun(_records()) + + async def destroy(self) -> None: + self.destroyed = True + + +class FakeBackend(Backend): + """A backend that hands out fakes and records every lifecycle call.""" + + def __init__( + self, + *, + supported: Sequence[HarnessType] = (HarnessType.PI, HarnessType.CLAUDE), + result: Optional[AgentResult] = None, + result_session_id: Optional[str] = None, + raise_on_prompt: bool = False, + ) -> None: + # Instance attribute shadows the ClassVar so `supports()` reflects this fake. + self.supported_harnesses = frozenset(supported) + self._result = result if result is not None else AgentResult(output="ok") + self._result_session_id = result_session_id + self._raise = raise_on_prompt + self.sandboxes: List[FakeSandbox] = [] + self.sessions: List[FakeSession] = [] + self.created_sessions: List[Dict[str, Any]] = [] + self.setup_calls = 0 + self.shutdown_calls = 0 + + async def setup(self) -> None: + self.setup_calls += 1 + + async def shutdown(self) -> None: + self.shutdown_calls += 1 + + async def create_sandbox(self) -> FakeSandbox: + sandbox = FakeSandbox() + self.sandboxes.append(sandbox) + return sandbox + + async def create_session( + self, + sandbox, + config, + *, + harness, + secrets=None, + trace=None, + session_id=None, + ) -> FakeSession: + self.created_sessions.append( + { + "sandbox": sandbox, + "config": config, + "harness": harness, + "secrets": secrets, + "trace": trace, + "session_id": session_id, + } + ) + session = FakeSession( + result=self._result, + session_id=self._result_session_id, + 
raise_on_prompt=self._raise, + ) + self.sessions.append(session) + return session + + +@pytest.fixture +def make_backend(): + """Factory returning a configured :class:`FakeBackend`.""" + + def _make(**kwargs) -> FakeBackend: + return FakeBackend(**kwargs) + + return _make + + +@pytest.fixture +def make_env(make_backend): + """Factory returning an :class:`Environment` over a fresh :class:`FakeBackend`. + + Returns the Environment; reach its backend via ``env.backend`` to assert on recordings. + """ + + def _make(*, sandbox_per_session: bool = True, **backend_kwargs) -> Environment: + backend = make_backend(**backend_kwargs) + return Environment(backend, sandbox_per_session=sandbox_per_session) + + return _make + + +@pytest.fixture +def golden(): + """Load a checked-in golden ``/run`` fixture (the cross-language wire contract anchor).""" + base = Path(__file__).parent / "golden" + + def _load(name: str) -> Dict[str, Any]: + return json.loads((base / name).read_text(encoding="utf-8")) + + return _load diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json new file mode 100644 index 0000000000..318722efe5 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json @@ -0,0 +1,28 @@ +{ + "backend": "rivet", + "harness": "claude", + "sandbox": "local", + "sessionId": null, + "agentsMd": "You are a helpful assistant.", + "model": "claude-sonnet-4-6", + "messages": [ + {"role": "user", "content": "hi"} + ], + "secrets": {"ANTHROPIC_API_KEY": "sk-ant"}, + "trace": null, + "tools": [], + "customTools": [ + { + "name": "get_user", + "description": "Get a user", + "inputSchema": {"type": "object", "properties": {}}, + "callRef": "tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn", + "kind": "callback" + } + ], + "toolCallback": { + "endpoint": "https://api.example/tools/call", + "authorization": "Access tok-123" + }, + 
"permissionPolicy": "deny" +} diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json new file mode 100644 index 0000000000..ebfb966479 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.pi.json @@ -0,0 +1,36 @@ +{ + "backend": "pi", + "harness": "pi", + "sandbox": "local", + "sessionId": "sess-1", + "agentsMd": "You are a helpful assistant.", + "model": "openai-codex/gpt-5.5", + "messages": [ + {"role": "user", "content": "hi"} + ], + "secrets": {"OPENAI_API_KEY": "sk-test"}, + "trace": { + "traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01", + "baggage": null, + "endpoint": "https://otlp.example/v1/traces", + "authorization": "Access tok-123", + "captureContent": true + }, + "tools": ["read", "write"], + "customTools": [ + { + "name": "get_user", + "description": "Get a user", + "inputSchema": {"type": "object", "properties": {}}, + "callRef": "tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn", + "kind": "callback" + } + ], + "toolCallback": { + "endpoint": "https://api.example/tools/call", + "authorization": "Access tok-123" + }, + "permissionPolicy": "auto", + "systemPrompt": "You are Pi.", + "appendSystemPrompt": "Be terse." 
+} diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json new file mode 100644 index 0000000000..9791d5a4ea --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.error.json @@ -0,0 +1,4 @@ +{ + "ok": false, + "error": "model exploded" +} diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json new file mode 100644 index 0000000000..0943d2d047 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_result.ok.json @@ -0,0 +1,31 @@ +{ + "ok": true, + "output": "Hello!", + "messages": [ + {"role": "assistant", "content": "Hello!"} + ], + "events": [ + {"type": "message", "text": "Hello!"}, + {"type": "usage", "input": 10, "output": 5, "total": 15, "cost": 0.001}, + {"type": "done", "stopReason": "end_turn"}, + {"text": "an event with no type, dropped on parse"} + ], + "usage": {"input": 10, "output": 5, "total": 15, "cost": 0.001}, + "stopReason": "end_turn", + "capabilities": { + "textMessages": true, + "images": false, + "fileAttachments": false, + "mcpTools": true, + "toolCalls": true, + "reasoning": true, + "planMode": false, + "permissions": false, + "usage": true, + "streamingDeltas": false, + "sessionLifecycle": false + }, + "sessionId": "sess-42", + "model": "gpt-5.5", + "traceId": "trace-abc" +} diff --git a/sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py b/sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/mcp/__init__.py @@ -0,0 +1 @@ + diff --git a/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py b/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py new file mode 100644 index 0000000000..a8a97ab6f0 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py @@ 
-0,0 +1,76 @@ +from __future__ import annotations + +from typing import Mapping, Sequence + +import pytest +from pydantic import ValidationError + +from agenta.sdk.agents.mcp import ( + MCPResolver, + MCPServerConfig, + MissingMCPSecretError, +) +from agenta.sdk.agents.tools import MissingSecretPolicy + + +class DictSecretProvider: + def __init__(self, values: Mapping[str, str]): + self.values = values + + async def get_many(self, names: Sequence[str]) -> Mapping[str, str]: + return {name: self.values[name] for name in names if name in self.values} + + +def test_transport_specific_fields_are_required(): + with pytest.raises(ValidationError, match="requires command"): + MCPServerConfig(name="stdio") + with pytest.raises(ValidationError, match="requires url"): + MCPServerConfig(name="remote", transport="http") + + +async def test_resolves_mcp_environment_in_sibling_subsystem(): + servers = await MCPResolver( + secret_provider=DictSecretProvider({"github_pat": "ghp"}) + ).resolve( + [ + MCPServerConfig( + name="github", + command="npx", + env={"LOG": "info"}, + secrets={"GITHUB_TOKEN": "github_pat"}, + ) + ] + ) + assert servers[0].to_wire()["env"] == { + "LOG": "info", + "GITHUB_TOKEN": "ghp", + } + + +async def test_missing_mcp_secret_is_explicit(): + with pytest.raises(MissingMCPSecretError): + await MCPResolver(secret_provider=DictSecretProvider({})).resolve( + [ + MCPServerConfig( + name="github", + command="npx", + secrets={"GITHUB_TOKEN": "missing"}, + ) + ] + ) + + +async def test_mcp_compatibility_policy_can_omit_missing_secret(): + servers = await MCPResolver( + secret_provider=DictSecretProvider({}), + missing_secret_policy=MissingSecretPolicy.OMIT, + ).resolve( + [ + MCPServerConfig( + name="github", + command="npx", + secrets={"GITHUB_TOKEN": "missing"}, + ) + ] + ) + assert "env" not in servers[0].to_wire() diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py 
new file mode 100644 index 0000000000..f4bacd92d4 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py @@ -0,0 +1,155 @@ +"""``AgentConfig.from_params`` (the three request shapes) and ``RunSelection.from_params``. + +The handler parses whatever the playground or a stored config sends into a neutral +``AgentConfig`` plus a ``RunSelection``. This file locks the three accepted shapes, the +defaults fall-through, the ``harness_options`` escape hatch, and the run-selection parsing. +""" + +from __future__ import annotations + +from agenta.sdk.agents import ( + AgentConfig, + BuiltinToolConfig, + RunSelection, +) + +_DEFAULTS = AgentConfig(instructions="default-md", model="default-model", tools=["d"]) + + +# ----------------------------------------------------------- AgentConfig shapes + + +def test_from_params_agent_element_shape(): + config = AgentConfig.from_params( + { + "agent": { + "instructions": "I", + "model": "M", + "tools": [{"type": "builtin", "name": "read"}], + "harness_options": {"pi": {"system": "S"}}, + } + }, + defaults=_DEFAULTS, + ) + assert config.instructions == "I" + assert config.model == "M" + assert config.tools == [BuiltinToolConfig(name="read")] + assert config.harness_options == {"pi": {"system": "S"}} + + +def test_from_params_prompt_template_shape(): + config = AgentConfig.from_params( + { + "prompt": { + "messages": [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "ignored for instructions"}, + ], + "llm_config": {"model": "M", "tools": ["t"]}, + } + }, + defaults=_DEFAULTS, + ) + assert config.instructions == "You are helpful." 
# system message -> instructions + assert config.model == "M" + assert config.tools == [BuiltinToolConfig(name="t")] + + +def test_from_params_prompt_template_joins_multiple_system_messages(): + config = AgentConfig.from_params( + { + "prompt": { + "messages": [ + {"role": "system", "content": "First."}, + { + "role": "system", + "content": [{"type": "text", "text": "Second."}], + }, + ], + "llm_config": {"model": "M"}, + } + } + ) + assert config.instructions == "First.\n\nSecond." + + +def test_from_params_flat_shape(): + config = AgentConfig.from_params( + {"model": "M", "agents_md": "A", "tools": [{"name": "x"}]}, + defaults=_DEFAULTS, + ) + assert config.instructions == "A" + assert config.model == "M" + assert config.tools == [BuiltinToolConfig(name="x")] + + +def test_from_params_falls_back_to_defaults(): + config = AgentConfig.from_params({}, defaults=_DEFAULTS) + assert config.instructions == "default-md" + assert config.model == "default-model" + assert config.tools == [BuiltinToolConfig(name="d")] + + +def test_from_params_coerces_single_tool_dict_to_list(): + config = AgentConfig.from_params({"agent": {"tools": {"name": "solo"}}}) + assert config.tools == [BuiltinToolConfig(name="solo")] + + +def test_harness_options_drops_malformed_and_lowercases_keys(): + config = AgentConfig.from_params( + { + "agent": { + "harness_options": { + "PI": {"system": "S"}, # key lower-cased + "claude": "not a dict", # dropped + } + } + } + ) + assert config.harness_options == {"pi": {"system": "S"}} + + +def test_harness_options_falls_back_to_defaults_when_absent(): + defaults = AgentConfig(harness_options={"pi": {"system": "D"}}) + config = AgentConfig.from_params( + {"agent": {"instructions": "I"}}, defaults=defaults + ) + assert config.harness_options == {"pi": {"system": "D"}} + + +# -------------------------------------------------------------- RunSelection + + +def test_run_selection_defaults(): + sel = RunSelection.from_params({}) + assert (sel.harness, 
sel.sandbox, sel.permission_policy) == ("pi", "local", "auto") + + +def test_run_selection_reads_agent_subdict_and_lowercases(): + sel = RunSelection.from_params( + { + "agent": { + "harness": "Claude", + "sandbox": "Daytona", + "permission_policy": "Deny", + } + } + ) + assert (sel.harness, sel.sandbox, sel.permission_policy) == ( + "claude", + "daytona", + "deny", + ) + + +def test_run_selection_honors_custom_defaults(): + sel = RunSelection.from_params( + {}, default_harness="claude", default_sandbox="daytona" + ) + assert sel.harness == "claude" + assert sel.sandbox == "daytona" + + +def test_run_selection_reads_flat_request(): + sel = RunSelection.from_params({"harness": "claude"}) + assert sel.harness == "claude" diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py new file mode 100644 index 0000000000..5d6ce90e8c --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_capabilities_events.py @@ -0,0 +1,81 @@ +"""Capabilities, events, and the small cross-boundary DTOs. + +Capabilities are what lets adapters branch on a flag instead of the harness name, so their +camelCase parsing is contract-critical. Events feed tracing; the trace/tool-callback DTOs +plumb the run into Agenta. 
+""" + +from __future__ import annotations + +import pytest + +from agenta.sdk.agents import ( + AgentEvent, + HarnessCapabilities, + HarnessType, + ToolCallback, + TraceContext, +) + + +def test_capabilities_none_and_non_dict_pass_through_as_none(): + assert HarnessCapabilities.from_wire(None) is None + assert HarnessCapabilities.from_wire("nope") is None + + +def test_capabilities_defaults_text_messages_true(): + caps = HarnessCapabilities.from_wire({}) + assert caps is not None + assert caps.text_messages is True # the one flag that defaults on + assert caps.mcp_tools is False + assert caps.images is False + + +def test_capabilities_map_camelcase_flags(): + caps = HarnessCapabilities.from_wire( + {"mcpTools": True, "fileAttachments": True, "sessionLifecycle": True} + ) + assert caps.mcp_tools is True + assert caps.file_attachments is True + assert caps.session_lifecycle is True + + +def test_agent_event_requires_type(): + assert AgentEvent.from_wire({"text": "no type"}) is None + assert AgentEvent.from_wire({"type": ""}) is None # falsy type + assert AgentEvent.from_wire("not a dict") is None + + +def test_agent_event_keeps_full_payload_in_data(): + event = AgentEvent.from_wire( + {"type": "tool_call", "name": "search", "input": {"q": "x"}} + ) + assert event.type == "tool_call" + # `data` carries the rest verbatim, including the type key. 
+ assert event.data == {"type": "tool_call", "name": "search", "input": {"q": "x"}} + + +def test_trace_context_to_wire_emits_all_keys_camelcase(): + wire = TraceContext(traceparent="tp", endpoint="ep").to_wire() + assert wire == { + "traceparent": "tp", + "baggage": None, + "endpoint": "ep", + "authorization": None, + "captureContent": True, # defaults on, camelCase + } + + +def test_tool_callback_to_wire(): + assert ToolCallback(endpoint="e", authorization="a").to_wire() == { + "endpoint": "e", + "authorization": "a", + } + + +def test_harness_type_coerce(): + assert HarnessType.coerce(HarnessType.PI) is HarnessType.PI + assert HarnessType.coerce("PI") is HarnessType.PI # case-insensitive + assert HarnessType.coerce("claude") is HarnessType.CLAUDE + with pytest.raises(ValueError): + HarnessType.coerce("bogus") diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py new file mode 100644 index 0000000000..5c8ba74ade --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_content_blocks.py @@ -0,0 +1,90 @@ +"""Content blocks and messages: loose-input coercion and wire serialization. + +The playground sends loose dicts and bare strings; the runtime coerces them and emits +camelCase on the wire. These round-trips lock that coercion. 
+""" + +from __future__ import annotations + +from agenta.sdk.agents import ContentBlock, Message, to_messages + + +def test_content_block_from_string(): + block = ContentBlock.from_raw("hello") + assert block.type == "text" + assert block.text == "hello" + + +def test_content_block_from_dict_accepts_both_mime_spellings(): + camel = ContentBlock.from_raw( + {"type": "image", "data": "b64", "mimeType": "image/png"} + ) + snake = ContentBlock.from_raw( + {"type": "image", "data": "b64", "mime_type": "image/png"} + ) + assert camel.mime_type == "image/png" + assert snake.mime_type == "image/png" + + +def test_content_block_passthrough_and_fallback(): + existing = ContentBlock(type="text", text="x") + assert ContentBlock.from_raw(existing) is existing + # A non-string, non-dict value stringifies into a text block. + assert ContentBlock.from_raw(42).text == "42" + + +def test_content_block_to_wire_omits_none_and_uses_camelcase(): + block = ContentBlock(type="image", data="b64", mime_type="image/png") + wire = block.to_wire() + assert wire == {"type": "image", "data": "b64", "mimeType": "image/png"} + assert "text" not in wire # None fields are omitted + + +def test_text_block_round_trips(): + assert ContentBlock(type="text", text="hi").to_wire() == { + "type": "text", + "text": "hi", + } + + +def test_message_from_raw_requires_role(): + assert Message.from_raw({"content": "no role"}) is None + assert Message.from_raw("not a dict") is None + msg = Message.from_raw({"role": "user", "content": "hi"}) + assert msg is not None and msg.role == "user" and msg.content == "hi" + + +def test_message_from_raw_coerces_block_list(): + msg = Message.from_raw( + {"role": "user", "content": [{"type": "text", "text": "a"}, "b"]} + ) + assert isinstance(msg.content, list) + assert [b.text for b in msg.content] == ["a", "b"] + + +def test_message_to_wire_string_and_blocks(): + assert Message(role="user", content="hi").to_wire() == { + "role": "user", + "content": "hi", + } + blocks = 
Message(role="user", content=[ContentBlock(type="text", text="a")]) + assert blocks.to_wire() == { + "role": "user", + "content": [{"type": "text", "text": "a"}], + } + + +def test_to_messages_filters_invalid_entries(): + messages = to_messages( + [ + {"role": "user", "content": "hi"}, + {"content": "no role"}, # dropped + None, # dropped + {"role": "assistant", "content": "yo"}, + ] + ) + assert [m.role for m in messages] == ["user", "assistant"] + + +def test_to_messages_handles_none(): + assert to_messages(None) == [] diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py new file mode 100644 index 0000000000..1d53c8f469 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_harness_configs.py @@ -0,0 +1,99 @@ +"""Per-harness configs: how each shapes its own tool/prompt fields for the ``/run`` payload. + +These are the per-harness halves of the wire contract. ``test_wire_contract`` checks the full +payload against the golden; this file pins each config's contribution in isolation so a failure +points straight at the harness whose shape changed. 
+""" + +from __future__ import annotations + +import pytest + +from agenta.sdk.agents import ( + ClaudeAgentConfig, + ClientToolSpec, + HarnessAgentConfig, + PiAgentConfig, + ToolCallback, +) + +_CALLBACK = ToolCallback(endpoint="https://api.example/tools/call", authorization="A") + + +def test_pi_wire_tools_is_native_and_never_gates(): + config = PiAgentConfig( + builtin_tools=["read"], + tool_specs=[ + ClientToolSpec( + name="t", + description="t", + ) + ], + tool_callback=_CALLBACK, + ) + assert config.wire_tools() == { + "tools": ["read"], + "customTools": [ + { + "name": "t", + "description": "t", + "inputSchema": {"type": "object", "properties": {}}, + "kind": "client", + } + ], + "toolCallback": { + "endpoint": "https://api.example/tools/call", + "authorization": "A", + }, + "permissionPolicy": "auto", # Pi never gates tool use + } + + +def test_pi_wire_tools_without_callback(): + assert PiAgentConfig().wire_tools()["toolCallback"] is None + + +def test_pi_wire_prompt_emits_only_set_overrides(): + assert PiAgentConfig().wire_prompt() == {} + assert PiAgentConfig(system="s").wire_prompt() == {"systemPrompt": "s"} + assert PiAgentConfig(append_system="a").wire_prompt() == {"appendSystemPrompt": "a"} + assert PiAgentConfig(system="", append_system="a").wire_prompt() == { + "systemPrompt": "", # an explicit empty string is still an override here + "appendSystemPrompt": "a", + } + + +def test_claude_wire_tools_has_no_builtins_and_carries_policy(): + config = ClaudeAgentConfig( + tool_specs=[ + ClientToolSpec( + name="t", + description="t", + ) + ], + tool_callback=_CALLBACK, + permission_policy="deny", + ) + wire = config.wire_tools() + assert wire["tools"] == [] # Claude has no Pi built-ins + assert wire["customTools"] == [ + { + "name": "t", + "description": "t", + "inputSchema": {"type": "object", "properties": {}}, + "kind": "client", + } + ] + assert wire["permissionPolicy"] == "deny" + + +def test_claude_defaults_to_auto_policy_and_empty_prompt(): + assert 
ClaudeAgentConfig().wire_tools()["permissionPolicy"] == "auto" + assert ClaudeAgentConfig().wire_prompt() == {} # Claude exposes no prompt overrides + + +def test_base_config_wire_tools_is_abstract(): + # The base class does not know any engine's tool shape. + with pytest.raises(NotImplementedError): + HarnessAgentConfig().wire_tools() + assert HarnessAgentConfig().wire_prompt() == {} diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py b/sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py new file mode 100644 index 0000000000..c84761885f --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_environment_lifecycle.py @@ -0,0 +1,127 @@ +"""Environment sandbox policy and the cold ``Harness.prompt`` lifecycle. + +These lock the isolation guarantees the design docs promise: a fresh sandbox per session +under the cold model, the session torn down in a ``finally`` even when the turn raises, the +session id carried forward, and AGENTS.md provisioned only when there are instructions. 
+""" + +from __future__ import annotations + +import pytest + +from agenta.sdk.agents import ( + AgentConfig, + AgentResult, + HarnessType, + Message, + PiHarness, + SessionConfig, +) + + +def _config(instructions="hi") -> SessionConfig: + return SessionConfig(agent=AgentConfig(instructions=instructions, model="m")) + + +# ------------------------------------------------------------- Environment policy + + +async def test_fresh_sandbox_per_session(make_env): + env = make_env(sandbox_per_session=True) + config = _config() + + await env.create_session( + PiHarness(env)._to_harness_config(config), + harness=HarnessType.PI, + session_config=config, + ) + await env.create_session( + PiHarness(env)._to_harness_config(config), + harness=HarnessType.PI, + session_config=config, + ) + + assert len(env.backend.sandboxes) == 2 # a new sandbox each time (cold model) + + +async def test_shared_sandbox_when_not_per_session(make_env): + env = make_env(sandbox_per_session=False) + config = _config() + + for _ in range(2): + await env.create_session( + PiHarness(env)._to_harness_config(config), + harness=HarnessType.PI, + session_config=config, + ) + + assert len(env.backend.sandboxes) == 1 # one sandbox reused + await env.shutdown() + assert env.backend.sandboxes[0].destroyed is True # shutdown tears it down + assert env.backend.shutdown_calls == 1 + + +async def test_provisioning_writes_agents_md_only_when_present(make_env): + env = make_env() + harness = PiHarness(env) + + assert harness._provisioning(_config("hello")) == {"AGENTS.md": b"hello"} + assert harness._provisioning(_config("")) == {} + assert harness._provisioning(_config(" ")) == {} + assert harness._provisioning(_config(None)) == {} + + +async def test_create_session_adds_files_when_provisioned(make_env): + env = make_env() + config = _config("project conventions") + + await PiHarness(env).create_session(config) + + assert env.backend.sandboxes[0].files == {"AGENTS.md": b"project conventions"} + + +# 
------------------------------------------------------- Cold Harness.prompt path + + +async def test_prompt_runs_and_tears_down(make_env): + env = make_env(result=AgentResult(output="done")) + harness = PiHarness(env) + + result = await harness.prompt(_config(), [Message(role="user", content="hi")]) + + assert result.output == "done" + assert env.backend.sessions[0].destroyed is True # torn down on the happy path + + +async def test_prompt_destroys_session_even_when_it_raises(make_env): + env = make_env(raise_on_prompt=True) + harness = PiHarness(env) + + with pytest.raises(RuntimeError, match="boom"): + await harness.prompt(_config(), [Message(role="user", content="hi")]) + + assert env.backend.sessions[0].destroyed is True # finally still runs + + +async def test_prompt_carries_session_id_forward(make_env): + env = make_env( + result=AgentResult(output="x", session_id="sess-new"), + result_session_id="sess-new", + ) + harness = PiHarness(env) + config = _config() + + await harness.prompt(config, [Message(role="user", content="hi")]) + + assert config.session_id == "sess-new" # next turn can resume it + + +async def test_prompt_leaves_session_id_when_result_has_none(make_env): + env = make_env(result=AgentResult(output="x", session_id=None)) + harness = PiHarness(env) + config = _config() + config.session_id = "prior" + + await harness.prompt(config, [Message(role="user", content="hi")]) + + assert config.session_id == "prior" # unchanged diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py new file mode 100644 index 0000000000..7e68d3af93 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py @@ -0,0 +1,273 @@ +"""Harness adapters: the neutral ``SessionConfig`` -> per-harness config translation. 
+ +Pi and Claude genuinely differ (Pi takes built-ins and never gates tool use; Claude has no +built-ins, delivers tools over MCP, and gates on a permission policy). Agenta is Pi with a +fixed opinion: a forced preamble, persona, tools, and skills. These tests lock that the +translation honors those differences and that ``make_harness`` validates support. +""" + +from __future__ import annotations + +import pytest + +from agenta.sdk.agents import ( + AgentaAgentConfig, + AgentaHarness, + AgentConfig, + ClaudeAgentConfig, + ClaudeHarness, + ClientToolSpec, + HarnessType, + PiAgentConfig, + PiHarness, + SessionConfig, + ToolCallback, + UnsupportedHarnessError, + make_harness, +) +from agenta.sdk.agents.adapters import harnesses +from agenta.sdk.agents.adapters.agenta_builtins import ( + AGENTA_FORCED_APPEND_SYSTEM, + AGENTA_FORCED_SKILLS, + AGENTA_FORCED_TOOLS, + AGENTA_PREAMBLE, +) +from agenta.sdk.agents.adapters.harnesses import _normalize_tool_specs, _opt_str + +_CALLBACK = ToolCallback(endpoint="https://api.example/tools/call", authorization=None) + + +def _session_config(**kwargs) -> SessionConfig: + agent = kwargs.pop("agent", AgentConfig(instructions="hi", model="m")) + return SessionConfig(agent=agent, **kwargs) + + +# --------------------------------------------------------------------------- Pi + + +def test_pi_keeps_builtins_and_native_tools(make_env): + harness = PiHarness(make_env(supported=[HarnessType.PI])) + config = _session_config( + builtin_tools=["read", "write"], + custom_tools=[{"name": "t", "callRef": "ref"}], + tool_callback=_CALLBACK, + ) + + result = harness._to_harness_config(config) + + assert isinstance(result, PiAgentConfig) + assert result.builtin_tools == ["read", "write"] + assert result.custom_tools[0]["name"] == "t" + assert result.tool_callback is _CALLBACK + assert result.agents_md == "hi" + assert result.model == "m" + + +def test_pi_reads_its_harness_options_slice(make_env): + harness = 
PiHarness(make_env(supported=[HarnessType.PI])) + agent = AgentConfig( + instructions="hi", + harness_options={ + "pi": {"system": "You are Pi.", "append_system": "Be terse."}, + "claude": {"system": "ignored for Pi"}, + }, + ) + config = _session_config(agent=agent) + + result = harness._to_harness_config(config) + + assert result.system == "You are Pi." + assert result.append_system == "Be terse." + # The Pi prompt overrides reach the wire. + assert result.wire_prompt() == { + "systemPrompt": "You are Pi.", + "appendSystemPrompt": "Be terse.", + } + + +def test_pi_drops_blank_harness_options(make_env): + harness = PiHarness(make_env(supported=[HarnessType.PI])) + agent = AgentConfig( + instructions="hi", + harness_options={"pi": {"system": " ", "append_system": ""}}, + ) + + result = harness._to_harness_config(_session_config(agent=agent)) + + assert result.system is None + assert result.append_system is None + assert result.wire_prompt() == {} + + +# ------------------------------------------------------------------------- Agenta + + +def test_agenta_forces_skills_tools_preamble_and_persona(make_env): + harness = AgentaHarness(make_env(supported=[HarnessType.AGENTA])) + config = _session_config( + agent=AgentConfig(instructions="My project rules.", model="m"), + builtin_tools=["web_search"], + custom_tools=[{"name": "t", "callRef": "ref"}], + tool_callback=_CALLBACK, + ) + + result = harness._to_harness_config(config) + + assert isinstance(result, AgentaAgentConfig) + # AGENTS.md is the base preamble with the author's instructions appended after it. + assert result.agents_md.startswith(AGENTA_PREAMBLE) + assert result.agents_md.endswith("My project rules.") + # Forced tools are unioned in (and `read` is present so Pi renders the skills section). + for forced in AGENTA_FORCED_TOOLS: + assert forced in result.builtin_tools + assert "web_search" in result.builtin_tools + assert "read" in result.builtin_tools + # Forced skills ride the config and reach the wire. 
+ assert result.skills == list(AGENTA_FORCED_SKILLS) + assert result.wire_tools()["skills"] == list(AGENTA_FORCED_SKILLS) + # The persona is forced onto append_system; custom tools and callback pass through. + assert result.append_system.startswith(AGENTA_FORCED_APPEND_SYSTEM) + assert result.custom_tools[0]["name"] == "t" + assert result.tool_callback is _CALLBACK + + +def test_agenta_forces_tools_without_duplicates(make_env): + harness = AgentaHarness(make_env(supported=[HarnessType.AGENTA])) + # `read` already configured: it must not be duplicated when forced. + config = _session_config(builtin_tools=["read"]) + + result = harness._to_harness_config(config) + + assert result.builtin_tools.count("read") == 1 + + +def test_agenta_passes_through_user_pi_options(make_env): + harness = AgentaHarness(make_env(supported=[HarnessType.AGENTA])) + agent = AgentConfig( + instructions="hi", + harness_options={"pi": {"system": "You are Pi.", "append_system": "Be terse."}}, + ) + + result = harness._to_harness_config(_session_config(agent=agent)) + + # `system` passes through; the author's `append_system` is appended after the forced persona. + assert result.system == "You are Pi." 
+ assert result.append_system.startswith(AGENTA_FORCED_APPEND_SYSTEM) + assert result.append_system.endswith("Be terse.") + + +def test_agenta_is_in_process_pi_supported(): + from agenta.sdk.agents import InProcessPiBackend + + assert InProcessPiBackend().supports(HarnessType.AGENTA) + + +# ------------------------------------------------------------------------- Claude + + +def test_claude_drops_builtins_and_warns(make_env, monkeypatch): + recorded = [] + monkeypatch.setattr( + harnesses, + "log", + type("L", (), {"warning": lambda self, *a, **k: recorded.append(a)})(), + ) + harness = ClaudeHarness(make_env(supported=[HarnessType.CLAUDE])) + config = _session_config( + builtin_tools=["read"], + custom_tools=[{"name": "t", "callRef": "ref"}], + permission_policy="deny", + ) + + result = harness._to_harness_config(config) + + assert isinstance(result, ClaudeAgentConfig) + assert not hasattr(result, "builtin_tools") # Claude has no built-in tools at all + assert result.custom_tools[0]["name"] == "t" + assert result.permission_policy == "deny" # Claude carries the policy + assert recorded, "expected a warning when built-ins are dropped" + + +def test_claude_no_warning_without_builtins(make_env, monkeypatch): + recorded = [] + monkeypatch.setattr( + harnesses, + "log", + type("L", (), {"warning": lambda self, *a, **k: recorded.append(a)})(), + ) + harness = ClaudeHarness(make_env(supported=[HarnessType.CLAUDE])) + + harness._to_harness_config(_session_config(permission_policy="auto")) + + assert recorded == [] + + +# --------------------------------------------------------------- _normalize_tool_specs + + +def test_compat_normalize_tool_specs_returns_typed_specs(): + specs = [ + {"name": "keep", "callRef": "r1"}, # missing description + inputSchema + { + "name": "full", + "description": "d", + "inputSchema": {"type": "object", "properties": {"x": {}}}, + "callRef": "r2", + }, + ] + + out = _normalize_tool_specs(specs) + + assert [spec.name for spec in out] == ["keep", 
"full"] + # description falls back to the name; inputSchema falls back to an empty object schema. + assert out[0].description == "keep" + assert out[0].input_schema == {"type": "object", "properties": {}} + assert out[0].call_ref == "r1" + # provided values are preserved. + assert out[1].description == "d" + assert out[1].input_schema["properties"] == {"x": {}} + + +def test_harness_accepts_typed_tool_specs_without_normalizing_dicts(make_env): + harness = PiHarness(make_env(supported=[HarnessType.PI])) + spec = ClientToolSpec(name="pick", description="Pick") + result = harness._to_harness_config(_session_config(tool_specs=[spec])) + assert result.tool_specs == [spec] + + +def test_normalize_tool_specs_empty(): + assert _normalize_tool_specs([]) == [] + assert _normalize_tool_specs(None) == [] + + +def test_opt_str_keeps_only_nonempty_strings(): + assert _opt_str("hi") == "hi" + assert _opt_str(" ") is None + assert _opt_str("") is None + assert _opt_str(None) is None + assert _opt_str(123) is None + + +# -------------------------------------------------------------------- make_harness + + +def test_make_harness_maps_string_to_class(make_env): + env = make_env(supported=[HarnessType.PI, HarnessType.CLAUDE, HarnessType.AGENTA]) + assert isinstance(make_harness("pi", env), PiHarness) + assert isinstance(make_harness("PI", env), PiHarness) # coerced, case-insensitive + assert isinstance(make_harness("claude", env), ClaudeHarness) + assert isinstance(make_harness(HarnessType.CLAUDE, env), ClaudeHarness) + assert isinstance(make_harness("agenta", env), AgentaHarness) + assert isinstance(make_harness(HarnessType.AGENTA, env), AgentaHarness) + + +def test_make_harness_unsupported_backend_raises(make_env): + env = make_env(supported=[HarnessType.PI]) # backend cannot drive Claude + with pytest.raises(UnsupportedHarnessError): + make_harness("claude", env) + + +def test_make_harness_unknown_name_raises(make_env): + env = make_env(supported=[HarnessType.PI]) + with 
pytest.raises(ValueError): + make_harness("bogus", env) diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py b/sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py new file mode 100644 index 0000000000..f7cce7d31c --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_ui_messages.py @@ -0,0 +1,430 @@ +"""Tests for the Vercel UI message adapter, the ``/messages`` egress adapter between the +Vercel ``UIMessage`` shape and the neutral runtime types. + +Three directions: + +- ``vercel_ui_messages_to_messages`` — inbound parts -> ``Message``; tool/approval parts are + preserved as structured ``tool_call`` / ``tool_result`` content blocks. +- ``message_to_vercel_ui_message`` — outbound ``AgentResult`` / ``Message`` -> one + ``UIMessage`` dict. +- ``agent_run_to_vercel_parts`` — a live ``AgentRun`` -> Vercel UI Message Stream parts. + +The stream tests fabricate an ``AgentRun`` from a fixed record list (the same trick +``test_streaming.py`` uses), so they are pure and need no backend. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List + +from agenta.sdk.agents import AgentRun, AgentResult, Message +from agenta.sdk.agents.adapters.vercel import ( + agent_run_to_vercel_parts, + message_to_vercel_ui_message, + vercel_ui_messages_to_messages, +) + + +async def _from_list(records: List[Dict[str, Any]]): + for record in records: + yield record + + +def _run(events: List[Dict[str, Any]], result: Dict[str, Any]) -> AgentRun: + """An ``AgentRun`` over fabricated live events plus a terminal result record.""" + records = [{"kind": "event", "event": e} for e in events] + records.append({"kind": "result", "result": {"ok": True, **result}}) + return AgentRun(_from_list(records)) + + +async def _collect(run: AgentRun, **kwargs) -> List[Dict[str, Any]]: + return [part async for part in agent_run_to_vercel_parts(run, **kwargs)] + + +# --------------------------------------------------------------------------- +# vercel_ui_messages_to_messages +# --------------------------------------------------------------------------- + + +class TestFromUIMessages: + def test_all_text_message_collapses_to_string(self): + msgs = vercel_ui_messages_to_messages( + [{"id": "m1", "role": "user", "parts": [{"type": "text", "text": "hi"}]}] + ) + assert len(msgs) == 1 + assert msgs[0].role == "user" + assert msgs[0].content == "hi" + + def test_file_part_becomes_image_or_resource_block(self): + msgs = vercel_ui_messages_to_messages( + [ + { + "id": "m1", + "role": "user", + "parts": [ + {"type": "text", "text": "look:"}, + {"type": "file", "url": "data:...", "mediaType": "image/png"}, + ], + } + ] + ) + blocks = msgs[0].content + assert [b.type for b in blocks] == ["text", "image"] + assert blocks[1].uri == "data:..." 
+ assert blocks[1].mime_type == "image/png" + + def test_tool_part_is_preserved_as_structured_blocks(self): + # A resolved tool part -> a tool_call block plus a tool_result block, keyed by + # toolCallId, with the field names the runner transcript renders. + msgs = vercel_ui_messages_to_messages( + [ + { + "id": "m2", + "role": "assistant", + "parts": [ + { + "type": "tool-getWeather", + "toolCallId": "call_1", + "state": "output-available", + "input": {"city": "Paris"}, + "output": {"weather": "sunny"}, + } + ], + } + ] + ) + wire = [b.to_wire() for b in msgs[0].content] + assert wire == [ + { + "type": "tool_call", + "toolCallId": "call_1", + "toolName": "getWeather", + "input": {"city": "Paris"}, + }, + { + "type": "tool_result", + "toolCallId": "call_1", + "toolName": "getWeather", + "output": {"weather": "sunny"}, + "isError": False, + }, + ] + + def test_tool_error_part_sets_is_error(self): + msgs = vercel_ui_messages_to_messages( + [ + { + "id": "m2", + "role": "assistant", + "parts": [ + { + "type": "tool-getWeather", + "toolCallId": "call_1", + "state": "output-error", + "input": {"city": "Paris"}, + "errorText": "boom", + } + ], + } + ] + ) + result_block = msgs[0].content[1] + assert result_block.type == "tool_result" + assert result_block.is_error is True + assert result_block.output == "boom" + + def test_approval_response_becomes_tool_result_keyed_by_call_id(self): + # The cross-turn HITL reply: a tool_result keyed by toolCallId so the runtime resumes. + msgs = vercel_ui_messages_to_messages( + [ + { + "id": "m3", + "role": "user", + "parts": [ + { + "type": "tool-approval-response", + "toolCallId": "call_1", + "approved": True, + } + ], + } + ] + ) + block = msgs[0].content[0] + assert block.type == "tool_result" + assert block.tool_call_id == "call_1" + assert block.output == {"approved": True} + + def test_approval_request_part_is_dropped_on_replay(self): + # The server's own request, echoed back; regenerated on replay, not model input. 
+ msgs = vercel_ui_messages_to_messages( + [ + { + "id": "m4", + "role": "assistant", + "parts": [ + {"type": "tool-approval-request", "approvalId": "p1"}, + {"type": "text", "text": "thinking"}, + ], + } + ] + ) + assert msgs[0].content == "thinking" + + def test_plain_role_content_message_still_parses(self): + # A non-parts {role, content} message in a mixed history falls back cleanly. + msgs = vercel_ui_messages_to_messages([{"role": "user", "content": "hello"}]) + assert msgs[0].content == "hello" + + +# --------------------------------------------------------------------------- +# message_to_vercel_ui_message +# --------------------------------------------------------------------------- + + +class TestToUIMessage: + def test_agent_result_becomes_assistant_text_message(self): + ui = message_to_vercel_ui_message(AgentResult(output="Paris."), message_id="m9") + assert ui == { + "id": "m9", + "role": "assistant", + "parts": [{"type": "text", "text": "Paris."}], + } + + def test_message_with_tool_blocks_round_trips_to_parts(self): + from agenta.sdk.agents import ContentBlock + + msg = Message( + role="assistant", + content=[ + ContentBlock( + type="tool_call", + tool_call_id="c1", + tool_name="getWeather", + input={"city": "Paris"}, + ), + ], + ) + ui = message_to_vercel_ui_message(msg) + assert ui["role"] == "assistant" + assert ui["parts"][0]["type"] == "tool-getWeather" + assert ui["parts"][0]["toolCallId"] == "c1" + + +# --------------------------------------------------------------------------- +# agent_run_to_vercel_parts +# --------------------------------------------------------------------------- + + +class TestUIMessageStream: + async def test_full_turn_part_order(self): + run = _run( + events=[ + { + "type": "tool_call", + "id": "call_1", + "name": "getWeather", + "input": {"city": "Paris"}, + }, + { + "type": "tool_result", + "id": "call_1", + "output": "sunny", + "data": {"w": "sunny"}, + }, + {"type": "message_start", "id": "t1"}, + {"type": 
"message_delta", "id": "t1", "delta": "It is sunny."}, + {"type": "message_end", "id": "t1"}, + {"type": "usage", "input": 820, "output": 36, "cost": 0.004}, + {"type": "done", "stopReason": "end_turn"}, + ], + result={"output": "It is sunny.", "sessionId": "sess_123"}, + ) + parts = await _collect(run, session_id="sess_123") + + assert [p["type"] for p in parts] == [ + "start", + "start-step", + "tool-input-start", + "tool-input-available", + "tool-output-available", + "text-start", + "text-delta", + "text-end", + "finish-step", + "finish", + ] + # start carries the session id; tool output prefers the structured `data`. + assert parts[0]["messageMetadata"] == {"sessionId": "sess_123"} + assert parts[4]["output"] == {"w": "sunny"} + # finish carries the usage and the stop reason. + assert parts[-1]["finishReason"] == "end_turn" + assert parts[-1]["messageMetadata"]["usage"] == { + "input": 820, + "output": 36, + "cost": 0.004, + } + + async def test_usage_falls_back_to_terminal_result(self): + run = _run( + events=[ + {"type": "message", "text": "hi"}, + {"type": "done", "stopReason": "end_turn"}, + ], + result={"output": "hi", "usage": {"input": 10, "output": 2}}, + ) + parts = await _collect(run, session_id="s1") + assert parts[-1]["messageMetadata"]["usage"] == {"input": 10, "output": 2} + + async def test_coalesced_message_emits_text_block(self): + run = _run( + events=[{"type": "message", "text": "Paris."}, {"type": "done"}], + result={"output": "Paris."}, + ) + parts = await _collect(run, session_id="s1") + types = [p["type"] for p in parts] + assert "text-start" in types and "text-delta" in types and "text-end" in types + delta = next(p for p in parts if p["type"] == "text-delta") + assert delta["delta"] == "Paris." 
+ + async def test_permission_interaction_becomes_approval_request(self): + run = _run( + events=[ + { + "type": "interaction_request", + "id": "perm_1", + "kind": "permission", + "payload": { + "toolCallId": "call_1", + "availableReplies": ["once", "always", "reject"], + "toolCall": {"toolCallId": "call_1", "name": "deleteFile"}, + }, + }, + {"type": "done"}, + ], + result={"output": ""}, + ) + parts = await _collect(run, session_id="s1") + approval = next(p for p in parts if p["type"] == "tool-approval-request") + assert approval["approvalId"] == "perm_1" + # REQUIRED top-level toolCallId binds the approval to its tool part (RFC / AI SDK). + assert approval["toolCallId"] == "call_1" + assert approval["availableReplies"] == ["once", "always", "reject"] + assert approval["toolCall"] == {"toolCallId": "call_1", "name": "deleteFile"} + + async def test_permission_tool_call_id_falls_back_to_nested_tool_call(self): + # No top-level toolCallId on the payload: dig it out of the nested ACP toolCall detail. + run = _run( + events=[ + { + "type": "interaction_request", + "id": "perm_2", + "kind": "permission", + "payload": { + "availableReplies": ["once", "reject"], + "toolCall": {"id": "call_9", "name": "deleteFile"}, + }, + }, + {"type": "done"}, + ], + result={"output": ""}, + ) + parts = await _collect(run, session_id="s1") + approval = next(p for p in parts if p["type"] == "tool-approval-request") + assert approval["toolCallId"] == "call_9" + + async def test_tool_denial_becomes_output_denied(self): + # A human denied the tool: it never ran, so emit tool-output-denied (not -available). 
+ run = _run( + events=[ + {"type": "tool_call", "id": "c1", "name": "deleteFile", "input": {}}, + {"type": "tool_result", "id": "c1", "denied": True}, + {"type": "done"}, + ], + result={"output": ""}, + ) + parts = await _collect(run, session_id="s1") + denied = next(p for p in parts if p["type"] == "tool-output-denied") + assert denied["toolCallId"] == "c1" + # A denied result is neither output-available nor output-error. + types = [p["type"] for p in parts] + assert "tool-output-available" not in types + assert "tool-output-error" not in types + + async def test_finish_carries_trace_id_from_param(self): + run = _run( + events=[ + {"type": "message", "text": "hi"}, + {"type": "done", "stopReason": "end_turn"}, + ], + result={"output": "hi", "usage": {"input": 10, "output": 2}}, + ) + parts = await _collect(run, session_id="s1", trace_id="abc123") + # traceId and usage coexist under the finish messageMetadata. + assert parts[-1]["messageMetadata"]["traceId"] == "abc123" + assert parts[-1]["messageMetadata"]["usage"] == {"input": 10, "output": 2} + + async def test_finish_trace_id_falls_back_to_terminal_result(self): + run = _run( + events=[ + {"type": "message", "text": "hi"}, + {"type": "done", "stopReason": "end_turn"}, + ], + result={"output": "hi", "traceId": "trace_from_result"}, + ) + parts = await _collect(run, session_id="s1") + assert parts[-1]["messageMetadata"]["traceId"] == "trace_from_result" + + async def test_render_hint_passes_through_tool_parts(self): + render = {"kind": "component", "component": "WeatherCard"} + run = _run( + events=[ + { + "type": "tool_call", + "id": "c1", + "name": "w", + "input": {}, + "render": render, + }, + { + "type": "tool_result", + "id": "c1", + "data": {"w": "sunny"}, + "render": render, + }, + {"type": "done"}, + ], + result={"output": ""}, + ) + parts = await _collect(run, session_id="s1") + available = next(p for p in parts if p["type"] == "tool-input-available") + output = next(p for p in parts if p["type"] == 
"tool-output-available") + assert available["render"] == render + assert output["render"] == render + + async def test_tool_error_becomes_output_error(self): + run = _run( + events=[ + {"type": "tool_call", "id": "c1", "name": "w", "input": {}}, + {"type": "tool_result", "id": "c1", "output": "boom", "isError": True}, + {"type": "done"}, + ], + result={"output": ""}, + ) + parts = await _collect(run, session_id="s1") + err = next(p for p in parts if p["type"] == "tool-output-error") + assert err["toolCallId"] == "c1" + assert err["errorText"] == "boom" + + async def test_terminal_failure_emits_error_part_and_no_finish(self): + records = [ + {"kind": "event", "event": {"type": "message", "text": "partial"}}, + {"kind": "result", "result": {"ok": False, "error": "kaboom"}}, + ] + run = AgentRun(_from_list(records)) + parts = [part async for part in agent_run_to_vercel_parts(run, session_id="s1")] + types = [p["type"] for p in parts] + assert types[0] == "start" + assert "finish" not in types + error = next(p for p in parts if p["type"] == "error") + assert "kaboom" in error["errorText"] diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py new file mode 100644 index 0000000000..4aa24a86b1 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py @@ -0,0 +1,301 @@ +"""The ``/run`` wire contract: ``request_to_wire`` / ``result_from_wire``. + +This is the highest-value regression guard in the agent runtime. ``wire.py`` (the Python +producer) and ``services/agent/src/protocol.ts`` (the TS consumer) are hand-mirrored, so the +two can drift silently. The golden fixtures in ``golden/`` are the shared anchor: this file +asserts the Python side against them, and the TS side asserts the same files (a later PR). + +If a field is added, renamed, or removed on the wire, a golden assertion here fails on +purpose. 
Regenerate the golden deliberately, and update ``protocol.ts`` and ``KNOWN_REQUEST_KEYS`` +to match. +""" + +from __future__ import annotations + +import pytest + +from agenta.sdk.agents import ( + AgentaAgentConfig, + ClaudeAgentConfig, + HarnessType, + Message, + PiAgentConfig, + ToolCallback, + TraceContext, +) +from agenta.sdk.agents.utils.wire import request_to_wire, result_from_wire + +# The full set of top-level keys ``request_to_wire`` may emit. The TS ``AgentRunRequest`` +# interface must declare a superset of these. Adding a key here without adding it to +# protocol.ts is exactly the drift this set exists to catch. +KNOWN_REQUEST_KEYS = { + "backend", + "harness", + "sandbox", + "sessionId", + "agentsMd", + "model", + "messages", + "secrets", + "trace", + "tools", + "customTools", + "mcpServers", + "toolCallback", + "permissionPolicy", + "systemPrompt", + "appendSystemPrompt", + "skills", +} + +_CUSTOM_TOOL = { + "name": "get_user", + "description": "Get a user", + "inputSchema": {"type": "object", "properties": {}}, + "callRef": "tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn", + "kind": "callback", +} +_CALLBACK = ToolCallback( + endpoint="https://api.example/tools/call", authorization="Access tok-123" +) + + +def _pi_payload(): + config = PiAgentConfig( + agents_md="You are a helpful assistant.", + model="openai-codex/gpt-5.5", + builtin_tools=["read", "write"], + custom_tools=[dict(_CUSTOM_TOOL)], + tool_callback=_CALLBACK, + system="You are Pi.", + append_system="Be terse.", + ) + return request_to_wire( + engine="pi", + harness=HarnessType.PI, + sandbox="local", + config=config, + messages=[Message(role="user", content="hi")], + secrets={"OPENAI_API_KEY": "sk-test"}, + trace=TraceContext( + traceparent="00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01", + endpoint="https://otlp.example/v1/traces", + authorization="Access tok-123", + capture_content=True, + ), + session_id="sess-1", + ) + + +def _claude_payload(): + config = 
ClaudeAgentConfig( + agents_md="You are a helpful assistant.", + model="claude-sonnet-4-6", + custom_tools=[dict(_CUSTOM_TOOL)], + tool_callback=_CALLBACK, + permission_policy="deny", + ) + return request_to_wire( + engine="rivet", + harness=HarnessType.CLAUDE, + sandbox="local", + config=config, + messages=[Message(role="user", content="hi")], + secrets={"ANTHROPIC_API_KEY": "sk-ant"}, + trace=None, + session_id=None, + ) + + +def _agenta_payload(): + config = AgentaAgentConfig( + agents_md="Agenta preamble + project rules.", + model="gpt-5.5", + builtin_tools=["read", "bash"], + custom_tools=[dict(_CUSTOM_TOOL)], + tool_callback=_CALLBACK, + append_system="You are an Agenta agent.", + skills=["agenta-getting-started"], + ) + return request_to_wire( + engine="pi", + harness=HarnessType.AGENTA, + sandbox="local", + config=config, + messages=[Message(role="user", content="hi")], + ) + + +def test_request_to_wire_agenta_carries_skills_and_pi_shape(): + payload = _agenta_payload() + assert set(payload) <= KNOWN_REQUEST_KEYS + # Agenta is a Pi config: same tool shape, never gates, exposes the prompt overrides... + assert payload["permissionPolicy"] == "auto" + assert payload["tools"] == ["read", "bash"] + assert payload["appendSystemPrompt"] == "You are an Agenta agent." + # ...plus the forced skills the runner loads. + assert payload["skills"] == ["agenta-getting-started"] + + +def test_request_to_wire_pi_has_no_skills_key(): + # Only the Agenta config emits `skills`; the plain Pi config must not. + assert "skills" not in _pi_payload() + + +def test_request_to_wire_pi_matches_golden(golden): + assert _pi_payload() == golden("run_request.pi.json") + + +def test_request_to_wire_claude_matches_golden(golden): + payload = _claude_payload() + assert payload == golden("run_request.claude.json") + # Claude-specific invariants the golden encodes, asserted explicitly so a failure reads clearly. 
+ assert payload["tools"] == [] # Claude has no Pi built-ins + assert payload["permissionPolicy"] == "deny" # Claude gates tool use + assert "systemPrompt" not in payload # Claude exposes no prompt overrides + assert "appendSystemPrompt" not in payload + + +def test_request_to_wire_has_no_prompt_key(): + # The serializer emits `messages` only; the TS side derives the latest turn with + # `resolvePromptText`. This asymmetry is intentional and easy to break, so lock it. + payload = request_to_wire( + engine="pi", + harness=HarnessType.PI, + sandbox="local", + config=PiAgentConfig(), + messages=[Message(role="user", content="hi")], + ) + assert "prompt" not in payload + + +def test_request_to_wire_emits_only_known_keys(): + pi = _pi_payload() + claude = _claude_payload() + assert set(pi) <= KNOWN_REQUEST_KEYS + assert set(claude) <= KNOWN_REQUEST_KEYS + # The Pi case must actually exercise the prompt-override keys, otherwise this guard would + # silently stop covering them. + assert {"systemPrompt", "appendSystemPrompt"} <= set(pi) + + +def test_pi_permission_policy_is_always_auto(): + # Pi never gates tool use, regardless of any requested policy. + payload = request_to_wire( + engine="pi", + harness=HarnessType.PI, + sandbox="local", + config=PiAgentConfig(), + messages=[Message(role="user", content="hi")], + ) + assert payload["permissionPolicy"] == "auto" + + +def test_result_from_wire_parses_ok(golden): + result = result_from_wire(golden("run_result.ok.json")) + + assert result.output == "Hello!" + assert [m.role for m in result.messages] == ["assistant"] + # The event with no `type` is dropped on parse; the other three survive. 
+ assert [e.type for e in result.events] == ["message", "usage", "done"] + assert result.events[0].data == {"type": "message", "text": "Hello!"} + assert result.usage == {"input": 10, "output": 5, "total": 15, "cost": 0.001} + assert result.stop_reason == "end_turn" + assert result.session_id == "sess-42" + assert result.model == "gpt-5.5" + assert result.trace_id == "trace-abc" + # Capabilities come back camelCase and map onto snake_case flags. + assert result.capabilities is not None + assert result.capabilities.mcp_tools is True + assert result.capabilities.images is False + assert result.capabilities.text_messages is True + + +def test_result_from_wire_raises_on_failure(golden): + with pytest.raises(RuntimeError, match="model exploded"): + result_from_wire(golden("run_result.error.json")) + + +def test_result_from_wire_minimal_ok(): + # A bare success: empty output, empty collections, no capabilities. + result = result_from_wire({"ok": True}) + assert result.output == "" + assert result.messages == [] + assert result.events == [] + assert result.capabilities is None + assert result.session_id is None + + +def test_request_to_wire_carries_code_client_and_mcp_specs(): + # The three-axes surface reaches the wire intact: a code spec keeps its executor fields + # (kind/runtime/code/env) and the orthogonal axes (needsApproval/render); a client spec + # has no callRef; user MCP servers ride `mcpServers`. 
+ config = PiAgentConfig( + custom_tools=[ + { + "name": "calc", + "description": "calc", + "inputSchema": {"type": "object", "properties": {}}, + "kind": "code", + "runtime": "python", + "code": "def main(): return 1", + "env": {"STRIPE_API_KEY": "sk"}, + "needsApproval": True, + "render": {"kind": "component", "component": "Calc"}, + }, + { + "name": "pick", + "description": "pick", + "inputSchema": {"type": "object", "properties": {}}, + "kind": "client", + }, + ], + mcp_servers=[ + { + "name": "github", + "transport": "stdio", + "command": "npx", + "env": {"GITHUB_TOKEN": "ghp"}, + "tools": ["create_issue"], + } + ], + ) + payload = request_to_wire( + engine="pi", + harness=HarnessType.PI, + sandbox="local", + config=config, + messages=[Message(role="user", content="hi")], + ) + assert set(payload) <= KNOWN_REQUEST_KEYS + code = next(t for t in payload["customTools"] if t["name"] == "calc") + assert code["kind"] == "code" + assert code["runtime"] == "python" + assert code["code"] == "def main(): return 1" + assert code["env"] == {"STRIPE_API_KEY": "sk"} + assert code["needsApproval"] is True + assert code["render"] == {"kind": "component", "component": "Calc"} + client = next(t for t in payload["customTools"] if t["name"] == "pick") + assert client["kind"] == "client" + assert "callRef" not in client + assert payload["mcpServers"] == [ + { + "name": "github", + "transport": "stdio", + "command": "npx", + "env": {"GITHUB_TOKEN": "ghp"}, + "tools": ["create_issue"], + } + ] + + +def test_request_to_wire_omits_mcp_servers_when_none(): + # No declared servers -> no `mcpServers` key (keeps a tool-free payload byte-identical). 
+ payload = request_to_wire( + engine="pi", + harness=HarnessType.PI, + sandbox="local", + config=PiAgentConfig(), + messages=[Message(role="user", content="hi")], + ) + assert "mcpServers" not in payload diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py b/sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/tools/__init__.py @@ -0,0 +1 @@ + diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py new file mode 100644 index 0000000000..f823b4f32c --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from agenta.sdk.agents.tools import ( + CallbackToolSpec, + CodeToolConfig, + CodeToolSpec, +) + + +def test_canonical_config_forbids_unexpected_fields(): + with pytest.raises(ValidationError): + CodeToolConfig( + name="calc", + script="def main(): return 1", + unexpected=True, + ) + + +def test_code_spec_serializes_only_runner_fields(): + spec = CodeToolSpec( + name="calc", + description="Calculate", + input_schema={"type": "object", "properties": {}}, + runtime="python", + code="def main(): return 1", + env={"TOKEN": "secret"}, + needs_approval=True, + render={"kind": "component", "component": "Calculator"}, + ) + assert spec.to_wire() == { + "name": "calc", + "description": "Calculate", + "inputSchema": {"type": "object", "properties": {}}, + "kind": "code", + "runtime": "python", + "code": "def main(): return 1", + "env": {"TOKEN": "secret"}, + "needsApproval": True, + "render": {"kind": "component", "component": "Calculator"}, + } + + +def test_callback_spec_has_stable_typed_contract(): + spec = CallbackToolSpec( + name="get_user", + description="Get user", + call_ref="tools.composio.github.GET_USER.c1", + ) + 
assert spec.to_wire()["kind"] == "callback" + assert spec.to_wire()["callRef"] == "tools.composio.github.GET_USER.c1" + + +def test_secret_values_are_hidden_from_repr(): + spec = CodeToolSpec( + name="private", + description="private", + code="...", + env={"TOKEN": "do-not-print"}, + ) + assert "do-not-print" not in repr(spec) diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py new file mode 100644 index 0000000000..ff6f212f9f --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import pytest + +from agenta.sdk.agents.tools import ( + BuiltinToolConfig, + GatewayToolConfig, + ToolConfigurationError, + coerce_tool_config, + coerce_tool_configs, + parse_tool_config, +) + + +def test_strict_parser_accepts_only_canonical_mapping(): + tool = parse_tool_config({"type": "builtin", "name": "read"}) + assert isinstance(tool, BuiltinToolConfig) + with pytest.raises(ToolConfigurationError): + parse_tool_config({"name": "read"}) + + +def test_compat_parser_accepts_legacy_shapes(): + assert coerce_tool_config("bash") == BuiltinToolConfig(name="bash") + gateway = coerce_tool_config( + { + "type": "composio", + "integration": "github", + "action": "GET_USER", + "connection": "c1", + } + ) + assert isinstance(gateway, GatewayToolConfig) + assert gateway.provider == "composio" + + +def test_compat_parser_accepts_playground_gateway_slug_and_metadata(): + gateway = coerce_tool_config( + { + "function": {"name": "tools__composio__github__GET_USER__c1"}, + "needs_approval": True, + "render": {"kind": "component", "component": "User"}, + } + ) + assert gateway.needs_approval is True + assert gateway.render == {"kind": "component", "component": "User"} + + +def test_collect_mode_reports_invalid_entries(): + result = coerce_tool_configs( + ["read", {"invalid": True}, None], + on_error="collect", + ) + assert 
result.tool_configs == [BuiltinToolConfig(name="read")] + assert [diagnostic.index for diagnostic in result.diagnostics] == [1, 2] + + +def test_default_compat_mode_raises_with_index(): + with pytest.raises(ToolConfigurationError) as caught: + coerce_tool_configs(["read", {"invalid": True}]) + assert caught.value.index == 1 diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py new file mode 100644 index 0000000000..7c7ef58b46 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_resolver.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +from typing import Mapping, Sequence + +import pytest + +from agenta.sdk.agents.tools import ( + BuiltinToolConfig, + CallbackToolSpec, + ClientToolConfig, + CodeToolConfig, + DuplicateToolNameError, + GatewayToolConfig, + GatewayToolResolution, + MissingSecretPolicy, + MissingToolSecretError, + ToolCallback, + ToolResolver, + UnsupportedToolProviderError, +) + + +class DictSecretProvider: + def __init__(self, values: Mapping[str, str]): + self.values = values + self.requests: list[list[str]] = [] + + async def get_many(self, names: Sequence[str]) -> Mapping[str, str]: + self.requests.append(list(names)) + return {name: self.values[name] for name in names if name in self.values} + + +class FakeGatewayResolver: + async def resolve( + self, + tools: Sequence[GatewayToolConfig], + ) -> GatewayToolResolution: + return GatewayToolResolution( + tool_specs=[ + CallbackToolSpec( + name=tool.name or f"{tool.integration}__{tool.action}", + description=tool.name or tool.action, + call_ref=tool.reference, + needs_approval=tool.needs_approval, + render=tool.render, + ) + for tool in tools + ], + tool_callback=ToolCallback(endpoint="https://example/tools/call"), + ) + + +async def test_resolves_builtin_code_client_and_scopes_secrets(): + secrets = DictSecretProvider({"A": "a", "B": "b"}) + resolved = await 
ToolResolver(secret_provider=secrets).resolve( + [ + BuiltinToolConfig(name="read"), + CodeToolConfig(name="one", script="...", secrets=["A"]), + CodeToolConfig(name="two", script="...", secrets=["B"]), + ClientToolConfig(name="pick"), + ] + ) + assert resolved.builtin_names == ["read"] + assert secrets.requests == [["A", "B"]] + by_name = {spec.name: spec for spec in resolved.tool_specs} + assert by_name["one"].env == {"A": "a"} + assert by_name["two"].env == {"B": "b"} + assert by_name["pick"].kind == "client" + + +async def test_missing_declared_secret_fails_by_default(): + resolver = ToolResolver(secret_provider=DictSecretProvider({})) + with pytest.raises(MissingToolSecretError) as caught: + await resolver.resolve( + [CodeToolConfig(name="charge", script="...", secrets=["TOKEN"])] + ) + assert caught.value.secret_names == ("TOKEN",) + + +async def test_missing_secret_can_be_explicitly_omitted_for_compatibility(): + resolved = await ToolResolver( + secret_provider=DictSecretProvider({}), + missing_secret_policy=MissingSecretPolicy.OMIT, + ).resolve([CodeToolConfig(name="charge", script="...", secrets=["TOKEN"])]) + assert resolved.tool_specs[0].env == {} + + +async def test_gateway_requires_injected_adapter(): + with pytest.raises(UnsupportedToolProviderError): + await ToolResolver().resolve( + [ + GatewayToolConfig( + integration="github", + action="GET_USER", + connection="c1", + ) + ] + ) + + +async def test_gateway_metadata_survives_resolution(): + resolved = await ToolResolver(gateway_resolver=FakeGatewayResolver()).resolve( + [ + GatewayToolConfig( + integration="github", + action="GET_USER", + connection="c1", + needs_approval=True, + render={"kind": "component", "component": "User"}, + ) + ] + ) + spec = resolved.tool_specs[0] + assert spec.needs_approval is True + assert spec.render == {"kind": "component", "component": "User"} + + +@pytest.mark.parametrize( + "configs", + [ + [BuiltinToolConfig(name="read"), BuiltinToolConfig(name="read")], + [ + 
BuiltinToolConfig(name="same"), + ClientToolConfig(name="same"), + ], + [ClientToolConfig(name="same"), ClientToolConfig(name="same")], + ], +) +async def test_duplicate_model_visible_names_are_rejected(configs): + with pytest.raises(DuplicateToolNameError): + await ToolResolver().resolve(configs) diff --git a/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py b/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py index b796680685..94d99e0fdf 100644 --- a/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py +++ b/sdks/python/oss/tests/pytest/unit/test_normalizer_passthrough.py @@ -79,6 +79,36 @@ def handler(parameters): assert kwargs["parameters"] == {"correct_answer_key": "answer"} + @pytest.mark.asyncio + async def test_session_id_is_passed_to_explicit_handler_argument(self): + def handler(session_id): + return session_id + + request = WorkflowServiceRequest( + session_id="sess_request", + data=WorkflowRequestData(), + ) + + mw = NormalizerMiddleware() + kwargs = await mw._normalize_request(request, handler) + + assert kwargs["session_id"] == "sess_request" + + @pytest.mark.asyncio + async def test_session_id_is_not_added_to_var_kwargs(self): + def handler(**kwargs): + return kwargs + + request = WorkflowServiceRequest( + session_id="sess_request", + data=WorkflowRequestData(inputs={"prompt": "hi"}), + ) + + mw = NormalizerMiddleware() + kwargs = await mw._normalize_request(request, handler) + + assert "session_id" not in kwargs + class TestAsyncGenerator: @pytest.mark.asyncio diff --git a/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py new file mode 100644 index 0000000000..89a06d6783 --- /dev/null +++ b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py @@ -0,0 +1,284 @@ +"""Tests for the agent ``/messages`` + ``/load-session`` endpoints. 
+ +Two layers: + +- Direct unit tests of the two pure Vercel routing helpers (``resolve_session_id``, + ``inject_stream_session_id``). +- HTTP tests over a Starlette ``TestClient`` driving the real ``route(flags={"is_agent": + True})`` wiring with a fake agent handler (no harness/runner). Registering on a bare + ``FastAPI`` app keeps the auth middleware out; a stand-in sets ``request.state.auth``. The + offline tracing mock (mirroring ``test_negotiation_integration``) lets ``wf.invoke`` run + without ``ag.init()``. +""" + +import json +from unittest.mock import MagicMock, patch + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from agenta.sdk.agents import Message +from agenta.sdk.agents.adapters.vercel.routing import ( + VERCEL_MESSAGE_PROTOCOL, + VERCEL_MESSAGE_PROTOCOL_VERSION, + inject_stream_session_id, + make_load_session_endpoint, + resolve_session_id, +) +from agenta.sdk.decorators.routing import route +from agenta.sdk.models.workflows import ( + LoadSessionRequest, + WorkflowBatchResponse, + WorkflowServiceStatus, + WorkflowStreamingResponse, +) + + +# --------------------------------------------------------------------------- +# Pure helpers +# --------------------------------------------------------------------------- + + +def test_resolve_session_id_mints_echoes_and_validates(): + assert resolve_session_id("sess_ok") == "sess_ok" + assert resolve_session_id(None).startswith("sess_") + assert resolve_session_id("bad id!") is None # space + '!' 
are out of charset + assert resolve_session_id("x" * 200) is None # over the length bound + + +@pytest.mark.asyncio +async def test_inject_stream_session_id_stamps_first_start_part(): + async def base(): + yield {"type": "start", "messageId": "m1"} + yield {"type": "text-delta", "id": "t1", "delta": "x"} + + resp = WorkflowStreamingResponse(generator=base) + inject_stream_session_id(resp, "sess_z") + + parts = [p async for p in resp.iterator()] + assert parts[0]["messageMetadata"]["sessionId"] == "sess_z" + assert parts[1] == {"type": "text-delta", "id": "t1", "delta": "x"} + + +# --------------------------------------------------------------------------- +# HTTP wiring +# --------------------------------------------------------------------------- + + +_UI_MESSAGE = {"role": "user", "parts": [{"type": "text", "text": "hello"}]} + + +def _assert_vercel_message_protocol(response): + assert response.headers["x-ag-messages-format"] == VERCEL_MESSAGE_PROTOCOL + assert response.headers["x-ag-messages-version"] == VERCEL_MESSAGE_PROTOCOL_VERSION + + +def _build_client() -> TestClient: + app = FastAPI() + + # Stand in for AuthMiddleware (omitted by using a bare app): the endpoints read + # ``request.state.auth``. No credentials needed — the fake handler runs locally. 
+ @app.middleware("http") + async def _fake_auth(request, call_next): + request.state.auth = {} + return await call_next(request) + + @route("/", app=app, flags={"is_agent": True}) + async def agent( + messages=None, + inputs=None, + parameters=None, + stream=None, + session_id=None, + ): + if stream: + + async def gen(): + yield {"type": "start", "messageId": "m1"} + yield {"type": "text-start", "id": "t1"} + yield {"type": "text-delta", "id": "t1", "delta": "hi"} + yield {"type": "text-end", "id": "t1"} + yield {"type": "finish"} + + return gen() + return { + "role": "assistant", + "content": "hi", + "echoed": messages, + "session_id": session_id, + } + + return TestClient(app) + + +def _build_failing_client() -> TestClient: + app = FastAPI() + + @app.middleware("http") + async def _fake_auth(request, call_next): + request.state.auth = {} + return await call_next(request) + + @route("/", app=app, flags={"is_agent": True}) + async def failing_agent(messages=None, inputs=None, parameters=None, stream=None): + return WorkflowBatchResponse( + status=WorkflowServiceStatus( + code=500, + message="tool resolution failed before stream", + type="https://agenta.ai/docs/errors#v1:sdk:tool-resolution-error", + ) + ) + + return TestClient(app) + + +@pytest.fixture() +def client(): + """A TestClient with the offline tracing mock active so ``wf.invoke`` runs without + ``ag.init()`` (same approach as ``test_negotiation_integration``).""" + with ( + patch("agenta.sdk.decorators.tracing.ag") as mock_ag, + patch("agenta.sdk.decorators.running.ag") as mock_run_ag, + ): + mock_span = MagicMock() + mock_span.is_recording.return_value = False + mock_span.get_span_context.return_value = MagicMock(trace_id=0, span_id=0) + mock_ag.tracing = MagicMock() + mock_ag.tracing.get_current_span.return_value = mock_span + mock_ag.tracing.redact = None + mock_tracer = MagicMock() + mock_tracer.start_as_current_span.return_value.__enter__ = MagicMock( + return_value=mock_span + ) + 
mock_tracer.start_as_current_span.return_value.__exit__ = MagicMock( + return_value=None + ) + mock_ag.tracer = mock_tracer + mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE = MagicMock() + mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE.api_key = None + yield _build_client() + + +def test_messages_json_mints_session_and_folds_conversation(client): + res = client.post("/messages", json={"data": {"messages": [_UI_MESSAGE]}}) + assert res.status_code == 200 + _assert_vercel_message_protocol(res) + body = res.json() + assert body["session_id"].startswith("sess_") + assert body["data"]["outputs"]["content"] == "hi" + assert body["data"]["outputs"]["session_id"] == body["session_id"] + # The Vercel UIMessage was folded to a neutral {role, content} message for the handler. + assert body["data"]["outputs"]["echoed"] == [{"role": "user", "content": "hello"}] + + +def test_messages_echoes_supplied_session_id(client): + res = client.post( + "/messages", + json={"session_id": "sess_keep", "data": {"messages": [_UI_MESSAGE]}}, + ) + assert res.status_code == 200 + _assert_vercel_message_protocol(res) + assert res.json()["session_id"] == "sess_keep" + assert res.json()["data"]["outputs"]["session_id"] == "sess_keep" + + +def test_messages_sse_streams_with_done_and_session_in_start(client): + res = client.post( + "/messages", + headers={"accept": "text/event-stream"}, + json={"session_id": "sess_abc", "data": {"messages": [_UI_MESSAGE]}}, + ) + assert res.status_code == 200 + _assert_vercel_message_protocol(res) + assert res.headers["x-vercel-ai-ui-message-stream"] == "v1" + text = res.text + assert '"sessionId": "sess_abc"' in text # stamped onto the start part + assert '"type": "text-delta"' in text + assert "data: [DONE]" in text + + +def test_messages_sse_preserves_json_error_before_stream(): + with ( + patch("agenta.sdk.decorators.tracing.ag") as mock_ag, + patch("agenta.sdk.decorators.running.ag") as mock_run_ag, + ): + mock_span = MagicMock() + 
mock_span.is_recording.return_value = False + mock_span.get_span_context.return_value = MagicMock(trace_id=0, span_id=0) + mock_ag.tracing = MagicMock() + mock_ag.tracing.get_current_span.return_value = mock_span + mock_ag.tracing.redact = None + mock_tracer = MagicMock() + mock_tracer.start_as_current_span.return_value.__enter__ = MagicMock( + return_value=mock_span + ) + mock_tracer.start_as_current_span.return_value.__exit__ = MagicMock( + return_value=None + ) + mock_ag.tracer = mock_tracer + mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE = MagicMock() + mock_run_ag.DEFAULT_AGENTA_SINGLETON_INSTANCE.api_key = None + client = _build_failing_client() + + response = client.post( + "/messages", + headers={"accept": "text/event-stream"}, + json={ + "session_id": "sess_error", + "data": {"messages": [_UI_MESSAGE]}, + }, + ) + + assert response.status_code == 500 + _assert_vercel_message_protocol(response) + assert response.headers["content-type"].startswith("application/json") + assert "x-vercel-ai-ui-message-stream" not in response.headers + body = response.json() + assert body["status"]["code"] == 500 + assert "tool resolution failed before stream" in body["status"]["message"] + assert body["session_id"] == "sess_error" + assert "[DONE]" not in response.text + + +def test_messages_rejects_invalid_session_id(client): + res = client.post( + "/messages", json={"session_id": "bad id!", "data": {"messages": []}} + ) + assert res.status_code == 400 + _assert_vercel_message_protocol(res) + + +def test_load_session_returns_stub_history(client): + res = client.post("/load-session", json={"session_id": "sess_abc"}) + assert res.status_code == 200 + _assert_vercel_message_protocol(res) + assert res.json() == {"session_id": "sess_abc", "messages": []} + + +@pytest.mark.asyncio +async def test_load_session_uses_session_store_port(): + class _Store: + async def load(self, session_id): + assert session_id == "sess_abc" + return [Message(role="user", content="hello")] + + async def 
save_turn(self, session_id, *, messages, result=None): + raise AssertionError("load-session should only load") + + endpoint = make_load_session_endpoint(session_store=_Store()) + response = await endpoint(None, LoadSessionRequest(session_id="sess_abc")) + + assert response.status_code == 200 + assert response.headers["x-ag-messages-format"] == VERCEL_MESSAGE_PROTOCOL + assert response.headers["x-ag-messages-version"] == VERCEL_MESSAGE_PROTOCOL_VERSION + assert json.loads(response.body) == { + "session_id": "sess_abc", + "messages": [ + { + "id": "msg-1", + "role": "user", + "parts": [{"type": "text", "text": "hello"}], + } + ], + } diff --git a/sdks/python/oss/tests/pytest/utils/test_routing.py b/sdks/python/oss/tests/pytest/utils/test_routing.py index 0bed6e7922..a1851e5907 100644 --- a/sdks/python/oss/tests/pytest/utils/test_routing.py +++ b/sdks/python/oss/tests/pytest/utils/test_routing.py @@ -7,6 +7,8 @@ 3. router= param — issues DeprecationWarning, falls back to prefixed registration """ +import asyncio +import json import warnings import pytest @@ -15,10 +17,13 @@ from agenta.sdk.decorators.routing import ( _RESERVED_PATHS, + _make_stream_response, _validate_path, create_app, route, ) +from agenta.sdk.agents.adapters.vercel.sse import vercel_sse_stream +from agenta.sdk.models.workflows import WorkflowStreamingResponse # --------------------------------------------------------------------------- @@ -127,6 +132,49 @@ async def foo(): assert "/foo/invoke" not in parent_schema.get("paths", {}) +# --------------------------------------------------------------------------- +# 2b. 
Agent-only endpoints (/messages + /load-session), gated on is_agent +# --------------------------------------------------------------------------- + + +class TestAgentEndpoints: + def test_is_agent_sub_app_has_messages_and_load_session(self): + app = create_app() + + @route("/chat", app=app, flags={"is_agent": True}) + async def chat(): + return {"role": "assistant", "content": "hi"} + + schema = _mounts(app)["/chat"].app.openapi() + assert "/messages" in schema["paths"] + assert "/load-session" in schema["paths"] + assert "/invoke" in schema["paths"] # the base routes are still present + + def test_non_agent_route_has_no_agent_endpoints(self): + app = create_app() + + @route("/qa", app=app) + async def qa(): + return "answer" + + schema = _mounts(app)["/qa"].app.openapi() + assert "/messages" not in schema["paths"] + assert "/load-session" not in schema["paths"] + + def test_root_agent_route_registers_on_mount_root(self): + # The agent app uses route("/", app=app, flags={"is_agent": True}); the endpoints + # land on the app itself, not a mounted sub-app. + app = create_app() + + @route("/", app=app, flags={"is_agent": True}) + async def agent(): + return {"role": "assistant", "content": "hi"} + + schema = app.openapi() + assert "/messages" in schema["paths"] + assert "/load-session" in schema["paths"] + + # --------------------------------------------------------------------------- # 3. router= deprecation warning # --------------------------------------------------------------------------- @@ -179,3 +227,76 @@ async def noisy(): mounts_after = set(_mount_paths(default_app)) # No new mounts should have appeared on default_app assert mounts_after == mounts_before + + +# --------------------------------------------------------------------------- +# 4. 
Reserved agent paths (/messages, /load-session) +# --------------------------------------------------------------------------- + + +class TestReservedAgentPaths: + def test_agent_endpoint_names_are_reserved(self): + assert {"messages", "load-session"} <= _RESERVED_PATHS + + @pytest.mark.parametrize("reserved", ["messages", "load-session"]) + def test_route_rejects_reserved_agent_path(self, reserved): + with pytest.raises(ValueError, match=reserved): + route(f"/{reserved}") + + +# --------------------------------------------------------------------------- +# 5. Vercel UI Message Stream framing +# --------------------------------------------------------------------------- + + +async def _collect(aiter): + return [chunk async for chunk in aiter] + + +def _sse_payload(chunk: str) -> str: + """The JSON body of one `data: \\n\\n` SSE event.""" + assert chunk.startswith("data: ") and chunk.endswith("\n\n") + return chunk[len("data: ") : -2] + + +class TestVercelUIMessageStream: + def test_framing_wraps_each_part_and_appends_done(self): + async def parts(): + yield {"type": "start", "messageMetadata": {"sessionId": "sess_1"}} + yield {"type": "text-delta", "id": "t1", "delta": "hi"} + yield {"type": "finish"} + + chunks = asyncio.run(_collect(vercel_sse_stream(parts()))) + + # one SSE event per part, plus the terminal [DONE] + assert len(chunks) == 4 + assert json.loads(_sse_payload(chunks[0])) == { + "type": "start", + "messageMetadata": {"sessionId": "sess_1"}, + } + assert json.loads(_sse_payload(chunks[1])) == { + "type": "text-delta", + "id": "t1", + "delta": "hi", + } + assert chunks[-1] == "data: [DONE]\n\n" + + def test_done_is_emitted_for_an_empty_stream(self): + async def parts(): + return + yield # pragma: no cover — makes this an async generator + + chunks = asyncio.run(_collect(vercel_sse_stream(parts()))) + assert chunks == ["data: [DONE]\n\n"] + + def test_make_stream_response_vercel_sets_headers_and_media_type(self): + async def parts(): + yield {"type": 
"start"} + + response = WorkflowStreamingResponse(generator=lambda: parts()) + res = _make_stream_response(response, "vercel") + + assert res.media_type == "text/event-stream" + assert res.headers["x-vercel-ai-ui-message-stream"] == "v1" + assert res.headers["cache-control"] == "no-cache" + assert res.headers["x-accel-buffering"] == "no" From 741fc7365b6ea450e658dc8ebce52205ecb213a3 Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Fri, 19 Jun 2026 22:10:54 +0200 Subject: [PATCH 2/4] fix(sdk): validate agent runner configuration --- sdks/python/agenta/sdk/agents/__init__.py | 7 ++- .../sdk/agents/adapters/_runner_config.py | 40 +++++++++++++ .../agenta/sdk/agents/adapters/in_process.py | 10 +++- .../agenta/sdk/agents/adapters/rivet.py | 10 +++- sdks/python/agenta/sdk/agents/dtos.py | 2 +- sdks/python/agenta/sdk/agents/errors.py | 10 +++- .../unit/agents/test_dtos_agent_config.py | 20 +++++++ .../unit/agents/test_harness_adapters.py | 2 +- .../unit/agents/test_runner_adapter_config.py | 60 +++++++++++++++++++ 9 files changed, 151 insertions(+), 10 deletions(-) create mode 100644 sdks/python/agenta/sdk/agents/adapters/_runner_config.py create mode 100644 sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py diff --git a/sdks/python/agenta/sdk/agents/__init__.py b/sdks/python/agenta/sdk/agents/__init__.py index b1cd4370d2..6dc3bd3196 100644 --- a/sdks/python/agenta/sdk/agents/__init__.py +++ b/sdks/python/agenta/sdk/agents/__init__.py @@ -48,7 +48,11 @@ TraceContext, to_messages, ) -from .errors import ToolResolutionError, UnsupportedHarnessError +from .errors import ( + AgentRunnerConfigurationError, + ToolResolutionError, + UnsupportedHarnessError, +) from .interfaces import ( Backend, Environment, @@ -170,6 +174,7 @@ "Environment", "Harness", # Errors + "AgentRunnerConfigurationError", "UnsupportedHarnessError", "ToolResolutionError", # Adapters diff --git a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py 
b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py new file mode 100644 index 0000000000..94398ae3f8 --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py @@ -0,0 +1,40 @@ +"""Shared constructor validation for runner-backed adapters.""" + +from __future__ import annotations + +from pathlib import Path +from typing import List, Optional, Sequence + +from ..errors import AgentRunnerConfigurationError + +DEFAULT_RUNNER_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"] +RUNNER_CLI_PATH = Path("src") / "cli.ts" + + +def resolve_runner_command( + *, + backend_name: str, + url: Optional[str], + command: Optional[Sequence[str]], + cwd: Optional[str], +) -> List[str]: + if url: + return list(command) if command is not None else list(DEFAULT_RUNNER_COMMAND) + if command is not None: + return list(command) + if not cwd: + raise AgentRunnerConfigurationError( + f"{backend_name} requires a runner transport: pass url for an HTTP runner, " + "pass command for a custom subprocess runner, or pass cwd pointing to a " + f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}." + ) + + cli_path = Path(cwd) / RUNNER_CLI_PATH + if not cli_path.is_file(): + raise AgentRunnerConfigurationError( + f"{backend_name} could not find runner CLI at {cli_path}. Pass url for an " + "HTTP runner, pass command for a custom subprocess runner, or set cwd to a " + f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}." 
+ ) + + return list(DEFAULT_RUNNER_COMMAND) diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py index bfd1528bd7..aef8d7bc64 100644 --- a/sdks/python/agenta/sdk/agents/adapters/in_process.py +++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py @@ -32,8 +32,7 @@ request_to_wire, result_from_wire, ) - -_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"] +from ._runner_config import resolve_runner_command class InProcessSandbox(Sandbox): @@ -127,7 +126,12 @@ def __init__( timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")), ) -> None: self._url = url - self._command: List[str] = list(command or _DEFAULT_COMMAND) + self._command: List[str] = resolve_runner_command( + backend_name=type(self).__name__, + url=url, + command=command, + cwd=cwd, + ) self._cwd = cwd self._timeout = timeout diff --git a/sdks/python/agenta/sdk/agents/adapters/rivet.py b/sdks/python/agenta/sdk/agents/adapters/rivet.py index 2316eb0dea..78dbee0635 100644 --- a/sdks/python/agenta/sdk/agents/adapters/rivet.py +++ b/sdks/python/agenta/sdk/agents/adapters/rivet.py @@ -32,8 +32,7 @@ request_to_wire, result_from_wire, ) - -_DEFAULT_COMMAND = ["pnpm", "exec", "tsx", "src/cli.ts"] +from ._runner_config import resolve_runner_command class RivetSandbox(Sandbox): @@ -128,7 +127,12 @@ def __init__( ) -> None: self._sandbox = sandbox self._url = url - self._command: List[str] = list(command or _DEFAULT_COMMAND) + self._command: List[str] = resolve_runner_command( + backend_name=type(self).__name__, + url=url, + command=command, + cwd=cwd, + ) self._cwd = cwd self._timeout = timeout diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py index 0a050b4cb1..d066eee132 100644 --- a/sdks/python/agenta/sdk/agents/dtos.py +++ b/sdks/python/agenta/sdk/agents/dtos.py @@ -677,7 +677,7 @@ def _parse_agent_fields( or agent.get("instructions") or defaults.instructions, agent.get("model") or 
defaults.model, - agent.get("tools"), + agent.get("tools") if agent.get("tools") is not None else defaults.tools, ) prompt_cfg = params.get("prompt") diff --git a/sdks/python/agenta/sdk/agents/errors.py b/sdks/python/agenta/sdk/agents/errors.py index b9f136a472..7517df9061 100644 --- a/sdks/python/agenta/sdk/agents/errors.py +++ b/sdks/python/agenta/sdk/agents/errors.py @@ -7,7 +7,11 @@ from .dtos import HarnessType from .tools.errors import ToolResolutionError -__all__ = ["UnsupportedHarnessError", "ToolResolutionError"] +__all__ = [ + "AgentRunnerConfigurationError", + "UnsupportedHarnessError", + "ToolResolutionError", +] if TYPE_CHECKING: from .interfaces import Backend @@ -24,3 +28,7 @@ def __init__(self, harness: HarnessType, backend: "Backend") -> None: ) self.harness = harness self.backend = backend + + +class AgentRunnerConfigurationError(RuntimeError): + """Raised when a runner-backed adapter lacks a usable transport configuration.""" diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py index f4bacd92d4..e4cf65716e 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py +++ b/sdks/python/oss/tests/pytest/unit/agents/test_dtos_agent_config.py @@ -90,6 +90,26 @@ def test_from_params_falls_back_to_defaults(): assert config.tools == [BuiltinToolConfig(name="d")] +def test_from_params_agent_element_preserves_default_tools_when_absent(): + config = AgentConfig.from_params( + {"agent": {"instructions": "I", "model": "M"}}, + defaults=_DEFAULTS, + ) + + assert config.instructions == "I" + assert config.model == "M" + assert config.tools == [BuiltinToolConfig(name="d")] + + +def test_from_params_agent_element_empty_tools_clears_defaults(): + config = AgentConfig.from_params( + {"agent": {"tools": []}}, + defaults=_DEFAULTS, + ) + + assert config.tools == [] + + def test_from_params_coerces_single_tool_dict_to_list(): config = 
AgentConfig.from_params({"agent": {"tools": {"name": "solo"}}}) assert config.tools == [BuiltinToolConfig(name="solo")] diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py index 7e68d3af93..fe0eb52fbe 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py +++ b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py @@ -159,7 +159,7 @@ def test_agenta_passes_through_user_pi_options(make_env): def test_agenta_is_in_process_pi_supported(): from agenta.sdk.agents import InProcessPiBackend - assert InProcessPiBackend().supports(HarnessType.AGENTA) + assert InProcessPiBackend(url="http://runner").supports(HarnessType.AGENTA) # ------------------------------------------------------------------------- Claude diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py new file mode 100644 index 0000000000..f71863915e --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py @@ -0,0 +1,60 @@ +"""Constructor validation for runner-backed backend adapters.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +from agenta.sdk.agents import ( + AgentRunnerConfigurationError, + InProcessPiBackend, + RivetBackend, +) + + +@pytest.fixture +def runner_dir(tmp_path: Path) -> Path: + cli = tmp_path / "src" / "cli.ts" + cli.parent.mkdir() + cli.write_text("console.log('runner')\n", encoding="utf-8") + return tmp_path + + +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +def test_default_subprocess_requires_cwd(backend_cls): + with pytest.raises(AgentRunnerConfigurationError, match="pass cwd"): + backend_cls() + + +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +def test_default_subprocess_requires_runner_cli(backend_cls, 
tmp_path: Path): + with pytest.raises(AgentRunnerConfigurationError, match="src/cli.ts"): + backend_cls(cwd=str(tmp_path)) + + +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +def test_default_subprocess_accepts_runner_wrapper_cwd(backend_cls, runner_dir: Path): + backend = backend_cls(cwd=str(runner_dir)) + + assert backend._cwd == str(runner_dir) + assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"] + + +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +def test_http_transport_does_not_require_runner_wrapper(backend_cls): + backend = backend_cls(url="http://agent-pi:8765") + + assert backend._url == "http://agent-pi:8765" + assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"] + + +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +def test_custom_command_does_not_require_runner_wrapper(backend_cls): + command = [sys.executable, "-m", "runner"] + + backend = backend_cls(command=command) + + assert backend._command == command + assert backend._cwd is None From 2a7c1299b265d0248362c23c9ae91beb11a7c034 Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Mon, 22 Jun 2026 12:32:03 +0200 Subject: [PATCH 3/4] refactor(sdk): rename rivet adapter/backend to sandbox-agent --- sdks/python/agenta/__init__.py | 2 +- sdks/python/agenta/sdk/agents/__init__.py | 8 ++-- .../agenta/sdk/agents/adapters/__init__.py | 6 +-- .../sdk/agents/adapters/_runner_config.py | 4 +- .../agenta/sdk/agents/adapters/in_process.py | 8 ++-- .../agenta/sdk/agents/adapters/local.py | 13 ++++-- .../adapters/{rivet.py => sandbox_agent.py} | 46 +++++++++++-------- sdks/python/agenta/sdk/agents/dtos.py | 2 +- sdks/python/agenta/sdk/agents/interfaces.py | 2 +- .../agenta/sdk/agents/utils/ts_runner.py | 2 +- sdks/python/agenta/sdk/agents/utils/wire.py | 2 +- .../agents/golden/run_request.claude.json | 2 +- .../unit/agents/test_harness_adapters.py | 9 ++++ .../unit/agents/test_runner_adapter_config.py | 
16 +++---- .../pytest/unit/agents/test_wire_contract.py | 2 +- 15 files changed, 72 insertions(+), 52 deletions(-) rename sdks/python/agenta/sdk/agents/adapters/{rivet.py => sandbox_agent.py} (78%) diff --git a/sdks/python/agenta/__init__.py b/sdks/python/agenta/__init__.py index dc01c3396a..15d1af84a4 100644 --- a/sdks/python/agenta/__init__.py +++ b/sdks/python/agenta/__init__.py @@ -63,7 +63,7 @@ InProcessPiBackend, LocalBackend, PiHarness, - RivetBackend, + SandboxAgentBackend, RunSelection, SessionConfig, make_harness, diff --git a/sdks/python/agenta/sdk/agents/__init__.py b/sdks/python/agenta/sdk/agents/__init__.py index 6dc3bd3196..534ca0f650 100644 --- a/sdks/python/agenta/sdk/agents/__init__.py +++ b/sdks/python/agenta/sdk/agents/__init__.py @@ -5,7 +5,7 @@ - ``dtos.py`` — data contracts (``AgentConfig``, ``SessionConfig``, ``Message``, ...). - ``interfaces.py`` — the ports (ABCs): ``Backend``, ``Environment``, ``Sandbox``, ``Session``, ``Harness``. -- ``adapters/`` — implementations: ``RivetBackend`` / ``InProcessPiBackend`` / ``LocalBackend`` +- ``adapters/`` — implementations: ``SandboxAgentBackend`` / ``InProcessPiBackend`` / ``LocalBackend`` and ``PiHarness`` / ``ClaudeHarness``. - ``utils/`` — shared plumbing (the ``/run`` wire and the transports to the TS runner). 
@@ -16,7 +16,7 @@ cfg = ag.ConfigManager.get_from_registry(app_slug="my-agent") agent = ag.AgentConfig.from_params(cfg) - harness = ag.PiHarness(ag.Environment(ag.RivetBackend())) + harness = ag.PiHarness(ag.Environment(ag.SandboxAgentBackend())) result = await harness.prompt(ag.SessionConfig(agent=agent), [Message(role="user", content="hi")]) """ @@ -26,7 +26,7 @@ InProcessPiBackend, LocalBackend, PiHarness, - RivetBackend, + SandboxAgentBackend, make_harness, ) from .dtos import ( @@ -178,7 +178,7 @@ "UnsupportedHarnessError", "ToolResolutionError", # Adapters - "RivetBackend", + "SandboxAgentBackend", "InProcessPiBackend", "LocalBackend", "PiHarness", diff --git a/sdks/python/agenta/sdk/agents/adapters/__init__.py b/sdks/python/agenta/sdk/agents/adapters/__init__.py index 30e555d82b..9cce3f7240 100644 --- a/sdks/python/agenta/sdk/agents/adapters/__init__.py +++ b/sdks/python/agenta/sdk/agents/adapters/__init__.py @@ -1,6 +1,6 @@ """Adapters: concrete implementations of the agent runtime ports. -- Backend adapters: ``RivetBackend`` (rivet over ACP), ``InProcessPiBackend`` (in-process Pi, +- Backend adapters: ``SandboxAgentBackend`` (sandbox-agent over ACP), ``InProcessPiBackend`` (in-process Pi, the reference backend), ``LocalBackend`` (standalone SDK runs; not yet implemented). - Harness adapters: ``PiHarness``, ``ClaudeHarness``, ``AgentaHarness`` (+ ``make_harness``). - HTTP/browser protocol adapters live in subpackages, e.g. ``adapters.vercel``. 
@@ -11,10 +11,10 @@ from .harnesses import AgentaHarness, ClaudeHarness, PiHarness, make_harness from .in_process import InProcessPiBackend from .local import LocalBackend -from .rivet import RivetBackend +from .sandbox_agent import SandboxAgentBackend __all__ = [ - "RivetBackend", + "SandboxAgentBackend", "InProcessPiBackend", "LocalBackend", "PiHarness", diff --git a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py index 94398ae3f8..b3daab6e2e 100644 --- a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py +++ b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py @@ -26,7 +26,7 @@ def resolve_runner_command( raise AgentRunnerConfigurationError( f"{backend_name} requires a runner transport: pass url for an HTTP runner, " "pass command for a custom subprocess runner, or pass cwd pointing to a " - f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}." + f"runner directory containing {RUNNER_CLI_PATH.as_posix()}." ) cli_path = Path(cwd) / RUNNER_CLI_PATH @@ -34,7 +34,7 @@ def resolve_runner_command( raise AgentRunnerConfigurationError( f"{backend_name} could not find runner CLI at {cli_path}. Pass url for an " "HTTP runner, pass command for a custom subprocess runner, or set cwd to a " - f"runner wrapper containing {RUNNER_CLI_PATH.as_posix()}." + f"runner directory containing {RUNNER_CLI_PATH.as_posix()}." ) return list(DEFAULT_RUNNER_COMMAND) diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py index aef8d7bc64..3a7b1a9110 100644 --- a/sdks/python/agenta/sdk/agents/adapters/in_process.py +++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py @@ -1,11 +1,11 @@ -"""InProcessPiBackend: drive Pi in-process through the TS runner, no rivet daemon. +"""InProcessPiBackend: drive Pi in-process through the TS runner, no sandbox-agent daemon. 
This was the first backend implementation and stays as the simplest one: a single harness (Pi), a single place (local), the legacy in-process Pi engine (``engines/pi.ts``). It is the reference to read when writing a new backend. It is its own class and hard-codes its differences (the ``pi`` engine, Pi-only support, -local-only). It is deliberately NOT a subclass of ``RivetBackend``; the two are different +local-only). It is deliberately NOT a subclass of ``SandboxAgentBackend``; the two are different engines that happen to share the ``utils`` wire and transport helpers. """ @@ -111,7 +111,7 @@ def stream(self, messages: Sequence[Message]) -> AgentRun: class InProcessPiBackend(Backend): """The in-process Pi engine: drives the Pi SDK directly in the TS runner. Pi only, local - only, no rivet daemon.""" + only, no sandbox-agent daemon.""" # Agenta is Pi with an opinion: same in-process engine, so this backend drives it too. supported_harnesses = frozenset({HarnessType.PI, HarnessType.AGENTA}) @@ -123,7 +123,7 @@ def __init__( url: Optional[str] = None, command: Optional[Sequence[str]] = None, cwd: Optional[str] = None, - timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")), + timeout: float = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180")), ) -> None: self._url = url self._command: List[str] = resolve_runner_command( diff --git a/sdks/python/agenta/sdk/agents/adapters/local.py b/sdks/python/agenta/sdk/agents/adapters/local.py index 5435ea4751..d0c304c793 100644 --- a/sdks/python/agenta/sdk/agents/adapters/local.py +++ b/sdks/python/agenta/sdk/agents/adapters/local.py @@ -1,16 +1,21 @@ -"""LocalBackend: run a harness on this machine, no rivet daemon and no Agenta sidecar. +"""LocalBackend: run a harness on this machine, no sandbox-agent daemon and no Agenta runner. This is the backend a standalone SDK user gets. 
It is two mechanisms, one per harness, which is exactly a backend's "plumbing per harness" job: -- Pi -> the bundled JS runner (the in-process Pi engine), shipped inside the wheel, run - with ``node``. +- Pi -> the Node agent runner (``services/agent``), driven over the subprocess transport. - Claude -> the pure-Python ``claude-agent-sdk``, in-process, no TS bridge. +NOTE on packaging: the Node runner is NOT part of this Python wheel (``pip install agenta`` +stays pure Python; the wheel contains zero ``.ts``/``.js``). How a standalone Pi user obtains +the runner -- an ``npx`` npm package, a local checkout, or a Docker sidecar over HTTP -- is an +open distribution decision; see ``docs/design/agent-workflows/typescript-structure/``. Do NOT +silently bundle a JS runner into the wheel. + NOT YET IMPLEMENTED. Tracked as Phase 3 (Pi) and Phase 4 (Claude) in ``docs/design/agent-workflows/scratch/sdk-local-backend/plan.md``. The class is present so the adapter layout is complete and the port shape is visible; the methods raise until the -bundling build step and the ``claude-agent-sdk`` wiring land. +runner-delivery decision and the ``claude-agent-sdk`` wiring land. """ from __future__ import annotations diff --git a/sdks/python/agenta/sdk/agents/adapters/rivet.py b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py similarity index 78% rename from sdks/python/agenta/sdk/agents/adapters/rivet.py rename to sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py index 78dbee0635..5fbd7898eb 100644 --- a/sdks/python/agenta/sdk/agents/adapters/rivet.py +++ b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py @@ -1,9 +1,11 @@ -"""RivetBackend: drive a harness over ACP via the TypeScript rivet runner. +"""SandboxAgentBackend: drive a harness over ACP via the TypeScript sandbox-agent runner. -This backend hard-codes that it is the rivet engine. It reaches the same runner the deployed +This backend hard-codes that it is the sandbox-agent engine. 
It reaches the same runner the deployed sidecar runs (HTTP when a ``url`` is set, otherwise a subprocess CLI), and the runner starts -the rivet daemon, the ACP adapter, and the harness. Supports Pi and Claude. The ``sandbox`` -axis (``local`` / ``daytona``) is a real runtime choice, so it stays a constructor arg. +the sandbox-agent daemon, the ACP adapter, and the harness. Supports Pi, Claude, and Agenta (Pi with +an opinion, which the runner drives on the same ``pi`` ACP agent plus forced skills). The +``sandbox`` axis (``local`` / ``daytona``) is a real runtime choice, so it stays a constructor +arg. It is its own class, not a subclass of any other backend; it shares only the ``utils`` wire and transport helpers. @@ -35,7 +37,7 @@ from ._runner_config import resolve_runner_command -class RivetSandbox(Sandbox): +class SandboxAgentSandbox(Sandbox): """Carries the sandbox axis for the run. The real sandbox (a local daemon or a Daytona VM) is created inside the TS runner; here we hold the axis and buffer provisioning files (today AGENTS.md rides the wire, so this is informational).""" @@ -48,13 +50,13 @@ async def add_files(self, files: Mapping[str, bytes]) -> None: self.files.update(files) -class RivetSession(Session): +class SandboxAgentSession(Session): """One turn-per-prompt session. 
Each prompt sends one ``/run`` (cold + replay).""" def __init__( self, - backend: "RivetBackend", - sandbox: RivetSandbox, + backend: "SandboxAgentBackend", + sandbox: SandboxAgentSandbox, config: HarnessAgentConfig, *, harness: HarnessType, @@ -77,7 +79,7 @@ def id(self) -> Optional[str]: def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]: """The ``/run`` request JSON for this turn (shared by ``prompt`` and ``stream``).""" return request_to_wire( - engine=RivetBackend._ENGINE, + engine=SandboxAgentBackend._ENGINE, harness=self._harness, sandbox=self._sandbox.sandbox_id, config=self._config, @@ -110,11 +112,13 @@ def stream(self, messages: Sequence[Message]) -> AgentRun: return AgentRun(records).on_result(self._absorb_result) -class RivetBackend(Backend): - """The rivet engine: a harness over ACP through the TS runner. Pi and Claude.""" +class SandboxAgentBackend(Backend): + """The sandbox-agent engine: a harness over ACP through the TS runner. Pi, Claude, and Agenta.""" - supported_harnesses = frozenset({HarnessType.PI, HarnessType.CLAUDE}) - _ENGINE = "rivet" # hard-coded engine identity, not a constructor arg + supported_harnesses = frozenset( + {HarnessType.PI, HarnessType.CLAUDE, HarnessType.AGENTA} + ) + _ENGINE = "sandbox-agent" # hard-coded engine identity, not a constructor arg def __init__( self, @@ -123,7 +127,7 @@ def __init__( url: Optional[str] = None, command: Optional[Sequence[str]] = None, cwd: Optional[str] = None, - timeout: float = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")), + timeout: float = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180")), ) -> None: self._sandbox = sandbox self._url = url @@ -136,8 +140,8 @@ def __init__( self._cwd = cwd self._timeout = timeout - async def create_sandbox(self) -> RivetSandbox: - return RivetSandbox(self._sandbox) + async def create_sandbox(self) -> SandboxAgentSandbox: + return SandboxAgentSandbox(self._sandbox) async def create_session( self, @@ -148,10 +152,12 @@ 
async def create_session( secrets: Optional[Mapping[str, str]] = None, trace: Optional[TraceContext] = None, session_id: Optional[str] = None, - ) -> RivetSession: - if not isinstance(sandbox, RivetSandbox): - raise TypeError("RivetBackend.create_session requires a RivetSandbox") - return RivetSession( + ) -> SandboxAgentSession: + if not isinstance(sandbox, SandboxAgentSandbox): + raise TypeError( + "SandboxAgentBackend.create_session requires a SandboxAgentSandbox" + ) + return SandboxAgentSession( self, sandbox, config, diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py index d066eee132..db089eec67 100644 --- a/sdks/python/agenta/sdk/agents/dtos.py +++ b/sdks/python/agenta/sdk/agents/dtos.py @@ -56,7 +56,7 @@ def coerce(cls, value: "HarnessType | str") -> "HarnessType": class HarnessCapabilities(BaseModel): - """What a harness can do, probed by the backend (rivet ``AgentCapabilities``). + """What a harness can do, probed by the sandbox-agent backend. Adapters branch on these flags rather than the harness name (no ``if pi``): deliver tools over MCP only when ``mcp_tools`` is set, skip image blocks without ``images``. diff --git a/sdks/python/agenta/sdk/agents/interfaces.py b/sdks/python/agenta/sdk/agents/interfaces.py index a7df7280d5..75c9858d22 100644 --- a/sdks/python/agenta/sdk/agents/interfaces.py +++ b/sdks/python/agenta/sdk/agents/interfaces.py @@ -4,7 +4,7 @@ - ``Backend`` is the engine. It declares which harnesses it can drive (``supported_harnesses``), owns sandbox + session lifecycle, and is pure plumbing: it - takes an already-harness-shaped config and launches it. Adapters: ``RivetBackend``, + takes an already-harness-shaped config and launches it. Adapters: ``SandboxAgentBackend``, ``InProcessPiBackend``, ``LocalBackend``. - ``Sandbox`` is where a session's process tree lives, plus the provisioning verb (``add_files``). 
diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py b/sdks/python/agenta/sdk/agents/utils/ts_runner.py index f7a5497d1c..b95f708ba6 100644 --- a/sdks/python/agenta/sdk/agents/utils/ts_runner.py +++ b/sdks/python/agenta/sdk/agents/utils/ts_runner.py @@ -11,7 +11,7 @@ import os from typing import Any, AsyncIterator, Dict, Optional, Sequence -_DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_TIMEOUT", "180")) +_DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180")) async def deliver_http( diff --git a/sdks/python/agenta/sdk/agents/utils/wire.py b/sdks/python/agenta/sdk/agents/utils/wire.py index b7558a4530..1b203ed287 100644 --- a/sdks/python/agenta/sdk/agents/utils/wire.py +++ b/sdks/python/agenta/sdk/agents/utils/wire.py @@ -1,6 +1,6 @@ """The ``/run`` wire contract: our DTOs <-> the runner's camelCase JSON. -Shared by the runner-backed adapters (rivet, in-process Pi). The TS side mirrors these names +Shared by the runner-backed adapters (sandbox-agent, in-process Pi). The TS side mirrors these names in ``services/agent/src/protocol.ts``, and the contract is pinned by shared golden fixtures under ``sdks/python/oss/tests/pytest/unit/agents/golden/`` (see ``test_wire_contract.py``). The caller passes the engine id explicitly, since each adapter hard-codes its own. 
diff --git a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json index 318722efe5..14944896fb 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json +++ b/sdks/python/oss/tests/pytest/unit/agents/golden/run_request.claude.json @@ -1,5 +1,5 @@ { - "backend": "rivet", + "backend": "sandbox-agent", "harness": "claude", "sandbox": "local", "sessionId": null, diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py index fe0eb52fbe..0b3b64ad43 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py +++ b/sdks/python/oss/tests/pytest/unit/agents/test_harness_adapters.py @@ -162,6 +162,15 @@ def test_agenta_is_in_process_pi_supported(): assert InProcessPiBackend(url="http://runner").supports(HarnessType.AGENTA) +def test_agenta_is_sandbox_agent_supported(): + # Agenta is Pi with an opinion, so the sandbox-agent backend drives it too (on the `pi` ACP + # agent, with the runner laying the forced skills into the sandbox). This is what lets + # `agenta` run on a non-local sandbox (e.g. daytona) instead of raising. 
+ from agenta.sdk.agents import SandboxAgentBackend + + assert SandboxAgentBackend(url="http://runner").supports(HarnessType.AGENTA) + + # ------------------------------------------------------------------------- Claude diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py index f71863915e..b60575fc8c 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py +++ b/sdks/python/oss/tests/pytest/unit/agents/test_runner_adapter_config.py @@ -10,7 +10,7 @@ from agenta.sdk.agents import ( AgentRunnerConfigurationError, InProcessPiBackend, - RivetBackend, + SandboxAgentBackend, ) @@ -22,19 +22,19 @@ def runner_dir(tmp_path: Path) -> Path: return tmp_path -@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend]) def test_default_subprocess_requires_cwd(backend_cls): with pytest.raises(AgentRunnerConfigurationError, match="pass cwd"): backend_cls() -@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend]) def test_default_subprocess_requires_runner_cli(backend_cls, tmp_path: Path): with pytest.raises(AgentRunnerConfigurationError, match="src/cli.ts"): backend_cls(cwd=str(tmp_path)) -@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend]) def test_default_subprocess_accepts_runner_wrapper_cwd(backend_cls, runner_dir: Path): backend = backend_cls(cwd=str(runner_dir)) @@ -42,15 +42,15 @@ def test_default_subprocess_accepts_runner_wrapper_cwd(backend_cls, runner_dir: assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"] -@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +@pytest.mark.parametrize("backend_cls", 
[InProcessPiBackend, SandboxAgentBackend]) def test_http_transport_does_not_require_runner_wrapper(backend_cls): - backend = backend_cls(url="http://agent-pi:8765") + backend = backend_cls(url="http://sandbox-agent:8765") - assert backend._url == "http://agent-pi:8765" + assert backend._url == "http://sandbox-agent:8765" assert backend._command == ["pnpm", "exec", "tsx", "src/cli.ts"] -@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, RivetBackend]) +@pytest.mark.parametrize("backend_cls", [InProcessPiBackend, SandboxAgentBackend]) def test_custom_command_does_not_require_runner_wrapper(backend_cls): command = [sys.executable, "-m", "runner"] diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py index 4aa24a86b1..c7f9497495 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py +++ b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py @@ -96,7 +96,7 @@ def _claude_payload(): permission_policy="deny", ) return request_to_wire( - engine="rivet", + engine="sandbox-agent", harness=HarnessType.CLAUDE, sandbox="local", config=config, From 0beb1207f7b5e8837dac38b74feb58abe54fc068 Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Mon, 22 Jun 2026 14:16:26 +0200 Subject: [PATCH 4/4] fix(sdk): address review feedback (locking, input validation, stream/error handling) --- .../sdk/agents/adapters/_runner_config.py | 18 +++++++++-- .../agenta/sdk/agents/adapters/in_process.py | 5 +++- .../sdk/agents/adapters/sandbox_agent.py | 7 ++++- .../sdk/agents/adapters/vercel/routing.py | 12 ++++++++ sdks/python/agenta/sdk/agents/dtos.py | 18 +++++++---- sdks/python/agenta/sdk/agents/interfaces.py | 6 +++- sdks/python/agenta/sdk/agents/mcp/models.py | 8 +++++ sdks/python/agenta/sdk/agents/streaming.py | 8 +++++ sdks/python/agenta/sdk/agents/tools/compat.py | 7 ++++- .../agenta/sdk/agents/utils/ts_runner.py | 30 ++++++++++++++++--- 
.../pytest/unit/agents/tools/test_parsing.py | 25 ++++++++++++++++ .../pytest/utils/test_messages_endpoint.py | 14 +++++++-- 12 files changed, 141 insertions(+), 17 deletions(-) diff --git a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py index b3daab6e2e..a8b01531e6 100644 --- a/sdks/python/agenta/sdk/agents/adapters/_runner_config.py +++ b/sdks/python/agenta/sdk/agents/adapters/_runner_config.py @@ -18,10 +18,24 @@ def resolve_runner_command( command: Optional[Sequence[str]], cwd: Optional[str], ) -> List[str]: + def _validated_command(raw: Sequence[str]) -> List[str]: + cmd = list(raw) + if not cmd: + raise AgentRunnerConfigurationError( + f"{backend_name} received an empty command. Pass a non-empty command, " + "pass url for an HTTP runner, or set cwd to a runner directory containing " + f"{RUNNER_CLI_PATH.as_posix()}." + ) + return cmd + if url: - return list(command) if command is not None else list(DEFAULT_RUNNER_COMMAND) + return ( + _validated_command(command) + if command is not None + else list(DEFAULT_RUNNER_COMMAND) + ) if command is not None: - return list(command) + return _validated_command(command) if not cwd: raise AgentRunnerConfigurationError( f"{backend_name} requires a runner transport: pass url for an HTTP runner, " diff --git a/sdks/python/agenta/sdk/agents/adapters/in_process.py b/sdks/python/agenta/sdk/agents/adapters/in_process.py index 3a7b1a9110..114d0aa79f 100644 --- a/sdks/python/agenta/sdk/agents/adapters/in_process.py +++ b/sdks/python/agenta/sdk/agents/adapters/in_process.py @@ -54,12 +54,14 @@ def __init__( backend: "InProcessPiBackend", config: HarnessAgentConfig, *, + harness: HarnessType, secrets: Optional[Mapping[str, str]], trace: Optional[TraceContext], session_id: Optional[str], ) -> None: self._backend = backend self._config = config + self._harness = harness self._secrets = dict(secrets or {}) self._trace = trace self._session_id = session_id @@ -72,7 
+74,7 @@ def _wire_payload(self, messages: Sequence[Message]) -> Dict[str, Any]: """The ``/run`` request JSON for this turn (shared by ``prompt`` and ``stream``).""" return request_to_wire( engine=InProcessPiBackend._ENGINE, - harness=HarnessType.PI, + harness=self._harness, sandbox="local", config=self._config, messages=messages, @@ -151,6 +153,7 @@ async def create_session( return InProcessPiSession( self, config, + harness=harness, secrets=secrets, trace=trace, session_id=session_id, diff --git a/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py index 5fbd7898eb..24f0f84781 100644 --- a/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py +++ b/sdks/python/agenta/sdk/agents/adapters/sandbox_agent.py @@ -13,6 +13,7 @@ from __future__ import annotations +import logging import os from typing import Any, AsyncIterator, Dict, List, Mapping, Optional, Sequence @@ -36,6 +37,8 @@ ) from ._runner_config import resolve_runner_command +_log = logging.getLogger(__name__) + class SandboxAgentSandbox(Sandbox): """Carries the sandbox axis for the run. The real sandbox (a local daemon or a Daytona @@ -193,4 +196,6 @@ def _emit_events(result: AgentResult, on_event: Optional[EventSink]) -> None: try: on_event(event) except Exception: # pylint: disable=broad-except - pass + # The sink is caller-provided; don't let it crash the result. Log at debug so a + # misbehaving sink is still diagnosable. 
+ _log.debug("event sink raised; suppressing", exc_info=True) diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py index a854ca0460..5f7c2cc37f 100644 --- a/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py +++ b/sdks/python/agenta/sdk/agents/adapters/vercel/routing.py @@ -156,6 +156,18 @@ def make_load_session_endpoint( store = session_store or NoopSessionStore() async def load_session_endpoint(req: Request, request: LoadSessionRequest): + # Gate the id with the same charset/length bound as ``/messages`` before it reaches + # the store, so both endpoints share one trust boundary. Unlike ``/messages`` we never + # mint here: loading needs an existing id, so an absent/invalid one is a 400. + if not _SESSION_ID_RE.match(request.session_id or ""): + return set_vercel_message_protocol_headers( + JSONResponse( + status_code=400, + content={ + "detail": "session_id violates the allowed charset/length" + }, + ) + ) messages = await store.load(request.session_id) response = LoadSessionResponse( session_id=request.session_id, diff --git a/sdks/python/agenta/sdk/agents/dtos.py b/sdks/python/agenta/sdk/agents/dtos.py index db089eec67..44629c3bb9 100644 --- a/sdks/python/agenta/sdk/agents/dtos.py +++ b/sdks/python/agenta/sdk/agents/dtos.py @@ -11,7 +11,7 @@ from __future__ import annotations from enum import Enum -from typing import Any, Callable, ClassVar, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, ClassVar, Dict, List, Literal, Optional, Tuple, Union from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator @@ -47,7 +47,7 @@ def coerce(cls, value: "HarnessType | str") -> "HarnessType": # Permission policy for harness tool use in a headless run. ``auto`` approves (tools are # backend-resolved and trusted, no human to prompt); ``deny`` rejects. 
-PermissionPolicy = str # "auto" | "deny" +PermissionPolicy = Literal["auto", "deny"] # --------------------------------------------------------------------------- @@ -180,10 +180,18 @@ def from_raw(cls, raw: Any) -> "ContentBlock": class Message(BaseModel): - """A chat message in the conversation. ``content`` is text or content blocks. + """A chat message in an agent-runtime conversation. ``content`` is text or content blocks. - This is the runtime's own message type, distinct from the SDK's prompt ``Message`` - (``agenta.Message``); the two serve different layers. + Two unrelated types share the name ``Message`` in this SDK, on purpose, for two layers: + + - this one — the agent runtime's conversation message, imported from + ``agenta.sdk.agents`` (it is deliberately *not* re-exported as ``agenta.Message``); + - the prompt-template message ``agenta.Message`` (``agenta.sdk.utils.types.Message``), + used by the prompt/completion layer. + + They never appear together in the same call, so the namespacing (top-level vs. + ``agenta.sdk.agents``) is what keeps them apart. Import the agents one explicitly when you + need both in one module. 
""" role: str diff --git a/sdks/python/agenta/sdk/agents/interfaces.py b/sdks/python/agenta/sdk/agents/interfaces.py index 75c9858d22..e03fb646a6 100644 --- a/sdks/python/agenta/sdk/agents/interfaces.py +++ b/sdks/python/agenta/sdk/agents/interfaces.py @@ -17,6 +17,7 @@ from __future__ import annotations +import asyncio from abc import ABC, abstractmethod from typing import ClassVar, FrozenSet, Mapping, Optional, Sequence @@ -185,6 +186,7 @@ def __init__(self, backend: Backend, *, sandbox_per_session: bool = True) -> Non self._backend = backend self._sandbox_per_session = sandbox_per_session self._shared: Optional[Sandbox] = None + self._shared_lock = asyncio.Lock() @property def backend(self) -> Backend: @@ -203,7 +205,9 @@ async def _sandbox(self) -> Sandbox: if self._sandbox_per_session: return await self._backend.create_sandbox() if self._shared is None: - self._shared = await self._backend.create_sandbox() + async with self._shared_lock: + if self._shared is None: + self._shared = await self._backend.create_sandbox() return self._shared async def create_session( diff --git a/sdks/python/agenta/sdk/agents/mcp/models.py b/sdks/python/agenta/sdk/agents/mcp/models.py index e4df7f87e5..37c3f6806b 100644 --- a/sdks/python/agenta/sdk/agents/mcp/models.py +++ b/sdks/python/agenta/sdk/agents/mcp/models.py @@ -39,6 +39,14 @@ class ResolvedMCPServer(BaseModel): url: Optional[str] = None tools: List[str] = Field(default_factory=list) + @model_validator(mode="after") + def _validate_transport(self) -> "ResolvedMCPServer": + if self.transport == "stdio" and not self.command: + raise ValueError("stdio MCP server requires command") + if self.transport == "http" and not self.url: + raise ValueError("http MCP server requires url") + return self + def to_wire(self) -> Dict[str, Any]: wire: Dict[str, Any] = { "name": self.name, diff --git a/sdks/python/agenta/sdk/agents/streaming.py b/sdks/python/agenta/sdk/agents/streaming.py index e631d0ecdc..2ae86ea6fc 100644 --- 
a/sdks/python/agenta/sdk/agents/streaming.py +++ b/sdks/python/agenta/sdk/agents/streaming.py @@ -62,6 +62,7 @@ def on_cleanup(self, cleanup: Cleanup) -> "AgentRun": return self async def __aiter__(self) -> AsyncIterator[AgentEvent]: + saw_terminal = False try: async for record in self._records: kind = record.get("kind") @@ -74,7 +75,14 @@ async def __aiter__(self) -> AsyncIterator[AgentEvent]: self._result = result_from_wire(record.get("result") or {}) for hook in self._result_hooks: hook(self._result) + saw_terminal = True return + if not saw_terminal: + # A truncated stream (runner disconnect/early exit) would otherwise leave + # ``result()`` raising an opaque "not available" later; fail loud here instead. + raise RuntimeError( + "AgentRun stream ended without a terminal result record" + ) finally: for cleanup in self._cleanups: try: diff --git a/sdks/python/agenta/sdk/agents/tools/compat.py b/sdks/python/agenta/sdk/agents/tools/compat.py index e356abfdde..d2ddf16b4b 100644 --- a/sdks/python/agenta/sdk/agents/tools/compat.py +++ b/sdks/python/agenta/sdk/agents/tools/compat.py @@ -51,7 +51,9 @@ def _copy_tool_metadata( ) -> dict[str, Any]: result = dict(target) if "needs_approval" in source: - result["needs_approval"] = bool(source["needs_approval"]) + # Pass the raw value through; the model's bool field coerces it correctly. Using + # ``bool(...)`` here would flip legacy string payloads (``"false"`` -> ``True``). 
+ result["needs_approval"] = source["needs_approval"] if isinstance(source.get("render"), dict): result["render"] = dict(source["render"]) return result @@ -102,6 +104,9 @@ def coerce_tool_configs( on_error: Literal["raise", "collect"] = "raise", ) -> ToolConfigParseResult: """Convert legacy values, either raising or returning structured diagnostics.""" + if on_error not in {"raise", "collect"}: + raise ValueError("on_error must be 'raise' or 'collect'") + tool_configs: list[ToolConfig] = [] diagnostics: list[ToolConfigDiagnostic] = [] for index, value in enumerate(values or []): diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py b/sdks/python/agenta/sdk/agents/utils/ts_runner.py index b95f708ba6..590f47cd1c 100644 --- a/sdks/python/agenta/sdk/agents/utils/ts_runner.py +++ b/sdks/python/agenta/sdk/agents/utils/ts_runner.py @@ -26,7 +26,9 @@ async def deliver_http( url = base_url.rstrip("/") + "/run" async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post(url, json=payload) - if response.status_code >= 500: + # Any non-2xx is a transport failure; 4xx left to fall through surfaces as an opaque + # JSON parse error instead of a clear runner failure. 
+ if response.status_code >= 400: raise RuntimeError( f"Agent runner HTTP {response.status_code}: {response.text[:1000]}" ) @@ -101,11 +103,12 @@ async def deliver_http_stream( url = base_url.rstrip("/") + "/run" headers = {"Accept": "application/x-ndjson"} + saw_result = False async with httpx.AsyncClient(timeout=timeout) as client: async with client.stream( "POST", url, json=payload, headers=headers ) as response: - if response.status_code >= 500: + if response.status_code >= 400: body = await response.aread() raise RuntimeError( f"Agent runner HTTP {response.status_code}: {body[:1000]!r}" @@ -113,7 +116,12 @@ async def deliver_http_stream( async for line in response.aiter_lines(): line = line.strip() if line: - yield json.loads(line) + record = json.loads(line) + if record.get("kind") == "result": + saw_result = True + yield record + if not saw_result: + raise RuntimeError("Agent runner stream ended without a terminal result record") async def deliver_subprocess_stream( @@ -143,6 +151,7 @@ async def deliver_subprocess_stream( proc.stdin.close() loop = asyncio.get_event_loop() deadline = loop.time() + timeout + saw_result = False try: while True: remaining = deadline - loop.time() @@ -155,8 +164,21 @@ async def deliver_subprocess_stream( break line = raw.decode("utf-8", "replace").strip() if line: - yield json.loads(line) + record = json.loads(line) + if record.get("kind") == "result": + saw_result = True + yield record await proc.wait() + # A clean drain that never produced a terminal result means the runner exited or + # disconnected early; fail loud rather than leaving the consumer without a result. + if not saw_result: + err = b"" + if proc.stderr is not None: + err = await proc.stderr.read() + raise RuntimeError( + "Agent runner stream ended without a terminal result record. 
" + f"exit={proc.returncode} stderr={err.decode('utf-8', 'replace')[-2000:]}" + ) finally: if proc.returncode is None: proc.kill() diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py index ff6f212f9f..2f707a7ab5 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py +++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py @@ -45,6 +45,31 @@ def test_compat_parser_accepts_playground_gateway_slug_and_metadata(): assert gateway.render == {"kind": "component", "component": "User"} +def test_compat_parser_does_not_flip_string_false_needs_approval(): + # Legacy payloads may carry the flag as the string "false"; it must not coerce to True + # (a plain ``bool("false")`` would). + gateway = coerce_tool_config( + { + "function": {"name": "tools__composio__github__GET_USER__c1"}, + "needs_approval": "false", + } + ) + assert gateway.needs_approval is False + + approved = coerce_tool_config( + { + "function": {"name": "tools__composio__github__GET_USER__c1"}, + "needs_approval": "true", + } + ) + assert approved.needs_approval is True + + +def test_coerce_tool_configs_rejects_invalid_on_error(): + with pytest.raises(ValueError): + coerce_tool_configs(["read"], on_error="bogus") # type: ignore[arg-type] + + def test_collect_mode_reports_invalid_entries(): result = coerce_tool_configs( ["read", {"invalid": True}, None], diff --git a/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py index 89a06d6783..4ade145a1c 100644 --- a/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py +++ b/sdks/python/oss/tests/pytest/utils/test_messages_endpoint.py @@ -193,8 +193,18 @@ def test_messages_sse_streams_with_done_and_session_in_start(client): _assert_vercel_message_protocol(res) assert res.headers["x-vercel-ai-ui-message-stream"] == "v1" text = res.text - assert '"sessionId": "sess_abc"' in 
text # stamped onto the start part - assert '"type": "text-delta"' in text + # Parse the SSE payloads so the check survives serializer formatting changes (whitespace, + # key order) rather than matching a literal JSON substring. + payloads = [ + json.loads(line.removeprefix("data: ")) + for line in text.splitlines() + if line.startswith("data: ") and line != "data: [DONE]" + ] + start = next(p for p in payloads if p.get("type") == "start") + assert ( + start["messageMetadata"]["sessionId"] == "sess_abc" + ) # stamped onto the start part + assert any(p.get("type") == "text-delta" for p in payloads) assert "data: [DONE]" in text