friday-platform · ljagiello · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -87,12 +87,19 @@ jobs:
               agent, run, ok, err,
               AgentContext, AgentResult, AgentExtras,
               ArtifactRef, OutlineRef, OkResult, ErrResult,
-              Http, HttpError, HttpResponse,
-              Llm, LlmError, LlmResponse,
-              SessionData, SkillDefinition, StreamEmitter,
-              ToolCallError, ToolDefinition, Tools,
+              Http, HttpError, HttpProtocol, HttpResponse,
+              Llm, LlmError, LlmProtocol, LlmResponse,
+              SessionData, SkillDefinition, StreamEmitter, StreamProtocol,
+              ToolCallError, ToolDefinition, Tools, ToolsProtocol,
+              make_test_context,
               parse_input, parse_operation,
           )
+          from friday_agent_sdk.testing import FakeLlm, FakeHttp, FakeTools, FakeStream
           assert __version__, "friday_agent_sdk.__version__ should be set"
+          ctx = make_test_context()
+          assert isinstance(ctx.llm, LlmProtocol)
+          assert isinstance(ctx.http, HttpProtocol)
+          assert isinstance(ctx.tools, ToolsProtocol)
+          assert isinstance(ctx.stream, StreamProtocol)
           print(f"OK: friday-agent-sdk {__version__} installs and imports cleanly")
           PY
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Capability protocols `LlmProtocol`, `HttpProtocol`, `ToolsProtocol`, and `StreamProtocol` (re-exported from `friday_agent_sdk`). The `AgentContext` capability fields are now typed as protocols so agents can be unit-tested by substituting any object that satisfies the protocol.
+- New public module `friday_agent_sdk.testing` with `make_test_context()` plus `FakeLlm`, `FakeHttp`, `FakeTools`, and `FakeStream` helpers. `make_test_context()` is also re-exported at the top level for convenience.
+
 ## [0.1.0] - 2026-04-30
 
 Initial public release. **Alpha — APIs may change.**

diff --git a/packages/python/docs/README.md b/packages/python/docs/README.md
@@ -21,6 +21,7 @@ Task-focused recipes for common patterns:
 | [Use MCP tools](how-to/use-mcp-tools.md)                     | Invoke Model Context Protocol servers                        |
 | [Handle structured input](how-to/handle-structured-input.md) | Extract JSON from Friday's enriched prompts                  |
 | [Stream progress](how-to/stream-progress.md)                 | Emit real-time updates to the UI                             |
+| [Unit-test agents](how-to/unit-test-agents.md)               | Test agent handlers with `make_test_context()` and fakes     |
 
 ## Reference
 

diff --git a/packages/python/docs/how-to/unit-test-agents.md b/packages/python/docs/how-to/unit-test-agents.md
@@ -0,0 +1,162 @@
+# Unit-test agents
+
+Friday agents are plain Python functions that take a prompt and an
+`AgentContext`. To test them in isolation you need an `AgentContext` whose
+capability fields don't actually talk to the daemon — that's what
+`make_test_context()` is for.
+
+## The 30-second version
+
+```python
+from friday_agent_sdk import make_test_context
+
+from my_agent import execute  # the @agent-decorated handler
+
+
+def test_echoes_prompt():
+    ctx = make_test_context()
+    result = execute("hello", ctx)
+    assert result.data == "hello"
+```
+
+`make_test_context()` returns an `AgentContext` with `Fake*` instances for `ctx.llm`,
+`ctx.http`, `ctx.tools`, and `ctx.stream` — no daemon, no NATS, no API keys. Each fake
+records every call and returns a permissive default, except `FakeTools`, which raises `ToolCallError` for unregistered tools.
+
+## Asserting on emitted progress
+
+`FakeStream` records every event into `ctx.stream.events`:
+
+```python
+from friday_agent_sdk import make_test_context
+
+
+def test_emits_progress():
+    ctx = make_test_context()
+
+    execute("hi", ctx)
+
+    assert ctx.stream.events == [
+        ("data-tool-progress", {"toolName": "agent", "content": "Starting"}),
+        ("data-tool-progress", {"toolName": "agent", "content": "Done"}),
+    ]
+```
+
+## Stubbing LLM responses
+
+Pass canned responses (FIFO queue) or a callable:
+
+```python
+from friday_agent_sdk import LlmResponse, make_test_context
+from friday_agent_sdk.testing import FakeLlm
+
+
+def test_uses_llm_output():
+    fake_llm = FakeLlm(
+        responses=[
+            LlmResponse(
+                text="42",
+                object=None,
+                model="fake",
+                usage={},
+                finish_reason="stop",
+            )
+        ]
+    )
+    ctx = make_test_context(llm=fake_llm)
+
+    result = execute("what is 6 * 7?", ctx)
+
+    assert result.data == "42"
+    assert fake_llm.calls[0]["messages"][-1]["content"] == "what is 6 * 7?"
+```
+
+For dynamic responses, use `on_generate`:
+
+```python
+fake_llm = FakeLlm(
+    on_generate=lambda messages, **kwargs: LlmResponse(
+        text=f"echo: {messages[-1]['content']}",
+        object=None,
+        model="fake",
+        usage={},
+        finish_reason="stop",
+    )
+)
+```
+
+## Stubbing HTTP responses
+
+```python
+from friday_agent_sdk import HttpResponse, make_test_context
+from friday_agent_sdk.testing import FakeHttp
+
+
+fake_http = FakeHttp(
+    responses=[
+        HttpResponse(status=200, headers={}, body='{"ok": true}'),
+        HttpResponse(status=429, headers={"Retry-After": "10"}, body=""),
+    ]
+)
+ctx = make_test_context(http=fake_http)
+```
+
+`FakeHttp` also accepts an `on_fetch=callable` for URL-aware logic.
+
+## Stubbing tool calls
+
+`FakeTools` requires you to register handlers explicitly — unhandled calls
+raise `ToolCallError` rather than silently returning empty results:
+
+```python
+from friday_agent_sdk import ToolDefinition, make_test_context
+from friday_agent_sdk.testing import FakeTools
+
+
+fake_tools = FakeTools(
+    tools=[
+        ToolDefinition(name="add", description="adds two numbers", input_schema={}),
+    ],
+    handlers={"add": lambda args: {"sum": args["a"] + args["b"]}},
+)
+ctx = make_test_context(tools=fake_tools)
+```
+
+## Custom protocol implementations
+
+You don't have to use the `Fake*` classes. The capability fields are typed as
+**protocols** (`LlmProtocol`, `HttpProtocol`, `ToolsProtocol`, `StreamProtocol`),
+so any object with the right method shape is accepted:
+
+```python
+class RecordingLlm:
+    def __init__(self):
+        self.seen: list[str] = []
+
+    def generate(self, messages, **kwargs):
+        self.seen.append(messages[-1]["content"])
+        return LlmResponse(text="ok", object=None, model="x", usage={}, finish_reason="stop")
+
+    def generate_object(self, messages, schema, **kwargs):
+        return LlmResponse(text=None, object={}, model="x", usage={}, finish_reason="stop")
+
+
+ctx = make_test_context(llm=RecordingLlm())
+```
+
+This is the same mechanism a custom production gateway would use — the
+protocols are the contract, the `Fake*` classes are convenience.
+
+## What goes in `make_test_context()`
+
+| Argument        | Type                    | Default        |
+| --------------- | ----------------------- | -------------- |
+| `env`           | `dict[str, str]`        | `{}`           |
+| `config`        | `dict`                  | `{}`           |
+| `skills`        | `list[SkillDefinition]` | `[]`           |
+| `session`       | `SessionData?`          | `None`         |
+| `output_schema` | `dict?`                 | `None`         |
+| `llm`           | `LlmProtocol?`          | `FakeLlm()`    |
+| `http`          | `HttpProtocol?`         | `FakeHttp()`   |
+| `tools`         | `ToolsProtocol?`        | `FakeTools()`  |
+| `stream`        | `StreamProtocol?`       | `FakeStream()` |
diff --git a/packages/python/docs/reference/agent-context.md b/packages/python/docs/reference/agent-context.md
@@ -11,12 +11,22 @@ class AgentContext:
     config: dict = field(default_factory=dict)
     session: SessionData | None = None
     output_schema: dict | None = None
-    tools: Tools = field(default_factory=_uninitialized_tools)
-    llm: Llm = field(default_factory=_uninitialized_llm)
-    http: Http = field(default_factory=_uninitialized_http)
-    stream: StreamEmitter = field(default_factory=_uninitialized_stream)
+    tools: ToolsProtocol = field(default_factory=_uninitialized_tools)
+    llm: LlmProtocol = field(default_factory=_uninitialized_llm)
+    http: HttpProtocol = field(default_factory=_uninitialized_http)
+    stream: StreamProtocol = field(default_factory=_uninitialized_stream)
 ```
 
+The capability fields are typed as **structural protocols** so test doubles
+or custom gateways can be substituted without subclassing. The default
+factories return the production NATS-backed `Tools` / `Llm` / `Http` /
+`StreamEmitter` classes, which satisfy the protocols natively but raise a clear error if called outside the host environment.
+
+For unit tests, use
+[`make_test_context()`](../how-to/unit-test-agents.md) — it wires up
+`FakeTools`, `FakeLlm`, `FakeHttp`, and `FakeStream` so handlers can run
+without a Friday daemon.
+
 ## Fields
 
 ### `env`

diff --git a/packages/python/friday_agent_sdk/__init__.py b/packages/python/friday_agent_sdk/__init__.py
@@ -19,17 +19,22 @@
     AgentContext,
     Http,
     HttpError,
+    HttpProtocol,
     HttpResponse,
     Llm,
     LlmError,
+    LlmProtocol,
     LlmResponse,
     SessionData,
     SkillDefinition,
     StreamEmitter,
+    StreamProtocol,
     ToolCallError,
     ToolDefinition,
     Tools,
+    ToolsProtocol,
 )
+from friday_agent_sdk.testing import make_test_context
 
 __all__ = [
     "AgentContext",
@@ -39,21 +44,26 @@
     "ErrResult",
     "Http",
     "HttpError",
+    "HttpProtocol",
     "HttpResponse",
     "Llm",
     "LlmError",
+    "LlmProtocol",
     "LlmResponse",
     "OkResult",
     "OutlineRef",
     "SessionData",
     "SkillDefinition",
     "StreamEmitter",
+    "StreamProtocol",
     "ToolCallError",
     "ToolDefinition",
     "Tools",
+    "ToolsProtocol",
     "__version__",
     "agent",
     "err",
+    "make_test_context",
     "ok",
     "parse_input",
     "parse_operation",

diff --git a/packages/python/friday_agent_sdk/_types.py b/packages/python/friday_agent_sdk/_types.py
@@ -3,7 +3,7 @@
 import json
 from collections.abc import Callable
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, Protocol, runtime_checkable
 
 
 class ToolCallError(Exception):
@@ -244,6 +244,77 @@ class SessionData:
     datetime: str
 
 
+# ---------------------------------------------------------------------------
+# Capability protocols — structural types so users can substitute test doubles
+# (or their own gateways) for ctx.llm / ctx.http / ctx.tools / ctx.stream.
+#
+# Concrete implementations: ``Llm``/``Http``/``Tools``/``StreamEmitter`` (the
+# NATS-backed wrappers above) and the ``Fake*`` classes in
+# ``friday_agent_sdk.testing``.
+# ---------------------------------------------------------------------------
+
+
+@runtime_checkable
+class ToolsProtocol(Protocol):
+    """Structural type for the tools capability (`ctx.tools`)."""
+
+    def call(self, name: str, args: dict) -> dict: ...
+
+    def list(self) -> list[ToolDefinition]: ...
+
+
+@runtime_checkable
+class LlmProtocol(Protocol):
+    """Structural type for the LLM capability (`ctx.llm`)."""
+
+    def generate(
+        self,
+        messages: list[dict[str, str]],
+        *,
+        model: str | None = None,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        provider_options: dict | None = None,
+    ) -> LlmResponse: ...
+
+    def generate_object(
+        self,
+        messages: list[dict[str, str]],
+        schema: dict,
+        *,
+        model: str | None = None,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        provider_options: dict | None = None,
+    ) -> LlmResponse: ...
+
+
+@runtime_checkable
+class HttpProtocol(Protocol):
+    """Structural type for the HTTP capability (`ctx.http`)."""
+
+    def fetch(
+        self,
+        url: str,
+        *,
+        method: str = "GET",
+        headers: dict[str, str] | None = None,
+        body: str | None = None,
+        timeout_ms: int | None = None,
+    ) -> HttpResponse: ...
+
+
+@runtime_checkable
+class StreamProtocol(Protocol):
+    """Structural type for the stream-emitter capability (`ctx.stream`)."""
+
+    def emit(self, event_type: str, data: dict | str) -> None: ...
+
+    def progress(self, content: str, *, tool_name: str | None = None) -> None: ...
+
+    def intent(self, content: str) -> None: ...
+
+
 def _uninitialized_llm():
     """Factory for uninitialized LLM stub."""
 
@@ -298,16 +369,20 @@ class SkillDefinition:
 class AgentContext:
     """Execution context passed to agent handlers.
 
-    Capability fields (llm, tools, http, stream) are always non-None.
-    Defaults are safe stubs that raise if called outside the host environment.
+    Capability fields (`llm`, `tools`, `http`, `stream`) are typed as
+    structural protocols so they can be substituted for tests or custom
+    gateways. The default factories return the production NATS-backed
+    classes pre-wired to raise a clear error if called outside the host
+    environment — for unit tests prefer
+    `friday_agent_sdk.testing.make_test_context()`.
     """
 
     env: dict[str, str] = field(default_factory=dict)
     config: dict = field(default_factory=dict)
     skills: list[SkillDefinition] = field(default_factory=list)
     session: SessionData | None = None
     output_schema: dict | None = None
-    tools: Tools = field(default_factory=_uninitialized_tools)
-    llm: Llm = field(default_factory=_uninitialized_llm)
-    http: Http = field(default_factory=_uninitialized_http)
-    stream: StreamEmitter = field(default_factory=_uninitialized_stream)
+    tools: ToolsProtocol = field(default_factory=_uninitialized_tools)
+    llm: LlmProtocol = field(default_factory=_uninitialized_llm)
+    http: HttpProtocol = field(default_factory=_uninitialized_http)
+    stream: StreamProtocol = field(default_factory=_uninitialized_stream)