From 9e05024140463dbdb633a7feddad13cbedce8669 Mon Sep 17 00:00:00 2001
From: Lukasz Jagiello <jagiello.lukasz@gmail.com>
Date: Thu, 30 Apr 2026 19:36:22 -0700
Subject: [PATCH 1/3] feat(python): add capability protocols and
 friday_agent_sdk.testing

Make agent handlers cheaply unit-testable. The `AgentContext` capability
fields (`ctx.llm`, `ctx.http`, `ctx.tools`, `ctx.stream`) are now typed
as structural protocols (`LlmProtocol`, `HttpProtocol`, `ToolsProtocol`,
`StreamProtocol`) so any object with the right method shape can stand in
for the production NATS-backed wrappers.

A new public submodule `friday_agent_sdk.testing` ships ready-made fakes
(`FakeLlm`, `FakeHttp`, `FakeTools`, `FakeStream`) plus a
`make_test_context()` constructor that wires them up with sensible
defaults. `make_test_context` is also re-exported at the top level.

  from friday_agent_sdk import make_test_context
  from friday_agent_sdk.testing import FakeLlm

  ctx = make_test_context(llm=FakeLlm(responses=[...]))
  result = my_handler("hi", ctx)
  assert ctx.stream.events == [...]  # FakeStream records every emit

The concrete `Llm`/`Http`/`Tools`/`StreamEmitter` classes structurally
implement the new protocols, so `build_context()` and the bridge are
unchanged. The existing test that reads the private `Llm._config` now
narrows via `isinstance` first to satisfy the protocol field type.

Wheel smoke test in CI imports the new symbols and runs the protocol
isinstance checks against `make_test_context()` defaults.
---
 .github/workflows/ci.yml                     |  15 +-
 CHANGELOG.md                                 |   5 +
 packages/python/friday_agent_sdk/__init__.py |  10 +
 packages/python/friday_agent_sdk/_types.py   |  89 +++++-
 packages/python/friday_agent_sdk/testing.py  | 296 +++++++++++++++++++
 packages/python/tests/test_context.py        |   5 +-
 packages/python/tests/test_testing.py        | 232 +++++++++++++++
 7 files changed, 640 insertions(+), 12 deletions(-)
 create mode 100644 packages/python/friday_agent_sdk/testing.py
 create mode 100644 packages/python/tests/test_testing.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f1e226f..40165ea 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -87,12 +87,19 @@ jobs:
               agent, run, ok, err,
               AgentContext, AgentResult, AgentExtras,
               ArtifactRef, OutlineRef, OkResult, ErrResult,
-              Http, HttpError, HttpResponse,
-              Llm, LlmError, LlmResponse,
-              SessionData, SkillDefinition, StreamEmitter,
-              ToolCallError, ToolDefinition, Tools,
+              Http, HttpError, HttpProtocol, HttpResponse,
+              Llm, LlmError, LlmProtocol, LlmResponse,
+              SessionData, SkillDefinition, StreamEmitter, StreamProtocol,
+              ToolCallError, ToolDefinition, Tools, ToolsProtocol,
+              make_test_context,
               parse_input, parse_operation,
           )
+          from friday_agent_sdk.testing import FakeLlm, FakeHttp, FakeTools, FakeStream
           assert __version__, "friday_agent_sdk.__version__ should be set"
+          ctx = make_test_context()
+          assert isinstance(ctx.llm, LlmProtocol)
+          assert isinstance(ctx.http, HttpProtocol)
+          assert isinstance(ctx.tools, ToolsProtocol)
+          assert isinstance(ctx.stream, StreamProtocol)
           print(f"OK: friday-agent-sdk {__version__} installs and imports cleanly")
           PY
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 29ca81b..5ddf3c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Capability protocols `LlmProtocol`, `HttpProtocol`, `ToolsProtocol`, and `StreamProtocol` (re-exported from `friday_agent_sdk`). The `AgentContext` capability fields are now typed as protocols so agents can be unit-tested by substituting any object that satisfies the protocol.
+- New public module `friday_agent_sdk.testing` with `make_test_context()` plus `FakeLlm`, `FakeHttp`, `FakeTools`, and `FakeStream` helpers. `make_test_context()` is also re-exported at the top level for convenience.
+
 ## [0.1.0] - 2026-04-30
 
 Initial public release. **Alpha — APIs may change.**
diff --git a/packages/python/friday_agent_sdk/__init__.py b/packages/python/friday_agent_sdk/__init__.py
index 260d492..8f8d67d 100644
--- a/packages/python/friday_agent_sdk/__init__.py
+++ b/packages/python/friday_agent_sdk/__init__.py
@@ -19,17 +19,22 @@
     AgentContext,
     Http,
     HttpError,
+    HttpProtocol,
     HttpResponse,
     Llm,
     LlmError,
+    LlmProtocol,
     LlmResponse,
     SessionData,
     SkillDefinition,
     StreamEmitter,
+    StreamProtocol,
     ToolCallError,
     ToolDefinition,
     Tools,
+    ToolsProtocol,
 )
+from friday_agent_sdk.testing import make_test_context
 
 __all__ = [
     "AgentContext",
@@ -39,21 +44,26 @@
     "ErrResult",
     "Http",
     "HttpError",
+    "HttpProtocol",
     "HttpResponse",
     "Llm",
     "LlmError",
+    "LlmProtocol",
     "LlmResponse",
     "OkResult",
     "OutlineRef",
     "SessionData",
     "SkillDefinition",
     "StreamEmitter",
+    "StreamProtocol",
     "ToolCallError",
     "ToolDefinition",
     "Tools",
+    "ToolsProtocol",
     "__version__",
     "agent",
     "err",
+    "make_test_context",
     "ok",
     "parse_input",
     "parse_operation",
diff --git a/packages/python/friday_agent_sdk/_types.py b/packages/python/friday_agent_sdk/_types.py
index 7793312..84afee8 100644
--- a/packages/python/friday_agent_sdk/_types.py
+++ b/packages/python/friday_agent_sdk/_types.py
@@ -3,7 +3,7 @@
 import json
 from collections.abc import Callable
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, Protocol, runtime_checkable
 
 
 class ToolCallError(Exception):
@@ -244,6 +244,77 @@ class SessionData:
     datetime: str
 
 
+# ---------------------------------------------------------------------------
+# Capability protocols — structural types so users can substitute test doubles
+# (or their own gateways) for ctx.llm / ctx.http / ctx.tools / ctx.stream.
+#
+# Concrete implementations: ``Llm``/``Http``/``Tools``/``StreamEmitter`` (the
+# NATS-backed wrappers above) and the ``Fake*`` classes in
+# ``friday_agent_sdk.testing``.
+# ---------------------------------------------------------------------------
+
+
+@runtime_checkable
+class ToolsProtocol(Protocol):
+    """Structural type for the tools capability (`ctx.tools`)."""
+
+    def call(self, name: str, args: dict) -> dict: ...
+
+    def list(self) -> list[ToolDefinition]: ...
+
+
+@runtime_checkable
+class LlmProtocol(Protocol):
+    """Structural type for the LLM capability (`ctx.llm`)."""
+
+    def generate(
+        self,
+        messages: list[dict[str, str]],
+        *,
+        model: str | None = None,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        provider_options: dict | None = None,
+    ) -> LlmResponse: ...
+
+    def generate_object(
+        self,
+        messages: list[dict[str, str]],
+        schema: dict,
+        *,
+        model: str | None = None,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        provider_options: dict | None = None,
+    ) -> LlmResponse: ...
+
+
+@runtime_checkable
+class HttpProtocol(Protocol):
+    """Structural type for the HTTP capability (`ctx.http`)."""
+
+    def fetch(
+        self,
+        url: str,
+        *,
+        method: str = "GET",
+        headers: dict[str, str] | None = None,
+        body: str | None = None,
+        timeout_ms: int | None = None,
+    ) -> HttpResponse: ...
+
+
+@runtime_checkable
+class StreamProtocol(Protocol):
+    """Structural type for the stream-emitter capability (`ctx.stream`)."""
+
+    def emit(self, event_type: str, data: dict | str) -> None: ...
+
+    def progress(self, content: str, *, tool_name: str | None = None) -> None: ...
+
+    def intent(self, content: str) -> None: ...
+
+
 def _uninitialized_llm():
     """Factory for uninitialized LLM stub."""
 
@@ -298,8 +369,12 @@ class SkillDefinition:
 class AgentContext:
     """Execution context passed to agent handlers.
 
-    Capability fields (llm, tools, http, stream) are always non-None.
-    Defaults are safe stubs that raise if called outside the host environment.
+    Capability fields (`llm`, `tools`, `http`, `stream`) are typed as
+    structural protocols so they can be substituted for tests or custom
+    gateways. The default factories return the production NATS-backed
+    classes pre-wired to raise a clear error if called outside the host
+    environment — for unit tests prefer
+    `friday_agent_sdk.testing.make_test_context()`.
     """
 
     env: dict[str, str] = field(default_factory=dict)
@@ -307,7 +382,7 @@ class AgentContext:
     skills: list[SkillDefinition] = field(default_factory=list)
     session: SessionData | None = None
     output_schema: dict | None = None
-    tools: Tools = field(default_factory=_uninitialized_tools)
-    llm: Llm = field(default_factory=_uninitialized_llm)
-    http: Http = field(default_factory=_uninitialized_http)
-    stream: StreamEmitter = field(default_factory=_uninitialized_stream)
+    tools: ToolsProtocol = field(default_factory=_uninitialized_tools)
+    llm: LlmProtocol = field(default_factory=_uninitialized_llm)
+    http: HttpProtocol = field(default_factory=_uninitialized_http)
+    stream: StreamProtocol = field(default_factory=_uninitialized_stream)
diff --git a/packages/python/friday_agent_sdk/testing.py b/packages/python/friday_agent_sdk/testing.py
new file mode 100644
index 0000000..d1e0ce0
--- /dev/null
+++ b/packages/python/friday_agent_sdk/testing.py
@@ -0,0 +1,296 @@
+"""Test helpers for unit-testing Friday agents.
+
+The `AgentContext` capability fields (`ctx.llm`, `ctx.http`, `ctx.tools`,
+`ctx.stream`) are typed as protocols so any object implementing the right
+methods can be substituted in tests. This module provides ready-made fakes
+plus a `make_test_context()` constructor that wires sensible defaults.
+
+Example:
+
+    from friday_agent_sdk import LlmResponse
+    from friday_agent_sdk.testing import FakeLlm, make_test_context
+
+    fake_llm = FakeLlm(
+        responses=[
+            LlmResponse(
+                text="hello there",
+                object=None,
+                model="fake",
+                usage={},
+                finish_reason="stop",
+            )
+        ]
+    )
+    ctx = make_test_context(env={"API_KEY": "x"}, llm=fake_llm)
+
+    result = my_agent_handler("hi", ctx)
+    assert fake_llm.calls[0]["messages"] == [{"role": "user", "content": "hi"}]
+"""
+
+from collections.abc import Callable
+from typing import Any
+
+from friday_agent_sdk._types import (
+    AgentContext,
+    HttpProtocol,
+    HttpResponse,
+    LlmProtocol,
+    LlmResponse,
+    SessionData,
+    SkillDefinition,
+    StreamProtocol,
+    ToolCallError,
+    ToolDefinition,
+    ToolsProtocol,
+)
+
+__all__ = [
+    "FakeHttp",
+    "FakeLlm",
+    "FakeStream",
+    "FakeTools",
+    "make_test_context",
+]
+
+
+def _empty_llm_response() -> LlmResponse:
+    return LlmResponse(
+        text="",
+        object=None,
+        model="fake",
+        usage={},
+        finish_reason="stop",
+    )
+
+
+def _empty_http_response() -> HttpResponse:
+    return HttpResponse(status=200, headers={}, body="")
+
+
+class FakeLlm:
+    """Test double for `LlmProtocol`.
+
+    Default behaviour returns an empty success `LlmResponse` for every call.
+    Override by passing one of:
+
+    - `responses=[LlmResponse(...), ...]` — FIFO queue of canned responses.
+      Falls back to the empty default once exhausted.
+    - `on_generate=lambda **kwargs: LlmResponse(...)` — custom callable
+      invoked for every `generate`/`generate_object` call; overrides `responses`.
+
+    All calls are appended to `self.calls` for assertion.
+    """
+
+    def __init__(
+        self,
+        responses: list[LlmResponse] | None = None,
+        *,
+        on_generate: Callable[..., LlmResponse] | None = None,
+    ) -> None:
+        self._responses: list[LlmResponse] = list(responses or [])
+        self._on_generate = on_generate
+        self.calls: list[dict[str, Any]] = []
+
+    def _next(self, **kwargs: Any) -> LlmResponse:
+        if self._on_generate is not None:
+            return self._on_generate(**kwargs)
+        if self._responses:
+            return self._responses.pop(0)
+        return _empty_llm_response()
+
+    def generate(
+        self,
+        messages: list[dict[str, str]],
+        *,
+        model: str | None = None,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        provider_options: dict | None = None,
+    ) -> LlmResponse:
+        self.calls.append(
+            {
+                "method": "generate",
+                "messages": messages,
+                "model": model,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "provider_options": provider_options,
+            }
+        )
+        return self._next(
+            messages=messages,
+            model=model,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            provider_options=provider_options,
+        )
+
+    def generate_object(
+        self,
+        messages: list[dict[str, str]],
+        schema: dict,
+        *,
+        model: str | None = None,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        provider_options: dict | None = None,
+    ) -> LlmResponse:
+        self.calls.append(
+            {
+                "method": "generate_object",
+                "messages": messages,
+                "schema": schema,
+                "model": model,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "provider_options": provider_options,
+            }
+        )
+        return self._next(
+            messages=messages,
+            schema=schema,
+            model=model,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            provider_options=provider_options,
+        )
+
+
+class FakeHttp:
+    """Test double for `HttpProtocol`.
+
+    Default behaviour returns `HttpResponse(status=200, headers={}, body="")`
+    for every call. Override with:
+
+    - `responses=[HttpResponse(...), ...]` — FIFO queue; default once exhausted.
+    - `on_fetch=lambda url, **kwargs: HttpResponse(...)` — callable; overrides `responses`.
+
+    All calls are appended to `self.calls`.
+    """
+
+    def __init__(
+        self,
+        responses: list[HttpResponse] | None = None,
+        *,
+        on_fetch: Callable[..., HttpResponse] | None = None,
+    ) -> None:
+        self._responses: list[HttpResponse] = list(responses or [])
+        self._on_fetch = on_fetch
+        self.calls: list[dict[str, Any]] = []
+
+    def fetch(
+        self,
+        url: str,
+        *,
+        method: str = "GET",
+        headers: dict[str, str] | None = None,
+        body: str | None = None,
+        timeout_ms: int | None = None,
+    ) -> HttpResponse:
+        self.calls.append(
+            {
+                "url": url,
+                "method": method,
+                "headers": headers,
+                "body": body,
+                "timeout_ms": timeout_ms,
+            }
+        )
+        if self._on_fetch is not None:
+            return self._on_fetch(
+                url,
+                method=method,
+                headers=headers,
+                body=body,
+                timeout_ms=timeout_ms,
+            )
+        if self._responses:
+            return self._responses.pop(0)
+        return _empty_http_response()
+
+
+class FakeTools:
+    """Test double for `ToolsProtocol`.
+
+    `list()` returns a fresh copy of the `tools` list. `call(name, args)`
+    dispatches via `handlers[name]`; if no handler is registered the call
+    raises `ToolCallError` with a clear message — surfacing missing test
+    setup loudly is preferred over silent empty returns.
+
+    All calls are appended to `self.calls` as `(name, args)` tuples.
+    """
+
+    def __init__(
+        self,
+        *,
+        tools: list[ToolDefinition] | None = None,
+        handlers: dict[str, Callable[[dict], dict]] | None = None,
+    ) -> None:
+        self._tools: list[ToolDefinition] = list(tools or [])
+        self._handlers: dict[str, Callable[[dict], dict]] = dict(handlers or {})
+        self.calls: list[tuple[str, dict]] = []
+
+    def call(self, name: str, args: dict) -> dict:
+        self.calls.append((name, args))
+        handler = self._handlers.get(name)
+        if handler is None:
+            raise ToolCallError(
+                f"FakeTools: no handler registered for tool {name!r}. Pass handlers={{...}} when constructing FakeTools()."
+            )
+        return handler(args)
+
+    def list(self) -> list[ToolDefinition]:
+        return list(self._tools)
+
+
+class FakeStream:
+    """Test double for `StreamProtocol`. Records every emitted event into
+    `self.events` as `(event_type, data)` tuples. Never raises.
+    """
+
+    def __init__(self) -> None:
+        self.events: list[tuple[str, dict | str]] = []
+
+    def emit(self, event_type: str, data: dict | str) -> None:
+        self.events.append((event_type, data))
+
+    def progress(self, content: str, *, tool_name: str | None = None) -> None:
+        self.emit(
+            "data-tool-progress",
+            {"toolName": tool_name or "agent", "content": content},
+        )
+
+    def intent(self, content: str) -> None:
+        self.emit("data-intent", {"content": content})
+
+
+def make_test_context(
+    *,
+    env: dict[str, str] | None = None,
+    config: dict | None = None,
+    skills: list[SkillDefinition] | None = None,
+    session: SessionData | None = None,
+    output_schema: dict | None = None,
+    llm: LlmProtocol | None = None,
+    http: HttpProtocol | None = None,
+    tools: ToolsProtocol | None = None,
+    stream: StreamProtocol | None = None,
+) -> AgentContext:
+    """Construct an `AgentContext` for unit-testing agent handlers.
+
+    Every capability you don't override gets a default `Fake*` instance from
+    this module — no NATS, no daemon required. Pass your own protocol
+    implementation (or a pre-configured `Fake*`) to control behaviour for
+    a specific capability.
+    """
+    return AgentContext(
+        env=env if env is not None else {},
+        config=config if config is not None else {},
+        skills=skills if skills is not None else [],
+        session=session,
+        output_schema=output_schema,
+        llm=llm if llm is not None else FakeLlm(),
+        http=http if http is not None else FakeHttp(),
+        tools=tools if tools is not None else FakeTools(),
+        stream=stream if stream is not None else FakeStream(),
+    )
diff --git a/packages/python/tests/test_context.py b/packages/python/tests/test_context.py
index a41cd13..8ec41e3 100644
--- a/packages/python/tests/test_context.py
+++ b/packages/python/tests/test_context.py
@@ -3,7 +3,7 @@
 from unittest.mock import MagicMock
 
 from friday_agent_sdk._context import build_context
-from friday_agent_sdk._types import AgentContext, SessionData
+from friday_agent_sdk._types import AgentContext, Llm, SessionData
 
 
 def _build(raw: dict) -> AgentContext:
@@ -85,4 +85,7 @@ def test_llm_config_read_from_raw(self):
         """llm_config key in raw dict is stored for LLM builder."""
         raw = {"llm_config": {"model": "anthropic:claude-haiku-4-5"}}
         ctx = _build(raw)
+        # ctx.llm is typed as LlmProtocol; narrow to the concrete Llm to
+        # reach the internal _config attribute.
+        assert isinstance(ctx.llm, Llm)
         assert ctx.llm._config == {"model": "anthropic:claude-haiku-4-5"}
diff --git a/packages/python/tests/test_testing.py b/packages/python/tests/test_testing.py
new file mode 100644
index 0000000..be051ba
--- /dev/null
+++ b/packages/python/tests/test_testing.py
@@ -0,0 +1,232 @@
+"""Tests for the public friday_agent_sdk.testing module."""
+
+import pytest
+
+from friday_agent_sdk import (
+    AgentContext,
+    HttpProtocol,
+    HttpResponse,
+    LlmProtocol,
+    LlmResponse,
+    SessionData,
+    StreamProtocol,
+    ToolCallError,
+    ToolDefinition,
+    ToolsProtocol,
+    make_test_context,
+)
+from friday_agent_sdk.testing import FakeHttp, FakeLlm, FakeStream, FakeTools
+
+
+def _llm_response(text: str = "ok") -> LlmResponse:
+    return LlmResponse(
+        text=text,
+        object=None,
+        model="fake",
+        usage={},
+        finish_reason="stop",
+    )
+
+
+class TestMakeTestContext:
+    def test_zero_arg_returns_agent_context(self):
+        ctx = make_test_context()
+        assert isinstance(ctx, AgentContext)
+
+    def test_zero_arg_capabilities_satisfy_protocols(self):
+        ctx = make_test_context()
+        assert isinstance(ctx.llm, LlmProtocol)
+        assert isinstance(ctx.http, HttpProtocol)
+        assert isinstance(ctx.tools, ToolsProtocol)
+        assert isinstance(ctx.stream, StreamProtocol)
+
+    def test_zero_arg_capabilities_are_fakes(self):
+        ctx = make_test_context()
+        assert isinstance(ctx.llm, FakeLlm)
+        assert isinstance(ctx.http, FakeHttp)
+        assert isinstance(ctx.tools, FakeTools)
+        assert isinstance(ctx.stream, FakeStream)
+
+    def test_passes_through_simple_fields(self):
+        session = SessionData(
+            id="s1",
+            workspace_id="w1",
+            user_id="u1",
+            datetime="2026-01-01T00:00:00Z",
+        )
+        ctx = make_test_context(
+            env={"FOO": "bar"},
+            config={"k": 1},
+            session=session,
+            output_schema={"type": "object"},
+        )
+        assert ctx.env == {"FOO": "bar"}
+        assert ctx.config == {"k": 1}
+        assert ctx.session is session
+        assert ctx.output_schema == {"type": "object"}
+
+    def test_overrides_individual_capabilities(self):
+        my_llm = FakeLlm()
+        my_http = FakeHttp()
+        my_tools = FakeTools()
+        my_stream = FakeStream()
+        ctx = make_test_context(
+            llm=my_llm,
+            http=my_http,
+            tools=my_tools,
+            stream=my_stream,
+        )
+        assert ctx.llm is my_llm
+        assert ctx.http is my_http
+        assert ctx.tools is my_tools
+        assert ctx.stream is my_stream
+
+    def test_accepts_arbitrary_protocol_implementations(self):
+        """Any object with the right method shape satisfies the field type.
+
+        Verifies the structural-typing claim: users don't need to subclass
+        the SDK's Fake* helpers, they can drop in a hand-written mock.
+        """
+
+        class HandRolledLlm:
+            def generate(self, messages, **kwargs):
+                return _llm_response("from-handrolled")
+
+            def generate_object(self, messages, schema, **kwargs):
+                return _llm_response("from-handrolled")
+
+        ctx = make_test_context(llm=HandRolledLlm())  # type: ignore[arg-type]
+        assert ctx.llm.generate(messages=[{"role": "user", "content": "x"}]).text == "from-handrolled"
+
+
+class TestFakeLlm:
+    def test_default_returns_empty_response(self):
+        llm = FakeLlm()
+        result = llm.generate(messages=[{"role": "user", "content": "hi"}])
+        assert result.text == ""
+        assert result.model == "fake"
+        assert result.finish_reason == "stop"
+
+    def test_records_calls(self):
+        llm = FakeLlm()
+        llm.generate(messages=[{"role": "user", "content": "hi"}], model="m1")
+        llm.generate_object(
+            messages=[{"role": "user", "content": "hi"}],
+            schema={"type": "object"},
+        )
+        assert len(llm.calls) == 2
+        assert llm.calls[0]["method"] == "generate"
+        assert llm.calls[0]["model"] == "m1"
+        assert llm.calls[1]["method"] == "generate_object"
+        assert llm.calls[1]["schema"] == {"type": "object"}
+
+    def test_canned_response_queue_is_fifo(self):
+        llm = FakeLlm(responses=[_llm_response("a"), _llm_response("b")])
+        assert llm.generate(messages=[]).text == "a"
+        assert llm.generate(messages=[]).text == "b"
+
+    def test_falls_back_to_empty_when_queue_exhausted(self):
+        llm = FakeLlm(responses=[_llm_response("only")])
+        assert llm.generate(messages=[]).text == "only"
+        assert llm.generate(messages=[]).text == ""
+
+    def test_on_generate_callable_takes_precedence(self):
+        captured: list = []
+
+        def handler(**kwargs):
+            captured.append(kwargs)
+            return _llm_response("dynamic")
+
+        llm = FakeLlm(responses=[_llm_response("queued")], on_generate=handler)
+        result = llm.generate(messages=[{"role": "user", "content": "hi"}], model="m1")
+        assert result.text == "dynamic"
+        assert len(captured) == 1
+        assert captured[0]["model"] == "m1"
+
+
+class TestFakeHttp:
+    def test_default_returns_200_empty(self):
+        http = FakeHttp()
+        result = http.fetch("https://example.com")
+        assert result.status == 200
+        assert result.body == ""
+
+    def test_records_calls(self):
+        http = FakeHttp()
+        http.fetch(
+            "https://example.com/api",
+            method="POST",
+            headers={"X": "1"},
+            body="payload",
+        )
+        assert len(http.calls) == 1
+        assert http.calls[0]["url"] == "https://example.com/api"
+        assert http.calls[0]["method"] == "POST"
+        assert http.calls[0]["headers"] == {"X": "1"}
+        assert http.calls[0]["body"] == "payload"
+
+    def test_canned_responses_fifo(self):
+        http = FakeHttp(
+            responses=[
+                HttpResponse(status=201, headers={}, body="a"),
+                HttpResponse(status=404, headers={}, body="b"),
+            ]
+        )
+        assert http.fetch("https://example.com").status == 201
+        assert http.fetch("https://example.com").status == 404
+        # Queue exhausted → empty default
+        assert http.fetch("https://example.com").status == 200
+
+    def test_on_fetch_callable(self):
+        def handler(url, **kwargs):
+            return HttpResponse(status=418, headers={}, body=url)
+
+        http = FakeHttp(on_fetch=handler)
+        result = http.fetch("https://teapot")
+        assert result.status == 418
+        assert result.body == "https://teapot"
+
+
+class TestFakeTools:
+    def test_list_returns_provided_tools(self):
+        tool = ToolDefinition(name="echo", description="d", input_schema={})
+        tools = FakeTools(tools=[tool])
+        assert tools.list() == [tool]
+
+    def test_call_dispatches_to_handler(self):
+        tools = FakeTools(handlers={"add": lambda args: {"sum": args["a"] + args["b"]}})
+        result = tools.call("add", {"a": 1, "b": 2})
+        assert result == {"sum": 3}
+
+    def test_call_records(self):
+        tools = FakeTools(handlers={"echo": lambda args: args})
+        tools.call("echo", {"x": 1})
+        tools.call("echo", {"y": 2})
+        assert tools.calls == [("echo", {"x": 1}), ("echo", {"y": 2})]
+
+    def test_unhandled_tool_raises(self):
+        tools = FakeTools()
+        with pytest.raises(ToolCallError, match="no handler registered"):
+            tools.call("missing", {})
+
+
+class TestFakeStream:
+    def test_emit_records_event(self):
+        stream = FakeStream()
+        stream.emit("custom-event", {"k": "v"})
+        assert stream.events == [("custom-event", {"k": "v"})]
+
+    def test_progress_records_canonical_event(self):
+        stream = FakeStream()
+        stream.progress("doing thing", tool_name="my-tool")
+        assert stream.events == [("data-tool-progress", {"toolName": "my-tool", "content": "doing thing"})]
+
+    def test_progress_defaults_tool_name_to_agent(self):
+        stream = FakeStream()
+        stream.progress("step")
+        assert stream.events[0][1] == {"toolName": "agent", "content": "step"}
+
+    def test_intent_records_canonical_event(self):
+        stream = FakeStream()
+        stream.intent("planning to clone repo")
+        assert stream.events == [("data-intent", {"content": "planning to clone repo"})]

From 4b72626db0e8f7e9b021f2003500a67d4fde8950 Mon Sep 17 00:00:00 2001
From: Lukasz Jagiello <jagiello.lukasz@gmail.com>
Date: Thu, 30 Apr 2026 19:45:50 -0700
Subject: [PATCH 2/3] docs: add how-to guide and AgentContext reference for
 testing protocols

---
 packages/python/docs/README.md                |   1 +
 .../python/docs/how-to/unit-test-agents.md    | 162 ++++++++++++++++++
 .../python/docs/reference/agent-context.md    |  18 +-
 3 files changed, 177 insertions(+), 4 deletions(-)
 create mode 100644 packages/python/docs/how-to/unit-test-agents.md

diff --git a/packages/python/docs/README.md b/packages/python/docs/README.md
index c1e8fac..bcd90b6 100644
--- a/packages/python/docs/README.md
+++ b/packages/python/docs/README.md
@@ -21,6 +21,7 @@ Task-focused recipes for common patterns:
 | [Use MCP tools](how-to/use-mcp-tools.md)                     | Invoke Model Context Protocol servers                        |
 | [Handle structured input](how-to/handle-structured-input.md) | Extract JSON from Friday's enriched prompts                  |
 | [Stream progress](how-to/stream-progress.md)                 | Emit real-time updates to the UI                             |
+| [Unit-test agents](how-to/unit-test-agents.md)               | Test agent handlers with `make_test_context()` and fakes     |
 
 ## Reference
 
diff --git a/packages/python/docs/how-to/unit-test-agents.md b/packages/python/docs/how-to/unit-test-agents.md
new file mode 100644
index 0000000..7b795ed
--- /dev/null
+++ b/packages/python/docs/how-to/unit-test-agents.md
@@ -0,0 +1,162 @@
+# Unit-test agents
+
+Friday agents are plain Python functions that take a prompt and an
+`AgentContext`. To test them in isolation you need an `AgentContext` whose
+capability fields don't actually talk to the daemon — that's what
+`make_test_context()` is for.
+
+## The 30-second version
+
+```python
+from friday_agent_sdk import make_test_context
+
+from my_agent import execute  # the @agent-decorated handler
+
+
+def test_echoes_prompt():
+    ctx = make_test_context()
+    result = execute("hello", ctx)
+    assert result.data == "hello"
+```
+
+`make_test_context()` returns an `AgentContext` with `Fake*` instances for `ctx.llm`,
+`ctx.http`, `ctx.tools`, and `ctx.stream` — no daemon, no NATS, no API keys. Each fake
+records every call and returns a permissive default, except `FakeTools`, which raises on unhandled calls.
+
+## Asserting on emitted progress
+
+`FakeStream` records every event into `ctx.stream.events`:
+
+```python
+from friday_agent_sdk import make_test_context
+
+
+def test_emits_progress():
+    ctx = make_test_context()
+
+    execute("hi", ctx)
+
+    assert ctx.stream.events == [
+        ("data-tool-progress", {"toolName": "agent", "content": "Starting"}),
+        ("data-tool-progress", {"toolName": "agent", "content": "Done"}),
+    ]
+```
+
+## Stubbing LLM responses
+
+Pass canned responses (FIFO queue) or a callable:
+
+```python
+from friday_agent_sdk import LlmResponse, make_test_context
+from friday_agent_sdk.testing import FakeLlm
+
+
+def test_uses_llm_output():
+    fake_llm = FakeLlm(
+        responses=[
+            LlmResponse(
+                text="42",
+                object=None,
+                model="fake",
+                usage={},
+                finish_reason="stop",
+            )
+        ]
+    )
+    ctx = make_test_context(llm=fake_llm)
+
+    result = execute("what is 6 * 7?", ctx)
+
+    assert result.data == "42"
+    assert fake_llm.calls[0]["messages"][-1]["content"] == "what is 6 * 7?"
+```
+
+For dynamic responses, use `on_generate`:
+
+```python
+fake_llm = FakeLlm(
+    on_generate=lambda messages, **kwargs: LlmResponse(
+        text=f"echo: {messages[-1]['content']}",
+        object=None,
+        model="fake",
+        usage={},
+        finish_reason="stop",
+    )
+)
+```
+
+## Stubbing HTTP responses
+
+```python
+from friday_agent_sdk import HttpResponse, make_test_context
+from friday_agent_sdk.testing import FakeHttp
+
+
+fake_http = FakeHttp(
+    responses=[
+        HttpResponse(status=200, headers={}, body='{"ok": true}'),
+        HttpResponse(status=429, headers={"Retry-After": "10"}, body=""),
+    ]
+)
+ctx = make_test_context(http=fake_http)
+```
+
+`FakeHttp` also accepts an `on_fetch=callable` for URL-aware logic.
+
+## Stubbing tool calls
+
+`FakeTools` requires you to register handlers explicitly — unhandled calls
+raise `ToolCallError` rather than silently returning empty results:
+
+```python
+from friday_agent_sdk import ToolDefinition, make_test_context
+from friday_agent_sdk.testing import FakeTools
+
+
+fake_tools = FakeTools(
+    tools=[
+        ToolDefinition(name="add", description="adds two numbers", input_schema={}),
+    ],
+    handlers={"add": lambda args: {"sum": args["a"] + args["b"]}},
+)
+ctx = make_test_context(tools=fake_tools)
+```
+
+## Custom protocol implementations
+
+You don't have to use the `Fake*` classes. The capability fields are typed as
+**protocols** (`LlmProtocol`, `HttpProtocol`, `ToolsProtocol`, `StreamProtocol`),
+so any object with the right method shape is accepted:
+
+```python
+class RecordingLlm:
+    def __init__(self):
+        self.seen: list[str] = []
+
+    def generate(self, messages, **kwargs):
+        self.seen.append(messages[-1]["content"])
+        return LlmResponse(text="ok", object=None, model="x", usage={}, finish_reason="stop")
+
+    def generate_object(self, messages, schema, **kwargs):
+        return LlmResponse(text=None, object={}, model="x", usage={}, finish_reason="stop")
+
+
+ctx = make_test_context(llm=RecordingLlm())
+```
+
+This is the same mechanism a custom production gateway would use — the
+protocols are the contract, the `Fake*` classes are convenience.
+
+## What goes in `make_test_context()`
+
+| Argument        | Type                | Default               |
+| --------------- | ------------------- | --------------------- |
+| `env`           | `dict[str, str]`    | `{}`                  |
+| `config`        | `dict`              | `{}`                  |
+| `skills`        | `list[Skill...]`    | `[]`                  |
+| `session`       | `SessionData?`      | `None`                |
+| `output_schema` | `dict?`             | `None`                |
+| `llm`           | `LlmProtocol?`      | `FakeLlm()`           |
+| `http`          | `HttpProtocol?`     | `FakeHttp()`          |
+| `tools`         | `ToolsProtocol?`    | `FakeTools()`         |
+| `stream`        | `StreamProtocol?`   | `FakeStream()`        |
diff --git a/packages/python/docs/reference/agent-context.md b/packages/python/docs/reference/agent-context.md
index aab1923..4f49cd7 100644
--- a/packages/python/docs/reference/agent-context.md
+++ b/packages/python/docs/reference/agent-context.md
@@ -11,12 +11,22 @@ class AgentContext:
     config: dict = field(default_factory=dict)
     session: SessionData | None = None
     output_schema: dict | None = None
-    tools: Tools = field(default_factory=_uninitialized_tools)
-    llm: Llm = field(default_factory=_uninitialized_llm)
-    http: Http = field(default_factory=_uninitialized_http)
-    stream: StreamEmitter = field(default_factory=_uninitialized_stream)
+    tools: ToolsProtocol = field(default_factory=_uninitialized_tools)
+    llm: LlmProtocol = field(default_factory=_uninitialized_llm)
+    http: HttpProtocol = field(default_factory=_uninitialized_http)
+    stream: StreamProtocol = field(default_factory=_uninitialized_stream)
 ```
 
+The capability fields are typed as **structural protocols** so test doubles
+or custom gateways can be substituted without subclassing. The default
+factories return instances of the production NATS-backed `Tools` / `Llm` /
+`Http` / `StreamEmitter` classes, which all satisfy the protocols natively.
+
+For unit tests, use
+[`make_test_context()`](../how-to/unit-test-agents.md) — it wires up
+`FakeTools`, `FakeLlm`, `FakeHttp`, and `FakeStream` so handlers can run
+without a Friday daemon.
+
 ## Fields
 
 ### `env`

From 5242a99de75d8685fd37fb8628fe9b9c87407cd7 Mon Sep 17 00:00:00 2001
From: Lukasz Jagiello <jagiello.lukasz@gmail.com>
Date: Thu, 30 Apr 2026 20:02:44 -0700
Subject: [PATCH 3/3] style: vp fmt unit-test-agents docs

---
 .../python/docs/how-to/unit-test-agents.md    | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/packages/python/docs/how-to/unit-test-agents.md b/packages/python/docs/how-to/unit-test-agents.md
index 7b795ed..16ed72e 100644
--- a/packages/python/docs/how-to/unit-test-agents.md
+++ b/packages/python/docs/how-to/unit-test-agents.md
@@ -149,14 +149,14 @@ protocols are the contract, the `Fake*` classes are convenience.
 
 ## What goes in `make_test_context()`
 
-| Argument        | Type                | Default               |
-| --------------- | ------------------- | --------------------- |
-| `env`           | `dict[str, str]`    | `{}`                  |
-| `config`        | `dict`              | `{}`                  |
-| `skills`        | `list[Skill...]`    | `[]`                  |
-| `session`       | `SessionData?`      | `None`                |
-| `output_schema` | `dict?`             | `None`                |
-| `llm`           | `LlmProtocol?`      | `FakeLlm()`           |
-| `http`          | `HttpProtocol?`     | `FakeHttp()`          |
-| `tools`         | `ToolsProtocol?`    | `FakeTools()`         |
-| `stream`        | `StreamProtocol?`   | `FakeStream()`        |
+| Argument        | Type              | Default        |
+| --------------- | ----------------- | -------------- |
+| `env`           | `dict[str, str]`  | `{}`           |
+| `config`        | `dict`            | `{}`           |
+| `skills`        | `list[Skill...]`  | `[]`           |
+| `session`       | `SessionData?`    | `None`         |
+| `output_schema` | `dict?`           | `None`         |
+| `llm`           | `LlmProtocol?`    | `FakeLlm()`    |
+| `http`          | `HttpProtocol?`   | `FakeHttp()`   |
+| `tools`         | `ToolsProtocol?`  | `FakeTools()`  |
+| `stream`        | `StreamProtocol?` | `FakeStream()` |