From 9e05024140463dbdb633a7feddad13cbedce8669 Mon Sep 17 00:00:00 2001 From: Lukasz Jagiello Date: Thu, 30 Apr 2026 19:36:22 -0700 Subject: [PATCH 1/3] feat(python): add capability protocols and friday_agent_sdk.testing Make agent handlers cheaply unit-testable. The `AgentContext` capability fields (`ctx.llm`, `ctx.http`, `ctx.tools`, `ctx.stream`) are now typed as structural protocols (`LlmProtocol`, `HttpProtocol`, `ToolsProtocol`, `StreamProtocol`) so any object with the right method shape can stand in for the production NATS-backed wrappers. A new public submodule `friday_agent_sdk.testing` ships ready-made fakes (`FakeLlm`, `FakeHttp`, `FakeTools`, `FakeStream`) plus a `make_test_context()` constructor that wires them up with sensible defaults. `make_test_context` is also re-exported at the top level. from friday_agent_sdk import make_test_context from friday_agent_sdk.testing import FakeLlm ctx = make_test_context(llm=FakeLlm(responses=[...])) result = my_handler("hi", ctx) assert ctx.stream.events == [...] # FakeStream records every emit The concrete `Llm`/`Http`/`Tools`/`StreamEmitter` classes structurally implement the new protocols, so `build_context()` and the bridge are unchanged. Existing test that pokes at the private `Llm._config` now narrows via `isinstance` first to satisfy the protocol field type. Wheel smoke test in CI imports the new symbols and runs the protocol isinstance checks against `make_test_context()` defaults. 
--- .github/workflows/ci.yml | 15 +- CHANGELOG.md | 5 + packages/python/friday_agent_sdk/__init__.py | 10 + packages/python/friday_agent_sdk/_types.py | 89 +++++- packages/python/friday_agent_sdk/testing.py | 296 +++++++++++++++++++ packages/python/tests/test_context.py | 5 +- packages/python/tests/test_testing.py | 232 +++++++++++++++ 7 files changed, 640 insertions(+), 12 deletions(-) create mode 100644 packages/python/friday_agent_sdk/testing.py create mode 100644 packages/python/tests/test_testing.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1e226f..40165ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,12 +87,19 @@ jobs: agent, run, ok, err, AgentContext, AgentResult, AgentExtras, ArtifactRef, OutlineRef, OkResult, ErrResult, - Http, HttpError, HttpResponse, - Llm, LlmError, LlmResponse, - SessionData, SkillDefinition, StreamEmitter, - ToolCallError, ToolDefinition, Tools, + Http, HttpError, HttpProtocol, HttpResponse, + Llm, LlmError, LlmProtocol, LlmResponse, + SessionData, SkillDefinition, StreamEmitter, StreamProtocol, + ToolCallError, ToolDefinition, Tools, ToolsProtocol, + make_test_context, parse_input, parse_operation, ) + from friday_agent_sdk.testing import FakeLlm, FakeHttp, FakeTools, FakeStream assert __version__, "friday_agent_sdk.__version__ should be set" + ctx = make_test_context() + assert isinstance(ctx.llm, LlmProtocol) + assert isinstance(ctx.http, HttpProtocol) + assert isinstance(ctx.tools, ToolsProtocol) + assert isinstance(ctx.stream, StreamProtocol) print(f"OK: friday-agent-sdk {__version__} installs and imports cleanly") PY diff --git a/CHANGELOG.md b/CHANGELOG.md index 29ca81b..5ddf3c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Capability protocols `LlmProtocol`, `HttpProtocol`, `ToolsProtocol`, and `StreamProtocol` (re-exported from 
`friday_agent_sdk`). The `AgentContext` capability fields are now typed as protocols so agents can be unit-tested by substituting any object that satisfies the protocol. +- New public module `friday_agent_sdk.testing` with `make_test_context()` plus `FakeLlm`, `FakeHttp`, `FakeTools`, and `FakeStream` helpers. `make_test_context()` is also re-exported at the top level for convenience. + ## [0.1.0] - 2026-04-30 Initial public release. **Alpha — APIs may change.** diff --git a/packages/python/friday_agent_sdk/__init__.py b/packages/python/friday_agent_sdk/__init__.py index 260d492..8f8d67d 100644 --- a/packages/python/friday_agent_sdk/__init__.py +++ b/packages/python/friday_agent_sdk/__init__.py @@ -19,17 +19,22 @@ AgentContext, Http, HttpError, + HttpProtocol, HttpResponse, Llm, LlmError, + LlmProtocol, LlmResponse, SessionData, SkillDefinition, StreamEmitter, + StreamProtocol, ToolCallError, ToolDefinition, Tools, + ToolsProtocol, ) +from friday_agent_sdk.testing import make_test_context __all__ = [ "AgentContext", @@ -39,21 +44,26 @@ "ErrResult", "Http", "HttpError", + "HttpProtocol", "HttpResponse", "Llm", "LlmError", + "LlmProtocol", "LlmResponse", "OkResult", "OutlineRef", "SessionData", "SkillDefinition", "StreamEmitter", + "StreamProtocol", "ToolCallError", "ToolDefinition", "Tools", + "ToolsProtocol", "__version__", "agent", "err", + "make_test_context", "ok", "parse_input", "parse_operation", diff --git a/packages/python/friday_agent_sdk/_types.py b/packages/python/friday_agent_sdk/_types.py index 7793312..84afee8 100644 --- a/packages/python/friday_agent_sdk/_types.py +++ b/packages/python/friday_agent_sdk/_types.py @@ -3,7 +3,7 @@ import json from collections.abc import Callable from dataclasses import dataclass, field -from typing import Any +from typing import Any, Protocol, runtime_checkable class ToolCallError(Exception): @@ -244,6 +244,77 @@ class SessionData: datetime: str +# 
--------------------------------------------------------------------------- +# Capability protocols — structural types so users can substitute test doubles +# (or their own gateways) for ctx.llm / ctx.http / ctx.tools / ctx.stream. +# +# Concrete implementations: ``Llm``/``Http``/``Tools``/``StreamEmitter`` (the +# NATS-backed wrappers above) and the ``Fake*`` classes in +# ``friday_agent_sdk.testing``. +# --------------------------------------------------------------------------- + + +@runtime_checkable +class ToolsProtocol(Protocol): + """Structural type for the tools capability (`ctx.tools`).""" + + def call(self, name: str, args: dict) -> dict: ... + + def list(self) -> list[ToolDefinition]: ... + + +@runtime_checkable +class LlmProtocol(Protocol): + """Structural type for the LLM capability (`ctx.llm`).""" + + def generate( + self, + messages: list[dict[str, str]], + *, + model: str | None = None, + max_tokens: int | None = None, + temperature: float | None = None, + provider_options: dict | None = None, + ) -> LlmResponse: ... + + def generate_object( + self, + messages: list[dict[str, str]], + schema: dict, + *, + model: str | None = None, + max_tokens: int | None = None, + temperature: float | None = None, + provider_options: dict | None = None, + ) -> LlmResponse: ... + + +@runtime_checkable +class HttpProtocol(Protocol): + """Structural type for the HTTP capability (`ctx.http`).""" + + def fetch( + self, + url: str, + *, + method: str = "GET", + headers: dict[str, str] | None = None, + body: str | None = None, + timeout_ms: int | None = None, + ) -> HttpResponse: ... + + +@runtime_checkable +class StreamProtocol(Protocol): + """Structural type for the stream-emitter capability (`ctx.stream`).""" + + def emit(self, event_type: str, data: dict | str) -> None: ... + + def progress(self, content: str, *, tool_name: str | None = None) -> None: ... + + def intent(self, content: str) -> None: ... 
+ + def _uninitialized_llm(): """Factory for uninitialized LLM stub.""" @@ -298,8 +369,12 @@ class SkillDefinition: class AgentContext: """Execution context passed to agent handlers. - Capability fields (llm, tools, http, stream) are always non-None. - Defaults are safe stubs that raise if called outside the host environment. + Capability fields (`llm`, `tools`, `http`, `stream`) are typed as + structural protocols so test doubles or custom gateways can be + substituted for them. The default factories return the production NATS-backed + classes pre-wired to raise a clear error if called outside the host + environment — for unit tests prefer + `friday_agent_sdk.testing.make_test_context()`. """ env: dict[str, str] = field(default_factory=dict) @@ -307,7 +382,7 @@ class AgentContext: skills: list[SkillDefinition] = field(default_factory=list) session: SessionData | None = None output_schema: dict | None = None - tools: Tools = field(default_factory=_uninitialized_tools) - llm: Llm = field(default_factory=_uninitialized_llm) - http: Http = field(default_factory=_uninitialized_http) - stream: StreamEmitter = field(default_factory=_uninitialized_stream) + tools: ToolsProtocol = field(default_factory=_uninitialized_tools) + llm: LlmProtocol = field(default_factory=_uninitialized_llm) + http: HttpProtocol = field(default_factory=_uninitialized_http) + stream: StreamProtocol = field(default_factory=_uninitialized_stream) diff --git a/packages/python/friday_agent_sdk/testing.py b/packages/python/friday_agent_sdk/testing.py new file mode 100644 index 0000000..d1e0ce0 --- /dev/null +++ b/packages/python/friday_agent_sdk/testing.py @@ -0,0 +1,296 @@ +"""Test helpers for unit-testing Friday agents. + +The `AgentContext` capability fields (`ctx.llm`, `ctx.http`, `ctx.tools`, +`ctx.stream`) are typed as protocols so any object implementing the right +methods can be substituted in tests. 
This module provides ready-made fakes +plus a `make_test_context()` constructor that wires sensible defaults. + +Example: + + from friday_agent_sdk import LlmResponse + from friday_agent_sdk.testing import FakeLlm, make_test_context + + fake_llm = FakeLlm( + responses=[ + LlmResponse( + text="hello there", + object=None, + model="fake", + usage={}, + finish_reason="stop", + ) + ] + ) + ctx = make_test_context(env={"API_KEY": "x"}, llm=fake_llm) + + result = my_agent_handler("hi", ctx) + assert fake_llm.calls[0]["messages"] == [{"role": "user", "content": "hi"}] +""" + +from collections.abc import Callable +from typing import Any + +from friday_agent_sdk._types import ( + AgentContext, + HttpProtocol, + HttpResponse, + LlmProtocol, + LlmResponse, + SessionData, + SkillDefinition, + StreamProtocol, + ToolCallError, + ToolDefinition, + ToolsProtocol, +) + +__all__ = [ + "FakeHttp", + "FakeLlm", + "FakeStream", + "FakeTools", + "make_test_context", +] + + +def _empty_llm_response() -> LlmResponse: + return LlmResponse( + text="", + object=None, + model="fake", + usage={}, + finish_reason="stop", + ) + + +def _empty_http_response() -> HttpResponse: + return HttpResponse(status=200, headers={}, body="") + + +class FakeLlm: + """Test double for `LlmProtocol`. + + Default behaviour returns an empty success `LlmResponse` for every call. + Override by passing one of: + + - `responses=[LlmResponse(...), ...]` — FIFO queue of canned responses. + Falls back to the empty default once exhausted. + - `on_generate=lambda **kwargs: LlmResponse(...)` — custom callable + invoked for every `generate` and `generate_object` call. + + All calls are appended to `self.calls` for assertion. 
+ """ + + def __init__( + self, + responses: list[LlmResponse] | None = None, + *, + on_generate: Callable[..., LlmResponse] | None = None, + ) -> None: + self._responses: list[LlmResponse] = list(responses or []) + self._on_generate = on_generate + self.calls: list[dict[str, Any]] = [] + + def _next(self, **kwargs: Any) -> LlmResponse: + if self._on_generate is not None: + return self._on_generate(**kwargs) + if self._responses: + return self._responses.pop(0) + return _empty_llm_response() + + def generate( + self, + messages: list[dict[str, str]], + *, + model: str | None = None, + max_tokens: int | None = None, + temperature: float | None = None, + provider_options: dict | None = None, + ) -> LlmResponse: + self.calls.append( + { + "method": "generate", + "messages": messages, + "model": model, + "max_tokens": max_tokens, + "temperature": temperature, + "provider_options": provider_options, + } + ) + return self._next( + messages=messages, + model=model, + max_tokens=max_tokens, + temperature=temperature, + provider_options=provider_options, + ) + + def generate_object( + self, + messages: list[dict[str, str]], + schema: dict, + *, + model: str | None = None, + max_tokens: int | None = None, + temperature: float | None = None, + provider_options: dict | None = None, + ) -> LlmResponse: + self.calls.append( + { + "method": "generate_object", + "messages": messages, + "schema": schema, + "model": model, + "max_tokens": max_tokens, + "temperature": temperature, + "provider_options": provider_options, + } + ) + return self._next( + messages=messages, + schema=schema, + model=model, + max_tokens=max_tokens, + temperature=temperature, + provider_options=provider_options, + ) + + +class FakeHttp: + """Test double for `HttpProtocol`. + + Default behaviour returns `HttpResponse(status=200, headers={}, body="")` + for every call. Override with: + + - `responses=[HttpResponse(...), ...]` — FIFO queue. 
+ - `on_fetch=lambda url, **kwargs: HttpResponse(...)` — custom callable. + + All calls are appended to `self.calls`. + """ + + def __init__( + self, + responses: list[HttpResponse] | None = None, + *, + on_fetch: Callable[..., HttpResponse] | None = None, + ) -> None: + self._responses: list[HttpResponse] = list(responses or []) + self._on_fetch = on_fetch + self.calls: list[dict[str, Any]] = [] + + def fetch( + self, + url: str, + *, + method: str = "GET", + headers: dict[str, str] | None = None, + body: str | None = None, + timeout_ms: int | None = None, + ) -> HttpResponse: + self.calls.append( + { + "url": url, + "method": method, + "headers": headers, + "body": body, + "timeout_ms": timeout_ms, + } + ) + if self._on_fetch is not None: + return self._on_fetch( + url, + method=method, + headers=headers, + body=body, + timeout_ms=timeout_ms, + ) + if self._responses: + return self._responses.pop(0) + return _empty_http_response() + + +class FakeTools: + """Test double for `ToolsProtocol`. + + `list()` returns the `tools` argument verbatim. `call(name, args)` + dispatches via `handlers[name]`; if no handler is registered the call + raises `ToolCallError` with a clear message — surfacing missing test + setup loudly is preferred over silent empty returns. + + All calls are appended to `self.calls` as `(name, args)` tuples. + """ + + def __init__( + self, + *, + tools: list[ToolDefinition] | None = None, + handlers: dict[str, Callable[[dict], dict]] | None = None, + ) -> None: + self._tools: list[ToolDefinition] = list(tools or []) + self._handlers: dict[str, Callable[[dict], dict]] = dict(handlers or {}) + self.calls: list[tuple[str, dict]] = [] + + def call(self, name: str, args: dict) -> dict: + self.calls.append((name, args)) + handler = self._handlers.get(name) + if handler is None: + raise ToolCallError( + f"FakeTools: no handler registered for tool {name!r}. Pass handlers={{...}} when constructing FakeTools()." 
+ ) + return handler(args) + + def list(self) -> list[ToolDefinition]: + return list(self._tools) + + +class FakeStream: + """Test double for `StreamProtocol`. Records every emitted event into + `self.events` as `(event_type, data)` tuples. Never raises. + """ + + def __init__(self) -> None: + self.events: list[tuple[str, dict | str]] = [] + + def emit(self, event_type: str, data: dict | str) -> None: + self.events.append((event_type, data)) + + def progress(self, content: str, *, tool_name: str | None = None) -> None: + self.emit( + "data-tool-progress", + {"toolName": tool_name or "agent", "content": content}, + ) + + def intent(self, content: str) -> None: + self.emit("data-intent", {"content": content}) + + +def make_test_context( + *, + env: dict[str, str] | None = None, + config: dict | None = None, + skills: list[SkillDefinition] | None = None, + session: SessionData | None = None, + output_schema: dict | None = None, + llm: LlmProtocol | None = None, + http: HttpProtocol | None = None, + tools: ToolsProtocol | None = None, + stream: StreamProtocol | None = None, +) -> AgentContext: + """Construct an `AgentContext` for unit-testing agent handlers. + + Every capability you don't override gets a default `Fake*` instance from + this module — no NATS, no daemon required. Pass your own protocol + implementation (or a pre-configured `Fake*`) to control behaviour for + a specific capability. 
+ """ + return AgentContext( + env=env if env is not None else {}, + config=config if config is not None else {}, + skills=skills if skills is not None else [], + session=session, + output_schema=output_schema, + llm=llm if llm is not None else FakeLlm(), + http=http if http is not None else FakeHttp(), + tools=tools if tools is not None else FakeTools(), + stream=stream if stream is not None else FakeStream(), + ) diff --git a/packages/python/tests/test_context.py b/packages/python/tests/test_context.py index a41cd13..8ec41e3 100644 --- a/packages/python/tests/test_context.py +++ b/packages/python/tests/test_context.py @@ -3,7 +3,7 @@ from unittest.mock import MagicMock from friday_agent_sdk._context import build_context -from friday_agent_sdk._types import AgentContext, SessionData +from friday_agent_sdk._types import AgentContext, Llm, SessionData def _build(raw: dict) -> AgentContext: @@ -85,4 +85,7 @@ def test_llm_config_read_from_raw(self): """llm_config key in raw dict is stored for LLM builder.""" raw = {"llm_config": {"model": "anthropic:claude-haiku-4-5"}} ctx = _build(raw) + # ctx.llm is typed as LlmProtocol; narrow to the concrete Llm to + # reach the internal _config attribute. 
+ assert isinstance(ctx.llm, Llm) assert ctx.llm._config == {"model": "anthropic:claude-haiku-4-5"} diff --git a/packages/python/tests/test_testing.py b/packages/python/tests/test_testing.py new file mode 100644 index 0000000..be051ba --- /dev/null +++ b/packages/python/tests/test_testing.py @@ -0,0 +1,232 @@ +"""Tests for the public friday_agent_sdk.testing module.""" + +import pytest + +from friday_agent_sdk import ( + AgentContext, + HttpProtocol, + HttpResponse, + LlmProtocol, + LlmResponse, + SessionData, + StreamProtocol, + ToolCallError, + ToolDefinition, + ToolsProtocol, + make_test_context, +) +from friday_agent_sdk.testing import FakeHttp, FakeLlm, FakeStream, FakeTools + + +def _llm_response(text: str = "ok") -> LlmResponse: + return LlmResponse( + text=text, + object=None, + model="fake", + usage={}, + finish_reason="stop", + ) + + +class TestMakeTestContext: + def test_zero_arg_returns_agent_context(self): + ctx = make_test_context() + assert isinstance(ctx, AgentContext) + + def test_zero_arg_capabilities_satisfy_protocols(self): + ctx = make_test_context() + assert isinstance(ctx.llm, LlmProtocol) + assert isinstance(ctx.http, HttpProtocol) + assert isinstance(ctx.tools, ToolsProtocol) + assert isinstance(ctx.stream, StreamProtocol) + + def test_zero_arg_capabilities_are_fakes(self): + ctx = make_test_context() + assert isinstance(ctx.llm, FakeLlm) + assert isinstance(ctx.http, FakeHttp) + assert isinstance(ctx.tools, FakeTools) + assert isinstance(ctx.stream, FakeStream) + + def test_passes_through_simple_fields(self): + session = SessionData( + id="s1", + workspace_id="w1", + user_id="u1", + datetime="2026-01-01T00:00:00Z", + ) + ctx = make_test_context( + env={"FOO": "bar"}, + config={"k": 1}, + session=session, + output_schema={"type": "object"}, + ) + assert ctx.env == {"FOO": "bar"} + assert ctx.config == {"k": 1} + assert ctx.session is session + assert ctx.output_schema == {"type": "object"} + + def 
test_overrides_individual_capabilities(self): + my_llm = FakeLlm() + my_http = FakeHttp() + my_tools = FakeTools() + my_stream = FakeStream() + ctx = make_test_context( + llm=my_llm, + http=my_http, + tools=my_tools, + stream=my_stream, + ) + assert ctx.llm is my_llm + assert ctx.http is my_http + assert ctx.tools is my_tools + assert ctx.stream is my_stream + + def test_accepts_arbitrary_protocol_implementations(self): + """Any object with the right method shape satisfies the field type. + + Verifies the structural-typing claim: users don't need to subclass + the SDK's Fake* helpers, they can drop in a hand-written mock. + """ + + class HandRolledLlm: + def generate(self, messages, **kwargs): + return _llm_response("from-handrolled") + + def generate_object(self, messages, schema, **kwargs): + return _llm_response("from-handrolled") + + ctx = make_test_context(llm=HandRolledLlm()) # type: ignore[arg-type] + assert ctx.llm.generate(messages=[{"role": "user", "content": "x"}]).text == "from-handrolled" + + +class TestFakeLlm: + def test_default_returns_empty_response(self): + llm = FakeLlm() + result = llm.generate(messages=[{"role": "user", "content": "hi"}]) + assert result.text == "" + assert result.model == "fake" + assert result.finish_reason == "stop" + + def test_records_calls(self): + llm = FakeLlm() + llm.generate(messages=[{"role": "user", "content": "hi"}], model="m1") + llm.generate_object( + messages=[{"role": "user", "content": "hi"}], + schema={"type": "object"}, + ) + assert len(llm.calls) == 2 + assert llm.calls[0]["method"] == "generate" + assert llm.calls[0]["model"] == "m1" + assert llm.calls[1]["method"] == "generate_object" + assert llm.calls[1]["schema"] == {"type": "object"} + + def test_canned_response_queue_is_fifo(self): + llm = FakeLlm(responses=[_llm_response("a"), _llm_response("b")]) + assert llm.generate(messages=[]).text == "a" + assert llm.generate(messages=[]).text == "b" + + def test_falls_back_to_empty_when_queue_exhausted(self): 
+ llm = FakeLlm(responses=[_llm_response("only")]) + assert llm.generate(messages=[]).text == "only" + assert llm.generate(messages=[]).text == "" + + def test_on_generate_callable_takes_precedence(self): + captured: list = [] + + def handler(**kwargs): + captured.append(kwargs) + return _llm_response("dynamic") + + llm = FakeLlm(responses=[_llm_response("queued")], on_generate=handler) + result = llm.generate(messages=[{"role": "user", "content": "hi"}], model="m1") + assert result.text == "dynamic" + assert len(captured) == 1 + assert captured[0]["model"] == "m1" + + +class TestFakeHttp: + def test_default_returns_200_empty(self): + http = FakeHttp() + result = http.fetch("https://example.com") + assert result.status == 200 + assert result.body == "" + + def test_records_calls(self): + http = FakeHttp() + http.fetch( + "https://example.com/api", + method="POST", + headers={"X": "1"}, + body="payload", + ) + assert len(http.calls) == 1 + assert http.calls[0]["url"] == "https://example.com/api" + assert http.calls[0]["method"] == "POST" + assert http.calls[0]["headers"] == {"X": "1"} + assert http.calls[0]["body"] == "payload" + + def test_canned_responses_fifo(self): + http = FakeHttp( + responses=[ + HttpResponse(status=201, headers={}, body="a"), + HttpResponse(status=404, headers={}, body="b"), + ] + ) + assert http.fetch("https://example.com").status == 201 + assert http.fetch("https://example.com").status == 404 + # Queue exhausted → empty default + assert http.fetch("https://example.com").status == 200 + + def test_on_fetch_callable(self): + def handler(url, **kwargs): + return HttpResponse(status=418, headers={}, body=url) + + http = FakeHttp(on_fetch=handler) + result = http.fetch("https://teapot") + assert result.status == 418 + assert result.body == "https://teapot" + + +class TestFakeTools: + def test_list_returns_provided_tools(self): + tool = ToolDefinition(name="echo", description="d", input_schema={}) + tools = FakeTools(tools=[tool]) + assert 
tools.list() == [tool] + + def test_call_dispatches_to_handler(self): + tools = FakeTools(handlers={"add": lambda args: {"sum": args["a"] + args["b"]}}) + result = tools.call("add", {"a": 1, "b": 2}) + assert result == {"sum": 3} + + def test_call_records(self): + tools = FakeTools(handlers={"echo": lambda args: args}) + tools.call("echo", {"x": 1}) + tools.call("echo", {"y": 2}) + assert tools.calls == [("echo", {"x": 1}), ("echo", {"y": 2})] + + def test_unhandled_tool_raises(self): + tools = FakeTools() + with pytest.raises(ToolCallError, match="no handler registered"): + tools.call("missing", {}) + + +class TestFakeStream: + def test_emit_records_event(self): + stream = FakeStream() + stream.emit("custom-event", {"k": "v"}) + assert stream.events == [("custom-event", {"k": "v"})] + + def test_progress_records_canonical_event(self): + stream = FakeStream() + stream.progress("doing thing", tool_name="my-tool") + assert stream.events == [("data-tool-progress", {"toolName": "my-tool", "content": "doing thing"})] + + def test_progress_defaults_tool_name_to_agent(self): + stream = FakeStream() + stream.progress("step") + assert stream.events[0][1] == {"toolName": "agent", "content": "step"} + + def test_intent_records_canonical_event(self): + stream = FakeStream() + stream.intent("planning to clone repo") + assert stream.events == [("data-intent", {"content": "planning to clone repo"})] From 4b72626db0e8f7e9b021f2003500a67d4fde8950 Mon Sep 17 00:00:00 2001 From: Lukasz Jagiello Date: Thu, 30 Apr 2026 19:45:50 -0700 Subject: [PATCH 2/3] docs: add how-to guide and AgentContext reference for testing protocols --- packages/python/docs/README.md | 1 + .../python/docs/how-to/unit-test-agents.md | 162 ++++++++++++++++++ .../python/docs/reference/agent-context.md | 18 +- 3 files changed, 177 insertions(+), 4 deletions(-) create mode 100644 packages/python/docs/how-to/unit-test-agents.md diff --git a/packages/python/docs/README.md b/packages/python/docs/README.md index 
c1e8fac..bcd90b6 100644 --- a/packages/python/docs/README.md +++ b/packages/python/docs/README.md @@ -21,6 +21,7 @@ Task-focused recipes for common patterns: | [Use MCP tools](how-to/use-mcp-tools.md) | Invoke Model Context Protocol servers | | [Handle structured input](how-to/handle-structured-input.md) | Extract JSON from Friday's enriched prompts | | [Stream progress](how-to/stream-progress.md) | Emit real-time updates to the UI | +| [Unit-test agents](how-to/unit-test-agents.md) | Test agent handlers with `make_test_context()` and fakes | ## Reference diff --git a/packages/python/docs/how-to/unit-test-agents.md b/packages/python/docs/how-to/unit-test-agents.md new file mode 100644 index 0000000..7b795ed --- /dev/null +++ b/packages/python/docs/how-to/unit-test-agents.md @@ -0,0 +1,162 @@ +# Unit-test agents + +Friday agents are plain Python functions that take a prompt and an +`AgentContext`. To test them in isolation you need an `AgentContext` whose +capability fields don't actually talk to the daemon — that's what +`make_test_context()` is for. + +## The 30-second version + +```python +from friday_agent_sdk import make_test_context + +from my_agent import execute # the @agent-decorated handler + + +def test_echoes_prompt(): + ctx = make_test_context() + result = execute("hello", ctx) + assert result.data == "hello" +``` + +`make_test_context()` returns an `AgentContext` with `Fake*` instances for +`ctx.llm`, `ctx.http`, `ctx.tools`, and `ctx.stream`. Each fake records every +call and returns a permissive default (except `FakeTools`, which raises `ToolCallError` for any tool without a registered handler) — no daemon, no NATS, no API keys. 
+ +## Asserting on emitted progress + +`FakeStream` records every event into `ctx.stream.events`: + +```python +from friday_agent_sdk import make_test_context + + +def test_emits_progress(): + ctx = make_test_context() + + execute("hi", ctx) + + assert ctx.stream.events == [ + ("data-tool-progress", {"toolName": "agent", "content": "Starting"}), + ("data-tool-progress", {"toolName": "agent", "content": "Done"}), + ] +``` + +## Stubbing LLM responses + +Pass canned responses (FIFO queue) or a callable: + +```python +from friday_agent_sdk import LlmResponse, make_test_context +from friday_agent_sdk.testing import FakeLlm + + +def test_uses_llm_output(): + fake_llm = FakeLlm( + responses=[ + LlmResponse( + text="42", + object=None, + model="fake", + usage={}, + finish_reason="stop", + ) + ] + ) + ctx = make_test_context(llm=fake_llm) + + result = execute("what is 6 * 7?", ctx) + + assert result.data == "42" + assert fake_llm.calls[0]["messages"][-1]["content"] == "what is 6 * 7?" +``` + +For dynamic responses, use `on_generate`: + +```python +fake_llm = FakeLlm( + on_generate=lambda messages, **kwargs: LlmResponse( + text=f"echo: {messages[-1]['content']}", + object=None, + model="fake", + usage={}, + finish_reason="stop", + ) +) +``` + +## Stubbing HTTP responses + +```python +from friday_agent_sdk import HttpResponse, make_test_context +from friday_agent_sdk.testing import FakeHttp + + +fake_http = FakeHttp( + responses=[ + HttpResponse(status=200, headers={}, body='{"ok": true}'), + HttpResponse(status=429, headers={"Retry-After": "10"}, body=""), + ] +) +ctx = make_test_context(http=fake_http) +``` + +`FakeHttp` also accepts an `on_fetch=callable` for URL-aware logic. 
+ +## Stubbing tool calls + +`FakeTools` requires you to register handlers explicitly — unhandled calls +raise `ToolCallError` rather than silently returning empty results: + +```python +from friday_agent_sdk import ToolDefinition, make_test_context +from friday_agent_sdk.testing import FakeTools + + +fake_tools = FakeTools( + tools=[ + ToolDefinition(name="add", description="adds two numbers", input_schema={}), + ], + handlers={"add": lambda args: {"sum": args["a"] + args["b"]}}, +) +ctx = make_test_context(tools=fake_tools) +``` + +## Custom protocol implementations + +You don't have to use the `Fake*` classes. The capability fields are typed as +**protocols** (`LlmProtocol`, `HttpProtocol`, `ToolsProtocol`, `StreamProtocol`), +so any object with the right method shape is accepted: + +```python +class RecordingLlm: + def __init__(self): + self.seen: list[str] = [] + + def generate(self, messages, **kwargs): + self.seen.append(messages[-1]["content"]) + return LlmResponse(text="ok", object=None, model="x", usage={}, finish_reason="stop") + + def generate_object(self, messages, schema, **kwargs): + return LlmResponse(text=None, object={}, model="x", usage={}, finish_reason="stop") + + +ctx = make_test_context(llm=RecordingLlm()) +``` + +This is the same mechanism a custom production gateway would use — the +protocols are the contract, the `Fake*` classes are convenience. 
+ +## What goes in `make_test_context()` + +| Argument | Type | Default | +| --------------- | ------------------- | --------------------- | +| `env` | `dict[str, str]` | `{}` | +| `config` | `dict` | `{}` | +| `skills` | `list[Skill...]` | `[]` | +| `session` | `SessionData?` | `None` | +| `output_schema` | `dict?` | `None` | +| `llm` | `LlmProtocol?` | `FakeLlm()` | +| `http` | `HttpProtocol?` | `FakeHttp()` | +| `tools` | `ToolsProtocol?` | `FakeTools()` | +| `stream` | `StreamProtocol?` | `FakeStream()` | diff --git a/packages/python/docs/reference/agent-context.md b/packages/python/docs/reference/agent-context.md index aab1923..4f49cd7 100644 --- a/packages/python/docs/reference/agent-context.md +++ b/packages/python/docs/reference/agent-context.md @@ -11,12 +11,22 @@ class AgentContext: config: dict = field(default_factory=dict) session: SessionData | None = None output_schema: dict | None = None - tools: Tools = field(default_factory=_uninitialized_tools) - llm: Llm = field(default_factory=_uninitialized_llm) - http: Http = field(default_factory=_uninitialized_http) - stream: StreamEmitter = field(default_factory=_uninitialized_stream) + tools: ToolsProtocol = field(default_factory=_uninitialized_tools) + llm: LlmProtocol = field(default_factory=_uninitialized_llm) + http: HttpProtocol = field(default_factory=_uninitialized_http) + stream: StreamProtocol = field(default_factory=_uninitialized_stream) ``` +The capability fields are typed as **structural protocols** so test doubles +or custom gateways can be substituted without subclassing. The default +factories return the production NATS-backed `Tools` / `Llm` / `Http` / +`StreamEmitter` classes, which all satisfy the protocols natively. + +For unit tests, use +[`make_test_context()`](../how-to/unit-test-agents.md) — it wires up +`FakeTools`, `FakeLlm`, `FakeHttp`, and `FakeStream` so handlers can run +without a Friday daemon. 
+ ## Fields ### `env` From 5242a99de75d8685fd37fb8628fe9b9c87407cd7 Mon Sep 17 00:00:00 2001 From: Lukasz Jagiello Date: Thu, 30 Apr 2026 20:02:44 -0700 Subject: [PATCH 3/3] style: vp fmt unit-test-agents docs --- .../python/docs/how-to/unit-test-agents.md | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/python/docs/how-to/unit-test-agents.md b/packages/python/docs/how-to/unit-test-agents.md index 7b795ed..16ed72e 100644 --- a/packages/python/docs/how-to/unit-test-agents.md +++ b/packages/python/docs/how-to/unit-test-agents.md @@ -149,14 +149,14 @@ protocols are the contract, the `Fake*` classes are convenience. ## What goes in `make_test_context()` -| Argument | Type | Default | -| --------------- | ------------------- | --------------------- | -| `env` | `dict[str, str]` | `{}` | -| `config` | `dict` | `{}` | -| `skills` | `list[Skill...]` | `[]` | -| `session` | `SessionData?` | `None` | -| `output_schema` | `dict?` | `None` | -| `llm` | `LlmProtocol?` | `FakeLlm()` | -| `http` | `HttpProtocol?` | `FakeHttp()` | -| `tools` | `ToolsProtocol?` | `FakeTools()` | -| `stream` | `StreamProtocol?` | `FakeStream()` | +| Argument | Type | Default | +| --------------- | ----------------- | -------------- | +| `env` | `dict[str, str]` | `{}` | +| `config` | `dict` | `{}` | +| `skills` | `list[Skill...]` | `[]` | +| `session` | `SessionData?` | `None` | +| `output_schema` | `dict?` | `None` | +| `llm` | `LlmProtocol?` | `FakeLlm()` | +| `http` | `HttpProtocol?` | `FakeHttp()` | +| `tools` | `ToolsProtocol?` | `FakeTools()` | +| `stream` | `StreamProtocol?` | `FakeStream()` |