From c192c78673f45be2be22d05faa2170c25401f8e3 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 09:57:39 -0500 Subject: [PATCH 01/27] Fresh diff --- docs/source/agent.py | 4 +- effectful/handlers/llm/completions.py | 467 ++++++++----------- tests/test_handlers_llm.py | 101 ---- tests/test_handlers_llm_provider.py | 8 +- tests/test_handlers_llm_template.py | 4 +- tests/test_handlers_llm_tool_calling_book.py | 4 +- tests/test_handlers_llm_tool_calling_poem.py | 4 +- 7 files changed, 199 insertions(+), 393 deletions(-) diff --git a/docs/source/agent.py b/docs/source/agent.py index 1ab8794b..7682429d 100644 --- a/docs/source/agent.py +++ b/docs/source/agent.py @@ -3,8 +3,8 @@ from effectful.handlers.llm import Template from effectful.handlers.llm.completions import ( LiteLLMProvider, + call_user, compute_response, - format_model_input, ) from effectful.ops.semantics import fwd, handler from effectful.ops.syntax import defop @@ -31,7 +31,7 @@ def wrapper(self, *args, **kwargs): with handler( { Agent.current_agent: lambda: self, - format_model_input: self._format_model_input, + call_user: self._format_model_input, compute_response: self._compute_response, } ): diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index ad9ee207..7fb64e01 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -1,346 +1,253 @@ -import contextlib +import collections +import collections.abc import functools import inspect import string -import traceback -import typing -from collections.abc import Callable, Mapping +import textwrap from typing import Any import litellm import pydantic from litellm import ( - Choices, + ChatCompletionMessageToolCall, + ChatCompletionTextObject, + ChatCompletionToolParam, Message, - OpenAIChatCompletionToolParam, - OpenAIMessageContent, OpenAIMessageContentListBlock, ) -from litellm.types.utils import ModelResponse from effectful.handlers.llm import Template, 
Tool -from effectful.handlers.llm.encoding import Encodable -from effectful.ops.semantics import fwd, handler -from effectful.ops.syntax import ObjectInterpretation, defop, implements - - -class _OpenAIPromptFormatter(string.Formatter): - def format_as_messages( - self, format_str: str, /, *args, **kwargs - ) -> list[OpenAIMessageContentListBlock]: - prompt_parts: list[OpenAIMessageContentListBlock] = [] - current_text = "" - - def push_current_text(): - nonlocal current_text - if current_text: - prompt_parts.append({"type": "text", "text": current_text}) - current_text = "" - - for literal, field_name, format_spec, conversion in self.parse(format_str): - current_text += literal - - if field_name is not None: - obj, _ = self.get_field(field_name, args, kwargs) - part = self.convert_field(obj, conversion) - # special casing for text - if ( - isinstance(part, list) - and len(part) == 1 - and part[0]["type"] == "text" - ): - current_text += self.format_field( - part[0]["text"], format_spec if format_spec else "" - ) - elif isinstance(part, list): - push_current_text() - prompt_parts.extend(part) - else: - prompt_parts.append(part) - - push_current_text() - return prompt_parts - - -@defop -@functools.wraps(litellm.completion) -def completion(*args, **kwargs) -> Any: - """Low-level LLM request. Handlers may log/modify requests and delegate via fwd(). +from effectful.handlers.llm.encoding import Encodable, type_to_encodable_type +from effectful.ops.semantics import fwd +from effectful.ops.syntax import ObjectInterpretation, implements +from effectful.ops.types import Operation - This effect is emitted for model request/response rounds so handlers can - observe/log requests. 
- """ - return litellm.completion(*args, **kwargs) +def _parameter_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: + return pydantic.create_model( + "Params", + __config__={"extra": "forbid"}, + **{ + name: type_to_encodable_type(param.annotation).t + for name, param in sig.parameters.items() + }, # type: ignore + ) -def parameter_model( - tool: Tool, ctx: Mapping[str, Any] | None = None -) -> type[pydantic.BaseModel]: - fields: dict[str, Any] = { - name: Encodable.define(param.annotation, ctx).enc - for name, param in tool.__signature__.parameters.items() - } - parameter_model = pydantic.create_model( - "Params", +def _response_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: + return pydantic.create_model( + "Response", + value=type_to_encodable_type(sig.return_annotation).t, __config__={"extra": "forbid"}, - **fields, ) - return parameter_model -def function_definition( - tool: Tool, ctx: Mapping[str, Any] | None = None -) -> OpenAIChatCompletionToolParam: - param_model = parameter_model(tool, ctx) +def _tool_model(tool: Tool) -> ChatCompletionToolParam: + param_model = _parameter_model(inspect.signature(tool)) response_format = litellm.utils.type_to_response_format_param(param_model) - description = tool.__default__.__doc__ assert response_format is not None - assert description is not None + assert tool.__default__.__doc__ is not None return { "type": "function", "function": { "name": tool.__name__, - "description": description, + "description": textwrap.dedent(tool.__default__.__doc__), "parameters": response_format["json_schema"]["schema"], "strict": True, }, } -def call_with_json_args( - tool: Tool, context: Mapping[str, Any], json_str: str -) -> OpenAIMessageContent: +@Operation.define +def call_assistant( + messages: collections.abc.Sequence[Message], + response_format: type[pydantic.BaseModel] | None, + tools: collections.abc.Mapping[str, ChatCompletionToolParam], + *, + model: str, + **kwargs, +) -> Message: + """Low-level LLM 
request. Handlers may log/modify requests and delegate via fwd(). + + This effect is emitted for model request/response rounds so handlers can + observe/log requests. + + """ + response: litellm.types.utils.ModelResponse = litellm.completion( + model=model, + messages=list(messages), + response_format=response_format, + tools=list(tools.values()), + **kwargs, + ) + choice = response.choices[0] + assert isinstance(choice, litellm.types.utils.Choices) + message: Message = choice.message + assert message.role == "assistant" + return message + + +@Operation.define +def call_tool( + tool_call: ChatCompletionMessageToolCall, + tools: collections.abc.Mapping[str, Tool], +) -> Message: """Implements a roundtrip call to a python function. Input is a json string representing an LLM tool call request parameters. The output is the serialised response to the model. """ - sig = tool.__signature__ - param_model = parameter_model(tool, context) - try: - # build dict of raw encodable types U - raw_args = param_model.model_validate_json(json_str) - - # use encoders to decode Us to python types T - params: dict[str, Any] = { - param_name: Encodable.define( - sig.parameters[param_name].annotation, context + assert tool_call.function.name is not None + tool = tools[tool_call.function.name] + json_str = tool_call.function.arguments + + sig = inspect.signature(tool) + param_model = _parameter_model(sig) + return_type = type_to_encodable_type(sig.return_annotation) + + # build dict of raw encodable types U + raw_args = param_model.model_validate_json(json_str) + + # use encoders to decode Us to python types T + bound_sig: inspect.BoundArguments = sig.bind( + **{ + param_name: type_to_encodable_type( + sig.parameters[param_name].annotation ).decode(getattr(raw_args, param_name)) for param_name in raw_args.model_fields_set } + ) - # call tool with python types - result = tool(**params) - - # serialize back to U using encoder for return type - encoded_ty = 
Encodable.define(sig.return_annotation, context) - encoded_value = encoded_ty.encode(result) - - # serialise back to Json - return encoded_ty.serialize(encoded_value) - except Exception as exn: - return str({"status": "failure", "exception": str(exn)}) + # call tool with python types + result = tool(*bound_sig.args, **bound_sig.kwargs) + # serialize back to U using encoder for return type + encoded_result = return_type.serialize(return_type.encode(result)) + return Message.model_validate(dict(role="tool", content=encoded_result)) -@defop -def compute_response(template: Template, model_input: list[Any]) -> ModelResponse: - """Produce a complete model response for an input message sequence. This may - involve multiple API requests if tools are invoked by the model. +@Operation.define +def call_user( + template: str, + env: collections.abc.Mapping[str, Any], +) -> Message: """ - ret_type = template.__signature__.return_annotation - tools = template.tools - - tool_schemas = [ - function_definition(t, template.__context__) for t in tools.values() - ] - response_encoding_type: type | None = Encodable.define( - ret_type, template.__context__ - ).enc - if response_encoding_type == str: - response_encoding_type = None - - # loop based on: https://cookbook.openai.com/examples/reasoning_function_calls - while True: - response: ModelResponse = completion( - messages=model_input, - response_format=pydantic.create_model( - "Response", value=response_encoding_type, __config__={"extra": "forbid"} - ) - if response_encoding_type - else None, - tools=tool_schemas, - ) - - choice: Choices = typing.cast(Choices, response.choices[0]) - message: Message = choice.message - if not message.tool_calls: - return response - model_input.append(message.to_dict()) - - for tool_call in message.tool_calls: - function = tool_call.function - function_name = function.name - assert function_name is not None - tool = tools[function_name] - tool_result = call_with_json_args( - tool, 
template.__context__, function.arguments - ) - model_input.append( - { - "role": "tool", - "tool_call_id": tool_call.id, - "name": function_name, - "content": tool_result, - } - ) - - -def decode_response[**P, T](template: Callable[P, T], response: ModelResponse) -> T: - """Decode an LLM response into an instance of the template return type. This - operation should raise if the output cannot be decoded. + Format a template applied to arguments into a user message. """ - assert isinstance(template, Template) - choice: Choices = typing.cast(Choices, response.choices[0]) - last_resp: Message = choice.message - assert isinstance(last_resp, Message) - result_str = last_resp.content or last_resp.reasoning_content - assert result_str - - ret_type = template.__signature__.return_annotation - encodable_ty = Encodable.define(ret_type, template.__context__) - - if encodable_ty.enc == str: - # if encoding as a type, value is just directly what the llm returned - value: Any = result_str - return typing.cast(T, encodable_ty.decode(value)) - else: - Result = pydantic.create_model("Result", value=encodable_ty.enc) - result = Result.model_validate_json(result_str) - assert isinstance(result, Result) - value = getattr(result, "value") - return typing.cast(T, encodable_ty.decode(value)) - - -@defop -def format_model_input[**P, T]( - template: Template[P, T], *args: P.args, **kwargs: P.kwargs -) -> list[Any]: - """Format a template applied to arguments into a sequence of input - messages. 
- - """ - bound_args = template.__signature__.bind(*args, **kwargs) - bound_args.apply_defaults() - # encode arguments - arguments = {} - for param in bound_args.arguments: - encoder = Encodable.define( - template.__signature__.parameters[param].annotation, template.__context__ - ) - encoded = encoder.encode(bound_args.arguments[param]) - arguments[param] = encoder.serialize(encoded) - - prompt = _OpenAIPromptFormatter().format_as_messages( - template.__prompt_template__, **arguments - ) + formatter: string.Formatter = string.Formatter() + prompt_parts: list[OpenAIMessageContentListBlock] = [] + + for literal, field_name, fspec, cspec in formatter.parse(textwrap.dedent(template)): + if literal: + prompt_parts.append(ChatCompletionTextObject(type="text", text=literal)) + if field_name is not None: + obj, _ = formatter.get_field(field_name, (), env) + encoder = type_to_encodable_type(type(obj)) + encoded_obj = encoder.serialize(encoder.encode(obj)) + for part in formatter.convert_field(encoded_obj, cspec): + if part["type"] == "text": + part["text"] = formatter.format_field(part["text"], fspec or "") + prompt_parts.append(part) # Note: The OpenAI api only seems to accept images in the 'user' role. The # effect of different roles on the model's response is currently unclear. - messages = [{"type": "message", "content": prompt, "role": "user"}] - return messages - + return Message.model_validate(dict(role="user", content=prompt_parts), strict=True) -class InstructionHandler(ObjectInterpretation): - """Scoped handler that injects additional instructions into model input. - This handler appends instruction messages to the formatted model input. - It's designed to be used as a scoped handler within RetryLLMHandler to - provide error feedback without polluting shared state. 
- """ +@Operation.define +def call_system(template: Template) -> collections.abc.Sequence[Message]: + """Get system instruction message(s) to prepend to all LLM prompts.""" + return () - def __init__(self, instruction: str): - """Initialize with an instruction message to inject. - Args: - instruction: The instruction text to append to model input. - """ - self.instruction = instruction - - @implements(format_model_input) - def _inject_instruction(self, template: Template, *args, **kwargs) -> list[Any]: - """Append instruction message to the formatted model input.""" - messages = fwd() - return messages + [ - {"type": "message", "content": self.instruction, "role": "user"} - ] +class LiteLLMProvider(ObjectInterpretation): + """Implements templates using the LiteLLM API.""" + config: collections.abc.Mapping[str, Any] -class RetryLLMHandler(ObjectInterpretation): - """Retries LLM requests if they fail. + def __init__(self, **config): + self.config = ( + inspect.signature(litellm.completion).bind_partial(**config).kwargs + ) - If the request fails, the handler retries with optional error feedback injected - into the prompt via scoped InstructionHandler instances. This ensures nested - template calls maintain independent error tracking. + @implements(call_assistant) + @functools.wraps(call_assistant) + def _completion(self, *args, **kwargs): + return fwd(*args, **{**self.config, **kwargs}) - Args: - max_retries: The maximum number of retries. - add_error_feedback: Whether to add error feedback to the prompt on retry. - exception_cls: The exception class to catch and retry on. 
- """ + @implements(Template.__apply__) + @staticmethod + def _call[**P, T](template: Template[P, T], *args: P.args, **kwargs: P.kwargs) -> T: + response_encoding_type: Encodable[T] = type_to_encodable_type( + inspect.signature(template).return_annotation + ) + response_model = _response_model(inspect.signature(template)) - def __init__( - self, - max_retries: int = 3, - add_error_feedback: bool = False, - exception_cls: type[BaseException] = Exception, - ): - self.max_retries = max_retries - self.add_error_feedback = add_error_feedback - self.exception_cls = exception_cls + messages: list[Message] = [*call_system(template)] - @implements(Template.__apply__) - def _retry_completion(self, template: Template, *args, **kwargs) -> Any: - """Retry template execution with error feedback injection via scoped handlers.""" - failures: list[str] = [] - - for attempt in range(self.max_retries): - try: - # Install scoped handlers for each accumulated failure - with contextlib.ExitStack() as stack: - for failure in failures: - stack.enter_context(handler(InstructionHandler(failure))) - return fwd() - except self.exception_cls: - if attempt == self.max_retries - 1: - raise # Last attempt, re-raise the exception - if self.add_error_feedback: - tb = traceback.format_exc() - failures.append(f"\nError from previous attempt:\n```\n{tb}```") - - # This should not be reached, but just in case - return fwd() + # encode arguments + bound_args = inspect.signature(template).bind(*args, **kwargs) + bound_args.apply_defaults() + env = template.__context__.new_child(bound_args.arguments) + message: Message = call_user(template.__prompt_template__, env) + messages.append(message) -class LiteLLMProvider(ObjectInterpretation): - """Implements templates using the LiteLLM API.""" + tools = { + **template.tools, + **{k: t for k, t in bound_args.arguments.items() if isinstance(t, Tool)}, + } + tool_specs = {k: _tool_model(t) for k, t in tools.items()} + + # loop based on: 
https://cookbook.openai.com/examples/reasoning_function_calls + while message.role != "assistant" or message.tool_calls: + message = call_assistant(messages, response_model, tool_specs) + messages.append(message) + + for tool_call in message.tool_calls or []: + message = call_tool(tool_call, tools) + messages.append(message) + + # return response + serialized_result = message.content or message.reasoning_content + encoded_result = ( + serialized_result + if response_model is None + else response_model.model_validate_json(serialized_result).value # type: ignore + ) + return response_encoding_type.decode(encoded_result) - model_name: str - config: dict[str, Any] - def __init__(self, model_name: str = "gpt-4o", **config): - self.model_name = model_name - self.config = inspect.signature(completion).bind_partial(**config).kwargs +class InstructionsHandler(ObjectInterpretation): + """Implements system instructions using the LiteLLM API.""" - @implements(completion) - def _completion(self, *args, **kwargs): - return fwd(self.model_name, *args, **(self.config | kwargs)) + instructions: str | collections.abc.Mapping[Template, str] - @implements(Template.__apply__) - def _call[**P, T]( - self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs - ) -> T: - model_input = format_model_input(template, *args, **kwargs) - resp = compute_response(template, model_input) - return decode_response(template, resp) + def __init__(self, instructions: str | collections.abc.Mapping[Template, str]): + if isinstance(instructions, collections.abc.Mapping): + assert instructions, "Instructions mapping cannot be empty." + assert all(instr for instr in instructions.values()), ( + "All instructions in the mapping must be non-empty." + ) + else: + assert instructions, "Instructions string cannot be empty." 
+ self.instructions = instructions + + @implements(call_system) + def _system_instruction( + self, template: Template + ) -> collections.abc.Sequence[Message]: + if isinstance(self.instructions, str): + return ( + *fwd(), + Message.model_validate(dict(role="system", content=self.instructions)), + ) + elif template in self.instructions: + return ( + *fwd(), + Message.model_validate( + dict(role="system", content=self.instructions[template]) + ), + ) + else: + return fwd() diff --git a/tests/test_handlers_llm.py b/tests/test_handlers_llm.py index 4ad3d81c..2c98a650 100644 --- a/tests/test_handlers_llm.py +++ b/tests/test_handlers_llm.py @@ -4,11 +4,6 @@ import pytest from effectful.handlers.llm import Template -from effectful.handlers.llm.completions import ( - RetryLLMHandler, - compute_response, - format_model_input, -) from effectful.handlers.llm.synthesis import ProgramSynthesis from effectful.handlers.llm.template import IsRecursive from effectful.ops.semantics import NotHandled, handler @@ -171,102 +166,6 @@ def _call[**P]( return self.success_response -def test_retry_handler_succeeds_after_failures(): - """Test that RetryLLMHandler retries and eventually succeeds.""" - provider = FailingThenSucceedingProvider( - fail_count=2, - success_response="Success after retries!", - exception_factory=lambda: ValueError("Temporary failure"), - ) - retry_handler = RetryLLMHandler(max_retries=3, exception_cls=ValueError) - - with handler(provider), handler(retry_handler): - result = limerick("test") - assert result == "Success after retries!" 
- assert provider.call_count == 3 # 2 failures + 1 success - - -def test_retry_handler_exhausts_retries(): - """Test that RetryLLMHandler raises after max retries exhausted.""" - provider = FailingThenSucceedingProvider( - fail_count=5, # More failures than retries - success_response="Never reached", - exception_factory=lambda: ValueError("Persistent failure"), - ) - retry_handler = RetryLLMHandler(max_retries=3, exception_cls=ValueError) - - with pytest.raises(ValueError, match="Persistent failure"): - with handler(provider), handler(retry_handler): - limerick("test") - - assert provider.call_count == 3 # Should have tried 3 times - - -def test_retry_handler_only_catches_specified_exception(): - """Test that RetryLLMHandler only catches the specified exception class.""" - provider = FailingThenSucceedingProvider( - fail_count=1, - success_response="Success", - exception_factory=lambda: TypeError("Wrong type"), # Different exception type - ) - retry_handler = RetryLLMHandler(max_retries=3, exception_cls=ValueError) - - # TypeError should not be caught, should propagate immediately - with pytest.raises(TypeError, match="Wrong type"): - with handler(provider), handler(retry_handler): - limerick("test") - - assert provider.call_count == 1 # Should have only tried once - - -def test_retry_handler_with_error_feedback(): - """Test that RetryLLMHandler includes error feedback when enabled.""" - - captured_messages: list[list] = [] - - class MessageCapturingProvider(ObjectInterpretation): - """Provider that captures formatted messages and fails once.""" - - def __init__(self): - self.call_count = 0 - - @implements(compute_response) - def _capture_and_respond(self, template: Template, messages: list): - """Capture messages at compute_response level (after error injection).""" - self.call_count += 1 - captured_messages.append(messages) - if self.call_count == 1: - raise ValueError("First attempt failed") - # Return a mock response - not used since we return directly - return 
None - - @implements(Template.__apply__) - def _call(self, template: Template, *args, **kwargs): - # Call the format/compute chain but return directly - messages = format_model_input(template, *args, **kwargs) - compute_response(template, messages) - return "Success on retry" - - provider = MessageCapturingProvider() - retry_handler = RetryLLMHandler( - max_retries=2, add_error_feedback=True, exception_cls=ValueError - ) - - with handler(provider), handler(retry_handler): - result = limerick("test") - assert result == "Success on retry" - - assert len(captured_messages) == 2 - # First call has original prompt only - first_msg_content = str(captured_messages[0]) - assert ( - "limerick" in first_msg_content.lower() or "theme" in first_msg_content.lower() - ) - # Second call should include error feedback - second_msg_content = str(captured_messages[1]) - assert "First attempt failed" in second_msg_content - - def test_template_captures_other_templates_in_lexical_context(): """Test that Templates defined in lexical scope are captured (orchestrator pattern).""" diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 8f60aafe..12bb3812 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -22,7 +22,7 @@ from effectful.handlers.llm import Template from effectful.handlers.llm.completions import ( LiteLLMProvider, - completion, + call_assistant, ) from effectful.handlers.llm.synthesis import ProgramSynthesis, SynthesisError from effectful.ops.semantics import fwd, handler @@ -83,7 +83,7 @@ def call_id(self): self.call_count += 1 return call_id - @implements(completion) + @implements(call_assistant) def _completion(self, *args, **kwargs): path = FIXTURE_DIR / f"{self.test_id}{self.call_id()}.json" if not REBUILD_FIXTURES: @@ -92,7 +92,7 @@ def _completion(self, *args, **kwargs): with path.open() as f: result = ModelResponse.model_validate(json.load(f)) return result - result = fwd(self.model_name, 
*args, **(self.config | kwargs)) + result = fwd(self.model, *args, **(self.config | kwargs)) path.parent.mkdir(exist_ok=True, parents=True) with path.open("w") as f: json.dump(result.model_dump(), f, indent=2, sort_keys=True) @@ -106,7 +106,7 @@ class LimitLLMCallsHandler(ObjectInterpretation): def __init__(self, max_calls: int): self.max_calls = max_calls - @implements(completion) + @implements(call_assistant) def _completion(self, *args, **kwargs): if self.no_calls >= self.max_calls: raise RuntimeError( diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index aa73467a..ac8df10a 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -3,7 +3,7 @@ import pytest from effectful.handlers.llm import Template, Tool -from effectful.handlers.llm.completions import format_model_input +from effectful.handlers.llm.completions import call_user from effectful.ops.semantics import NotHandled, handler from effectful.ops.syntax import ObjectInterpretation, implements @@ -155,7 +155,7 @@ class TemplateStringIntp(ObjectInterpretation): def _[**P, T]( self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs ) -> T: - model_input = format_model_input(template, *args, **kwargs) + model_input = call_user(template, *args, **kwargs) template_result = model_input[0]["content"] assert len(template_result) == 1 return template_result[0]["text"] diff --git a/tests/test_handlers_llm_tool_calling_book.py b/tests/test_handlers_llm_tool_calling_book.py index 2e96b858..81cd1d05 100644 --- a/tests/test_handlers_llm_tool_calling_book.py +++ b/tests/test_handlers_llm_tool_calling_book.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, Field from effectful.handlers.llm import Template, Tool -from effectful.handlers.llm.completions import LiteLLMProvider, completion +from effectful.handlers.llm.completions import LiteLLMProvider, call_assistant from effectful.ops.semantics import fwd, handler from effectful.ops.syntax 
import ObjectInterpretation, implements from effectful.ops.types import NotHandled @@ -36,7 +36,7 @@ class LimitLLMCallsHandler(ObjectInterpretation): max_calls: int = 10 call_count: int = 0 - @implements(completion) + @implements(call_assistant) def _completion(self, *args, **kwargs): self.call_count += 1 if self.call_count > self.max_calls: diff --git a/tests/test_handlers_llm_tool_calling_poem.py b/tests/test_handlers_llm_tool_calling_poem.py index 18aed942..e905c697 100644 --- a/tests/test_handlers_llm_tool_calling_poem.py +++ b/tests/test_handlers_llm_tool_calling_poem.py @@ -14,7 +14,7 @@ from effectful.handlers.llm import Template, Tool from effectful.handlers.llm.completions import ( LiteLLMProvider, - completion, + call_assistant, ) from effectful.ops.semantics import fwd, handler from effectful.ops.syntax import ObjectInterpretation, implements @@ -41,7 +41,7 @@ class LimitLLMCallsHandler(ObjectInterpretation): max_calls: int = 10 call_count: int = 0 - @implements(completion) + @implements(call_assistant) def _completion(self, *args, **kwargs): self.call_count += 1 if self.call_count > self.max_calls: From ea3d25c93682949afb78a748f39eb44d9247283d Mon Sep 17 00:00:00 2001 From: Eli Date: Fri, 16 Jan 2026 17:53:23 -0500 Subject: [PATCH 02/27] remove instructionhandler --- effectful/handlers/llm/completions.py | 41 ++------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 7fb64e01..13478fb5 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -4,7 +4,7 @@ import inspect import string import textwrap -from typing import Any +import typing import litellm import pydantic @@ -129,7 +129,7 @@ def call_tool( @Operation.define def call_user( template: str, - env: collections.abc.Mapping[str, Any], + env: collections.abc.Mapping[str, typing.Any], ) -> Message: """ Format a template applied to arguments into 
a user message. @@ -163,7 +163,7 @@ def call_system(template: Template) -> collections.abc.Sequence[Message]: class LiteLLMProvider(ObjectInterpretation): """Implements templates using the LiteLLM API.""" - config: collections.abc.Mapping[str, Any] + config: collections.abc.Mapping[str, typing.Any] def __init__(self, **config): self.config = ( @@ -216,38 +216,3 @@ def _call[**P, T](template: Template[P, T], *args: P.args, **kwargs: P.kwargs) - else response_model.model_validate_json(serialized_result).value # type: ignore ) return response_encoding_type.decode(encoded_result) - - -class InstructionsHandler(ObjectInterpretation): - """Implements system instructions using the LiteLLM API.""" - - instructions: str | collections.abc.Mapping[Template, str] - - def __init__(self, instructions: str | collections.abc.Mapping[Template, str]): - if isinstance(instructions, collections.abc.Mapping): - assert instructions, "Instructions mapping cannot be empty." - assert all(instr for instr in instructions.values()), ( - "All instructions in the mapping must be non-empty." - ) - else: - assert instructions, "Instructions string cannot be empty." 
- self.instructions = instructions - - @implements(call_system) - def _system_instruction( - self, template: Template - ) -> collections.abc.Sequence[Message]: - if isinstance(self.instructions, str): - return ( - *fwd(), - Message.model_validate(dict(role="system", content=self.instructions)), - ) - elif template in self.instructions: - return ( - *fwd(), - Message.model_validate( - dict(role="system", content=self.instructions[template]) - ), - ) - else: - return fwd() From f83d312e42175f3dba2f3b8de4b860f61405dd77 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Wed, 28 Jan 2026 15:12:13 -0500 Subject: [PATCH 03/27] updated internal interface to make all tests pass --- docs/source/agent.py | 12 +- effectful/handlers/llm/completions.py | 147 ++++++++++++++---- ...LLMProvider__test_integer_return_type.json | 50 +----- ...ompt_cross_endpoint[claude-haiku-4-5].json | 45 +----- ...le_prompt_cross_endpoint[gpt-4o-mini].json | 50 +----- ...e_prompt_multiple_models[gpt-4o-mini].json | 50 +----- ...le_prompt_multiple_models[gpt-5-nano].json | 50 +----- ...teLLMProvider__test_structured_output.json | 50 +----- ...eLLMProvider__test_with_config_params.json | 50 +----- ...eturn__test_pydantic_basemodel_return.json | 50 +----- ...ers_llm_provider.py__test_image_input.json | 50 +----- tests/test_handlers_llm_provider.py | 22 ++- tests/test_handlers_llm_template.py | 6 +- tests/test_handlers_llm_tool_calling_book.py | 6 +- tests/test_handlers_llm_tool_calling_poem.py | 6 +- 15 files changed, 211 insertions(+), 433 deletions(-) diff --git a/docs/source/agent.py b/docs/source/agent.py index 7682429d..8950fdc4 100644 --- a/docs/source/agent.py +++ b/docs/source/agent.py @@ -3,8 +3,9 @@ from effectful.handlers.llm import Template from effectful.handlers.llm.completions import ( LiteLLMProvider, + Message, + call_assistant, call_user, - compute_response, ) from effectful.ops.semantics import fwd, handler from effectful.ops.syntax import defop @@ -32,27 +33,26 @@ def 
wrapper(self, *args, **kwargs): { Agent.current_agent: lambda: self, call_user: self._format_model_input, - compute_response: self._compute_response, + call_assistant: self._compute_response, } ): return template(self, *args, **kwargs) setattr(cls, method_name, wrapper) - def _format_model_input(self, template, other, *args, **kwargs): + def _format_model_input(self, template, env): # update prompt with previous list of messages prompt = fwd() if Agent.current_agent() is self: - assert self is other self.state.extend(prompt) prompt = self.state return prompt def _compute_response(self, *args, **kwargs): # save response into persisted state - response = fwd() + response: Message = fwd() if Agent.current_agent() is self: - self.state.append(response.choices[0].message.model_dump()) + self.state.append(response) return response diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 13478fb5..71acdeb5 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -9,10 +9,14 @@ import litellm import pydantic from litellm import ( + ChatCompletionFunctionMessage, ChatCompletionMessageToolCall, ChatCompletionTextObject, + ChatCompletionToolMessage, ChatCompletionToolParam, - Message, + OpenAIChatCompletionAssistantMessage, + OpenAIChatCompletionSystemMessage, + OpenAIChatCompletionUserMessage, OpenAIMessageContentListBlock, ) @@ -22,6 +26,50 @@ from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import Operation +ToolCall: pydantic.TypeAdapter[list[ChatCompletionMessageToolCall]] = ( + pydantic.TypeAdapter(list[ChatCompletionMessageToolCall]) +) +MessageContent: pydantic.TypeAdapter[list[OpenAIMessageContentListBlock] | str] = ( + pydantic.TypeAdapter(list[OpenAIMessageContentListBlock] | str) +) +Message = ( + OpenAIChatCompletionAssistantMessage + | ChatCompletionToolMessage + | ChatCompletionFunctionMessage + | OpenAIChatCompletionSystemMessage + | 
OpenAIChatCompletionUserMessage +) + +MessageAdapter: pydantic.TypeAdapter[Message] = pydantic.TypeAdapter(Message) + + +def validate_data[T](adapter: pydantic.TypeAdapter[T], data: typing.Any) -> T: + adapter.validate_python(data, strict=True) + return adapter.dump_python(data) + + +def _message_role(message: Message) -> str: + return message["role"] + + +def _message_content(message: Message) -> list[OpenAIMessageContentListBlock] | str: + return validate_data(MessageContent, message.get("content")) + + +def _message_reasoning_content( + message: Message, +) -> list[OpenAIMessageContentListBlock] | str: + return validate_data(MessageContent, message.get("reasoning_content")) + + +def _message_tool_calls(message: Message) -> list[ChatCompletionMessageToolCall]: + tool_calls = message.get("tool_calls") or [] + assert isinstance(tool_calls, list) + return [ + ChatCompletionMessageToolCall.model_validate(tool_call) + for tool_call in tool_calls + ] + def _parameter_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: return pydantic.create_model( @@ -63,7 +111,6 @@ def call_assistant( messages: collections.abc.Sequence[Message], response_format: type[pydantic.BaseModel] | None, tools: collections.abc.Mapping[str, ChatCompletionToolParam], - *, model: str, **kwargs, ) -> Message: @@ -74,7 +121,7 @@ def call_assistant( """ response: litellm.types.utils.ModelResponse = litellm.completion( - model=model, + model, messages=list(messages), response_format=response_format, tools=list(tools.values()), @@ -82,9 +129,9 @@ def call_assistant( ) choice = response.choices[0] assert isinstance(choice, litellm.types.utils.Choices) - message: Message = choice.message + message: litellm.Message = choice.message assert message.role == "assistant" - return message + return validate_data(MessageAdapter, message.model_dump(mode="json")) @Operation.define @@ -123,35 +170,61 @@ def call_tool( # serialize back to U using encoder for return type encoded_result = 
return_type.serialize(return_type.encode(result)) - return Message.model_validate(dict(role="tool", content=encoded_result)) + return validate_data( + MessageAdapter, + dict(role="tool", content=encoded_result, tool_call_id=tool_call.id), + ) @Operation.define def call_user( template: str, env: collections.abc.Mapping[str, typing.Any], -) -> Message: +) -> list[Message]: """ Format a template applied to arguments into a user message. """ - formatter: string.Formatter = string.Formatter() - prompt_parts: list[OpenAIMessageContentListBlock] = [] + formatter = string.Formatter() + parts: list[OpenAIMessageContentListBlock] = [] + + buf: list[str] = [] - for literal, field_name, fspec, cspec in formatter.parse(textwrap.dedent(template)): + def flush_text() -> None: + if buf: + parts.append(ChatCompletionTextObject(type="text", text="".join(buf))) + buf.clear() + + for literal, field_name, format_spec, conversion in formatter.parse( + textwrap.dedent(template) + ): if literal: - prompt_parts.append(ChatCompletionTextObject(type="text", text=literal)) - if field_name is not None: - obj, _ = formatter.get_field(field_name, (), env) - encoder = type_to_encodable_type(type(obj)) - encoded_obj = encoder.serialize(encoder.encode(obj)) - for part in formatter.convert_field(encoded_obj, cspec): - if part["type"] == "text": - part["text"] = formatter.format_field(part["text"], fspec or "") - prompt_parts.append(part) + buf.append(literal) + + if field_name is None: + continue + + obj, _ = formatter.get_field(field_name, (), env) + encoder = type_to_encodable_type(type(obj)) + encoded_obj: list[OpenAIMessageContentListBlock] = encoder.serialize( + encoder.encode(obj) + ) + for part in encoded_obj: + if part["type"] == "text": + text = ( + formatter.convert_field(part["text"], conversion) + if conversion + else part["text"] + ) + buf.append(formatter.format_field(text, format_spec or "")) + else: + flush_text() + parts.append(part) + + flush_text() # Note: The OpenAI api only 
seems to accept images in the 'user' role. The # effect of different roles on the model's response is currently unclear. - return Message.model_validate(dict(role="user", content=prompt_parts), strict=True) + return [validate_data(MessageAdapter, dict(role="user", content=parts))] @Operation.define @@ -165,10 +238,11 @@ class LiteLLMProvider(ObjectInterpretation): config: collections.abc.Mapping[str, typing.Any] - def __init__(self, **config): - self.config = ( - inspect.signature(litellm.completion).bind_partial(**config).kwargs - ) + def __init__(self, model="gpt-4o", **config): + self.config = { + "model": model, + **inspect.signature(litellm.completion).bind_partial(**config).kwargs, + } @implements(call_assistant) @functools.wraps(call_assistant) @@ -176,8 +250,9 @@ def _completion(self, *args, **kwargs): return fwd(*args, **{**self.config, **kwargs}) @implements(Template.__apply__) - @staticmethod - def _call[**P, T](template: Template[P, T], *args: P.args, **kwargs: P.kwargs) -> T: + def _call[**P, T]( + self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs + ) -> T: response_encoding_type: Encodable[T] = type_to_encodable_type( inspect.signature(template).return_annotation ) @@ -190,8 +265,8 @@ def _call[**P, T](template: Template[P, T], *args: P.args, **kwargs: P.kwargs) - bound_args.apply_defaults() env = template.__context__.new_child(bound_args.arguments) - message: Message = call_user(template.__prompt_template__, env) - messages.append(message) + user_messages: list[Message] = call_user(template.__prompt_template__, env) + messages.extend(user_messages) tools = { **template.tools, @@ -200,16 +275,24 @@ def _call[**P, T](template: Template[P, T], *args: P.args, **kwargs: P.kwargs) - tool_specs = {k: _tool_model(t) for k, t in tools.items()} # loop based on: https://cookbook.openai.com/examples/reasoning_function_calls - while message.role != "assistant" or message.tool_calls: + tool_calls: list[ChatCompletionMessageToolCall] = [] + + 
message = messages[-1] + while _message_role(message) != "assistant" or tool_calls: message = call_assistant(messages, response_model, tool_specs) messages.append(message) - - for tool_call in message.tool_calls or []: + tool_calls = _message_tool_calls(message) + for tool_call in tool_calls: message = call_tool(tool_call, tools) messages.append(message) # return response - serialized_result = message.content or message.reasoning_content + serialized_result = _message_content(message) or _message_reasoning_content( + message + ) + assert isinstance(serialized_result, str), ( + "final response from the model should be a string" + ) encoded_result = ( serialized_result if response_model is None diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json index 51d944a3..7aa83a42 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "{\"value\":67}", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182739, - "id": "chatcmpl-CspFLXojfuOibqKzI1QdRgfOJtd36", - "model": "gpt-5-nano-2025-08-07", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": null, - "usage": { - "completion_tokens": 529, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 512, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 429, - "prompt_tokens_details": { - 
"audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 958 - } + "annotations": [], + "content": "{\"value\":73}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json index a3092084..98c1770d 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json @@ -1,42 +1,7 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "content": "Testing is a critical process that helps identify bugs, verify functionality, and ensure that software meets quality standards before deployment.", - "function_call": null, - "provider_specific_fields": { - "citations": null, - "thinking_blocks": null - }, - "role": "assistant", - "tool_calls": null - } - } - ], - "created": 1767182732, - "id": "chatcmpl-aa66067c-df8b-4adf-8978-68e8cdcaaa4f", - "model": "claude-haiku-4-5-20251001", - "object": "chat.completion", - "system_fingerprint": null, - "usage": { - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - "completion_tokens": 26, - "completion_tokens_details": null, - "prompt_tokens": 1145, - "prompt_tokens_details": { - "audio_tokens": null, - "cache_creation_token_details": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 0 - }, - "cache_creation_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 1171 - } + "content": "{\"value\": 
\"Testing is a critical process that ensures software quality and reliability by identifying bugs and verifying that applications work as expected.\"}", + "function_call": null, + "provider_specific_fields": null, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json index acf9c8d6..27f8cd26 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Testing is essential for ensuring the quality and reliability of software and products before they are released.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182730, - "id": "chatcmpl-CspFCkNxMBr1YyZtqzWJU1rMAQykg", - "model": "gpt-4o-mini-2024-07-18", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_c4585b5b9c", - "usage": { - "completion_tokens": 19, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 313, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 332 - } + "annotations": [], + "content": "{\"value\":\"Testing is an essential process to ensure the quality and 
functionality of a product before its release.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json index 4e924fce..7218eeaf 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Testing is a crucial process that ensures the quality and functionality of a product or system before its release.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182727, - "id": "chatcmpl-CspF9mdsKgygvf8Pogy7DCFgjydme", - "model": "gpt-4o-mini-2024-07-18", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_c4585b5b9c", - "usage": { - "completion_tokens": 21, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 313, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 334 - } + "annotations": [], + "content": "{\"value\":\"Testing is a critical process that ensures the quality and reliability of products before they reach consumers.\"}", + "function_call": 
null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json index 18a1d429..b0b22793 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Testing helps ensure product quality by catching issues early.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182728, - "id": "chatcmpl-CspFA6VcwnTzTxjuvpG76RCuRt3KM", - "model": "gpt-5-nano-2025-08-07", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": null, - "usage": { - "completion_tokens": 211, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 192, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 394, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 605 - } + "annotations": [], + "content": "{\"value\":\"Testing helps ensure reliability before release.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git 
a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json index 796d447f..3162a367 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "{\"value\":{\"genre\":\"action\",\"explanation\":\"The story centers on a rogue cop confronting an organized threat in a high-stakes, high-adrenaline setting (a skyscraper), with emphasis on pursuits, gunfights, and stunts typical of action films.\"}}", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182732, - "id": "chatcmpl-CspFEhYoCToW0c9B0GjXeKbjiIn0K", - "model": "gpt-5-nano-2025-08-07", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": null, - "usage": { - "completion_tokens": 451, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 384, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 541, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 992 - } + "annotations": [], + "content": "{\"value\":{\"genre\":\"action\",\"explanation\":\"The plot centers on high-intensity, action-driven conflict as a rogue cop confronts an evil group attempting to seize control of a skyscraper. 
It emphasizes combat, chases, and siege-style sequences typical of the action genre.\"}}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json index 5e3b75b5..4050c6a7 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "A deterministic test consistently produces the same results under the same conditions, ensuring reliability and repeatability in software testing.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182744, - "id": "chatcmpl-CspFQyXeEzOBDFdjK0p7UJn2Fe6RV", - "model": "gpt-4o-mini-2024-07-18", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_c4585b5b9c", - "usage": { - "completion_tokens": 23, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 314, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 337 - } + "annotations": [], + "content": "{\"value\":\"A deterministic test consistently produces the same results under the same conditions, ensuring reliability and repeatability in its outcomes.\"}", + 
"function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json index ec46553f..07c71634 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "{\"value\":{\"title\":\"The Spark Within the Spire\",\"rating\":4,\"summary\":\"The Spark Within the Spire follows a young student who discovers latent magical powers and earns a place at a venerable wizarding academy. Across his first year, he navigates challenging classes, budding friendships, and a growing sense of destiny as a looming threat quietly unfolds. The book blends classic wizard-school charm with inventive magic, delivering moments of wonder, humor, and quiet courage. 
While it uses familiar tropes, its strong character work and brisk pace make it a heartfelt coming-of-age tale with enough fresh twists to stay compelling for readers who enjoy magical school adventures.\"}}", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182755, - "id": "chatcmpl-CspFbHJXFBDmP4RrqnjVI0CDIiePi", - "model": "gpt-5-nano-2025-08-07", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": null, - "usage": { - "completion_tokens": 2192, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 2048, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 526, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 2718 - } + "annotations": [], + "content": "{\"value\":{\"title\":\"The First Spark at Eldoria Academy\",\"rating\":4,\"summary\":\"An energetic coming-of-age fantasy that revisits a classic premise with warmth and wit. A young wizard discovers his powers and enters a sprawling magical school, where spellwork, friendship, and rivalry illuminate his path to self-discovery. The world is vividly imagined, with inventive magical systems and memorable mentors. 
While some beats feel familiar, the narrative keeps the pace brisk and the characters earnest, delivering charm, heart, and enough intrigue to carry a promising series.\"}}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json index 713c124b..0bcba9e1 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "The image appears to depict a simple, pixelated smiley face with two rectangular eyes and a wide, curved mouth, giving a cheerful expression.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1767182821, - "id": "chatcmpl-CspGfBSEI7umvGbclbA4o1OsXZsil", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 30, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 567, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 597 - } + "annotations": [], + "content": "{\"value\":\"The image is a simple black and white pixel art representation of a smiling face, resembling an emoticon or smiley. 
It features two white square eyes and a wide rectangular mouth against a black background.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 12bb3812..1095370f 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -92,10 +92,10 @@ def _completion(self, *args, **kwargs): with path.open() as f: result = ModelResponse.model_validate(json.load(f)) return result - result = fwd(self.model, *args, **(self.config | kwargs)) + result = fwd(*args, **(self.config | kwargs)) path.parent.mkdir(exist_ok=True, parents=True) with path.open("w") as f: - json.dump(result.model_dump(), f, indent=2, sort_keys=True) + json.dump(result, f, indent=2, sort_keys=True) return result @@ -172,7 +172,7 @@ class TestLiteLLMProvider: def test_simple_prompt_multiple_models(self, request, model_name): """Test that LiteLLMProvider works with different model configurations.""" with ( - handler(ReplayLiteLLMProvider(request, model_name=model_name)), + handler(ReplayLiteLLMProvider(request, model=model_name)), handler(LimitLLMCallsHandler(max_calls=1)), ): result = simple_prompt("testing") @@ -189,7 +189,7 @@ def test_simple_prompt_multiple_models(self, request, model_name): def test_simple_prompt_cross_endpoint(self, request, model_name): """Test that ReplayLiteLLMProvider works across different API endpoints.""" with ( - handler(ReplayLiteLLMProvider(request, model_name=model_name)), + handler(ReplayLiteLLMProvider(request, model=model_name)), handler(LimitLLMCallsHandler(max_calls=1)), ): result = simple_prompt("testing") @@ -202,7 +202,7 @@ def test_structured_output(self, request): plot = "A rogue cop must stop a evil group from taking over a skyscraper." 
with ( - handler(ReplayLiteLLMProvider(request, model_name="gpt-5-nano")), + handler(ReplayLiteLLMProvider(request, model="gpt-5-nano")), handler(LimitLLMCallsHandler(max_calls=1)), ): classification = classify_genre(plot) @@ -217,7 +217,7 @@ def test_structured_output(self, request): def test_integer_return_type(self, request): """Test LiteLLMProvider with integer return type.""" with ( - handler(ReplayLiteLLMProvider(request, model_name="gpt-5-nano")), + handler(ReplayLiteLLMProvider(request, model="gpt-5-nano")), handler(LimitLLMCallsHandler(max_calls=1)), ): result = generate_number(100) @@ -231,9 +231,7 @@ def test_with_config_params(self, request): # Test with temperature parameter with ( handler( - ReplayLiteLLMProvider( - request, model_name="gpt-4o-mini", temperature=0.1 - ) + ReplayLiteLLMProvider(request, model="gpt-4o-mini", temperature=0.1) ), handler(LimitLLMCallsHandler(max_calls=1)), ): @@ -251,7 +249,7 @@ class TestProgramSynthesis: def test_generates_callable(self, request): """Test ProgramSynthesis handler generates executable code.""" with ( - handler(ReplayLiteLLMProvider(request, model_name="gpt-4o-mini")), + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), handler(ProgramSynthesis()), handler(LimitLLMCallsHandler(max_calls=1)), ): @@ -293,7 +291,7 @@ def categorise_image(image: Image.Image) -> str: @requires_openai def test_image_input(request): with ( - handler(ReplayLiteLLMProvider(request, model_name="gpt-4o")), + handler(ReplayLiteLLMProvider(request, model="gpt-4o")), handler(LimitLLMCallsHandler(max_calls=3)), ): assert any("smile" in categorise_image(smiley_face()) for _ in range(3)) @@ -319,7 +317,7 @@ def test_pydantic_basemodel_return(self, request): plot = "A young wizard discovers he has magical powers and goes to a school for wizards." 
with ( - handler(ReplayLiteLLMProvider(request, model_name="gpt-5-nano")), + handler(ReplayLiteLLMProvider(request, model="gpt-5-nano")), handler(LimitLLMCallsHandler(max_calls=1)), ): review = review_book(plot) diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index ac8df10a..1270934c 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -1,3 +1,4 @@ +import inspect from dataclasses import dataclass import pytest @@ -155,7 +156,10 @@ class TemplateStringIntp(ObjectInterpretation): def _[**P, T]( self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs ) -> T: - model_input = call_user(template, *args, **kwargs) + bound_args = inspect.signature(template).bind(*args, **kwargs) + bound_args.apply_defaults() + env = template.__context__.new_child(bound_args.arguments) + model_input = call_user(template.__prompt_template__, env) template_result = model_input[0]["content"] assert len(template_result) == 1 return template_result[0]["text"] diff --git a/tests/test_handlers_llm_tool_calling_book.py b/tests/test_handlers_llm_tool_calling_book.py index 81cd1d05..5759a22b 100644 --- a/tests/test_handlers_llm_tool_calling_book.py +++ b/tests/test_handlers_llm_tool_calling_book.py @@ -98,17 +98,17 @@ def get_book_recommendation(user_preference: str) -> BookRecommendation: class TestPydanticBaseModelToolCalls: @pytest.mark.parametrize( - "model_name", + "model", [ pytest.param("gpt-5-nano", marks=requires_openai), pytest.param("claude-sonnet-4-5-20250929", marks=requires_anthropic), ], ) - def test_pydantic_basemodel_tool_calling(self, model_name): + def test_pydantic_basemodel_tool_calling(self, model): """Test that templates with tools work with Pydantic BaseModel.""" book_rec_ctx = LoggingBookRecommendationInterpretation() with ( - handler(LiteLLMProvider(model_name=model_name)), + handler(LiteLLMProvider(model=model)), handler(LimitLLMCallsHandler(max_calls=4)), handler(book_rec_ctx), ): 
diff --git a/tests/test_handlers_llm_tool_calling_poem.py b/tests/test_handlers_llm_tool_calling_poem.py index e905c697..d36b1ce4 100644 --- a/tests/test_handlers_llm_tool_calling_poem.py +++ b/tests/test_handlers_llm_tool_calling_poem.py @@ -115,17 +115,17 @@ class TestToolCalling: """Tests for templates with tool calling functionality.""" @pytest.mark.parametrize( - "model_name", + "model", [ pytest.param("gpt-5-nano", marks=requires_openai), pytest.param("claude-sonnet-4-5-20250929", marks=requires_anthropic), ], ) - def test_tool_calling(self, model_name): + def test_tool_calling(self, model): """Test that templates with tools work with openai.""" poem_eval_ctx = LoggingPoemEvaluationInterpretation() with ( - handler(LiteLLMProvider(model_name=model_name)), + handler(LiteLLMProvider(model=model)), handler(LimitLLMCallsHandler(max_calls=4)), handler(poem_eval_ctx), ): From d8d52e75dbab64a8f3ff124fa07615dc277aefd7 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Wed, 28 Jan 2026 15:27:03 -0500 Subject: [PATCH 04/27] fixed caching tests --- ....py__test_litellm_caching_integration.json | 50 +++---------------- ...y__test_litellm_caching_integration_1.json | 46 +++-------------- ...y__test_litellm_caching_integration_2.json | 50 +++---------------- ..._litellm_caching_integration_disabled.json | 50 +++---------------- ...itellm_caching_integration_disabled_1.json | 50 +++---------------- ...er.py__test_litellm_caching_selective.json | 50 +++---------------- ....py__test_litellm_caching_selective_1.json | 50 +++---------------- ....py__test_litellm_caching_selective_2.json | 50 +++---------------- ....py__test_litellm_caching_selective_3.json | 46 +++-------------- ....py__test_litellm_caching_selective_4.json | 50 +++---------------- ....py__test_litellm_caching_selective_5.json | 50 +++---------------- tests/test_handlers_llm_provider.py | 6 +-- 12 files changed, 87 insertions(+), 461 deletions(-) diff --git 
a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json index 1b0b3a9f..0313940e 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are crisp, sweet fruits that come in a variety of colors, including red, green, and yellow.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462344, - "id": "chatcmpl-D2OHAYV2epNFazAphxZKLpJglzrCc", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 24, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 256 - } + "annotations": [], + "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, offering a delicious and healthy snack.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json index 9bcea7fb..066c79e6 100644 --- 
a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json @@ -1,42 +1,8 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are crisp, sweet fruits that come in a variety of colors, including red, green, and yellow.", - "function_call": null, - "provider_specific_fields": null, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462344, - "id": "chatcmpl-D2OHAYV2epNFazAphxZKLpJglzrCc", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 24, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 256 - } + "annotations": [], + "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, offering a delicious and healthy snack.\"}", + "function_call": null, + "provider_specific_fields": null, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json index 00cd45ef..c5518255 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - 
"index": 0, - "message": { - "annotations": [], - "content": "Oranges are a juicy and sweet citrus fruit, rich in vitamin C and enjoyed worldwide as a snack or juice.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462345, - "id": "chatcmpl-D2OHBpQKOkF5lxOZcOPvnuno9XCZS", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 24, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 256 - } + "annotations": [], + "content": "{\"value\":\"Oranges are juicy and vibrant citrus fruits known for their refreshing taste and high vitamin C content.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json index 6ebc5f62..cd071234 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are a crisp and juicy fruit that come in a variety of flavors and colors, from sweet to tart.", - "function_call": null, - 
"provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462584, - "id": "chatcmpl-D2OL2w9GHCV8jAbzqup98G9KU6yRq", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 24, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 256 - } + "annotations": [], + "content": "{\"value\":\"Apples are versatile fruits known for their crisp texture and sweet-tart flavor, often enjoyed fresh or used in a variety of culinary dishes.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json index b86900df..e1ccc0d3 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are crisp, sweet fruits that come in a variety of colors and flavors, making them a popular and healthy snack worldwide.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} 
- } - ], - "created": 1769462586, - "id": "chatcmpl-D2OL4MbMdKpmNB8y9bjtECBqAgZzX", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 27, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 259 - } + "annotations": [], + "content": "{\"value\":\"Apples are a crisp, juicy fruit that come in a variety of colors and flavors, often enjoyed fresh or used in cooking and baking.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json index b68dc25d..cbc8afaa 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are a crisp, sweet fruit enjoyed worldwide for their delicious taste and numerous health benefits.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462545, - "id": "chatcmpl-D2OKPlpJgl2jZ8DyplDSKiO9IwfYr", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - 
"usage": { - "completion_tokens": 20, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 252 - } + "annotations": [], + "content": "{\"value\":\"Apples are crunchy, sweet fruits that are enjoyed fresh or in various culinary dishes around the world.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json index 50942520..005acb0c 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are crisp, juicy fruits that come in a variety of colors and flavors, offering both health benefits and delicious taste.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462546, - "id": "chatcmpl-D2OKQHwQdpdvVbLnSke1iU5VTB2I8", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 26, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - 
"text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 258 - } + "annotations": [], + "content": "{\"value\":\"Apples are a versatile fruit known for their sweet taste and crisp texture, making them a popular snack and ingredient worldwide.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json index de7f4d31..727a94dc 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are nutritious, delicious fruits often enjoyed fresh or used in cooking and baking.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462547, - "id": "chatcmpl-D2OKR1t0VtB7Aq5ucg2A0cBUColFG", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 18, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 250 - } + "annotations": [], + 
"content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, often enjoyed fresh or used in various culinary dishes.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json index 801b0db0..5ac68392 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json @@ -1,42 +1,8 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are nutritious, delicious fruits often enjoyed fresh or used in cooking and baking.", - "function_call": null, - "provider_specific_fields": null, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462547, - "id": "chatcmpl-D2OKR1t0VtB7Aq5ucg2A0cBUColFG", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 18, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 250 - } + "annotations": [], + "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, often enjoyed fresh or used in various culinary dishes.\"}", + "function_call": null, + "provider_specific_fields": null, + "role": 
"assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json index 2daada04..9945a97d 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are crisp, juicy fruits that come in a variety of colors and flavors, ranging from sweet to tart.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462548, - "id": "chatcmpl-D2OKSpI9arm9GY0TUEQRKpccZRRzz", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 24, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 256 - } + "annotations": [], + "content": "{\"value\":\"Apples are a crisp, juicy fruit enjoyed worldwide for their delightful taste and nutritional benefits.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json 
b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json index b960a726..5cbd12cd 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json @@ -1,44 +1,10 @@ { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "annotations": [], - "content": "Apples are crisp, juicy fruits that come in a variety of colors and flavors, popular for both eating fresh and using in cooking.", - "function_call": null, - "provider_specific_fields": { - "refusal": null - }, - "role": "assistant", - "tool_calls": null - }, - "provider_specific_fields": {} - } - ], - "created": 1769462549, - "id": "chatcmpl-D2OKTKMxZspj5sauNISAznpOeivoR", - "model": "gpt-4o-2024-08-06", - "object": "chat.completion", - "service_tier": "default", - "system_fingerprint": "fp_deacdd5f6f", - "usage": { - "completion_tokens": 28, - "completion_tokens_details": { - "accepted_prediction_tokens": 0, - "audio_tokens": 0, - "image_tokens": null, - "reasoning_tokens": 0, - "rejected_prediction_tokens": 0, - "text_tokens": null - }, - "prompt_tokens": 232, - "prompt_tokens_details": { - "audio_tokens": 0, - "cached_tokens": 0, - "image_tokens": null, - "text_tokens": null - }, - "total_tokens": 260 - } + "annotations": [], + "content": "{\"value\":\"Apples are crisp and juicy fruits, often enjoyed fresh or used in a variety of culinary dishes.\"}", + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "role": "assistant", + "tool_calls": null } \ No newline at end of file diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 1095370f..e6091d7a 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -333,7 +333,7 @@ def test_pydantic_basemodel_return(self, request): def test_litellm_caching_integration(request): 
litellm.cache = Cache() - with handler(ReplayLiteLLMProvider(request, model_name="gpt-4o")): + with handler(ReplayLiteLLMProvider(request, model="gpt-4o")): p1 = simple_prompt("apples") p2 = simple_prompt("apples") p3 = simple_prompt("oranges") @@ -345,14 +345,14 @@ def test_litellm_caching_integration(request): def test_litellm_caching_integration_disabled(request): litellm.cache = Cache() - with handler(ReplayLiteLLMProvider(request, model_name="gpt-4o", caching=False)): + with handler(ReplayLiteLLMProvider(request, model="gpt-4o", caching=False)): p1 = simple_prompt("apples") p2 = simple_prompt("apples") assert p1 != p2, "if caching is not enabled, inputs produce different outputs" def test_litellm_caching_selective(request): - with handler(ReplayLiteLLMProvider(request, model_name="gpt-4o")): + with handler(ReplayLiteLLMProvider(request, model="gpt-4o")): p1 = simple_prompt("apples") p2 = simple_prompt("apples") assert p1 != p2, "when caching is not enabled, llm outputs should be different" From a322a35e595d9e17cfdac052a763eb750a43073e Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Wed, 28 Jan 2026 15:38:52 -0500 Subject: [PATCH 05/27] updated llm.ipynb --- docs/source/llm.ipynb | 153 +++++++++++++----------------------------- 1 file changed, 45 insertions(+), 108 deletions(-) diff --git a/docs/source/llm.ipynb b/docs/source/llm.ipynb index c6a815bc..f40a6fdb 100644 --- a/docs/source/llm.ipynb +++ b/docs/source/llm.ipynb @@ -13,15 +13,11 @@ "import dotenv\n", "import litellm\n", "import pydantic\n", - "from pydantic import ValidationError, field_validator\n", + "from pydantic import field_validator\n", "from pydantic_core import PydanticCustomError\n", "\n", "from effectful.handlers.llm import Template, Tool\n", - "from effectful.handlers.llm.completions import (\n", - " LiteLLMProvider,\n", - " RetryLLMHandler,\n", - " completion,\n", - ")\n", + "from effectful.handlers.llm.completions import LiteLLMProvider, call_assistant\n", "from 
effectful.ops.semantics import NotHandled, fwd, handler\n", "\n", "dotenv.load_dotenv()\n", @@ -389,27 +385,9 @@ "output_type": "stream", "text": [ "> Write a haiku on the theme of fish2. Do not use any tools.\n", - "None\n", - "> Write a haiku on the theme of fish. Do not use any tools.\n", - "In tranquil waters, \n", - "Swim the silent silver fish, \n", - "Ripples tell their tales.\n", - "> Write a haiku on the theme of fish2. Do not use any tools.\n", - "> None\n", - "> In tranquil waters, \n", - "Swim the silent silver fish, \n", - "Ripples tell their tales.\n", - "Here's a haiku on the theme of fish:\n", - "\n", - "In tranquil waters, \n", - "Swim the silent silver fish, \n", - "Ripples tell their tales.\n", + "{\"value\":\"Fish swim in silence, \\nRipples dance on the water, \\nNature's soft ballet.\"}\n", "> Write a limerick on the theme of fish. Do not use any tools.\n", - "In the deep ocean blue did a fish, \n", - "Dream of sailing on a grand, grand dish. \n", - "With a flip and a flop, \n", - "It leapt up to the top, \n", - "And found itself grant its one wish! 
\n" + "{\"value\":\"In the depths of the sea so brash,\\nSwam a fish with a silvery flash.\\nHe swayed left and right,\\nIn the cool moonlight,\\nAnd made quite a watery splash!\"}\n" ] } ], @@ -418,7 +396,8 @@ " result = fwd()\n", "\n", " # Print all messages (to show injected instructions like error feedback)\n", - " for msg in kwargs[\"messages\"]:\n", + " messages = args[0]\n", + " for msg in messages:\n", " content = msg.get(\"content\", \"\")\n", " if isinstance(content, list):\n", " text = \"\\n\".join(\n", @@ -429,7 +408,7 @@ " else:\n", " text = str(content)\n", " print(f\"> {text}\")\n", - " print(result.choices[0].message.content)\n", + " print(result[\"content\"])\n", " return result\n", "\n", "\n", @@ -440,7 +419,7 @@ " pass\n", "\n", "# Put completion handler innermost so it has highest precedence during the call\n", - "with handler(provider), handler({completion: log_llm}):\n", + "with handler(provider), handler({call_assistant: log_llm}):\n", " _ = haiku(\"fish2\")\n", " _ = limerick(\"fish\") # or use haiku(\"fish-2\") to avoid cache" ] @@ -465,104 +444,72 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'count_char', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", + "Sub-templates available to write_story: dict_keys(['limerick', 'haiku_no_cache', 'primes', 'cities', 'weather', 'vacation', 'write_joke', 'rate_joke', 'story_with_moral', 'story_funny'])\n", "=== Story with moral ===\n", "> Write a story about a curious cat in the style: moral.\n", - " Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\n", + "Available styles: 'moral' for a story with a lesson, 'funny' for humor. 
Use story_funny for humor, story_with_moral for a story with a lesson.\n", "None\n", "> Write a short story about a curious cat and end with a moral lesson. Do not use any tools.\n", - "Once upon a time, in a quaint little village surrounded by lush green meadows, there lived a curious cat named Whiskers. Whiskers had a coat as soft as silk and eyes that shimmered like emeralds. Known throughout the village for his insatiable curiosity, Whiskers could often be found exploring every nook and cranny of his surroundings.\n", + "{\"value\":\"**Title: The Curious Cat and the Mysterious Box**\\n\\nOnce upon a time in a quaint little village, there lived a curious cat named Whiskers. Whiskers had the fluffiest coat, the brightest green eyes, and a nose that twitched endlessly in search of new discoveries. Known throughout the village as the most inquisitive feline, Whiskers spent his days exploring every nook and cranny of the town.\\n\\nOne sunny morning, as Whiskers trotted down the cobblestone streets, he stumbled upon something unusual: a large cardboard box placed outside the general store. The box was sealed, its contents hidden from view, and a peculiar scent wafted from within. Whiskers peered around, checking if any villagers were watching, and then approached the box with feline finesse.\\n\\nWith a nimble jump, Whiskers landed atop the box. His curiosity piqued, he poked and prodded, trying to find a way to unravel its mystery. But the box remained stubbornly shut. Determined, Whiskers flexed his small but effective claws and worked them under the edges, pulling and tugging until the box gave way with a soft pop.\\n\\nJust then, the shopkeeper, Mr. Thompson, emerged from his store. Startled by the sight of his prized package being invaded by the village’s most curious cat, he called out, \\\"Whiskers! What are you up to?\\\"\\n\\nWith a proud meow, Whiskers triumphantly removed the top flap of the box. 
To his surprise, inside was a collection of exotic-smelling spices from lands far away. Whiskers sneezed, overwhelmed by the myriad of scents that danced around him. Mr. Thompson chuckled and gently moved Whiskers aside.\\n\\n\\\"Curiosity sure got the better of you, didn’t it, Whiskers?\\\" Mr. Thompson said kindly as he patted the cat softly. \\\"These are precious spices meant for tonight's village feast.\\\"\\n\\nWhiskers, sneezing again from the spicy intrigue, realized perhaps he’d gone too far this time. He certainly hadn’t meant to disrupt Mr. Thompson’s preparations or spoil any surprises.\\n\\nThat evening, as the village gathered to enjoy the feast, Whiskers sat by the fire, contemplating his adventurous day. Though he relished every opportunity for discovery, he learned a valuable lesson that day.\\n\\n**Moral:** \\\"Curiosity is a wonderful trait, but it must be tempered with respect and consideration for others.\\\"\\n\\nFrom that day forward, Whiskers continued his explorations with a bit more thoughtfulness, ensuring that while his curiosity took him on grand adventures, it never disrupted the lives of those around him.\"}\n", + "> Write a story about a curious cat in the style: moral.\n", + "Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\n", + "> None\n", + "> **Title: The Curious Cat and the Mysterious Box**\n", "\n", - "One sunny afternoon, Whiskers noticed a peculiar flicker coming from the old barn at the edge of the village. Naturally intrigued, he trotted over to investigate. As he neared the barn, he heard a soft, melodic tune that seemed to be coming from inside. The door was slightly ajar, and Whiskers, being the curious cat he was, pushed it open just enough to slip through.\n", + "Once upon a time in a quaint little village, there lived a curious cat named Whiskers. 
Whiskers had the fluffiest coat, the brightest green eyes, and a nose that twitched endlessly in search of new discoveries. Known throughout the village as the most inquisitive feline, Whiskers spent his days exploring every nook and cranny of the town.\n", "\n", - "Inside the barn, Whiskers discovered a grand scene: a group of mice were sitting in a circle, playing tiny instruments. They were so absorbed in their symphony that they didn't notice Whiskers at first. The mesmerized cat quietly sat down, his tail curling around his paws, listening intently to the enchanting melody.\n", + "One sunny morning, as Whiskers trotted down the cobblestone streets, he stumbled upon something unusual: a large cardboard box placed outside the general store. The box was sealed, its contents hidden from view, and a peculiar scent wafted from within. Whiskers peered around, checking if any villagers were watching, and then approached the box with feline finesse.\n", "\n", - "After the song ended, Whiskers clapped his paws together, startling the mice. They scattered in all directions, but soon realized that Whiskers meant no harm. Seeing that their secret was out, the mice tentatively returned and introduced themselves to Whiskers. They explained that they met every week to practice their music, away from the hustle and bustle of the village.\n", + "With a nimble jump, Whiskers landed atop the box. His curiosity piqued, he poked and prodded, trying to find a way to unravel its mystery. But the box remained stubbornly shut. Determined, Whiskers flexed his small but effective claws and worked them under the edges, pulling and tugging until the box gave way with a soft pop.\n", "\n", - "Intrigued, Whiskers asked if he could join them in their future rehearsals. Hesitant at first, the mice agreed, as long as Whiskers promised not to use his claws or reveal their secret hideout. Delighted, Whiskers eagerly accepted the terms.\n", + "Just then, the shopkeeper, Mr. 
Thompson, emerged from his store. Startled by the sight of his prized package being invaded by the village’s most curious cat, he called out, \"Whiskers! What are you up to?\"\n", "\n", - "From then on, Whiskers became a regular at the mice's rehearsals. He even learned to tap his paws to the rhythm, enhancing the music with his gentle percussion. The unlikely friendship became the talk of the village, showing everyone that curiosity can lead to unexpected and joyful connections.\n", + "With a proud meow, Whiskers triumphantly removed the top flap of the box. To his surprise, inside was a collection of exotic-smelling spices from lands far away. Whiskers sneezed, overwhelmed by the myriad of scents that danced around him. Mr. Thompson chuckled and gently moved Whiskers aside.\n", "\n", - "The moral of the story is: Curiosity, when approached with an open heart and mind, can lead to understanding and friendship, bridging gaps that seem insurmountable.\n", - "> Write a story about a curious cat in the style: moral.\n", - " Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\n", - "> None\n", - "> Once upon a time, in a quaint little village surrounded by lush green meadows, there lived a curious cat named Whiskers. Whiskers had a coat as soft as silk and eyes that shimmered like emeralds. Known throughout the village for his insatiable curiosity, Whiskers could often be found exploring every nook and cranny of his surroundings.\n", + "\"Curiosity sure got the better of you, didn’t it, Whiskers?\" Mr. Thompson said kindly as he patted the cat softly. \"These are precious spices meant for tonight's village feast.\"\n", "\n", - "One sunny afternoon, Whiskers noticed a peculiar flicker coming from the old barn at the edge of the village. Naturally intrigued, he trotted over to investigate. As he neared the barn, he heard a soft, melodic tune that seemed to be coming from inside. 
The door was slightly ajar, and Whiskers, being the curious cat he was, pushed it open just enough to slip through.\n", + "Whiskers, sneezing again from the spicy intrigue, realized perhaps he’d gone too far this time. He certainly hadn’t meant to disrupt Mr. Thompson’s preparations or spoil any surprises.\n", "\n", - "Inside the barn, Whiskers discovered a grand scene: a group of mice were sitting in a circle, playing tiny instruments. They were so absorbed in their symphony that they didn't notice Whiskers at first. The mesmerized cat quietly sat down, his tail curling around his paws, listening intently to the enchanting melody.\n", + "That evening, as the village gathered to enjoy the feast, Whiskers sat by the fire, contemplating his adventurous day. Though he relished every opportunity for discovery, he learned a valuable lesson that day.\n", "\n", - "After the song ended, Whiskers clapped his paws together, startling the mice. They scattered in all directions, but soon realized that Whiskers meant no harm. Seeing that their secret was out, the mice tentatively returned and introduced themselves to Whiskers. They explained that they met every week to practice their music, away from the hustle and bustle of the village.\n", + "**Moral:** \"Curiosity is a wonderful trait, but it must be tempered with respect and consideration for others.\"\n", "\n", - "Intrigued, Whiskers asked if he could join them in their future rehearsals. Hesitant at first, the mice agreed, as long as Whiskers promised not to use his claws or reveal their secret hideout. 
Delighted, Whiskers eagerly accepted the terms.\n", + "From that day forward, Whiskers continued his explorations with a bit more thoughtfulness, ensuring that while his curiosity took him on grand adventures, it never disrupted the lives of those around him.\n", + "{\"value\":\"\\n\\nWhiskers became a beloved symbol of curiosity and wisdom in the village, his story passed down to remind everyone of the importance of balancing the adventurous spirit with mindful compassion.\"}\n", "\n", - "From then on, Whiskers became a regular at the mice's rehearsals. He even learned to tap his paws to the rhythm, enhancing the music with his gentle percussion. The unlikely friendship became the talk of the village, showing everyone that curiosity can lead to unexpected and joyful connections.\n", "\n", - "The moral of the story is: Curiosity, when approached with an open heart and mind, can lead to understanding and friendship, bridging gaps that seem insurmountable.\n", - "The story of Whiskers, the curious cat, teaches us that curiosity, when embraced with an open heart, can lead to unexpected friendships and understanding. By exploring the world around him, Whiskers formed an unlikely bond with the village mice, reminding us all that differences can be bridged through open-mindedness and kindness.\n", - "The story of Whiskers, the curious cat, teaches us that curiosity, when embraced with an open heart, can lead to unexpected friendships and understanding. 
By exploring the world around him, Whiskers formed an unlikely bond with the village mice, reminding us all that differences can be bridged through open-mindedness and kindness.\n", + "Whiskers became a beloved symbol of curiosity and wisdom in the village, his story passed down to remind everyone of the importance of balancing the adventurous spirit with mindful compassion.\n", "\n", "=== Funny story ===\n", "> Write a story about a curious cat in the style: funny.\n", - " Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\n", + "Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\n", "None\n", - "> Write a funny, humorous story about a curious cat. Do not use any tools.\n", - "In the quaint little town of Whiskerton, lived a cat named Mr. Whiskers. Now, Mr. Whiskers wasn't your typical feline. While most cats were content with napping in sunlit spots or chasing after balls of yarn, Mr. Whiskers had a peculiar obsession with discovering more about the human world.\n", - "\n", - "One day, as he prowled around the kitchen, he caught a glimpse of the shimmering appliance humans seemed to worship every morning—the coffee maker. \"What sorcery is this?\" he pondered, his tail twitching with curiosity. Determined to unveil its mysteries, he hopped onto the counter and began his investigation.\n", - "\n", - "Mr. Whiskers nudged the buttons and was thrilled when they emitted a satisfying beep. Encouraged, he mewed in excitement. Emboldened by this, he pawed at a button that seemed promising and watched in awe as the coffee maker whirred to life, pouring steaming liquid into a cup. He peered in, sniffed, and recoiled. \"Why on earth would they drink this bitter potion?\" he thought, flicking his tail dismissively.\n", - "\n", - "But Mr. Whiskers was not to be deterred. 
Next, he sauntered into the bathroom, a place of endless intrigue, with its porcelain throne and swirling waterworks. Eyes wide with anticipation, he hopped onto the toilet tank and stretched, balancing perilously. With a deft bat of his paw, he managed to flush it. Startled by the sudden whirlpool, he leaped backward, knocking over a roll of toilet paper which comically unraveled across the floor like a celebratory ticker tape.\n", - "\n", - "Undeterred by the morning's escapades, Mr. Whiskers decided his next adventure would involve the mysterious glowing box—the television. He'd seen his humans stare at it with rapt attention, occasionally making odd noises that Mr. Whiskers assumed were their version of purring. With a theatrical leap, he landed on the remote and brought the TV to life, startling himself with the sudden blare of a cat food commercial. Eyes wide, he scampered under the couch, where he stayed until his heart stopped racing like a clock's pendulum.\n", - "\n", - "Returning to his usual feline grace—or so he hoped—Mr. Whiskers emerged with a nonchalant air, now convinced that the human world was delightfully bizarre, if not a tad overwhelming. \n", - "\n", - "From that day on, Mr. Whiskers took it upon himself to continue exploring, albeit with a bit more caution. He became the neighborhood legend, the cat who dared to decipher the human realm, earning himself nicknames from \"The Whiskerton Wonder\" to \"Ciao Bella Catastrope.\" Each adventure added new tales to his whiskers, leaving him with stories to purr about for all of his nine lives. And so, every day, Mr. Whiskers proved that curiosity, in fact, DOES have nine lives.\n", + "> Write a funny, humorous story about curious cat. Do not use any tools.\n", + "{\"value\":\"Once upon a time in the bustling town of Whiskerville, there lived an exceptionally curious cat named Mr. Whiskers. Mr. Whiskers was known far and wide for his endless curiosity about everything happening around him. 
Unlike other cats who were content chasing their tails or napping in sun patches, Mr. Whiskers spent his days investigating every nook and cranny of their cozy neighborhood.\\n\\nOne sunny afternoon, as Mr. Whiskers prowled through the backyards of Whiskerville, he stumbled upon something he'd never seen before. It was a peculiar, round object with colorful blades spinning rapidly. Little did Mr. Whiskers know that this was a drone, piloted by young Tommy from next door.\\n\\nDetermined to unravel this mystery, Mr. Whiskers mustered all his feline grace and leapt up a tree to get closer to the buzzing contraption. As he climbed higher, the branches swayed perilously under his dainty paws, but his excitement fuelled his balance.\\n\\nWhen he reached the drone, he batted at it with his paw, causing it to wobble wildly in the air. Mr. Whiskers’ eyes widened with delight as the drone tried to escape his playful clutches. Meanwhile, Tommy, watching his precious drone being nearly commandeered by a cat, hurriedly steered it in a zigzag pattern.\\n\\nAmused by this game of aerial chase, Mr. Whiskers continued his pawing assaults until finally, with a jubilant meow, he swiped it one too many times. The drone suddenly veered left, bounced off a tree trunk, and landed safely on a soft patch of grass, its rotors giving a final, defeated spin.\\n\\nWith pride, Mr. Whiskers descended the tree to inspect his new earthbound trophy, only to lose interest quickly when he realized it wasn't going to offer any more spectacular acrobatics. Tommy, laughing at the spectacle, came over to retrieve his drone and offered Mr. Whiskers a treat in peace offering.\\n\\nFrom that day on, Mr. Whiskers was crowned the king of curiosity in Whiskerville, and whenever an unusual noise, flash of color, or buzzing sound appeared, everyone knew that Mr. Whiskers’s curiosity would be piqued and an adventure would be afoot. 
He became the resident inventor's assistant, tirelessly investigating every tool, gizmo, and gadget until its mystery had been solved — or until a new distraction came fluttering by.\\n\\nAnd thus, Mr. Whiskers continued to live his days fueled by curiosity, making sure that the people of Whiskerville never had a dull day thanks to his hilarious antics and unending quest to understand the world around him.\"}\n", "> Write a story about a curious cat in the style: funny.\n", - " Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\n", + "Available styles: 'moral' for a story with a lesson, 'funny' for humor. Use story_funny for humor, story_with_moral for a story with a lesson.\n", "> None\n", - "> In the quaint little town of Whiskerton, lived a cat named Mr. Whiskers. Now, Mr. Whiskers wasn't your typical feline. While most cats were content with napping in sunlit spots or chasing after balls of yarn, Mr. Whiskers had a peculiar obsession with discovering more about the human world.\n", - "\n", - "One day, as he prowled around the kitchen, he caught a glimpse of the shimmering appliance humans seemed to worship every morning—the coffee maker. \"What sorcery is this?\" he pondered, his tail twitching with curiosity. Determined to unveil its mysteries, he hopped onto the counter and began his investigation.\n", - "\n", - "Mr. Whiskers nudged the buttons and was thrilled when they emitted a satisfying beep. Encouraged, he mewed in excitement. Emboldened by this, he pawed at a button that seemed promising and watched in awe as the coffee maker whirred to life, pouring steaming liquid into a cup. He peered in, sniffed, and recoiled. \"Why on earth would they drink this bitter potion?\" he thought, flicking his tail dismissively.\n", - "\n", - "But Mr. Whiskers was not to be deterred. 
Next, he sauntered into the bathroom, a place of endless intrigue, with its porcelain throne and swirling waterworks. Eyes wide with anticipation, he hopped onto the toilet tank and stretched, balancing perilously. With a deft bat of his paw, he managed to flush it. Startled by the sudden whirlpool, he leaped backward, knocking over a roll of toilet paper which comically unraveled across the floor like a celebratory ticker tape.\n", - "\n", - "Undeterred by the morning's escapades, Mr. Whiskers decided his next adventure would involve the mysterious glowing box—the television. He'd seen his humans stare at it with rapt attention, occasionally making odd noises that Mr. Whiskers assumed were their version of purring. With a theatrical leap, he landed on the remote and brought the TV to life, startling himself with the sudden blare of a cat food commercial. Eyes wide, he scampered under the couch, where he stayed until his heart stopped racing like a clock's pendulum.\n", + "> Once upon a time in the bustling town of Whiskerville, there lived an exceptionally curious cat named Mr. Whiskers. Mr. Whiskers was known far and wide for his endless curiosity about everything happening around him. Unlike other cats who were content chasing their tails or napping in sun patches, Mr. Whiskers spent his days investigating every nook and cranny of their cozy neighborhood.\n", "\n", - "Returning to his usual feline grace—or so he hoped—Mr. Whiskers emerged with a nonchalant air, now convinced that the human world was delightfully bizarre, if not a tad overwhelming. \n", + "One sunny afternoon, as Mr. Whiskers prowled through the backyards of Whiskerville, he stumbled upon something he'd never seen before. It was a peculiar, round object with colorful blades spinning rapidly. Little did Mr. Whiskers know that this was a drone, piloted by young Tommy from next door.\n", "\n", - "From that day on, Mr. 
Whiskers took it upon himself to continue exploring, albeit with a bit more caution. He became the neighborhood legend, the cat who dared to decipher the human realm, earning himself nicknames from \"The Whiskerton Wonder\" to \"Ciao Bella Catastrope.\" Each adventure added new tales to his whiskers, leaving him with stories to purr about for all of his nine lives. And so, every day, Mr. Whiskers proved that curiosity, in fact, DOES have nine lives.\n", - "In the quaint little town of Whiskerton, there lived a cat named Mr. Whiskers, who was anything but typical. While most cats found contentment in sunlit naps or whimsical yarn pursuits, Mr. Whiskers had a peculiar obsession: unraveling the mysteries of the human world.\n", + "Determined to unravel this mystery, Mr. Whiskers mustered all his feline grace and leapt up a tree to get closer to the buzzing contraption. As he climbed higher, the branches swayed perilously under his dainty paws, but his excitement fuelled his balance.\n", "\n", - "One day, during his usual kitchen prowling, he caught sight of the shimmering appliance humans seemed to worship every morning—the coffee maker. \"What sorcery is this?\" he mused, tail twitching with intrigue. Determined to demystify its wonders, Mr. Whiskers leapt onto the counter for a closer look.\n", + "When he reached the drone, he batted at it with his paw, causing it to wobble wildly in the air. Mr. Whiskers’ eyes widened with delight as the drone tried to escape his playful clutches. Meanwhile, Tommy, watching his precious drone being nearly commandeered by a cat, hurriedly steered it in a zigzag pattern.\n", "\n", - "With a nudge of his paw, he was thrilled when the device responded with a satisfying beep. Encouraged, he mewed in excitement and daringly pawed a button that looked promising. The coffee maker came to life, its mechanical heart whirring, producing a fragrant, steaming elixir. 
\"What potion captivates them?\" he thought, nose crinkling at the bitter aroma emanating from the cup.\n", + "Amused by this game of aerial chase, Mr. Whiskers continued his pawing assaults until finally, with a jubilant meow, he swiped it one too many times. The drone suddenly veered left, bounced off a tree trunk, and landed safely on a soft patch of grass, its rotors giving a final, defeated spin.\n", "\n", - "Yet, his curiosity was far from satiated. He now set his sights on the bathroom, a cavern of enigmas with its porcelain throne and swirling marvels. Eyes gleaming with anticipation, he balanced precariously on the toilet tank and, with a strategic paw, initiated a flush. Surprised by the ensuing whirlpool, Mr. Whiskers leapt backward, inadvertently unfurling a toilet paper cascade that spilled humorously across the floor, resembling celebratory confetti.\n", + "With pride, Mr. Whiskers descended the tree to inspect his new earthbound trophy, only to lose interest quickly when he realized it wasn't going to offer any more spectacular acrobatics. Tommy, laughing at the spectacle, came over to retrieve his drone and offered Mr. Whiskers a treat in peace offering.\n", "\n", - "Unflagged by his comedic morning, his next foray targeted the glowing enchantment known as the television. He'd observed humans gazing at it with rapt attention, emitting sounds that Mr. Whiskers assumed were their variant of purring. With a dramatic leap, he activated the remote, summoning the television's noisy life with a jarring cat food commercial. Instinctively, he scampered under the couch, heart racing like a frenzied pendulum.\n", + "From that day on, Mr. Whiskers was crowned the king of curiosity in Whiskerville, and whenever an unusual noise, flash of color, or buzzing sound appeared, everyone knew that Mr. Whiskers’s curiosity would be piqued and an adventure would be afoot. 
He became the resident inventor's assistant, tirelessly investigating every tool, gizmo, and gadget until its mystery had been solved — or until a new distraction came fluttering by.\n", "\n", - "Emerging with a newfound grace—or so he fancied—Mr. Whiskers resolved that the human world, though delightfully bizarre, was also engrossingly entertaining. \n", + "And thus, Mr. Whiskers continued to live his days fueled by curiosity, making sure that the people of Whiskerville never had a dull day thanks to his hilarious antics and unending quest to understand the world around him.\n", + "{\"value\":\"\\n\\nAnd while he never quite solved the enigma of flying objects, he sure brought laughter and joy to everyone around him, proving that sometimes the journey and the company make the best adventures. \\n\\nThe end.\"}\n", "\n", - "Mr. Whiskers' adventurous pursuits in Whiskerton earned him legendary status, bearing titles from \"The Whiskerton Wonder\" to \"Ciao Bella Catastrope.\" Each escapade added new tales to share and whiskers, ensuring this curious cat had captivating stories purred through all nine lives. Indeed, curiosity proved it DOES have nine lives.\n", - "In the quaint little town of Whiskerton, there lived a cat named Mr. Whiskers, who was anything but typical. While most cats found contentment in sunlit naps or whimsical yarn pursuits, Mr. Whiskers had a peculiar obsession: unraveling the mysteries of the human world.\n", "\n", - "One day, during his usual kitchen prowling, he caught sight of the shimmering appliance humans seemed to worship every morning—the coffee maker. \"What sorcery is this?\" he mused, tail twitching with intrigue. Determined to demystify its wonders, Mr. Whiskers leapt onto the counter for a closer look.\n", + "And while he never quite solved the enigma of flying objects, he sure brought laughter and joy to everyone around him, proving that sometimes the journey and the company make the best adventures. 
\n", "\n", - "With a nudge of his paw, he was thrilled when the device responded with a satisfying beep. Encouraged, he mewed in excitement and daringly pawed a button that looked promising. The coffee maker came to life, its mechanical heart whirring, producing a fragrant, steaming elixir. \"What potion captivates them?\" he thought, nose crinkling at the bitter aroma emanating from the cup.\n", - "\n", - "Yet, his curiosity was far from satiated. He now set his sights on the bathroom, a cavern of enigmas with its porcelain throne and swirling marvels. Eyes gleaming with anticipation, he balanced precariously on the toilet tank and, with a strategic paw, initiated a flush. Surprised by the ensuing whirlpool, Mr. Whiskers leapt backward, inadvertently unfurling a toilet paper cascade that spilled humorously across the floor, resembling celebratory confetti.\n", - "\n", - "Unflagged by his comedic morning, his next foray targeted the glowing enchantment known as the television. He'd observed humans gazing at it with rapt attention, emitting sounds that Mr. Whiskers assumed were their variant of purring. With a dramatic leap, he activated the remote, summoning the television's noisy life with a jarring cat food commercial. Instinctively, he scampered under the couch, heart racing like a frenzied pendulum.\n", - "\n", - "Emerging with a newfound grace—or so he fancied—Mr. Whiskers resolved that the human world, though delightfully bizarre, was also engrossingly entertaining. \n", - "\n", - "Mr. Whiskers' adventurous pursuits in Whiskerton earned him legendary status, bearing titles from \"The Whiskerton Wonder\" to \"Ciao Bella Catastrope.\" Each escapade added new tales to share and whiskers, ensuring this curious cat had captivating stories purred through all nine lives. 
Indeed, curiosity proved it DOES have nine lives.\n" + "The end.\n" ] } ], @@ -593,7 +540,7 @@ "assert story_funny in write_story.tools.values()\n", "print(\"Sub-templates available to write_story:\", write_story.tools.keys())\n", "\n", - "with handler(provider), handler({completion: log_llm}):\n", + "with handler(provider), handler({call_assistant: log_llm}):\n", " print(\"=== Story with moral ===\")\n", " print(write_story(\"a curious cat\", \"moral\"))\n", " print()\n", @@ -679,9 +626,7 @@ " raise NotHandled\n", "\n", "\n", - "retry_handler = RetryLLMHandler(max_retries=5, add_error_feedback=True)\n", - "\n", - "with handler(provider), handler(retry_handler), handler({completion: log_llm}):\n", + "with handler(provider), handler({call_assistant: log_llm}):\n", " result = fetch_data()\n", " print(f\"Result: {result}\", \"Retries:\", call_count)" ] @@ -788,15 +733,7 @@ " raise NotHandled\n", "\n", "\n", - "# RetryLLMHandler with error feedback - the traceback helps LLM correct validation errors\n", - "# Note: Pydantic wraps PydanticCustomError inside ValidationError, so we catch ValidationError instead\n", - "retry_handler = RetryLLMHandler(\n", - " max_retries=3,\n", - " add_error_feedback=True,\n", - " exception_cls=ValidationError, # Catch validation errors\n", - ")\n", - "\n", - "with handler(provider), handler(retry_handler), handler({completion: log_llm}):\n", + "with handler(provider), handler({call_assistant: log_llm}):\n", " rating = give_rating_for_movie(\"Die Hard\")\n", " print(f\"Score: {rating.score}/5\")\n", " print(f\"Explanation: {rating.explanation}\")" From 41beb7845f10a57336f193f427210abf706637b9 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Thu, 29 Jan 2026 12:31:41 -0500 Subject: [PATCH 06/27] removed unnecessarily defensive validation --- effectful/handlers/llm/completions.py | 54 ++++----------------------- 1 file changed, 8 insertions(+), 46 deletions(-) diff --git a/effectful/handlers/llm/completions.py 
b/effectful/handlers/llm/completions.py index 71acdeb5..ed8a9da2 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -26,12 +26,6 @@ from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import Operation -ToolCall: pydantic.TypeAdapter[list[ChatCompletionMessageToolCall]] = ( - pydantic.TypeAdapter(list[ChatCompletionMessageToolCall]) -) -MessageContent: pydantic.TypeAdapter[list[OpenAIMessageContentListBlock] | str] = ( - pydantic.TypeAdapter(list[OpenAIMessageContentListBlock] | str) -) Message = ( OpenAIChatCompletionAssistantMessage | ChatCompletionToolMessage @@ -40,36 +34,6 @@ | OpenAIChatCompletionUserMessage ) -MessageAdapter: pydantic.TypeAdapter[Message] = pydantic.TypeAdapter(Message) - - -def validate_data[T](adapter: pydantic.TypeAdapter[T], data: typing.Any) -> T: - adapter.validate_python(data, strict=True) - return adapter.dump_python(data) - - -def _message_role(message: Message) -> str: - return message["role"] - - -def _message_content(message: Message) -> list[OpenAIMessageContentListBlock] | str: - return validate_data(MessageContent, message.get("content")) - - -def _message_reasoning_content( - message: Message, -) -> list[OpenAIMessageContentListBlock] | str: - return validate_data(MessageContent, message.get("reasoning_content")) - - -def _message_tool_calls(message: Message) -> list[ChatCompletionMessageToolCall]: - tool_calls = message.get("tool_calls") or [] - assert isinstance(tool_calls, list) - return [ - ChatCompletionMessageToolCall.model_validate(tool_call) - for tool_call in tool_calls - ] - def _parameter_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: return pydantic.create_model( @@ -131,7 +95,7 @@ def call_assistant( assert isinstance(choice, litellm.types.utils.Choices) message: litellm.Message = choice.message assert message.role == "assistant" - return validate_data(MessageAdapter, message.model_dump(mode="json")) + return 
typing.cast(Message, message.model_dump(mode="json")) @Operation.define @@ -170,9 +134,8 @@ def call_tool( # serialize back to U using encoder for return type encoded_result = return_type.serialize(return_type.encode(result)) - return validate_data( - MessageAdapter, - dict(role="tool", content=encoded_result, tool_call_id=tool_call.id), + return typing.cast( + Message, dict(role="tool", content=encoded_result, tool_call_id=tool_call.id) ) @@ -224,7 +187,7 @@ def flush_text() -> None: # Note: The OpenAI api only seems to accept images in the 'user' role. The # effect of different roles on the model's response is currently unclear. - return [validate_data(MessageAdapter, dict(role="user", content=parts))] + return [typing.cast(Message, dict(role="user", content=parts))] @Operation.define @@ -278,18 +241,17 @@ def _call[**P, T]( tool_calls: list[ChatCompletionMessageToolCall] = [] message = messages[-1] - while _message_role(message) != "assistant" or tool_calls: + while message["role"] != "assistant" or tool_calls: message = call_assistant(messages, response_model, tool_specs) messages.append(message) - tool_calls = _message_tool_calls(message) + tool_calls = message.get("tool_calls") or [] for tool_call in tool_calls: + tool_call = ChatCompletionMessageToolCall.model_validate(tool_call) message = call_tool(tool_call, tools) messages.append(message) # return response - serialized_result = _message_content(message) or _message_reasoning_content( - message - ) + serialized_result = message.get("content") or message.get("reasoning_content") assert isinstance(serialized_result, str), ( "final response from the model should be a string" ) From 1400d1961f4fbf1c82360c9e63566c97e15cde1d Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Thu, 29 Jan 2026 12:33:30 -0500 Subject: [PATCH 07/27] updated tool call decoding to use concrete type of tool result instead of annotations --- effectful/handlers/llm/completions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index ed8a9da2..aaa01292 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -114,7 +114,6 @@ def call_tool( sig = inspect.signature(tool) param_model = _parameter_model(sig) - return_type = type_to_encodable_type(sig.return_annotation) # build dict of raw encodable types U raw_args = param_model.model_validate_json(json_str) @@ -133,6 +132,7 @@ def call_tool( result = tool(*bound_sig.args, **bound_sig.kwargs) # serialize back to U using encoder for return type + return_type = type_to_encodable_type(type(result)) encoded_result = return_type.serialize(return_type.encode(result)) return typing.cast( Message, dict(role="tool", content=encoded_result, tool_call_id=tool_call.id) From a06296dd16a671a2d003eabc4ee9531c3868e975 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 10:02:05 -0500 Subject: [PATCH 08/27] updated completions to fix basic type errors --- effectful/handlers/llm/completions.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index aaa01292..e597de75 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -21,7 +21,7 @@ ) from effectful.handlers.llm import Template, Tool -from effectful.handlers.llm.encoding import Encodable, type_to_encodable_type +from effectful.handlers.llm.encoding import Encodable from effectful.ops.semantics import fwd from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import Operation @@ -40,7 +40,7 @@ def _parameter_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: "Params", __config__={"extra": "forbid"}, **{ - name: type_to_encodable_type(param.annotation).t + name: Encodable.define(param.annotation).enc for name, param in sig.parameters.items() }, # type: ignore ) 
@@ -49,7 +49,7 @@ def _parameter_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: def _response_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: return pydantic.create_model( "Response", - value=type_to_encodable_type(sig.return_annotation).t, + value=Encodable.define(sig.return_annotation).enc, __config__={"extra": "forbid"}, ) @@ -121,8 +121,8 @@ def call_tool( # use encoders to decode Us to python types T bound_sig: inspect.BoundArguments = sig.bind( **{ - param_name: type_to_encodable_type( - sig.parameters[param_name].annotation + param_name: Encodable.define( + sig.parameters[param_name].annotation, {} ).decode(getattr(raw_args, param_name)) for param_name in raw_args.model_fields_set } @@ -132,7 +132,7 @@ def call_tool( result = tool(*bound_sig.args, **bound_sig.kwargs) # serialize back to U using encoder for return type - return_type = type_to_encodable_type(type(result)) + return_type = Encodable.define(type(result)) encoded_result = return_type.serialize(return_type.encode(result)) return typing.cast( Message, dict(role="tool", content=encoded_result, tool_call_id=tool_call.id) @@ -167,8 +167,8 @@ def flush_text() -> None: continue obj, _ = formatter.get_field(field_name, (), env) - encoder = type_to_encodable_type(type(obj)) - encoded_obj: list[OpenAIMessageContentListBlock] = encoder.serialize( + encoder = Encodable.define(type(obj)) + encoded_obj: typing.Sequence[OpenAIMessageContentListBlock] = encoder.serialize( encoder.encode(obj) ) for part in encoded_obj: @@ -216,8 +216,8 @@ def _completion(self, *args, **kwargs): def _call[**P, T]( self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs ) -> T: - response_encoding_type: Encodable[T] = type_to_encodable_type( - inspect.signature(template).return_annotation + response_encoding_type: Encodable = Encodable.define( + inspect.signature(template).return_annotation, template.__context__ ) response_model = _response_model(inspect.signature(template)) From 
c47abd6a0a36ff32a8f2b3d07ecc2120a6808eb5 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 15:31:03 -0500 Subject: [PATCH 09/27] updated call assistant to handle decoding tool calls --- effectful/handlers/llm/completions.py | 181 +++++++++--------- effectful/handlers/llm/template.py | 43 +++++ ...LLMProvider__test_integer_return_type.json | 48 ++++- ...ompt_cross_endpoint[claude-haiku-4-5].json | 42 +++- ...le_prompt_cross_endpoint[gpt-4o-mini].json | 48 ++++- ...e_prompt_multiple_models[gpt-4o-mini].json | 48 ++++- ...le_prompt_multiple_models[gpt-5-nano].json | 48 ++++- ...teLLMProvider__test_structured_output.json | 48 ++++- ...eLLMProvider__test_with_config_params.json | 48 ++++- ...eturn__test_pydantic_basemodel_return.json | 48 ++++- ...ers_llm_provider.py__test_image_input.json | 48 ++++- ....py__test_litellm_caching_integration.json | 48 ++++- ...y__test_litellm_caching_integration_1.json | 46 ++++- ...y__test_litellm_caching_integration_2.json | 48 ++++- ..._litellm_caching_integration_disabled.json | 48 ++++- ...itellm_caching_integration_disabled_1.json | 48 ++++- ...er.py__test_litellm_caching_selective.json | 48 ++++- ....py__test_litellm_caching_selective_1.json | 48 ++++- ....py__test_litellm_caching_selective_2.json | 48 ++++- ....py__test_litellm_caching_selective_3.json | 46 ++++- ....py__test_litellm_caching_selective_4.json | 48 ++++- ....py__test_litellm_caching_selective_5.json | 48 ++++- tests/test_handlers_llm_provider.py | 7 +- 23 files changed, 949 insertions(+), 232 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index e597de75..2fe880af 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -13,7 +13,6 @@ ChatCompletionMessageToolCall, ChatCompletionTextObject, ChatCompletionToolMessage, - ChatCompletionToolParam, OpenAIChatCompletionAssistantMessage, OpenAIChatCompletionSystemMessage, OpenAIChatCompletionUserMessage, 
@@ -22,7 +21,6 @@ from effectful.handlers.llm import Template, Tool from effectful.handlers.llm.encoding import Encodable -from effectful.ops.semantics import fwd from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import Operation @@ -34,102 +32,117 @@ | OpenAIChatCompletionUserMessage ) +type ToolCallID = str -def _parameter_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: - return pydantic.create_model( - "Params", - __config__={"extra": "forbid"}, + +class DecodedToolCall[T](typing.NamedTuple): + tool: Tool[..., T] + bound_args: inspect.BoundArguments + id: ToolCallID + + +type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None] + + +def decode_tool_call( + tool_call: ChatCompletionMessageToolCall, + tools: collections.abc.Mapping[str, Tool], +) -> DecodedToolCall: + """Decode a tool call from the LLM response into a DecodedToolCall.""" + assert tool_call.function.name is not None + tool = tools[tool_call.function.name] + json_str = tool_call.function.arguments + + sig = inspect.signature(tool) + + # build dict of raw encodable types U + raw_args = tool.param_model.model_validate_json(json_str) + + # use encoders to decode Us to python types T + bound_sig: inspect.BoundArguments = sig.bind( **{ - name: Encodable.define(param.annotation).enc - for name, param in sig.parameters.items() - }, # type: ignore + param_name: Encodable.define( + sig.parameters[param_name].annotation, {} + ).decode(getattr(raw_args, param_name)) + for param_name in raw_args.model_fields_set + } ) + return DecodedToolCall(tool, bound_sig, tool_call.id) -def _response_model(sig: inspect.Signature) -> type[pydantic.BaseModel]: - return pydantic.create_model( - "Response", - value=Encodable.define(sig.return_annotation).enc, - __config__={"extra": "forbid"}, - ) +@Operation.define +@functools.wraps(litellm.completion) +def completion(*args, **kwargs) -> typing.Any: + """Low-level LLM request. 
Handlers may log/modify requests and delegate via fwd(). + This effect is emitted for model request/response rounds so handlers can + observe/log requests. -def _tool_model(tool: Tool) -> ChatCompletionToolParam: - param_model = _parameter_model(inspect.signature(tool)) - response_format = litellm.utils.type_to_response_format_param(param_model) - assert response_format is not None - assert tool.__default__.__doc__ is not None - return { - "type": "function", - "function": { - "name": tool.__name__, - "description": textwrap.dedent(tool.__default__.__doc__), - "parameters": response_format["json_schema"]["schema"], - "strict": True, - }, - } + """ + return litellm.completion(*args, **kwargs) @Operation.define -def call_assistant( +def call_assistant[T, U]( messages: collections.abc.Sequence[Message], - response_format: type[pydantic.BaseModel] | None, - tools: collections.abc.Mapping[str, ChatCompletionToolParam], + tools: collections.abc.Mapping[str, Tool], + response_format: Encodable[T, U], model: str, **kwargs, -) -> Message: +) -> MessageResult[T]: """Low-level LLM request. Handlers may log/modify requests and delegate via fwd(). This effect is emitted for model request/response rounds so handlers can observe/log requests. 
""" - response: litellm.types.utils.ModelResponse = litellm.completion( + tool_specs = {k: t.model for k, t in tools.items()} + response_model = pydantic.create_model( + "Response", value=response_format.enc, __config__={"extra": "forbid"} + ) + + response: litellm.types.utils.ModelResponse = completion( model, messages=list(messages), - response_format=response_format, - tools=list(tools.values()), + response_format=response_model, + tools=list(tool_specs.values()), **kwargs, ) choice = response.choices[0] assert isinstance(choice, litellm.types.utils.Choices) + message: litellm.Message = choice.message assert message.role == "assistant" - return typing.cast(Message, message.model_dump(mode="json")) + + tool_calls: list[DecodedToolCall] = [] + raw_tool_calls = message.get("tool_calls") or [] + for tool_call in raw_tool_calls: + tool_call = ChatCompletionMessageToolCall.model_validate(tool_call) + decoded_tool_call = decode_tool_call(tool_call, tools) + tool_calls.append(decoded_tool_call) + + result = None + if not tool_calls: + # return response + serialized_result = message.get("content") or message.get("reasoning_content") + assert isinstance(serialized_result, str), ( + "final response from the model should be a string" + ) + raw_result = response_model.model_validate_json(serialized_result) + result = response_format.decode(raw_result.value) # type: ignore + + return (typing.cast(Message, message.model_dump(mode="json")), tool_calls, result) @Operation.define -def call_tool( - tool_call: ChatCompletionMessageToolCall, - tools: collections.abc.Mapping[str, Tool], -) -> Message: +def call_tool(tool_call: DecodedToolCall) -> Message: """Implements a roundtrip call to a python function. Input is a json string representing an LLM tool call request parameters. The output is the serialised response to the model. 
""" - assert tool_call.function.name is not None - tool = tools[tool_call.function.name] - json_str = tool_call.function.arguments - - sig = inspect.signature(tool) - param_model = _parameter_model(sig) - - # build dict of raw encodable types U - raw_args = param_model.model_validate_json(json_str) - - # use encoders to decode Us to python types T - bound_sig: inspect.BoundArguments = sig.bind( - **{ - param_name: Encodable.define( - sig.parameters[param_name].annotation, {} - ).decode(getattr(raw_args, param_name)) - for param_name in raw_args.model_fields_set - } - ) - # call tool with python types - result = tool(*bound_sig.args, **bound_sig.kwargs) + result = tool_call.tool(*tool_call.bound_args.args, **tool_call.bound_args.kwargs) # serialize back to U using encoder for return type return_type = Encodable.define(type(result)) @@ -207,20 +220,10 @@ def __init__(self, model="gpt-4o", **config): **inspect.signature(litellm.completion).bind_partial(**config).kwargs, } - @implements(call_assistant) - @functools.wraps(call_assistant) - def _completion(self, *args, **kwargs): - return fwd(*args, **{**self.config, **kwargs}) - @implements(Template.__apply__) def _call[**P, T]( self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs ) -> T: - response_encoding_type: Encodable = Encodable.define( - inspect.signature(template).return_annotation, template.__context__ - ) - response_model = _response_model(inspect.signature(template)) - messages: list[Message] = [*call_system(template)] # encode arguments @@ -228,36 +231,28 @@ def _call[**P, T]( bound_args.apply_defaults() env = template.__context__.new_child(bound_args.arguments) + # Create response_model with env so tools passed as arguments are available + response_model = Encodable.define(template.__signature__.return_annotation, env) + user_messages: list[Message] = call_user(template.__prompt_template__, env) messages.extend(user_messages) - tools = { - **template.tools, - **{k: t for k, t in 
bound_args.arguments.items() if isinstance(t, Tool)}, - } - tool_specs = {k: _tool_model(t) for k, t in tools.items()} - # loop based on: https://cookbook.openai.com/examples/reasoning_function_calls - tool_calls: list[ChatCompletionMessageToolCall] = [] + tool_calls: list[DecodedToolCall] = [] message = messages[-1] + result: T | None = None while message["role"] != "assistant" or tool_calls: - message = call_assistant(messages, response_model, tool_specs) + message, tool_calls, result = call_assistant( + messages, template.tools, response_model, **self.config + ) messages.append(message) - tool_calls = message.get("tool_calls") or [] for tool_call in tool_calls: - tool_call = ChatCompletionMessageToolCall.model_validate(tool_call) - message = call_tool(tool_call, tools) + message = call_tool(tool_call) messages.append(message) - # return response - serialized_result = message.get("content") or message.get("reasoning_content") - assert isinstance(serialized_result, str), ( - "final response from the model should be a string" + assert result is not None, ( + "call_assistant did not produce a result nor tool_calls" ) - encoded_result = ( - serialized_result - if response_model is None - else response_model.model_validate_json(serialized_result).value # type: ignore - ) - return response_encoding_type.decode(encoded_result) + # return response + return result diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py index 76ddb402..5cec8fb8 100644 --- a/effectful/handlers/llm/template.py +++ b/effectful/handlers/llm/template.py @@ -1,4 +1,6 @@ +import functools import inspect +import textwrap import types import typing from collections import ChainMap @@ -6,6 +8,11 @@ from dataclasses import dataclass from typing import Annotated, Any +import litellm +import pydantic +from litellm import ChatCompletionToolParam + +from effectful.handlers.llm.encoding import Encodable from effectful.ops.types import INSTANCE_OP_PREFIX, Annotation, Operation 
@@ -95,6 +102,33 @@ def __init__( signature = IsRecursive.infer_annotations(signature) super().__init__(signature, name, default) + @functools.cached_property + def param_model(self) -> type[pydantic.BaseModel]: + sig = inspect.signature(self) + return pydantic.create_model( + "Params", + __config__={"extra": "forbid"}, + **{ + name: Encodable.define(param.annotation).enc + for name, param in sig.parameters.items() + }, # type: ignore + ) + + @functools.cached_property + def model(self) -> ChatCompletionToolParam: + response_format = litellm.utils.type_to_response_format_param(self.param_model) + assert response_format is not None + assert self.__default__.__doc__ is not None + return { + "type": "function", + "function": { + "name": self.__name__, + "description": textwrap.dedent(self.__default__.__doc__), + "parameters": response_format["json_schema"]["schema"], + "strict": True, + }, + } + @classmethod def define(cls, *args, **kwargs) -> "Tool[P, T]": """Define a tool. @@ -185,6 +219,7 @@ def tools(self) -> Mapping[str, Tool]: for name, obj in self.__context__.items(): if obj is self and not is_recursive: continue + # Collect tools in context if isinstance(obj, Tool): result[name] = obj @@ -260,6 +295,14 @@ def define[**Q, V]( *typing.cast(list[MutableMapping[str, Any]], contexts) ) + is_recursive = _is_recursive_signature(inspect.signature(default)) + # todo: make this more pythonic + if not is_recursive: + # drop default.__name__ from context + pass + op = super().define(default, *args, **kwargs) op.__context__ = context # type: ignore[attr-defined] + # todo: drop self from contexts if not is_recursive + return typing.cast(Template[Q, V], op) diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json index 7aa83a42..bc5d3bc0 100644 --- 
a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_integer_return_type.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":73}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNbSUc9fUpX7qU6kanq3CXVh8eJ", + "created": 1769812547, + "model": "gpt-5-nano-2025-08-07", + "object": "chat.completion", + "system_fingerprint": null, + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":73}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 401, + "prompt_tokens": 340, + "total_tokens": 741, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 384, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json index 98c1770d..7fdb0c6e 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[claude-haiku-4-5].json @@ -1,7 +1,39 @@ { - "content": "{\"value\": \"Testing is a critical 
process that ensures software quality and reliability by identifying bugs and verifying that applications work as expected.\"}", - "function_call": null, - "provider_specific_fields": null, - "role": "assistant", - "tool_calls": null + "id": "chatcmpl-b08c0c1f-3fd9-45eb-834a-73fe347714ee", + "created": 1769812544, + "model": "claude-haiku-4-5-20251001", + "object": "chat.completion", + "system_fingerprint": null, + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\": \"Testing is a crucial process that ensures software quality, identifies bugs, and validates that systems work as intended before deployment to users.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": null + } + } + ], + "usage": { + "completion_tokens": 56, + "prompt_tokens": 1179, + "total_tokens": 1235, + "completion_tokens_details": null, + "prompt_tokens_details": { + "audio_tokens": null, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null, + "cache_creation_tokens": 0, + "cache_creation_token_details": { + "ephemeral_5m_input_tokens": 0, + "ephemeral_1h_input_tokens": 0 + } + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0 + } } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json index 27f8cd26..be954644 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_cross_endpoint[gpt-4o-mini].json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Testing is an essential process to ensure the quality and functionality of a product 
before its release.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNU2lVVTdXXJhrnqD9Vs2LUtPNT", + "created": 1769812540, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Testing ensures that a product meets its requirements and functions as intended.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 21, + "prompt_tokens": 262, + "total_tokens": 283, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json index 7218eeaf..9d40c5f2 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-4o-mini].json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Testing is a critical process that ensures the quality and reliability of products before they reach consumers.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": 
null + "id": "chatcmpl-D3rNPzidwbTkz4hc0mGchwjqbvlq9", + "created": 1769812535, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Testing is the process of evaluating a system or component to ensure it meets specified requirements and functions correctly.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 28, + "prompt_tokens": 262, + "total_tokens": 290, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json index b0b22793..83ad3c82 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_simple_prompt_multiple_models[gpt-5-nano].json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Testing helps ensure reliability before release.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNQ45EqoVx6YQZKRX86eowLhhcv", + "created": 1769812536, + "model": "gpt-5-nano-2025-08-07", + 
"object": "chat.completion", + "system_fingerprint": null, + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Testing helps catch mistakes before they reach users.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 282, + "prompt_tokens": 342, + "total_tokens": 624, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 256, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json index 3162a367..ae5497ee 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_structured_output.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":{\"genre\":\"action\",\"explanation\":\"The plot centers on high-intensity, action-driven conflict as a rogue cop confronts an evil group attempting to seize control of a skyscraper. 
It emphasizes combat, chases, and siege-style sequences typical of the action genre.\"}}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNYTWIcoJmMGU1mjErSsz4rJgM6", + "created": 1769812544, + "model": "gpt-5-nano-2025-08-07", + "object": "chat.completion", + "system_fingerprint": null, + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\": {\"genre\": \"action\", \"explanation\": \"The plot centers on a rogue cop taking on an evil group to stop them from taking over a skyscraper, featuring high-stakes conflict and action-oriented sequences typical of the action genre.\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 384, + "prompt_tokens": 457, + "total_tokens": 841, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 320, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json index 4050c6a7..3a5c57fd 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestLiteLLMProvider__test_with_config_params.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"A deterministic test consistently produces the same results under the same 
conditions, ensuring reliability and repeatability in its outcomes.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNgQEQWcxYghJaJXPS4eNvmHgq9", + "created": 1769812552, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"A deterministic test consistently produces the same output for a given input, ensuring reliability and repeatability in its results.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 30, + "prompt_tokens": 263, + "total_tokens": 293, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json index 07c71634..74c6c5d9 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestPydanticBaseModelReturn__test_pydantic_basemodel_return.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":{\"title\":\"The First Spark at Eldoria Academy\",\"rating\":4,\"summary\":\"An energetic coming-of-age fantasy that revisits a 
classic premise with warmth and wit. A young wizard discovers his powers and enters a sprawling magical school, where spellwork, friendship, and rivalry illuminate his path to self-discovery. The world is vividly imagined, with inventive magical systems and memorable mentors. While some beats feel familiar, the narrative keeps the pace brisk and the characters earnest, delivering charm, heart, and enough intrigue to carry a promising series.\"}}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNjYHHVoJdYAi1XKbt6f4aGFFS8", + "created": 1769812555, + "model": "gpt-5-nano-2025-08-07", + "object": "chat.completion", + "system_fingerprint": null, + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"title\":\"The First Spark\",\"rating\":4,\"summary\":\"A brisk coming-of-age fantasy about a young wizard who discovers his powers and enrolls at a wizarding academy. The world-building is vivid, with a fresh take on magic grounded in runes and elemental affinities. The character work—friendships, mentor dynamics, and self-doubt—drives a fast-paced school-year plot that balances whimsy with danger. While it leans on familiar tropes of secret origins and rival houses, the emotional honesty and lively prose keep it engaging. 
Ideal for readers who enjoy classic magical school tales with a modern, character-forward edge.\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 1744, + "prompt_tokens": 445, + "total_tokens": 2189, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 1600, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json index 0bcba9e1..3c2d22d5 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_image_input.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"The image is a simple black and white pixel art representation of a smiling face, resembling an emoticon or smiley. It features two white square eyes and a wide rectangular mouth against a black background.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNhHv6KxKgfgH1DTBqe9gLaNzpU", + "created": 1769812553, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_eadf229d54", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"The image is a simple pixel art of a smiley face. 
It features two square white eyes and a wide white smile set against a black background.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 38, + "prompt_tokens": 492, + "total_tokens": 530, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json index 0313940e..40b905a6 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, offering a delicious and healthy snack.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNwzesxKrjfwjxNOJDf2C92mCHg", + "created": 1769812568, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors, offering a sweet or tart flavor profile perfect for snacking or baking.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + 
"provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 37, + "prompt_tokens": 262, + "total_tokens": 299, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json index 066c79e6..8c5a0bf8 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_1.json @@ -1,8 +1,42 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, offering a delicious and healthy snack.\"}", - "function_call": null, - "provider_specific_fields": null, - "role": "assistant", - "tool_calls": null + "id": "chatcmpl-D3rNwzesxKrjfwjxNOJDf2C92mCHg", + "created": 1769812568, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors, offering a sweet or tart flavor profile perfect for snacking or baking.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": null, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 37, + 
"prompt_tokens": 262, + "total_tokens": 299, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json index c5518255..34190c89 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_2.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Oranges are juicy and vibrant citrus fruits known for their refreshing taste and high vitamin C content.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNx5R4CLldDSB27z7E3EWVmgpnY", + "created": 1769812569, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Oranges are vibrant citrus fruits known for their juicy sweetness and rich vitamin C content.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 25, + "prompt_tokens": 262, + "total_tokens": 287, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + 
"audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json index cd071234..8a57f076 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are versatile fruits known for their crisp texture and sweet-tart flavor, often enjoyed fresh or used in a variety of culinary dishes.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rNyx4soJAv5yDrEl5Hlq8n66iem", + "created": 1769812570, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are a versatile and nutritious fruit, enjoyed in a variety of dishes worldwide.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 25, + "prompt_tokens": 262, + "total_tokens": 287, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No 
newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json index e1ccc0d3..45b4613d 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_integration_disabled_1.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are a crisp, juicy fruit that come in a variety of colors and flavors, often enjoyed fresh or used in cooking and baking.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rO02RMv2hfN45N9prEMtvyGO6M1", + "created": 1769812572, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are crisp, sweet fruits that come in a variety of colors and are packed with essential vitamins and fiber.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 31, + "prompt_tokens": 262, + "total_tokens": 293, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json 
b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json index cbc8afaa..fd332bfc 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are crunchy, sweet fruits that are enjoyed fresh or in various culinary dishes around the world.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rOjVJJM2ucVI5hYkHosRuzeWxqC", + "created": 1769812617, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, often enjoyed fresh or used in delicious recipes.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 34, + "prompt_tokens": 262, + "total_tokens": 296, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json index 005acb0c..4822c6d3 100644 --- 
a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_1.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are a versatile fruit known for their sweet taste and crisp texture, making them a popular snack and ingredient worldwide.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rOnYRas1FEzNW5AQBkzZVINzkxJ", + "created": 1769812621, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are crisp, sweet fruits that come in a variety of colors, including red, green, and yellow.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 31, + "prompt_tokens": 262, + "total_tokens": 293, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json index 727a94dc..beb3c03e 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_2.json @@ -1,10 +1,44 @@ { - 
"annotations": [], - "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, often enjoyed fresh or used in various culinary dishes.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rOpavNWvVE7vrJWgXfUJyM8fZgB", + "created": 1769812623, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are a popular and nutritious fruit, known for their crisp texture and a range of flavors from sweet to tart.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 32, + "prompt_tokens": 262, + "total_tokens": 294, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json index 5ac68392..d4211c06 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_3.json @@ -1,8 +1,42 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors and flavors, often enjoyed fresh or used in various culinary dishes.\"}", - 
"function_call": null, - "provider_specific_fields": null, - "role": "assistant", - "tool_calls": null + "id": "chatcmpl-D3rOpavNWvVE7vrJWgXfUJyM8fZgB", + "created": 1769812623, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are a popular and nutritious fruit, known for their crisp texture and a range of flavors from sweet to tart.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": null, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 32, + "prompt_tokens": 262, + "total_tokens": 294, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json index 9945a97d..2bf9bc05 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_4.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are a crisp, juicy fruit enjoyed worldwide for their delightful taste and nutritional benefits.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rOqf0mpXe1uenrxnQIlQjR5rKJP", + "created": 1769812624, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", 
+ "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are crisp, juicy fruits that come in a variety of colors, offering both health benefits and a sweet taste.\"}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 32, + "prompt_tokens": 262, + "total_tokens": 294, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json index 5cbd12cd..92639a81 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__test_litellm_caching_selective_5.json @@ -1,10 +1,44 @@ { - "annotations": [], - "content": "{\"value\":\"Apples are crisp and juicy fruits, often enjoyed fresh or used in a variety of culinary dishes.\"}", - "function_call": null, - "provider_specific_fields": { - "refusal": null + "id": "chatcmpl-D3rOsz6dyAVbv1qQSZWtL3aEE2dxN", + "created": 1769812626, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_fa7f5b168b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":\"Apples are nutritious fruits that come in a variety of colors and flavors, offering a refreshing and naturally sweet snack.\"}", 
+ "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 31, + "prompt_tokens": 262, + "total_tokens": 293, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } }, - "role": "assistant", - "tool_calls": null + "service_tier": "default" } \ No newline at end of file diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index e6091d7a..66c7af7b 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -23,6 +23,7 @@ from effectful.handlers.llm.completions import ( LiteLLMProvider, call_assistant, + completion, ) from effectful.handlers.llm.synthesis import ProgramSynthesis, SynthesisError from effectful.ops.semantics import fwd, handler @@ -83,7 +84,7 @@ def call_id(self): self.call_count += 1 return call_id - @implements(call_assistant) + @implements(completion) def _completion(self, *args, **kwargs): path = FIXTURE_DIR / f"{self.test_id}{self.call_id()}.json" if not REBUILD_FIXTURES: @@ -92,10 +93,10 @@ def _completion(self, *args, **kwargs): with path.open() as f: result = ModelResponse.model_validate(json.load(f)) return result - result = fwd(*args, **(self.config | kwargs)) + result = fwd(*args, **kwargs) path.parent.mkdir(exist_ok=True, parents=True) with path.open("w") as f: - json.dump(result, f, indent=2, sort_keys=True) + f.write(result.model_dump_json(indent=2)) return result From 43e5b78165ad43b7030b860f52e6c8e38a85c374 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 18:18:41 -0500 Subject: [PATCH 10/27] dropped stale comments --- 
effectful/handlers/llm/template.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py index 5cec8fb8..260a8bbb 100644 --- a/effectful/handlers/llm/template.py +++ b/effectful/handlers/llm/template.py @@ -295,14 +295,7 @@ def define[**Q, V]( *typing.cast(list[MutableMapping[str, Any]], contexts) ) - is_recursive = _is_recursive_signature(inspect.signature(default)) - # todo: make this more pythonic - if not is_recursive: - # drop default.__name__ from context - pass - op = super().define(default, *args, **kwargs) op.__context__ = context # type: ignore[attr-defined] - # todo: drop self from contexts if not is_recursive return typing.cast(Template[Q, V], op) From 6bb2b13c7a70662fd09a77878ec043f34d2fa7b7 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 18:22:25 -0500 Subject: [PATCH 11/27] moved model and param model back to internals of `completions` --- effectful/handlers/llm/completions.py | 34 +++++++++++++++++++++++++-- effectful/handlers/llm/template.py | 34 --------------------------- 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 2fe880af..5d77c881 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -13,6 +13,7 @@ ChatCompletionMessageToolCall, ChatCompletionTextObject, ChatCompletionToolMessage, + ChatCompletionToolParam, OpenAIChatCompletionAssistantMessage, OpenAIChatCompletionSystemMessage, OpenAIChatCompletionUserMessage, @@ -44,6 +45,35 @@ class DecodedToolCall[T](typing.NamedTuple): type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None] +@functools.cache +def _param_model(tool: Tool) -> type[pydantic.BaseModel]: + sig = inspect.signature(tool) + return pydantic.create_model( + "Params", + __config__={"extra": "forbid"}, + **{ + name: Encodable.define(param.annotation).enc + for name, 
param in sig.parameters.items() + }, # type: ignore + ) + + +@functools.cache +def _function_model(tool: Tool) -> ChatCompletionToolParam: + response_format = litellm.utils.type_to_response_format_param(_param_model(tool)) + assert response_format is not None + assert tool.__default__.__doc__ is not None + return { + "type": "function", + "function": { + "name": tool.__name__, + "description": textwrap.dedent(tool.__default__.__doc__), + "parameters": response_format["json_schema"]["schema"], + "strict": True, + }, + } + + def decode_tool_call( tool_call: ChatCompletionMessageToolCall, tools: collections.abc.Mapping[str, Tool], @@ -56,7 +86,7 @@ def decode_tool_call( sig = inspect.signature(tool) # build dict of raw encodable types U - raw_args = tool.param_model.model_validate_json(json_str) + raw_args = _param_model(tool).model_validate_json(json_str) # use encoders to decode Us to python types T bound_sig: inspect.BoundArguments = sig.bind( @@ -96,7 +126,7 @@ def call_assistant[T, U]( observe/log requests. 
""" - tool_specs = {k: t.model for k, t in tools.items()} + tool_specs = {k: _function_model(t) for k, t in tools.items()} response_model = pydantic.create_model( "Response", value=response_format.enc, __config__={"extra": "forbid"} ) diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py index 260a8bbb..1a74b100 100644 --- a/effectful/handlers/llm/template.py +++ b/effectful/handlers/llm/template.py @@ -1,6 +1,4 @@ -import functools import inspect -import textwrap import types import typing from collections import ChainMap @@ -8,11 +6,6 @@ from dataclasses import dataclass from typing import Annotated, Any -import litellm -import pydantic -from litellm import ChatCompletionToolParam - -from effectful.handlers.llm.encoding import Encodable from effectful.ops.types import INSTANCE_OP_PREFIX, Annotation, Operation @@ -102,33 +95,6 @@ def __init__( signature = IsRecursive.infer_annotations(signature) super().__init__(signature, name, default) - @functools.cached_property - def param_model(self) -> type[pydantic.BaseModel]: - sig = inspect.signature(self) - return pydantic.create_model( - "Params", - __config__={"extra": "forbid"}, - **{ - name: Encodable.define(param.annotation).enc - for name, param in sig.parameters.items() - }, # type: ignore - ) - - @functools.cached_property - def model(self) -> ChatCompletionToolParam: - response_format = litellm.utils.type_to_response_format_param(self.param_model) - assert response_format is not None - assert self.__default__.__doc__ is not None - return { - "type": "function", - "function": { - "name": self.__name__, - "description": textwrap.dedent(self.__default__.__doc__), - "parameters": response_format["json_schema"]["schema"], - "strict": True, - }, - } - @classmethod def define(cls, *args, **kwargs) -> "Tool[P, T]": """Define a tool. 
From 88da657f4e86426fe579189a5bcd8a5f6e9cc9ba Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 18:14:44 -0500 Subject: [PATCH 12/27] added default encodable instance for Callable --- effectful/handlers/llm/encoding.py | 66 +++++++++++++- effectful/handlers/llm/evaluation.py | 83 ++++++++++++++++++ tests/test_handlers_llm_encoding.py | 124 +++++++++++++++++++++++++++ 3 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 effectful/handlers/llm/evaluation.py diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py index 65c4491c..76823530 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -1,9 +1,13 @@ +import ast import base64 +import inspect import io +import textwrap import typing from abc import ABC, abstractmethod -from collections.abc import Callable, Mapping, Sequence +from collections.abc import Callable, Mapping, MutableMapping, Sequence from dataclasses import dataclass +from types import CodeType from typing import Any import pydantic @@ -13,6 +17,7 @@ ) from PIL import Image +import effectful.handlers.llm.evaluation as evaluation from effectful.ops.semantics import _simple_type from effectful.ops.syntax import _CustomSingleDispatchCallable from effectful.ops.types import Operation, Term @@ -253,6 +258,60 @@ def deserialize(self, serialized_value: str) -> typing.Any: return typing.cast(typing.Any, adapter.validate_json(serialized_value)) +@dataclass +class CallableEncodable(Encodable[Callable, str]): + base: type[Callable] + enc: type[str] + ctx: Mapping[str, Any] + + def encode(self, t: Callable) -> typing.Any: + # (https://github.com/python/mypy/issues/14928) + if not isinstance(t, Callable): # type: ignore + raise TypeError(f"Expected callable, got {type(t)}") + try: + source = inspect.getsource(t) + except (OSError, TypeError): + source = None + + if not source: + # create source stub using signature and docstring of callable (useful for builtins 
etc.) + pass + + assert source, "Could not retrieve source code or docstring for function" + + return textwrap.dedent(source) + + def decode(self, encoded_value: str) -> Callable: + filename = f"" + + # https://docs.python.org/3/library/functions.html#exec + g: MutableMapping[str, Any] = {} + g.update(self.ctx or {}) + + before_keys = set(g.keys()) + + module: ast.AST = evaluation.parse(encoded_value, filename) + bytecode: CodeType = evaluation.compile(module, filename) + evaluation.exec(bytecode, g) + + # Otherwise: find newly-created callables (in insertion order). + new_callables = [ + v for k, v in g.items() if k not in before_keys and callable(v) + ] + if not new_callables or len(new_callables) > 1: + raise ValueError( + "decode() required source code to define exactly one callable." + ) + + return new_callables[0] + + @Operation.define + @classmethod + def encoding_instructions(cls) -> str | None: + """Instructions to be prefixed onto synthesis prompts to tune the encoding of the result.""" + return None + + @Encodable.define.register(object) def _encodable_object[T, U]( ty: type[T], ctx: Mapping[str, Any] | None @@ -355,3 +414,8 @@ def _encodable_list[T, U]( return typing.cast( Encodable[T, U], ListEncodable(ty, encoded_ty, ctx, has_image, element_encoder) ) + + +@Encodable.define.register(Callable) +def _encodable_callable(*args, **kwargs): + raise NotImplementedError diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py new file mode 100644 index 00000000..05ec7419 --- /dev/null +++ b/effectful/handlers/llm/evaluation.py @@ -0,0 +1,83 @@ +import ast +import builtins +import linecache +import typing +from collections.abc import MutableMapping +from types import CodeType +from typing import Any + +from effectful.ops.syntax import ObjectInterpretation, defop, implements + + +@defop +def parse(source: str, filename: str) -> ast.AST: + """ + Parse source text into an AST. + + source: The Python source code to parse. 
+ filename: The filename recorded in the resulting AST for tracebacks and tooling. + + Returns the parsed AST. + """ + raise TypeError("An eval provider must be installed in order to parse code.") + + +@defop +def compile(module: ast.AST, filename: str) -> CodeType: + """ + Compile an AST into a Python code object. + + module: The AST to compile (typically produced by parse()). + filename: The filename recorded in the resulting code object (CodeType.co_filename), used in tracebacks and by inspect.getsource(). + + Returns the compiled code object. + """ + raise TypeError("An eval provider must be installed in order to compile code.") + + +@defop +def exec( + bytecode: CodeType, + env: MutableMapping[str, Any], +) -> None: + """ + Execute a compiled code object. + + bytecode: A code object to execute (typically produced by compile()). + env: The namespace mapping used during execution. + """ + raise TypeError("An eval provider must be installed in order to execute code.") + + +class UnsafeEvalProvider(ObjectInterpretation): + """UNSAFE provider that handles parse, comple and exec operations + by shelling out to python *without* any further checks. Only use for testing.""" + + @implements(parse) + def parse(self, source: str, filename: str) -> ast.AST: + # Cache source under `filename` so inspect.getsource() can retrieve it later. + # inspect uses f.__code__.co_filename -> linecache.getlines(filename) + linecache.cache[filename] = ( + len(source), + None, + source.splitlines(True), + filename, + ) + + return ast.parse(source, filename=filename, mode="exec") + + @implements(compile) + def compile(self, module: ast.AST, filename: str) -> CodeType: + return builtins.compile(typing.cast(typing.Any, module), filename, "exec") + + @implements(exec) + def exec( + self, + bytecode: CodeType, + env: dict[str, Any], + ) -> None: + # Ensure builtins exist in the execution environment. 
+ env.setdefault("__builtins__", __builtins__) + + # Execute module-style so top-level defs land in `env`. + builtins.exec(bytecode, env, env) diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index dd21436a..f1d5d4c3 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -6,6 +6,8 @@ from PIL import Image from effectful.handlers.llm.encoding import Encodable +from effectful.handlers.llm.evaluation import UnsafeEvalProvider +from effectful.ops.semantics import handler from effectful.ops.types import Operation, Term @@ -718,3 +720,125 @@ class Person(pydantic.BaseModel): assert decoded_from_model == person assert isinstance(decoded_from_model, Person) assert isinstance(decoded_from_model.address, Address) + + +class TestCallableEncodable: + """Tests for CallableEncodable - encoding/decoding callables as source code.""" + + def test_encode_decode_function(self): + from collections.abc import Callable + + def add(a: int, b: int) -> int: + return a + b + + encodable = Encodable.define(Callable, {}) + encoded = encodable.encode(add) + assert isinstance(encoded, str) + assert "def add" in encoded + assert "return a + b" in encoded + + with handler(UnsafeEvalProvider()): + decoded = encodable.decode(encoded) + assert callable(decoded) + assert decoded(2, 3) == 5 + assert decoded.__name__ == "add" + + def test_decode_lambda(self): + from collections.abc import Callable + + # Lambdas should work if defined in a way that inspect.getsource can find them + # Note: lambdas defined inline may not always have retrievable source + encodable = Encodable.define(Callable, {}) + + # Test decoding a lambda from source string + lambda_source = "f = lambda x: x * 2" + with handler(UnsafeEvalProvider()): + decoded = encodable.decode(lambda_source) + assert callable(decoded) + assert decoded(5) == 10 + + def test_decode_with_env(self): + from collections.abc import Callable + + # Test decoding a function that uses 
env variables + encodable = Encodable.define(Callable, {"factor": 3}) + source = """def multiply(x): + return x * factor""" + + with handler(UnsafeEvalProvider()): + decoded = encodable.decode(source) + assert callable(decoded) + assert decoded(4) == 12 + + def test_encode_non_callable_raises(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable, {}) + with pytest.raises(TypeError, match="Expected callable"): + encodable.encode("not a callable", {}) + + def test_encode_builtin_raises(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable, {}) + # Built-in functions don't have source code + with pytest.raises(RuntimeError, match="Source code of callable .* not found"): + with handler(UnsafeEvalProvider()): + encodable.encode(len) + + def test_decode_no_callable_raises(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable, {}) + # Source code that defines no callable + source = "x = 42" + with pytest.raises(ValueError, match="exactly one callable"): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_decode_multiple_callables_raises(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable, {}) + # Source code that defines multiple callables + source = """def foo(): + return 1 + +def bar(): + return 2""" + with pytest.raises(ValueError, match="exactly one callable"): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_decode_class(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable, {}) + # Classes are callable, decode should work with class definitions + source = """class Greeter: + def __init__(self, name): + self.name = name + + def greet(self): + return f"Hello, {self.name}!\"""" + + with handler(UnsafeEvalProvider()): + decoded = encodable.decode(source) + assert callable(decoded) + instance = decoded("World") + assert instance.greet() == "Hello, 
World!" + + def test_roundtrip(self): + from collections.abc import Callable + + def greet(name: str) -> str: + return f"Hello, {name}!" + + encodable = Encodable.define(Callable, {}) + with handler(UnsafeEvalProvider()): + encoded = encodable.encode(greet) + decoded = encodable.decode(encoded) + + assert callable(decoded) + assert decoded("Alice") == "Hello, Alice!" + assert decoded.__name__ == "greet" From 88c65ee9e076216fc2473370ced386c2e8fa3547 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 18:15:36 -0500 Subject: [PATCH 13/27] fixed type errors --- effectful/handlers/llm/evaluation.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 05ec7419..463041fe 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -2,7 +2,6 @@ import builtins import linecache import typing -from collections.abc import MutableMapping from types import CodeType from typing import Any @@ -10,7 +9,7 @@ @defop -def parse(source: str, filename: str) -> ast.AST: +def parse(source: str, filename: str) -> ast.Module: """ Parse source text into an AST. @@ -23,7 +22,7 @@ def parse(source: str, filename: str) -> ast.AST: @defop -def compile(module: ast.AST, filename: str) -> CodeType: +def compile(module: ast.Module, filename: str) -> CodeType: """ Compile an AST into a Python code object. @@ -38,7 +37,7 @@ def compile(module: ast.AST, filename: str) -> CodeType: @defop def exec( bytecode: CodeType, - env: MutableMapping[str, Any], + env: dict[str, Any], ) -> None: """ Execute a compiled code object. @@ -54,7 +53,7 @@ class UnsafeEvalProvider(ObjectInterpretation): by shelling out to python *without* any further checks. 
Only use for testing.""" @implements(parse) - def parse(self, source: str, filename: str) -> ast.AST: + def parse(self, source: str, filename: str) -> ast.Module: # Cache source under `filename` so inspect.getsource() can retrieve it later. # inspect uses f.__code__.co_filename -> linecache.getlines(filename) linecache.cache[filename] = ( From 18da11bfd214a5a8435af2f5a30c08c138846bde Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 19:11:12 -0500 Subject: [PATCH 14/27] update to use more structured type for synthesis --- effectful/handlers/llm/encoding.py | 253 ++++++++++- effectful/handlers/llm/synthesis.py | 14 + ...hesis__test_synthesize_adder_function.json | 44 ++ ...sis__test_synthesize_bool_return_type.json | 44 ++ ...est_synthesize_counter_with_parameter.json | 53 +++ ...t_synthesize_counter_with_parameter_1.json | 44 ++ ...t_synthesize_counter_with_parameter_2.json | 44 ++ ...sis__test_synthesize_string_processor.json | 44 ++ ...nthesis__test_synthesize_three_params.json | 44 ++ ...__test_synthesized_function_roundtrip.json | 44 ++ tests/test_handlers_llm_encoding.py | 420 ++++++++++++++++-- tests/test_handlers_llm_provider.py | 187 ++++++++ 12 files changed, 1170 insertions(+), 65 deletions(-) create mode 100644 tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json create mode 100644 tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json create mode 100644 tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json create mode 100644 tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json create mode 100644 tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json create mode 100644 
tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json create mode 100644 tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json create mode 100644 tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py index 76823530..3047e7d5 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -18,6 +18,7 @@ from PIL import Image import effectful.handlers.llm.evaluation as evaluation +from effectful.handlers.llm.synthesis import SynthesizedFunction from effectful.ops.semantics import _simple_type from effectful.ops.syntax import _CustomSingleDispatchCallable from effectful.ops.types import Operation, Term @@ -258,13 +259,112 @@ def deserialize(self, serialized_value: str) -> typing.Any: return typing.cast(typing.Any, adapter.validate_json(serialized_value)) +def _format_callable_type(callable_type: type[Callable]) -> str: + """Format a Callable type annotation as a string for LLM instructions.""" + args = typing.get_args(callable_type) + if not args: + return "Callable" + + # Callable[[arg1, arg2, ...], return_type] + if len(args) >= 2: + param_types = args[0] + return_type = args[-1] + + if param_types is ...: + params_str = "..." + elif isinstance(param_types, (list, tuple)): + params_str = ", ".join(getattr(t, "__name__", str(t)) for t in param_types) + else: + params_str = str(param_types) + + return_str = getattr(return_type, "__name__", str(return_type)) + return f"Callable[[{params_str}], {return_str}]" + + return str(callable_type) + + +def _create_typed_synthesized_function( + callable_type: type[Callable], +) -> type[SynthesizedFunction]: + """Create a SynthesizedFunction subclass with type signature in the model description. 
+ + Uses pydantic.create_model to ensure the description is included in the JSON schema + sent to the LLM, informing it of the expected function signature. + """ + type_signature = _format_callable_type(callable_type) + + description = f"""Given the specification above, generate a Python function satisfying the following specification and type signature. + +{type_signature} + + +1. Produce one block of Python code. +2. Do not include usage examples. +3. Your output function def must be the final statement in the code block. + +""" + + # Use pydantic.create_model to create a proper model with the description + # The __doc__ becomes the model's description in the JSON schema + model = pydantic.create_model( + "TypedSynthesizedFunction", + __base__=SynthesizedFunction, + __doc__=description, + ) + return model + + +def _validate_signature_ast( + func_ast: ast.FunctionDef | ast.AsyncFunctionDef, + expected_params: list[type] | None, +) -> None: + """Validate the function signature from AST before execution.""" + if expected_params is not None: + ast_params = func_ast.args.args + func_ast.args.posonlyargs + if len(ast_params) != len(expected_params): + raise ValueError( + f"decode() expected function with {len(expected_params)} parameters, " + f"got {len(ast_params)}" + ) + + +def _validate_signature_callable( + func: Callable, + expected_params: list[type] | None, + expected_return: type | None, +) -> None: + """Validate the function signature from runtime callable after execution.""" + sig = inspect.signature(func) + + if expected_params is not None: + actual_params = list(sig.parameters.values()) + if len(actual_params) != len(expected_params): + raise ValueError( + f"decode() expected function with {len(expected_params)} parameters, " + f"got {len(actual_params)}" + ) + + if expected_return is not None: + actual_return = sig.return_annotation + if actual_return is not inspect.Parameter.empty: + expected_name = getattr(expected_return, "__name__", 
str(expected_return)) + actual_name = getattr(actual_return, "__name__", str(actual_return)) + if expected_name != actual_name: + raise ValueError( + f"decode() expected function with return type {expected_name}, " + f"got {actual_name}" + ) + + @dataclass -class CallableEncodable(Encodable[Callable, str]): +class CallableEncodable(Encodable[Callable, SynthesizedFunction]): base: type[Callable] - enc: type[str] + enc: type[SynthesizedFunction] ctx: Mapping[str, Any] + expected_params: list[type] | None = None + expected_return: type | None = None # None means decode is disabled - def encode(self, t: Callable) -> typing.Any: + def encode(self, t: Callable) -> SynthesizedFunction: # (https://github.com/python/mypy/issues/14928) if not isinstance(t, Callable): # type: ignore raise TypeError(f"Expected callable, got {type(t)}") @@ -273,37 +373,100 @@ def encode(self, t: Callable) -> typing.Any: except (OSError, TypeError): source = None - if not source: - # create source stub using signature and docstring of callable (useful for builtins etc.) - pass + if source: + return self.enc(module_code=textwrap.dedent(source)) + + # Source not available - create stub from name, signature, and docstring + # This is useful for builtins and C extensions + name = getattr(t, "__name__", None) + if not name: + raise RuntimeError( + f"Cannot encode callable {t}: no source code and no __name__" + ) - assert source, "Could not retrieve source code or docstring for function" + try: + sig = inspect.signature(t) + sig_str = str(sig) + except (ValueError, TypeError): + # Some builtins don't have inspectable signatures + sig_str = "(...)" + + docstring = inspect.getdoc(t) + if not docstring: + raise RuntimeError( + f"Cannot encode callable {t}: no source code and no docstring" + ) - return textwrap.dedent(source) + # Format as a stub function with docstring + stub_code = f'''def {name}{sig_str}: + """{docstring}""" + ... 
+''' + return self.enc(module_code=stub_code) + + def decode(self, encoded_value: SynthesizedFunction) -> Callable: + # Decode requires a concrete return type for synthesis + if self.expected_return is None: + raise TypeError( + "Cannot decode/synthesize callable without a concrete type signature. " + "Use Callable[[ParamTypes...], ReturnType] or Callable[..., ReturnType] " + "with a concrete return type (not Any)." + ) - def decode(self, encoded_value: str) -> Callable: filename = f"" + module_code = encoded_value.module_code + + # Parse and validate AST before execution + module: ast.AST = evaluation.parse(module_code, filename) + + if not isinstance(module, ast.Module) or not module.body: + raise ValueError( + "decode() requires module code with at least one statement." + ) + + last_stmt = module.body[-1] + if not isinstance(last_stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): + raise ValueError( + f"decode() requires the last statement to be a function definition, " + f"got {type(last_stmt).__name__}" + ) + + # Validate signature from AST before execution + _validate_signature_ast(last_stmt, self.expected_params) + + # Compile and execute # https://docs.python.org/3/library/functions.html#exec g: MutableMapping[str, Any] = {} g.update(self.ctx or {}) - before_keys = set(g.keys()) - - module: ast.AST = evaluation.parse(encoded_value, filename) bytecode: CodeType = evaluation.compile(module, filename) evaluation.exec(bytecode, g) - # Otherwise: find newly-created callables (in insertion order). - new_callables = [ - v for k, v in g.items() if k not in before_keys and callable(v) - ] - if not new_callables or len(new_callables) > 1: + func_name = last_stmt.name + if func_name not in g: + raise ValueError( + f"decode() expected function '{func_name}' to be defined in globals" + ) + + result = g[func_name] + if not callable(result): raise ValueError( - "decode() required source code to define exactly one callable." 
+ f"decode() expected '{func_name}' to be callable, got {type(result)}" ) - return new_callables[0] + # Validate signature from runtime callable after execution + _validate_signature_callable(result, self.expected_params, self.expected_return) + + return result + + def serialize( + self, encoded_value: SynthesizedFunction + ) -> Sequence[OpenAIMessageContentListBlock]: + return [{"type": "text", "text": encoded_value.model_dump_json()}] + + def deserialize(self, serialized_value: str) -> SynthesizedFunction: + return SynthesizedFunction.model_validate_json(serialized_value) @Operation.define @classmethod @@ -417,5 +580,53 @@ def _encodable_list[T, U]( @Encodable.define.register(Callable) -def _encodable_callable(*args, **kwargs): - raise NotImplementedError +def _encodable_callable( + ty: type[Callable], ctx: Mapping[str, Any] | None +) -> Encodable[Callable, SynthesizedFunction]: + ctx = ctx or {} + + # Extract type args - Callable requires a type signature + type_args = typing.get_args(ty) + + # Handle bare Callable without type args - allow encoding but disable decode + # this occurs when encoding Tools which return callable (need to Encodable.define(return_type) for return type) + if not type_args: + typed_enc = _create_typed_synthesized_function(Callable[..., typing.Any]) # type: ignore[arg-type] + return CallableEncodable( + ty, typed_enc, ctx, expected_params=None, expected_return=None + ) + + if len(type_args) < 2: + raise TypeError( + f"Callable type signature incomplete: {ty}. " + "Expected Callable[[ParamTypes...], ReturnType] or Callable[..., ReturnType]." 
+ ) + + # Extract param and return types for validation + param_types = type_args[0] + expected_return: type | None = type_args[-1] + + # Handle Any as return type - allow encoding but disable decode + # Any doesn't provide useful information for synthesis (expected_return=None) + if expected_return is typing.Any: + typed_enc = _create_typed_synthesized_function(ty) + return CallableEncodable( + ty, typed_enc, ctx, expected_params=None, expected_return=None + ) + + # Create a typed SynthesizedFunction model with the type signature in the description + typed_enc = _create_typed_synthesized_function(ty) + + # Handle Callable[..., ReturnType] - ellipsis means any params, skip param validation + expected_params: list[type] | None = None + if param_types is not ...: + if isinstance(param_types, (list, tuple)): + expected_params = list(param_types) + + return CallableEncodable( + ty, + typed_enc, + ctx, + expected_params=expected_params, + expected_return=expected_return, + ) diff --git a/effectful/handlers/llm/synthesis.py b/effectful/handlers/llm/synthesis.py index 3db32fd7..00674901 100644 --- a/effectful/handlers/llm/synthesis.py +++ b/effectful/handlers/llm/synthesis.py @@ -1,6 +1,20 @@ +import pydantic + from effectful.ops.syntax import ObjectInterpretation +class SynthesizedFunction(pydantic.BaseModel): + """Structured output for function synthesis. + + Pydantic model representing synthesized code with function name and module code. 
+ """ + + module_code: str = pydantic.Field( + ..., + description="Complete Python module code (no imports needed)", + ) + + class SynthesisError(Exception): """Raised when program synthesis fails.""" diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json new file mode 100644 index 00000000..4bdfe004 --- /dev/null +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json @@ -0,0 +1,44 @@ +{ + "id": "chatcmpl-D3sUKrwqZya06r6zsvSXah888Aijz", + "created": 1769816808, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_ac84da453f", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"module_code\":\"def add_two_integers(a: int, b: int) -> int:\\n return a + b\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 34, + "prompt_tokens": 519, + "total_tokens": 553, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json new file mode 100644 index 00000000..2f569c7a --- /dev/null +++ 
b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json @@ -0,0 +1,44 @@ +{ + "id": "chatcmpl-D3shh7iwJIWi9oOlY7uWPX2QHL97c", + "created": 1769817637, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"module_code\":\"def is_even(num: int) -> bool:\\n return num % 2 == 0\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 31, + "prompt_tokens": 584, + "total_tokens": 615, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json new file mode 100644 index 00000000..7692e13a --- /dev/null +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json @@ -0,0 +1,53 @@ +{ + "id": "chatcmpl-D3sUfUT79FEdeJAEZEGHUUNZU8FK1", + "created": 1769816829, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "message": { + "content": null, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": 
"{\"char\":\"a\"}", + "name": "create_function" + }, + "id": "call_lSUu19dmF7Lwv2kqXvJnrz7E", + "type": "function" + } + ], + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 14, + "prompt_tokens": 505, + "total_tokens": 519, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json new file mode 100644 index 00000000..2597f7a8 --- /dev/null +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json @@ -0,0 +1,44 @@ +{ + "id": "chatcmpl-D3sUgwLc3rUdXkhD6mV0xn1UrirF7", + "created": 1769816830, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"module_code\":\"def count_a_occurrences(input_string: str) -> int:\\n # Initialize a counter for 'a' occurrences\\n count = 0\\n # Loop through each character in the string\\n for char in input_string:\\n # Increment the counter if the character is 'a'\\n if char == 'a':\\n count += 1\\n # Return the total count\\n return count\\n\\n\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + 
"provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 100, + "prompt_tokens": 507, + "total_tokens": 607, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json new file mode 100644 index 00000000..0f0a6b76 --- /dev/null +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json @@ -0,0 +1,44 @@ +{ + "id": "chatcmpl-D3sUi3VD9X0tyPos3U7Rz3gVDyan0", + "created": 1769816832, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"module_code\":\"def count_a_occurrences(input_string: str) -> int:\\n # Initialize a counter for 'a' occurrences\\n count = 0\\n # Loop through each character in the string\\n for char in input_string:\\n # Increment the counter if the character is 'a'\\n if char == 'a':\\n count += 1\\n # Return the total count\\n return count\\n\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 99, + "prompt_tokens": 620, + "total_tokens": 719, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + 
"rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json new file mode 100644 index 00000000..5ede3b72 --- /dev/null +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json @@ -0,0 +1,44 @@ +{ + "id": "chatcmpl-D3sUMR9bE9GX0jjS3E4qMJuKqy0n4", + "created": 1769816810, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"module_code\":\"def convert_to_uppercase_with_exclamation(input_string: str) -> str:\\n return input_string.upper() + '!!!'\"} }", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 39, + "prompt_tokens": 516, + "total_tokens": 555, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json 
b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json new file mode 100644 index 00000000..1d321df5 --- /dev/null +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json @@ -0,0 +1,44 @@ +{ + "id": "chatcmpl-D3shiBP0DDvJW1lzI4npNrQYFa7xz", + "created": 1769817638, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"module_code\":\"def multiply_three_numbers(a: int, b: int, c: int) -> int:\\n return a * b * c\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 38, + "prompt_tokens": 586, + "total_tokens": 624, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json new file mode 100644 index 00000000..8fd29225 --- /dev/null +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json @@ -0,0 +1,44 @@ +{ + "id": "chatcmpl-D3sUOB2HTlXb62S0zS3RZRoNRDXfF", + "created": 1769816812, + "model": "gpt-4o-mini-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_1590f93f9d", + 
"choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "{\"value\":{\"module_code\":\"def add_two_integers(a: int, b: int) -> int:\\n return a + b\\n\"}}", + "role": "assistant", + "tool_calls": null, + "function_call": null, + "provider_specific_fields": { + "refusal": null + }, + "annotations": [] + }, + "provider_specific_fields": {} + } + ], + "usage": { + "completion_tokens": 35, + "prompt_tokens": 519, + "total_tokens": 554, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + "text_tokens": null, + "image_tokens": null + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0, + "text_tokens": null, + "image_tokens": null + } + }, + "service_tier": "default" +} \ No newline at end of file diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index f1d5d4c3..649f7d24 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -7,6 +7,7 @@ from effectful.handlers.llm.encoding import Encodable from effectful.handlers.llm.evaluation import UnsafeEvalProvider +from effectful.handlers.llm.synthesis import SynthesizedFunction from effectful.ops.semantics import handler from effectful.ops.types import Operation, Term @@ -723,19 +724,54 @@ class Person(pydantic.BaseModel): class TestCallableEncodable: - """Tests for CallableEncodable - encoding/decoding callables as source code.""" + """Tests for CallableEncodable - encoding/decoding callables as SynthesizedFunction.""" - def test_encode_decode_function(self): + def test_bare_callable_allows_encode_but_not_decode(self): from collections.abc import Callable def add(a: int, b: int) -> int: return a + b + # Bare Callable allows encoding encodable = Encodable.define(Callable, {}) encoded = encodable.encode(add) - assert isinstance(encoded, str) - assert "def add" in encoded - assert "return a + b" in encoded + 
assert isinstance(encoded, SynthesizedFunction) + assert "def add" in encoded.module_code + + # But decode is disabled + with pytest.raises(TypeError, match="Cannot decode/synthesize callable"): + with handler(UnsafeEvalProvider()): + encodable.decode(encoded) + + def test_callable_with_any_return_allows_encode_but_not_decode(self): + from collections.abc import Callable + from typing import Any + + def add(a: int, b: int) -> int: + return a + b + + # Callable[..., Any] allows encoding + encodable = Encodable.define(Callable[..., Any], {}) + encoded = encodable.encode(add) + assert isinstance(encoded, SynthesizedFunction) + + # But decode is disabled + with pytest.raises(TypeError, match="Cannot decode/synthesize callable"): + with handler(UnsafeEvalProvider()): + encodable.decode(encoded) + + def test_encode_decode_function(self): + from collections.abc import Callable + + def add(a: int, b: int) -> int: + return a + b + + # Use typed Callable with matching signature + encodable = Encodable.define(Callable[[int, int], int], {}) + encoded = encodable.encode(add) + assert isinstance(encoded, SynthesizedFunction) + assert "def add" in encoded.module_code + assert "return a + b" in encoded.module_code with handler(UnsafeEvalProvider()): decoded = encodable.decode(encoded) @@ -743,17 +779,18 @@ def add(a: int, b: int) -> int: assert decoded(2, 3) == 5 assert decoded.__name__ == "add" - def test_decode_lambda(self): + def test_decode_with_ellipsis_params(self): from collections.abc import Callable - # Lambdas should work if defined in a way that inspect.getsource can find them - # Note: lambdas defined inline may not always have retrievable source - encodable = Encodable.define(Callable, {}) + # Callable[..., int] allows any params but validates return type + encodable = Encodable.define(Callable[..., int], {}) - # Test decoding a lambda from source string - lambda_source = "f = lambda x: x * 2" + # Test decoding a function - must end with function def + func_source = 
SynthesizedFunction( + module_code="def double(x):\n return x * 2" + ) with handler(UnsafeEvalProvider()): - decoded = encodable.decode(lambda_source) + decoded = encodable.decode(func_source) assert callable(decoded) assert decoded(5) == 10 @@ -761,9 +798,11 @@ def test_decode_with_env(self): from collections.abc import Callable # Test decoding a function that uses env variables - encodable = Encodable.define(Callable, {"factor": 3}) - source = """def multiply(x): + encodable = Encodable.define(Callable[..., int], {"factor": 3}) + source = SynthesizedFunction( + module_code="""def multiply(x): return x * factor""" + ) with handler(UnsafeEvalProvider()): decoded = encodable.decode(source) @@ -773,60 +812,85 @@ def test_decode_with_env(self): def test_encode_non_callable_raises(self): from collections.abc import Callable - encodable = Encodable.define(Callable, {}) + encodable = Encodable.define(Callable[..., int], {}) with pytest.raises(TypeError, match="Expected callable"): - encodable.encode("not a callable", {}) + encodable.encode("not a callable") - def test_encode_builtin_raises(self): + def test_encode_builtin_creates_stub(self): from collections.abc import Callable - encodable = Encodable.define(Callable, {}) - # Built-in functions don't have source code - with pytest.raises(RuntimeError, match="Source code of callable .* not found"): - with handler(UnsafeEvalProvider()): - encodable.encode(len) + encodable = Encodable.define(Callable[..., int], {}) + # Built-in functions don't have source code but have docstrings + encoded = encodable.encode(len) + assert isinstance(encoded, SynthesizedFunction) + assert "def len" in encoded.module_code + assert '"""' in encoded.module_code # docstring present + assert "..." 
in encoded.module_code # stub body - def test_decode_no_callable_raises(self): + def test_encode_builtin_no_docstring_raises(self): from collections.abc import Callable - encodable = Encodable.define(Callable, {}) - # Source code that defines no callable - source = "x = 42" - with pytest.raises(ValueError, match="exactly one callable"): + # Create a callable without source and without docstring + class NoDocCallable: + __name__ = "nodoc" + __doc__ = None + + def __call__(self): + pass + + encodable = Encodable.define(Callable[..., int], {}) + with pytest.raises(RuntimeError, match="no source code and no docstring"): + encodable.encode(NoDocCallable()) + + def test_decode_no_function_at_end_raises(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable[..., int], {}) + # Source code where last statement is not a function definition + source = SynthesizedFunction(module_code="x = 42") + with pytest.raises( + ValueError, match="last statement to be a function definition" + ): with handler(UnsafeEvalProvider()): encodable.decode(source) - def test_decode_multiple_callables_raises(self): + def test_decode_multiple_functions_uses_last(self): from collections.abc import Callable - encodable = Encodable.define(Callable, {}) - # Source code that defines multiple callables - source = """def foo(): + encodable = Encodable.define(Callable[..., int], {}) + # Source code that defines multiple functions - should use the last one + source = SynthesizedFunction( + module_code="""def foo(): return 1 def bar(): return 2""" - with pytest.raises(ValueError, match="exactly one callable"): - with handler(UnsafeEvalProvider()): - encodable.decode(source) + ) + with handler(UnsafeEvalProvider()): + decoded = encodable.decode(source) + assert callable(decoded) + assert decoded.__name__ == "bar" + assert decoded() == 2 - def test_decode_class(self): + def test_decode_class_raises(self): from collections.abc import Callable - encodable = 
Encodable.define(Callable, {}) - # Classes are callable, decode should work with class definitions - source = """class Greeter: + encodable = Encodable.define(Callable[..., int], {}) + # Classes are callable but the last statement must be a function definition + source = SynthesizedFunction( + module_code="""class Greeter: def __init__(self, name): self.name = name def greet(self): return f"Hello, {self.name}!\"""" + ) - with handler(UnsafeEvalProvider()): - decoded = encodable.decode(source) - assert callable(decoded) - instance = decoded("World") - assert instance.greet() == "Hello, World!" + with pytest.raises( + ValueError, match="last statement to be a function definition" + ): + with handler(UnsafeEvalProvider()): + encodable.decode(source) def test_roundtrip(self): from collections.abc import Callable @@ -834,7 +898,7 @@ def test_roundtrip(self): def greet(name: str) -> str: return f"Hello, {name}!" - encodable = Encodable.define(Callable, {}) + encodable = Encodable.define(Callable[[str], str], {}) with handler(UnsafeEvalProvider()): encoded = encodable.encode(greet) decoded = encodable.decode(encoded) @@ -842,3 +906,271 @@ def greet(name: str) -> str: assert callable(decoded) assert decoded("Alice") == "Hello, Alice!" 
assert decoded.__name__ == "greet" + + def test_serialize_deserialize(self): + from collections.abc import Callable + + def add(a: int, b: int) -> int: + return a + b + + encodable = Encodable.define(Callable[[int, int], int], {}) + encoded = encodable.encode(add) + + # Test serialization + serialized = encodable.serialize(encoded) + assert len(serialized) == 1 + assert serialized[0]["type"] == "text" + assert "module_code" in serialized[0]["text"] + + # Test deserialization + deserialized = encodable.deserialize(serialized[0]["text"]) + assert isinstance(deserialized, SynthesizedFunction) + assert "def add" in deserialized.module_code + + def test_decode_validates_last_statement(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable[..., int], {}) + + # Helper function followed by assignment - should fail + source = SynthesizedFunction( + module_code="""def helper(): + return 42 + +result = helper()""" + ) + with pytest.raises( + ValueError, match="last statement to be a function definition" + ): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_typed_callable_includes_signature_in_docstring(self): + from collections.abc import Callable + + # Test that the enc type has the signature in its docstring + encodable = Encodable.define(Callable[[int, int], int], {}) + assert encodable.enc.__doc__ is not None + assert "Callable[[int, int], int]" in encodable.enc.__doc__ + assert "" in encodable.enc.__doc__ + + def test_typed_callable_validates_param_count(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable[[int, int], int], {}) + + # Function with wrong number of parameters + source = SynthesizedFunction( + module_code="""def add(a: int) -> int: + return a""" + ) + with pytest.raises(ValueError, match="expected function with 2 parameters"): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_typed_callable_validates_return_type(self): + from 
collections.abc import Callable + + encodable = Encodable.define(Callable[[int, int], int], {}) + + # Function with wrong return type + source = SynthesizedFunction( + module_code="""def add(a: int, b: int) -> str: + return str(a + b)""" + ) + with pytest.raises(ValueError, match="expected function with return type int"): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_typed_callable_accepts_correct_signature(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable[[int, int], int], {}) + + # Function with correct signature + source = SynthesizedFunction( + module_code="""def add(a: int, b: int) -> int: + return a + b""" + ) + with handler(UnsafeEvalProvider()): + result = encodable.decode(source) + assert callable(result) + assert result(2, 3) == 5 + + def test_ellipsis_callable_skips_param_validation(self): + from collections.abc import Callable + + # Callable[..., int] should skip param validation but still validate return + encodable = Encodable.define(Callable[..., int], {}) + + source = SynthesizedFunction( + module_code="""def anything(a, b, c, d, e) -> int: + return 42""" + ) + with handler(UnsafeEvalProvider()): + result = encodable.decode(source) + assert callable(result) + assert result(1, 2, 3, 4, 5) == 42 + + def test_typed_callable_json_schema_includes_signature(self): + from collections.abc import Callable + + # Test that the JSON schema includes the type signature for the LLM + encodable = Encodable.define(Callable[[int, int], int], {}) + + # Get the JSON schema from the enc model + schema = encodable.enc.model_json_schema() + + # The description should contain the type signature + assert "description" in schema + assert "Callable[[int, int], int]" in schema["description"] + assert "" in schema["description"] + assert "" in schema["description"] + + def test_typed_callable_json_schema_different_signatures(self): + from collections.abc import Callable + + # Test that different type 
signatures produce different schemas + enc1 = Encodable.define(Callable[[str], str], {}) + enc2 = Encodable.define(Callable[[int, int, int], bool], {}) + + schema1 = enc1.enc.model_json_schema() + schema2 = enc2.enc.model_json_schema() + + assert "Callable[[str], str]" in schema1["description"] + assert "Callable[[int, int, int], bool]" in schema2["description"] + + def test_validates_param_count_via_ast(self): + from collections.abc import Callable + + # Test that param validation happens via AST analysis + encodable = Encodable.define(Callable[[int, int], int], {}) + + # Function with 3 params when 2 expected + source = SynthesizedFunction( + module_code="""def add(a: int, b: int, c: int) -> int: + return a + b + c""" + ) + with pytest.raises(ValueError, match="expected function with 2 parameters"): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_validates_param_count_zero_params(self): + from collections.abc import Callable + + # Test callable with no params + encodable = Encodable.define(Callable[[], int], {}) + + # Function with params when 0 expected + source = SynthesizedFunction( + module_code="""def get_value(x: int) -> int: + return x""" + ) + with pytest.raises(ValueError, match="expected function with 0 parameters"): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_validates_accepts_zero_params(self): + from collections.abc import Callable + + # Test callable with no params - correct signature + encodable = Encodable.define(Callable[[], int], {}) + + source = SynthesizedFunction( + module_code="""def get_value() -> int: + return 42""" + ) + with handler(UnsafeEvalProvider()): + result = encodable.decode(source) + assert callable(result) + assert result() == 42 + + def test_ellipsis_callable_json_schema_includes_signature(self): + from collections.abc import Callable + + # Test that Callable[..., int] has signature in schema + encodable = Encodable.define(Callable[..., int], {}) + + schema = 
encodable.enc.model_json_schema() + assert "description" in schema + assert "Callable[[...], int]" in schema["description"] + assert "" in schema["description"] + + def test_ellipsis_callable_validates_return_type(self): + from collections.abc import Callable + + # Callable[..., int] should still validate return type + encodable = Encodable.define(Callable[..., int], {}) + + source = SynthesizedFunction( + module_code="""def get_value() -> str: + return "wrong type\"""" + ) + with pytest.raises(ValueError, match="expected function with return type int"): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + + def test_callable_with_single_param(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable[[str], int], {}) + + source = SynthesizedFunction( + module_code="""def count_chars(s: str) -> int: + return len(s)""" + ) + with handler(UnsafeEvalProvider()): + result = encodable.decode(source) + assert callable(result) + assert result("hello") == 5 + + def test_callable_with_many_params(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable[[int, int, int, int], int], {}) + + source = SynthesizedFunction( + module_code="""def sum_four(a: int, b: int, c: int, d: int) -> int: + return a + b + c + d""" + ) + with handler(UnsafeEvalProvider()): + result = encodable.decode(source) + assert callable(result) + assert result(1, 2, 3, 4) == 10 + + def test_callable_with_bool_return(self): + from collections.abc import Callable + + encodable = Encodable.define(Callable[[int], bool], {}) + + source = SynthesizedFunction( + module_code="""def is_positive(x: int) -> bool: + return x > 0""" + ) + with handler(UnsafeEvalProvider()): + result = encodable.decode(source) + assert callable(result) + assert result(5) is True + assert result(-1) is False + + def test_callable_type_variations_schema(self): + from collections.abc import Callable + from typing import Any + + # Test various callable type 
variations have correct schemas + test_cases = [ + (Callable[[], int], "Callable[[], int]"), + (Callable[[str], str], "Callable[[str], str]"), + (Callable[[int, str], bool], "Callable[[int, str], bool]"), + (Callable[..., int], "Callable[[...], int]"), + (Callable[..., Any], "Callable[[...], Any]"), + ] + + for callable_type, expected_sig in test_cases: + encodable = Encodable.define(callable_type, {}) + schema = encodable.enc.model_json_schema() + assert "description" in schema, f"No description for {callable_type}" + assert expected_sig in schema["description"], ( + f"Expected {expected_sig} in schema for {callable_type}, " + f"got: {schema['description'][:100]}..." + ) diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index 66c7af7b..e882de71 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -25,6 +25,7 @@ call_assistant, completion, ) +from effectful.handlers.llm.evaluation import UnsafeEvalProvider from effectful.handlers.llm.synthesis import ProgramSynthesis, SynthesisError from effectful.ops.semantics import fwd, handler from effectful.ops.syntax import ObjectInterpretation, implements @@ -367,3 +368,189 @@ def test_litellm_caching_selective(request): p1 = simple_prompt("apples") p2 = simple_prompt("apples") assert p1 != p2, "when caching is not enabled, llm outputs should be different" + + +# ============================================================================ +# Callable Synthesis Tests +# ============================================================================ + + +@Template.define +def synthesize_adder() -> Callable[[int, int], int]: + """Generate a Python function that adds two integers together. + + The function should take two integer parameters and return their sum. 
+ """ + raise NotHandled + + +@Template.define +def synthesize_string_processor() -> Callable[[str], str]: + """Generate a Python function that converts a string to uppercase + and adds exclamation marks at the end. + """ + raise NotHandled + + +@Template.define +def synthesize_counter(char: str) -> Callable[[str], int]: + """Generate a Python function that counts occurrences of the character '{char}' + in a given input string. + + The function should be case-sensitive. + """ + raise NotHandled + + +@Template.define +def synthesize_is_even() -> Callable[[int], bool]: + """Generate a Python function that checks if a number is even. + + Return True if the number is divisible by 2, False otherwise. + """ + raise NotHandled + + +@Template.define +def synthesize_three_param_func() -> Callable[[int, int, int], int]: + """Generate a Python function that takes exactly three integer parameters + and returns their product (multiplication). + """ + raise NotHandled + + +class TestCallableSynthesis: + """Tests for synthesizing callable functions via LLM.""" + + @requires_openai + def test_synthesize_adder_function(self, request): + """Test that LLM can synthesize a simple addition function with correct signature.""" + with ( + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), + handler(UnsafeEvalProvider()), + handler(LimitLLMCallsHandler(max_calls=1)), + ): + add_func = synthesize_adder() + + assert callable(add_func) + assert add_func(2, 3) == 5 + assert add_func(0, 0) == 0 + assert add_func(-1, 1) == 0 + assert add_func(100, 200) == 300 + + @requires_openai + def test_synthesize_string_processor(self, request): + """Test that LLM can synthesize a string processing function.""" + with ( + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), + handler(UnsafeEvalProvider()), + handler(LimitLLMCallsHandler(max_calls=1)), + ): + process_func = synthesize_string_processor() + + assert callable(process_func) + result = process_func("hello") + assert 
isinstance(result, str) + assert "HELLO" in result + assert "!" in result + + @requires_openai + def test_synthesize_counter_with_parameter(self, request): + """Test that LLM can synthesize a parameterized counting function.""" + with ( + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), + handler(UnsafeEvalProvider()), + handler(LimitLLMCallsHandler(max_calls=3)), + ): + count_a = synthesize_counter("a") + + assert callable(count_a) + assert count_a("banana") == 3 + assert count_a("cherry") == 0 + assert count_a("aardvark") == 3 + assert count_a("AAA") == 0 # case-sensitive + + @requires_openai + def test_callable_type_signature_in_schema(self, request): + """Test that the callable type signature is communicated to the LLM.""" + from effectful.handlers.llm.encoding import Encodable + + # Verify that the enc type includes the signature in its docstring + encodable = Encodable.define(Callable[[int, int], int], {}) + assert encodable.enc.__doc__ is not None + assert "Callable[[int, int], int]" in encodable.enc.__doc__ + + encodable2 = Encodable.define(Callable[[str], str], {}) + assert encodable2.enc.__doc__ is not None + assert "Callable[[str], str]" in encodable2.enc.__doc__ + + @requires_openai + def test_synthesized_function_roundtrip(self, request): + """Test that a synthesized function can be encoded and decoded.""" + from effectful.handlers.llm.encoding import Encodable + from effectful.handlers.llm.synthesis import SynthesizedFunction + + with ( + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), + handler(UnsafeEvalProvider()), + handler(LimitLLMCallsHandler(max_calls=1)), + ): + # Synthesize a function + add_func = synthesize_adder() + assert callable(add_func) + + # Encode it back to SynthesizedFunction + encodable = Encodable.define(Callable[[int, int], int], {}) + encoded = encodable.encode(add_func) + assert isinstance(encoded, SynthesizedFunction) + assert "def " in encoded.module_code + + # Decode it again and verify it still 
works + decoded = encodable.decode(encoded) + assert callable(decoded) + assert decoded(5, 7) == 12 + + @requires_openai + def test_synthesize_bool_return_type(self, request): + """Test that LLM respects bool return type in signature.""" + import inspect + + with ( + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), + handler(UnsafeEvalProvider()), + handler(LimitLLMCallsHandler(max_calls=1)), + ): + is_even = synthesize_is_even() + + assert callable(is_even) + # Verify return type annotation + sig = inspect.signature(is_even) + assert sig.return_annotation == bool + + # Verify behavior + assert is_even(2) is True + assert is_even(3) is False + assert is_even(0) is True + assert is_even(-4) is True + + @requires_openai + def test_synthesize_three_params(self, request): + """Test that LLM respects the exact number of parameters in signature.""" + import inspect + + with ( + handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), + handler(UnsafeEvalProvider()), + handler(LimitLLMCallsHandler(max_calls=1)), + ): + multiply_three = synthesize_three_param_func() + + assert callable(multiply_three) + # Verify parameter count + sig = inspect.signature(multiply_three) + assert len(sig.parameters) == 3 + + # Verify behavior + assert multiply_three(2, 3, 4) == 24 + assert multiply_three(1, 1, 1) == 1 + assert multiply_three(5, 0, 10) == 0 From 52df3f6ed9a6087d7d0f7c9122a11a15e027a1c4 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 19:20:58 -0500 Subject: [PATCH 15/27] updated callable encoding tests --- effectful/handlers/llm/encoding.py | 38 ++++---- ...hesis__test_synthesize_adder_function.json | 14 +-- ...sis__test_synthesize_bool_return_type.json | 12 +-- ...est_synthesize_counter_with_parameter.json | 10 +-- ...t_synthesize_counter_with_parameter_1.json | 12 +-- ...t_synthesize_counter_with_parameter_2.json | 12 +-- ...sis__test_synthesize_string_processor.json | 12 +-- ...nthesis__test_synthesize_three_params.json | 12 +-- 
...__test_synthesized_function_roundtrip.json | 8 +- tests/test_handlers_llm_encoding.py | 90 +++++-------------- 10 files changed, 91 insertions(+), 129 deletions(-) diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py index 3047e7d5..b11bc0df 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -299,8 +299,9 @@ def _create_typed_synthesized_function( 1. Produce one block of Python code. -2. Do not include usage examples. -3. Your output function def must be the final statement in the code block. +2. The function MUST have type annotations for all parameters and the return type. +3. The function definition must be the LAST statement - do not add any code after it. +4. Do not include usage examples or function calls. """ @@ -331,9 +332,12 @@ def _validate_signature_ast( def _validate_signature_callable( func: Callable, expected_params: list[type] | None, - expected_return: type | None, + expected_return: type, ) -> None: - """Validate the function signature from runtime callable after execution.""" + """Validate the function signature from runtime callable after execution. + + The synthesized function must have type annotations for parameters and return type. 
+ """ sig = inspect.signature(func) if expected_params is not None: @@ -344,16 +348,19 @@ def _validate_signature_callable( f"got {len(actual_params)}" ) - if expected_return is not None: - actual_return = sig.return_annotation - if actual_return is not inspect.Parameter.empty: - expected_name = getattr(expected_return, "__name__", str(expected_return)) - actual_name = getattr(actual_return, "__name__", str(actual_return)) - if expected_name != actual_name: - raise ValueError( - f"decode() expected function with return type {expected_name}, " - f"got {actual_name}" - ) + actual_return = sig.return_annotation + if actual_return is inspect.Parameter.empty: + raise ValueError( + "decode() requires synthesized function to have a return type annotation" + ) + + expected_name = getattr(expected_return, "__name__", str(expected_return)) + actual_name = getattr(actual_return, "__name__", str(actual_return)) + if expected_name != actual_name: + raise ValueError( + f"decode() expected function with return type {expected_name}, " + f"got {actual_name}" + ) @dataclass @@ -368,6 +375,7 @@ def encode(self, t: Callable) -> SynthesizedFunction: # (https://github.com/python/mypy/issues/14928) if not isinstance(t, Callable): # type: ignore raise TypeError(f"Expected callable, got {type(t)}") + try: source = inspect.getsource(t) except (OSError, TypeError): @@ -426,7 +434,7 @@ def decode(self, encoded_value: SynthesizedFunction) -> Callable: ) last_stmt = module.body[-1] - if not isinstance(last_stmt, (ast.FunctionDef, ast.AsyncFunctionDef)): + if not isinstance(last_stmt, ast.FunctionDef): raise ValueError( f"decode() requires the last statement to be a function definition, " f"got {type(last_stmt).__name__}" diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json index 4bdfe004..af02c2d6 100644 --- 
a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_adder_function.json @@ -1,15 +1,15 @@ { - "id": "chatcmpl-D3sUKrwqZya06r6zsvSXah888Aijz", - "created": 1769816808, + "id": "chatcmpl-D3t07MhXj7upN73HTU9CZuGWVI26D", + "created": 1769818779, "model": "gpt-4o-mini-2024-07-18", "object": "chat.completion", - "system_fingerprint": "fp_ac84da453f", + "system_fingerprint": "fp_1590f93f9d", "choices": [ { "finish_reason": "stop", "index": 0, "message": { - "content": "{\"value\":{\"module_code\":\"def add_two_integers(a: int, b: int) -> int:\\n return a + b\"}}", + "content": "{\"value\":{\"module_code\":\"def add_two_numbers(a: int, b: int) -> int:\\n return a + b\\n\"}}", "role": "assistant", "tool_calls": null, "function_call": null, @@ -22,9 +22,9 @@ } ], "usage": { - "completion_tokens": 34, - "prompt_tokens": 519, - "total_tokens": 553, + "completion_tokens": 33, + "prompt_tokens": 605, + "total_tokens": 638, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json index 2f569c7a..334c226d 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_bool_return_type.json @@ -1,6 +1,6 @@ { - "id": "chatcmpl-D3shh7iwJIWi9oOlY7uWPX2QHL97c", - "created": 1769817637, + "id": "chatcmpl-D3t0CeUsrUYRG4uFENTc97eSR8dNV", + "created": 1769818784, "model": "gpt-4o-mini-2024-07-18", "object": "chat.completion", "system_fingerprint": "fp_1590f93f9d", @@ -9,7 +9,7 @@ "finish_reason": "stop", "index": 0, 
"message": { - "content": "{\"value\":{\"module_code\":\"def is_even(num: int) -> bool:\\n return num % 2 == 0\"}}", + "content": "{\"value\":{\"module_code\":\"def is_even(number: int) -> bool:\\n return number % 2 == 0\\n\"}}", "role": "assistant", "tool_calls": null, "function_call": null, @@ -22,9 +22,9 @@ } ], "usage": { - "completion_tokens": 31, - "prompt_tokens": 584, - "total_tokens": 615, + "completion_tokens": 32, + "prompt_tokens": 603, + "total_tokens": 635, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json index 7692e13a..b0c5f9bf 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter.json @@ -1,6 +1,6 @@ { - "id": "chatcmpl-D3sUfUT79FEdeJAEZEGHUUNZU8FK1", - "created": 1769816829, + "id": "chatcmpl-D3t094sc9fvHTU8EcpX5U3jmycshT", + "created": 1769818781, "model": "gpt-4o-mini-2024-07-18", "object": "chat.completion", "system_fingerprint": "fp_1590f93f9d", @@ -17,7 +17,7 @@ "arguments": "{\"char\":\"a\"}", "name": "create_function" }, - "id": "call_lSUu19dmF7Lwv2kqXvJnrz7E", + "id": "call_yHyAUG2fywMoQVlgDgfW5tni", "type": "function" } ], @@ -32,8 +32,8 @@ ], "usage": { "completion_tokens": 14, - "prompt_tokens": 505, - "total_tokens": 519, + "prompt_tokens": 591, + "total_tokens": 605, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json 
b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json index 2597f7a8..f1dc5cb2 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_1.json @@ -1,6 +1,6 @@ { - "id": "chatcmpl-D3sUgwLc3rUdXkhD6mV0xn1UrirF7", - "created": 1769816830, + "id": "chatcmpl-D3t09UG6mHSIp7eUBauqT1qp1D1Cz", + "created": 1769818781, "model": "gpt-4o-mini-2024-07-18", "object": "chat.completion", "system_fingerprint": "fp_1590f93f9d", @@ -9,7 +9,7 @@ "finish_reason": "stop", "index": 0, "message": { - "content": "{\"value\":{\"module_code\":\"def count_a_occurrences(input_string: str) -> int:\\n # Initialize a counter for 'a' occurrences\\n count = 0\\n # Loop through each character in the string\\n for char in input_string:\\n # Increment the counter if the character is 'a'\\n if char == 'a':\\n count += 1\\n # Return the total count\\n return count\\n\\n\"}}", + "content": "{\"value\":{\"module_code\":\"def count_character_a(input_string: str) -> int:\\n return input_string.count('a')\\n\"}}", "role": "assistant", "tool_calls": null, "function_call": null, @@ -22,9 +22,9 @@ } ], "usage": { - "completion_tokens": 100, - "prompt_tokens": 507, - "total_tokens": 607, + "completion_tokens": 34, + "prompt_tokens": 593, + "total_tokens": 627, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json index 0f0a6b76..9970c3d0 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json 
+++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_counter_with_parameter_2.json @@ -1,6 +1,6 @@ { - "id": "chatcmpl-D3sUi3VD9X0tyPos3U7Rz3gVDyan0", - "created": 1769816832, + "id": "chatcmpl-D3t0A8TB6nGWKFLinAIo6MtI8A9C7", + "created": 1769818782, "model": "gpt-4o-mini-2024-07-18", "object": "chat.completion", "system_fingerprint": "fp_1590f93f9d", @@ -9,7 +9,7 @@ "finish_reason": "stop", "index": 0, "message": { - "content": "{\"value\":{\"module_code\":\"def count_a_occurrences(input_string: str) -> int:\\n # Initialize a counter for 'a' occurrences\\n count = 0\\n # Loop through each character in the string\\n for char in input_string:\\n # Increment the counter if the character is 'a'\\n if char == 'a':\\n count += 1\\n # Return the total count\\n return count\\n\"}}", + "content": "{\"value\":{\"module_code\":\"def count_character_a(input_string: str) -> int:\\n return input_string.count('a')\\n\"}}", "role": "assistant", "tool_calls": null, "function_call": null, @@ -22,9 +22,9 @@ } ], "usage": { - "completion_tokens": 99, - "prompt_tokens": 620, - "total_tokens": 719, + "completion_tokens": 34, + "prompt_tokens": 641, + "total_tokens": 675, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json index 5ede3b72..d49188fd 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_string_processor.json @@ -1,6 +1,6 @@ { - "id": "chatcmpl-D3sUMR9bE9GX0jjS3E4qMJuKqy0n4", - "created": 1769816810, + "id": "chatcmpl-D3t08sdCPU3BvwMIBHiYaLfneCLCQ", + "created": 1769818780, "model": 
"gpt-4o-mini-2024-07-18", "object": "chat.completion", "system_fingerprint": "fp_1590f93f9d", @@ -9,7 +9,7 @@ "finish_reason": "stop", "index": 0, "message": { - "content": "{\"value\":{\"module_code\":\"def convert_to_uppercase_with_exclamation(input_string: str) -> str:\\n return input_string.upper() + '!!!'\"} }", + "content": "{\"value\":{\"module_code\":\"def convert_to_uppercase_with_exclamations(input_string: str) -> str:\\n return input_string.upper() + '!!' \\n\"}}", "role": "assistant", "tool_calls": null, "function_call": null, @@ -22,9 +22,9 @@ } ], "usage": { - "completion_tokens": 39, - "prompt_tokens": 516, - "total_tokens": 555, + "completion_tokens": 41, + "prompt_tokens": 602, + "total_tokens": 643, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json index 1d321df5..4ae29502 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesize_three_params.json @@ -1,6 +1,6 @@ { - "id": "chatcmpl-D3shiBP0DDvJW1lzI4npNrQYFa7xz", - "created": 1769817638, + "id": "chatcmpl-D3t0DNoQDXPHAYMTMFDFalBuDkS6W", + "created": 1769818785, "model": "gpt-4o-mini-2024-07-18", "object": "chat.completion", "system_fingerprint": "fp_1590f93f9d", @@ -9,7 +9,7 @@ "finish_reason": "stop", "index": 0, "message": { - "content": "{\"value\":{\"module_code\":\"def multiply_three_numbers(a: int, b: int, c: int) -> int:\\n return a * b * c\"}}", + "content": "{\"value\":{\"module_code\":\"def multiply_three_numbers(a: int, b: int, c: int) -> int:\\n return a * b * c\\n\"}}", "role": "assistant", "tool_calls": null, "function_call": null, @@ -22,9 +22,9 @@ } ], "usage": 
{ - "completion_tokens": 38, - "prompt_tokens": 586, - "total_tokens": 624, + "completion_tokens": 39, + "prompt_tokens": 605, + "total_tokens": 644, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json index 8fd29225..65a95290 100644 --- a/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json +++ b/tests/fixtures/tests_test_handlers_llm_provider.py__TestCallableSynthesis__test_synthesized_function_roundtrip.json @@ -1,6 +1,6 @@ { - "id": "chatcmpl-D3sUOB2HTlXb62S0zS3RZRoNRDXfF", - "created": 1769816812, + "id": "chatcmpl-D3t0BrdzBcHyqvKaYpMKgolp3bdRO", + "created": 1769818783, "model": "gpt-4o-mini-2024-07-18", "object": "chat.completion", "system_fingerprint": "fp_1590f93f9d", @@ -23,8 +23,8 @@ ], "usage": { "completion_tokens": 35, - "prompt_tokens": 519, - "total_tokens": 554, + "prompt_tokens": 605, + "total_tokens": 640, "completion_tokens_details": { "accepted_prediction_tokens": 0, "audio_tokens": 0, diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index 649f7d24..e89b9ef6 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -1,5 +1,6 @@ +from collections.abc import Callable from dataclasses import asdict, dataclass -from typing import NamedTuple, TypedDict +from typing import Any, NamedTuple, TypedDict import pydantic import pytest @@ -727,8 +728,6 @@ class TestCallableEncodable: """Tests for CallableEncodable - encoding/decoding callables as SynthesizedFunction.""" def test_bare_callable_allows_encode_but_not_decode(self): - from collections.abc import Callable - def add(a: int, b: int) -> int: return a + b @@ -744,9 +743,6 @@ def add(a: int, b: 
int) -> int: encodable.decode(encoded) def test_callable_with_any_return_allows_encode_but_not_decode(self): - from collections.abc import Callable - from typing import Any - def add(a: int, b: int) -> int: return a + b @@ -761,8 +757,6 @@ def add(a: int, b: int) -> int: encodable.decode(encoded) def test_encode_decode_function(self): - from collections.abc import Callable - def add(a: int, b: int) -> int: return a + b @@ -780,14 +774,12 @@ def add(a: int, b: int) -> int: assert decoded.__name__ == "add" def test_decode_with_ellipsis_params(self): - from collections.abc import Callable - # Callable[..., int] allows any params but validates return type encodable = Encodable.define(Callable[..., int], {}) - # Test decoding a function - must end with function def + # Test decoding a function - must end with function def with return annotation func_source = SynthesizedFunction( - module_code="def double(x):\n return x * 2" + module_code="def double(x) -> int:\n return x * 2" ) with handler(UnsafeEvalProvider()): decoded = encodable.decode(func_source) @@ -795,12 +787,10 @@ def test_decode_with_ellipsis_params(self): assert decoded(5) == 10 def test_decode_with_env(self): - from collections.abc import Callable - # Test decoding a function that uses env variables encodable = Encodable.define(Callable[..., int], {"factor": 3}) source = SynthesizedFunction( - module_code="""def multiply(x): + module_code="""def multiply(x) -> int: return x * factor""" ) @@ -810,15 +800,11 @@ def test_decode_with_env(self): assert decoded(4) == 12 def test_encode_non_callable_raises(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[..., int], {}) with pytest.raises(TypeError, match="Expected callable"): encodable.encode("not a callable") def test_encode_builtin_creates_stub(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[..., int], {}) # Built-in functions don't have source code but have docstrings encoded = 
encodable.encode(len) @@ -828,8 +814,6 @@ def test_encode_builtin_creates_stub(self): assert "..." in encoded.module_code # stub body def test_encode_builtin_no_docstring_raises(self): - from collections.abc import Callable - # Create a callable without source and without docstring class NoDocCallable: __name__ = "nodoc" @@ -843,8 +827,6 @@ def __call__(self): encodable.encode(NoDocCallable()) def test_decode_no_function_at_end_raises(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[..., int], {}) # Source code where last statement is not a function definition source = SynthesizedFunction(module_code="x = 42") @@ -855,15 +837,13 @@ def test_decode_no_function_at_end_raises(self): encodable.decode(source) def test_decode_multiple_functions_uses_last(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[..., int], {}) # Source code that defines multiple functions - should use the last one source = SynthesizedFunction( - module_code="""def foo(): + module_code="""def foo() -> int: return 1 -def bar(): +def bar() -> int: return 2""" ) with handler(UnsafeEvalProvider()): @@ -873,8 +853,6 @@ def bar(): assert decoded() == 2 def test_decode_class_raises(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[..., int], {}) # Classes are callable but the last statement must be a function definition source = SynthesizedFunction( @@ -893,8 +871,6 @@ def greet(self): encodable.decode(source) def test_roundtrip(self): - from collections.abc import Callable - def greet(name: str) -> str: return f"Hello, {name}!" 
@@ -908,8 +884,6 @@ def greet(name: str) -> str: assert decoded.__name__ == "greet" def test_serialize_deserialize(self): - from collections.abc import Callable - def add(a: int, b: int) -> int: return a + b @@ -928,8 +902,6 @@ def add(a: int, b: int) -> int: assert "def add" in deserialized.module_code def test_decode_validates_last_statement(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[..., int], {}) # Helper function followed by assignment - should fail @@ -946,8 +918,6 @@ def test_decode_validates_last_statement(self): encodable.decode(source) def test_typed_callable_includes_signature_in_docstring(self): - from collections.abc import Callable - # Test that the enc type has the signature in its docstring encodable = Encodable.define(Callable[[int, int], int], {}) assert encodable.enc.__doc__ is not None @@ -955,8 +925,6 @@ def test_typed_callable_includes_signature_in_docstring(self): assert "" in encodable.enc.__doc__ def test_typed_callable_validates_param_count(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[[int, int], int], {}) # Function with wrong number of parameters @@ -969,8 +937,6 @@ def test_typed_callable_validates_param_count(self): encodable.decode(source) def test_typed_callable_validates_return_type(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[[int, int], int], {}) # Function with wrong return type @@ -982,9 +948,22 @@ def test_typed_callable_validates_return_type(self): with handler(UnsafeEvalProvider()): encodable.decode(source) - def test_typed_callable_accepts_correct_signature(self): - from collections.abc import Callable + def test_typed_callable_requires_return_annotation(self): + encodable = Encodable.define(Callable[[int, int], int], {}) + + # Function missing return type annotation + source = SynthesizedFunction( + module_code="""def add(a: int, b: int): + return a + b""" + ) + with pytest.raises( + ValueError, + 
match="requires synthesized function to have a return type annotation", + ): + with handler(UnsafeEvalProvider()): + encodable.decode(source) + def test_typed_callable_accepts_correct_signature(self): encodable = Encodable.define(Callable[[int, int], int], {}) # Function with correct signature @@ -998,8 +977,6 @@ def test_typed_callable_accepts_correct_signature(self): assert result(2, 3) == 5 def test_ellipsis_callable_skips_param_validation(self): - from collections.abc import Callable - # Callable[..., int] should skip param validation but still validate return encodable = Encodable.define(Callable[..., int], {}) @@ -1013,8 +990,6 @@ def test_ellipsis_callable_skips_param_validation(self): assert result(1, 2, 3, 4, 5) == 42 def test_typed_callable_json_schema_includes_signature(self): - from collections.abc import Callable - # Test that the JSON schema includes the type signature for the LLM encodable = Encodable.define(Callable[[int, int], int], {}) @@ -1028,8 +1003,6 @@ def test_typed_callable_json_schema_includes_signature(self): assert "" in schema["description"] def test_typed_callable_json_schema_different_signatures(self): - from collections.abc import Callable - # Test that different type signatures produce different schemas enc1 = Encodable.define(Callable[[str], str], {}) enc2 = Encodable.define(Callable[[int, int, int], bool], {}) @@ -1041,8 +1014,6 @@ def test_typed_callable_json_schema_different_signatures(self): assert "Callable[[int, int, int], bool]" in schema2["description"] def test_validates_param_count_via_ast(self): - from collections.abc import Callable - # Test that param validation happens via AST analysis encodable = Encodable.define(Callable[[int, int], int], {}) @@ -1056,8 +1027,6 @@ def test_validates_param_count_via_ast(self): encodable.decode(source) def test_validates_param_count_zero_params(self): - from collections.abc import Callable - # Test callable with no params encodable = Encodable.define(Callable[[], int], {}) @@ -1071,8 
+1040,6 @@ def test_validates_param_count_zero_params(self): encodable.decode(source) def test_validates_accepts_zero_params(self): - from collections.abc import Callable - # Test callable with no params - correct signature encodable = Encodable.define(Callable[[], int], {}) @@ -1086,8 +1053,6 @@ def test_validates_accepts_zero_params(self): assert result() == 42 def test_ellipsis_callable_json_schema_includes_signature(self): - from collections.abc import Callable - # Test that Callable[..., int] has signature in schema encodable = Encodable.define(Callable[..., int], {}) @@ -1097,8 +1062,6 @@ def test_ellipsis_callable_json_schema_includes_signature(self): assert "" in schema["description"] def test_ellipsis_callable_validates_return_type(self): - from collections.abc import Callable - # Callable[..., int] should still validate return type encodable = Encodable.define(Callable[..., int], {}) @@ -1111,8 +1074,6 @@ def test_ellipsis_callable_validates_return_type(self): encodable.decode(source) def test_callable_with_single_param(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[[str], int], {}) source = SynthesizedFunction( @@ -1125,8 +1086,6 @@ def test_callable_with_single_param(self): assert result("hello") == 5 def test_callable_with_many_params(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[[int, int, int, int], int], {}) source = SynthesizedFunction( @@ -1139,8 +1098,6 @@ def test_callable_with_many_params(self): assert result(1, 2, 3, 4) == 10 def test_callable_with_bool_return(self): - from collections.abc import Callable - encodable = Encodable.define(Callable[[int], bool], {}) source = SynthesizedFunction( @@ -1154,9 +1111,6 @@ def test_callable_with_bool_return(self): assert result(-1) is False def test_callable_type_variations_schema(self): - from collections.abc import Callable - from typing import Any - # Test various callable type variations have correct schemas test_cases = 
[ (Callable[[], int], "Callable[[], int]"), From 4b10ac3df0c1c9d881d8c14161ac683d192ac898 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 19:27:41 -0500 Subject: [PATCH 16/27] s/TypeError/NotImplementedError --- effectful/handlers/llm/evaluation.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 463041fe..271e55f6 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -18,7 +18,9 @@ def parse(source: str, filename: str) -> ast.Module: Returns the parsed AST. """ - raise TypeError("An eval provider must be installed in order to parse code.") + raise NotImplementedError( + "An eval provider must be installed in order to parse code." + ) @defop @@ -31,7 +33,9 @@ def compile(module: ast.Module, filename: str) -> CodeType: Returns the compiled code object. """ - raise TypeError("An eval provider must be installed in order to compile code.") + raise NotImplementedError( + "An eval provider must be installed in order to compile code." + ) @defop @@ -45,7 +49,9 @@ def exec( bytecode: A code object to execute (typically produced by compile()). env: The namespace mapping used during execution. """ - raise TypeError("An eval provider must be installed in order to execute code.") + raise NotImplementedError( + "An eval provider must be installed in order to execute code." 
+ ) class UnsafeEvalProvider(ObjectInterpretation): From 2b4449a45a5aa82e4177efd8bfebbd2e22e5d4e9 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 19:43:34 -0500 Subject: [PATCH 17/27] simplified smart constructor --- effectful/handlers/llm/encoding.py | 41 ++++++++--------------------- tests/test_handlers_llm_encoding.py | 14 ---------- 2 files changed, 11 insertions(+), 44 deletions(-) diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py index b11bc0df..1922bdca 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -3,6 +3,7 @@ import inspect import io import textwrap +import types import typing from abc import ABC, abstractmethod from collections.abc import Callable, Mapping, MutableMapping, Sequence @@ -593,16 +594,14 @@ def _encodable_callable( ) -> Encodable[Callable, SynthesizedFunction]: ctx = ctx or {} - # Extract type args - Callable requires a type signature type_args = typing.get_args(ty) - # Handle bare Callable without type args - allow encoding but disable decode - # this occurs when encoding Tools which return callable (need to Encodable.define(return_type) for return type) + # Bare Callable without type args - allow encoding but disable decode + # this occurs when decoding the result of Tools which return callable (need to Encodable.define(return_type) for return type) if not type_args: + assert ty is types.FunctionType, f"Callable must have type signatures {ty}" typed_enc = _create_typed_synthesized_function(Callable[..., typing.Any]) # type: ignore[arg-type] - return CallableEncodable( - ty, typed_enc, ctx, expected_params=None, expected_return=None - ) + return CallableEncodable(ty, typed_enc, ctx) if len(type_args) < 2: raise TypeError( @@ -610,31 +609,13 @@ def _encodable_callable( "Expected Callable[[ParamTypes...], ReturnType] or Callable[..., ReturnType]." 
) - # Extract param and return types for validation - param_types = type_args[0] - expected_return: type | None = type_args[-1] - - # Handle Any as return type - allow encoding but disable decode - # Any doesn't provide useful information for synthesis (expected_return=None) - if expected_return is typing.Any: - typed_enc = _create_typed_synthesized_function(ty) - return CallableEncodable( - ty, typed_enc, ctx, expected_params=None, expected_return=None - ) + param_types, expected_return = type_args[0], type_args[-1] - # Create a typed SynthesizedFunction model with the type signature in the description typed_enc = _create_typed_synthesized_function(ty) - # Handle Callable[..., ReturnType] - ellipsis means any params, skip param validation + # Ellipsis means any params, skip param validation expected_params: list[type] | None = None - if param_types is not ...: - if isinstance(param_types, (list, tuple)): - expected_params = list(param_types) - - return CallableEncodable( - ty, - typed_enc, - ctx, - expected_params=expected_params, - expected_return=expected_return, - ) + if param_types is not ... 
and isinstance(param_types, (list, tuple)): + expected_params = list(param_types) + + return CallableEncodable(ty, typed_enc, ctx, expected_params, expected_return) diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index e89b9ef6..670a8b46 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -742,20 +742,6 @@ def add(a: int, b: int) -> int: with handler(UnsafeEvalProvider()): encodable.decode(encoded) - def test_callable_with_any_return_allows_encode_but_not_decode(self): - def add(a: int, b: int) -> int: - return a + b - - # Callable[..., Any] allows encoding - encodable = Encodable.define(Callable[..., Any], {}) - encoded = encodable.encode(add) - assert isinstance(encoded, SynthesizedFunction) - - # But decode is disabled - with pytest.raises(TypeError, match="Cannot decode/synthesize callable"): - with handler(UnsafeEvalProvider()): - encodable.decode(encoded) - def test_encode_decode_function(self): def add(a: int, b: int) -> int: return a + b From 553450f98bda0cb92bb2e2ffb786b5ff5953e2c7 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 19:49:12 -0500 Subject: [PATCH 18/27] bare callables not allowed --- tests/test_handlers_llm_encoding.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index 670a8b46..fb04e37e 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -727,21 +727,6 @@ class Person(pydantic.BaseModel): class TestCallableEncodable: """Tests for CallableEncodable - encoding/decoding callables as SynthesizedFunction.""" - def test_bare_callable_allows_encode_but_not_decode(self): - def add(a: int, b: int) -> int: - return a + b - - # Bare Callable allows encoding - encodable = Encodable.define(Callable, {}) - encoded = encodable.encode(add) - assert isinstance(encoded, SynthesizedFunction) - assert "def add" in 
encoded.module_code - - # But decode is disabled - with pytest.raises(TypeError, match="Cannot decode/synthesize callable"): - with handler(UnsafeEvalProvider()): - encodable.decode(encoded) - def test_encode_decode_function(self): def add(a: int, b: int) -> int: return a + b From aac0eb9ca4a2d00e97695318388ced50bd22001a Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 19:52:00 -0500 Subject: [PATCH 19/27] droped synthesis and removed encoding_instructions --- effectful/handlers/llm/encoding.py | 19 ++++++++++++------- effectful/handlers/llm/synthesis.py | 28 ---------------------------- 2 files changed, 12 insertions(+), 35 deletions(-) delete mode 100644 effectful/handlers/llm/synthesis.py diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py index 1922bdca..d0796d6c 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -19,7 +19,6 @@ from PIL import Image import effectful.handlers.llm.evaluation as evaluation -from effectful.handlers.llm.synthesis import SynthesizedFunction from effectful.ops.semantics import _simple_type from effectful.ops.syntax import _CustomSingleDispatchCallable from effectful.ops.types import Operation, Term @@ -284,6 +283,18 @@ def _format_callable_type(callable_type: type[Callable]) -> str: return str(callable_type) +class SynthesizedFunction(pydantic.BaseModel): + """Structured output for function synthesis. + + Pydantic model representing synthesized code with function name and module code. 
+ """ + + module_code: str = pydantic.Field( + ..., + description="Complete Python module code (no imports needed)", + ) + + def _create_typed_synthesized_function( callable_type: type[Callable], ) -> type[SynthesizedFunction]: @@ -477,12 +488,6 @@ def serialize( def deserialize(self, serialized_value: str) -> SynthesizedFunction: return SynthesizedFunction.model_validate_json(serialized_value) - @Operation.define - @classmethod - def encoding_instructions(cls) -> str | None: - """Instructions to be prefixed onto synthesis prompts to tune the encoding of the result.""" - return None - @Encodable.define.register(object) def _encodable_object[T, U]( diff --git a/effectful/handlers/llm/synthesis.py b/effectful/handlers/llm/synthesis.py deleted file mode 100644 index 00674901..00000000 --- a/effectful/handlers/llm/synthesis.py +++ /dev/null @@ -1,28 +0,0 @@ -import pydantic - -from effectful.ops.syntax import ObjectInterpretation - - -class SynthesizedFunction(pydantic.BaseModel): - """Structured output for function synthesis. - - Pydantic model representing synthesized code with function name and module code. 
- """ - - module_code: str = pydantic.Field( - ..., - description="Complete Python module code (no imports needed)", - ) - - -class SynthesisError(Exception): - """Raised when program synthesis fails.""" - - def __init__(self, message, code=None): - super().__init__(message) - self.code = code - - -class ProgramSynthesis(ObjectInterpretation): - def __init__(self, *args, **kwargs): - raise NotImplementedError From 5b3a559028c3bde46a5039b2e3db488fb11479b9 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 19:56:37 -0500 Subject: [PATCH 20/27] fixed imports --- tests/test_handlers_llm_encoding.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index fb04e37e..775d5f72 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -6,9 +6,8 @@ import pytest from PIL import Image -from effectful.handlers.llm.encoding import Encodable +from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction from effectful.handlers.llm.evaluation import UnsafeEvalProvider -from effectful.handlers.llm.synthesis import SynthesizedFunction from effectful.ops.semantics import handler from effectful.ops.types import Operation, Term From 711b27d9b8f6ae02e9eb931f89943bb3feb3af46 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 20:02:01 -0500 Subject: [PATCH 21/27] fixed imports and tests --- tests/test_handlers_llm.py | 19 ------------------ tests/test_handlers_llm_provider.py | 31 ++--------------------------- 2 files changed, 2 insertions(+), 48 deletions(-) diff --git a/tests/test_handlers_llm.py b/tests/test_handlers_llm.py index 2c98a650..c4c8be2c 100644 --- a/tests/test_handlers_llm.py +++ b/tests/test_handlers_llm.py @@ -1,10 +1,7 @@ from collections.abc import Callable from typing import Annotated -import pytest - from effectful.handlers.llm import Template -from effectful.handlers.llm.synthesis import 
ProgramSynthesis from effectful.handlers.llm.template import IsRecursive from effectful.ops.semantics import NotHandled, handler from effectful.ops.syntax import ObjectInterpretation, implements @@ -119,22 +116,6 @@ def test_primes_decode_int(): assert isinstance(result, int) -@pytest.mark.xfail(reason="Synthesis handler not yet implemented") -def test_count_char_with_program_synthesis(): - """Test the count_char template with program synthesis.""" - mock_code = """ -def count_occurrences(s): - return s.count('a') -""" - mock_provider = SingleResponseLLMProvider(mock_code) - - with handler(mock_provider), handler(ProgramSynthesis()): - count_a = count_char("a") - assert callable(count_a) - assert count_a("banana") == 3 - assert count_a("cherry") == 0 - - class FailingThenSucceedingProvider[T](ObjectInterpretation): """Mock provider that fails a specified number of times before succeeding.""" diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index e882de71..0a766231 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -5,6 +5,7 @@ """ import functools +import inspect import json import os from collections.abc import Callable @@ -25,8 +26,8 @@ call_assistant, completion, ) +from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction from effectful.handlers.llm.evaluation import UnsafeEvalProvider -from effectful.handlers.llm.synthesis import ProgramSynthesis, SynthesisError from effectful.ops.semantics import fwd, handler from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import NotHandled @@ -241,29 +242,6 @@ def test_with_config_params(self, request): assert isinstance(result, str) -@pytest.mark.xfail(reason="Program synthesis not implemented") -class TestProgramSynthesis: - """Tests for ProgramSynthesis handler functionality.""" - - @pytest.mark.xfail - @requires_openai - @retry_on_error(error=SynthesisError, n=3) - def 
test_generates_callable(self, request): - """Test ProgramSynthesis handler generates executable code.""" - with ( - handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), - handler(ProgramSynthesis()), - handler(LimitLLMCallsHandler(max_calls=1)), - ): - count_func = create_function("a") - - assert callable(count_func) - # Test the generated function - assert count_func("banana") == 3 - assert count_func("cherry") == 0 - assert count_func("aardvark") == 3 - - def smiley_face() -> Image.Image: bmp = [ "00000000", @@ -473,7 +451,6 @@ def test_synthesize_counter_with_parameter(self, request): @requires_openai def test_callable_type_signature_in_schema(self, request): """Test that the callable type signature is communicated to the LLM.""" - from effectful.handlers.llm.encoding import Encodable # Verify that the enc type includes the signature in its docstring encodable = Encodable.define(Callable[[int, int], int], {}) @@ -487,8 +464,6 @@ def test_callable_type_signature_in_schema(self, request): @requires_openai def test_synthesized_function_roundtrip(self, request): """Test that a synthesized function can be encoded and decoded.""" - from effectful.handlers.llm.encoding import Encodable - from effectful.handlers.llm.synthesis import SynthesizedFunction with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), @@ -513,7 +488,6 @@ def test_synthesized_function_roundtrip(self, request): @requires_openai def test_synthesize_bool_return_type(self, request): """Test that LLM respects bool return type in signature.""" - import inspect with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), @@ -536,7 +510,6 @@ def test_synthesize_bool_return_type(self, request): @requires_openai def test_synthesize_three_params(self, request): """Test that LLM respects the exact number of parameters in signature.""" - import inspect with ( handler(ReplayLiteLLMProvider(request, model="gpt-4o-mini")), From 3f1aa656b92e426421f824c5ba2e13a44403331c Mon Sep 17 
00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 20:15:17 -0500 Subject: [PATCH 22/27] added restricted python again --- effectful/handlers/llm/evaluation.py | 65 ++++++++++++ pyproject.toml | 4 +- tests/test_handlers_llm_encoding.py | 153 ++++++++++++++++++++------- 3 files changed, 180 insertions(+), 42 deletions(-) diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 271e55f6..9ab6da78 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -5,6 +5,13 @@ from types import CodeType from typing import Any +from RestrictedPython import ( + Eval, + Guards, + compile_restricted, + safe_globals, +) + from effectful.ops.syntax import ObjectInterpretation, defop, implements @@ -86,3 +93,61 @@ def exec( # Execute module-style so top-level defs land in `env`. builtins.exec(bytecode, env, env) + + +class RestrictedEvalProvider(ObjectInterpretation): + """ + Safer provider using RestrictedPython. + + RestrictedPython is not a complete sandbox, but it enforces a restricted + language subset and expects you to provide a constrained exec environment. + """ + + config: dict[str, Any] + + def __init__(self, **kwargs): + self.config = kwargs + + @implements(parse) + def parse(self, source: str, filename: str) -> ast.Module: + # Keep inspect.getsource() working for dynamically-defined objects. + linecache.cache[filename] = ( + len(source), + None, + source.splitlines(True), + filename, + ) + return ast.parse(source, filename=filename, mode="exec") + + @implements(compile) + def compile(self, module: ast.Module, filename: str) -> CodeType: + # RestrictedPython can compile from an AST directly. 
+ return compile_restricted(module, filename=filename, mode="exec", **self.config) + + @implements(exec) + def exec( + self, + bytecode: CodeType, + env: dict[str, Any], + ) -> None: + # Build restricted globals from RestrictedPython's defaults + rglobals: dict[str, Any] = safe_globals.copy() + + # Enable class definitions (required for Python 3) + rglobals["__metaclass__"] = type + rglobals["__name__"] = "restricted" + + # Layer `env` on top (without letting callers replace the restricted builtins). + rglobals.update({k: v for k, v in env.items() if k != "__builtins__"}) + + # Enable for loops and comprehensions + rglobals["_getiter_"] = Eval.default_guarded_getiter + + # Enable sequence unpacking in comprehensions and for loops + rglobals["_iter_unpack_sequence_"] = Guards.guarded_iter_unpack_sequence + rglobals["getattr"] = Guards.safer_getattr + rglobals["setattr"] = Guards.guarded_setattr + rglobals["_write_"] = lambda x: x + + # Execute with locals=env so top-level defs land in `env`. 
+ builtins.exec(bytecode, rglobals, env) diff --git a/pyproject.toml b/pyproject.toml index 38ff53ba..da8b5295 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,9 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] -dependencies = [] +dependencies = [ + "restrictedpython>=8.1", +] [project.urls] Homepage = "https://www.basis.ai/" diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index 775d5f72..acabee39 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -7,10 +7,16 @@ from PIL import Image from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction -from effectful.handlers.llm.evaluation import UnsafeEvalProvider +from effectful.handlers.llm.evaluation import RestrictedEvalProvider, UnsafeEvalProvider from effectful.ops.semantics import handler from effectful.ops.types import Operation, Term +# Eval providers for parameterized tests +EVAL_PROVIDERS = [ + pytest.param(UnsafeEvalProvider(), id="unsafe"), + pytest.param(RestrictedEvalProvider(), id="restricted"), +] + def test_type_to_encodable_type_term(): with pytest.raises(TypeError): @@ -726,7 +732,8 @@ class Person(pydantic.BaseModel): class TestCallableEncodable: """Tests for CallableEncodable - encoding/decoding callables as SynthesizedFunction.""" - def test_encode_decode_function(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_encode_decode_function(self, eval_provider): def add(a: int, b: int) -> int: return a + b @@ -737,13 +744,14 @@ def add(a: int, b: int) -> int: assert "def add" in encoded.module_code assert "return a + b" in encoded.module_code - with handler(UnsafeEvalProvider()): + with handler(eval_provider): decoded = encodable.decode(encoded) assert callable(decoded) assert decoded(2, 3) == 5 assert decoded.__name__ == "add" - def test_decode_with_ellipsis_params(self): + 
@pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_decode_with_ellipsis_params(self, eval_provider): # Callable[..., int] allows any params but validates return type encodable = Encodable.define(Callable[..., int], {}) @@ -751,12 +759,13 @@ def test_decode_with_ellipsis_params(self): func_source = SynthesizedFunction( module_code="def double(x) -> int:\n return x * 2" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): decoded = encodable.decode(func_source) assert callable(decoded) assert decoded(5) == 10 - def test_decode_with_env(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_decode_with_env(self, eval_provider): # Test decoding a function that uses env variables encodable = Encodable.define(Callable[..., int], {"factor": 3}) source = SynthesizedFunction( @@ -764,7 +773,7 @@ def test_decode_with_env(self): return x * factor""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): decoded = encodable.decode(source) assert callable(decoded) assert decoded(4) == 12 @@ -796,17 +805,19 @@ def __call__(self): with pytest.raises(RuntimeError, match="no source code and no docstring"): encodable.encode(NoDocCallable()) - def test_decode_no_function_at_end_raises(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_decode_no_function_at_end_raises(self, eval_provider): encodable = Encodable.define(Callable[..., int], {}) # Source code where last statement is not a function definition source = SynthesizedFunction(module_code="x = 42") with pytest.raises( ValueError, match="last statement to be a function definition" ): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_decode_multiple_functions_uses_last(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_decode_multiple_functions_uses_last(self, eval_provider): encodable = Encodable.define(Callable[..., int], {}) # Source code that 
defines multiple functions - should use the last one source = SynthesizedFunction( @@ -816,13 +827,14 @@ def test_decode_multiple_functions_uses_last(self): def bar() -> int: return 2""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): decoded = encodable.decode(source) assert callable(decoded) assert decoded.__name__ == "bar" assert decoded() == 2 - def test_decode_class_raises(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_decode_class_raises(self, eval_provider): encodable = Encodable.define(Callable[..., int], {}) # Classes are callable but the last statement must be a function definition source = SynthesizedFunction( @@ -837,15 +849,16 @@ def greet(self): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_roundtrip(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_roundtrip(self, eval_provider): def greet(name: str) -> str: return f"Hello, {name}!" 
encodable = Encodable.define(Callable[[str], str], {}) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encoded = encodable.encode(greet) decoded = encodable.decode(encoded) @@ -871,7 +884,8 @@ def add(a: int, b: int) -> int: assert isinstance(deserialized, SynthesizedFunction) assert "def add" in deserialized.module_code - def test_decode_validates_last_statement(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_decode_validates_last_statement(self, eval_provider): encodable = Encodable.define(Callable[..., int], {}) # Helper function followed by assignment - should fail @@ -884,7 +898,7 @@ def test_decode_validates_last_statement(self): with pytest.raises( ValueError, match="last statement to be a function definition" ): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) def test_typed_callable_includes_signature_in_docstring(self): @@ -894,7 +908,8 @@ def test_typed_callable_includes_signature_in_docstring(self): assert "Callable[[int, int], int]" in encodable.enc.__doc__ assert "" in encodable.enc.__doc__ - def test_typed_callable_validates_param_count(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_typed_callable_validates_param_count(self, eval_provider): encodable = Encodable.define(Callable[[int, int], int], {}) # Function with wrong number of parameters @@ -903,10 +918,11 @@ def test_typed_callable_validates_param_count(self): return a""" ) with pytest.raises(ValueError, match="expected function with 2 parameters"): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_typed_callable_validates_return_type(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_typed_callable_validates_return_type(self, eval_provider): encodable = Encodable.define(Callable[[int, int], int], {}) # Function with wrong return type @@ -915,10 +931,11 @@ def 
test_typed_callable_validates_return_type(self): return str(a + b)""" ) with pytest.raises(ValueError, match="expected function with return type int"): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_typed_callable_requires_return_annotation(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_typed_callable_requires_return_annotation(self, eval_provider): encodable = Encodable.define(Callable[[int, int], int], {}) # Function missing return type annotation @@ -930,10 +947,11 @@ def test_typed_callable_requires_return_annotation(self): ValueError, match="requires synthesized function to have a return type annotation", ): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_typed_callable_accepts_correct_signature(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_typed_callable_accepts_correct_signature(self, eval_provider): encodable = Encodable.define(Callable[[int, int], int], {}) # Function with correct signature @@ -941,12 +959,13 @@ def test_typed_callable_accepts_correct_signature(self): module_code="""def add(a: int, b: int) -> int: return a + b""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result(2, 3) == 5 - def test_ellipsis_callable_skips_param_validation(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_ellipsis_callable_skips_param_validation(self, eval_provider): # Callable[..., int] should skip param validation but still validate return encodable = Encodable.define(Callable[..., int], {}) @@ -954,7 +973,7 @@ def test_ellipsis_callable_skips_param_validation(self): module_code="""def anything(a, b, c, d, e) -> int: return 42""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result(1, 2, 
3, 4, 5) == 42 @@ -983,7 +1002,8 @@ def test_typed_callable_json_schema_different_signatures(self): assert "Callable[[str], str]" in schema1["description"] assert "Callable[[int, int, int], bool]" in schema2["description"] - def test_validates_param_count_via_ast(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_validates_param_count_via_ast(self, eval_provider): # Test that param validation happens via AST analysis encodable = Encodable.define(Callable[[int, int], int], {}) @@ -993,10 +1013,11 @@ def test_validates_param_count_via_ast(self): return a + b + c""" ) with pytest.raises(ValueError, match="expected function with 2 parameters"): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_validates_param_count_zero_params(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_validates_param_count_zero_params(self, eval_provider): # Test callable with no params encodable = Encodable.define(Callable[[], int], {}) @@ -1006,10 +1027,11 @@ def test_validates_param_count_zero_params(self): return x""" ) with pytest.raises(ValueError, match="expected function with 0 parameters"): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_validates_accepts_zero_params(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_validates_accepts_zero_params(self, eval_provider): # Test callable with no params - correct signature encodable = Encodable.define(Callable[[], int], {}) @@ -1017,7 +1039,7 @@ def test_validates_accepts_zero_params(self): module_code="""def get_value() -> int: return 42""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result() == 42 @@ -1031,7 +1053,8 @@ def test_ellipsis_callable_json_schema_includes_signature(self): assert "Callable[[...], int]" in schema["description"] assert "" in 
schema["description"] - def test_ellipsis_callable_validates_return_type(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_ellipsis_callable_validates_return_type(self, eval_provider): # Callable[..., int] should still validate return type encodable = Encodable.define(Callable[..., int], {}) @@ -1040,41 +1063,44 @@ def test_ellipsis_callable_validates_return_type(self): return "wrong type\"""" ) with pytest.raises(ValueError, match="expected function with return type int"): - with handler(UnsafeEvalProvider()): + with handler(eval_provider): encodable.decode(source) - def test_callable_with_single_param(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_callable_with_single_param(self, eval_provider): encodable = Encodable.define(Callable[[str], int], {}) source = SynthesizedFunction( module_code="""def count_chars(s: str) -> int: return len(s)""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result("hello") == 5 - def test_callable_with_many_params(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_callable_with_many_params(self, eval_provider): encodable = Encodable.define(Callable[[int, int, int, int], int], {}) source = SynthesizedFunction( module_code="""def sum_four(a: int, b: int, c: int, d: int) -> int: return a + b + c + d""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): result = encodable.decode(source) assert callable(result) assert result(1, 2, 3, 4) == 10 - def test_callable_with_bool_return(self): + @pytest.mark.parametrize("eval_provider", EVAL_PROVIDERS) + def test_callable_with_bool_return(self, eval_provider): encodable = Encodable.define(Callable[[int], bool], {}) source = SynthesizedFunction( module_code="""def is_positive(x: int) -> bool: return x > 0""" ) - with handler(UnsafeEvalProvider()): + with handler(eval_provider): result = 
encodable.decode(source) assert callable(result) assert result(5) is True @@ -1098,3 +1124,48 @@ def test_callable_type_variations_schema(self): f"Expected {expected_sig} in schema for {callable_type}, " f"got: {schema['description'][:100]}..." ) + + +class TestRestrictedEvalProviderConfig: + """Tests for RestrictedEvalProvider configuration options.""" + + def test_restricted_blocks_private_attribute_access(self): + """RestrictedPython blocks access to underscore-prefixed attributes by default.""" + encodable = Encodable.define(Callable[[str], int], {}) + source = SynthesizedFunction( + module_code="""def get_private(s: str) -> int: + return s.__class__.__name__""" + ) + # Should raise due to restricted attribute access + with pytest.raises(Exception): # Could be NameError or AttributeError + with handler(RestrictedEvalProvider()): + fn = encodable.decode(source) + fn("test") + + def test_restricted_with_custom_policy(self): + """Can pass custom policy via compile_kwargs.""" + from RestrictedPython import RestrictingNodeTransformer + + # Create a custom policy that's the same as default (just to test the plumbing) + class CustomPolicy(RestrictingNodeTransformer): + pass + + encodable = Encodable.define(Callable[[int, int], int], {}) + source = SynthesizedFunction( + module_code="""def add(a: int, b: int) -> int: + return a + b""" + ) + with handler(RestrictedEvalProvider(compile_kwargs={"policy": CustomPolicy})): + fn = encodable.decode(source) + assert fn(2, 3) == 5 + + def test_unsafe_allows_private_attribute_access(self): + """UnsafeEvalProvider allows access that RestrictedEvalProvider blocks.""" + encodable = Encodable.define(Callable[[str], str], {}) + source = SynthesizedFunction( + module_code="""def get_class_name(s: str) -> str: + return s.__class__.__name__""" + ) + with handler(UnsafeEvalProvider()): + fn = encodable.decode(source) + assert fn("test") == "str" From fd4041e5fcb830183782fb5cf8b89b38a3245881 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan 
Date: Fri, 30 Jan 2026 20:17:00 -0500 Subject: [PATCH 23/27] added test for custom policies for restricted python --- tests/test_handlers_llm_encoding.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index acabee39..fe4c0ec3 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -1143,7 +1143,7 @@ def test_restricted_blocks_private_attribute_access(self): fn("test") def test_restricted_with_custom_policy(self): - """Can pass custom policy via compile_kwargs.""" + """Can pass custom policy via kwargs.""" from RestrictedPython import RestrictingNodeTransformer # Create a custom policy that's the same as default (just to test the plumbing) @@ -1155,17 +1155,6 @@ class CustomPolicy(RestrictingNodeTransformer): module_code="""def add(a: int, b: int) -> int: return a + b""" ) - with handler(RestrictedEvalProvider(compile_kwargs={"policy": CustomPolicy})): + with handler(RestrictedEvalProvider(policy=CustomPolicy)): fn = encodable.decode(source) assert fn(2, 3) == 5 - - def test_unsafe_allows_private_attribute_access(self): - """UnsafeEvalProvider allows access that RestrictedEvalProvider blocks.""" - encodable = Encodable.define(Callable[[str], str], {}) - source = SynthesizedFunction( - module_code="""def get_class_name(s: str) -> str: - return s.__class__.__name__""" - ) - with handler(UnsafeEvalProvider()): - fn = encodable.decode(source) - assert fn("test") == "str" From fbe478f20ee1e0d367926078793131faefab665a Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 20:53:03 -0500 Subject: [PATCH 24/27] more specific arguments to RestrictedEvalProvider and made exec more customisable --- effectful/handlers/llm/evaluation.py | 85 +++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 14 deletions(-) diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index 
9ab6da78..bd6e1107 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -8,6 +8,7 @@ from RestrictedPython import ( Eval, Guards, + RestrictingNodeTransformer, compile_restricted, safe_globals, ) @@ -101,12 +102,54 @@ class RestrictedEvalProvider(ObjectInterpretation): RestrictedPython is not a complete sandbox, but it enforces a restricted language subset and expects you to provide a constrained exec environment. + + policy : dict[str, Any], optional + RestrictedPython compile_restricted policy for compilation + allow_class_definitions : bool, default True + Enable class definitions by providing __metaclass__ and __name__. + allow_iteration : bool, default True + Enable for loops and comprehensions via _getiter_. + allow_sequence_unpacking : bool, default True + Enable sequence unpacking in comprehensions and for loops. + allow_getattr : bool, default True + Enable safer_getattr for attribute access. + allow_setattr : bool, default True + Enable guarded_setattr for attribute assignment. + allow_write : bool, default True + Enable _write_ guard (identity function allowing write operations). + module_name : str, default "restricted" + The __name__ to use for the restricted module namespace. 
""" - config: dict[str, Any] + policy: type[RestrictingNodeTransformer] | None = None + allow_class_definitions: bool + allow_iteration: bool + allow_sequence_unpacking: bool + allow_getattr: bool + allow_setattr: bool + allow_write: bool + module_name: str - def __init__(self, **kwargs): - self.config = kwargs + def __init__( + self, + *, + policy: type[RestrictingNodeTransformer] | None = None, + allow_class_definitions: bool = True, + allow_iteration: bool = True, + allow_sequence_unpacking: bool = True, + allow_getattr: bool = True, + allow_setattr: bool = True, + allow_write: bool = True, + module_name: str = "restricted", + ): + self.policy = policy + self.allow_class_definitions = allow_class_definitions + self.allow_iteration = allow_iteration + self.allow_sequence_unpacking = allow_sequence_unpacking + self.allow_getattr = allow_getattr + self.allow_setattr = allow_setattr + self.allow_write = allow_write + self.module_name = module_name @implements(parse) def parse(self, source: str, filename: str) -> ast.Module: @@ -122,7 +165,12 @@ def parse(self, source: str, filename: str) -> ast.Module: @implements(compile) def compile(self, module: ast.Module, filename: str) -> CodeType: # RestrictedPython can compile from an AST directly. 
- return compile_restricted(module, filename=filename, mode="exec", **self.config) + return compile_restricted( + module, + filename=filename, + mode="exec", + policy=self.policy or RestrictingNodeTransformer, + ) @implements(exec) def exec( @@ -133,21 +181,30 @@ def exec( # Build restricted globals from RestrictedPython's defaults rglobals: dict[str, Any] = safe_globals.copy() - # Enable class definitions (required for Python 3) - rglobals["__metaclass__"] = type - rglobals["__name__"] = "restricted" + if self.allow_class_definitions: + # Enable class definitions (required for Python 3) + rglobals["__metaclass__"] = type + rglobals["__name__"] = self.module_name # Layer `env` on top (without letting callers replace the restricted builtins). rglobals.update({k: v for k, v in env.items() if k != "__builtins__"}) - # Enable for loops and comprehensions - rglobals["_getiter_"] = Eval.default_guarded_getiter + if self.allow_iteration: + # Enable for loops and comprehensions + rglobals["_getiter_"] = Eval.default_guarded_getiter + + if self.allow_sequence_unpacking: + # Enable sequence unpacking in comprehensions and for loops + rglobals["_iter_unpack_sequence_"] = Guards.guarded_iter_unpack_sequence + + if self.allow_getattr: + rglobals["getattr"] = Guards.safer_getattr + + if self.allow_setattr: + rglobals["setattr"] = Guards.guarded_setattr - # Enable sequence unpacking in comprehensions and for loops - rglobals["_iter_unpack_sequence_"] = Guards.guarded_iter_unpack_sequence - rglobals["getattr"] = Guards.safer_getattr - rglobals["setattr"] = Guards.guarded_setattr - rglobals["_write_"] = lambda x: x + if self.allow_write: + rglobals["_write_"] = lambda x: x # Execute with locals=env so top-level defs land in `env`. 
builtins.exec(bytecode, rglobals, env) From 6aa4d1b94a6862bf8390ba61096b13e4d2100c63 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 21:09:45 -0500 Subject: [PATCH 25/27] reverted flags for customizing rglobals, using same rglobals for local and globals for exec, fixing bug --- effectful/handlers/llm/evaluation.py | 68 +++++----------------------- 1 file changed, 11 insertions(+), 57 deletions(-) diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index bd6e1107..e8657a09 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -105,51 +105,16 @@ class RestrictedEvalProvider(ObjectInterpretation): policy : dict[str, Any], optional RestrictedPython compile_restricted policy for compilation - allow_class_definitions : bool, default True - Enable class definitions by providing __metaclass__ and __name__. - allow_iteration : bool, default True - Enable for loops and comprehensions via _getiter_. - allow_sequence_unpacking : bool, default True - Enable sequence unpacking in comprehensions and for loops. - allow_getattr : bool, default True - Enable safer_getattr for attribute access. - allow_setattr : bool, default True - Enable guarded_setattr for attribute assignment. - allow_write : bool, default True - Enable _write_ guard (identity function allowing write operations). - module_name : str, default "restricted" - The __name__ to use for the restricted module namespace. 
""" policy: type[RestrictingNodeTransformer] | None = None - allow_class_definitions: bool - allow_iteration: bool - allow_sequence_unpacking: bool - allow_getattr: bool - allow_setattr: bool - allow_write: bool - module_name: str def __init__( self, *, policy: type[RestrictingNodeTransformer] | None = None, - allow_class_definitions: bool = True, - allow_iteration: bool = True, - allow_sequence_unpacking: bool = True, - allow_getattr: bool = True, - allow_setattr: bool = True, - allow_write: bool = True, - module_name: str = "restricted", ): self.policy = policy - self.allow_class_definitions = allow_class_definitions - self.allow_iteration = allow_iteration - self.allow_sequence_unpacking = allow_sequence_unpacking - self.allow_getattr = allow_getattr - self.allow_setattr = allow_setattr - self.allow_write = allow_write - self.module_name = module_name @implements(parse) def parse(self, source: str, filename: str) -> ast.Module: @@ -181,30 +146,19 @@ def exec( # Build restricted globals from RestrictedPython's defaults rglobals: dict[str, Any] = safe_globals.copy() - if self.allow_class_definitions: - # Enable class definitions (required for Python 3) - rglobals["__metaclass__"] = type - rglobals["__name__"] = self.module_name - # Layer `env` on top (without letting callers replace the restricted builtins). 
rglobals.update({k: v for k, v in env.items() if k != "__builtins__"}) - if self.allow_iteration: - # Enable for loops and comprehensions - rglobals["_getiter_"] = Eval.default_guarded_getiter - - if self.allow_sequence_unpacking: - # Enable sequence unpacking in comprehensions and for loops - rglobals["_iter_unpack_sequence_"] = Guards.guarded_iter_unpack_sequence - - if self.allow_getattr: - rglobals["getattr"] = Guards.safer_getattr - - if self.allow_setattr: - rglobals["setattr"] = Guards.guarded_setattr + # Enable class definitions (required for Python 3) + rglobals["__metaclass__"] = type + rglobals["__name__"] = "restricted" + # Enable for loops and comprehensions + rglobals["_getiter_"] = Eval.default_guarded_getiter + # Enable sequence unpacking in comprehensions and for loops + rglobals["_iter_unpack_sequence_"] = Guards.guarded_iter_unpack_sequence - if self.allow_write: - rglobals["_write_"] = lambda x: x + rglobals["getattr"] = Guards.safer_getattr + rglobals["setattr"] = Guards.guarded_setattr + rglobals["_write_"] = lambda x: x - # Execute with locals=env so top-level defs land in `env`. 
- builtins.exec(bytecode, rglobals, env) + builtins.exec(bytecode, rglobals, rglobals) From 4f0c96389299525c2f00f320217a489ea5bb5465 Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 21:16:11 -0500 Subject: [PATCH 26/27] fixed failing tests (env was not being mutated) --- effectful/handlers/llm/evaluation.py | 15 +++++-- tests/test_handlers_llm_encoding.py | 63 +++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/effectful/handlers/llm/evaluation.py b/effectful/handlers/llm/evaluation.py index e8657a09..a673eb29 100644 --- a/effectful/handlers/llm/evaluation.py +++ b/effectful/handlers/llm/evaluation.py @@ -146,12 +146,13 @@ def exec( # Build restricted globals from RestrictedPython's defaults rglobals: dict[str, Any] = safe_globals.copy() - # Layer `env` on top (without letting callers replace the restricted builtins). - rglobals.update({k: v for k, v in env.items() if k != "__builtins__"}) - # Enable class definitions (required for Python 3) rglobals["__metaclass__"] = type rglobals["__name__"] = "restricted" + + # Layer `env` on top (without letting callers replace the restricted builtins). 
+ rglobals.update({k: v for k, v in env.items() if k != "__builtins__"}) + # Enable for loops and comprehensions rglobals["_getiter_"] = Eval.default_guarded_getiter # Enable sequence unpacking in comprehensions and for loops @@ -161,4 +162,12 @@ def exec( rglobals["setattr"] = Guards.guarded_setattr rglobals["_write_"] = lambda x: x + # Track keys before execution to identify new definitions + keys_before = set(rglobals.keys()) + builtins.exec(bytecode, rglobals, rglobals) + + # Copy newly defined items back to env so caller can access them + for key in rglobals: + if key not in keys_before: + env[key] = rglobals[key] diff --git a/tests/test_handlers_llm_encoding.py b/tests/test_handlers_llm_encoding.py index fe4c0ec3..472db961 100644 --- a/tests/test_handlers_llm_encoding.py +++ b/tests/test_handlers_llm_encoding.py @@ -1,3 +1,4 @@ +import builtins from collections.abc import Callable from dataclasses import asdict, dataclass from typing import Any, NamedTuple, TypedDict @@ -5,6 +6,7 @@ import pydantic import pytest from PIL import Image +from RestrictedPython import RestrictingNodeTransformer from effectful.handlers.llm.encoding import Encodable, SynthesizedFunction from effectful.handlers.llm.evaluation import RestrictedEvalProvider, UnsafeEvalProvider @@ -1144,7 +1146,6 @@ def test_restricted_blocks_private_attribute_access(self): def test_restricted_with_custom_policy(self): """Can pass custom policy via kwargs.""" - from RestrictedPython import RestrictingNodeTransformer # Create a custom policy that's the same as default (just to test the plumbing) class CustomPolicy(RestrictingNodeTransformer): @@ -1158,3 +1159,63 @@ class CustomPolicy(RestrictingNodeTransformer): with handler(RestrictedEvalProvider(policy=CustomPolicy)): fn = encodable.decode(source) assert fn(2, 3) == 5 + + def test_builtins_in_env_does_not_bypass_security(self): + """Including __builtins__ in env should not bypass RestrictedEvalProvider security. 
+ + RestrictedEvalProvider explicitly filters out __builtins__ from the env + to prevent callers from replacing the restricted builtins with full Python builtins. + This test verifies that even if __builtins__ is passed in the context, + dangerous operations remain blocked. + """ + + # Attempt to pass full builtins in the context, which should be filtered out + dangerous_ctx = {"__builtins__": builtins.__dict__} + + # Test 1: open() should not be usable even with __builtins__ in context + # The function may fail at compile/exec time or at call time, but either way + # it should not be able to actually open files + encodable_open = Encodable.define(Callable[[str], str], dangerous_ctx) + source_open = SynthesizedFunction( + module_code="""def read_file(path: str) -> str: + return open(path).read()""" + ) + with pytest.raises(Exception): # Could be NameError, ValueError, or other + with handler(RestrictedEvalProvider()): + fn = encodable_open.decode(source_open) + # If decode succeeded (shouldn't), calling should still fail + fn("/etc/passwd") + + # Test 2: __import__ should not be usable + encodable_import = Encodable.define(Callable[[], str], dangerous_ctx) + source_import = SynthesizedFunction( + module_code="""def get_os_name() -> str: + os = __import__('os') + return os.name""" + ) + with pytest.raises(Exception): + with handler(RestrictedEvalProvider()): + fn = encodable_import.decode(source_import) + fn() + + # Test 3: Verify safe code still works with dangerous context + # This confirms we're not just breaking everything + encodable_safe = Encodable.define(Callable[[int, int], int], dangerous_ctx) + source_safe = SynthesizedFunction( + module_code="""def add(a: int, b: int) -> int: + return a + b""" + ) + with handler(RestrictedEvalProvider()): + fn = encodable_safe.decode(source_safe) + assert fn(2, 3) == 5, "Safe code should still work" + + # Test 4: Private attribute access should still be blocked + encodable_private = Encodable.define(Callable[[str], str], 
dangerous_ctx) + source_private = SynthesizedFunction( + module_code="""def get_class(s: str) -> str: + return s.__class__.__name__""" + ) + with pytest.raises(Exception): + with handler(RestrictedEvalProvider()): + fn = encodable_private.decode(source_private) + fn("test") From 1998d381ce6f4923edd8a79ffb2f8a2172c0173a Mon Sep 17 00:00:00 2001 From: Kiran Gopinathan Date: Fri, 30 Jan 2026 21:34:06 -0500 Subject: [PATCH 27/27] updated restricted python to be an llm dependency --- pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index da8b5295..c75c21d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,9 +28,7 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] -dependencies = [ - "restrictedpython>=8.1", -] +dependencies = [] [project.urls] Homepage = "https://www.basis.ai/" @@ -46,6 +44,7 @@ llm = [ "litellm", "pillow", "pydantic", + "restrictedpython>=8.1" ] prettyprinter = ["prettyprinter"] docs = [