From 1b8d82571527b5701f9c75e6bf9bb719ac7e143b Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 18:33:44 -0500
Subject: [PATCH 01/17] Use tool to compute final answer

---
 effectful/handlers/llm/__init__.py    |   4 +-
 effectful/handlers/llm/completions.py |  53 ++++-
 effectful/handlers/llm/template.py    |  58 +++++-
 tests/test_handlers_llm_template.py   | 271 +++++++++++++++++++++++++-
 4 files changed, 379 insertions(+), 7 deletions(-)

diff --git a/effectful/handlers/llm/__init__.py b/effectful/handlers/llm/__init__.py
index cdda93479..3398c3160 100644
--- a/effectful/handlers/llm/__init__.py
+++ b/effectful/handlers/llm/__init__.py
@@ -1,3 +1,3 @@
-from .template import Agent, Template, Tool
+from .template import Agent, IsFinalAnswer, Template, Tool
 
-__all__ = ["Agent", "Template", "Tool"]
+__all__ = ["Agent", "IsFinalAnswer", "Template", "Tool"]
diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 18abddcb0..470ecbfad 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -147,6 +147,23 @@ def to_feedback_message(self, include_traceback: bool) -> Message:
         )
 
 
+class DirectReturn[T](BaseException):
+    """Raised internally to short-circuit the completion loop when a tool
+    annotated with :class:`~effectful.handlers.llm.template.IsFinalAnswer`
+    produces a result.
+
+    Extends :class:`BaseException` so it is not caught by handlers that
+    catch :class:`Exception` (e.g. ``call_tool``'s wrapping in
+    :class:`ToolCallExecutionError`, or :class:`RetryLLMHandler`).
+    """
+
+    value: T
+
+    def __init__(self, value: T):
+        self.value = value
+        super().__init__(value)
+
+
 class DecodedToolCall[T](typing.NamedTuple):
     tool: Tool[..., T]
     bound_args: inspect.BoundArguments
@@ -317,7 +334,13 @@ def call_tool(tool_call: DecodedToolCall) -> Message:
     string representing an LLM tool call request parameters. The output is
     the serialised response to the model.
 
+    If the tool is annotated with
+    :class:`~effectful.handlers.llm.template.IsFinalAnswer`, a
+    :class:`DirectReturn` exception is raised carrying the raw Python
+    result, which short-circuits the completion loop.
     """
+    from effectful.handlers.llm.template import _is_final_answer_tool
+
     # call tool with python types
     try:
         result = tool_call.tool(
@@ -335,6 +358,10 @@ def call_tool(tool_call: DecodedToolCall) -> Message:
         dict(role="tool", content=encoded_result, tool_call_id=tool_call.id),
     )
     append_message(message)
+
+    if _is_final_answer_tool(tool_call.tool):
+        raise DirectReturn(result)
+
     return message
 
 
@@ -555,8 +582,30 @@ def _call[**P, T](
                 message, tool_calls, result = call_assistant(
                     template.tools, response_model, **self.config
                 )
-                for tool_call in tool_calls:
-                    message = call_tool(tool_call)
+                for i, tool_call in enumerate(tool_calls):
+                    try:
+                        message = call_tool(tool_call)
+                    except DirectReturn as dr:
+                        result = typing.cast(T, dr.value)
+                        # Placeholder messages for remaining unprocessed
+                        # tool calls to keep history valid for Agents.
+                        for remaining_tc in tool_calls[i + 1 :]:
+                            append_message(
+                                _make_message(
+                                    dict(
+                                        role="tool",
+                                        content=[
+                                            {
+                                                "type": "text",
+                                                "text": "[skipped]",
+                                            }
+                                        ],
+                                        tool_call_id=remaining_tc.id,
+                                    )
+                                )
+                            )
+                        tool_calls = []
+                        break
 
         try:
             _get_history()
diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py
index c84958983..992a02522 100644
--- a/effectful/handlers/llm/template.py
+++ b/effectful/handlers/llm/template.py
@@ -42,8 +42,9 @@ def factorial(n: int) -> Annotated[int, IsRecursive]:
 
     @classmethod
     def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature:
-        for name, ty in sig.parameters.items():
-            if not ty or not typing.get_origin(ty) is Annotated:
+        for name, param in sig.parameters.items():
+            ty = param.annotation
+            if ty is inspect.Parameter.empty or typing.get_origin(ty) is not Annotated:
                 continue
             if any(isinstance(arg, cls) for arg in typing.get_args(ty)):
                 raise TypeError(
@@ -62,6 +63,58 @@ def _is_recursive_signature(sig: inspect.Signature):
     return any(annotation is IsRecursive for annotation in annotations)
 
 
+class _IsFinalAnswerAnnotation(Annotation):
+    """
+    A special type annotation for return types in the signature of a
+    :class:`Tool` that indicates its result should be returned directly
+    as the final answer of the enclosing :class:`Template`, skipping
+    the final LLM API call.
+
+    .. warning::
+
+        :class:`IsFinalAnswer` annotations are only defined to ascribe
+        return annotations, and if used in a parameter will raise a
+        :class:`TypeError` at tool construction time.
+
+    **Example usage**::
+
+        >>> from typing import Annotated
+        >>> from effectful.handlers.llm import Tool
+        >>> from effectful.handlers.llm.template import IsFinalAnswer
+
+        >>> @Tool.define
+        ... def generate(prompt: str) -> Annotated[str, IsFinalAnswer]:
+        ...     \"""Generate content for the given prompt.\"""
+        ...     return "generated content"
+    """
+
+    @classmethod
+    def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature:
+        for name, param in sig.parameters.items():
+            ty = param.annotation
+            if ty is inspect.Parameter.empty or typing.get_origin(ty) is not Annotated:
+                continue
+            if any(isinstance(arg, cls) for arg in typing.get_args(ty)):
+                raise TypeError(
+                    f"Illegal annotation {ty} for parameter {name}, "
+                    "IsFinalAnswer must only be used to annotate return types."
+                )
+        return sig
+
+
+IsFinalAnswer = _IsFinalAnswerAnnotation()
+
+
+def _is_final_answer_tool(tool: Any) -> bool:
+    """Check if a tool's return type is annotated with IsFinalAnswer."""
+    ret = tool.__signature__.return_annotation
+    if typing.get_origin(ret) is not Annotated:
+        return False
+    return any(
+        isinstance(arg, _IsFinalAnswerAnnotation) for arg in typing.get_args(ret)
+    )
+
+
 class Tool[**P, T](Operation[P, T]):
     """A :class:`Tool` is a function that may be called by a :class:`Template`.
 
@@ -96,6 +149,7 @@ def __init__(
         if not default.__doc__:
             raise ValueError("Tools must have docstrings.")
         signature = IsRecursive.infer_annotations(signature)
+        signature = IsFinalAnswer.infer_annotations(signature)
         super().__init__(signature, name, default)
 
     @classmethod
diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index bd63ea551..181b3abf1 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -4,17 +4,22 @@
 import dataclasses
 import inspect
 from dataclasses import dataclass
+from typing import Annotated
 
 import pytest
 from litellm import ModelResponse
 
-from effectful.handlers.llm import Agent, Template, Tool
+from effectful.handlers.llm import Agent, IsFinalAnswer, Template, Tool
 from effectful.handlers.llm.completions import (
+    DecodedToolCall,
+    DirectReturn,
     LiteLLMProvider,
     RetryLLMHandler,
+    call_tool,
     call_user,
     completion,
 )
+from effectful.handlers.llm.template import _is_final_answer_tool
 from effectful.ops.semantics import handler
 from effectful.ops.syntax import ObjectInterpretation, implements
 from effectful.ops.types import NotHandled
@@ -1324,3 +1329,267 @@ def test_validate_format_spec_on_undefined_var():
         def bad(x: int) -> str:
             """Value: {x} and {missing:.2f}."""
             raise NotHandled
+
+
+# ---------------------------------------------------------------------------
+# IsFinalAnswer annotation tests
+# ---------------------------------------------------------------------------
+
+
+class TestIsFinalAnswerAnnotation:
+    """Tests for the IsFinalAnswer type annotation."""
+
+    def test_tool_with_is_final_answer_return_type(self):
+        """Tool with IsFinalAnswer on return type creates successfully."""
+
+        @Tool.define
+        def my_tool(x: int) -> Annotated[str, IsFinalAnswer]:
+            """A tool that returns a final answer."""
+            return str(x)
+
+        assert _is_final_answer_tool(my_tool)
+
+    def test_tool_without_is_final_answer(self):
+        """Normal tool is not detected as final answer."""
+
+        @Tool.define
+        def normal_tool(x: int) -> str:
+            """A normal tool."""
+            return str(x)
+
+        assert not _is_final_answer_tool(normal_tool)
+
+    def test_is_final_answer_on_parameter_raises(self):
+        """IsFinalAnswer on a parameter raises TypeError at define time."""
+        with pytest.raises(TypeError, match="IsFinalAnswer"):
+
+            @Tool.define
+            def bad_tool(x: Annotated[int, IsFinalAnswer]) -> str:
+                """A tool with bad annotation."""
+                return str(x)
+
+    def test_is_final_answer_combined_with_is_recursive(self):
+        """IsFinalAnswer and IsRecursive can coexist on a return type."""
+        from effectful.handlers.llm.template import IsRecursive
+
+        @Tool.define
+        def combo_tool(x: int) -> Annotated[str, IsFinalAnswer, IsRecursive]:
+            """A tool with both annotations."""
+            return str(x)
+
+        assert _is_final_answer_tool(combo_tool)
+
+
+class TestIsFinalAnswerCallTool:
+    """Tests for call_tool behavior with IsFinalAnswer tools."""
+
+    def test_call_tool_raises_direct_return_for_final_answer_tool(self):
+        """call_tool raises DirectReturn when tool has IsFinalAnswer."""
+
+        @Tool.define
+        def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+            """Returns a final answer."""
+            return x * 2
+
+        sig = inspect.signature(final_tool)
+        bound_args = sig.bind(x=5)
+        tc = DecodedToolCall(final_tool, bound_args, "call_final")
+
+        with pytest.raises(DirectReturn) as exc_info:
+            call_tool(tc)
+
+        assert exc_info.value.value == 10
+
+    def test_call_tool_normal_for_non_final_answer_tool(self):
+        """call_tool returns a Message normally for non-IsFinalAnswer tools."""
+
+        @Tool.define
+        def normal_tool(x: int) -> int:
+            """A normal tool."""
+            return x + 1
+
+        sig = inspect.signature(normal_tool)
+        bound_args = sig.bind(x=3)
+        tc = DecodedToolCall(normal_tool, bound_args, "call_normal")
+
+        result = call_tool(tc)
+        assert result["role"] == "tool"
+        assert result["tool_call_id"] == "call_normal"
+
+    def test_call_tool_final_answer_with_retry_handler(self):
+        """DirectReturn propagates through RetryLLMHandler._call_tool."""
+
+        @Tool.define
+        def final_tool(x: int) -> Annotated[str, IsFinalAnswer]:
+            """Returns a final answer."""
+            return f"answer: {x}"
+
+        sig = inspect.signature(final_tool)
+        bound_args = sig.bind(x=42)
+        tc = DecodedToolCall(final_tool, bound_args, "call_retry_final")
+
+        with pytest.raises(DirectReturn) as exc_info:
+            with handler(RetryLLMHandler()):
+                call_tool(tc)
+
+        assert exc_info.value.value == "answer: 42"
+
+
+class TestIsFinalAnswerCompletionLoop:
+    """Tests for IsFinalAnswer through the full completion loop."""
+
+    def test_final_answer_tool_skips_final_llm_call(self):
+        """When LLM calls a final-answer tool, result is returned
+        directly without a second call_assistant invocation."""
+
+        @Tool.define
+        def compute(x: int) -> Annotated[int, IsFinalAnswer]:
+            """Compute and return the result directly."""
+            return x * 10
+
+        @Template.define
+        def task(n: int) -> int:
+            """Call compute with {n}."""
+            raise NotHandled
+
+        mock = MockCompletionHandler(
+            [make_tool_call_response("compute", '{"x": 7}')]
+        )
+
+        with handler(LiteLLMProvider()), handler(mock):
+            result = task(7)
+
+        assert result == 70
+        # Only 1 call_assistant, not 2 (no final LLM round-trip)
+        assert mock.call_count == 1
+
+    def test_final_answer_returns_raw_python_object(self):
+        """The returned value is the raw Python object, not serialized text."""
+
+        @dataclass
+        class MyResult:
+            value: int
+            label: str
+
+        @Tool.define
+        def make_result() -> Annotated[MyResult, IsFinalAnswer]:
+            """Create a structured result."""
+            return MyResult(value=42, label="answer")
+
+        @Template.define
+        def task() -> MyResult:
+            """Call make_result."""
+            raise NotHandled
+
+        mock = MockCompletionHandler(
+            [make_tool_call_response("make_result", "{}")]
+        )
+
+        with handler(LiteLLMProvider()), handler(mock):
+            result = task()
+
+        assert isinstance(result, MyResult)
+        assert result.value == 42
+        assert result.label == "answer"
+
+    def test_agent_history_valid_after_final_answer(self):
+        """Agent history has no orphaned tool_calls after IsFinalAnswer."""
+
+        @Tool.define
+        def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+            """Return final answer."""
+            return x
+
+        @dataclasses.dataclass
+        class MyAgent(Agent):
+            @Template.define
+            def do_work(self, n: int) -> int:
+                """Process {n}."""
+                raise NotHandled
+
+        mock = MockCompletionHandler(
+            [make_tool_call_response("final_tool", '{"x": 5}')]
+        )
+        agent = MyAgent()
+
+        with handler(LiteLLMProvider()), handler(mock):
+            result = agent.do_work(5)
+
+        assert result == 5
+
+        # Verify no orphaned tool_calls in history
+        for msg in agent.__history__.values():
+            tool_calls = msg.get("tool_calls")
+            if tool_calls:
+                for tc in tool_calls:
+                    tc_id = tc["id"] if isinstance(tc, dict) else tc.id
+                    has_response = any(
+                        m.get("tool_call_id") == tc_id
+                        for m in agent.__history__.values()
+                        if m.get("role") == "tool"
+                    )
+                    assert has_response, (
+                        f"Orphaned tool_call {tc_id} in history"
+                    )
+
+    def test_agent_subsequent_call_after_final_answer(self):
+        """A follow-up call on the same Agent works after IsFinalAnswer."""
+
+        @Tool.define
+        def final_tool() -> Annotated[str, IsFinalAnswer]:
+            """Return final answer."""
+            return "direct result"
+
+        @dataclasses.dataclass
+        class MyAgent(Agent):
+            @Template.define
+            def step(self, msg: str) -> str:
+                """Do: {msg}"""
+                raise NotHandled
+
+        call_count = 0
+
+        class PhaseHandler(ObjectInterpretation):
+            @implements(completion)
+            def _completion(self, model, messages=None, **kwargs):
+                nonlocal call_count
+                call_count += 1
+                if call_count == 1:
+                    return make_tool_call_response("final_tool", "{}")
+                return make_text_response('{"value": "llm result"}')
+
+        agent = MyAgent()
+
+        with handler(LiteLLMProvider()), handler(PhaseHandler()):
+            r1 = agent.step("first")
+            r2 = agent.step("second")
+
+        assert r1 == "direct result"
+        assert r2 == "llm result"
+
+    def test_final_answer_with_retry_handler_active(self):
+        """IsFinalAnswer works correctly with RetryLLMHandler."""
+
+        @Tool.define
+        def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+            """Return final answer."""
+            return x * 3
+
+        @Template.define
+        def task(n: int) -> int:
+            """Call final_tool with {n}."""
+            raise NotHandled
+
+        mock = MockCompletionHandler(
+            [make_tool_call_response("final_tool", '{"x": 4}')]
+        )
+
+        with (
+            handler(LiteLLMProvider()),
+            handler(RetryLLMHandler()),
+            handler(mock),
+        ):
+            result = task(4)
+
+        assert result == 12
+        assert mock.call_count == 1

From 6495fc20affcb004c82cf43f22ed7397f8ca1b7c Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 19:31:22 -0500
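Subject: [PATCH 02/17] Replace DirectReturn exception with is_final flag on DecodedToolCall

DecodedToolCall becomes a dataclass carrying is_final, call_tool returns
(message, raw result), and call_assistant rejects an IsFinalAnswer tool
call that is batched with other tool calls in the same round.
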
---
 effectful/handlers/llm/completions.py | 97 ++++++++++-----------------
 tests/test_handlers_llm_provider.py   | 14 ++--
 tests/test_handlers_llm_template.py   | 49 ++++++--------
 3 files changed, 65 insertions(+), 95 deletions(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 470ecbfad..236d65e49 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -147,27 +147,12 @@ def to_feedback_message(self, include_traceback: bool) -> Message:
         )
 
 
-class DirectReturn[T](BaseException):
-    """Raised internally to short-circuit the completion loop when a tool
-    annotated with :class:`~effectful.handlers.llm.template.IsFinalAnswer`
-    produces a result.
-
-    Extends :class:`BaseException` so it is not caught by handlers that
-    catch :class:`Exception` (e.g. ``call_tool``'s wrapping in
-    :class:`ToolCallExecutionError`, or :class:`RetryLLMHandler`).
-    """
-
-    value: T
-
-    def __init__(self, value: T):
-        self.value = value
-        super().__init__(value)
-
-
-class DecodedToolCall[T](typing.NamedTuple):
+@dataclasses.dataclass
+class DecodedToolCall[T]:
     tool: Tool[..., T]
     bound_args: inspect.BoundArguments
     id: ToolCallID
+    is_final: bool = False
 
 
 type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None]
@@ -248,7 +233,11 @@ def decode_tool_call(
             tool_name, tool_call.id, e, raw_message=raw_message
         ) from e
 
-    return DecodedToolCall(tool, bound_sig, tool_call.id)
+    from effectful.handlers.llm.template import _is_final_answer_tool
+
+    return DecodedToolCall(
+        tool, bound_sig, tool_call.id, is_final=_is_final_answer_tool(tool)
+    )
 
 
 @Operation.define
@@ -312,6 +301,18 @@ def call_assistant[T, U](
         decoded_tool_call = decode_tool_call(validated_tool_call, tools, raw_message)
         tool_calls.append(decoded_tool_call)
 
+    if any(tc.is_final for tc in tool_calls) and len(tool_calls) > 1:
+        final_name = next(tc.tool.__name__ for tc in tool_calls if tc.is_final)
+        raise ToolCallDecodingError(
+            final_name,
+            next(tc.id for tc in tool_calls if tc.is_final),
+            ValueError(
+                f"IsFinalAnswer tool '{final_name}' must be the only tool call "
+                f"in a round, but {len(tool_calls)} tool calls were generated."
+            ),
+            raw_message=raw_message,
+        )
+
     result = None
     if not tool_calls:
         # return response
@@ -329,18 +330,13 @@ def call_assistant[T, U](
 
 
 @Operation.define
-def call_tool(tool_call: DecodedToolCall) -> Message:
-    """Implements a roundtrip call to a python function. Input is a json
-    string representing an LLM tool call request parameters. The output is
-    the serialised response to the model.
-
-    If the tool is annotated with
-    :class:`~effectful.handlers.llm.template.IsFinalAnswer`, a
-    :class:`DirectReturn` exception is raised carrying the raw Python
-    result, which short-circuits the completion loop.
-    """
-    from effectful.handlers.llm.template import _is_final_answer_tool
+def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T]:
+    """Execute a tool and return the serialised message and the raw Python result.
 
+    The message is appended to the conversation history.  The raw result is
+    returned alongside so that callers (e.g. the completion loop) can use it
+    directly when the tool is marked ``is_final``.
+    """
     # call tool with python types
     try:
         result = tool_call.tool(
@@ -358,11 +354,7 @@ def call_tool(tool_call: DecodedToolCall) -> Message:
         dict(role="tool", content=encoded_result, tool_call_id=tool_call.id),
     )
     append_message(message)
-
-    if _is_final_answer_tool(tool_call.tool):
-        raise DirectReturn(result)
-
-    return message
+    return message, result
 
 
 @Operation.define
@@ -524,7 +516,7 @@ def _attempt() -> MessageResult[T]:
         return (message, tool_calls, result)
 
     @implements(call_tool)
-    def _call_tool(self, tool_call: DecodedToolCall) -> Message:
+    def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None]:
         """Handle tool execution with runtime error capture.
 
         Runtime errors from tool execution are captured and returned as
@@ -537,7 +529,7 @@ def _call_tool(self, tool_call: DecodedToolCall) -> Message:
             if isinstance(e.original_error, self.catch_tool_errors):
                 message = e.to_feedback_message(self.include_traceback)
                 append_message(message)
-                return message
+                return message, None
             else:
                 raise
 
@@ -578,34 +570,19 @@ def _call[**P, T](
             # loop based on: https://cookbook.openai.com/examples/reasoning_function_calls
             tool_calls: list[DecodedToolCall] = []
             result: T | None = None
+            is_final = False
             while message["role"] != "assistant" or tool_calls:
                 message, tool_calls, result = call_assistant(
                     template.tools, response_model, **self.config
                 )
-                for i, tool_call in enumerate(tool_calls):
-                    try:
-                        message = call_tool(tool_call)
-                    except DirectReturn as dr:
-                        result = typing.cast(T, dr.value)
-                        # Placeholder messages for remaining unprocessed
-                        # tool calls to keep history valid for Agents.
-                        for remaining_tc in tool_calls[i + 1 :]:
-                            append_message(
-                                _make_message(
-                                    dict(
-                                        role="tool",
-                                        content=[
-                                            {
-                                                "type": "text",
-                                                "text": "[skipped]",
-                                            }
-                                        ],
-                                        tool_call_id=remaining_tc.id,
-                                    )
-                                )
-                            )
-                        tool_calls = []
+                for tool_call in tool_calls:
+                    message, raw_result = call_tool(tool_call)
+                    if tool_call.is_final:
+                        result = typing.cast(T, raw_result)
+                        is_final = True
                         break
+                if is_final:
+                    break
 
         try:
             _get_history()
diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py
index db0df23c9..b79ba8e02 100644
--- a/tests/test_handlers_llm_provider.py
+++ b/tests/test_handlers_llm_provider.py
@@ -970,7 +970,7 @@ def test_retry_handler_catches_tool_runtime_error(self):
         tool_call = DecodedToolCall(failing_tool, bound_args, "call_1")
 
         with handler(RetryLLMHandler()):
-            result = call_tool(tool_call)
+            result, _ = call_tool(tool_call)
 
         # The result should be an error message, not an exception
         assert result["role"] == "tool"
@@ -987,7 +987,7 @@ def test_retry_handler_catches_division_by_zero(self):
         tool_call = DecodedToolCall(divide_tool, bound_args, "call_div")
 
         with handler(RetryLLMHandler()):
-            result = call_tool(tool_call)
+            result, _ = call_tool(tool_call)
 
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_div"
@@ -1002,7 +1002,7 @@ def test_successful_tool_execution_returns_result(self):
         tool_call = DecodedToolCall(add_numbers, bound_args, "call_add")
 
         with handler(RetryLLMHandler()):
-            result = call_tool(tool_call)
+            result, _ = call_tool(tool_call)
 
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_add"
@@ -1553,7 +1553,7 @@ def test_call_tool_success_does_not_raise(self):
         bound_args = sig.bind(a=3, b=4)
         tc = DecodedToolCall(add_numbers, bound_args, "call_ok")
 
-        result = call_tool(tc)
+        result, _ = call_tool(tc)
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_ok"
 
@@ -1568,7 +1568,7 @@ def test_matching_error_returns_feedback_message(self):
         tc = DecodedToolCall(flaky_tool, bound_args, "call_match")
 
         with handler(RetryLLMHandler(catch_tool_errors=ConnectionError)):
-            result = call_tool(tc)
+            result, _ = call_tool(tc)
 
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_match"
@@ -1595,7 +1595,7 @@ def test_default_catch_all_catches_everything(self):
         tc = DecodedToolCall(type_error_tool, bound_args, "call_default")
 
         with handler(RetryLLMHandler()):
-            result = call_tool(tc)
+            result, _ = call_tool(tc)
 
         assert result["role"] == "tool"
         assert "Tool execution failed" in result["content"]
@@ -1611,7 +1611,7 @@ def test_tuple_of_error_types(self):
                 catch_tool_errors=(ConnectionError, ValueError),
             )
         ):
-            result = call_tool(tc)
+            result, _ = call_tool(tc)
 
         assert result["role"] == "tool"
         assert "Tool execution failed" in result["content"]
diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index 181b3abf1..283f32029 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -12,7 +12,6 @@
 from effectful.handlers.llm import Agent, IsFinalAnswer, Template, Tool
 from effectful.handlers.llm.completions import (
     DecodedToolCall,
-    DirectReturn,
     LiteLLMProvider,
     RetryLLMHandler,
     call_tool,
@@ -1383,8 +1382,8 @@ def combo_tool(x: int) -> Annotated[str, IsFinalAnswer, IsRecursive]:
 class TestIsFinalAnswerCallTool:
     """Tests for call_tool behavior with IsFinalAnswer tools."""
 
-    def test_call_tool_raises_direct_return_for_final_answer_tool(self):
-        """call_tool raises DirectReturn when tool has IsFinalAnswer."""
+    def test_call_tool_returns_raw_result_for_final_answer_tool(self):
+        """call_tool returns the raw Python result alongside the message."""
 
         @Tool.define
         def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
@@ -1393,15 +1392,14 @@ def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
 
         sig = inspect.signature(final_tool)
         bound_args = sig.bind(x=5)
-        tc = DecodedToolCall(final_tool, bound_args, "call_final")
+        tc = DecodedToolCall(final_tool, bound_args, "call_final", is_final=True)
 
-        with pytest.raises(DirectReturn) as exc_info:
-            call_tool(tc)
+        message, raw_result = call_tool(tc)
+        assert message["role"] == "tool"
+        assert raw_result == 10
 
-        assert exc_info.value.value == 10
-
-    def test_call_tool_normal_for_non_final_answer_tool(self):
-        """call_tool returns a Message normally for non-IsFinalAnswer tools."""
+    def test_call_tool_returns_raw_result_for_normal_tool(self):
+        """call_tool returns the raw Python result for all tools."""
 
         @Tool.define
         def normal_tool(x: int) -> int:
@@ -1412,12 +1410,13 @@ def normal_tool(x: int) -> int:
         bound_args = sig.bind(x=3)
         tc = DecodedToolCall(normal_tool, bound_args, "call_normal")
 
-        result = call_tool(tc)
-        assert result["role"] == "tool"
-        assert result["tool_call_id"] == "call_normal"
+        message, raw_result = call_tool(tc)
+        assert message["role"] == "tool"
+        assert message["tool_call_id"] == "call_normal"
+        assert raw_result == 4
 
     def test_call_tool_final_answer_with_retry_handler(self):
-        """DirectReturn propagates through RetryLLMHandler._call_tool."""
+        """call_tool works with RetryLLMHandler for IsFinalAnswer tools."""
 
         @Tool.define
         def final_tool(x: int) -> Annotated[str, IsFinalAnswer]:
@@ -1426,13 +1425,13 @@ def final_tool(x: int) -> Annotated[str, IsFinalAnswer]:
 
         sig = inspect.signature(final_tool)
         bound_args = sig.bind(x=42)
-        tc = DecodedToolCall(final_tool, bound_args, "call_retry_final")
+        tc = DecodedToolCall(final_tool, bound_args, "call_retry_final", is_final=True)
 
-        with pytest.raises(DirectReturn) as exc_info:
-            with handler(RetryLLMHandler()):
-                call_tool(tc)
+        with handler(RetryLLMHandler()):
+            message, raw_result = call_tool(tc)
 
-        assert exc_info.value.value == "answer: 42"
+        assert message["role"] == "tool"
+        assert raw_result == "answer: 42"
 
 
 class TestIsFinalAnswerCompletionLoop:
@@ -1452,9 +1451,7 @@ def task(n: int) -> int:
             """Call compute with {n}."""
             raise NotHandled
 
-        mock = MockCompletionHandler(
-            [make_tool_call_response("compute", '{"x": 7}')]
-        )
+        mock = MockCompletionHandler([make_tool_call_response("compute", '{"x": 7}')])
 
         with handler(LiteLLMProvider()), handler(mock):
             result = task(7)
@@ -1481,9 +1478,7 @@ def task() -> MyResult:
             """Call make_result."""
             raise NotHandled
 
-        mock = MockCompletionHandler(
-            [make_tool_call_response("make_result", "{}")]
-        )
+        mock = MockCompletionHandler([make_tool_call_response("make_result", "{}")])
 
         with handler(LiteLLMProvider()), handler(mock):
             result = task()
@@ -1528,9 +1523,7 @@ def do_work(self, n: int) -> int:
                         for m in agent.__history__.values()
                         if m.get("role") == "tool"
                     )
-                    assert has_response, (
-                        f"Orphaned tool_call {tc_id} in history"
-                    )
+                    assert has_response, f"Orphaned tool_call {tc_id} in history"
 
     def test_agent_subsequent_call_after_final_answer(self):
         """A follow-up call on the same Agent works after IsFinalAnswer."""

From 0005c1ce6f075d25e1e8f71a1bffd037f712be07 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 20:15:10 -0500
Subject: [PATCH 03/17] Return is_final flag from call_assistant and call_tool

---
 effectful/handlers/llm/completions.py | 40 +++++++++-------
 tests/test_handlers_llm_provider.py   | 30 ++++++------
 tests/test_handlers_llm_template.py   | 68 +++++++++++++++++++++++++--
 3 files changed, 102 insertions(+), 36 deletions(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 236d65e49..1ac0e9e20 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -155,7 +155,7 @@ class DecodedToolCall[T]:
     is_final: bool = False
 
 
-type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None]
+type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None, bool]
 
 
 @functools.cache
@@ -326,16 +326,19 @@ def call_assistant[T, U](
         except (pydantic.ValidationError, TypeError, ValueError, SyntaxError) as e:
             raise ResultDecodingError(e, raw_message=raw_message) from e
 
-    return (raw_message, tool_calls, result)
+    is_final = any(tc.is_final for tc in tool_calls)
+    return (raw_message, tool_calls, result, is_final)
 
 
 @Operation.define
-def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T]:
-    """Execute a tool and return the serialised message and the raw Python result.
-
-    The message is appended to the conversation history.  The raw result is
-    returned alongside so that callers (e.g. the completion loop) can use it
-    directly when the tool is marked ``is_final``.
+def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool]:
+    """Execute a tool and return the serialised message, the raw result, and
+    whether this result is a final answer.
+
+    Returns:
+        A 3-tuple ``(message, result, is_final)``.  ``message`` is appended
+        to the conversation history.  When ``is_final`` is ``True`` the
+        completion loop uses ``result`` directly as the template return value.
     """
     # call tool with python types
     try:
@@ -354,7 +357,7 @@ def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T]:
         dict(role="tool", content=encoded_result, tool_call_id=tool_call.id),
     )
     append_message(message)
-    return message, result
+    return message, result, tool_call.is_final
 
 
 @Operation.define
@@ -510,18 +513,20 @@ def _attempt() -> MessageResult[T]:
             return fwd(tools, response_format, model, **kwargs)
 
         with handler({_get_history: lambda: _message_sequence}):
-            message, tool_calls, result = self.call_assistant_retryer(_attempt)
+            message, tool_calls, result, is_final = self.call_assistant_retryer(_attempt)
 
         append_message(message)
-        return (message, tool_calls, result)
+        return (message, tool_calls, result, is_final)
 
     @implements(call_tool)
-    def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None]:
+    def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool]:
         """Handle tool execution with runtime error capture.
 
         Runtime errors from tool execution are captured and returned as
         error messages to the LLM. Only exceptions matching `catch_tool_errors`
-        are caught; others propagate up.
+        are caught; others propagate up.  When an error is caught,
+        ``is_final`` is always ``False`` so the error feedback goes back
+        to the LLM rather than being mistaken for a final answer.
         """
         try:
             return fwd(tool_call)
@@ -529,7 +534,7 @@ def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | Non
             if isinstance(e.original_error, self.catch_tool_errors):
                 message = e.to_feedback_message(self.include_traceback)
                 append_message(message)
-                return message, None
+                return message, None, False
             else:
                 raise
 
@@ -572,14 +577,13 @@ def _call[**P, T](
             result: T | None = None
             is_final = False
             while message["role"] != "assistant" or tool_calls:
-                message, tool_calls, result = call_assistant(
+                message, tool_calls, result, is_final = call_assistant(
                     template.tools, response_model, **self.config
                 )
                 for tool_call in tool_calls:
-                    message, raw_result = call_tool(tool_call)
-                    if tool_call.is_final:
+                    message, raw_result, is_final = call_tool(tool_call)
+                    if is_final:
                         result = typing.cast(T, raw_result)
-                        is_final = True
                         break
                 if is_final:
                     break
diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py
index b79ba8e02..ab250c3e7 100644
--- a/tests/test_handlers_llm_provider.py
+++ b/tests/test_handlers_llm_provider.py
@@ -506,7 +506,7 @@ def test_retry_handler_succeeds_on_first_attempt(self):
             handler(mock_handler),
             handler(message_sequence_provider),
         ):
-            message, tool_calls, result = call_assistant(
+            message, tool_calls, result, _ = call_assistant(
                 tools={},
                 response_format=Encodable.define(str),
                 model="test-model",
@@ -536,7 +536,7 @@ def test_retry_handler_retries_on_invalid_tool_call(self):
             handler(mock_handler),
             handler(message_sequence_provider),
         ):
-            message, tool_calls, result = call_assistant(
+            message, tool_calls, result, _ = call_assistant(
                 tools={"add_numbers": add_numbers},
                 response_format=Encodable.define(str),
                 model="test-model",
@@ -568,7 +568,7 @@ def test_retry_handler_retries_on_unknown_tool(self):
             handler(mock_handler),
             handler(message_sequence_provider),
         ):
-            message, tool_calls, result = call_assistant(
+            message, tool_calls, result, _ = call_assistant(
                 tools={"add_numbers": add_numbers},
                 response_format=Encodable.define(str),
                 model="test-model",
@@ -645,7 +645,7 @@ def test_retry_handler_valid_tool_call_passes_through(self):
             handler(mock_handler),
             handler(message_sequence_provider),
         ):
-            message, tool_calls, result = call_assistant(
+            message, tool_calls, result, _ = call_assistant(
                 tools={"add_numbers": add_numbers},
                 response_format=Encodable.define(str),
                 model="test-model",
@@ -720,7 +720,7 @@ def test_retry_handler_retries_on_invalid_result(self):
             handler(mock_handler),
             handler(message_sequence_provider),
         ):
-            message, tool_calls, result = call_assistant(
+            message, tool_calls, result, _ = call_assistant(
                 tools={},
                 response_format=Encodable.define(int),
                 model="test-model",
@@ -970,7 +970,7 @@ def test_retry_handler_catches_tool_runtime_error(self):
         tool_call = DecodedToolCall(failing_tool, bound_args, "call_1")
 
         with handler(RetryLLMHandler()):
-            result, _ = call_tool(tool_call)
+            result, _, _ = call_tool(tool_call)
 
         # The result should be an error message, not an exception
         assert result["role"] == "tool"
@@ -987,7 +987,7 @@ def test_retry_handler_catches_division_by_zero(self):
         tool_call = DecodedToolCall(divide_tool, bound_args, "call_div")
 
         with handler(RetryLLMHandler()):
-            result, _ = call_tool(tool_call)
+            result, _, _ = call_tool(tool_call)
 
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_div"
@@ -1002,7 +1002,7 @@ def test_successful_tool_execution_returns_result(self):
         tool_call = DecodedToolCall(add_numbers, bound_args, "call_add")
 
         with handler(RetryLLMHandler()):
-            result, _ = call_tool(tool_call)
+            result, _, _ = call_tool(tool_call)
 
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_add"
@@ -1039,7 +1039,7 @@ def _call_assistant(self, tools, response_format, model, **kwargs):
             handler(mock_handler),
             handler(message_sequence_provider),
         ):
-            message, tool_calls, result = call_assistant(
+            message, tool_calls, result, _ = call_assistant(
                 tools={"failing_tool": failing_tool},
                 response_format=Encodable.define(str),
                 model="test-model",
@@ -1382,13 +1382,13 @@ def _completion(self_, model, messages, *args, **kwargs):
             handler({_get_history: lambda: message_sequence}),
         ):
             # First call: input is the latest message (msg_user)
-            resp1, _, _ = call_assistant(
+            resp1, _, _, _ = call_assistant(
                 tools={},
                 response_format=Encodable.define(str),
                 model="test-model",
             )
             # Second call: input is the first response
-            resp2, _, _ = call_assistant(
+            resp2, _, _, _ = call_assistant(
                 tools={},
                 response_format=Encodable.define(str),
                 model="test-model",
@@ -1553,7 +1553,7 @@ def test_call_tool_success_does_not_raise(self):
         bound_args = sig.bind(a=3, b=4)
         tc = DecodedToolCall(add_numbers, bound_args, "call_ok")
 
-        result, _ = call_tool(tc)
+        result, _, _ = call_tool(tc)
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_ok"
 
@@ -1568,7 +1568,7 @@ def test_matching_error_returns_feedback_message(self):
         tc = DecodedToolCall(flaky_tool, bound_args, "call_match")
 
         with handler(RetryLLMHandler(catch_tool_errors=ConnectionError)):
-            result, _ = call_tool(tc)
+            result, _, _ = call_tool(tc)
 
         assert result["role"] == "tool"
         assert result["tool_call_id"] == "call_match"
@@ -1595,7 +1595,7 @@ def test_default_catch_all_catches_everything(self):
         tc = DecodedToolCall(type_error_tool, bound_args, "call_default")
 
         with handler(RetryLLMHandler()):
-            result, _ = call_tool(tc)
+            result, _, _ = call_tool(tc)
 
         assert result["role"] == "tool"
         assert "Tool execution failed" in result["content"]
@@ -1611,7 +1611,7 @@ def test_tuple_of_error_types(self):
                 catch_tool_errors=(ConnectionError, ValueError),
             )
         ):
-            result, _ = call_tool(tc)
+            result, _, _ = call_tool(tc)
 
         assert result["role"] == "tool"
         assert "Tool execution failed" in result["content"]
diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index 283f32029..e06c15335 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -1394,9 +1394,10 @@ def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
         bound_args = sig.bind(x=5)
         tc = DecodedToolCall(final_tool, bound_args, "call_final", is_final=True)
 
-        message, raw_result = call_tool(tc)
+        message, raw_result, is_final = call_tool(tc)
         assert message["role"] == "tool"
         assert raw_result == 10
+        assert is_final is True
 
     def test_call_tool_returns_raw_result_for_normal_tool(self):
         """call_tool returns the raw Python result for all tools."""
@@ -1410,10 +1411,11 @@ def normal_tool(x: int) -> int:
         bound_args = sig.bind(x=3)
         tc = DecodedToolCall(normal_tool, bound_args, "call_normal")
 
-        message, raw_result = call_tool(tc)
+        message, raw_result, is_final = call_tool(tc)
         assert message["role"] == "tool"
         assert message["tool_call_id"] == "call_normal"
         assert raw_result == 4
+        assert is_final is False
 
     def test_call_tool_final_answer_with_retry_handler(self):
         """call_tool works with RetryLLMHandler for IsFinalAnswer tools."""
@@ -1428,10 +1430,11 @@ def final_tool(x: int) -> Annotated[str, IsFinalAnswer]:
         tc = DecodedToolCall(final_tool, bound_args, "call_retry_final", is_final=True)
 
         with handler(RetryLLMHandler()):
-            message, raw_result = call_tool(tc)
+            message, raw_result, is_final = call_tool(tc)
 
         assert message["role"] == "tool"
         assert raw_result == "answer: 42"
+        assert is_final is True
 
 
 class TestIsFinalAnswerCompletionLoop:
@@ -1586,3 +1589,62 @@ def task(n: int) -> int:
 
         assert result == 12
         assert mock.call_count == 1
+
+    def test_retry_handler_error_on_final_tool_does_not_produce_final_answer(self):
+        """When RetryLLMHandler catches an error on an is_final tool,
+        the error feedback goes back to the LLM instead of None being
+        returned as the final answer."""
+        call_count = 0
+
+        @Tool.define
+        def flaky_final(x: int) -> Annotated[int, IsFinalAnswer]:
+            """Return a final answer, but fail on first call."""
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                raise ValueError("transient failure")
+            return x * 10
+
+        @Template.define
+        def task(n: int) -> int:
+            """Call flaky_final with {n}."""
+            raise NotHandled
+
+        # Round 1: LLM calls flaky_final → error caught by RetryLLMHandler
+        # Round 2: LLM calls flaky_final again → succeeds
+        mock = MockCompletionHandler([
+            make_tool_call_response("flaky_final", '{"x": 5}'),
+            make_tool_call_response("flaky_final", '{"x": 5}'),
+        ])
+
+        with (
+            handler(LiteLLMProvider()),
+            handler(RetryLLMHandler()),
+            handler(mock),
+        ):
+            result = task(5)
+
+        assert result == 50  # NOT None
+        assert call_count == 2
+        assert mock.call_count == 2
+
+    def test_call_tool_returns_is_final_false_on_retry_handler_error(self):
+        """call_tool returns is_final=False when RetryLLMHandler catches
+        an error on an is_final tool."""
+
+        @Tool.define
+        def failing_final(x: int) -> Annotated[int, IsFinalAnswer]:
+            """Return a final answer."""
+            raise ValueError("boom")
+
+        sig = inspect.signature(failing_final)
+        bound_args = sig.bind(x=1)
+        tc = DecodedToolCall(failing_final, bound_args, "call_err", is_final=True)
+
+        with handler(RetryLLMHandler()):
+            message, raw_result, is_final = call_tool(tc)
+
+        assert message["role"] == "tool"
+        assert "Tool execution failed" in message["content"]
+        assert raw_result is None
+        assert is_final is False

From d46da71b2c75a3d282f74cc2601d9cde920bdab0 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 20:20:34 -0500
Subject: [PATCH 04/17] interaction with retry

---
 effectful/handlers/llm/completions.py | 48 +++++++++++-----
 pyproject.toml                        |  1 +
 tests/test_handlers_llm_template.py   | 82 +++++++++++++++++++++++++--
 3 files changed, 114 insertions(+), 17 deletions(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 1ac0e9e20..4f1cbfa7d 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -301,17 +301,35 @@ def call_assistant[T, U](
         decoded_tool_call = decode_tool_call(validated_tool_call, tools, raw_message)
         tool_calls.append(decoded_tool_call)
 
-    if any(tc.is_final for tc in tool_calls) and len(tool_calls) > 1:
-        final_name = next(tc.tool.__name__ for tc in tool_calls if tc.is_final)
-        raise ToolCallDecodingError(
-            final_name,
-            next(tc.id for tc in tool_calls if tc.is_final),
-            ValueError(
-                f"IsFinalAnswer tool '{final_name}' must be the only tool call "
-                f"in a round, but {len(tool_calls)} tool calls were generated."
-            ),
-            raw_message=raw_message,
-        )
+    final_tcs = [tc for tc in tool_calls if tc.is_final]
+    if final_tcs:
+        final_tc = final_tcs[0]
+        if len(tool_calls) > 1:
+            raise ToolCallDecodingError(
+                final_tc.tool.__name__,
+                final_tc.id,
+                ValueError(
+                    f"IsFinalAnswer tool '{final_tc.tool.__name__}' must be the "
+                    f"only tool call in a round, but {len(tool_calls)} tool calls "
+                    f"were generated."
+                ),
+                raw_message=raw_message,
+            )
+        # Validate that the tool's return type matches the template's.
+        tool_ret = inspect.signature(final_tc.tool).return_annotation
+        if typing.get_origin(tool_ret) is typing.Annotated:
+            tool_ret = typing.get_args(tool_ret)[0]
+        if tool_ret != response_format.base:
+            raise ToolCallDecodingError(
+                final_tc.tool.__name__,
+                final_tc.id,
+                TypeError(
+                    f"IsFinalAnswer tool '{final_tc.tool.__name__}' returns "
+                    f"{tool_ret!r}, but the enclosing template expects "
+                    f"{response_format.base!r}."
+                ),
+                raw_message=raw_message,
+            )
 
     result = None
     if not tool_calls:
@@ -513,13 +531,17 @@ def _attempt() -> MessageResult[T]:
             return fwd(tools, response_format, model, **kwargs)
 
         with handler({_get_history: lambda: _message_sequence}):
-            message, tool_calls, result, is_final = self.call_assistant_retryer(_attempt)
+            message, tool_calls, result, is_final = self.call_assistant_retryer(
+                _attempt
+            )
 
         append_message(message)
         return (message, tool_calls, result, is_final)
 
     @implements(call_tool)
-    def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool]:
+    def _call_tool[T](
+        self, tool_call: DecodedToolCall[T]
+    ) -> tuple[Message, T | None, bool]:
         """Handle tool execution with runtime error capture.
 
         Runtime errors from tool execution are captured and returned as
diff --git a/pyproject.toml b/pyproject.toml
index cdf674f59..3b79ba4e1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,6 +71,7 @@ test = [
     "ruff",
     "nbval",
     "nbqa",
+    "pytest-timeout",
 ]
 
 [dependency-groups]
diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index e06c15335..5b3b538db 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -14,10 +14,14 @@
     DecodedToolCall,
     LiteLLMProvider,
     RetryLLMHandler,
+    ToolCallDecodingError,
+    _get_history,
+    call_assistant,
     call_tool,
     call_user,
     completion,
 )
+from effectful.handlers.llm.encoding import Encodable
 from effectful.handlers.llm.template import _is_final_answer_tool
 from effectful.ops.semantics import handler
 from effectful.ops.syntax import ObjectInterpretation, implements
@@ -1612,10 +1616,12 @@ def task(n: int) -> int:
 
         # Round 1: LLM calls flaky_final → error caught by RetryLLMHandler
         # Round 2: LLM calls flaky_final again → succeeds
-        mock = MockCompletionHandler([
-            make_tool_call_response("flaky_final", '{"x": 5}'),
-            make_tool_call_response("flaky_final", '{"x": 5}'),
-        ])
+        mock = MockCompletionHandler(
+            [
+                make_tool_call_response("flaky_final", '{"x": 5}'),
+                make_tool_call_response("flaky_final", '{"x": 5}'),
+            ]
+        )
 
         with (
             handler(LiteLLMProvider()),
@@ -1648,3 +1654,71 @@ def failing_final(x: int) -> Annotated[int, IsFinalAnswer]:
         assert "Tool execution failed" in message["content"]
         assert raw_result is None
         assert is_final is False
+
+
+class TestIsFinalAnswerReturnTypeValidation:
+    """call_assistant should reject IsFinalAnswer tools whose return type
+    does not match the enclosing template's return type."""
+
+    def test_mismatched_return_type_raises_tool_call_decoding_error(self):
+        """IsFinalAnswer tool returning str when template expects int is rejected."""
+
+        @Tool.define
+        def wrong_type_tool(x: int) -> Annotated[str, IsFinalAnswer]:
+            """Return a string, but template expects int."""
+            return str(x)
+
+        message_sequence = collections.OrderedDict(
+            id1={"id": "id1", "role": "user", "content": "test"},
+        )
+
+        mock = MockCompletionHandler(
+            [
+                make_tool_call_response("wrong_type_tool", '{"x": 5}'),
+            ]
+        )
+
+        with (
+            handler(mock),
+            handler({_get_history: lambda: message_sequence}),
+        ):
+            with pytest.raises(ToolCallDecodingError) as exc_info:
+                call_assistant(
+                    tools={"wrong_type_tool": wrong_type_tool},
+                    response_format=Encodable.define(int),
+                    model="test-model",
+                )
+
+        assert isinstance(exc_info.value.original_error, TypeError)
+        assert "wrong_type_tool" in str(exc_info.value.original_error)
+
+    def test_matching_return_type_passes_validation(self):
+        """IsFinalAnswer tool with matching return type is accepted."""
+
+        @Tool.define
+        def correct_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+            """Return an int matching template."""
+            return x * 2
+
+        message_sequence = collections.OrderedDict(
+            id1={"id": "id1", "role": "user", "content": "test"},
+        )
+
+        mock = MockCompletionHandler(
+            [
+                make_tool_call_response("correct_tool", '{"x": 5}'),
+            ]
+        )
+
+        with (
+            handler(mock),
+            handler({_get_history: lambda: message_sequence}),
+        ):
+            _, tool_calls, _, is_final = call_assistant(
+                tools={"correct_tool": correct_tool},
+                response_format=Encodable.define(int),
+                model="test-model",
+            )
+
+        assert len(tool_calls) == 1
+        assert is_final is True

From 38bd78d7799130b06dee85f6951d7f7b9454c59d Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 21:14:13 -0500
Subject: [PATCH 05/17] Accept subclasses of the template return type for final tools

---
 effectful/handlers/llm/completions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 4f1cbfa7d..721398859 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -319,7 +319,7 @@ def call_assistant[T, U](
         tool_ret = inspect.signature(final_tc.tool).return_annotation
         if typing.get_origin(tool_ret) is typing.Annotated:
             tool_ret = typing.get_args(tool_ret)[0]
-        if tool_ret != response_format.base:
+        if not issubclass(tool_ret, response_format.base):
             raise ToolCallDecodingError(
                 final_tc.tool.__name__,
                 final_tc.id,

From f0e896038bff6e83adbae642e6dab841c57bdf33 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 21:29:46 -0500
Subject: [PATCH 06/17] lint

---
 effectful/handlers/llm/completions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 721398859..15f223d84 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -360,16 +360,16 @@ def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool
     """
     # call tool with python types
     try:
-        result = tool_call.tool(
+        result: T = tool_call.tool(
             *tool_call.bound_args.args, **tool_call.bound_args.kwargs
         )
     except Exception as e:
         raise ToolCallExecutionError(tool_call.tool.__name__, tool_call.id, e) from e
 
     # serialize back to U using encoder for return type
-    return_type = Encodable.define(
+    return_type: Encodable[T, typing.Any] = Encodable.define(
         typing.cast(type[typing.Any], nested_type(result).value)
-    )
+    )  # type: ignore
     encoded_result = return_type.serialize(return_type.encode(result))
     message = _make_message(
         dict(role="tool", content=encoded_result, tool_call_id=tool_call.id),

From 71e869c308eca66bdf504c868281c950fb431beb Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 21:34:46 -0500
Subject: [PATCH 07/17] is_final loop variable

---
 effectful/handlers/llm/completions.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 15f223d84..a69034b25 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -344,7 +344,7 @@ def call_assistant[T, U](
         except (pydantic.ValidationError, TypeError, ValueError, SyntaxError) as e:
             raise ResultDecodingError(e, raw_message=raw_message) from e
 
-    is_final = any(tc.is_final for tc in tool_calls)
+    is_final = not all(not tc.is_final for tc in tool_calls)
     return (raw_message, tool_calls, result, is_final)
 
 
@@ -597,18 +597,13 @@ def _call[**P, T](
             # loop based on: https://cookbook.openai.com/examples/reasoning_function_calls
             tool_calls: list[DecodedToolCall] = []
             result: T | None = None
-            is_final = False
-            while message["role"] != "assistant" or tool_calls:
+            is_final: bool = False
+            while not is_final:
                 message, tool_calls, result, is_final = call_assistant(
                     template.tools, response_model, **self.config
                 )
                 for tool_call in tool_calls:
-                    message, raw_result, is_final = call_tool(tool_call)
-                    if is_final:
-                        result = typing.cast(T, raw_result)
-                        break
-                if is_final:
-                    break
+                    message, result, is_final = call_tool(tool_call)
 
         try:
             _get_history()

From 97fd4ab8c8acb07c4acb6764896ce0f87a8ac76a Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Sun, 15 Feb 2026 22:06:18 -0500
Subject: [PATCH 08/17] Rename IsFinalAnswer annotation to IsFinal

---
 effectful/handlers/llm/__init__.py    |  4 +-
 effectful/handlers/llm/completions.py |  4 +-
 effectful/handlers/llm/template.py    | 20 ++++----
 tests/test_handlers_llm_template.py   | 70 +++++++++++++--------------
 4 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/effectful/handlers/llm/__init__.py b/effectful/handlers/llm/__init__.py
index 3398c3160..cdda93479 100644
--- a/effectful/handlers/llm/__init__.py
+++ b/effectful/handlers/llm/__init__.py
@@ -1,3 +1,3 @@
-from .template import Agent, IsFinalAnswer, Template, Tool
+from .template import Agent, Template, Tool
 
-__all__ = ["Agent", "IsFinalAnswer", "Template", "Tool"]
+__all__ = ["Agent", "Template", "Tool"]
diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index a69034b25..cf367cb5e 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -309,7 +309,7 @@ def call_assistant[T, U](
                 final_tc.tool.__name__,
                 final_tc.id,
                 ValueError(
-                    f"IsFinalAnswer tool '{final_tc.tool.__name__}' must be the "
+                    f"IsFinal tool '{final_tc.tool.__name__}' must be the "
                     f"only tool call in a round, but {len(tool_calls)} tool calls "
                     f"were generated."
                 ),
@@ -324,7 +324,7 @@ def call_assistant[T, U](
                 final_tc.tool.__name__,
                 final_tc.id,
                 TypeError(
-                    f"IsFinalAnswer tool '{final_tc.tool.__name__}' returns "
+                    f"IsFinal tool '{final_tc.tool.__name__}' returns "
                     f"{tool_ret!r}, but the enclosing template expects "
                     f"{response_format.base!r}."
                 ),
diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py
index 992a02522..f5d84ec32 100644
--- a/effectful/handlers/llm/template.py
+++ b/effectful/handlers/llm/template.py
@@ -63,7 +63,7 @@ def _is_recursive_signature(sig: inspect.Signature):
     return any(annotation is IsRecursive for annotation in annotations)
 
 
-class _IsFinalAnswerAnnotation(Annotation):
+class _IsFinalAnnotation(Annotation):
     """
     A special type annotation for return types in the signature of a
     :class:`Tool` that indicates its result should be returned directly
@@ -72,7 +72,7 @@ class _IsFinalAnswerAnnotation(Annotation):
 
     .. warning::
 
-        :class:`IsFinalAnswer` annotations are only defined to ascribe
+        :class:`IsFinal` annotations are only defined to ascribe
         return annotations, and if used in a parameter will raise a
         :class:`TypeError` at tool construction time.
 
@@ -80,10 +80,10 @@ class _IsFinalAnswerAnnotation(Annotation):
 
         >>> from typing import Annotated
         >>> from effectful.handlers.llm import Tool
-        >>> from effectful.handlers.llm.template import IsFinalAnswer
+        >>> from effectful.handlers.llm.template import IsFinal
 
         >>> @Tool.define
-        ... def generate(prompt: str) -> Annotated[str, IsFinalAnswer]:
+        ... def generate(prompt: str) -> Annotated[str, IsFinal]:
         ...     \"""Generate content for the given prompt.\"""
         ...     return "generated content"
     """
@@ -97,22 +97,20 @@ def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature:
             if any(isinstance(arg, cls) for arg in typing.get_args(ty)):
                 raise TypeError(
                     f"Illegal annotation {ty} for parameter {name}, "
-                    "IsFinalAnswer must only be used to annotate return types."
+                    "IsFinal must only be used to annotate return types."
                 )
         return sig
 
 
-IsFinalAnswer = _IsFinalAnswerAnnotation()
+IsFinal = _IsFinalAnnotation()
 
 
 def _is_final_answer_tool(tool: Any) -> bool:
-    """Check if a tool's return type is annotated with IsFinalAnswer."""
+    """Check if a tool's return type is annotated with IsFinal."""
     ret = tool.__signature__.return_annotation
     if typing.get_origin(ret) is not Annotated:
         return False
-    return any(
-        isinstance(arg, _IsFinalAnswerAnnotation) for arg in typing.get_args(ret)
-    )
+    return any(isinstance(arg, _IsFinalAnnotation) for arg in typing.get_args(ret))
 
 
 class Tool[**P, T](Operation[P, T]):
@@ -149,7 +147,7 @@ def __init__(
         if not default.__doc__:
             raise ValueError("Tools must have docstrings.")
         signature = IsRecursive.infer_annotations(signature)
-        signature = IsFinalAnswer.infer_annotations(signature)
+        signature = IsFinal.infer_annotations(signature)
         super().__init__(signature, name, default)
 
     @classmethod
diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index 5b3b538db..9a516aae0 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -9,7 +9,7 @@
 import pytest
 from litellm import ModelResponse
 
-from effectful.handlers.llm import Agent, IsFinalAnswer, Template, Tool
+from effectful.handlers.llm import Agent, Template, Tool
 from effectful.handlers.llm.completions import (
     DecodedToolCall,
     LiteLLMProvider,
@@ -22,7 +22,7 @@
     completion,
 )
 from effectful.handlers.llm.encoding import Encodable
-from effectful.handlers.llm.template import _is_final_answer_tool
+from effectful.handlers.llm.template import IsFinal, _is_final_answer_tool
 from effectful.ops.semantics import handler
 from effectful.ops.syntax import ObjectInterpretation, implements
 from effectful.ops.types import NotHandled
@@ -1335,18 +1335,18 @@ def bad(x: int) -> str:
 
 
 # ---------------------------------------------------------------------------
-# IsFinalAnswer annotation tests
+# IsFinal annotation tests
 # ---------------------------------------------------------------------------
 
 
-class TestIsFinalAnswerAnnotation:
-    """Tests for the IsFinalAnswer type annotation."""
+class TestIsFinalAnnotation:
+    """Tests for the IsFinal type annotation."""
 
     def test_tool_with_is_final_answer_return_type(self):
-        """Tool with IsFinalAnswer on return type creates successfully."""
+        """Tool with IsFinal on return type creates successfully."""
 
         @Tool.define
-        def my_tool(x: int) -> Annotated[str, IsFinalAnswer]:
+        def my_tool(x: int) -> Annotated[str, IsFinal]:
             """A tool that returns a final answer."""
             return str(x)
 
@@ -1363,34 +1363,34 @@ def normal_tool(x: int) -> str:
         assert not _is_final_answer_tool(normal_tool)
 
     def test_is_final_answer_on_parameter_raises(self):
-        """IsFinalAnswer on a parameter raises TypeError at define time."""
-        with pytest.raises(TypeError, match="IsFinalAnswer"):
+        """IsFinal on a parameter raises TypeError at define time."""
+        with pytest.raises(TypeError, match="IsFinal"):
 
             @Tool.define
-            def bad_tool(x: Annotated[int, IsFinalAnswer]) -> str:
+            def bad_tool(x: Annotated[int, IsFinal]) -> str:
                 """A tool with bad annotation."""
                 return str(x)
 
     def test_is_final_answer_combined_with_is_recursive(self):
-        """IsFinalAnswer and IsRecursive can coexist on a return type."""
+        """IsFinal and IsRecursive can coexist on a return type."""
         from effectful.handlers.llm.template import IsRecursive
 
         @Tool.define
-        def combo_tool(x: int) -> Annotated[str, IsFinalAnswer, IsRecursive]:
+        def combo_tool(x: int) -> Annotated[str, IsFinal, IsRecursive]:
             """A tool with both annotations."""
             return str(x)
 
         assert _is_final_answer_tool(combo_tool)
 
 
-class TestIsFinalAnswerCallTool:
-    """Tests for call_tool behavior with IsFinalAnswer tools."""
+class TestIsFinalCallTool:
+    """Tests for call_tool behavior with IsFinal tools."""
 
     def test_call_tool_returns_raw_result_for_final_answer_tool(self):
         """call_tool returns the raw Python result alongside the message."""
 
         @Tool.define
-        def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+        def final_tool(x: int) -> Annotated[int, IsFinal]:
             """Returns a final answer."""
             return x * 2
 
@@ -1422,10 +1422,10 @@ def normal_tool(x: int) -> int:
         assert is_final is False
 
     def test_call_tool_final_answer_with_retry_handler(self):
-        """call_tool works with RetryLLMHandler for IsFinalAnswer tools."""
+        """call_tool works with RetryLLMHandler for IsFinal tools."""
 
         @Tool.define
-        def final_tool(x: int) -> Annotated[str, IsFinalAnswer]:
+        def final_tool(x: int) -> Annotated[str, IsFinal]:
             """Returns a final answer."""
             return f"answer: {x}"
 
@@ -1441,15 +1441,15 @@ def final_tool(x: int) -> Annotated[str, IsFinalAnswer]:
         assert is_final is True
 
 
-class TestIsFinalAnswerCompletionLoop:
-    """Tests for IsFinalAnswer through the full completion loop."""
+class TestIsFinalCompletionLoop:
+    """Tests for IsFinal through the full completion loop."""
 
     def test_final_answer_tool_skips_final_llm_call(self):
         """When LLM calls a final-answer tool, result is returned
         directly without a second call_assistant invocation."""
 
         @Tool.define
-        def compute(x: int) -> Annotated[int, IsFinalAnswer]:
+        def compute(x: int) -> Annotated[int, IsFinal]:
             """Compute and return the result directly."""
             return x * 10
 
@@ -1476,7 +1476,7 @@ class MyResult:
             label: str
 
         @Tool.define
-        def make_result() -> Annotated[MyResult, IsFinalAnswer]:
+        def make_result() -> Annotated[MyResult, IsFinal]:
             """Create a structured result."""
             return MyResult(value=42, label="answer")
 
@@ -1495,10 +1495,10 @@ def task() -> MyResult:
         assert result.label == "answer"
 
     def test_agent_history_valid_after_final_answer(self):
-        """Agent history has no orphaned tool_calls after IsFinalAnswer."""
+        """Agent history has no orphaned tool_calls after IsFinal."""
 
         @Tool.define
-        def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+        def final_tool(x: int) -> Annotated[int, IsFinal]:
             """Return final answer."""
             return x
 
@@ -1533,10 +1533,10 @@ def do_work(self, n: int) -> int:
                     assert has_response, f"Orphaned tool_call {tc_id} in history"
 
     def test_agent_subsequent_call_after_final_answer(self):
-        """A follow-up call on the same Agent works after IsFinalAnswer."""
+        """A follow-up call on the same Agent works after IsFinal."""
 
         @Tool.define
-        def final_tool() -> Annotated[str, IsFinalAnswer]:
+        def final_tool() -> Annotated[str, IsFinal]:
             """Return final answer."""
             return "direct result"
 
@@ -1568,10 +1568,10 @@ def _completion(self, model, messages=None, **kwargs):
         assert r2 == "llm result"
 
     def test_final_answer_with_retry_handler_active(self):
-        """IsFinalAnswer works correctly with RetryLLMHandler."""
+        """IsFinal works correctly with RetryLLMHandler."""
 
         @Tool.define
-        def final_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+        def final_tool(x: int) -> Annotated[int, IsFinal]:
             """Return final answer."""
             return x * 3
 
@@ -1601,7 +1601,7 @@ def test_retry_handler_error_on_final_tool_does_not_produce_final_answer(self):
         call_count = 0
 
         @Tool.define
-        def flaky_final(x: int) -> Annotated[int, IsFinalAnswer]:
+        def flaky_final(x: int) -> Annotated[int, IsFinal]:
             """Return a final answer, but fail on first call."""
             nonlocal call_count
             call_count += 1
@@ -1639,7 +1639,7 @@ def test_call_tool_returns_is_final_false_on_retry_handler_error(self):
         an error on an is_final tool."""
 
         @Tool.define
-        def failing_final(x: int) -> Annotated[int, IsFinalAnswer]:
+        def failing_final(x: int) -> Annotated[int, IsFinal]:
             """Return a final answer."""
             raise ValueError("boom")
 
@@ -1656,15 +1656,15 @@ def failing_final(x: int) -> Annotated[int, IsFinalAnswer]:
         assert is_final is False
 
 
-class TestIsFinalAnswerReturnTypeValidation:
-    """call_assistant should reject IsFinalAnswer tools whose return type
+class TestIsFinalReturnTypeValidation:
+    """call_assistant should reject IsFinal tools whose return type
     does not match the enclosing template's return type."""
 
     def test_mismatched_return_type_raises_tool_call_decoding_error(self):
-        """IsFinalAnswer tool returning str when template expects int is rejected."""
+        """IsFinal tool returning str when template expects int is rejected."""
 
         @Tool.define
-        def wrong_type_tool(x: int) -> Annotated[str, IsFinalAnswer]:
+        def wrong_type_tool(x: int) -> Annotated[str, IsFinal]:
             """Return a string, but template expects int."""
             return str(x)
 
@@ -1693,10 +1693,10 @@ def wrong_type_tool(x: int) -> Annotated[str, IsFinalAnswer]:
         assert "wrong_type_tool" in str(exc_info.value.original_error)
 
     def test_matching_return_type_passes_validation(self):
-        """IsFinalAnswer tool with matching return type is accepted."""
+        """IsFinal tool with matching return type is accepted."""
 
         @Tool.define
-        def correct_tool(x: int) -> Annotated[int, IsFinalAnswer]:
+        def correct_tool(x: int) -> Annotated[int, IsFinal]:
             """Return an int matching template."""
             return x * 2
 

From e656c58d4aa638bd9bffb954ba2ac5d43015b6cd Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:03:21 -0500
Subject: [PATCH 09/17] fold IsFinal validation into tool-call decoding loop; make is_final a derived property

---
 effectful/handlers/llm/completions.py | 51 +++++++++++----------------
 effectful/handlers/llm/encoding.py    |  7 ++--
 2 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 557b0885c..5383796d4 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -27,7 +27,7 @@
 from effectful.handlers.llm.encoding import DecodedToolCall, Encodable
 from effectful.handlers.llm.template import Template, Tool
 from effectful.internals.unification import nested_type
-from effectful.ops.semantics import fwd, handler
+from effectful.ops.semantics import _simple_type, fwd, handler
 from effectful.ops.syntax import ObjectInterpretation, implements
 from effectful.ops.types import Operation
 
@@ -217,6 +217,23 @@ def call_assistant[T, U](
     for raw_tool_call in raw_tool_calls:
         try:
             tool_calls += [encoding.decode(raw_tool_call)]  # type: ignore
+            if tool_calls[-1].is_final:
+                if len(raw_tool_calls) > 1:
+                    raise ValueError(
+                        f"IsFinal tool '{tool_calls[-1].tool.__name__}' must be the "
+                        f"only tool call in a round, but {len(raw_tool_calls)} tool calls "
+                        f"were generated."
+                    )
+                # Validate that the tool's return type matches the template's.
+                tool_sig = inspect.signature(tool_calls[-1].tool)
+                if not issubclass(
+                    _simple_type(tool_sig.return_annotation), response_format.base
+                ):
+                    raise TypeError(
+                        f"IsFinal tool '{raw_tool_call.function.name}' has signature "
+                        f"{tool_sig.format()}, but the enclosing template expects "
+                        f"{response_format.base!r}."
+                    )
         except Exception as e:
             raise ToolCallDecodingError(
                 raw_tool_call=raw_tool_call,
@@ -224,34 +241,6 @@ def call_assistant[T, U](
                 raw_message=raw_message,
             ) from e
 
-    final_tcs = [tc for tc in tool_calls if tc.is_final]
-    if final_tcs:
-        final_tc = final_tcs[0]
-        if len(tool_calls) > 1:
-            raise ToolCallDecodingError(
-                raw_tool_call=raw_message.tool_calls[0],  # type: ignore
-                original_error=ValueError(
-                    f"IsFinal tool '{final_tc.tool.__name__}' must be the "
-                    f"only tool call in a round, but {len(tool_calls)} tool calls "
-                    f"were generated."
-                ),
-                raw_message=raw_message,
-            )
-        # Validate that the tool's return type matches the template's.
-        tool_ret = inspect.signature(final_tc.tool).return_annotation
-        if typing.get_origin(tool_ret) is typing.Annotated:
-            tool_ret = typing.get_args(tool_ret)[0]
-        if not issubclass(tool_ret, response_format.base):
-            raise ToolCallDecodingError(
-                raw_tool_call=raw_message.tool_calls[0],  # type: ignore
-                original_error=TypeError(
-                    f"IsFinal tool '{final_tc.tool.__name__}' returns "
-                    f"{tool_ret!r}, but the enclosing template expects "
-                    f"{response_format.base!r}."
-                ),
-                raw_message=raw_message,
-            )
-
     result = None
     if not tool_calls:
         # return response
@@ -288,8 +277,8 @@ def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool
     except Exception as e:
         raise ToolCallExecutionError(raw_tool_call=tool_call, original_error=e) from e
 
-    return_type = Encodable.define(nested_type(result).value)
-    encoded_result = return_type.serialize(return_type.encode(result))
+    return_type = Encodable.define(nested_type(result).value)  # type: ignore
+    encoded_result = return_type.serialize(return_type.encode(result))  # type: ignore
     message = _make_message(
         dict(role="tool", content=encoded_result, tool_call_id=tool_call.id),
     )
diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py
index 43bd1301a..04932d57a 100644
--- a/effectful/handlers/llm/encoding.py
+++ b/effectful/handlers/llm/encoding.py
@@ -31,7 +31,7 @@
 from PIL import Image
 
 import effectful.handlers.llm.evaluation as evaluation
-from effectful.handlers.llm.template import Tool
+from effectful.handlers.llm.template import Tool, _is_final_answer_tool
 from effectful.internals.unification import nested_type
 from effectful.ops.semantics import _simple_type
 from effectful.ops.syntax import _CustomSingleDispatchCallable
@@ -60,7 +60,10 @@ class DecodedToolCall[T]:
     bound_args: inspect.BoundArguments
     id: ToolCallID
     name: str
-    is_final: bool = False
+
+    @property
+    def is_final(self) -> bool:
+        return _is_final_answer_tool(self.tool)
 
 
 class Encodable[T, U](ABC):

From 94ce38f2d4a9e0ef267cbea8ec72b2695aa97557 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:08:51 -0500
Subject: [PATCH 10/17] inline helper

---
 effectful/handlers/llm/encoding.py | 7 +++++--
 effectful/handlers/llm/template.py | 8 --------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py
index 04932d57a..f6af514f7 100644
--- a/effectful/handlers/llm/encoding.py
+++ b/effectful/handlers/llm/encoding.py
@@ -31,7 +31,7 @@
 from PIL import Image
 
 import effectful.handlers.llm.evaluation as evaluation
-from effectful.handlers.llm.template import Tool, _is_final_answer_tool
+from effectful.handlers.llm.template import Tool, _IsFinalAnnotation
 from effectful.internals.unification import nested_type
 from effectful.ops.semantics import _simple_type
 from effectful.ops.syntax import _CustomSingleDispatchCallable
@@ -63,7 +63,10 @@ class DecodedToolCall[T]:
 
     @property
     def is_final(self) -> bool:
-        return _is_final_answer_tool(self.tool)
+        ret = inspect.signature(self.tool).return_annotation
+        return typing.get_origin(ret) is typing.Annotated and any(
+            isinstance(arg, _IsFinalAnnotation) for arg in ret.__metadata__
+        )
 
 
 class Encodable[T, U](ABC):
diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py
index 2183f8252..6ebf049be 100644
--- a/effectful/handlers/llm/template.py
+++ b/effectful/handlers/llm/template.py
@@ -105,14 +105,6 @@ def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature:
 IsFinal = _IsFinalAnnotation()
 
 
-def _is_final_answer_tool(tool: Any) -> bool:
-    """Check if a tool's return type is annotated with IsFinal."""
-    ret = tool.__signature__.return_annotation
-    if typing.get_origin(ret) is not Annotated:
-        return False
-    return any(isinstance(arg, _IsFinalAnnotation) for arg in typing.get_args(ret))
-
-
 class Tool[**P, T](Operation[P, T]):
     """A :class:`Tool` is a function that may be called by a :class:`Template`.
 

From 18485ce94a1b0c3c287c70062f879a11e8ededd9 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:10:35 -0500
Subject: [PATCH 11/17] remove TestIsFinalAnnotation tests that relied on _is_final_answer_tool

---
 tests/test_handlers_llm_template.py | 46 +----------------------------
 1 file changed, 1 insertion(+), 45 deletions(-)

diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index 01a1d284f..04c77f8ac 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -22,7 +22,7 @@
     completion,
 )
 from effectful.handlers.llm.encoding import DecodedToolCall, Encodable
-from effectful.handlers.llm.template import IsFinal, _is_final_answer_tool
+from effectful.handlers.llm.template import IsFinal
 from effectful.ops.semantics import handler
 from effectful.ops.syntax import ObjectInterpretation, implements
 from effectful.ops.types import NotHandled
@@ -1532,50 +1532,6 @@ def bad(x: int) -> str:
 # ---------------------------------------------------------------------------
 
 
-class TestIsFinalAnnotation:
-    """Tests for the IsFinal type annotation."""
-
-    def test_tool_with_is_final_answer_return_type(self):
-        """Tool with IsFinal on return type creates successfully."""
-
-        @Tool.define
-        def my_tool(x: int) -> Annotated[str, IsFinal]:
-            """A tool that returns a final answer."""
-            return str(x)
-
-        assert _is_final_answer_tool(my_tool)
-
-    def test_tool_without_is_final_answer(self):
-        """Normal tool is not detected as final answer."""
-
-        @Tool.define
-        def normal_tool(x: int) -> str:
-            """A normal tool."""
-            return str(x)
-
-        assert not _is_final_answer_tool(normal_tool)
-
-    def test_is_final_answer_on_parameter_raises(self):
-        """IsFinal on a parameter raises TypeError at define time."""
-        with pytest.raises(TypeError, match="IsFinal"):
-
-            @Tool.define
-            def bad_tool(x: Annotated[int, IsFinal]) -> str:
-                """A tool with bad annotation."""
-                return str(x)
-
-    def test_is_final_answer_combined_with_is_recursive(self):
-        """IsFinal and IsRecursive can coexist on a return type."""
-        from effectful.handlers.llm.template import IsRecursive
-
-        @Tool.define
-        def combo_tool(x: int) -> Annotated[str, IsFinal, IsRecursive]:
-            """A tool with both annotations."""
-            return str(x)
-
-        assert _is_final_answer_tool(combo_tool)
-
-
 class TestIsFinalCallTool:
     """Tests for call_tool behavior with IsFinal tools."""
 

From 61e55d66fb69663b3c08e850be32b3e58eedbc36 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:16:27 -0500
Subject: [PATCH 12/17] report raw tool call name in IsFinal multi-call error message

---
 effectful/handlers/llm/completions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 5383796d4..865ba81d7 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -220,7 +220,7 @@ def call_assistant[T, U](
             if tool_calls[-1].is_final:
                 if len(raw_tool_calls) > 1:
                     raise ValueError(
-                        f"IsFinal tool '{tool_calls[-1].tool.__name__}' must be the "
+                        f"IsFinal tool '{raw_tool_call.function.name}' must be the "
                         f"only tool call in a round, but {len(raw_tool_calls)} tool calls "
                         f"were generated."
                     )

From 7a2af1e3f4f47946a81f66bbae3cfe27b501ad67 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:19:46 -0500
Subject: [PATCH 13/17] use repr of signature instead of Signature.format() in IsFinal type error

---
 effectful/handlers/llm/completions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 865ba81d7..92e4155f1 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -231,7 +231,7 @@ def call_assistant[T, U](
                 ):
                     raise TypeError(
                         f"IsFinal tool '{raw_tool_call.function.name}' has signature "
-                        f"{tool_sig.format()}, but the enclosing template expects "
+                        f"{tool_sig!r}, but the enclosing template expects "
                         f"{response_format.base!r}."
                     )
         except Exception as e:

From bab83a402c5b433e5cec5017ed91f3e925259c7b Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:40:31 -0500
Subject: [PATCH 14/17] unwrap Annotated before IsFinal return-type check; treat no-tool-call responses as final; update tests

---
 effectful/handlers/llm/completions.py |  5 +++--
 tests/test_handlers_llm_template.py   | 32 +++++++++++++++++----------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py
index 92e4155f1..900b55d30 100644
--- a/effectful/handlers/llm/completions.py
+++ b/effectful/handlers/llm/completions.py
@@ -226,8 +226,9 @@ def call_assistant[T, U](
                     )
                 # Validate that the tool's return type matches the template's.
                 tool_sig = inspect.signature(tool_calls[-1].tool)
+                return_annotation = typing.get_args(tool_sig.return_annotation)[0]
                 if not issubclass(
-                    _simple_type(tool_sig.return_annotation), response_format.base
+                    _simple_type(return_annotation), response_format.base
                 ):
                     raise TypeError(
                         f"IsFinal tool '{raw_tool_call.function.name}' has signature "
@@ -255,7 +256,7 @@ def call_assistant[T, U](
         except (pydantic.ValidationError, TypeError, ValueError, SyntaxError) as e:
             raise ResultDecodingError(e, raw_message=raw_message) from e
 
-    is_final = not all(not tc.is_final for tc in tool_calls)
+    is_final = any(tc.is_final for tc in tool_calls) or not tool_calls
     return (raw_message, tool_calls, result, is_final)
 
 
diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index 04c77f8ac..092b317aa 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -1545,7 +1545,7 @@ def final_tool(x: int) -> Annotated[int, IsFinal]:
 
         sig = inspect.signature(final_tool)
         bound_args = sig.bind(x=5)
-        tc = DecodedToolCall(final_tool, bound_args, "call_final", is_final=True)
+        tc = DecodedToolCall(final_tool, bound_args, id="call_final", name="final_tool")
 
         message, raw_result, is_final = call_tool(tc)
         assert message["role"] == "tool"
@@ -1562,7 +1562,9 @@ def normal_tool(x: int) -> int:
 
         sig = inspect.signature(normal_tool)
         bound_args = sig.bind(x=3)
-        tc = DecodedToolCall(normal_tool, bound_args, "call_normal")
+        tc = DecodedToolCall(
+            normal_tool, bound_args, id="call_normal", name="normal_tool"
+        )
 
         message, raw_result, is_final = call_tool(tc)
         assert message["role"] == "tool"
@@ -1580,7 +1582,9 @@ def final_tool(x: int) -> Annotated[str, IsFinal]:
 
         sig = inspect.signature(final_tool)
         bound_args = sig.bind(x=42)
-        tc = DecodedToolCall(final_tool, bound_args, "call_retry_final", is_final=True)
+        tc = DecodedToolCall(
+            final_tool, bound_args, id="call_retry_final", name="final_tool"
+        )
 
         with handler(RetryLLMHandler()):
             message, raw_result, is_final = call_tool(tc)
@@ -1607,7 +1611,9 @@ def task(n: int) -> int:
             """Call compute with {n}."""
             raise NotHandled
 
-        mock = MockCompletionHandler([make_tool_call_response("compute", '{"x": 7}')])
+        mock = MockCompletionHandler(
+            [make_tool_call_response("compute", '{"x": {"value": 7}}')]
+        )
 
         with handler(LiteLLMProvider()), handler(mock):
             result = task(7)
@@ -1659,7 +1665,7 @@ def do_work(self, n: int) -> int:
                 raise NotHandled
 
         mock = MockCompletionHandler(
-            [make_tool_call_response("final_tool", '{"x": 5}')]
+            [make_tool_call_response("final_tool", '{"x": {"value": 5}}')]
         )
         agent = MyAgent()
 
@@ -1705,7 +1711,7 @@ def _completion(self, model, messages=None, **kwargs):
                 call_count += 1
                 if call_count == 1:
                     return make_tool_call_response("final_tool", "{}")
-                return make_text_response('{"value": "llm result"}')
+                return make_text_response("llm result")
 
         agent = MyAgent()
 
@@ -1730,7 +1736,7 @@ def task(n: int) -> int:
             raise NotHandled
 
         mock = MockCompletionHandler(
-            [make_tool_call_response("final_tool", '{"x": 4}')]
+            [make_tool_call_response("final_tool", '{"x": {"value": 4}}')]
         )
 
         with (
@@ -1767,8 +1773,8 @@ def task(n: int) -> int:
         # Round 2: LLM calls flaky_final again → succeeds
         mock = MockCompletionHandler(
             [
-                make_tool_call_response("flaky_final", '{"x": 5}'),
-                make_tool_call_response("flaky_final", '{"x": 5}'),
+                make_tool_call_response("flaky_final", '{"x": {"value": 5}}'),
+                make_tool_call_response("flaky_final", '{"x": {"value": 5}}'),
             ]
         )
 
@@ -1794,7 +1800,9 @@ def failing_final(x: int) -> Annotated[int, IsFinal]:
 
         sig = inspect.signature(failing_final)
         bound_args = sig.bind(x=1)
-        tc = DecodedToolCall(failing_final, bound_args, "call_err", is_final=True)
+        tc = DecodedToolCall(
+            failing_final, bound_args, id="call_err", name="failing_final"
+        )
 
         with handler(RetryLLMHandler()):
             message, raw_result, is_final = call_tool(tc)
@@ -1823,7 +1831,7 @@ def wrong_type_tool(x: int) -> Annotated[str, IsFinal]:
 
         mock = MockCompletionHandler(
             [
-                make_tool_call_response("wrong_type_tool", '{"x": 5}'),
+                make_tool_call_response("wrong_type_tool", '{"x": {"value": 5}}'),
             ]
         )
 
@@ -1855,7 +1863,7 @@ def correct_tool(x: int) -> Annotated[int, IsFinal]:
 
         mock = MockCompletionHandler(
             [
-                make_tool_call_response("correct_tool", '{"x": 5}'),
+                make_tool_call_response("correct_tool", '{"x": {"value": 5}}'),
             ]
         )
 

From 2d8b70ededd8aa07d9b9fd7c87f697873935c054 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:49:00 -0500
Subject: [PATCH 15/17] remove TestIsFinalCallTool and raw-object IsFinal tests; relax error-message assertions

---
 tests/test_handlers_llm_template.py | 92 +----------------------------
 1 file changed, 1 insertion(+), 91 deletions(-)

diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index 092b317aa..31a6574bc 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -1532,68 +1532,6 @@ def bad(x: int) -> str:
 # ---------------------------------------------------------------------------
 
 
-class TestIsFinalCallTool:
-    """Tests for call_tool behavior with IsFinal tools."""
-
-    def test_call_tool_returns_raw_result_for_final_answer_tool(self):
-        """call_tool returns the raw Python result alongside the message."""
-
-        @Tool.define
-        def final_tool(x: int) -> Annotated[int, IsFinal]:
-            """Returns a final answer."""
-            return x * 2
-
-        sig = inspect.signature(final_tool)
-        bound_args = sig.bind(x=5)
-        tc = DecodedToolCall(final_tool, bound_args, id="call_final", name="final_tool")
-
-        message, raw_result, is_final = call_tool(tc)
-        assert message["role"] == "tool"
-        assert raw_result == 10
-        assert is_final is True
-
-    def test_call_tool_returns_raw_result_for_normal_tool(self):
-        """call_tool returns the raw Python result for all tools."""
-
-        @Tool.define
-        def normal_tool(x: int) -> int:
-            """A normal tool."""
-            return x + 1
-
-        sig = inspect.signature(normal_tool)
-        bound_args = sig.bind(x=3)
-        tc = DecodedToolCall(
-            normal_tool, bound_args, id="call_normal", name="normal_tool"
-        )
-
-        message, raw_result, is_final = call_tool(tc)
-        assert message["role"] == "tool"
-        assert message["tool_call_id"] == "call_normal"
-        assert raw_result == 4
-        assert is_final is False
-
-    def test_call_tool_final_answer_with_retry_handler(self):
-        """call_tool works with RetryLLMHandler for IsFinal tools."""
-
-        @Tool.define
-        def final_tool(x: int) -> Annotated[str, IsFinal]:
-            """Returns a final answer."""
-            return f"answer: {x}"
-
-        sig = inspect.signature(final_tool)
-        bound_args = sig.bind(x=42)
-        tc = DecodedToolCall(
-            final_tool, bound_args, id="call_retry_final", name="final_tool"
-        )
-
-        with handler(RetryLLMHandler()):
-            message, raw_result, is_final = call_tool(tc)
-
-        assert message["role"] == "tool"
-        assert raw_result == "answer: 42"
-        assert is_final is True
-
-
 class TestIsFinalCompletionLoop:
     """Tests for IsFinal through the full completion loop."""
 
@@ -1622,33 +1560,6 @@ def task(n: int) -> int:
         # Only 1 call_assistant, not 2 (no final LLM round-trip)
         assert mock.call_count == 1
 
-    def test_final_answer_returns_raw_python_object(self):
-        """The returned value is the raw Python object, not serialized text."""
-
-        @dataclass
-        class MyResult:
-            value: int
-            label: str
-
-        @Tool.define
-        def make_result() -> Annotated[MyResult, IsFinal]:
-            """Create a structured result."""
-            return MyResult(value=42, label="answer")
-
-        @Template.define
-        def task() -> MyResult:
-            """Call make_result."""
-            raise NotHandled
-
-        mock = MockCompletionHandler([make_tool_call_response("make_result", "{}")])
-
-        with handler(LiteLLMProvider()), handler(mock):
-            result = task()
-
-        assert isinstance(result, MyResult)
-        assert result.value == 42
-        assert result.label == "answer"
-
     def test_agent_history_valid_after_final_answer(self):
         """Agent history has no orphaned tool_calls after IsFinal."""
 
@@ -1808,7 +1719,7 @@ def failing_final(x: int) -> Annotated[int, IsFinal]:
             message, raw_result, is_final = call_tool(tc)
 
         assert message["role"] == "tool"
-        assert "Tool execution failed" in message["content"]
+        assert message["content"]  # non-empty error feedback
         assert raw_result is None
         assert is_final is False
 
@@ -1847,7 +1758,6 @@ def wrong_type_tool(x: int) -> Annotated[str, IsFinal]:
                 )
 
         assert isinstance(exc_info.value.original_error, TypeError)
-        assert "wrong_type_tool" in str(exc_info.value.original_error)
 
     def test_matching_return_type_passes_validation(self):
         """IsFinal tool with matching return type is accepted."""

From e6c449631b3eea8b25dfe56763cb28d72a51075a Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:50:29 -0500
Subject: [PATCH 16/17] merge TestIsFinalReturnTypeValidation tests into TestIsFinalCompletionLoop

---
 tests/test_handlers_llm_template.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py
index 31a6574bc..1198637dd 100644
--- a/tests/test_handlers_llm_template.py
+++ b/tests/test_handlers_llm_template.py
@@ -1723,11 +1723,6 @@ def failing_final(x: int) -> Annotated[int, IsFinal]:
         assert raw_result is None
         assert is_final is False
 
-
-class TestIsFinalReturnTypeValidation:
-    """call_assistant should reject IsFinal tools whose return type
-    does not match the enclosing template's return type."""
-
     def test_mismatched_return_type_raises_tool_call_decoding_error(self):
         """IsFinal tool returning str when template expects int is rejected."""
 

From 0444b8c6f7520bbbe4984e5c85064cdc679359c0 Mon Sep 17 00:00:00 2001
From: Eli <eli@basis.ai>
Date: Tue, 24 Feb 2026 02:54:31 -0500
Subject: [PATCH 17/17] remove pytest-timeout

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3b79ba4e1..cdf674f59 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,7 +71,6 @@ test = [
     "ruff",
     "nbval",
     "nbqa",
-    "pytest-timeout",
 ]
 
 [dependency-groups]