From 1b8d82571527b5701f9c75e6bf9bb719ac7e143b Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 18:33:44 -0500 Subject: [PATCH 01/17] Use tool to compute final answer --- effectful/handlers/llm/__init__.py | 4 +- effectful/handlers/llm/completions.py | 53 ++++- effectful/handlers/llm/template.py | 58 +++++- tests/test_handlers_llm_template.py | 271 +++++++++++++++++++++++++- 4 files changed, 379 insertions(+), 7 deletions(-) diff --git a/effectful/handlers/llm/__init__.py b/effectful/handlers/llm/__init__.py index cdda93479..3398c3160 100644 --- a/effectful/handlers/llm/__init__.py +++ b/effectful/handlers/llm/__init__.py @@ -1,3 +1,3 @@ -from .template import Agent, Template, Tool +from .template import Agent, IsFinalAnswer, Template, Tool -__all__ = ["Agent", "Template", "Tool"] +__all__ = ["Agent", "IsFinalAnswer", "Template", "Tool"] diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 18abddcb0..470ecbfad 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -147,6 +147,23 @@ def to_feedback_message(self, include_traceback: bool) -> Message: ) +class DirectReturn[T](BaseException): + """Raised internally to short-circuit the completion loop when a tool + annotated with :class:`~effectful.handlers.llm.template.IsFinalAnswer` + produces a result. + + Extends :class:`BaseException` so it is not caught by handlers that + catch :class:`Exception` (e.g. ``call_tool``'s wrapping in + :class:`ToolCallExecutionError`, or :class:`RetryLLMHandler`). + """ + + value: T + + def __init__(self, value: T): + self.value = value + super().__init__(value) + + class DecodedToolCall[T](typing.NamedTuple): tool: Tool[..., T] bound_args: inspect.BoundArguments @@ -317,7 +334,13 @@ def call_tool(tool_call: DecodedToolCall) -> Message: string representing an LLM tool call request parameters. The output is the serialised response to the model. + If the tool is annotated with + :class:`~effectful.handlers.llm.template.IsFinalAnswer`, a + :class:`DirectReturn` exception is raised carrying the raw Python + result, which short-circuits the completion loop. """ + from effectful.handlers.llm.template import _is_final_answer_tool + # call tool with python types try: result = tool_call.tool( @@ -335,6 +358,10 @@ def call_tool(tool_call: DecodedToolCall) -> Message: dict(role="tool", content=encoded_result, tool_call_id=tool_call.id), ) append_message(message) + + if _is_final_answer_tool(tool_call.tool): + raise DirectReturn(result) + return message @@ -555,8 +582,30 @@ def _call[**P, T]( message, tool_calls, result = call_assistant( template.tools, response_model, **self.config ) - for tool_call in tool_calls: - message = call_tool(tool_call) + for i, tool_call in enumerate(tool_calls): + try: + message = call_tool(tool_call) + except DirectReturn as dr: + result = typing.cast(T, dr.value) + # Placeholder messages for remaining unprocessed + # tool calls to keep history valid for Agents. + for remaining_tc in tool_calls[i + 1 :]: + append_message( + _make_message( + dict( + role="tool", + content=[ + { + "type": "text", + "text": "[skipped]", + } + ], + tool_call_id=remaining_tc.id, + ) + ) + ) + tool_calls = [] + break try: _get_history() diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py index c84958983..992a02522 100644 --- a/effectful/handlers/llm/template.py +++ b/effectful/handlers/llm/template.py @@ -42,8 +42,9 @@ def factorial(n: int) -> Annotated[int, IsRecursive]: @classmethod def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature: - for name, ty in sig.parameters.items(): - if not ty or not typing.get_origin(ty) is Annotated: + for name, param in sig.parameters.items(): + ty = param.annotation + if ty is inspect.Parameter.empty or typing.get_origin(ty) is not Annotated: continue if any(isinstance(arg, cls) for arg in typing.get_args(ty)): raise TypeError( @@ -62,6 +63,58 @@ def _is_recursive_signature(sig: inspect.Signature): return any(annotation is IsRecursive for annotation in annotations) +class _IsFinalAnswerAnnotation(Annotation): + """ + A special type annotation for return types in the signature of a + :class:`Tool` that indicates its result should be returned directly + as the final answer of the enclosing :class:`Template`, skipping + the final LLM API call. + + .. warning:: + + :class:`IsFinalAnswer` annotations are only defined to ascribe + return annotations, and if used in a parameter will raise a + :class:`TypeError` at tool construction time. + + **Example usage**:: + + >>> from typing import Annotated + >>> from effectful.handlers.llm import Tool + >>> from effectful.handlers.llm.template import IsFinalAnswer + + >>> @Tool.define + ... def generate(prompt: str) -> Annotated[str, IsFinalAnswer]: + ... \"""Generate content for the given prompt.\""" + ... return "generated content" + """ + + @classmethod + def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature: + for name, param in sig.parameters.items(): + ty = param.annotation + if ty is inspect.Parameter.empty or typing.get_origin(ty) is not Annotated: + continue + if any(isinstance(arg, cls) for arg in typing.get_args(ty)): + raise TypeError( + f"Illegal annotation {ty} for parameter {name}, " + "IsFinalAnswer must only be used to annotate return types." + ) + return sig + + +IsFinalAnswer = _IsFinalAnswerAnnotation() + + +def _is_final_answer_tool(tool: Any) -> bool: + """Check if a tool's return type is annotated with IsFinalAnswer.""" + ret = tool.__signature__.return_annotation + if typing.get_origin(ret) is not Annotated: + return False + return any( + isinstance(arg, _IsFinalAnswerAnnotation) for arg in typing.get_args(ret) + ) + + class Tool[**P, T](Operation[P, T]): """A :class:`Tool` is a function that may be called by a :class:`Template`. @@ -96,6 +149,7 @@ def __init__( if not default.__doc__: raise ValueError("Tools must have docstrings.") signature = IsRecursive.infer_annotations(signature) + signature = IsFinalAnswer.infer_annotations(signature) super().__init__(signature, name, default) @classmethod diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index bd63ea551..181b3abf1 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -4,17 +4,22 @@ import dataclasses import inspect from dataclasses import dataclass +from typing import Annotated import pytest from litellm import ModelResponse -from effectful.handlers.llm import Agent, Template, Tool +from effectful.handlers.llm import Agent, IsFinalAnswer, Template, Tool from effectful.handlers.llm.completions import ( + DecodedToolCall, + DirectReturn, LiteLLMProvider, RetryLLMHandler, + call_tool, call_user, completion, ) +from effectful.handlers.llm.template import _is_final_answer_tool from effectful.ops.semantics import handler from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import NotHandled @@ -1324,3 +1329,267 @@ def test_validate_format_spec_on_undefined_var(): def bad(x: int) -> str: """Value: {x} and {missing:.2f}.""" raise NotHandled + + +# --------------------------------------------------------------------------- +# IsFinalAnswer annotation tests +# --------------------------------------------------------------------------- + + +class TestIsFinalAnswerAnnotation: + """Tests for the IsFinalAnswer type annotation.""" + + def test_tool_with_is_final_answer_return_type(self): + """Tool with IsFinalAnswer on return type creates successfully.""" + + @Tool.define + def my_tool(x: int) -> Annotated[str, IsFinalAnswer]: + """A tool that returns a final answer.""" + return str(x) + + assert _is_final_answer_tool(my_tool) + + def test_tool_without_is_final_answer(self): + """Normal tool is not detected as final answer.""" + + @Tool.define + def normal_tool(x: int) -> str: + """A normal tool.""" + return str(x) + + assert not _is_final_answer_tool(normal_tool) + + def test_is_final_answer_on_parameter_raises(self): + """IsFinalAnswer on a parameter raises TypeError at define time.""" + with pytest.raises(TypeError, match="IsFinalAnswer"): + + @Tool.define + def bad_tool(x: Annotated[int, IsFinalAnswer]) -> str: + """A tool with bad annotation.""" + return str(x) + + def test_is_final_answer_combined_with_is_recursive(self): + """IsFinalAnswer and IsRecursive can coexist on a return type.""" + from effectful.handlers.llm.template import IsRecursive + + @Tool.define + def combo_tool(x: int) -> Annotated[str, IsFinalAnswer, IsRecursive]: + """A tool with both annotations.""" + return str(x) + + assert _is_final_answer_tool(combo_tool) + + +class TestIsFinalAnswerCallTool: + """Tests for call_tool behavior with IsFinalAnswer tools.""" + + def test_call_tool_raises_direct_return_for_final_answer_tool(self): + """call_tool raises DirectReturn when tool has IsFinalAnswer.""" + + @Tool.define + def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: + """Returns a final answer.""" + return x * 2 + + sig = inspect.signature(final_tool) + bound_args = sig.bind(x=5) + tc = DecodedToolCall(final_tool, bound_args, "call_final") + + with pytest.raises(DirectReturn) as exc_info: + call_tool(tc) + + assert exc_info.value.value == 10 + + def test_call_tool_normal_for_non_final_answer_tool(self): + """call_tool returns a Message normally for non-IsFinalAnswer tools.""" + + @Tool.define + def normal_tool(x: int) -> int: + """A normal tool.""" + return x + 1 + + sig = inspect.signature(normal_tool) + bound_args = sig.bind(x=3) + tc = DecodedToolCall(normal_tool, bound_args, "call_normal") + + result = call_tool(tc) + assert result["role"] == "tool" + assert result["tool_call_id"] == "call_normal" + + def test_call_tool_final_answer_with_retry_handler(self): + """DirectReturn propagates through RetryLLMHandler._call_tool.""" + + @Tool.define + def final_tool(x: int) -> Annotated[str, IsFinalAnswer]: + """Returns a final answer.""" + return f"answer: {x}" + + sig = inspect.signature(final_tool) + bound_args = sig.bind(x=42) + tc = DecodedToolCall(final_tool, bound_args, "call_retry_final") + + with pytest.raises(DirectReturn) as exc_info: + with handler(RetryLLMHandler()): + call_tool(tc) + + assert exc_info.value.value == "answer: 42" + + +class TestIsFinalAnswerCompletionLoop: + """Tests for IsFinalAnswer through the full completion loop.""" + + def test_final_answer_tool_skips_final_llm_call(self): + """When LLM calls a final-answer tool, result is returned + directly without a second call_assistant invocation.""" + + @Tool.define + def compute(x: int) -> Annotated[int, IsFinalAnswer]: + """Compute and return the result directly.""" + return x * 10 + + @Template.define + def task(n: int) -> int: + """Call compute with {n}.""" + raise NotHandled + + mock = MockCompletionHandler( + [make_tool_call_response("compute", '{"x": 7}')] + ) + + with handler(LiteLLMProvider()), handler(mock): + result = task(7) + + assert result == 70 + # Only 1 call_assistant, not 2 (no final LLM round-trip) + assert mock.call_count == 1 + + def test_final_answer_returns_raw_python_object(self): + """The returned value is the raw Python object, not serialized text.""" + + @dataclass + class MyResult: + value: int + label: str + + @Tool.define + def make_result() -> Annotated[MyResult, IsFinalAnswer]: + """Create a structured result.""" + return MyResult(value=42, label="answer") + + @Template.define + def task() -> MyResult: + """Call make_result.""" + raise NotHandled + + mock = MockCompletionHandler( + [make_tool_call_response("make_result", "{}")] + ) + + with handler(LiteLLMProvider()), handler(mock): + result = task() + + assert isinstance(result, MyResult) + assert result.value == 42 + assert result.label == "answer" + + def test_agent_history_valid_after_final_answer(self): + """Agent history has no orphaned tool_calls after IsFinalAnswer.""" + + @Tool.define + def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: + """Return final answer.""" + return x + + @dataclasses.dataclass + class MyAgent(Agent): + @Template.define + def do_work(self, n: int) -> int: + """Process {n}.""" + raise NotHandled + + mock = MockCompletionHandler( + [make_tool_call_response("final_tool", '{"x": 5}')] + ) + agent = MyAgent() + + with handler(LiteLLMProvider()), handler(mock): + result = agent.do_work(5) + + assert result == 5 + + # Verify no orphaned tool_calls in history + for msg in agent.__history__.values(): + tool_calls = msg.get("tool_calls") + if tool_calls: + for tc in tool_calls: + tc_id = tc["id"] if isinstance(tc, dict) else tc.id + has_response = any( + m.get("tool_call_id") == tc_id + for m in agent.__history__.values() + if m.get("role") == "tool" + ) + assert has_response, ( + f"Orphaned tool_call {tc_id} in history" + ) + + def test_agent_subsequent_call_after_final_answer(self): + """A follow-up call on the same Agent works after IsFinalAnswer.""" + + @Tool.define + def final_tool() -> Annotated[str, IsFinalAnswer]: + """Return final answer.""" + return "direct result" + + @dataclasses.dataclass + class MyAgent(Agent): + @Template.define + def step(self, msg: str) -> str: + """Do: {msg}""" + raise NotHandled + + call_count = 0 + + class PhaseHandler(ObjectInterpretation): + @implements(completion) + def _completion(self, model, messages=None, **kwargs): + nonlocal call_count + call_count += 1 + if call_count == 1: + return make_tool_call_response("final_tool", "{}") + return make_text_response('{"value": "llm result"}') + + agent = MyAgent() + + with handler(LiteLLMProvider()), handler(PhaseHandler()): + r1 = agent.step("first") + r2 = agent.step("second") + + assert r1 == "direct result" + assert r2 == "llm result" + + def test_final_answer_with_retry_handler_active(self): + """IsFinalAnswer works correctly with RetryLLMHandler.""" + + @Tool.define + def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: + """Return final answer.""" + return x * 3 + + @Template.define + def task(n: int) -> int: + """Call final_tool with {n}.""" + raise NotHandled + + mock = MockCompletionHandler( + [make_tool_call_response("final_tool", '{"x": 4}')] + ) + + with ( + handler(LiteLLMProvider()), + handler(RetryLLMHandler()), + handler(mock), + ): + result = task(4) + + assert result == 12 + assert mock.call_count == 1 From 6495fc20affcb004c82cf43f22ed7397f8ca1b7c Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 19:31:22 -0500 Subject: [PATCH 02/17] stash --- effectful/handlers/llm/completions.py | 97 ++++++++++----------------- tests/test_handlers_llm_provider.py | 14 ++-- tests/test_handlers_llm_template.py | 49 ++++++-------- 3 files changed, 65 insertions(+), 95 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 470ecbfad..236d65e49 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -147,27 +147,12 @@ def to_feedback_message(self, include_traceback: bool) -> Message: ) -class DirectReturn[T](BaseException): - """Raised internally to short-circuit the completion loop when a tool - annotated with :class:`~effectful.handlers.llm.template.IsFinalAnswer` - produces a result. - - Extends :class:`BaseException` so it is not caught by handlers that - catch :class:`Exception` (e.g. ``call_tool``'s wrapping in - :class:`ToolCallExecutionError`, or :class:`RetryLLMHandler`). - """ - - value: T - - def __init__(self, value: T): - self.value = value - super().__init__(value) - - -class DecodedToolCall[T](typing.NamedTuple): +@dataclasses.dataclass +class DecodedToolCall[T]: tool: Tool[..., T] bound_args: inspect.BoundArguments id: ToolCallID + is_final: bool = False type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None] @@ -248,7 +233,11 @@ def decode_tool_call( tool_name, tool_call.id, e, raw_message=raw_message ) from e - return DecodedToolCall(tool, bound_sig, tool_call.id) + from effectful.handlers.llm.template import _is_final_answer_tool + + return DecodedToolCall( + tool, bound_sig, tool_call.id, is_final=_is_final_answer_tool(tool) + ) @Operation.define @@ -312,6 +301,18 @@ def call_assistant[T, U]( decoded_tool_call = decode_tool_call(validated_tool_call, tools, raw_message) tool_calls.append(decoded_tool_call) + if any(tc.is_final for tc in tool_calls) and len(tool_calls) > 1: + final_name = next(tc.tool.__name__ for tc in tool_calls if tc.is_final) + raise ToolCallDecodingError( + final_name, + next(tc.id for tc in tool_calls if tc.is_final), + ValueError( + f"IsFinalAnswer tool '{final_name}' must be the only tool call " + f"in a round, but {len(tool_calls)} tool calls were generated." + ), + raw_message=raw_message, + ) + result = None if not tool_calls: # return response @@ -329,18 +330,13 @@ def call_assistant[T, U]( @Operation.define -def call_tool(tool_call: DecodedToolCall) -> Message: - """Implements a roundtrip call to a python function. Input is a json - string representing an LLM tool call request parameters. The output is - the serialised response to the model. - - If the tool is annotated with - :class:`~effectful.handlers.llm.template.IsFinalAnswer`, a - :class:`DirectReturn` exception is raised carrying the raw Python - result, which short-circuits the completion loop. - """ - from effectful.handlers.llm.template import _is_final_answer_tool +def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T]: + """Execute a tool and return the serialised message and the raw Python result. + The message is appended to the conversation history. The raw result is + returned alongside so that callers (e.g. the completion loop) can use it + directly when the tool is marked ``is_final``. + """ # call tool with python types try: result = tool_call.tool( @@ -358,11 +354,7 @@ def call_tool(tool_call: DecodedToolCall) -> Message: dict(role="tool", content=encoded_result, tool_call_id=tool_call.id), ) append_message(message) - - if _is_final_answer_tool(tool_call.tool): - raise DirectReturn(result) - - return message + return message, result @Operation.define @@ -524,7 +516,7 @@ def _attempt() -> MessageResult[T]: return (message, tool_calls, result) @implements(call_tool) - def _call_tool(self, tool_call: DecodedToolCall) -> Message: + def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None]: """Handle tool execution with runtime error capture. Runtime errors from tool execution are captured and returned as @@ -537,7 +529,7 @@ def _call_tool(self, tool_call: DecodedToolCall) -> Message: if isinstance(e.original_error, self.catch_tool_errors): message = e.to_feedback_message(self.include_traceback) append_message(message) - return message + return message, None else: raise @@ -578,34 +570,19 @@ def _call[**P, T]( # loop based on: https://cookbook.openai.com/examples/reasoning_function_calls tool_calls: list[DecodedToolCall] = [] result: T | None = None + is_final = False while message["role"] != "assistant" or tool_calls: message, tool_calls, result = call_assistant( template.tools, response_model, **self.config ) - for i, tool_call in enumerate(tool_calls): - try: - message = call_tool(tool_call) - except DirectReturn as dr: - result = typing.cast(T, dr.value) - # Placeholder messages for remaining unprocessed - # tool calls to keep history valid for Agents. - for remaining_tc in tool_calls[i + 1 :]: - append_message( - _make_message( - dict( - role="tool", - content=[ - { - "type": "text", - "text": "[skipped]", - } - ], - tool_call_id=remaining_tc.id, - ) - ) - ) - tool_calls = [] + for tool_call in tool_calls: + message, raw_result = call_tool(tool_call) + if tool_call.is_final: + result = typing.cast(T, raw_result) + is_final = True break + if is_final: + break try: _get_history() diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index db0df23c9..b79ba8e02 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -970,7 +970,7 @@ def test_retry_handler_catches_tool_runtime_error(self): tool_call = DecodedToolCall(failing_tool, bound_args, "call_1") with handler(RetryLLMHandler()): - result = call_tool(tool_call) + result, _ = call_tool(tool_call) # The result should be an error message, not an exception assert result["role"] == "tool" @@ -987,7 +987,7 @@ def test_retry_handler_catches_division_by_zero(self): tool_call = DecodedToolCall(divide_tool, bound_args, "call_div") with handler(RetryLLMHandler()): - result = call_tool(tool_call) + result, _ = call_tool(tool_call) assert result["role"] == "tool" assert result["tool_call_id"] == "call_div" @@ -1002,7 +1002,7 @@ def test_successful_tool_execution_returns_result(self): tool_call = DecodedToolCall(add_numbers, bound_args, "call_add") with handler(RetryLLMHandler()): - result = call_tool(tool_call) + result, _ = call_tool(tool_call) assert result["role"] == "tool" assert result["tool_call_id"] == "call_add" @@ -1553,7 +1553,7 @@ def test_call_tool_success_does_not_raise(self): bound_args = sig.bind(a=3, b=4) tc = DecodedToolCall(add_numbers, bound_args, "call_ok") - result = call_tool(tc) + result, _ = call_tool(tc) assert result["role"] == "tool" assert result["tool_call_id"] == "call_ok" @@ -1568,7 +1568,7 @@ def test_matching_error_returns_feedback_message(self): tc = DecodedToolCall(flaky_tool, bound_args, "call_match") with handler(RetryLLMHandler(catch_tool_errors=ConnectionError)): - result = call_tool(tc) + result, _ = call_tool(tc) assert result["role"] == "tool" assert result["tool_call_id"] == "call_match" @@ -1595,7 +1595,7 @@ def test_default_catch_all_catches_everything(self): tc = DecodedToolCall(type_error_tool, bound_args, "call_default") with handler(RetryLLMHandler()): - result = call_tool(tc) + result, _ = call_tool(tc) assert result["role"] == "tool" assert "Tool execution failed" in result["content"] @@ -1611,7 +1611,7 @@ def test_tuple_of_error_types(self): catch_tool_errors=(ConnectionError, ValueError), ) ): - result = call_tool(tc) + result, _ = call_tool(tc) assert result["role"] == "tool" assert "Tool execution failed" in result["content"] diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index 181b3abf1..283f32029 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -12,7 +12,6 @@ from effectful.handlers.llm import Agent, IsFinalAnswer, Template, Tool from effectful.handlers.llm.completions import ( DecodedToolCall, - DirectReturn, LiteLLMProvider, RetryLLMHandler, call_tool, @@ -1383,8 +1382,8 @@ def combo_tool(x: int) -> Annotated[str, IsFinalAnswer, IsRecursive]: class TestIsFinalAnswerCallTool: """Tests for call_tool behavior with IsFinalAnswer tools.""" - def test_call_tool_raises_direct_return_for_final_answer_tool(self): - """call_tool raises DirectReturn when tool has IsFinalAnswer.""" + def test_call_tool_returns_raw_result_for_final_answer_tool(self): + """call_tool returns the raw Python result alongside the message.""" @Tool.define def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: @@ -1393,15 +1392,14 @@ def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: sig = inspect.signature(final_tool) bound_args = sig.bind(x=5) - tc = DecodedToolCall(final_tool, bound_args, "call_final") + tc = DecodedToolCall(final_tool, bound_args, "call_final", is_final=True) - with pytest.raises(DirectReturn) as exc_info: - call_tool(tc) + message, raw_result = call_tool(tc) + assert message["role"] == "tool" + assert raw_result == 10 - assert exc_info.value.value == 10 - - def test_call_tool_normal_for_non_final_answer_tool(self): - """call_tool returns a Message normally for non-IsFinalAnswer tools.""" + def test_call_tool_returns_raw_result_for_normal_tool(self): + """call_tool returns the raw Python result for all tools.""" @Tool.define def normal_tool(x: int) -> int: @@ -1412,12 +1410,13 @@ def normal_tool(x: int) -> int: bound_args = sig.bind(x=3) tc = DecodedToolCall(normal_tool, bound_args, "call_normal") - result = call_tool(tc) - assert result["role"] == "tool" - assert result["tool_call_id"] == "call_normal" + message, raw_result = call_tool(tc) + assert message["role"] == "tool" + assert message["tool_call_id"] == "call_normal" + assert raw_result == 4 def test_call_tool_final_answer_with_retry_handler(self): - """DirectReturn propagates through RetryLLMHandler._call_tool.""" + """call_tool works with RetryLLMHandler for IsFinalAnswer tools.""" @Tool.define def final_tool(x: int) -> Annotated[str, IsFinalAnswer]: @@ -1426,13 +1425,13 @@ def final_tool(x: int) -> Annotated[str, IsFinalAnswer]: sig = inspect.signature(final_tool) bound_args = sig.bind(x=42) - tc = DecodedToolCall(final_tool, bound_args, "call_retry_final") + tc = DecodedToolCall(final_tool, bound_args, "call_retry_final", is_final=True) - with pytest.raises(DirectReturn) as exc_info: - with handler(RetryLLMHandler()): - call_tool(tc) + with handler(RetryLLMHandler()): + message, raw_result = call_tool(tc) - assert exc_info.value.value == "answer: 42" + assert message["role"] == "tool" + assert raw_result == "answer: 42" class TestIsFinalAnswerCompletionLoop: @@ -1452,9 +1451,7 @@ def task(n: int) -> int: """Call compute with {n}.""" raise NotHandled - mock = MockCompletionHandler( - [make_tool_call_response("compute", '{"x": 7}')] - ) + mock = MockCompletionHandler([make_tool_call_response("compute", '{"x": 7}')]) with handler(LiteLLMProvider()), handler(mock): result = task(7) @@ -1481,9 +1478,7 @@ def task() -> MyResult: """Call make_result.""" raise NotHandled - mock = MockCompletionHandler( - [make_tool_call_response("make_result", "{}")] - ) + mock = MockCompletionHandler([make_tool_call_response("make_result", "{}")]) with handler(LiteLLMProvider()), handler(mock): result = task() @@ -1528,9 +1523,7 @@ def do_work(self, n: int) -> int: for m in agent.__history__.values() if m.get("role") == "tool" ) - assert has_response, ( - f"Orphaned tool_call {tc_id} in history" - ) + assert has_response, f"Orphaned tool_call {tc_id} in history" def test_agent_subsequent_call_after_final_answer(self): """A follow-up call on the same Agent works after IsFinalAnswer.""" From 0005c1ce6f075d25e1e8f71a1bffd037f712be07 Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 20:15:10 -0500 Subject: [PATCH 03/17] stash --- effectful/handlers/llm/completions.py | 40 +++++++++------- tests/test_handlers_llm_provider.py | 30 ++++++------ tests/test_handlers_llm_template.py | 68 +++++++++++++++++++++++++-- 3 files changed, 102 insertions(+), 36 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 236d65e49..1ac0e9e20 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -155,7 +155,7 @@ class DecodedToolCall[T]: is_final: bool = False -type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None] +type MessageResult[T] = tuple[Message, typing.Sequence[DecodedToolCall], T | None, bool] @functools.cache @@ -326,16 +326,19 @@ def call_assistant[T, U]( except (pydantic.ValidationError, TypeError, ValueError, SyntaxError) as e: raise ResultDecodingError(e, raw_message=raw_message) from e - return (raw_message, tool_calls, result) + is_final = any(tc.is_final for tc in tool_calls) + return (raw_message, tool_calls, result, is_final) @Operation.define -def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T]: - """Execute a tool and return the serialised message and the raw Python result. - - The message is appended to the conversation history. The raw result is - returned alongside so that callers (e.g. the completion loop) can use it - directly when the tool is marked ``is_final``. +def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool]: + """Execute a tool and return the serialised message, the raw result, and + whether this result is a final answer. + + Returns: + A 3-tuple ``(message, result, is_final)``. ``message`` is appended + to the conversation history. When ``is_final`` is ``True`` the + completion loop uses ``result`` directly as the template return value. """ # call tool with python types try: @@ -354,7 +357,7 @@ def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T]: dict(role="tool", content=encoded_result, tool_call_id=tool_call.id), ) append_message(message) - return message, result + return message, result, tool_call.is_final @Operation.define @@ -510,18 +513,20 @@ def _attempt() -> MessageResult[T]: return fwd(tools, response_format, model, **kwargs) with handler({_get_history: lambda: _message_sequence}): - message, tool_calls, result = self.call_assistant_retryer(_attempt) + message, tool_calls, result, is_final = self.call_assistant_retryer(_attempt) append_message(message) - return (message, tool_calls, result) + return (message, tool_calls, result, is_final) @implements(call_tool) - def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None]: + def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool]: """Handle tool execution with runtime error capture. Runtime errors from tool execution are captured and returned as error messages to the LLM. Only exceptions matching `catch_tool_errors` - are caught; others propagate up. + are caught; others propagate up. When an error is caught, + ``is_final`` is always ``False`` so the error feedback goes back + to the LLM rather than being mistaken for a final answer. """ try: return fwd(tool_call) @@ -529,7 +534,7 @@ def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | Non if isinstance(e.original_error, self.catch_tool_errors): message = e.to_feedback_message(self.include_traceback) append_message(message) - return message, None + return message, None, False else: raise @@ -572,14 +577,13 @@ def _call[**P, T]( result: T | None = None is_final = False while message["role"] != "assistant" or tool_calls: - message, tool_calls, result = call_assistant( + message, tool_calls, result, is_final = call_assistant( template.tools, response_model, **self.config ) for tool_call in tool_calls: - message, raw_result = call_tool(tool_call) - if tool_call.is_final: + message, raw_result, is_final = call_tool(tool_call) + if is_final: result = typing.cast(T, raw_result) - is_final = True break if is_final: break diff --git a/tests/test_handlers_llm_provider.py b/tests/test_handlers_llm_provider.py index b79ba8e02..ab250c3e7 100644 --- a/tests/test_handlers_llm_provider.py +++ b/tests/test_handlers_llm_provider.py @@ -506,7 +506,7 @@ def test_retry_handler_succeeds_on_first_attempt(self): handler(mock_handler), handler(message_sequence_provider), ): - message, tool_calls, result = call_assistant( + message, tool_calls, result, _ = call_assistant( tools={}, response_format=Encodable.define(str), model="test-model", @@ -536,7 +536,7 @@ def test_retry_handler_retries_on_invalid_tool_call(self): handler(mock_handler), handler(message_sequence_provider), ): - message, tool_calls, result = call_assistant( + message, tool_calls, result, _ = call_assistant( tools={"add_numbers": add_numbers}, response_format=Encodable.define(str), model="test-model", @@ -568,7 +568,7 @@ def test_retry_handler_retries_on_unknown_tool(self): handler(mock_handler), handler(message_sequence_provider), ): - message, tool_calls, result = call_assistant( + message, tool_calls, result, _ = call_assistant( tools={"add_numbers": add_numbers}, response_format=Encodable.define(str), model="test-model", @@ -645,7 +645,7 @@ def test_retry_handler_valid_tool_call_passes_through(self): handler(mock_handler), handler(message_sequence_provider), ): - message, tool_calls, result = call_assistant( + message, tool_calls, result, _ = call_assistant( tools={"add_numbers": add_numbers}, response_format=Encodable.define(str), model="test-model", @@ -720,7 +720,7 @@ def test_retry_handler_retries_on_invalid_result(self): handler(mock_handler), handler(message_sequence_provider), ): - message, tool_calls, result = call_assistant( + message, tool_calls, result, _ = call_assistant( tools={}, response_format=Encodable.define(int), model="test-model", @@ -970,7 +970,7 @@ def test_retry_handler_catches_tool_runtime_error(self): tool_call = DecodedToolCall(failing_tool, bound_args, "call_1") with handler(RetryLLMHandler()): - result, _ = call_tool(tool_call) + result, _, _ = call_tool(tool_call) # The result should be an error message, not an exception assert result["role"] == "tool" @@ -987,7 +987,7 @@ def test_retry_handler_catches_division_by_zero(self): tool_call = DecodedToolCall(divide_tool, bound_args, "call_div") with handler(RetryLLMHandler()): - result, _ = call_tool(tool_call) + result, _, _ = call_tool(tool_call) assert result["role"] == "tool" assert result["tool_call_id"] == "call_div" @@ -1002,7 +1002,7 @@ def test_successful_tool_execution_returns_result(self): tool_call = DecodedToolCall(add_numbers, bound_args, "call_add") with handler(RetryLLMHandler()): - result, _ = call_tool(tool_call) + result, _, _ = call_tool(tool_call) assert result["role"] == "tool" assert result["tool_call_id"] == "call_add" @@ -1039,7 +1039,7 @@ def _call_assistant(self, tools, response_format, model, **kwargs): handler(mock_handler), handler(message_sequence_provider), ): - message, tool_calls, result = call_assistant( + message, tool_calls, result, _ = call_assistant( tools={"failing_tool": failing_tool}, response_format=Encodable.define(str), model="test-model", @@ -1382,13 +1382,13 @@ def _completion(self_, model, messages, *args, **kwargs): handler({_get_history: lambda: message_sequence}), ): # First call: input is the latest message (msg_user) - resp1, _, _ = call_assistant( + resp1, _, _, _ = call_assistant( tools={}, response_format=Encodable.define(str), model="test-model", ) # Second call: input is the first response - resp2, _, _ = call_assistant( + resp2, _, _, _ = call_assistant( tools={}, response_format=Encodable.define(str), model="test-model", @@ -1553,7 +1553,7 @@ def test_call_tool_success_does_not_raise(self): bound_args = sig.bind(a=3, b=4) tc = DecodedToolCall(add_numbers, bound_args, "call_ok") - result, _ = call_tool(tc) + result, _, _ = call_tool(tc) assert result["role"] == "tool" assert result["tool_call_id"] == "call_ok" @@ -1568,7 +1568,7 @@ def test_matching_error_returns_feedback_message(self): tc = DecodedToolCall(flaky_tool, bound_args, "call_match") with handler(RetryLLMHandler(catch_tool_errors=ConnectionError)): - result, _ = call_tool(tc) + result, _, _ = call_tool(tc) assert result["role"] == "tool" assert result["tool_call_id"] == "call_match" @@ -1595,7 +1595,7 @@ def test_default_catch_all_catches_everything(self): tc = DecodedToolCall(type_error_tool, bound_args, "call_default") with handler(RetryLLMHandler()): - result, _ = call_tool(tc) + result, _, _ = call_tool(tc) assert result["role"] == "tool" assert "Tool execution failed" in result["content"] @@ -1611,7 +1611,7 @@ def test_tuple_of_error_types(self): catch_tool_errors=(ConnectionError, ValueError), ) ): - result, _ = call_tool(tc) + result, _, _ = call_tool(tc) assert result["role"] == "tool" assert "Tool execution failed" in result["content"] diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index 283f32029..e06c15335 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -1394,9 +1394,10 @@ def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: bound_args = sig.bind(x=5) tc = DecodedToolCall(final_tool, bound_args, "call_final", is_final=True) - message, raw_result = call_tool(tc) + message, raw_result, is_final = call_tool(tc) assert message["role"] == "tool" assert raw_result == 10 + assert is_final is True def test_call_tool_returns_raw_result_for_normal_tool(self): """call_tool returns the raw Python result for all tools.""" @@ -1410,10 +1411,11 @@ def normal_tool(x: int) -> int: bound_args = sig.bind(x=3) tc = DecodedToolCall(normal_tool, bound_args, "call_normal") - message, raw_result = call_tool(tc) + message, raw_result, is_final = call_tool(tc) assert message["role"] == "tool" assert message["tool_call_id"] == "call_normal" assert raw_result == 4 + assert is_final is False def test_call_tool_final_answer_with_retry_handler(self): """call_tool works with RetryLLMHandler for IsFinalAnswer tools.""" @@ -1428,10 +1430,11 @@ def final_tool(x: int) -> Annotated[str, IsFinalAnswer]: tc = DecodedToolCall(final_tool, bound_args, "call_retry_final", is_final=True) with handler(RetryLLMHandler()): - message, raw_result = call_tool(tc) + message, raw_result, is_final = call_tool(tc) assert message["role"] == "tool" assert raw_result == "answer: 42" + assert is_final is True class TestIsFinalAnswerCompletionLoop: @@ -1586,3 +1589,62 @@ def task(n: int) -> int: assert result == 12 assert mock.call_count == 1 + + def test_retry_handler_error_on_final_tool_does_not_produce_final_answer(self): + """When RetryLLMHandler catches an error on an is_final tool, + the error feedback goes back to the LLM instead of None being + returned as the final answer.""" + call_count = 0 + + @Tool.define + def flaky_final(x: int) -> Annotated[int, IsFinalAnswer]: + """Return a final answer, but fail on first call.""" + nonlocal call_count + call_count += 1 + if call_count == 1: + raise ValueError("transient failure") + return x * 10 + + @Template.define + def task(n: int) -> int: + """Call flaky_final with {n}.""" + raise NotHandled + + # Round 1: LLM calls flaky_final → error caught by RetryLLMHandler + # Round 2: LLM calls flaky_final again → succeeds + mock = MockCompletionHandler([ + make_tool_call_response("flaky_final", '{"x": 5}'), + make_tool_call_response("flaky_final", '{"x": 5}'), + ]) + + with ( + handler(LiteLLMProvider()), + handler(RetryLLMHandler()), + handler(mock), + ): + result = task(5) + + assert result == 50 # NOT None + assert call_count == 2 + assert mock.call_count == 2 + + def test_call_tool_returns_is_final_false_on_retry_handler_error(self): + """call_tool returns is_final=False when RetryLLMHandler catches + an error on an is_final tool.""" + + @Tool.define + def failing_final(x: int) -> Annotated[int, IsFinalAnswer]: + """Return a final answer.""" + raise ValueError("boom") + + sig = inspect.signature(failing_final) + bound_args = sig.bind(x=1) + tc = DecodedToolCall(failing_final, bound_args, "call_err", is_final=True) + + with handler(RetryLLMHandler()): + message, raw_result, is_final = call_tool(tc) + + assert message["role"] == "tool" + assert "Tool execution failed" in message["content"] + assert raw_result is None + assert is_final is False From d46da71b2c75a3d282f74cc2601d9cde920bdab0 Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 20:20:34 -0500 Subject: [PATCH 04/17] interaction with retry --- effectful/handlers/llm/completions.py | 48 +++++++++++----- pyproject.toml | 1 + tests/test_handlers_llm_template.py | 82 +++++++++++++++++++++++++-- 3 files changed, 114 insertions(+), 17 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 1ac0e9e20..4f1cbfa7d 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -301,17 +301,35 @@ def call_assistant[T, U]( decoded_tool_call = decode_tool_call(validated_tool_call, tools, raw_message) tool_calls.append(decoded_tool_call) - if any(tc.is_final for tc in tool_calls) and len(tool_calls) > 1: - final_name = next(tc.tool.__name__ for tc in tool_calls if tc.is_final) - raise ToolCallDecodingError( - final_name, - next(tc.id for tc in tool_calls if tc.is_final), - ValueError( - f"IsFinalAnswer tool '{final_name}' must be the only tool call " - f"in a round, but {len(tool_calls)} tool calls were generated." - ), - raw_message=raw_message, - ) + final_tcs = [tc for tc in tool_calls if tc.is_final] + if final_tcs: + final_tc = final_tcs[0] + if len(tool_calls) > 1: + raise ToolCallDecodingError( + final_tc.tool.__name__, + final_tc.id, + ValueError( + f"IsFinalAnswer tool '{final_tc.tool.__name__}' must be the " + f"only tool call in a round, but {len(tool_calls)} tool calls " + f"were generated." + ), + raw_message=raw_message, + ) + # Validate that the tool's return type matches the template's. + tool_ret = inspect.signature(final_tc.tool).return_annotation + if typing.get_origin(tool_ret) is typing.Annotated: + tool_ret = typing.get_args(tool_ret)[0] + if tool_ret != response_format.base: + raise ToolCallDecodingError( + final_tc.tool.__name__, + final_tc.id, + TypeError( + f"IsFinalAnswer tool '{final_tc.tool.__name__}' returns " + f"{tool_ret!r}, but the enclosing template expects " + f"{response_format.base!r}." + ), + raw_message=raw_message, + ) result = None if not tool_calls: @@ -513,13 +531,17 @@ def _attempt() -> MessageResult[T]: return fwd(tools, response_format, model, **kwargs) with handler({_get_history: lambda: _message_sequence}): - message, tool_calls, result, is_final = self.call_assistant_retryer(_attempt) + message, tool_calls, result, is_final = self.call_assistant_retryer( + _attempt + ) append_message(message) return (message, tool_calls, result, is_final) @implements(call_tool) - def _call_tool[T](self, tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool]: + def _call_tool[T]( + self, tool_call: DecodedToolCall[T] + ) -> tuple[Message, T | None, bool]: """Handle tool execution with runtime error capture. Runtime errors from tool execution are captured and returned as diff --git a/pyproject.toml b/pyproject.toml index cdf674f59..3b79ba4e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ test = [ "ruff", "nbval", "nbqa", + "pytest-timeout", ] [dependency-groups] diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index e06c15335..5b3b538db 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -14,10 +14,14 @@ DecodedToolCall, LiteLLMProvider, RetryLLMHandler, + ToolCallDecodingError, + _get_history, + call_assistant, call_tool, call_user, completion, ) +from effectful.handlers.llm.encoding import Encodable from effectful.handlers.llm.template import _is_final_answer_tool from effectful.ops.semantics import handler from effectful.ops.syntax import ObjectInterpretation, implements @@ -1612,10 +1616,12 @@ def task(n: int) -> int: # Round 1: LLM calls flaky_final → error caught by RetryLLMHandler # Round 2: LLM calls flaky_final again → succeeds - mock = MockCompletionHandler([ - make_tool_call_response("flaky_final", '{"x": 5}'), - make_tool_call_response("flaky_final", '{"x": 5}'), - ]) + mock = MockCompletionHandler( + [ + make_tool_call_response("flaky_final", '{"x": 5}'), + make_tool_call_response("flaky_final", '{"x": 5}'), + ] + ) with ( handler(LiteLLMProvider()), @@ -1648,3 +1654,71 @@ def failing_final(x: int) -> Annotated[int, IsFinalAnswer]: assert "Tool execution failed" in message["content"] assert raw_result is None assert is_final is False + + +class TestIsFinalAnswerReturnTypeValidation: + """call_assistant should reject IsFinalAnswer tools whose return type + does not match the enclosing template's return type.""" + + def test_mismatched_return_type_raises_tool_call_decoding_error(self): + """IsFinalAnswer tool returning str when template expects int is rejected.""" + + @Tool.define + def wrong_type_tool(x: int) -> Annotated[str, IsFinalAnswer]: + """Return a string, but template expects int.""" + return str(x) + + message_sequence = collections.OrderedDict( + id1={"id": "id1", "role": "user", "content": "test"}, + ) + + mock = MockCompletionHandler( + [ + make_tool_call_response("wrong_type_tool", '{"x": 5}'), + ] + ) + + with ( + handler(mock), + handler({_get_history: lambda: message_sequence}), + ): + with pytest.raises(ToolCallDecodingError) as exc_info: + call_assistant( + tools={"wrong_type_tool": wrong_type_tool}, + response_format=Encodable.define(int), + model="test-model", + ) + + assert isinstance(exc_info.value.original_error, TypeError) + assert "wrong_type_tool" in str(exc_info.value.original_error) + + def test_matching_return_type_passes_validation(self): + """IsFinalAnswer tool with matching return type is accepted.""" + + @Tool.define + def correct_tool(x: int) -> Annotated[int, IsFinalAnswer]: + """Return an int matching template.""" + return x * 2 + + message_sequence = collections.OrderedDict( + id1={"id": "id1", "role": "user", "content": "test"}, + ) + + mock = MockCompletionHandler( + [ + make_tool_call_response("correct_tool", '{"x": 5}'), + ] + ) + + with ( + handler(mock), + handler({_get_history: lambda: message_sequence}), + ): + _, tool_calls, _, is_final = call_assistant( + tools={"correct_tool": correct_tool}, + response_format=Encodable.define(int), + model="test-model", + ) + + assert len(tool_calls) == 1 + assert is_final is True From 38bd78d7799130b06dee85f6951d7f7b9454c59d Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 21:14:13 -0500 Subject: [PATCH 05/17] subclass --- effectful/handlers/llm/completions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 4f1cbfa7d..721398859 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -319,7 +319,7 @@ def call_assistant[T, U]( tool_ret = inspect.signature(final_tc.tool).return_annotation if typing.get_origin(tool_ret) is typing.Annotated: tool_ret = typing.get_args(tool_ret)[0] - if tool_ret != response_format.base: + if not issubclass(tool_ret, response_format.base): raise ToolCallDecodingError( final_tc.tool.__name__, final_tc.id, From f0e896038bff6e83adbae642e6dab841c57bdf33 Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 21:29:46 -0500 Subject: [PATCH 06/17] lint --- effectful/handlers/llm/completions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 721398859..15f223d84 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -360,16 +360,16 @@ def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool """ # call tool with python types try: - result = tool_call.tool( + result: T = tool_call.tool( *tool_call.bound_args.args, **tool_call.bound_args.kwargs ) except Exception as e: raise ToolCallExecutionError(tool_call.tool.__name__, tool_call.id, e) from e # serialize back to U using encoder for return type - return_type = Encodable.define( + return_type: Encodable[T, typing.Any] = Encodable.define( typing.cast(type[typing.Any], nested_type(result).value) - ) + ) # type: ignore encoded_result = return_type.serialize(return_type.encode(result)) message = _make_message( dict(role="tool", content=encoded_result, tool_call_id=tool_call.id), From 71e869c308eca66bdf504c868281c950fb431beb Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 21:34:46 -0500 Subject: [PATCH 07/17] is_final loop variable --- effectful/handlers/llm/completions.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 15f223d84..a69034b25 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -344,7 +344,7 @@ def call_assistant[T, U]( except (pydantic.ValidationError, TypeError, ValueError, SyntaxError) as e: raise ResultDecodingError(e, raw_message=raw_message) from e - is_final = any(tc.is_final for tc in tool_calls) + is_final = not all(not tc.is_final for tc in tool_calls) return (raw_message, tool_calls, result, is_final) @@ -597,18 +597,13 @@ def _call[**P, T]( # loop based on: https://cookbook.openai.com/examples/reasoning_function_calls tool_calls: list[DecodedToolCall] = [] result: T | None = None - is_final = False - while message["role"] != "assistant" or tool_calls: + is_final: bool = False + while not is_final: message, tool_calls, result, is_final = call_assistant( template.tools, response_model, **self.config ) for tool_call in tool_calls: - message, raw_result, is_final = call_tool(tool_call) - if is_final: - result = typing.cast(T, raw_result) - break - if is_final: - break + message, result, is_final = call_tool(tool_call) try: _get_history() From 97fd4ab8c8acb07c4acb6764896ce0f87a8ac76a Mon Sep 17 00:00:00 2001 From: Eli Date: Sun, 15 Feb 2026 22:06:18 -0500 Subject: [PATCH 08/17] rename --- effectful/handlers/llm/__init__.py | 4 +- effectful/handlers/llm/completions.py | 4 +- effectful/handlers/llm/template.py | 20 ++++---- tests/test_handlers_llm_template.py | 70 +++++++++++++-------------- 4 files changed, 48 insertions(+), 50 deletions(-) diff --git a/effectful/handlers/llm/__init__.py b/effectful/handlers/llm/__init__.py index 3398c3160..cdda93479 100644 --- a/effectful/handlers/llm/__init__.py +++ b/effectful/handlers/llm/__init__.py @@ -1,3 +1,3 @@ -from .template import Agent, IsFinalAnswer, Template, Tool +from .template import Agent, Template, Tool -__all__ = ["Agent", "IsFinalAnswer", "Template", "Tool"] +__all__ = ["Agent", "Template", "Tool"] diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index a69034b25..cf367cb5e 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -309,7 +309,7 @@ def call_assistant[T, U]( final_tc.tool.__name__, final_tc.id, ValueError( - f"IsFinalAnswer tool '{final_tc.tool.__name__}' must be the " + f"IsFinal tool '{final_tc.tool.__name__}' must be the " f"only tool call in a round, but {len(tool_calls)} tool calls " f"were generated." ), @@ -324,7 +324,7 @@ def call_assistant[T, U]( final_tc.tool.__name__, final_tc.id, TypeError( - f"IsFinalAnswer tool '{final_tc.tool.__name__}' returns " + f"IsFinal tool '{final_tc.tool.__name__}' returns " f"{tool_ret!r}, but the enclosing template expects " f"{response_format.base!r}." ), diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py index 992a02522..f5d84ec32 100644 --- a/effectful/handlers/llm/template.py +++ b/effectful/handlers/llm/template.py @@ -63,7 +63,7 @@ def _is_recursive_signature(sig: inspect.Signature): return any(annotation is IsRecursive for annotation in annotations) -class _IsFinalAnswerAnnotation(Annotation): +class _IsFinalAnnotation(Annotation): """ A special type annotation for return types in the signature of a :class:`Tool` that indicates its result should be returned directly @@ -72,7 +72,7 @@ class _IsFinalAnswerAnnotation(Annotation): .. warning:: - :class:`IsFinalAnswer` annotations are only defined to ascribe + :class:`IsFinal` annotations are only defined to ascribe return annotations, and if used in a parameter will raise a :class:`TypeError` at tool construction time. @@ -80,10 +80,10 @@ class _IsFinalAnswerAnnotation(Annotation): >>> from typing import Annotated >>> from effectful.handlers.llm import Tool - >>> from effectful.handlers.llm.template import IsFinalAnswer + >>> from effectful.handlers.llm.template import IsFinal >>> @Tool.define - ... def generate(prompt: str) -> Annotated[str, IsFinalAnswer]: + ... def generate(prompt: str) -> Annotated[str, IsFinal]: ... \"""Generate content for the given prompt.\""" ... return "generated content" """ @@ -97,22 +97,20 @@ def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature: if any(isinstance(arg, cls) for arg in typing.get_args(ty)): raise TypeError( f"Illegal annotation {ty} for parameter {name}, " - "IsFinalAnswer must only be used to annotate return types." + "IsFinal must only be used to annotate return types." ) return sig -IsFinalAnswer = _IsFinalAnswerAnnotation() +IsFinal = _IsFinalAnnotation() def _is_final_answer_tool(tool: Any) -> bool: - """Check if a tool's return type is annotated with IsFinalAnswer.""" + """Check if a tool's return type is annotated with IsFinal.""" ret = tool.__signature__.return_annotation if typing.get_origin(ret) is not Annotated: return False - return any( - isinstance(arg, _IsFinalAnswerAnnotation) for arg in typing.get_args(ret) - ) + return any(isinstance(arg, _IsFinalAnnotation) for arg in typing.get_args(ret)) class Tool[**P, T](Operation[P, T]): @@ -149,7 +147,7 @@ def __init__( if not default.__doc__: raise ValueError("Tools must have docstrings.") signature = IsRecursive.infer_annotations(signature) - signature = IsFinalAnswer.infer_annotations(signature) + signature = IsFinal.infer_annotations(signature) super().__init__(signature, name, default) @classmethod diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index 5b3b538db..9a516aae0 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -9,7 +9,7 @@ import pytest from litellm import ModelResponse -from effectful.handlers.llm import Agent, IsFinalAnswer, Template, Tool +from effectful.handlers.llm import Agent, Template, Tool from effectful.handlers.llm.completions import ( DecodedToolCall, LiteLLMProvider, @@ -22,7 +22,7 @@ completion, ) from effectful.handlers.llm.encoding import Encodable -from effectful.handlers.llm.template import _is_final_answer_tool +from effectful.handlers.llm.template import IsFinal, _is_final_answer_tool from effectful.ops.semantics import handler from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import NotHandled @@ -1335,18 +1335,18 @@ def bad(x: int) -> str: # --------------------------------------------------------------------------- -# IsFinalAnswer annotation tests +# IsFinal annotation tests # --------------------------------------------------------------------------- -class TestIsFinalAnswerAnnotation: - """Tests for the IsFinalAnswer type annotation.""" +class TestIsFinalAnnotation: + """Tests for the IsFinal type annotation.""" def test_tool_with_is_final_answer_return_type(self): - """Tool with IsFinalAnswer on return type creates successfully.""" + """Tool with IsFinal on return type creates successfully.""" @Tool.define - def my_tool(x: int) -> Annotated[str, IsFinalAnswer]: + def my_tool(x: int) -> Annotated[str, IsFinal]: """A tool that returns a final answer.""" return str(x) @@ -1363,34 +1363,34 @@ def normal_tool(x: int) -> str: assert not _is_final_answer_tool(normal_tool) def test_is_final_answer_on_parameter_raises(self): - """IsFinalAnswer on a parameter raises TypeError at define time.""" - with pytest.raises(TypeError, match="IsFinalAnswer"): + """IsFinal on a parameter raises TypeError at define time.""" + with pytest.raises(TypeError, match="IsFinal"): @Tool.define - def bad_tool(x: Annotated[int, IsFinalAnswer]) -> str: + def bad_tool(x: Annotated[int, IsFinal]) -> str: """A tool with bad annotation.""" return str(x) def test_is_final_answer_combined_with_is_recursive(self): - """IsFinalAnswer and IsRecursive can coexist on a return type.""" + """IsFinal and IsRecursive can coexist on a return type.""" from effectful.handlers.llm.template import IsRecursive @Tool.define - def combo_tool(x: int) -> Annotated[str, IsFinalAnswer, IsRecursive]: + def combo_tool(x: int) -> Annotated[str, IsFinal, IsRecursive]: """A tool with both annotations.""" return str(x) assert _is_final_answer_tool(combo_tool) -class TestIsFinalAnswerCallTool: - """Tests for call_tool behavior with IsFinalAnswer tools.""" +class TestIsFinalCallTool: + """Tests for call_tool behavior with IsFinal tools.""" def test_call_tool_returns_raw_result_for_final_answer_tool(self): """call_tool returns the raw Python result alongside the message.""" @Tool.define - def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: + def final_tool(x: int) -> Annotated[int, IsFinal]: """Returns a final answer.""" return x * 2 @@ -1422,10 +1422,10 @@ def normal_tool(x: int) -> int: assert is_final is False def test_call_tool_final_answer_with_retry_handler(self): - """call_tool works with RetryLLMHandler for IsFinalAnswer tools.""" + """call_tool works with RetryLLMHandler for IsFinal tools.""" @Tool.define - def final_tool(x: int) -> Annotated[str, IsFinalAnswer]: + def final_tool(x: int) -> Annotated[str, IsFinal]: """Returns a final answer.""" return f"answer: {x}" @@ -1441,15 +1441,15 @@ def final_tool(x: int) -> Annotated[str, IsFinalAnswer]: assert is_final is True -class TestIsFinalAnswerCompletionLoop: - """Tests for IsFinalAnswer through the full completion loop.""" +class TestIsFinalCompletionLoop: + """Tests for IsFinal through the full completion loop.""" def test_final_answer_tool_skips_final_llm_call(self): """When LLM calls a final-answer tool, result is returned directly without a second call_assistant invocation.""" @Tool.define - def compute(x: int) -> Annotated[int, IsFinalAnswer]: + def compute(x: int) -> Annotated[int, IsFinal]: """Compute and return the result directly.""" return x * 10 @@ -1476,7 +1476,7 @@ class MyResult: label: str @Tool.define - def make_result() -> Annotated[MyResult, IsFinalAnswer]: + def make_result() -> Annotated[MyResult, IsFinal]: """Create a structured result.""" return MyResult(value=42, label="answer") @@ -1495,10 +1495,10 @@ def task() -> MyResult: assert result.label == "answer" def test_agent_history_valid_after_final_answer(self): - """Agent history has no orphaned tool_calls after IsFinalAnswer.""" + """Agent history has no orphaned tool_calls after IsFinal.""" @Tool.define - def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: + def final_tool(x: int) -> Annotated[int, IsFinal]: """Return final answer.""" return x @@ -1533,10 +1533,10 @@ def do_work(self, n: int) -> int: assert has_response, f"Orphaned tool_call {tc_id} in history" def test_agent_subsequent_call_after_final_answer(self): - """A follow-up call on the same Agent works after IsFinalAnswer.""" + """A follow-up call on the same Agent works after IsFinal.""" @Tool.define - def final_tool() -> Annotated[str, IsFinalAnswer]: + def final_tool() -> Annotated[str, IsFinal]: """Return final answer.""" return "direct result" @@ -1568,10 +1568,10 @@ def _completion(self, model, messages=None, **kwargs): assert r2 == "llm result" def test_final_answer_with_retry_handler_active(self): - """IsFinalAnswer works correctly with RetryLLMHandler.""" + """IsFinal works correctly with RetryLLMHandler.""" @Tool.define - def final_tool(x: int) -> Annotated[int, IsFinalAnswer]: + def final_tool(x: int) -> Annotated[int, IsFinal]: """Return final answer.""" return x * 3 @@ -1601,7 +1601,7 @@ def test_retry_handler_error_on_final_tool_does_not_produce_final_answer(self): call_count = 0 @Tool.define - def flaky_final(x: int) -> Annotated[int, IsFinalAnswer]: + def flaky_final(x: int) -> Annotated[int, IsFinal]: """Return a final answer, but fail on first call.""" nonlocal call_count call_count += 1 @@ -1639,7 +1639,7 @@ def test_call_tool_returns_is_final_false_on_retry_handler_error(self): an error on an is_final tool.""" @Tool.define - def failing_final(x: int) -> Annotated[int, IsFinalAnswer]: + def failing_final(x: int) -> Annotated[int, IsFinal]: """Return a final answer.""" raise ValueError("boom") @@ -1656,15 +1656,15 @@ def failing_final(x: int) -> Annotated[int, IsFinalAnswer]: assert is_final is False -class TestIsFinalAnswerReturnTypeValidation: - """call_assistant should reject IsFinalAnswer tools whose return type +class TestIsFinalReturnTypeValidation: + """call_assistant should reject IsFinal tools whose return type does not match the enclosing template's return type.""" def test_mismatched_return_type_raises_tool_call_decoding_error(self): - """IsFinalAnswer tool returning str when template expects int is rejected.""" + """IsFinal tool returning str when template expects int is rejected.""" @Tool.define - def wrong_type_tool(x: int) -> Annotated[str, IsFinalAnswer]: + def wrong_type_tool(x: int) -> Annotated[str, IsFinal]: """Return a string, but template expects int.""" return str(x) @@ -1693,10 +1693,10 @@ def wrong_type_tool(x: int) -> Annotated[str, IsFinalAnswer]: assert "wrong_type_tool" in str(exc_info.value.original_error) def test_matching_return_type_passes_validation(self): - """IsFinalAnswer tool with matching return type is accepted.""" + """IsFinal tool with matching return type is accepted.""" @Tool.define - def correct_tool(x: int) -> Annotated[int, IsFinalAnswer]: + def correct_tool(x: int) -> Annotated[int, IsFinal]: """Return an int matching template.""" return x * 2 From e656c58d4aa638bd9bffb954ba2ac5d43015b6cd Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:03:21 -0500 Subject: [PATCH 09/17] compress --- effectful/handlers/llm/completions.py | 51 +++++++++++---------------- effectful/handlers/llm/encoding.py | 7 ++-- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 557b0885c..5383796d4 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -27,7 +27,7 @@ from effectful.handlers.llm.encoding import DecodedToolCall, Encodable from effectful.handlers.llm.template import Template, Tool from effectful.internals.unification import nested_type -from effectful.ops.semantics import fwd, handler +from effectful.ops.semantics import _simple_type, fwd, handler from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import Operation @@ -217,6 +217,23 @@ def call_assistant[T, U]( for raw_tool_call in raw_tool_calls: try: tool_calls += [encoding.decode(raw_tool_call)] # type: ignore + if tool_calls[-1].is_final: + if len(raw_tool_calls) > 1: + raise ValueError( + f"IsFinal tool '{tool_calls[-1].tool.__name__}' must be the " + f"only tool call in a round, but {len(raw_tool_calls)} tool calls " + f"were generated." + ) + # Validate that the tool's return type matches the template's. + tool_sig = inspect.signature(tool_calls[-1].tool) + if not issubclass( + _simple_type(tool_sig.return_annotation), response_format.base + ): + raise TypeError( + f"IsFinal tool '{raw_tool_call.function.name}' has signature " + f"{tool_sig.format()}, but the enclosing template expects " + f"{response_format.base!r}." + ) except Exception as e: raise ToolCallDecodingError( raw_tool_call=raw_tool_call, @@ -224,34 +241,6 @@ def call_assistant[T, U]( raw_message=raw_message, ) from e - final_tcs = [tc for tc in tool_calls if tc.is_final] - if final_tcs: - final_tc = final_tcs[0] - if len(tool_calls) > 1: - raise ToolCallDecodingError( - raw_tool_call=raw_message.tool_calls[0], # type: ignore - original_error=ValueError( - f"IsFinal tool '{final_tc.tool.__name__}' must be the " - f"only tool call in a round, but {len(tool_calls)} tool calls " - f"were generated." - ), - raw_message=raw_message, - ) - # Validate that the tool's return type matches the template's. - tool_ret = inspect.signature(final_tc.tool).return_annotation - if typing.get_origin(tool_ret) is typing.Annotated: - tool_ret = typing.get_args(tool_ret)[0] - if not issubclass(tool_ret, response_format.base): - raise ToolCallDecodingError( - raw_tool_call=raw_message.tool_calls[0], # type: ignore - original_error=TypeError( - f"IsFinal tool '{final_tc.tool.__name__}' returns " - f"{tool_ret!r}, but the enclosing template expects " - f"{response_format.base!r}." - ), - raw_message=raw_message, - ) - result = None if not tool_calls: # return response @@ -288,8 +277,8 @@ def call_tool[T](tool_call: DecodedToolCall[T]) -> tuple[Message, T | None, bool except Exception as e: raise ToolCallExecutionError(raw_tool_call=tool_call, original_error=e) from e - return_type = Encodable.define(nested_type(result).value) - encoded_result = return_type.serialize(return_type.encode(result)) + return_type = Encodable.define(nested_type(result).value) # type: ignore + encoded_result = return_type.serialize(return_type.encode(result)) # type: ignore message = _make_message( dict(role="tool", content=encoded_result, tool_call_id=tool_call.id), ) diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py index 43bd1301a..04932d57a 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -31,7 +31,7 @@ from PIL import Image import effectful.handlers.llm.evaluation as evaluation -from effectful.handlers.llm.template import Tool +from effectful.handlers.llm.template import Tool, _is_final_answer_tool from effectful.internals.unification import nested_type from effectful.ops.semantics import _simple_type from effectful.ops.syntax import _CustomSingleDispatchCallable @@ -60,7 +60,10 @@ class DecodedToolCall[T]: bound_args: inspect.BoundArguments id: ToolCallID name: str - is_final: bool = False + + @property + def is_final(self) -> bool: + return _is_final_answer_tool(self.tool) class Encodable[T, U](ABC): From 94ce38f2d4a9e0ef267cbea8ec72b2695aa97557 Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:08:51 -0500 Subject: [PATCH 10/17] inline helper --- effectful/handlers/llm/encoding.py | 7 +++++-- effectful/handlers/llm/template.py | 8 -------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/effectful/handlers/llm/encoding.py b/effectful/handlers/llm/encoding.py index 04932d57a..f6af514f7 100644 --- a/effectful/handlers/llm/encoding.py +++ b/effectful/handlers/llm/encoding.py @@ -31,7 +31,7 @@ from PIL import Image import effectful.handlers.llm.evaluation as evaluation -from effectful.handlers.llm.template import Tool, _is_final_answer_tool +from effectful.handlers.llm.template import Tool, _IsFinalAnnotation from effectful.internals.unification import nested_type from effectful.ops.semantics import _simple_type from effectful.ops.syntax import _CustomSingleDispatchCallable @@ -63,7 +63,10 @@ class DecodedToolCall[T]: @property def is_final(self) -> bool: - return _is_final_answer_tool(self.tool) + ret = inspect.signature(self.tool).return_annotation + return typing.get_origin(ret) is typing.Annotated and any( + isinstance(arg, _IsFinalAnnotation) for arg in ret.__metadata__ + ) class Encodable[T, U](ABC): diff --git a/effectful/handlers/llm/template.py b/effectful/handlers/llm/template.py index 2183f8252..6ebf049be 100644 --- a/effectful/handlers/llm/template.py +++ b/effectful/handlers/llm/template.py @@ -105,14 +105,6 @@ def infer_annotations(cls, sig: inspect.Signature) -> inspect.Signature: IsFinal = _IsFinalAnnotation() -def _is_final_answer_tool(tool: Any) -> bool: - """Check if a tool's return type is annotated with IsFinal.""" - ret = tool.__signature__.return_annotation - if typing.get_origin(ret) is not Annotated: - return False - return any(isinstance(arg, _IsFinalAnnotation) for arg in typing.get_args(ret)) - - class Tool[**P, T](Operation[P, T]): """A :class:`Tool` is a function that may be called by a :class:`Template`. From 18485ce94a1b0c3c287c70062f879a11e8ededd9 Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:10:35 -0500 Subject: [PATCH 11/17] remove dumb test --- tests/test_handlers_llm_template.py | 46 +---------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index 01a1d284f..04c77f8ac 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -22,7 +22,7 @@ completion, ) from effectful.handlers.llm.encoding import DecodedToolCall, Encodable -from effectful.handlers.llm.template import IsFinal, _is_final_answer_tool +from effectful.handlers.llm.template import IsFinal from effectful.ops.semantics import handler from effectful.ops.syntax import ObjectInterpretation, implements from effectful.ops.types import NotHandled @@ -1532,50 +1532,6 @@ def bad(x: int) -> str: # --------------------------------------------------------------------------- -class TestIsFinalAnnotation: - """Tests for the IsFinal type annotation.""" - - def test_tool_with_is_final_answer_return_type(self): - """Tool with IsFinal on return type creates successfully.""" - - @Tool.define - def my_tool(x: int) -> Annotated[str, IsFinal]: - """A tool that returns a final answer.""" - return str(x) - - assert _is_final_answer_tool(my_tool) - - def test_tool_without_is_final_answer(self): - """Normal tool is not detected as final answer.""" - - @Tool.define - def normal_tool(x: int) -> str: - """A normal tool.""" - return str(x) - - assert not _is_final_answer_tool(normal_tool) - - def test_is_final_answer_on_parameter_raises(self): - """IsFinal on a parameter raises TypeError at define time.""" - with pytest.raises(TypeError, match="IsFinal"): - - @Tool.define - def bad_tool(x: Annotated[int, IsFinal]) -> str: - """A tool with bad annotation.""" - return str(x) - - def test_is_final_answer_combined_with_is_recursive(self): - """IsFinal and IsRecursive can coexist on a return type.""" - from effectful.handlers.llm.template import IsRecursive - - @Tool.define - def combo_tool(x: int) -> Annotated[str, IsFinal, IsRecursive]: - """A tool with both annotations.""" - return str(x) - - assert _is_final_answer_tool(combo_tool) - - class TestIsFinalCallTool: """Tests for call_tool behavior with IsFinal tools.""" From 61e55d66fb69663b3c08e850be32b3e58eedbc36 Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:16:27 -0500 Subject: [PATCH 12/17] nit --- effectful/handlers/llm/completions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 5383796d4..865ba81d7 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -220,7 +220,7 @@ def call_assistant[T, U]( if tool_calls[-1].is_final: if len(raw_tool_calls) > 1: raise ValueError( - f"IsFinal tool '{tool_calls[-1].tool.__name__}' must be the " + f"IsFinal tool '{raw_tool_call.function.name}' must be the " f"only tool call in a round, but {len(raw_tool_calls)} tool calls " f"were generated." ) From 7a2af1e3f4f47946a81f66bbae3cfe27b501ad67 Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:19:46 -0500 Subject: [PATCH 13/17] lint --- effectful/handlers/llm/completions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 865ba81d7..92e4155f1 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -231,7 +231,7 @@ def call_assistant[T, U]( ): raise TypeError( f"IsFinal tool '{raw_tool_call.function.name}' has signature " - f"{tool_sig.format()}, but the enclosing template expects " + f"{tool_sig!r}, but the enclosing template expects " f"{response_format.base!r}." ) except Exception as e: From bab83a402c5b433e5cec5017ed91f3e925259c7b Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:40:31 -0500 Subject: [PATCH 14/17] fix tests --- effectful/handlers/llm/completions.py | 5 +++-- tests/test_handlers_llm_template.py | 32 +++++++++++++++++---------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/effectful/handlers/llm/completions.py b/effectful/handlers/llm/completions.py index 92e4155f1..900b55d30 100644 --- a/effectful/handlers/llm/completions.py +++ b/effectful/handlers/llm/completions.py @@ -226,8 +226,9 @@ def call_assistant[T, U]( ) # Validate that the tool's return type matches the template's. tool_sig = inspect.signature(tool_calls[-1].tool) + return_annotation = typing.get_args(tool_sig.return_annotation)[0] if not issubclass( - _simple_type(tool_sig.return_annotation), response_format.base + _simple_type(return_annotation), response_format.base ): raise TypeError( f"IsFinal tool '{raw_tool_call.function.name}' has signature " @@ -255,7 +256,7 @@ def call_assistant[T, U]( except (pydantic.ValidationError, TypeError, ValueError, SyntaxError) as e: raise ResultDecodingError(e, raw_message=raw_message) from e - is_final = not all(not tc.is_final for tc in tool_calls) + is_final = any(tc.is_final for tc in tool_calls) or not tool_calls return (raw_message, tool_calls, result, is_final) diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index 04c77f8ac..092b317aa 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -1545,7 +1545,7 @@ def final_tool(x: int) -> Annotated[int, IsFinal]: sig = inspect.signature(final_tool) bound_args = sig.bind(x=5) - tc = DecodedToolCall(final_tool, bound_args, "call_final", is_final=True) + tc = DecodedToolCall(final_tool, bound_args, id="call_final", name="final_tool") message, raw_result, is_final = call_tool(tc) assert message["role"] == "tool" @@ -1562,7 +1562,9 @@ def normal_tool(x: int) -> int: sig = inspect.signature(normal_tool) bound_args = sig.bind(x=3) - tc = DecodedToolCall(normal_tool, bound_args, "call_normal") + tc = DecodedToolCall( + normal_tool, bound_args, id="call_normal", name="normal_tool" + ) message, raw_result, is_final = call_tool(tc) assert message["role"] == "tool" @@ -1580,7 +1582,9 @@ def final_tool(x: int) -> Annotated[str, IsFinal]: sig = inspect.signature(final_tool) bound_args = sig.bind(x=42) - tc = DecodedToolCall(final_tool, bound_args, "call_retry_final", is_final=True) + tc = DecodedToolCall( + final_tool, bound_args, id="call_retry_final", name="final_tool" + ) with handler(RetryLLMHandler()): message, raw_result, is_final = call_tool(tc) @@ -1607,7 +1611,9 @@ def task(n: int) -> int: """Call compute with {n}.""" raise NotHandled - mock = MockCompletionHandler([make_tool_call_response("compute", '{"x": 7}')]) + mock = MockCompletionHandler( + [make_tool_call_response("compute", '{"x": {"value": 7}}')] + ) with handler(LiteLLMProvider()), handler(mock): result = task(7) @@ -1659,7 +1665,7 @@ def do_work(self, n: int) -> int: raise NotHandled mock = MockCompletionHandler( - [make_tool_call_response("final_tool", '{"x": 5}')] + [make_tool_call_response("final_tool", '{"x": {"value": 5}}')] ) agent = MyAgent() @@ -1705,7 +1711,7 @@ def _completion(self, model, messages=None, **kwargs): call_count += 1 if call_count == 1: return make_tool_call_response("final_tool", "{}") - return make_text_response('{"value": "llm result"}') + return make_text_response("llm result") agent = MyAgent() @@ -1730,7 +1736,7 @@ def task(n: int) -> int: raise NotHandled mock = MockCompletionHandler( - [make_tool_call_response("final_tool", '{"x": 4}')] + [make_tool_call_response("final_tool", '{"x": {"value": 4}}')] ) with ( @@ -1767,8 +1773,8 @@ def task(n: int) -> int: # Round 2: LLM calls flaky_final again → succeeds mock = MockCompletionHandler( [ - make_tool_call_response("flaky_final", '{"x": 5}'), - make_tool_call_response("flaky_final", '{"x": 5}'), + make_tool_call_response("flaky_final", '{"x": {"value": 5}}'), + make_tool_call_response("flaky_final", '{"x": {"value": 5}}'), ] ) @@ -1794,7 +1800,9 @@ def failing_final(x: int) -> Annotated[int, IsFinal]: sig = inspect.signature(failing_final) bound_args = sig.bind(x=1) - tc = DecodedToolCall(failing_final, bound_args, "call_err", is_final=True) + tc = DecodedToolCall( + failing_final, bound_args, id="call_err", name="failing_final" + ) with handler(RetryLLMHandler()): message, raw_result, is_final = call_tool(tc) @@ -1823,7 +1831,7 @@ def wrong_type_tool(x: int) -> Annotated[str, IsFinal]: mock = MockCompletionHandler( [ - make_tool_call_response("wrong_type_tool", '{"x": 5}'), + make_tool_call_response("wrong_type_tool", '{"x": {"value": 5}}'), ] ) @@ -1855,7 +1863,7 @@ def correct_tool(x: int) -> Annotated[int, IsFinal]: mock = MockCompletionHandler( [ - make_tool_call_response("correct_tool", '{"x": 5}'), + make_tool_call_response("correct_tool", '{"x": {"value": 5}}'), ] ) From 2d8b70ededd8aa07d9b9fd7c87f697873935c054 Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:49:00 -0500 Subject: [PATCH 15/17] remove more dumb tests --- tests/test_handlers_llm_template.py | 92 +---------------------------- 1 file changed, 1 insertion(+), 91 deletions(-) diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index 092b317aa..31a6574bc 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -1532,68 +1532,6 @@ def bad(x: int) -> str: # --------------------------------------------------------------------------- -class TestIsFinalCallTool: - """Tests for call_tool behavior with IsFinal tools.""" - - def test_call_tool_returns_raw_result_for_final_answer_tool(self): - """call_tool returns the raw Python result alongside the message.""" - - @Tool.define - def final_tool(x: int) -> Annotated[int, IsFinal]: - """Returns a final answer.""" - return x * 2 - - sig = inspect.signature(final_tool) - bound_args = sig.bind(x=5) - tc = DecodedToolCall(final_tool, bound_args, id="call_final", name="final_tool") - - message, raw_result, is_final = call_tool(tc) - assert message["role"] == "tool" - assert raw_result == 10 - assert is_final is True - - def test_call_tool_returns_raw_result_for_normal_tool(self): - """call_tool returns the raw Python result for all tools.""" - - @Tool.define - def normal_tool(x: int) -> int: - """A normal tool.""" - return x + 1 - - sig = inspect.signature(normal_tool) - bound_args = sig.bind(x=3) - tc = DecodedToolCall( - normal_tool, bound_args, id="call_normal", name="normal_tool" - ) - - message, raw_result, is_final = call_tool(tc) - assert message["role"] == "tool" - assert message["tool_call_id"] == "call_normal" - assert raw_result == 4 - assert is_final is False - - def test_call_tool_final_answer_with_retry_handler(self): - """call_tool works with RetryLLMHandler for IsFinal tools.""" - - @Tool.define - def final_tool(x: int) -> Annotated[str, IsFinal]: - """Returns a final answer.""" - return f"answer: {x}" - - sig = inspect.signature(final_tool) - bound_args = sig.bind(x=42) - tc = DecodedToolCall( - final_tool, bound_args, id="call_retry_final", name="final_tool" - ) - - with handler(RetryLLMHandler()): - message, raw_result, is_final = call_tool(tc) - - assert message["role"] == "tool" - assert raw_result == "answer: 42" - assert is_final is True - - class TestIsFinalCompletionLoop: """Tests for IsFinal through the full completion loop.""" @@ -1622,33 +1560,6 @@ def task(n: int) -> int: # Only 1 call_assistant, not 2 (no final LLM round-trip) assert mock.call_count == 1 - def test_final_answer_returns_raw_python_object(self): - """The returned value is the raw Python object, not serialized text.""" - - @dataclass - class MyResult: - value: int - label: str - - @Tool.define - def make_result() -> Annotated[MyResult, IsFinal]: - """Create a structured result.""" - return MyResult(value=42, label="answer") - - @Template.define - def task() -> MyResult: - """Call make_result.""" - raise NotHandled - - mock = MockCompletionHandler([make_tool_call_response("make_result", "{}")]) - - with handler(LiteLLMProvider()), handler(mock): - result = task() - - assert isinstance(result, MyResult) - assert result.value == 42 - assert result.label == "answer" - def test_agent_history_valid_after_final_answer(self): """Agent history has no orphaned tool_calls after IsFinal.""" @@ -1808,7 +1719,7 @@ def failing_final(x: int) -> Annotated[int, IsFinal]: message, raw_result, is_final = call_tool(tc) assert message["role"] == "tool" - assert "Tool execution failed" in message["content"] + assert message["content"] # non-empty error feedback assert raw_result is None assert is_final is False @@ -1847,7 +1758,6 @@ def wrong_type_tool(x: int) -> Annotated[str, IsFinal]: ) assert isinstance(exc_info.value.original_error, TypeError) - assert "wrong_type_tool" in str(exc_info.value.original_error) def test_matching_return_type_passes_validation(self): """IsFinal tool with matching return type is accepted.""" From e6c449631b3eea8b25dfe56763cb28d72a51075a Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:50:29 -0500 Subject: [PATCH 16/17] condense --- tests/test_handlers_llm_template.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_handlers_llm_template.py b/tests/test_handlers_llm_template.py index 31a6574bc..1198637dd 100644 --- a/tests/test_handlers_llm_template.py +++ b/tests/test_handlers_llm_template.py @@ -1723,11 +1723,6 @@ def failing_final(x: int) -> Annotated[int, IsFinal]: assert raw_result is None assert is_final is False - -class TestIsFinalReturnTypeValidation: - """call_assistant should reject IsFinal tools whose return type - does not match the enclosing template's return type.""" - def test_mismatched_return_type_raises_tool_call_decoding_error(self): """IsFinal tool returning str when template expects int is rejected.""" From 0444b8c6f7520bbbe4984e5c85064cdc679359c0 Mon Sep 17 00:00:00 2001 From: Eli Date: Tue, 24 Feb 2026 02:54:31 -0500 Subject: [PATCH 17/17] remove pytest-timeout --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3b79ba4e1..cdf674f59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,6 @@ test = [ "ruff", "nbval", "nbqa", - "pytest-timeout", ] [dependency-groups]