diff --git a/src/anthropic/lib/tools/_beta_runner.py b/src/anthropic/lib/tools/_beta_runner.py index 039cb4b5..bbd705d9 100644 --- a/src/anthropic/lib/tools/_beta_runner.py +++ b/src/anthropic/lib/tools/_beta_runner.py @@ -276,10 +276,18 @@ def __run__(self) -> Iterator[RunnerItemT]: if not self._check_and_compact(): response = self.generate_tool_call_response() if response is None: - log.debug("Tool call was not requested, exiting from tool runner loop.") - return - - if not self._messages_modified: + # A `pause_turn` stop reason means the model paused a long-running + # turn (e.g. while a server-side tool such as web_search or web_fetch + # is still running). There are no client-side tool calls to satisfy, + # but the turn isn't finished, so we resume it by sending the paused + # assistant message back unchanged instead of exiting the loop. + if message.stop_reason == "pause_turn": + if not self._messages_modified: + self.append_messages(message) + else: + log.debug("Tool call was not requested, exiting from tool runner loop.") + return + elif not self._messages_modified: self.append_messages(message, response) self._messages_modified = False @@ -557,10 +565,18 @@ async def __run__(self) -> AsyncIterator[RunnerItemT]: if not await self._check_and_compact(): response = await self.generate_tool_call_response() if response is None: - log.debug("Tool call was not requested, exiting from tool runner loop.") - return - - if not self._messages_modified: + # A `pause_turn` stop reason means the model paused a long-running + # turn (e.g. while a server-side tool such as web_search or web_fetch + # is still running). There are no client-side tool calls to satisfy, + # but the turn isn't finished, so we resume it by sending the paused + # assistant message back unchanged instead of exiting the loop. + if message.stop_reason == "pause_turn": + if not self._messages_modified: + self.append_messages(message) + else: + log.debug("Tool call was not requested, exiting from tool runner loop.") + return + elif not self._messages_modified: self.append_messages(message, response) self._messages_modified = False diff --git a/tests/lib/tools/test_runners.py b/tests/lib/tools/test_runners.py index 6f78a148..e970d41b 100644 --- a/tests/lib/tools/test_runners.py +++ b/tests/lib/tools/test_runners.py @@ -1,9 +1,12 @@ +import os import json import logging from typing import Any, Dict, List, Union, cast from typing_extensions import Literal +import httpx import pytest +from respx import MockRouter from inline_snapshot import external, snapshot from anthropic import Anthropic, AsyncAnthropic, beta_tool, beta_async_tool @@ -15,6 +18,8 @@ from ..utils import print_obj +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + # all the snapshots in this file are auto-generated from the live API # # you can update them with @@ -667,6 +672,135 @@ async def get_weather(location: str, units: Literal["c", "f"]) -> BetaFunctionTo ).until_done() +# Regression fixtures for #1170: a `pause_turn` stop reason carrying only a +# server tool use block (e.g. web_search/web_fetch), followed by the final answer. +_PAUSE_TURN_RESPONSE: Dict[str, Any] = { + "id": "msg_pause_turn_01", + "type": "message", + "role": "assistant", + "model": "claude-haiku-4-5-20251001", + "content": [ + { + "type": "server_tool_use", + "id": "srvtoolu_01", + "name": "web_search", + "input": {"query": "brownie recipe"}, + } + ], + "stop_reason": "pause_turn", + "stop_sequence": None, + "usage": { + "input_tokens": 100, + "output_tokens": 20, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "service_tier": "standard", + }, +} + +_FINAL_TEXT_RESPONSE: Dict[str, Any] = { + "id": "msg_final_01", + "type": "message", + "role": "assistant", + "model": "claude-haiku-4-5-20251001", + "content": [{"type": "text", "text": "Here is a brownie recipe you can try."}], + "stop_reason": "end_turn", + "stop_sequence": None, + "usage": { + "input_tokens": 120, + "output_tokens": 15, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "service_tier": "standard", + }, +} + + +@pytest.mark.skipif(PYDANTIC_V1, reason="tool runner not supported with pydantic v1") +@pytest.mark.respx(base_url=base_url) +def test_pause_turn_resumes_loop_sync(client: Anthropic, respx_mock: MockRouter) -> None: + # Regression test for #1170: a `pause_turn` stop reason with only server tool + # use blocks (e.g. web_search) must not exit the tool runner loop early. The + # runner should resume the paused turn until the server produces a final answer. + respx_mock.post("/v1/messages").mock( + side_effect=[ + httpx.Response(200, json=_PAUSE_TURN_RESPONSE), + httpx.Response(200, json=_FINAL_TEXT_RESPONSE), + ] + ) + + message = client.beta.messages.tool_runner( + max_tokens=1024, + model="claude-haiku-4-5", + tools=[{"type": "web_search_20250305", "name": "web_search"}], + messages=[{"role": "user", "content": "Find a brownie recipe"}], + ).until_done() + + # The runner must make a second request to resume the paused turn rather than + # returning the intermediate `server_tool_use` block as the final message. + assert respx_mock.calls.call_count == 2 + assert message.stop_reason == "end_turn" + assert [block.type for block in message.content] == ["text"] + + # The paused turn must be resumed by sending the assistant message back, so the + # second request carries it (with the server tool use block) as the last message. + second_request = json.loads(respx_mock.calls[1].request.content) + assert second_request["messages"][-1]["role"] == "assistant" + assert any(block["type"] == "server_tool_use" for block in second_request["messages"][-1]["content"]) + + +@pytest.mark.skipif(PYDANTIC_V1, reason="tool runner not supported with pydantic v1") +@pytest.mark.respx(base_url=base_url) +def test_pause_turn_chained_sync(client: Anthropic, respx_mock: MockRouter) -> None: + # A long-running server tool may pause more than once before finishing; the + # runner must keep resuming until a terminal stop reason is reached. + respx_mock.post("/v1/messages").mock( + side_effect=[ + httpx.Response(200, json=_PAUSE_TURN_RESPONSE), + httpx.Response(200, json=_PAUSE_TURN_RESPONSE), + httpx.Response(200, json=_FINAL_TEXT_RESPONSE), + ] + ) + + message = client.beta.messages.tool_runner( + max_tokens=1024, + model="claude-haiku-4-5", + tools=[{"type": "web_search_20250305", "name": "web_search"}], + messages=[{"role": "user", "content": "Find a brownie recipe"}], + ).until_done() + + assert respx_mock.calls.call_count == 3 + assert message.stop_reason == "end_turn" + assert [block.type for block in message.content] == ["text"] + + +@pytest.mark.skipif(PYDANTIC_V1, reason="tool runner not supported with pydantic v1") +@pytest.mark.respx(base_url=base_url) +async def test_pause_turn_resumes_loop_async(async_client: AsyncAnthropic, respx_mock: MockRouter) -> None: + # Async counterpart of the #1170 regression test. + respx_mock.post("/v1/messages").mock( + side_effect=[ + httpx.Response(200, json=_PAUSE_TURN_RESPONSE), + httpx.Response(200, json=_FINAL_TEXT_RESPONSE), + ] + ) + + message = await async_client.beta.messages.tool_runner( + max_tokens=1024, + model="claude-haiku-4-5", + tools=[{"type": "web_search_20250305", "name": "web_search"}], + messages=[{"role": "user", "content": "Find a brownie recipe"}], + ).until_done() + + assert respx_mock.calls.call_count == 2 + assert message.stop_reason == "end_turn" + assert [block.type for block in message.content] == ["text"] + + second_request = json.loads(respx_mock.calls[1].request.content) + assert second_request["messages"][-1]["role"] == "assistant" + assert any(block["type"] == "server_tool_use" for block in second_request["messages"][-1]["content"]) + + def _get_weather(location: str, units: Literal["c", "f"]) -> Dict[str, Any]: # Simulate a weather API call print(f"Fetching weather for {location} in {units}")