fix(tools): use filtered messages list in async compaction

fede-kamel · fede-kamel · commit e6f0370bb566 · 2026-01-20T12:55:41.000-05:00
The async _check_and_compact() method was using self._params["messages"]
instead of the local `messages` variable when building the compaction
request. This caused the filtering logic (which removes tool_use blocks
from the last assistant message) to be ignored.

When compaction runs and the last message is an assistant message with
only tool_use blocks, that message should be filtered out before sending
the summarization request. Without this fix, the API rejects it with:

  "tool_use ids were found without tool_result blocks"

The sync version correctly uses `*messages`; the async version was
incorrectly using `*self._params["messages"]`.

Added regression test that verifies tool_use filtering works correctly.
diff --git a/src/anthropic/lib/tools/_beta_runner.py b/src/anthropic/lib/tools/_beta_runner.py
@@ -108,7 +108,7 @@ def append_messages(self, *messages: BetaMessageParam | ParsedBetaMessage[Respon
             for message in messages
         ]
         self._messages_modified = True
-        self.set_messages_params(lambda params: {**params, "messages": [*self._params["messages"], *message_params]})
+        self.set_messages_params(lambda params: {**params, "messages": [*params["messages"], *message_params]})
         self._cached_tool_call_response = None
 
     def _should_stop(self) -> bool:
@@ -451,7 +451,7 @@ async def _check_and_compact(self) -> bool:
                 messages.pop()
 
         messages = [
-            *self._params["messages"],
+            *messages,
             BetaMessageParam(
                 role="user",
                 content=self._compaction_control.get("summary_prompt", DEFAULT_SUMMARY_PROMPT),
diff --git a/tests/lib/tools/test_runners.py b/tests/lib/tools/test_runners.py
@@ -581,6 +581,80 @@ async def get_weather(location: str, units: Literal["c", "f"]) -> BetaFunctionTo
     assert print_obj(message, monkeypatch) == snapshots["basic"]["result"]
 
 
+@pytest.mark.skipif(PYDANTIC_V1, reason="tool runner not supported with pydantic v1")
+async def test_async_compaction_filters_tool_use(async_client: AsyncAnthropic) -> None:
+    """Verify async compaction drops a trailing tool_use-only assistant message.
+
+    When compaction runs and the last message is an assistant message with only
+    tool_use blocks (no text), the filtering should remove it; otherwise the
+    summarization request carries a tool_use without its tool_result.
+
+    This is a regression test for a bug where the async version used
+    self._params["messages"] instead of the filtered local `messages` variable.
+    """
+    from unittest.mock import AsyncMock, MagicMock
+
+    runner = async_client.beta.messages.tool_runner(
+        model="claude-sonnet-4-20250514",
+        max_tokens=500,
+        tools=[],
+        messages=[{"role": "user", "content": "test"}],
+        compaction_control={
+            "enabled": True,
+            "context_token_threshold": 100,
+        },
+    )
+
+    # Replace the history with one ending in an assistant turn that contains
+    # ONLY a tool_use block (no text), i.e. a tool call with no tool_result yet.
+    runner._params["messages"] = [
+        {"role": "user", "content": "What is 2+2?"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "tool_use", "id": "toolu_test123", "name": "calculator", "input": {"a": 2, "b": 2}},
+            ],
+        },
+    ]
+
+    # Report token usage well above context_token_threshold to trigger compaction
+    mock_message = MagicMock()
+    mock_message.usage.input_tokens = 500
+    mock_message.usage.output_tokens = 100
+    mock_message.usage.cache_creation_input_tokens = 0
+    mock_message.usage.cache_read_input_tokens = 0
+    runner._get_last_message = AsyncMock(return_value=mock_message)
+
+    # Stub the summarization API call so the request can be inspected afterwards
+    mock_response = MagicMock()
+    mock_response.content = [MagicMock(type="text", text="Summary of conversation")]
+    mock_response.usage.output_tokens = 50
+    runner._client.beta.messages.create = AsyncMock(return_value=mock_response)
+
+    # Before the fix, this sent the unfiltered history (tool_use without tool_result)
+    result = await runner._check_and_compact()
+
+    assert result is True, "Compaction should have run"
+
+    # Verify the API was called exactly once (compaction happened)
+    runner._client.beta.messages.create.assert_called_once()
+
+    # Get the messages that were sent in the summarization request
+    sent_messages = runner._client.beta.messages.create.call_args.kwargs["messages"]
+
+    # The tool_use-only assistant message should have been removed,
+    # leaving exactly: [original user message, summary prompt]
+    assert len(sent_messages) == 2, f"Expected 2 messages, got {len(sent_messages)}"
+    assert sent_messages[0] == {"role": "user", "content": "What is 2+2?"}
+    assert sent_messages[1]["role"] == "user"  # Summary prompt is a user message
+
+    # Check the property under test directly: no tool_use block reaches the API
+    for sent in sent_messages:
+        content = sent["content"]
+        blocks = content if isinstance(content, list) else []
+        assert not any(isinstance(b, dict) and b.get("type") == "tool_use" for b in blocks), sent
+
+
 def _get_weather(location: str, units: Literal["c", "f"]) -> Dict[str, Any]:
     # Simulate a weather API call
     print(f"Fetching weather for {location} in {units}")