diff --git a/src/anthropic/lib/tools/agent_toolset.py b/src/anthropic/lib/tools/agent_toolset.py index d771996a..2fbb7031 100644 --- a/src/anthropic/lib/tools/agent_toolset.py +++ b/src/anthropic/lib/tools/agent_toolset.py @@ -36,6 +36,7 @@ import os import re import uuid +import base64 import shutil import logging import subprocess @@ -47,6 +48,7 @@ from contextlib import asynccontextmanager from dataclasses import field, dataclass from collections.abc import Mapping, Callable, Awaitable, AsyncIterator +from typing_extensions import Literal import anyio import anyio.abc @@ -56,6 +58,8 @@ from ..._types import NotGiven, not_given from ..._utils import is_given from ...types.beta import ( + BetaImageBlockParam, + BetaRequestDocumentBlockParam, BetaManagedAgentsAgentToolset20260401BashInput, BetaManagedAgentsAgentToolset20260401EditInput, BetaManagedAgentsAgentToolset20260401GlobInput, @@ -63,7 +67,8 @@ BetaManagedAgentsAgentToolset20260401ReadInput, BetaManagedAgentsAgentToolset20260401WriteInput, ) -from ._beta_functions import ToolError, BetaAsyncFunctionTool, beta_async_tool +from ._beta_functions import ToolError, BetaAsyncFunctionTool, BetaFunctionToolResultType, beta_async_tool +from ...types.beta.beta_tool_result_block_param import Content as BetaContent if TYPE_CHECKING: from ..._client import AsyncAnthropic @@ -144,6 +149,48 @@ def _fs_error(op: str, file_path: str, e: OSError) -> ToolError: return ToolError(f"{op}: {file_path}: {reason}") +def _sniff_binary_media_type(head: bytes) -> Optional[str]: + """Identify an image/PDF by leading magic bytes, returning its media type. + + Sniffing the content (rather than trusting the extension) is what lets the + ``read`` tool hand the model an ``image``/``document`` content block instead + of choking on a non-UTF-8 payload. Only the media types the tool-result + content blocks accept are recognised; anything else returns ``None`` and is + treated as text. + """ + if head.startswith(b"%PDF-"): + return "application/pdf" + if head.startswith(b"\xff\xd8\xff"): + return "image/jpeg" + if head.startswith(b"\x89PNG\r\n\x1a\n"): + return "image/png" + if head.startswith((b"GIF87a", b"GIF89a")): + return "image/gif" + if head[:4] == b"RIFF" and head[8:12] == b"WEBP": + return "image/webp" + return None + + +def _binary_content_block(media_type: str, raw: bytes) -> BetaContent: + """Wrap binary file bytes in the matching base64 tool-result content block.""" + data = base64.standard_b64encode(raw).decode("ascii") + if media_type == "application/pdf": + document: BetaRequestDocumentBlockParam = { + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": data}, + } + return document + image: BetaImageBlockParam = { + "type": "image", + "source": { + "type": "base64", + "media_type": cast('Literal["image/jpeg", "image/png", "image/gif", "image/webp"]', media_type), + "data": data, + }, + } + return image + + def _empty_skill_dirs() -> list[Path]: return [] @@ -494,7 +541,7 @@ async def bash( def beta_read_tool(ctx: AgentToolContext) -> BetaAsyncFunctionTool[Any]: @beta_async_tool(name="read", input_schema=BetaManagedAgentsAgentToolset20260401ReadInput) - async def read(file_path: str, view_range: Optional[List[int]] = None) -> str: + async def read(file_path: str, view_range: Optional[List[int]] = None) -> BetaFunctionToolResultType: """Read a file rooted at the working directory.""" try: target = resolve_path(ctx, file_path) @@ -513,7 +560,21 @@ async def read(file_path: str, view_range: Optional[List[int]] = None) -> str: f"read: {file_path} is {st.st_size} bytes, exceeds {limit}-byte limit. " "Use bash (head/tail/sed) to read a slice." ) - text = target.read_text() + raw = target.read_bytes() + media_type = _sniff_binary_media_type(raw[:16]) + if media_type is not None: + # Images/PDFs round-trip as content blocks; decoding them as + # UTF-8 used to raise an uncaught UnicodeDecodeError. + if view_range is not None: + raise ToolError(f"read: view_range is not supported for {media_type} files") + return [_binary_content_block(media_type, raw)] + try: + text = raw.decode("utf-8") + except UnicodeDecodeError: + raise ToolError( + f"read: {file_path}: not a UTF-8 text file and not a supported binary " + "(image/PDF) format. Use bash to inspect it." + ) from None except ToolError: raise except OSError as e: diff --git a/tests/lib/tools/test_agent_toolset.py b/tests/lib/tools/test_agent_toolset.py index 3486d679..75b69cec 100644 --- a/tests/lib/tools/test_agent_toolset.py +++ b/tests/lib/tools/test_agent_toolset.py @@ -108,6 +108,54 @@ async def test_read_rejects_directory(tmp_path: Path) -> None: await beta_read_tool(env).call({"file_path": "sub"}) +@needs_pydantic_v2 +@pytest.mark.parametrize( + ("name", "payload", "expected_type", "expected_media_type"), + [ + ("slide.jpg", b"\xff\xd8\xff\xe0\x00\x10JFIF" + b"\x00" * 8, "image", "image/jpeg"), + ("logo.png", b"\x89PNG\r\n\x1a\n" + b"\x00" * 8, "image", "image/png"), + ("anim.gif", b"GIF89a" + b"\x00" * 10, "image", "image/gif"), + ("pic.webp", b"RIFF\x00\x00\x00\x00WEBPVP8 ", "image", "image/webp"), + ("doc.pdf", b"%PDF-1.7\n%\xe2\xe3\xcf\xd3\n", "document", "application/pdf"), + ], +) +async def test_read_returns_content_block_for_binary( + tmp_path: Path, name: str, payload: bytes, expected_type: str, expected_media_type: str +) -> None: + import base64 + + (tmp_path / name).write_bytes(payload) + env = AgentToolContext(workdir=str(tmp_path)) + out = await beta_read_tool(env).call({"file_path": name}) + + assert isinstance(out, list) + assert len(out) == 1 + block = out[0] + assert block["type"] == expected_type + source = block["source"] + assert source["type"] == "base64" + assert source["media_type"] == expected_media_type + assert base64.standard_b64decode(source["data"]) == payload + + +@needs_pydantic_v2 +async def test_read_rejects_view_range_on_binary(tmp_path: Path) -> None: + (tmp_path / "pic.png").write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8) + env = AgentToolContext(workdir=str(tmp_path)) + with pytest.raises(ToolError, match="view_range is not supported"): + await beta_read_tool(env).call({"file_path": "pic.png", "view_range": [1, 2]}) + + +@needs_pydantic_v2 +async def test_read_rejects_non_text_non_binary(tmp_path: Path) -> None: + # Arbitrary non-UTF-8 bytes that aren't a recognised image/PDF used to raise + # an uncaught UnicodeDecodeError; now a clean ToolError. + (tmp_path / "blob.bin").write_bytes(b"\x00\x01\x02\xff\xfe garbage \x80\x81") + env = AgentToolContext(workdir=str(tmp_path)) + with pytest.raises(ToolError, match="not a UTF-8 text file"): + await beta_read_tool(env).call({"file_path": "blob.bin"}) + + @needs_pydantic_v2 async def test_edit_rejects_oversized_file(tmp_path: Path) -> None: (tmp_path / "big.txt").write_bytes(b"a" * (257 * 1024))