Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 64 additions & 3 deletions src/anthropic/lib/tools/agent_toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import os
import re
import uuid
import base64
import shutil
import logging
import subprocess
Expand All @@ -47,6 +48,7 @@
from contextlib import asynccontextmanager
from dataclasses import field, dataclass
from collections.abc import Mapping, Callable, Awaitable, AsyncIterator
from typing_extensions import Literal

import anyio
import anyio.abc
Expand All @@ -56,14 +58,17 @@
from ..._types import NotGiven, not_given
from ..._utils import is_given
from ...types.beta import (
BetaImageBlockParam,
BetaRequestDocumentBlockParam,
BetaManagedAgentsAgentToolset20260401BashInput,
BetaManagedAgentsAgentToolset20260401EditInput,
BetaManagedAgentsAgentToolset20260401GlobInput,
BetaManagedAgentsAgentToolset20260401GrepInput,
BetaManagedAgentsAgentToolset20260401ReadInput,
BetaManagedAgentsAgentToolset20260401WriteInput,
)
from ._beta_functions import ToolError, BetaAsyncFunctionTool, beta_async_tool
from ._beta_functions import ToolError, BetaAsyncFunctionTool, BetaFunctionToolResultType, beta_async_tool
from ...types.beta.beta_tool_result_block_param import Content as BetaContent

if TYPE_CHECKING:
from ..._client import AsyncAnthropic
Expand Down Expand Up @@ -144,6 +149,48 @@ def _fs_error(op: str, file_path: str, e: OSError) -> ToolError:
return ToolError(f"{op}: {file_path}: {reason}")


def _sniff_binary_media_type(head: bytes) -> Optional[str]:
    """Map a file's leading bytes to an image/PDF media type.

    The decision is made from the content's magic number rather than from the
    file name, so a mislabelled or extension-less file is still classified
    correctly. Only five formats are recognised: PDF, JPEG, PNG, GIF and WebP.

    Args:
        head: The first bytes of the file. Twelve bytes are enough for every
            signature checked here (WebP is the longest).

    Returns:
        The media type string (``application/pdf``, ``image/jpeg``,
        ``image/png``, ``image/gif`` or ``image/webp``), or ``None`` when no
        signature matches.
    """
    # Fixed-prefix formats: (accepted leading byte strings, media type).
    prefix_signatures = (
        ((b"%PDF-",), "application/pdf"),
        ((b"\xff\xd8\xff",), "image/jpeg"),
        ((b"\x89PNG\r\n\x1a\n",), "image/png"),
        ((b"GIF87a", b"GIF89a"), "image/gif"),
    )
    for magics, media_type in prefix_signatures:
        if head.startswith(magics):
            return media_type

    # WebP lives inside a RIFF container: "RIFF", a 4-byte size field, then
    # the "WEBP" form type, so the match is split around bytes 4..7.
    is_webp = head[:4] == b"RIFF" and head[8:12] == b"WEBP"
    return "image/webp" if is_webp else None


def _binary_content_block(media_type: str, raw: bytes) -> BetaContent:
    """Wrap binary file bytes in the matching base64 tool-result content block.

    Args:
        media_type: Media type of ``raw``. ``application/pdf`` produces a
            ``document`` block; ``image/jpeg``, ``image/png``, ``image/gif``
            and ``image/webp`` produce an ``image`` block.
        raw: The complete file contents to embed.

    Returns:
        A ``document`` or ``image`` block whose ``source`` carries ``raw`` as
        standard base64 text together with ``media_type``.

    Raises:
        ValueError: If ``media_type`` is not one of the five supported values.
            Without this check any other string would be emitted as an
            ``image`` block with an invalid media type, because ``cast`` does
            no runtime validation.
    """
    supported_image_types = ("image/jpeg", "image/png", "image/gif", "image/webp")
    if media_type != "application/pdf" and media_type not in supported_image_types:
        raise ValueError(f"unsupported media type for a binary content block: {media_type!r}")

    # Base64 output is pure ASCII, so this decode cannot fail.
    data = base64.standard_b64encode(raw).decode("ascii")
    if media_type == "application/pdf":
        document: BetaRequestDocumentBlockParam = {
            "type": "document",
            "source": {"type": "base64", "media_type": "application/pdf", "data": data},
        }
        return document
    image: BetaImageBlockParam = {
        "type": "image",
        "source": {
            "type": "base64",
            # Narrowing for the type checker only; validated at runtime above.
            "media_type": cast('Literal["image/jpeg", "image/png", "image/gif", "image/webp"]', media_type),
            "data": data,
        },
    }
    return image


def _empty_skill_dirs() -> list[Path]:
    """Return a new, empty list of skill-directory paths on every call."""
    # NOTE(review): presumably a dataclass ``default_factory`` — confirm at the use site.
    skill_dirs: list[Path] = []
    return skill_dirs

Expand Down Expand Up @@ -494,7 +541,7 @@ async def bash(

def beta_read_tool(ctx: AgentToolContext) -> BetaAsyncFunctionTool[Any]:
@beta_async_tool(name="read", input_schema=BetaManagedAgentsAgentToolset20260401ReadInput)
async def read(file_path: str, view_range: Optional[List[int]] = None) -> str:
async def read(file_path: str, view_range: Optional[List[int]] = None) -> BetaFunctionToolResultType:
"""Read a file rooted at the working directory."""
try:
target = resolve_path(ctx, file_path)
Expand All @@ -513,7 +560,21 @@ async def read(file_path: str, view_range: Optional[List[int]] = None) -> str:
f"read: {file_path} is {st.st_size} bytes, exceeds {limit}-byte limit. "
"Use bash (head/tail/sed) to read a slice."
)
text = target.read_text()
raw = target.read_bytes()
media_type = _sniff_binary_media_type(raw[:16])
if media_type is not None:
# Images/PDFs round-trip as content blocks; decoding them as
# UTF-8 used to raise an uncaught UnicodeDecodeError.
if view_range is not None:
raise ToolError(f"read: view_range is not supported for {media_type} files")
return [_binary_content_block(media_type, raw)]
try:
text = raw.decode("utf-8")
except UnicodeDecodeError:
raise ToolError(
f"read: {file_path}: not a UTF-8 text file and not a supported binary "
"(image/PDF) format. Use bash to inspect it."
) from None
except ToolError:
raise
except OSError as e:
Expand Down
48 changes: 48 additions & 0 deletions tests/lib/tools/test_agent_toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,54 @@ async def test_read_rejects_directory(tmp_path: Path) -> None:
await beta_read_tool(env).call({"file_path": "sub"})


@needs_pydantic_v2
@pytest.mark.parametrize(
    ("name", "payload", "expected_type", "expected_media_type"),
    [
        ("slide.jpg", b"\xff\xd8\xff\xe0\x00\x10JFIF" + b"\x00" * 8, "image", "image/jpeg"),
        ("logo.png", b"\x89PNG\r\n\x1a\n" + b"\x00" * 8, "image", "image/png"),
        ("anim.gif", b"GIF89a" + b"\x00" * 10, "image", "image/gif"),
        ("pic.webp", b"RIFF\x00\x00\x00\x00WEBPVP8 ", "image", "image/webp"),
        ("doc.pdf", b"%PDF-1.7\n%\xe2\xe3\xcf\xd3\n", "document", "application/pdf"),
    ],
)
async def test_read_returns_content_block_for_binary(
    tmp_path: Path, name: str, payload: bytes, expected_type: str, expected_media_type: str
) -> None:
    """Reading a recognised image/PDF yields one base64 content block.

    Each case writes a minimal payload that starts with the format's magic
    bytes, reads it through the ``read`` tool, and checks that the result is a
    one-element list whose block has the expected type and media type and
    whose data decodes back to the exact bytes written.
    """
    from base64 import standard_b64decode

    target = tmp_path / name
    target.write_bytes(payload)

    ctx = AgentToolContext(workdir=str(tmp_path))
    result = await beta_read_tool(ctx).call({"file_path": name})

    # Binary reads return a list holding exactly one content block.
    assert isinstance(result, list)
    assert len(result) == 1

    content_block = result[0]
    assert content_block["type"] == expected_type

    src = content_block["source"]
    assert src["type"] == "base64"
    assert src["media_type"] == expected_media_type
    # The payload must round-trip unchanged through base64.
    assert standard_b64decode(src["data"]) == payload


@needs_pydantic_v2
async def test_read_rejects_view_range_on_binary(tmp_path: Path) -> None:
    """``read`` raises ``ToolError`` when ``view_range`` is given for a PNG."""
    png_stub = b"\x89PNG\r\n\x1a\n" + b"\x00" * 8
    (tmp_path / "pic.png").write_bytes(png_stub)

    ctx = AgentToolContext(workdir=str(tmp_path))
    request = {"file_path": "pic.png", "view_range": [1, 2]}
    with pytest.raises(ToolError, match="view_range is not supported"):
        await beta_read_tool(ctx).call(request)


@needs_pydantic_v2
async def test_read_rejects_non_text_non_binary(tmp_path: Path) -> None:
    """Unrecognised non-UTF-8 bytes surface as a ``ToolError``.

    Arbitrary non-UTF-8 bytes that aren't a recognised image/PDF used to raise
    an uncaught ``UnicodeDecodeError``; now a clean ``ToolError``.
    """
    junk = b"\x00\x01\x02\xff\xfe garbage \x80\x81"
    blob_path = tmp_path / "blob.bin"
    blob_path.write_bytes(junk)

    ctx = AgentToolContext(workdir=str(tmp_path))
    request = {"file_path": "blob.bin"}
    with pytest.raises(ToolError, match="not a UTF-8 text file"):
        await beta_read_tool(ctx).call(request)


@needs_pydantic_v2
async def test_edit_rejects_oversized_file(tmp_path: Path) -> None:
(tmp_path / "big.txt").write_bytes(b"a" * (257 * 1024))
Expand Down