Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions examples/test-extended-thinking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ async function main() {
});
console.log(" ✓ Agent created\n");

console.log("4. Executing task with thinkingBudget...\n");
console.log("4. Executing task with thinkingBudget (with multiple tool uses)...\n");
console.log("=" .repeat(60));

try {
const result = await agent.execute(
"Look at this page and tell me: What is the title of the page and what is the main heading? Provide a brief summary.",
`Look at this page and perform the following steps:
1. Take a screenshot to see the current page
2. Scroll down to see if there's more content
3. Take another screenshot
4. Tell me: What is the title of the page and what is the main heading? Provide a brief summary.

This task requires multiple tool uses to test message history management with thinking blocks.`,
undefined,
{
thinkingBudget: 2048,
Expand All @@ -57,7 +63,7 @@ async function main() {
console.log("=" .repeat(60));
console.log("\n5. Result:\n");
console.log(result);
console.log("\n✅ Extended thinking test PASSED!");
console.log("\n✅ Extended thinking test with multiple tool uses PASSED!");
} catch (error) {
console.log("=" .repeat(60));
console.error("\n❌ Extended thinking test FAILED!");
Expand All @@ -68,6 +74,10 @@ async function main() {
console.error("\n This appears to be a thinking block handling issue.");
console.error(" The fix may not be complete.");
}
if (error.message.includes("tool_use_id") || error.message.includes("tool_result")) {
console.error("\n This appears to be a tool_use/tool_result pairing issue.");
console.error(" The message history cleanup may need adjustment.");
}
} else {
console.error(error);
}
Expand Down
108 changes: 59 additions & 49 deletions utils/message-processing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,71 +176,72 @@ export function truncateMessageHistory(
* and preserve thinking blocks for extended thinking compatibility
* This prevents the "unexpected tool_use_id found in tool_result blocks" error
*
* IMPORTANT: The API requires that each tool_result must have its corresponding tool_use
* in the IMMEDIATELY PREVIOUS message, not just anywhere in history.
*
* @param messages - Array of conversation messages
*/
export function cleanMessageHistory(messages: BetaMessageParam[]): void {
const toolUseIds = new Set<string>();
// Process messages in order to maintain tool_use/tool_result pairing
for (let i = 0; i < messages.length; i++) {
const message = messages[i];
if (!message || !Array.isArray(message.content)) continue;

// First pass: collect all tool_use IDs
for (const message of messages) {
if (Array.isArray(message.content)) {
for (const block of message.content) {
if (
typeof block === "object" &&
block.type === "tool_use" &&
block.id
) {
toolUseIds.add(block.id);
// For user messages with tool_result blocks, verify the previous message has matching tool_use
if (message.role === "user") {
const prevMessage = i > 0 ? messages[i - 1] : null;
const prevToolUseIds = new Set<string>();

// Collect tool_use IDs from the immediately previous message
if (prevMessage?.role === "assistant" && Array.isArray(prevMessage.content)) {
for (const block of prevMessage.content) {
if (
typeof block === "object" &&
block.type === "tool_use" &&
block.id
) {
prevToolUseIds.add(block.id);
}
}
}
}
}

// Second pass: clean messages and preserve structure
for (const message of messages) {
if (Array.isArray(message.content)) {
let cleanedContent = message.content.filter((block) => {
// Filter out tool_result blocks that don't have a corresponding tool_use in the previous message
message.content = message.content.filter((block) => {
if (
typeof block === "object" &&
block.type === "tool_result" &&
block.tool_use_id
) {
return toolUseIds.has(block.tool_use_id);
return prevToolUseIds.has(block.tool_use_id);
}
return true;
});
}

// Ensure proper ordering for assistant messages with thinking:
// 1. thinking/redacted_thinking blocks first
// 2. text blocks
// 3. tool_use blocks
// 4. tool_result blocks
if (message.role === "assistant") {
const thinkingBlocks = cleanedContent.filter(
(block) =>
typeof block === "object" &&
(block.type === "thinking" || block.type === "redacted_thinking"),
);
const textBlocks = cleanedContent.filter(
(block) => typeof block === "object" && block.type === "text",
);
const toolUseBlocks = cleanedContent.filter(
(block) => typeof block === "object" && block.type === "tool_use",
);
const toolResultBlocks = cleanedContent.filter(
(block) => typeof block === "object" && block.type === "tool_result",
);

// Reconstruct with proper order
cleanedContent = [
...thinkingBlocks,
...textBlocks,
...toolUseBlocks,
...toolResultBlocks,
];
}
// Ensure proper ordering for assistant messages:
// 1. thinking/redacted_thinking blocks first
// 2. text blocks
// 3. tool_use blocks
// Note: tool_result blocks should never be in assistant messages
if (message.role === "assistant") {
const thinkingBlocks = message.content.filter(
(block) =>
typeof block === "object" &&
(block.type === "thinking" || block.type === "redacted_thinking"),
);
const textBlocks = message.content.filter(
(block) => typeof block === "object" && block.type === "text",
);
const toolUseBlocks = message.content.filter(
(block) => typeof block === "object" && block.type === "tool_use",
);

message.content = cleanedContent;
// Reconstruct with proper order
message.content = [
...thinkingBlocks,
...textBlocks,
...toolUseBlocks,
];
}
}
}
Expand All @@ -253,6 +254,9 @@ export function cleanMessageHistory(messages: BetaMessageParam[]): void {
* a thinking or redacted_thinking block. This function filters out any assistant messages
* that don't meet this requirement.
*
* Additionally, when removing assistant messages, we also need to remove the corresponding
* user message that follows (if any) to maintain proper conversation flow.
*
* @param messages - Array of conversation messages
* @param thinkingEnabled - Whether extended thinking is enabled
*/
Expand All @@ -265,7 +269,7 @@ export function ensureThinkingBlocksForExtendedThinking(
}

// Filter out assistant messages that don't start with a thinking block
// Keep user messages as they don't need thinking blocks
// Also remove the following user message to maintain conversation flow
const indicesToRemove: number[] = [];

for (let i = 0; i < messages.length; i++) {
Expand All @@ -279,6 +283,12 @@ export function ensureThinkingBlocksForExtendedThinking(

if (!hasThinkingBlock) {
indicesToRemove.push(i);

// Also mark the following user message for removal (if it exists)
// This maintains proper conversation flow (user -> assistant -> user -> assistant)
if (i + 1 < messages.length && messages[i + 1]?.role === "user") {
indicesToRemove.push(i + 1);
}
}
}
}
Expand Down