From 8d9146bfb7e26846f41217756b0d1eded5f7ef44 Mon Sep 17 00:00:00 2001 From: bigboateng Date: Mon, 15 Dec 2025 19:10:29 +0100 Subject: [PATCH] fix: improve tool_use/tool_result pairing and thinking block validation This commit adds two critical fixes for message history management, plus expanded test coverage: 1. **Tool_use/tool_result pairing fix**: The API requires that each tool_result must have its corresponding tool_use in the IMMEDIATELY PREVIOUS message, not just anywhere in history. Updated cleanMessageHistory() to check pairing on a per-message basis. 2. **Thinking block cleanup enhancement**: When filtering out assistant messages without thinking blocks, also remove the following user message to maintain proper conversation flow (user -> assistant -> user -> assistant pattern). 3. **Enhanced test coverage**: Updated test-extended-thinking.ts to include multiple tool uses, better exercising message history management. These fixes prevent 400 errors: - "unexpected tool_use_id found in tool_result blocks" - "Expected thinking or redacted_thinking, but found text" Tested with extended thinking example - no errors with multiple tool uses. --- examples/test-extended-thinking.ts | 16 ++++- utils/message-processing.ts | 108 ++++++++++++++++------------- 2 files changed, 72 insertions(+), 52 deletions(-) diff --git a/examples/test-extended-thinking.ts b/examples/test-extended-thinking.ts index 3e42d83..8f88c92 100644 --- a/examples/test-extended-thinking.ts +++ b/examples/test-extended-thinking.ts @@ -41,12 +41,18 @@ async function main() { }); console.log(" ✓ Agent created\n"); - console.log("4. Executing task with thinkingBudget...\n"); + console.log("4. Executing task with thinkingBudget (with multiple tool uses)...\n"); console.log("=" .repeat(60)); try { const result = await agent.execute( - "Look at this page and tell me: What is the title of the page and what is the main heading? Provide a brief summary.", + `Look at this page and perform the following steps: + 1. 
Take a screenshot to see the current page + 2. Scroll down to see if there's more content + 3. Take another screenshot + 4. Tell me: What is the title of the page and what is the main heading? Provide a brief summary. + + This task requires multiple tool uses to test message history management with thinking blocks.`, undefined, { thinkingBudget: 2048, @@ -57,7 +63,7 @@ async function main() { console.log("=" .repeat(60)); console.log("\n5. Result:\n"); console.log(result); - console.log("\n✅ Extended thinking test PASSED!"); + console.log("\n✅ Extended thinking test with multiple tool uses PASSED!"); } catch (error) { console.log("=" .repeat(60)); console.error("\n❌ Extended thinking test FAILED!"); @@ -68,6 +74,10 @@ async function main() { console.error("\n This appears to be a thinking block handling issue."); console.error(" The fix may not be complete."); } + if (error.message.includes("tool_use_id") || error.message.includes("tool_result")) { + console.error("\n This appears to be a tool_use/tool_result pairing issue."); + console.error(" The message history cleanup may need adjustment."); + } } else { console.error(error); } diff --git a/utils/message-processing.ts b/utils/message-processing.ts index 86b7c85..50d898b 100644 --- a/utils/message-processing.ts +++ b/utils/message-processing.ts @@ -176,71 +176,72 @@ export function truncateMessageHistory( * and preserve thinking blocks for extended thinking compatibility * This prevents the "unexpected tool_use_id found in tool_result blocks" error * + * IMPORTANT: The API requires that each tool_result must have its corresponding tool_use + * in the IMMEDIATELY PREVIOUS message, not just anywhere in history. 
+ * * @param messages - Array of conversation messages */ export function cleanMessageHistory(messages: BetaMessageParam[]): void { - const toolUseIds = new Set(); + // Process messages in order to maintain tool_use/tool_result pairing + for (let i = 0; i < messages.length; i++) { + const message = messages[i]; + if (!message || !Array.isArray(message.content)) continue; - // First pass: collect all tool_use IDs - for (const message of messages) { - if (Array.isArray(message.content)) { - for (const block of message.content) { - if ( - typeof block === "object" && - block.type === "tool_use" && - block.id - ) { - toolUseIds.add(block.id); + // For user messages with tool_result blocks, verify the previous message has matching tool_use + if (message.role === "user") { + const prevMessage = i > 0 ? messages[i - 1] : null; + const prevToolUseIds = new Set(); + + // Collect tool_use IDs from the immediately previous message + if (prevMessage?.role === "assistant" && Array.isArray(prevMessage.content)) { + for (const block of prevMessage.content) { + if ( + typeof block === "object" && + block.type === "tool_use" && + block.id + ) { + prevToolUseIds.add(block.id); + } } } - } - } - // Second pass: clean messages and preserve structure - for (const message of messages) { - if (Array.isArray(message.content)) { - let cleanedContent = message.content.filter((block) => { + // Filter out tool_result blocks that don't have a corresponding tool_use in the previous message + message.content = message.content.filter((block) => { if ( typeof block === "object" && block.type === "tool_result" && block.tool_use_id ) { - return toolUseIds.has(block.tool_use_id); + return prevToolUseIds.has(block.tool_use_id); } return true; }); + } - // Ensure proper ordering for assistant messages with thinking: - // 1. thinking/redacted_thinking blocks first - // 2. text blocks - // 3. tool_use blocks - // 4. 
tool_result blocks - if (message.role === "assistant") { - const thinkingBlocks = cleanedContent.filter( - (block) => - typeof block === "object" && - (block.type === "thinking" || block.type === "redacted_thinking"), - ); - const textBlocks = cleanedContent.filter( - (block) => typeof block === "object" && block.type === "text", - ); - const toolUseBlocks = cleanedContent.filter( - (block) => typeof block === "object" && block.type === "tool_use", - ); - const toolResultBlocks = cleanedContent.filter( - (block) => typeof block === "object" && block.type === "tool_result", - ); - - // Reconstruct with proper order - cleanedContent = [ - ...thinkingBlocks, - ...textBlocks, - ...toolUseBlocks, - ...toolResultBlocks, - ]; - } + // Ensure proper ordering for assistant messages: + // 1. thinking/redacted_thinking blocks first + // 2. text blocks + // 3. tool_use blocks + // Note: tool_result blocks should never be in assistant messages + if (message.role === "assistant") { + const thinkingBlocks = message.content.filter( + (block) => + typeof block === "object" && + (block.type === "thinking" || block.type === "redacted_thinking"), + ); + const textBlocks = message.content.filter( + (block) => typeof block === "object" && block.type === "text", + ); + const toolUseBlocks = message.content.filter( + (block) => typeof block === "object" && block.type === "tool_use", + ); - message.content = cleanedContent; + // Reconstruct with proper order + message.content = [ + ...thinkingBlocks, + ...textBlocks, + ...toolUseBlocks, + ]; } } } @@ -253,6 +254,9 @@ export function cleanMessageHistory(messages: BetaMessageParam[]): void { * a thinking or redacted_thinking block. This function filters out any assistant messages * that don't meet this requirement. * + * Additionally, when removing assistant messages, we also need to remove the corresponding + * user message that follows (if any) to maintain proper conversation flow. 
+ * * @param messages - Array of conversation messages * @param thinkingEnabled - Whether extended thinking is enabled */ @@ -265,7 +269,7 @@ export function ensureThinkingBlocksForExtendedThinking( } // Filter out assistant messages that don't start with a thinking block - // Keep user messages as they don't need thinking blocks + // Also remove the following user message to maintain conversation flow const indicesToRemove: number[] = []; for (let i = 0; i < messages.length; i++) { @@ -279,6 +283,12 @@ export function ensureThinkingBlocksForExtendedThinking( if (!hasThinkingBlock) { indicesToRemove.push(i); + + // Also mark the following user message for removal (if it exists) + // This maintains proper conversation flow (user -> assistant -> user -> assistant) + if (i + 1 < messages.length && messages[i + 1]?.role === "user") { + indicesToRemove.push(i + 1); + } } } }