centralinc · bigboateng · Dec 15, 2025 · Dec 15, 2025
diff --git a/examples/test-extended-thinking.ts b/examples/test-extended-thinking.ts
@@ -41,12 +41,18 @@ async function main() {
   });
   console.log("   ✓ Agent created\n");
 
-  console.log("4. Executing task with thinkingBudget...\n");
+  console.log("4. Executing task with thinkingBudget (with multiple tool uses)...\n");
   console.log("=" .repeat(60));
 
   try {
     const result = await agent.execute(
-      "Look at this page and tell me: What is the title of the page and what is the main heading? Provide a brief summary.",
+      `Look at this page and perform the following steps:
+      1. Take a screenshot to see the current page
+      2. Scroll down to see if there's more content
+      3. Take another screenshot
+      4. Tell me: What is the title of the page and what is the main heading? Provide a brief summary.
+
+      This task requires multiple tool uses to test message history management with thinking blocks.`,
       undefined,
       {
         thinkingBudget: 2048,
@@ -57,7 +63,7 @@ async function main() {
     console.log("=" .repeat(60));
     console.log("\n5. Result:\n");
     console.log(result);
-    console.log("\n✅ Extended thinking test PASSED!");
+    console.log("\n✅ Extended thinking test with multiple tool uses PASSED!");
   } catch (error) {
     console.log("=" .repeat(60));
     console.error("\n❌ Extended thinking test FAILED!");
@@ -68,6 +74,10 @@ async function main() {
         console.error("\n  This appears to be a thinking block handling issue.");
         console.error("  The fix may not be complete.");
       }
+      if (error.message.includes("tool_use_id") || error.message.includes("tool_result")) {
+        console.error("\n  This appears to be a tool_use/tool_result pairing issue.");
+        console.error("  The message history cleanup may need adjustment.");
+      }
     } else {
       console.error(error);
     }

diff --git a/utils/message-processing.ts b/utils/message-processing.ts
@@ -176,71 +176,72 @@ export function truncateMessageHistory(
  * and preserve thinking blocks for extended thinking compatibility
  * This prevents the "unexpected tool_use_id found in tool_result blocks" error
  *
+ * IMPORTANT: The API requires that each tool_result has its corresponding tool_use
+ * in the IMMEDIATELY PRECEDING message, not just anywhere earlier in the history.
+ *
  * @param messages - Array of conversation messages
  */
 export function cleanMessageHistory(messages: BetaMessageParam[]): void {
-  const toolUseIds = new Set<string>();
+  // Process messages in order to maintain tool_use/tool_result pairing
+  for (let i = 0; i < messages.length; i++) {
+    const message = messages[i];
+    if (!message || !Array.isArray(message.content)) continue;
 
-  // First pass: collect all tool_use IDs
-  for (const message of messages) {
-    if (Array.isArray(message.content)) {
-      for (const block of message.content) {
-        if (
-          typeof block === "object" &&
-          block.type === "tool_use" &&
-          block.id
-        ) {
-          toolUseIds.add(block.id);
+    // For user messages with tool_result blocks, verify the previous message has matching tool_use
+    if (message.role === "user") {
+      const prevMessage = i > 0 ? messages[i - 1] : null;
+      const prevToolUseIds = new Set<string>();
+
+      // Collect tool_use IDs from the immediately previous message
+      if (prevMessage?.role === "assistant" && Array.isArray(prevMessage.content)) {
+        for (const block of prevMessage.content) {
+          if (
+            typeof block === "object" &&
+            block.type === "tool_use" &&
+            block.id
+          ) {
+            prevToolUseIds.add(block.id);
+          }
         }
       }
-    }
-  }
 
-  // Second pass: clean messages and preserve structure
-  for (const message of messages) {
-    if (Array.isArray(message.content)) {
-      let cleanedContent = message.content.filter((block) => {
+      // Filter out tool_result blocks that don't have a corresponding tool_use in the previous message
+      message.content = message.content.filter((block) => {
         if (
           typeof block === "object" &&
           block.type === "tool_result" &&
           block.tool_use_id
         ) {
-          return toolUseIds.has(block.tool_use_id);
+          return prevToolUseIds.has(block.tool_use_id);
         }
         return true;
       });
+    }
 
-      // Ensure proper ordering for assistant messages with thinking:
-      // 1. thinking/redacted_thinking blocks first
-      // 2. text blocks
-      // 3. tool_use blocks
-      // 4. tool_result blocks
-      if (message.role === "assistant") {
-        const thinkingBlocks = cleanedContent.filter(
-          (block) =>
-            typeof block === "object" &&
-            (block.type === "thinking" || block.type === "redacted_thinking"),
-        );
-        const textBlocks = cleanedContent.filter(
-          (block) => typeof block === "object" && block.type === "text",
-        );
-        const toolUseBlocks = cleanedContent.filter(
-          (block) => typeof block === "object" && block.type === "tool_use",
-        );
-        const toolResultBlocks = cleanedContent.filter(
-          (block) => typeof block === "object" && block.type === "tool_result",
-        );
-
-        // Reconstruct with proper order
-        cleanedContent = [
-          ...thinkingBlocks,
-          ...textBlocks,
-          ...toolUseBlocks,
-          ...toolResultBlocks,
-        ];
-      }
+    // Ensure proper ordering for assistant messages:
+    // 1. thinking/redacted_thinking blocks first
+    // 2. text blocks
+    // 3. tool_use blocks
+    // Note: tool_result blocks should never be in assistant messages; they (and any other block type) are dropped below
+    if (message.role === "assistant") {
+      const thinkingBlocks = message.content.filter(
+        (block) =>
+          typeof block === "object" &&
+          (block.type === "thinking" || block.type === "redacted_thinking"),
+      );
+      const textBlocks = message.content.filter(
+        (block) => typeof block === "object" && block.type === "text",
+      );
+      const toolUseBlocks = message.content.filter(
+        (block) => typeof block === "object" && block.type === "tool_use",
+      );
 
-      message.content = cleanedContent;
+      // Reconstruct with proper order
+      message.content = [
+        ...thinkingBlocks,
+        ...textBlocks,
+        ...toolUseBlocks,
+      ];
     }
   }
 }
@@ -253,6 +254,9 @@ export function cleanMessageHistory(messages: BetaMessageParam[]): void {
  * a thinking or redacted_thinking block. This function filters out any assistant messages
  * that don't meet this requirement.
  *
+ * Additionally, when an assistant message is removed, the user message that immediately
+ * follows it (if any) is removed as well to maintain proper conversation flow.
+ *
  * @param messages - Array of conversation messages
  * @param thinkingEnabled - Whether extended thinking is enabled
  */
@@ -265,7 +269,7 @@ export function ensureThinkingBlocksForExtendedThinking(
   }
 
   // Filter out assistant messages that don't start with a thinking block
-  // Keep user messages as they don't need thinking blocks
+  // Also remove the following user message to maintain conversation flow
   const indicesToRemove: number[] = [];
 
   for (let i = 0; i < messages.length; i++) {
@@ -279,6 +283,12 @@ export function ensureThinkingBlocksForExtendedThinking(
 
       if (!hasThinkingBlock) {
         indicesToRemove.push(i);
+
+        // Also mark the following user message for removal (if it exists)
+        // This maintains proper conversation flow (user -> assistant -> user -> assistant)
+        if (i + 1 < messages.length && messages[i + 1]?.role === "user") {
+          indicesToRemove.push(i + 1);
+        }
       }
     }
   }