centralinc · bigboateng · Dec 15, 2025 · Dec 15, 2025 · Dec 15, 2025
diff --git a/.changeset/fix-extended-thinking-blocks.md b/.changeset/fix-extended-thinking-blocks.md
diff --git a/.changeset/fix-thinking-loops.md b/.changeset/fix-thinking-loops.md
@@ -0,0 +1,23 @@
+---
+"@centralinc/browseragent": patch
+---
+
+Fix critical message history management issues that caused 400 errors and infinite loops
+
+**Three Major Fixes:**
+
+1. **Extended Thinking Loop Prevention**: Fixed an infinite loop where agents would repeat the same action (e.g., repeatedly calling `goto`). When Claude doesn't emit a thinking block, we now add a placeholder by reusing the most recent thinking block from history, instead of removing messages, which caused context loss.
+
+2. **Extended Thinking Block Validation**: When `thinkingBudget` is enabled, the API requires every assistant message to start with a thinking or redacted_thinking block. The fix adds placeholder thinking blocks to responses when Claude doesn't emit them, preventing 400 errors while preserving agent context.
+
+3. **Tool Use/Result Pairing**: Fixed "unexpected tool_use_id found in tool_result blocks" error. The API requires each tool_result to have its corresponding tool_use in the IMMEDIATELY PREVIOUS message, not just anywhere in history. Rewrote `cleanMessageHistory()` to validate pairing on a per-message basis.
+
+**Errors Fixed:**
+- Infinite loops with extended thinking (agent repeating same actions)
+- `"Expected thinking or redacted_thinking, but found text"`
+- `"unexpected tool_use_id found in tool_result blocks: [id]. Each tool_result block must have a corresponding tool_use block in the previous message"`
+
+**Testing:** 
+- Extended thinking test passes with multiple tool uses in 27.5s (no loops)
+- Properly handles missing thinking blocks by adding placeholders
+- Message history stays clean with proper tool_use/tool_result pairing
diff --git a/loop.ts b/loop.ts
@@ -17,7 +17,7 @@ import {
   injectPromptCaching,
   truncateMessageHistory,
   cleanMessageHistory,
-  ensureThinkingBlocksForExtendedThinking,
+  ensureThinkingBlockForResponse,
   PROMPT_CACHING_BETA_FLAG,
 } from "./utils/message-processing";
 import { makeApiToolResult } from "./utils/tool-results";
@@ -220,10 +220,6 @@ ${capabilityDocs}`,
     // Clean message history to ensure tool_use and tool_result blocks are properly paired
     cleanMessageHistory(messages);
 
-    // Ensure all assistant messages have thinking blocks when extended thinking is enabled
-    // This prevents 400 errors from the API
-    ensureThinkingBlocksForExtendedThinking(messages, !!thinkingBudget);
-
     if (onlyNMostRecentImages) {
       maybeFilterToNMostRecentImages(
         messages,
@@ -256,6 +252,10 @@ ${capabilityDocs}`,
 
     const responseParams = responseToParams(response);
 
+    // Ensure response has a thinking block when extended thinking is enabled
+    // This prevents 400 errors on the next API call
+    ensureThinkingBlockForResponse(responseParams, messages, !!thinkingBudget);
+
     const loggableContent = responseParams.map((block) => {
       if (block.type === "tool_use") {
         // Deep log the full input including arrays

diff --git a/utils/message-processing.ts b/utils/message-processing.ts
@@ -247,57 +247,60 @@ export function cleanMessageHistory(messages: BetaMessageParam[]): void {
 }
 
 /**
- * Ensure all assistant messages start with thinking blocks when extended thinking is enabled
+ * Add placeholder thinking block to response when extended thinking is enabled but Claude didn't emit one
  * This prevents the 400 error: "Expected `thinking` or `redacted_thinking`, but found `text`"
  * 
  * When thinking is enabled, the API requires that every assistant message must start with
- * a thinking or redacted_thinking block. This function filters out any assistant messages
- * that don't meet this requirement.
+ * a thinking or redacted_thinking block. If Claude's response doesn't include one, we add
+ * a placeholder by reusing the most recent thinking block from history.
  *
- * Additionally, when removing assistant messages, we also need to remove the corresponding
- * user message that follows (if any) to maintain proper conversation flow.
+ * This function modifies the responseParams array in place by prepending a thinking block
+ * if one is missing; when history holds no thinking block either, the response is left unchanged.
  *
- * @param messages - Array of conversation messages
+ * @param responseParams - The content blocks from Claude's response
+ * @param messages - Message history to search for previous thinking blocks
  * @param thinkingEnabled - Whether extended thinking is enabled
  */
-export function ensureThinkingBlocksForExtendedThinking(
+export function ensureThinkingBlockForResponse(
+  responseParams: BetaContentBlock[],
   messages: BetaMessageParam[],
   thinkingEnabled: boolean,
 ): void {
-  if (!thinkingEnabled) {
+  if (!thinkingEnabled || responseParams.length === 0) {
     return;
   }
 
-  // Filter out assistant messages that don't start with a thinking block
-  // Also remove the following user message to maintain conversation flow
-  const indicesToRemove: number[] = [];
-
-  for (let i = 0; i < messages.length; i++) {
-    const message = messages[i];
-    if (message?.role === "assistant" && Array.isArray(message.content)) {
-      const firstBlock = message.content[0];
-      const hasThinkingBlock = 
-        firstBlock &&
-        typeof firstBlock === "object" &&
-        (firstBlock.type === "thinking" || firstBlock.type === "redacted_thinking");
-
-      if (!hasThinkingBlock) {
-        indicesToRemove.push(i);
-
-        // Also mark the following user message for removal (if it exists)
-        // This maintains proper conversation flow (user -> assistant -> user -> assistant)
-        if (i + 1 < messages.length && messages[i + 1]?.role === "user") {
-          indicesToRemove.push(i + 1);
-        }
+  // Check if response already has a thinking block at the start
+  const firstBlock = responseParams[0];
+  const hasThinkingBlock = 
+    firstBlock &&
+    (firstBlock.type === "thinking" || firstBlock.type === "redacted_thinking");
+
+  if (hasThinkingBlock) {
+    return; // Response already has thinking block
+  }
+
+  // Claude didn't emit a thinking block - find the most recent one from history
+  let placeholderThinking: BetaContentBlock | null = null;
+
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg?.role === "assistant" && Array.isArray(msg.content)) {
+      const thinkingBlock = msg.content.find(
+        (block): block is BetaContentBlock =>
+          typeof block === "object" &&
+          (block.type === "thinking" || block.type === "redacted_thinking"),
+      );
+      if (thinkingBlock) {
+        // Clone the thinking block to avoid mutating the original
+        placeholderThinking = { ...thinkingBlock };
+        break;
       }
     }
   }
 
-  // Remove messages in reverse order to maintain correct indices
-  for (let i = indicesToRemove.length - 1; i >= 0; i--) {
-    const index = indicesToRemove[i];
-    if (index !== undefined) {
-      messages.splice(index, 1);
-    }
+  // Prepend the placeholder thinking block if we found one
+  if (placeholderThinking) {
+    responseParams.unshift(placeholderThinking);
   }
 }