From 5e7c22c344d27cebde39e7e50405cbaf6ca79764 Mon Sep 17 00:00:00 2001 From: bigboateng Date: Mon, 15 Dec 2025 19:55:52 +0100 Subject: [PATCH 1/2] fix: add placeholder thinking blocks to prevent infinite loops Changed approach to handle missing thinking blocks in extended thinking mode: OLD APPROACH (caused loops): - Removed assistant messages without thinking blocks from history - Caused excessive context loss - Agent would forget recent actions and repeat them NEW APPROACH (prevents loops): - Add placeholder thinking block to NEW response if Claude doesn't emit one - Reuse the most recent thinking block from history as placeholder - Preserves all context in message history - Prevents 400 errors while maintaining agent memory This matches the pattern from production systems and prevents infinite loop issues where agents repeatedly call the same action (e.g., goto). The API error refers to the NEW message being added, not old messages in history. We fix the new response rather than removing old context. Tested: Extended thinking example completes successfully in 27.5s without loops. --- .changeset/fix-extended-thinking-blocks.md | 16 +++-- loop.ts | 10 +-- utils/message-processing.ts | 73 +++++++++++----------- 3 files changed, 54 insertions(+), 45 deletions(-) diff --git a/.changeset/fix-extended-thinking-blocks.md b/.changeset/fix-extended-thinking-blocks.md index 49bf02f..b2de889 100644 --- a/.changeset/fix-extended-thinking-blocks.md +++ b/.changeset/fix-extended-thinking-blocks.md @@ -2,16 +2,22 @@ "@centralinc/browseragent": patch --- -Fix critical message history management issues preventing 400 errors +Fix critical message history management issues preventing 400 errors and infinite loops -**Two Major Fixes:** +**Three Major Fixes:** -1. **Extended Thinking Block Validation**: When `thinkingBudget` is enabled, the API requires every assistant message to start with a thinking or redacted_thinking block. 
Added `ensureThinkingBlocksForExtendedThinking()` to filter out assistant messages without thinking blocks and their corresponding user messages to maintain conversation flow. +1. **Extended Thinking Loop Prevention**: Fixed infinite loop where agents would repeat the same action (e.g., repeatedly calling `goto`). When Claude doesn't emit a thinking block, we now add a placeholder by reusing the most recent thinking block from history, instead of removing messages which caused context loss. -2. **Tool Use/Result Pairing**: Fixed "unexpected tool_use_id found in tool_result blocks" error. The API requires each tool_result to have its corresponding tool_use in the IMMEDIATELY PREVIOUS message, not just anywhere in history. Rewrote `cleanMessageHistory()` to validate pairing on a per-message basis. +2. **Extended Thinking Block Validation**: When `thinkingBudget` is enabled, the API requires every assistant message to start with a thinking or redacted_thinking block. The fix adds placeholder thinking blocks to responses when Claude doesn't emit them, preventing 400 errors while preserving agent context. + +3. **Tool Use/Result Pairing**: Fixed "unexpected tool_use_id found in tool_result blocks" error. The API requires each tool_result to have its corresponding tool_use in the IMMEDIATELY PREVIOUS message, not just anywhere in history. Rewrote `cleanMessageHistory()` to validate pairing on a per-message basis. **Errors Fixed:** +- Infinite loops with extended thinking (agent repeating same actions) - `"Expected thinking or redacted_thinking, but found text"` - `"unexpected tool_use_id found in tool_result blocks: [id]. Each tool_result block must have a corresponding tool_use block in the previous message"` -**Testing:** Extended thinking test passes with 15+ tool calls across multiple turns without errors. 
+**Testing:** +- Extended thinking test passes with multiple tool uses in 27.5s (no loops) +- Properly handles missing thinking blocks by adding placeholders +- Message history stays clean with proper tool_use/tool_result pairing diff --git a/loop.ts b/loop.ts index 6d6e088..8afdd47 100644 --- a/loop.ts +++ b/loop.ts @@ -17,7 +17,7 @@ import { injectPromptCaching, truncateMessageHistory, cleanMessageHistory, - ensureThinkingBlocksForExtendedThinking, + ensureThinkingBlockForResponse, PROMPT_CACHING_BETA_FLAG, } from "./utils/message-processing"; import { makeApiToolResult } from "./utils/tool-results"; @@ -220,10 +220,6 @@ ${capabilityDocs}`, // Clean message history to ensure tool_use and tool_result blocks are properly paired cleanMessageHistory(messages); - // Ensure all assistant messages have thinking blocks when extended thinking is enabled - // This prevents 400 errors from the API - ensureThinkingBlocksForExtendedThinking(messages, !!thinkingBudget); - if (onlyNMostRecentImages) { maybeFilterToNMostRecentImages( messages, @@ -256,6 +252,10 @@ ${capabilityDocs}`, const responseParams = responseToParams(response); + // Ensure response has a thinking block when extended thinking is enabled + // This prevents 400 errors on the next API call + ensureThinkingBlockForResponse(responseParams, messages, !!thinkingBudget); + const loggableContent = responseParams.map((block) => { if (block.type === "tool_use") { // Deep log the full input including arrays diff --git a/utils/message-processing.ts b/utils/message-processing.ts index 50d898b..8b9bad7 100644 --- a/utils/message-processing.ts +++ b/utils/message-processing.ts @@ -247,57 +247,60 @@ export function cleanMessageHistory(messages: BetaMessageParam[]): void { } /** - * Ensure all assistant messages start with thinking blocks when extended thinking is enabled + * Add placeholder thinking block to response when extended thinking is enabled but Claude didn't emit one * This prevents the 400 error: "Expected 
`thinking` or `redacted_thinking`, but found `text`" * * When thinking is enabled, the API requires that every assistant message must start with - * a thinking or redacted_thinking block. This function filters out any assistant messages - * that don't meet this requirement. + * a thinking or redacted_thinking block. If Claude's response doesn't include one, we add + * a placeholder by reusing the most recent thinking block from history. * - * Additionally, when removing assistant messages, we also need to remove the corresponding - * user message that follows (if any) to maintain proper conversation flow. + * This function modifies the responseParams array in place by prepending a thinking block + * if one is missing. * - * @param messages - Array of conversation messages + * @param responseParams - The content blocks from Claude's response + * @param messages - Message history to search for previous thinking blocks * @param thinkingEnabled - Whether extended thinking is enabled */ -export function ensureThinkingBlocksForExtendedThinking( +export function ensureThinkingBlockForResponse( + responseParams: BetaContentBlock[], messages: BetaMessageParam[], thinkingEnabled: boolean, ): void { - if (!thinkingEnabled) { + if (!thinkingEnabled || responseParams.length === 0) { return; } - // Filter out assistant messages that don't start with a thinking block - // Also remove the following user message to maintain conversation flow - const indicesToRemove: number[] = []; - - for (let i = 0; i < messages.length; i++) { - const message = messages[i]; - if (message?.role === "assistant" && Array.isArray(message.content)) { - const firstBlock = message.content[0]; - const hasThinkingBlock = - firstBlock && - typeof firstBlock === "object" && - (firstBlock.type === "thinking" || firstBlock.type === "redacted_thinking"); - - if (!hasThinkingBlock) { - indicesToRemove.push(i); - - // Also mark the following user message for removal (if it exists) - // This maintains proper 
conversation flow (user -> assistant -> user -> assistant) - if (i + 1 < messages.length && messages[i + 1]?.role === "user") { - indicesToRemove.push(i + 1); - } + // Check if response already has a thinking block at the start + const firstBlock = responseParams[0]; + const hasThinkingBlock = + firstBlock && + (firstBlock.type === "thinking" || firstBlock.type === "redacted_thinking"); + + if (hasThinkingBlock) { + return; // Response already has thinking block + } + + // Claude didn't emit a thinking block - find the most recent one from history + let placeholderThinking: BetaContentBlock | null = null; + + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg?.role === "assistant" && Array.isArray(msg.content)) { + const thinkingBlock = msg.content.find( + (block): block is BetaContentBlock => + typeof block === "object" && + (block.type === "thinking" || block.type === "redacted_thinking"), + ); + if (thinkingBlock) { + // Clone the thinking block to avoid mutating the original + placeholderThinking = { ...thinkingBlock }; + break; } } } - // Remove messages in reverse order to maintain correct indices - for (let i = indicesToRemove.length - 1; i >= 0; i--) { - const index = indicesToRemove[i]; - if (index !== undefined) { - messages.splice(index, 1); - } + // Prepend the placeholder thinking block if we found one + if (placeholderThinking) { + responseParams.unshift(placeholderThinking); } } From 58a263240aa48a2f4caa81d398cfd1d4f737a5dc Mon Sep 17 00:00:00 2001 From: bigboateng Date: Mon, 15 Dec 2025 20:00:21 +0100 Subject: [PATCH 2/2] chore: create new changeset for thinking loop fixes Replaced old changeset (already consumed) with new one covering: - Infinite loop prevention with placeholder thinking blocks - Extended thinking 400 error fixes - Tool use/result pairing validation --- .../{fix-extended-thinking-blocks.md => fix-thinking-loops.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 
.changeset/{fix-extended-thinking-blocks.md => fix-thinking-loops.md} (100%) diff --git a/.changeset/fix-extended-thinking-blocks.md b/.changeset/fix-thinking-loops.md similarity index 100% rename from .changeset/fix-extended-thinking-blocks.md rename to .changeset/fix-thinking-loops.md