diff --git a/.changeset/fix-extended-thinking-blocks.md b/.changeset/fix-extended-thinking-blocks.md deleted file mode 100644 index 49bf02f..0000000 --- a/.changeset/fix-extended-thinking-blocks.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -"@centralinc/browseragent": patch ---- - -Fix critical message history management issues preventing 400 errors - -**Two Major Fixes:** - -1. **Extended Thinking Block Validation**: When `thinkingBudget` is enabled, the API requires every assistant message to start with a thinking or redacted_thinking block. Added `ensureThinkingBlocksForExtendedThinking()` to filter out assistant messages without thinking blocks and their corresponding user messages to maintain conversation flow. - -2. **Tool Use/Result Pairing**: Fixed "unexpected tool_use_id found in tool_result blocks" error. The API requires each tool_result to have its corresponding tool_use in the IMMEDIATELY PREVIOUS message, not just anywhere in history. Rewrote `cleanMessageHistory()` to validate pairing on a per-message basis. - -**Errors Fixed:** -- `"Expected thinking or redacted_thinking, but found text"` -- `"unexpected tool_use_id found in tool_result blocks: [id]. Each tool_result block must have a corresponding tool_use block in the previous message"` - -**Testing:** Extended thinking test passes with 15+ tool calls across multiple turns without errors. diff --git a/.changeset/fix-thinking-loops.md b/.changeset/fix-thinking-loops.md new file mode 100644 index 0000000..b2de889 --- /dev/null +++ b/.changeset/fix-thinking-loops.md @@ -0,0 +1,23 @@ +--- +"@centralinc/browseragent": patch +--- + +Fix critical message history management issues preventing 400 errors and infinite loops + +**Three Major Fixes:** + +1. **Extended Thinking Loop Prevention**: Fixed infinite loop where agents would repeat the same action (e.g., repeatedly calling `goto`). 
When Claude doesn't emit a thinking block, we now add a placeholder by reusing the most recent thinking block from history, instead of removing messages, which caused context loss. + +2. **Extended Thinking Block Validation**: When `thinkingBudget` is enabled, the API requires every assistant message to start with a thinking or redacted_thinking block. The fix adds placeholder thinking blocks to responses when Claude doesn't emit them, preventing 400 errors while preserving agent context. + +3. **Tool Use/Result Pairing**: Fixed "unexpected tool_use_id found in tool_result blocks" error. The API requires each tool_result to have its corresponding tool_use in the IMMEDIATELY PREVIOUS message, not just anywhere in history. Rewrote `cleanMessageHistory()` to validate pairing on a per-message basis. + +**Errors Fixed:** +- Infinite loops with extended thinking (agent repeating the same actions) +- `"Expected thinking or redacted_thinking, but found text"` +- `"unexpected tool_use_id found in tool_result blocks: [id]. 
Each tool_result block must have a corresponding tool_use block in the previous message"` + +**Testing:** +- Extended thinking test passes with multiple tool uses in 27.5s (no loops) +- Properly handles missing thinking blocks by adding placeholders +- Message history stays clean with proper tool_use/tool_result pairing diff --git a/loop.ts b/loop.ts index 6d6e088..8afdd47 100644 --- a/loop.ts +++ b/loop.ts @@ -17,7 +17,7 @@ import { injectPromptCaching, truncateMessageHistory, cleanMessageHistory, - ensureThinkingBlocksForExtendedThinking, + ensureThinkingBlockForResponse, PROMPT_CACHING_BETA_FLAG, } from "./utils/message-processing"; import { makeApiToolResult } from "./utils/tool-results"; @@ -220,10 +220,6 @@ ${capabilityDocs}`, // Clean message history to ensure tool_use and tool_result blocks are properly paired cleanMessageHistory(messages); - // Ensure all assistant messages have thinking blocks when extended thinking is enabled - // This prevents 400 errors from the API - ensureThinkingBlocksForExtendedThinking(messages, !!thinkingBudget); - if (onlyNMostRecentImages) { maybeFilterToNMostRecentImages( messages, @@ -256,6 +252,10 @@ ${capabilityDocs}`, const responseParams = responseToParams(response); + // Ensure response has a thinking block when extended thinking is enabled + // This prevents 400 errors on the next API call + ensureThinkingBlockForResponse(responseParams, messages, !!thinkingBudget); + const loggableContent = responseParams.map((block) => { if (block.type === "tool_use") { // Deep log the full input including arrays diff --git a/utils/message-processing.ts b/utils/message-processing.ts index 50d898b..8b9bad7 100644 --- a/utils/message-processing.ts +++ b/utils/message-processing.ts @@ -247,57 +247,60 @@ export function cleanMessageHistory(messages: BetaMessageParam[]): void { } /** - * Ensure all assistant messages start with thinking blocks when extended thinking is enabled + * Add placeholder thinking block to response when extended 
thinking is enabled but Claude didn't emit one * This prevents the 400 error: "Expected `thinking` or `redacted_thinking`, but found `text`" * * When thinking is enabled, the API requires that every assistant message must start with - * a thinking or redacted_thinking block. This function filters out any assistant messages - * that don't meet this requirement. + * a thinking or redacted_thinking block. If Claude's response doesn't include one, we add + * a placeholder by reusing the most recent thinking block from history. * - * Additionally, when removing assistant messages, we also need to remove the corresponding - * user message that follows (if any) to maintain proper conversation flow. + * This function modifies the responseParams array in place by prepending a thinking block + * if one is missing. * - * @param messages - Array of conversation messages + * @param responseParams - The content blocks from Claude's response + * @param messages - Message history to search for previous thinking blocks * @param thinkingEnabled - Whether extended thinking is enabled */ -export function ensureThinkingBlocksForExtendedThinking( +export function ensureThinkingBlockForResponse( + responseParams: BetaContentBlock[], messages: BetaMessageParam[], thinkingEnabled: boolean, ): void { - if (!thinkingEnabled) { + if (!thinkingEnabled || responseParams.length === 0) { return; } - // Filter out assistant messages that don't start with a thinking block - // Also remove the following user message to maintain conversation flow - const indicesToRemove: number[] = []; - - for (let i = 0; i < messages.length; i++) { - const message = messages[i]; - if (message?.role === "assistant" && Array.isArray(message.content)) { - const firstBlock = message.content[0]; - const hasThinkingBlock = - firstBlock && - typeof firstBlock === "object" && - (firstBlock.type === "thinking" || firstBlock.type === "redacted_thinking"); - - if (!hasThinkingBlock) { - indicesToRemove.push(i); - - // Also 
mark the following user message for removal (if it exists) - // This maintains proper conversation flow (user -> assistant -> user -> assistant) - if (i + 1 < messages.length && messages[i + 1]?.role === "user") { - indicesToRemove.push(i + 1); - } + // Check if response already has a thinking block at the start + const firstBlock = responseParams[0]; + const hasThinkingBlock = + firstBlock && + (firstBlock.type === "thinking" || firstBlock.type === "redacted_thinking"); + + if (hasThinkingBlock) { + return; // Response already has thinking block + } + + // Claude didn't emit a thinking block - find the most recent one from history + let placeholderThinking: BetaContentBlock | null = null; + + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg?.role === "assistant" && Array.isArray(msg.content)) { + const thinkingBlock = msg.content.find( + (block): block is BetaContentBlock => + typeof block === "object" && + (block.type === "thinking" || block.type === "redacted_thinking"), + ); + if (thinkingBlock) { + // Clone the thinking block to avoid mutating the original + placeholderThinking = { ...thinkingBlock }; + break; } } } - // Remove messages in reverse order to maintain correct indices - for (let i = indicesToRemove.length - 1; i >= 0; i--) { - const index = indicesToRemove[i]; - if (index !== undefined) { - messages.splice(index, 1); - } + // Prepend the placeholder thinking block if we found one + if (placeholderThinking) { + responseParams.unshift(placeholderThinking); } }