diff --git a/.changeset/fix-thinking-block-handling.md b/.changeset/fix-thinking-block-handling.md
new file mode 100644
index 0000000..087dbc8
--- /dev/null
+++ b/.changeset/fix-thinking-block-handling.md
@@ -0,0 +1,29 @@
+---
+"@centralinc/browseragent": patch
+---
+
+Fix thinking block handling to prevent 400 errors when using extended thinking
+
+**Problem:** Using `thinkingBudget` caused 400 errors from Anthropic's API with the message: "Expected thinking or redacted_thinking, but found text. When thinking is enabled, a final assistant message must start with a thinking block."
+
+**Root Cause:** The `BetaThinkingBlock` type incorrectly defined `thinking` as a config object instead of a string containing the actual thinking content.
+
+**Changes:**
+- Fixed `BetaThinkingBlock` type: `thinking` is now correctly typed as `string`
+- Added `BetaRedactedThinkingBlock` type for handling redacted thinking responses
+- Updated `responseToParams` to properly parse both `thinking` and `redacted_thinking` blocks
+- Added explicit block ordering when constructing assistant messages to ensure thinking blocks always come first (API requirement)
+- Added test examples for extended thinking validation
+
+**Usage:** Extended thinking now works correctly across multi-turn conversations:
+
+```typescript
+const result = await agent.execute(
+  "Complex task requiring reasoning",
+  undefined,
+  {
+    thinkingBudget: 4096,
+    maxTokens: 16384,
+  }
+);
+```
diff --git a/examples/test-extended-thinking-multi-turn.ts b/examples/test-extended-thinking-multi-turn.ts
new file mode 100644
index 0000000..b2ba3d2
--- /dev/null
+++ b/examples/test-extended-thinking-multi-turn.ts
@@ -0,0 +1,111 @@
+/**
+ * Test Extended Thinking with Multi-Turn Tool Use
+ *
+ * This test exercises the thinking block handling across multiple tool calls,
+ * which is where the original 400 error would occur (issue #12).
+ *
+ * The agent needs to:
+ * 1. Navigate to a page (tool use)
+ * 2. Take screenshot and analyze (tool use)
+ * 3. Click on something (tool use)
+ * 4. Verify the result (tool use)
+ *
+ * Each turn with thinking enabled requires proper thinking block ordering.
+ */
+
+import { config } from "dotenv";
+config({ path: "./examples/.env" });
+
+import { chromium } from "playwright";
+import { ComputerUseAgent } from "../index";
+import { SimpleLogger } from "../utils/logger";
+
+async function main() {
+  console.log("=== Extended Thinking Multi-Turn Test ===\n");
+
+  if (!process.env.ANTHROPIC_API_KEY) {
+    console.error("ERROR: ANTHROPIC_API_KEY environment variable is required");
+    process.exit(1);
+  }
+
+  console.log("1. Launching browser...");
+  const browser = await chromium.launch({ headless: false });
+  const context = await browser.newContext({
+    viewport: { width: 1280, height: 800 },
+  });
+  const page = await context.newPage();
+  console.log(" ✓ Browser launched\n");
+
+  console.log("2. Navigating to Wikipedia...");
+  await page.goto("https://en.wikipedia.org/wiki/Main_Page");
+  await page.waitForLoadState("networkidle");
+  console.log(" ✓ Page loaded\n");
+
+  console.log("3. Creating agent with extended thinking...");
+  const logger = new SimpleLogger();
+  const agent = new ComputerUseAgent({
+    apiKey: process.env.ANTHROPIC_API_KEY!,
+    page,
+    logger,
+  });
+  console.log(" ✓ Agent created\n");
+
+  console.log("4. Executing multi-step task with thinkingBudget...\n");
+  console.log(" Task: Find and click the 'Random article' link, then report the article title\n");
+  console.log("=" .repeat(70));
+
+  const startTime = Date.now();
+
+  try {
+    const result = await agent.execute(
+      `You are on Wikipedia's main page. Please do the following:
+      1. First, take a screenshot to see the current page
+      2. Find and click on the "Random article" link (it's usually in the left sidebar)
+      3. Wait for the new page to load
+      4. Take another screenshot
+      5. Tell me the title of the random article you landed on
+
+      Be thorough in your reasoning.`,
+      undefined,
+      {
+        thinkingBudget: 4096,
+        maxTokens: 16384,
+      }
+    );
+
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(2);
+
+    console.log("=" .repeat(70));
+    console.log(`\n5. Result (completed in ${elapsed}s):\n`);
+    console.log(result);
+    console.log("\n✅ Multi-turn extended thinking test PASSED!");
+
+  } catch (error) {
+    console.log("=" .repeat(70));
+    console.error("\n❌ Multi-turn extended thinking test FAILED!");
+    console.error("\nError details:");
+    if (error instanceof Error) {
+      console.error(` Message: ${error.message}`);
+
+      // Check for the specific thinking block error
+      if (error.message.includes("thinking") || error.message.includes("redacted_thinking")) {
+        console.error("\n 🔴 This is a THINKING BLOCK HANDLING issue!");
+        console.error(" The fix in issue #12 may not be complete.");
+      }
+
+      if (error.message.includes("400")) {
+        console.error("\n 🔴 400 error from Anthropic API");
+        console.error(" This usually indicates improper message construction.");
+      }
+    } else {
+      console.error(error);
+    }
+    process.exitCode = 1; // not process.exit(): that would skip `finally` and leak the browser
+  } finally {
+    console.log("\n6. Closing browser...");
+    await browser.close();
+    console.log(" ✓ Browser closed");
+  }
+}
+
+main().catch(console.error);
diff --git a/examples/test-extended-thinking.ts b/examples/test-extended-thinking.ts
new file mode 100644
index 0000000..3e42d83
--- /dev/null
+++ b/examples/test-extended-thinking.ts
@@ -0,0 +1,82 @@
+/**
+ * Test Extended Thinking / thinkingBudget functionality
+ *
+ * This example verifies that extended thinking works correctly
+ * without causing 400 errors from improper thinking block handling.
+ */
+
+import { config } from "dotenv";
+config({ path: "./examples/.env" });
+import { chromium } from "playwright";
+import { ComputerUseAgent } from "../index";
+import { SimpleLogger } from "../utils/logger";
+
+const logger = new SimpleLogger();
+
+async function main() {
+  console.log("=== Extended Thinking Test ===\n");
+
+  if (!process.env.ANTHROPIC_API_KEY) {
+    console.error("ERROR: ANTHROPIC_API_KEY environment variable is required");
+    process.exit(1);
+  }
+
+  console.log("1. Launching browser...");
+  const browser = await chromium.launch({ headless: false });
+  const context = await browser.newContext({
+    viewport: { width: 1280, height: 800 },
+  });
+  const page = await context.newPage();
+  console.log(" ✓ Browser launched\n");
+
+  console.log("2. Navigating to test page...");
+  await page.goto("https://example.com");
+  console.log(" ✓ Page loaded\n");
+
+  console.log("3. Creating agent with extended thinking enabled...");
+  const agent = new ComputerUseAgent({
+    apiKey: process.env.ANTHROPIC_API_KEY!,
+    page,
+    logger,
+  });
+  console.log(" ✓ Agent created\n");
+
+  console.log("4. Executing task with thinkingBudget...\n");
+  console.log("=" .repeat(60));
+
+  try {
+    const result = await agent.execute(
+      "Look at this page and tell me: What is the title of the page and what is the main heading? Provide a brief summary.",
+      undefined,
+      {
+        thinkingBudget: 2048,
+        maxTokens: 8192,
+      }
+    );
+
+    console.log("=" .repeat(60));
+    console.log("\n5. Result:\n");
+    console.log(result);
+    console.log("\n✅ Extended thinking test PASSED!");
+  } catch (error) {
+    console.log("=" .repeat(60));
+    console.error("\n❌ Extended thinking test FAILED!");
+    console.error("\nError details:");
+    if (error instanceof Error) {
+      console.error(` Message: ${error.message}`);
+      if (error.message.includes("thinking")) {
+        console.error("\n This appears to be a thinking block handling issue.");
+        console.error(" The fix may not be complete.");
+      }
+    } else {
+      console.error(error);
+    }
+    process.exitCode = 1; // not process.exit(): that would skip `finally` and leak the browser
+  } finally {
+    console.log("\n6. Closing browser...");
+    await browser.close();
+    console.log(" ✓ Browser closed");
+  }
+}
+
+main().catch(console.error);
diff --git a/loop.ts b/loop.ts
index 5cca527..d5c9240 100644
--- a/loop.ts
+++ b/loop.ts
@@ -272,9 +272,23 @@ ${capabilityDocs}`,
       // Log LLM response
       logger.llmResponse(response.stop_reason ?? "unknown", stepIndex, loggableContent);
 
+      // Ensure proper block ordering for extended thinking:
+      // thinking/redacted_thinking blocks must come first in assistant messages
+      const orderedContent = [...responseParams].sort((a, b) => {
+        const order: Record<string, number> = {
+          thinking: 0,
+          redacted_thinking: 1,
+          text: 2,
+          tool_use: 3,
+        };
+        const aOrder = order[a.type] ?? 99;
+        const bOrder = order[b.type] ?? 99;
+        return aOrder - bOrder;
+      });
+
       messages.push({
         role: "assistant",
-        content: responseParams,
+        content: orderedContent,
       });
 
       if (response.stop_reason === "end_turn") {
diff --git a/types/beta.ts b/types/beta.ts
index 4caa9f1..d26f6c6 100644
--- a/types/beta.ts
+++ b/types/beta.ts
@@ -47,15 +47,15 @@ export interface BetaToolUseBlock {
 
 export interface BetaThinkingBlock {
   type: "thinking";
-  thinking:
-    | {
-        type: "enabled";
-        budget_tokens: number;
-      }
-    | {
-        type: "disabled";
-      };
-  signature?: string;
+  thinking: string;
+  signature: string;
+  id?: string;
+  cache_control?: { type: "ephemeral" };
+}
+
+export interface BetaRedactedThinkingBlock {
+  type: "redacted_thinking";
+  data: string;
   id?: string;
   cache_control?: { type: "ephemeral" };
 }
@@ -74,4 +74,5 @@ export type BetaLocalContentBlock =
   | BetaImageBlock
   | BetaToolUseBlock
   | BetaThinkingBlock
+  | BetaRedactedThinkingBlock
   | BetaToolResultBlock;
diff --git a/utils/message-processing.ts b/utils/message-processing.ts
index 58cae8b..7bee476 100644
--- a/utils/message-processing.ts
+++ b/utils/message-processing.ts
@@ -16,8 +16,17 @@ export function responseToParams(response: BetaMessage): BetaContentBlock[] {
       };
     }
     if (block.type === "thinking") {
-      const { thinking, signature, ...rest } = block;
-      return { ...rest, thinking, signature: signature || "" };
+      return {
+        type: "thinking" as const,
+        thinking: block.thinking,
+        signature: block.signature,
+      };
+    }
+    if (block.type === "redacted_thinking") {
+      return {
+        type: "redacted_thinking" as const,
+        data: block.data,
+      };
     }
     return block as BetaContentBlock;
   });