From 6b01100534f832d9fcdbd929737f2cb312a871ee Mon Sep 17 00:00:00 2001 From: xprilion Date: Sun, 3 May 2026 16:57:55 +0530 Subject: [PATCH 1/2] fix bugs --- backend/openmlr/agent/llm.py | 45 +++++++++++-- backend/openmlr/agent/loop.py | 48 +++++++++++++- backend/openmlr/agent/types.py | 7 ++ frontend/src/App.tsx | 52 ++++++++++++++- frontend/src/components/InputArea.tsx | 4 +- frontend/src/components/MessageList.tsx | 88 +++++++++++++++++++++---- frontend/src/types.ts | 6 ++ 7 files changed, 226 insertions(+), 24 deletions(-) diff --git a/backend/openmlr/agent/llm.py b/backend/openmlr/agent/llm.py index c2f94f1..c22a18c 100644 --- a/backend/openmlr/agent/llm.py +++ b/backend/openmlr/agent/llm.py @@ -6,7 +6,7 @@ from collections.abc import AsyncGenerator from ..config import AgentConfig -from .types import LLMResult, ToolCall +from .types import LLMResult, ThinkingChunk, ToolCall class LLMProvider: @@ -112,6 +112,18 @@ def _is_anthropic_model(model_name: str) -> bool: OpenRouter-routed Claude models use the OpenAI-compatible path.""" return model_name.lower().startswith("anthropic/") + @staticmethod + def _supports_thinking(model_name: str) -> bool: + """Check if an Anthropic model supports extended thinking (Claude 3.7+, Claude 4+).""" + normalized = LLMProvider._normalize_model(model_name).lower() + thinking_patterns = [ + "claude-3-7", + "claude-3.7", # Claude 3.7 Sonnet + "claude-sonnet-4", + "claude-opus-4", # Claude 4 family + ] + return any(p in normalized for p in thinking_patterns) + @staticmethod def _uses_anthropic_format(model_name: str, custom_providers: list | None = None) -> bool: """Check if model uses Anthropic message format (native Anthropic, OpenCode Go Anthropic, or custom provider with anthropic-sdk).""" @@ -139,7 +151,7 @@ async def generate_stream( messages: list[dict], config: AgentConfig, tools: list[dict] | None = None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: async for chunk in LLMProvider._stream_with_retry(messages, config, tools): yield chunk @@ -217,7 +229,7 @@ async def _stream_with_retry( messages: list[dict], config: AgentConfig, tools: list[dict] | None = None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: last_error = None for attempt in range(3): try: @@ -319,7 +331,7 @@ async def _stream_openai( messages: list[dict], config: AgentConfig, tools: list[dict] | None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: client = LLMProvider._openai_client(config) model = LLMProvider._normalize_model(config.model_name, config.custom_providers) @@ -358,6 +370,11 @@ async def _stream_openai( if delta is None: continue + # Reasoning content (OpenAI o1/o3 reasoning models) + reasoning = getattr(delta, "reasoning_content", None) + if reasoning: + yield ThinkingChunk(text=reasoning) + # Text content if delta.content: yield delta.content @@ -533,12 +550,20 @@ async def _call_anthropic( if anthropic_tools: params["tools"] = anthropic_tools + # Enable extended thinking for compatible models (Claude 3.7+, Claude 4+) + if LLMProvider._supports_thinking(config.model_name): + params["max_tokens"] = 16000 + params["thinking"] = {"type": "enabled", "budget_tokens": 10000} + params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} response = await client.messages.create(**params) tool_calls = [] text_content = "" for block in 
response.content: + if block.type == "thinking": + # Thinking blocks are not included in the response text + continue if block.type == "text": text_content += block.text elif block.type == "tool_use": @@ -564,7 +589,7 @@ async def _stream_anthropic( messages: list[dict], config: AgentConfig, tools: list[dict] | None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: model = LLMProvider._normalize_model(config.model_name, config.custom_providers) client = LLMProvider._anthropic_client(config) system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) @@ -582,11 +607,19 @@ async def _stream_anthropic( if anthropic_tools: params["tools"] = anthropic_tools + # Enable extended thinking for compatible models (Claude 3.7+, Claude 4+) + if LLMProvider._supports_thinking(config.model_name): + params["max_tokens"] = 16000 + params["thinking"] = {"type": "enabled", "budget_tokens": 10000} + params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} async with client.messages.stream(**params) as stream: async for event in stream: if event.type == "content_block_delta": - if event.delta.type == "text_delta": + if event.delta.type == "thinking_delta": + # Extended thinking content + yield ThinkingChunk(text=event.delta.thinking) + elif event.delta.type == "text_delta": yield event.delta.text if event.type == "message_delta" and event.usage: diff --git a/backend/openmlr/agent/loop.py b/backend/openmlr/agent/loop.py index 54cad93..2d6cb32 100644 --- a/backend/openmlr/agent/loop.py +++ b/backend/openmlr/agent/loop.py @@ -2,13 +2,14 @@ import asyncio import json +import time import traceback from ..config import AgentConfig from .doom_loop import detect_doom_loop from .llm import LLMProvider from .session import Session -from .types import AgentEvent, LLMResult, Message, OpType, Submission, ToolCall +from .types import AgentEvent, LLMResult, Message, OpType, Submission, ThinkingChunk, ToolCall def _append_hint_to_last_user_msg(messages: list[Message], hint: str) -> None: @@ -319,12 +320,35 @@ async def _stream_llm_call( content_buffer = "" tool_calls: list[ToolCall] = [] usage_data = None + thinking_started: float | None = None + was_thinking = False async for chunk in LLMProvider.generate_stream(messages, session.config, tools): if session.is_cancelled(): return None - if isinstance(chunk, str): + if isinstance(chunk, ThinkingChunk): + # Extended thinking / reasoning content + if thinking_started is None: + thinking_started = time.time() + was_thinking = True + await session.emit( + AgentEvent( + event_type="thinking_chunk", + data={"chunk": chunk.text}, + ) + ) + elif isinstance(chunk, str): + # Transition from thinking to text — emit thinking_end + if was_thinking: + duration = time.time() - thinking_started if thinking_started else 0 + await session.emit( + AgentEvent( + event_type="thinking_end", + data={"duration_seconds": round(duration, 1)}, + ) + ) + was_thinking = False content_buffer += chunk await session.emit( AgentEvent( @@ -333,6 +357,16 @@ async def _stream_llm_call( ) ) elif isinstance(chunk, ToolCall): + # Transition from thinking to tool call — emit thinking_end + if was_thinking: + duration = time.time() - thinking_started if thinking_started else 0 + await session.emit( + AgentEvent( + event_type="thinking_end", + data={"duration_seconds": round(duration, 1)}, + ) + ) + was_thinking = False tool_calls.append(chunk) await session.emit( AgentEvent( @@ -350,6 +384,16 @@ async def 
_stream_llm_call( if chunk.get("event") == "usage": usage_data = chunk.get("usage") + # If thinking was still active at end of stream (no text/tool followed), close it + if was_thinking and thinking_started: + duration = time.time() - thinking_started + await session.emit( + AgentEvent( + event_type="thinking_end", + data={"duration_seconds": round(duration, 1)}, + ) + ) + if content_buffer or tool_calls: await session.emit(AgentEvent(event_type="assistant_stream_end")) diff --git a/backend/openmlr/agent/types.py b/backend/openmlr/agent/types.py index 8409e30..44b8986 100644 --- a/backend/openmlr/agent/types.py +++ b/backend/openmlr/agent/types.py @@ -6,6 +6,13 @@ from typing import Any +@dataclass +class ThinkingChunk: + """A chunk of thinking/reasoning content from the LLM.""" + + text: str + + @dataclass class ToolCall: """A tool call requested by the LLM.""" diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 747a3ad..257b4d4 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -480,12 +480,50 @@ function ChatUI({ return [...prev, { id: nextId(), role: 'system', content: '::thinking::' }]; }); break; + case 'thinking_chunk': { + const tchunk = data?.chunk || ''; + if (!tchunk) break; + setMessages((prev) => { + // Remove plain ::thinking:: indicator if present + let msgs = prev; + if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); + // Append to existing thinking message or create new one + const last = msgs[msgs.length - 1]; + if (last?.role === 'system' && last.content === '::thinking_content::') { + const updated = [...msgs]; + updated[updated.length - 1] = { ...last, thinking: (last.thinking || '') + tchunk }; + return updated; + } + return [...msgs, { id: nextId(), role: 'system', content: '::thinking_content::', thinking: tchunk }]; + }); + break; + } + case 'thinking_end': { + const duration = data?.duration_seconds || 0; + setMessages((prev) => { + const idx = findLastIndex(prev, (m: Message) => m.role === 'system' && m.content === '::thinking_content::'); + if (idx >= 0) { + const updated = [...prev]; + updated[idx] = { ...updated[idx], thinkingDuration: duration }; + return updated; + } + return prev; + }); + break; + } case 'assistant_chunk': { const chunk = data?.chunk || data?.content || ''; if (!chunk) break; setMessages((prev) => { let msgs = prev; + // Remove plain ::thinking:: indicator if present if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); + // Collapse thinking block when reply starts + const thinkIdx = findLastIndex(msgs, (m: Message) => m.role === 'system' && m.content === '::thinking_content::' && !m.thinkingCollapsed); + if (thinkIdx >= 0) { + msgs = [...msgs]; + msgs[thinkIdx] = { ...msgs[thinkIdx], thinkingCollapsed: true }; + } const last = msgs[msgs.length - 1]; if (last?.role === 'assistant' && last.streaming) { const updated = [...msgs]; updated[updated.length - 1] = { ...last, content: last.content + chunk }; return updated; @@ -512,7 +550,13 @@ function ChatUI({ break; case 'tool_call': setMessages((prev) => { - const msgs = prev.filter((m) => !(m.role === 'system' && m.content === '::thinking::')); + let msgs = prev.filter((m) => !(m.role === 'system' && m.content === '::thinking::')); + // Collapse thinking block when tool call arrives + const thinkIdx = findLastIndex(msgs, (m: Message) => m.role === 'system' && m.content === '::thinking_content::' && !m.thinkingCollapsed); + if (thinkIdx >= 0) { + msgs = [...msgs]; + 
msgs[thinkIdx] = { ...msgs[thinkIdx], thinkingCollapsed: true }; + } return [...msgs, { id: nextId(), role: 'tool', content: '', metadata: { tool: data?.tool ?? '', tool_call_id: data?.id, args: typeof data?.arguments === 'string' ? data.arguments.slice(0, 120) : JSON.stringify(data?.arguments ?? {}).slice(0, 120) } }]; }); break; @@ -608,7 +652,11 @@ function ChatUI({ // Cancel any pending job_complete reload — SSE events already updated state if (reloadTimerRef.current) { clearTimeout(reloadTimerRef.current); reloadTimerRef.current = null; } setMessages((prev) => { - const c = prev.filter((m) => !(m.role === 'system' && m.content === '::thinking::')); + // Remove plain thinking indicator, collapse any uncollapsed thinking blocks + const c = prev + .filter((m) => !(m.role === 'system' && m.content === '::thinking::')) + .map((m) => (m.role === 'system' && m.content === '::thinking_content::' && !m.thinkingCollapsed) + ? { ...m, thinkingCollapsed: true } : m); const last = c[c.length - 1]; setCurrentConvStatus(last?.role === 'assistant' && last.content.trim().endsWith('?') ? 'waiting_input' : 'idle'); return c; diff --git a/frontend/src/components/InputArea.tsx b/frontend/src/components/InputArea.tsx index ecfe865..bab40bc 100644 --- a/frontend/src/components/InputArea.tsx +++ b/frontend/src/components/InputArea.tsx @@ -250,8 +250,8 @@ export const InputArea = React.memo(function InputArea({ disabled, showStop, mod /> - {/* Stop button */} - {showStop && ( + {/* Stop button — only during active processing (not waiting_input/waiting_approval) */} + {showStop && disabled && ( + {expanded && thinking && ( +
+
+            {thinking}
+          
+
+ )} + + ); +} + /** Individual message row — memoized to skip re-renders when other messages update */ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }: { msg: Message; @@ -152,18 +211,14 @@ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }: )} - {/* Assistant messages — defer markdown while streaming for performance */} + {/* Assistant messages — render markdown seamlessly during streaming */} {msg.role === 'assistant' && (
- {msg.streaming ? ( -
-              {msg.content}
-              
-            
- ) : ( - - {msg.content} - + + {msg.content} + + {msg.streaming && ( + )}
)} @@ -186,7 +241,7 @@ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }: /> )} - {/* Thinking indicator */} + {/* Thinking indicator (before any thinking content arrives) */} {msg.role === 'system' && msg.content === '::thinking::' && (
@@ -198,8 +253,17 @@ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }:
)} + {/* Thinking content block (streaming or collapsed) */} + {msg.role === 'system' && msg.content === '::thinking_content::' && ( + + )} + {/* System messages */} - {msg.role === 'system' && msg.content !== '::thinking::' && ( + {msg.role === 'system' && msg.content !== '::thinking::' && msg.content !== '::thinking_content::' && (
{msg.content}
diff --git a/frontend/src/types.ts b/frontend/src/types.ts index f39eee6..4f8f2d6 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -20,6 +20,12 @@ export interface Message { duration?: number; model?: string; mode?: string; + /** Accumulated thinking/reasoning content from the LLM */ + thinking?: string; + /** Duration in seconds the model spent thinking */ + thinkingDuration?: number; + /** Whether the thinking block is collapsed (model started replying) */ + thinkingCollapsed?: boolean; metadata?: { tool?: string; args?: string; From b45f5c1488a02cdeade81d09b08c6fa878d525ca Mon Sep 17 00:00:00 2001 From: xprilion Date: Sun, 3 May 2026 19:43:19 +0530 Subject: [PATCH 2/2] appease sonarqube --- backend/openmlr/agent/llm.py | 58 ++++++++++++------------- frontend/src/App.tsx | 57 ++++++++++++------------ frontend/src/components/MessageList.tsx | 4 +- 3 files changed, 58 insertions(+), 61 deletions(-) diff --git a/backend/openmlr/agent/llm.py b/backend/openmlr/agent/llm.py index c22a18c..12b4221 100644 --- a/backend/openmlr/agent/llm.py +++ b/backend/openmlr/agent/llm.py @@ -528,16 +528,15 @@ def _anthropic_client(config: AgentConfig): return AsyncAnthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) @staticmethod - async def _call_anthropic( - messages: list[dict], - config: AgentConfig, + def _build_anthropic_params( + model: str, + chat_msgs: list[dict], + system_prompt: str, tools: list[dict] | None, - ) -> LLMResult: - model = LLMProvider._normalize_model(config.model_name, config.custom_providers) - client = LLMProvider._anthropic_client(config) - system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) - - params = {"model": model, "messages": chat_msgs, "max_tokens": 4096} + model_name: str, + ) -> dict: + """Build the params dict shared by _call_anthropic and _stream_anthropic.""" + params: dict = {"model": model, "messages": chat_msgs, "max_tokens": 4096} if system_prompt: params["system"] = [ { @@ -549,13 +548,26 @@ async def _call_anthropic( anthropic_tools = LLMProvider._anthropic_tool_param(tools) if anthropic_tools: params["tools"] = anthropic_tools - # Enable extended thinking for compatible models (Claude 3.7+, Claude 4+) - if LLMProvider._supports_thinking(config.model_name): + if LLMProvider._supports_thinking(model_name): params["max_tokens"] = 16000 params["thinking"] = {"type": "enabled", "budget_tokens": 10000} - params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} + return params + + @staticmethod + async def _call_anthropic( + messages: list[dict], + config: AgentConfig, + tools: list[dict] | None, + ) -> LLMResult: + model = LLMProvider._normalize_model(config.model_name, config.custom_providers) + client = LLMProvider._anthropic_client(config) + system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) + + params = LLMProvider._build_anthropic_params( + model, chat_msgs, system_prompt, tools, config.model_name + ) response = await client.messages.create(**params) tool_calls = [] @@ -594,25 +606,9 @@ async def _stream_anthropic( client = LLMProvider._anthropic_client(config) system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) - params = {"model": model, "messages": chat_msgs, "max_tokens": 4096} - if system_prompt: - params["system"] = [ - { - "type": "text", - "text": system_prompt, - "cache_control": {"type": "ephemeral"}, - } - ] - anthropic_tools = LLMProvider._anthropic_tool_param(tools) - if anthropic_tools: - params["tools"] = anthropic_tools - - # Enable extended thinking 
for compatible models (Claude 3.7+, Claude 4+) - if LLMProvider._supports_thinking(config.model_name): - params["max_tokens"] = 16000 - params["thinking"] = {"type": "enabled", "budget_tokens": 10000} - - params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} + params = LLMProvider._build_anthropic_params( + model, chat_msgs, system_prompt, tools, config.model_name + ) async with client.messages.stream(**params) as stream: async for event in stream: if event.type == "content_block_delta": diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 257b4d4..26a7d81 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -480,35 +480,36 @@ function ChatUI({ return [...prev, { id: nextId(), role: 'system', content: '::thinking::' }]; }); break; - case 'thinking_chunk': { - const tchunk = data?.chunk || ''; - if (!tchunk) break; - setMessages((prev) => { - // Remove plain ::thinking:: indicator if present - let msgs = prev; - if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); - // Append to existing thinking message or create new one - const last = msgs[msgs.length - 1]; - if (last?.role === 'system' && last.content === '::thinking_content::') { - const updated = [...msgs]; - updated[updated.length - 1] = { ...last, thinking: (last.thinking || '') + tchunk }; - return updated; - } - return [...msgs, { id: nextId(), role: 'system', content: '::thinking_content::', thinking: tchunk }]; - }); - break; - } + case 'thinking_chunk': case 'thinking_end': { - const duration = data?.duration_seconds || 0; - setMessages((prev) => { - const idx = findLastIndex(prev, (m: Message) => m.role === 'system' && m.content === '::thinking_content::'); - if (idx >= 0) { - const updated = [...prev]; - updated[idx] = { ...updated[idx], thinkingDuration: duration }; - return updated; - } - return prev; - }); + if (event_type === 'thinking_chunk') { + const tchunk = data?.chunk || ''; + if (!tchunk) break; + setMessages((prev) => { + // Remove plain ::thinking:: indicator if present + let msgs = prev; + if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); + // Append to existing thinking message or create new one + const last = msgs[msgs.length - 1]; + if (last?.role === 'system' && last.content === '::thinking_content::') { + const updated = [...msgs]; + updated[updated.length - 1] = { ...last, thinking: (last.thinking || '') + tchunk }; + return updated; + } + return [...msgs, { id: nextId(), role: 'system', content: '::thinking_content::', thinking: tchunk }]; + }); + } else { + const duration = data?.duration_seconds || 0; + setMessages((prev) => { + const idx = findLastIndex(prev, (m: Message) => m.role === 'system' && m.content === '::thinking_content::'); + if (idx >= 0) { + const updated = [...prev]; + updated[idx] = { ...updated[idx], thinkingDuration: duration }; + return updated; + } + return prev; + }); + } break; } case 'assistant_chunk': { diff --git a/frontend/src/components/MessageList.tsx b/frontend/src/components/MessageList.tsx index 75d13eb..fa8fadb 100644 --- a/frontend/src/components/MessageList.tsx +++ b/frontend/src/components/MessageList.tsx @@ -132,12 +132,12 @@ function formatThinkingDuration(seconds?: number): string { if (seconds < 60) return `${Math.round(seconds)} seconds`; const m = Math.floor(seconds / 60); const s = Math.round(seconds % 60); - if (s === 0) return `${m} minute${m !== 1 ? 's' : ''}`; + if (s === 0) return `${m} minute${m === 1 ? 
'' : 's'}`; return `${m}m ${s}s`; } /** Thinking/reasoning block — shows model thinking, collapses when reply starts */ -function ThinkingBlock({ msg, expanded, onToggle }: { msg: Message; expanded: boolean; onToggle: () => void }) { +function ThinkingBlock({ msg, expanded, onToggle }: Readonly<{ msg: Message; expanded: boolean; onToggle: () => void }>) { const thinking = msg.thinking || ''; const duration = msg.thinkingDuration; const collapsed = msg.thinkingCollapsed;
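
Note for reviewers: below is a minimal sketch of the thinking-event contract the patch introduces, assuming only the event names and payload keys that appear in loop.py ("thinking_chunk" carrying "chunk", "thinking_end" carrying "duration_seconds"). The ThinkingTrace helper and apply_event function are hypothetical illustrations, not part of the codebase; they mirror the accumulation and close-out logic that App.tsx applies to its ::thinking_content:: messages.

# Hypothetical consumer of the agent event stream; not part of this patch.
from dataclasses import dataclass


@dataclass
class ThinkingTrace:
    """Accumulated reasoning text plus the duration reported at thinking_end."""

    text: str = ""
    duration_seconds: float | None = None

    @property
    def finished(self) -> bool:
        return self.duration_seconds is not None


def apply_event(trace: ThinkingTrace, event_type: str, data: dict) -> ThinkingTrace:
    if event_type == "thinking_chunk":
        # Incremental reasoning text arrives in data["chunk"].
        trace.text += data.get("chunk", "")
    elif event_type == "thinking_end":
        # Emitted once the model switches to visible text or a tool call.
        trace.duration_seconds = data.get("duration_seconds", 0.0)
    return trace


if __name__ == "__main__":
    trace = ThinkingTrace()
    apply_event(trace, "thinking_chunk", {"chunk": "Weighing tool options... "})
    apply_event(trace, "thinking_end", {"duration_seconds": 3.2})
    assert trace.finished and trace.text.startswith("Weighing")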