From 6b01100534f832d9fcdbd929737f2cb312a871ee Mon Sep 17 00:00:00 2001 From: xprilion Date: Sun, 3 May 2026 16:57:55 +0530 Subject: [PATCH 1/2] fix bugs --- backend/openmlr/agent/llm.py | 45 +++++++++++-- backend/openmlr/agent/loop.py | 48 +++++++++++++- backend/openmlr/agent/types.py | 7 ++ frontend/src/App.tsx | 52 ++++++++++++++- frontend/src/components/InputArea.tsx | 4 +- frontend/src/components/MessageList.tsx | 88 +++++++++++++++++++++---- frontend/src/types.ts | 6 ++ 7 files changed, 226 insertions(+), 24 deletions(-) diff --git a/backend/openmlr/agent/llm.py b/backend/openmlr/agent/llm.py index c2f94f1..c22a18c 100644 --- a/backend/openmlr/agent/llm.py +++ b/backend/openmlr/agent/llm.py @@ -6,7 +6,7 @@ from collections.abc import AsyncGenerator from ..config import AgentConfig -from .types import LLMResult, ToolCall +from .types import LLMResult, ThinkingChunk, ToolCall class LLMProvider: @@ -112,6 +112,18 @@ def _is_anthropic_model(model_name: str) -> bool: OpenRouter-routed Claude models use the OpenAI-compatible path.""" return model_name.lower().startswith("anthropic/") + @staticmethod + def _supports_thinking(model_name: str) -> bool: + """Check if an Anthropic model supports extended thinking (Claude 3.7+, Claude 4+).""" + normalized = LLMProvider._normalize_model(model_name).lower() + thinking_patterns = [ + "claude-3-7", + "claude-3.7", # Claude 3.7 Sonnet + "claude-sonnet-4", + "claude-opus-4", # Claude 4 family + ] + return any(p in normalized for p in thinking_patterns) + @staticmethod def _uses_anthropic_format(model_name: str, custom_providers: list | None = None) -> bool: """Check if model uses Anthropic message format (native Anthropic, OpenCode Go Anthropic, or custom provider with anthropic-sdk).""" @@ -139,7 +151,7 @@ async def generate_stream( messages: list[dict], config: AgentConfig, tools: list[dict] | None = None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: async for chunk in LLMProvider._stream_with_retry(messages, config, tools): yield chunk @@ -217,7 +229,7 @@ async def _stream_with_retry( messages: list[dict], config: AgentConfig, tools: list[dict] | None = None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: last_error = None for attempt in range(3): try: @@ -319,7 +331,7 @@ async def _stream_openai( messages: list[dict], config: AgentConfig, tools: list[dict] | None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: client = LLMProvider._openai_client(config) model = LLMProvider._normalize_model(config.model_name, config.custom_providers) @@ -358,6 +370,11 @@ async def _stream_openai( if delta is None: continue + # Reasoning content (OpenAI o1/o3 reasoning models) + reasoning = getattr(delta, "reasoning_content", None) + if reasoning: + yield ThinkingChunk(text=reasoning) + # Text content if delta.content: yield delta.content @@ -533,12 +550,20 @@ async def _call_anthropic( if anthropic_tools: params["tools"] = anthropic_tools + # Enable extended thinking for compatible models (Claude 3.7+, Claude 4+) + if LLMProvider._supports_thinking(config.model_name): + params["max_tokens"] = 16000 + params["thinking"] = {"type": "enabled", "budget_tokens": 10000} + params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} response = await client.messages.create(**params) tool_calls = [] text_content = "" for block in 
response.content: + if block.type == "thinking": + # Thinking blocks are not included in the response text + continue if block.type == "text": text_content += block.text elif block.type == "tool_use": @@ -564,7 +589,7 @@ async def _stream_anthropic( messages: list[dict], config: AgentConfig, tools: list[dict] | None, - ) -> AsyncGenerator[str | ToolCall | dict, None]: + ) -> AsyncGenerator[str | ToolCall | ThinkingChunk | dict, None]: model = LLMProvider._normalize_model(config.model_name, config.custom_providers) client = LLMProvider._anthropic_client(config) system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) @@ -582,11 +607,19 @@ async def _stream_anthropic( if anthropic_tools: params["tools"] = anthropic_tools + # Enable extended thinking for compatible models (Claude 3.7+, Claude 4+) + if LLMProvider._supports_thinking(config.model_name): + params["max_tokens"] = 16000 + params["thinking"] = {"type": "enabled", "budget_tokens": 10000} + params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} async with client.messages.stream(**params) as stream: async for event in stream: if event.type == "content_block_delta": - if event.delta.type == "text_delta": + if event.delta.type == "thinking_delta": + # Extended thinking content + yield ThinkingChunk(text=event.delta.thinking) + elif event.delta.type == "text_delta": yield event.delta.text if event.type == "message_delta" and event.usage: diff --git a/backend/openmlr/agent/loop.py b/backend/openmlr/agent/loop.py index 54cad93..2d6cb32 100644 --- a/backend/openmlr/agent/loop.py +++ b/backend/openmlr/agent/loop.py @@ -2,13 +2,14 @@ import asyncio import json +import time import traceback from ..config import AgentConfig from .doom_loop import detect_doom_loop from .llm import LLMProvider from .session import Session -from .types import AgentEvent, LLMResult, Message, OpType, Submission, ToolCall +from .types import AgentEvent, LLMResult, Message, OpType, Submission, ThinkingChunk, ToolCall def _append_hint_to_last_user_msg(messages: list[Message], hint: str) -> None: @@ -319,12 +320,35 @@ async def _stream_llm_call( content_buffer = "" tool_calls: list[ToolCall] = [] usage_data = None + thinking_started: float | None = None + was_thinking = False async for chunk in LLMProvider.generate_stream(messages, session.config, tools): if session.is_cancelled(): return None - if isinstance(chunk, str): + if isinstance(chunk, ThinkingChunk): + # Extended thinking / reasoning content + if thinking_started is None: + thinking_started = time.time() + was_thinking = True + await session.emit( + AgentEvent( + event_type="thinking_chunk", + data={"chunk": chunk.text}, + ) + ) + elif isinstance(chunk, str): + # Transition from thinking to text — emit thinking_end + if was_thinking: + duration = time.time() - thinking_started if thinking_started else 0 + await session.emit( + AgentEvent( + event_type="thinking_end", + data={"duration_seconds": round(duration, 1)}, + ) + ) + was_thinking = False content_buffer += chunk await session.emit( AgentEvent( @@ -333,6 +357,16 @@ async def _stream_llm_call( ) ) elif isinstance(chunk, ToolCall): + # Transition from thinking to tool call — emit thinking_end + if was_thinking: + duration = time.time() - thinking_started if thinking_started else 0 + await session.emit( + AgentEvent( + event_type="thinking_end", + data={"duration_seconds": round(duration, 1)}, + ) + ) + was_thinking = False tool_calls.append(chunk) await session.emit( AgentEvent( @@ -350,6 +384,16 @@ async def 
_stream_llm_call( if chunk.get("event") == "usage": usage_data = chunk.get("usage") + # If thinking was still active at end of stream (no text/tool followed), close it + if was_thinking and thinking_started: + duration = time.time() - thinking_started + await session.emit( + AgentEvent( + event_type="thinking_end", + data={"duration_seconds": round(duration, 1)}, + ) + ) + if content_buffer or tool_calls: await session.emit(AgentEvent(event_type="assistant_stream_end")) diff --git a/backend/openmlr/agent/types.py b/backend/openmlr/agent/types.py index 8409e30..44b8986 100644 --- a/backend/openmlr/agent/types.py +++ b/backend/openmlr/agent/types.py @@ -6,6 +6,13 @@ from typing import Any +@dataclass +class ThinkingChunk: + """A chunk of thinking/reasoning content from the LLM.""" + + text: str + + @dataclass class ToolCall: """A tool call requested by the LLM.""" diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 747a3ad..257b4d4 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -480,12 +480,50 @@ function ChatUI({ return [...prev, { id: nextId(), role: 'system', content: '::thinking::' }]; }); break; + case 'thinking_chunk': { + const tchunk = data?.chunk || ''; + if (!tchunk) break; + setMessages((prev) => { + // Remove plain ::thinking:: indicator if present + let msgs = prev; + if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); + // Append to existing thinking message or create new one + const last = msgs[msgs.length - 1]; + if (last?.role === 'system' && last.content === '::thinking_content::') { + const updated = [...msgs]; + updated[updated.length - 1] = { ...last, thinking: (last.thinking || '') + tchunk }; + return updated; + } + return [...msgs, { id: nextId(), role: 'system', content: '::thinking_content::', thinking: tchunk }]; + }); + break; + } + case 'thinking_end': { + const duration = data?.duration_seconds || 0; + setMessages((prev) => { + const idx = findLastIndex(prev, (m: Message) => m.role === 'system' && m.content === '::thinking_content::'); + if (idx >= 0) { + const updated = [...prev]; + updated[idx] = { ...updated[idx], thinkingDuration: duration }; + return updated; + } + return prev; + }); + break; + } case 'assistant_chunk': { const chunk = data?.chunk || data?.content || ''; if (!chunk) break; setMessages((prev) => { let msgs = prev; + // Remove plain ::thinking:: indicator if present if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); + // Collapse thinking block when reply starts + const thinkIdx = findLastIndex(msgs, (m: Message) => m.role === 'system' && m.content === '::thinking_content::' && !m.thinkingCollapsed); + if (thinkIdx >= 0) { + msgs = [...msgs]; + msgs[thinkIdx] = { ...msgs[thinkIdx], thinkingCollapsed: true }; + } const last = msgs[msgs.length - 1]; if (last?.role === 'assistant' && last.streaming) { const updated = [...msgs]; updated[updated.length - 1] = { ...last, content: last.content + chunk }; return updated; @@ -512,7 +550,13 @@ function ChatUI({ break; case 'tool_call': setMessages((prev) => { - const msgs = prev.filter((m) => !(m.role === 'system' && m.content === '::thinking::')); + let msgs = prev.filter((m) => !(m.role === 'system' && m.content === '::thinking::')); + // Collapse thinking block when tool call arrives + const thinkIdx = findLastIndex(msgs, (m: Message) => m.role === 'system' && m.content === '::thinking_content::' && !m.thinkingCollapsed); + if (thinkIdx >= 0) { + msgs = [...msgs]; + 
msgs[thinkIdx] = { ...msgs[thinkIdx], thinkingCollapsed: true }; + } return [...msgs, { id: nextId(), role: 'tool', content: '', metadata: { tool: data?.tool ?? '', tool_call_id: data?.id, args: typeof data?.arguments === 'string' ? data.arguments.slice(0, 120) : JSON.stringify(data?.arguments ?? {}).slice(0, 120) } }]; }); break; @@ -608,7 +652,11 @@ function ChatUI({ // Cancel any pending job_complete reload — SSE events already updated state if (reloadTimerRef.current) { clearTimeout(reloadTimerRef.current); reloadTimerRef.current = null; } setMessages((prev) => { - const c = prev.filter((m) => !(m.role === 'system' && m.content === '::thinking::')); + // Remove plain thinking indicator, collapse any uncollapsed thinking blocks + const c = prev + .filter((m) => !(m.role === 'system' && m.content === '::thinking::')) + .map((m) => (m.role === 'system' && m.content === '::thinking_content::' && !m.thinkingCollapsed) + ? { ...m, thinkingCollapsed: true } : m); const last = c[c.length - 1]; setCurrentConvStatus(last?.role === 'assistant' && last.content.trim().endsWith('?') ? 'waiting_input' : 'idle'); return c; diff --git a/frontend/src/components/InputArea.tsx b/frontend/src/components/InputArea.tsx index ecfe865..bab40bc 100644 --- a/frontend/src/components/InputArea.tsx +++ b/frontend/src/components/InputArea.tsx @@ -250,8 +250,8 @@ export const InputArea = React.memo(function InputArea({ disabled, showStop, mod /> - {/* Stop button */} - {showStop && ( + {/* Stop button — only during active processing (not waiting_input/waiting_approval) */} + {showStop && disabled && ( + {expanded && thinking && ( +
+
+            {thinking}
+          
+
+ )} + + ); +} + /** Individual message row — memoized to skip re-renders when other messages update */ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }: { msg: Message; @@ -152,18 +211,14 @@ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }: )} - {/* Assistant messages — defer markdown while streaming for performance */} + {/* Assistant messages — render markdown seamlessly during streaming */} {msg.role === 'assistant' && (
- {msg.streaming ? ( -
-              {msg.content}
-              
-            
- ) : ( - - {msg.content} - + + {msg.content} + + {msg.streaming && ( + )}
)} @@ -186,7 +241,7 @@ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }: /> )} - {/* Thinking indicator */} + {/* Thinking indicator (before any thinking content arrives) */} {msg.role === 'system' && msg.content === '::thinking::' && (
@@ -198,8 +253,17 @@ const MessageRow = React.memo(function MessageRow({ msg, isExpanded, onToggle }:
)} + {/* Thinking content block (streaming or collapsed) */} + {msg.role === 'system' && msg.content === '::thinking_content::' && ( + + )} + {/* System messages */} - {msg.role === 'system' && msg.content !== '::thinking::' && ( + {msg.role === 'system' && msg.content !== '::thinking::' && msg.content !== '::thinking_content::' && (
{msg.content}
diff --git a/frontend/src/types.ts b/frontend/src/types.ts index f39eee6..4f8f2d6 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -20,6 +20,12 @@ export interface Message { duration?: number; model?: string; mode?: string; + /** Accumulated thinking/reasoning content from the LLM */ + thinking?: string; + /** Duration in seconds the model spent thinking */ + thinkingDuration?: number; + /** Whether the thinking block is collapsed (model started replying) */ + thinkingCollapsed?: boolean; metadata?: { tool?: string; args?: string; From b45f5c1488a02cdeade81d09b08c6fa878d525ca Mon Sep 17 00:00:00 2001 From: xprilion Date: Sun, 3 May 2026 19:43:19 +0530 Subject: [PATCH 2/2] appease sonarqube --- backend/openmlr/agent/llm.py | 58 ++++++++++++------------- frontend/src/App.tsx | 57 ++++++++++++------------ frontend/src/components/MessageList.tsx | 4 +- 3 files changed, 58 insertions(+), 61 deletions(-) diff --git a/backend/openmlr/agent/llm.py b/backend/openmlr/agent/llm.py index c22a18c..12b4221 100644 --- a/backend/openmlr/agent/llm.py +++ b/backend/openmlr/agent/llm.py @@ -528,16 +528,15 @@ def _anthropic_client(config: AgentConfig): return AsyncAnthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) @staticmethod - async def _call_anthropic( - messages: list[dict], - config: AgentConfig, + def _build_anthropic_params( + model: str, + chat_msgs: list[dict], + system_prompt: str, tools: list[dict] | None, - ) -> LLMResult: - model = LLMProvider._normalize_model(config.model_name, config.custom_providers) - client = LLMProvider._anthropic_client(config) - system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) - - params = {"model": model, "messages": chat_msgs, "max_tokens": 4096} + model_name: str, + ) -> dict: + """Build the params dict shared by _call_anthropic and _stream_anthropic.""" + params: dict = {"model": model, "messages": chat_msgs, "max_tokens": 4096} if system_prompt: params["system"] = [ { @@ -549,13 +548,26 @@ async def _call_anthropic( anthropic_tools = LLMProvider._anthropic_tool_param(tools) if anthropic_tools: params["tools"] = anthropic_tools - # Enable extended thinking for compatible models (Claude 3.7+, Claude 4+) - if LLMProvider._supports_thinking(config.model_name): + if LLMProvider._supports_thinking(model_name): params["max_tokens"] = 16000 params["thinking"] = {"type": "enabled", "budget_tokens": 10000} - params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} + return params + + @staticmethod + async def _call_anthropic( + messages: list[dict], + config: AgentConfig, + tools: list[dict] | None, + ) -> LLMResult: + model = LLMProvider._normalize_model(config.model_name, config.custom_providers) + client = LLMProvider._anthropic_client(config) + system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) + + params = LLMProvider._build_anthropic_params( + model, chat_msgs, system_prompt, tools, config.model_name + ) response = await client.messages.create(**params) tool_calls = [] @@ -594,25 +606,9 @@ async def _stream_anthropic( client = LLMProvider._anthropic_client(config) system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages) - params = {"model": model, "messages": chat_msgs, "max_tokens": 4096} - if system_prompt: - params["system"] = [ - { - "type": "text", - "text": system_prompt, - "cache_control": {"type": "ephemeral"}, - } - ] - anthropic_tools = LLMProvider._anthropic_tool_param(tools) - if anthropic_tools: - params["tools"] = anthropic_tools - - # Enable extended thinking 
for compatible models (Claude 3.7+, Claude 4+) - if LLMProvider._supports_thinking(config.model_name): - params["max_tokens"] = 16000 - params["thinking"] = {"type": "enabled", "budget_tokens": 10000} - - params["extra_headers"] = {"anthropic-beta": "prompt-caching-2024-07-31"} + params = LLMProvider._build_anthropic_params( + model, chat_msgs, system_prompt, tools, config.model_name + ) async with client.messages.stream(**params) as stream: async for event in stream: if event.type == "content_block_delta": diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 257b4d4..26a7d81 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -480,35 +480,36 @@ function ChatUI({ return [...prev, { id: nextId(), role: 'system', content: '::thinking::' }]; }); break; - case 'thinking_chunk': { - const tchunk = data?.chunk || ''; - if (!tchunk) break; - setMessages((prev) => { - // Remove plain ::thinking:: indicator if present - let msgs = prev; - if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); - // Append to existing thinking message or create new one - const last = msgs[msgs.length - 1]; - if (last?.role === 'system' && last.content === '::thinking_content::') { - const updated = [...msgs]; - updated[updated.length - 1] = { ...last, thinking: (last.thinking || '') + tchunk }; - return updated; - } - return [...msgs, { id: nextId(), role: 'system', content: '::thinking_content::', thinking: tchunk }]; - }); - break; - } + case 'thinking_chunk': case 'thinking_end': { - const duration = data?.duration_seconds || 0; - setMessages((prev) => { - const idx = findLastIndex(prev, (m: Message) => m.role === 'system' && m.content === '::thinking_content::'); - if (idx >= 0) { - const updated = [...prev]; - updated[idx] = { ...updated[idx], thinkingDuration: duration }; - return updated; - } - return prev; - }); + if (event_type === 'thinking_chunk') { + const tchunk = data?.chunk || ''; + if (!tchunk) break; + setMessages((prev) => { + // Remove plain ::thinking:: indicator if present + let msgs = prev; + if (msgs.length > 0 && msgs[msgs.length - 1].content === '::thinking::') msgs = msgs.slice(0, -1); + // Append to existing thinking message or create new one + const last = msgs[msgs.length - 1]; + if (last?.role === 'system' && last.content === '::thinking_content::') { + const updated = [...msgs]; + updated[updated.length - 1] = { ...last, thinking: (last.thinking || '') + tchunk }; + return updated; + } + return [...msgs, { id: nextId(), role: 'system', content: '::thinking_content::', thinking: tchunk }]; + }); + } else { + const duration = data?.duration_seconds || 0; + setMessages((prev) => { + const idx = findLastIndex(prev, (m: Message) => m.role === 'system' && m.content === '::thinking_content::'); + if (idx >= 0) { + const updated = [...prev]; + updated[idx] = { ...updated[idx], thinkingDuration: duration }; + return updated; + } + return prev; + }); + } break; } case 'assistant_chunk': { diff --git a/frontend/src/components/MessageList.tsx b/frontend/src/components/MessageList.tsx index 75d13eb..fa8fadb 100644 --- a/frontend/src/components/MessageList.tsx +++ b/frontend/src/components/MessageList.tsx @@ -132,12 +132,12 @@ function formatThinkingDuration(seconds?: number): string { if (seconds < 60) return `${Math.round(seconds)} seconds`; const m = Math.floor(seconds / 60); const s = Math.round(seconds % 60); - if (s === 0) return `${m} minute${m !== 1 ? 's' : ''}`; + if (s === 0) return `${m} minute${m === 1 ? 
'' : 's'}`; return `${m}m ${s}s`; } /** Thinking/reasoning block — shows model thinking, collapses when reply starts */ -function ThinkingBlock({ msg, expanded, onToggle }: { msg: Message; expanded: boolean; onToggle: () => void }) { +function ThinkingBlock({ msg, expanded, onToggle }: Readonly<{ msg: Message; expanded: boolean; onToggle: () => void }>) { const thinking = msg.thinking || ''; const duration = msg.thinkingDuration; const collapsed = msg.thinkingCollapsed;
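
Note for reviewers: below is a minimal sketch of the thinking-event contract the patch introduces, assuming only the event names and payload keys that appear in loop.py ("thinking_chunk" carrying "chunk", "thinking_end" carrying "duration_seconds"). The ThinkingTrace helper and apply_event function are hypothetical illustrations, not part of the codebase; they mirror the accumulation and close-out logic that App.tsx applies to its ::thinking_content:: messages.

# Hypothetical consumer of the agent event stream; not part of this patch.
from dataclasses import dataclass


@dataclass
class ThinkingTrace:
    """Accumulated reasoning text plus the duration reported at thinking_end."""

    text: str = ""
    duration_seconds: float | None = None

    @property
    def finished(self) -> bool:
        return self.duration_seconds is not None


def apply_event(trace: ThinkingTrace, event_type: str, data: dict) -> ThinkingTrace:
    if event_type == "thinking_chunk":
        # Incremental reasoning text arrives in data["chunk"].
        trace.text += data.get("chunk", "")
    elif event_type == "thinking_end":
        # Emitted once the model switches to visible text or a tool call.
        trace.duration_seconds = data.get("duration_seconds", 0.0)
    return trace


if __name__ == "__main__":
    trace = ThinkingTrace()
    apply_event(trace, "thinking_chunk", {"chunk": "Weighing tool options... "})
    apply_event(trace, "thinking_end", {"duration_seconds": 3.2})
    assert trace.finished and trace.text.startswith("Weighing")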