From 8abc8e327f719b0de0d5b623d0bb0873b3a8b7bf Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 16 Jun 2026 09:29:08 -0400 Subject: [PATCH] fix(bookstack-agent): suppress reasoning text before tool calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On-prem models like Qwen emit reasoning/thinking text in a text content block before using tools. Previously this appeared in the UI for the full duration of the tool-call turn before being cleared by the tool_use event. Fix: watch for content_block_start with type=tool_use inside the stream. When detected, immediately yield a text_clear event so the UI discards any rendered text, then suppress further text_chunk events for that turn. The text_clear arrives as soon as the model signals a tool call — not after the full response completes — so the transient reasoning text is removed as soon as the tool call begins instead of lingering for the rest of the turn. Final-answer turns (no tool use) are unaffected and still stream character-by-character. --- .../ui/app/components/chat-page.tsx | 6 ++++- src/aieng_bot/bookstack/agent.py | 26 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/bookstack_agent/ui/app/components/chat-page.tsx b/bookstack_agent/ui/app/components/chat-page.tsx index f953d51..4c79a8b 100644 --- a/bookstack_agent/ui/app/components/chat-page.tsx +++ b/bookstack_agent/ui/app/components/chat-page.tsx @@ -220,8 +220,12 @@ export default function ChatPage({ user }: { user: User | null }) { setSessionId(event.session_id as string) break + case 'text_clear': + // Model emitted reasoning/thinking text before a tool call — discard it + patchLast((msg) => ({ ...msg, content: null })) + break + case 'tool_use': - // Clear any in-progress streamed text (it was planning text, not the answer) patchLast((msg) => ({ ...msg, content: null, diff --git a/src/aieng_bot/bookstack/agent.py b/src/aieng_bot/bookstack/agent.py index 898b255..db83217 100644 --- a/src/aieng_bot/bookstack/agent.py +++ 
b/src/aieng_bot/bookstack/agent.py @@ -244,8 +244,12 @@ async def ask_stream( for _ in range(self.MAX_TURNS): accumulated_text = "" final_response: Any = None + # Set to True the moment a tool_use content block starts so we + # can immediately clear the UI and stop forwarding text chunks. + # On-prem models (e.g. Qwen) emit reasoning text before tool + # calls; we must not show that transient text to the user. + tool_use_started = False - # Use the streaming API so text tokens flow to the client immediately async with self._async_client.messages.stream( model=self.model, max_tokens=8192, @@ -254,9 +258,23 @@ async def ask_stream( messages=cast(list[MessageParam], messages), ) as stream: async for event in stream: - # Yield text tokens as they arrive (only TextDelta has .text) - if ( - getattr(event, "type", None) == "content_block_delta" + event_type = getattr(event, "type", None) + + if event_type == "content_block_start": + block = getattr(event, "content_block", None) + if ( + getattr(block, "type", None) == "tool_use" + and not tool_use_started + ): + tool_use_started = True + # Immediately tell the UI to discard any text + # it has already rendered for this turn. + if accumulated_text: + yield {"type": "text_clear"} + + elif ( + not tool_use_started + and event_type == "content_block_delta" and getattr(getattr(event, "delta", None), "type", None) == "text_delta" ):