From 8abc8e327f719b0de0d5b623d0bb0873b3a8b7bf Mon Sep 17 00:00:00 2001
From: Amrit Krishnan <amrit110@gmail.com>
Date: Tue, 16 Jun 2026 09:29:08 -0400
Subject: [PATCH] fix(bookstack-agent): suppress reasoning text before tool
 calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On-prem models like Qwen emit reasoning/thinking text in a text content
block before using tools. Previously this appeared in the UI for the full
duration of the tool-call turn before being cleared by the tool_use event.

Fix: watch for a content_block_start event whose content block has
type=tool_use inside the stream. When detected, immediately yield a
text_clear event (only if text was already streamed during this turn) so
the UI discards any rendered text, then suppress further text_chunk events
for that turn.

The text_clear arrives as soon as the model signals a tool call — not after
the full response completes — so the transient reasoning text is removed
the moment the tool call begins instead of lingering for the rest of the
turn. Final-answer turns (no tool use) are unaffected and still stream
character-by-character.
---
 .../ui/app/components/chat-page.tsx           |  6 ++++-
 src/aieng_bot/bookstack/agent.py              | 26 ++++++++++++++++---
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/bookstack_agent/ui/app/components/chat-page.tsx b/bookstack_agent/ui/app/components/chat-page.tsx
index f953d51..4c79a8b 100644
--- a/bookstack_agent/ui/app/components/chat-page.tsx
+++ b/bookstack_agent/ui/app/components/chat-page.tsx
@@ -220,8 +220,12 @@ export default function ChatPage({ user }: { user: User | null }) {
               setSessionId(event.session_id as string)
               break
 
+            case 'text_clear':
+              // Model emitted reasoning/thinking text before a tool call — discard it
+              patchLast((msg) => ({ ...msg, content: null }))
+              break
+
             case 'tool_use':
-              // Clear any in-progress streamed text (it was planning text, not the answer)
               patchLast((msg) => ({
                 ...msg,
                 content:   null,
diff --git a/src/aieng_bot/bookstack/agent.py b/src/aieng_bot/bookstack/agent.py
index 898b255..db83217 100644
--- a/src/aieng_bot/bookstack/agent.py
+++ b/src/aieng_bot/bookstack/agent.py
@@ -244,8 +244,12 @@ async def ask_stream(
             for _ in range(self.MAX_TURNS):
                 accumulated_text = ""
                 final_response: Any = None
+                # Set to True the moment a tool_use content block starts so we
+                # can immediately clear the UI and stop forwarding text chunks.
+                # On-prem models (e.g. Qwen) emit reasoning text before tool
+                # calls; we must not show that transient text to the user.
+                tool_use_started = False
 
-                # Use the streaming API so text tokens flow to the client immediately
                 async with self._async_client.messages.stream(
                     model=self.model,
                     max_tokens=8192,
@@ -254,9 +258,23 @@ async def ask_stream(
                     messages=cast(list[MessageParam], messages),
                 ) as stream:
                     async for event in stream:
-                        # Yield text tokens as they arrive (only TextDelta has .text)
-                        if (
-                            getattr(event, "type", None) == "content_block_delta"
+                        event_type = getattr(event, "type", None)
+
+                        if event_type == "content_block_start":
+                            block = getattr(event, "content_block", None)
+                            if (
+                                getattr(block, "type", None) == "tool_use"
+                                and not tool_use_started
+                            ):
+                                tool_use_started = True
+                                # Immediately tell the UI to discard any text
+                                # it has already rendered for this turn.
+                                if accumulated_text:
+                                    yield {"type": "text_clear"}
+
+                        elif (
+                            not tool_use_started
+                            and event_type == "content_block_delta"
                             and getattr(getattr(event, "delta", None), "type", None)
                             == "text_delta"
                         ):