From f059dcfcae1ffc077c90301dad18f41d6a7dda78 Mon Sep 17 00:00:00 2001
From: quantumaikr <hi@quantumai.kr>
Date: Fri, 10 Apr 2026 16:24:02 +0900
Subject: [PATCH] ux(wasm): show "Thinking..." indicator during prompt prefill

First-token latency can be several seconds on a 0.8B model in WASM
(processing the full prompt through 28 layers in single-threaded
WASM). Without feedback, users see a blank assistant bubble and
think the demo is broken.

Add a spinner + "Thinking..." message inside the assistant bubble
that appears immediately after sending and is replaced by the first
streamed token. Also show "Processing prompt..." in the stats bar.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 wasm/index.html | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/wasm/index.html b/wasm/index.html
index 0abd7ec..925bdc9 100644
--- a/wasm/index.html
+++ b/wasm/index.html
@@ -68,6 +68,7 @@
 .message.user { background: #1a1a2e; border: 1px solid #2a2a4e; }
 .message.assistant { background: #111; border: 1px solid #222; }
 .message.assistant .cursor { animation: blink 1s step-end infinite; }
+.message.assistant .thinking { color: #6ee7b7; font-size: 13px; font-style: italic; }
 @keyframes blink { 50% { opacity: 0; } }
 .message.system { color: #666; font-size: 12px; text-align: center; white-space: normal; }
 .message code { background: #1a1a1a; padding: 1px 4px; border-radius: 3px; font-size: 13px; }
@@ -412,9 +413,13 @@ <h2>LLM in Your Browser</h2>
 
     addMessage('user', text);
     const assistantDiv = addMessage('assistant', '');
+    // Show "thinking" indicator during prompt prefill (before first token)
+    assistantDiv.innerHTML = '<span class="thinking"><span class="spinner" style="display:inline-block;width:12px;height:12px;vertical-align:middle;margin-right:6px"></span>Thinking...</span>';
     let output = '';
     let tokenCount = 0;
     const startTime = performance.now();
+    document.getElementById('statTokens').textContent = 'Processing prompt...';
+    document.getElementById('statSpeed').textContent = '';
 
     // Set streaming token callback
     Module.onToken = (token) => {