From f059dcfcae1ffc077c90301dad18f41d6a7dda78 Mon Sep 17 00:00:00 2001 From: quantumaikr Date: Fri, 10 Apr 2026 16:24:02 +0900 Subject: [PATCH] ux(wasm): show "Thinking..." indicator during prompt prefill First-token latency can be several seconds on a 0.8B model in WASM (processing the full prompt through 28 layers in single-threaded WASM). Without feedback, users see a blank assistant bubble and think the demo is broken. Add a spinner + "Thinking..." message inside the assistant bubble that appears immediately after sending and is replaced by the first streamed token. Also show "Processing prompt..." in the stats bar. Co-Authored-By: Claude Opus 4.6 (1M context) --- wasm/index.html | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/wasm/index.html b/wasm/index.html index 0abd7ec..925bdc9 100644 --- a/wasm/index.html +++ b/wasm/index.html @@ -68,6 +68,7 @@ .message.user { background: #1a1a2e; border: 1px solid #2a2a4e; } .message.assistant { background: #111; border: 1px solid #222; } .message.assistant .cursor { animation: blink 1s step-end infinite; } +.message.assistant .thinking { color: #6ee7b7; font-size: 13px; font-style: italic; } @keyframes blink { 50% { opacity: 0; } } .message.system { color: #666; font-size: 12px; text-align: center; white-space: normal; } .message code { background: #1a1a1a; padding: 1px 4px; border-radius: 3px; font-size: 13px; } @@ -412,9 +413,13 @@

LLM in Your Browser

addMessage('user', text); const assistantDiv = addMessage('assistant', ''); + // Show "thinking" indicator during prompt prefill (before first token) + assistantDiv.innerHTML = 'Thinking...'; let output = ''; let tokenCount = 0; const startTime = performance.now(); + document.getElementById('statTokens').textContent = 'Processing prompt...'; + document.getElementById('statSpeed').textContent = ''; // Set streaming token callback Module.onToken = (token) => {