Skip to content

Commit 649cad4

Browse files
committed
feat(chat): stream tokens for non-image prompts; reduce default image size for faster generations
1 parent 4c93907 commit 649cad4

2 files changed

Lines changed: 137 additions & 4 deletions

File tree

Libs/pollilib/index.js

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,86 @@ export async function chat(payload, client) {
255255
}
256256
}
257257

258+
// Streaming chat helper (SSE). Yields assistant content deltas as strings.
//
// payload: { endpoint?, model?, messages?, tools?, tool_choice?, ...rest }
//          System-role messages are filtered out before sending.
// client:  a PolliClient; a default instance is constructed otherwise.
// Yields:  each non-empty `choices[0].delta.content` string from the stream.
// Throws:  Error with .status/.statusText attached on a non-2xx response.
export async function* chatStream(payload, client) {
  const c = client instanceof PolliClient ? client : new PolliClient();
  const referrer = resolveReferrer();
  const {
    endpoint = 'openai',
    model: selectedModel = 'openai',
    messages = [],
    tools = null,
    tool_choice = 'auto',
    ...rest
  } = payload || {};

  // Intentionally do not set response_format here to keep tokens human-readable.
  // Keep anything that is not a well-formed system entry.
  const filteredMessages = Array.isArray(messages)
    ? messages.filter(m => !m || typeof m !== 'object' || m.role !== 'system')
    : [];

  const url = `${c.textPromptBase}/openai`;
  // Spread `rest` FIRST so caller extras (seed, temperature, …) pass through
  // but can never clobber the fields this helper depends on — in particular
  // `stream: true`, which the SSE parsing below requires. (The previous
  // ordering spread `rest` last, letting a stray `stream: false` break it;
  // the separate conditional seed spread was redundant with `...rest`.)
  const body = {
    ...rest,
    model: selectedModel,
    messages: filteredMessages,
    stream: true,
    ...(referrer ? { referrer } : {}),
    ...(Array.isArray(tools) && tools.length ? { tools, tool_choice } : {}),
  };
  body.safe = false;

  // Abort the request when the client timeout elapses.
  // NOTE(review): this aborts mid-stream for long generations too — confirm
  // whether timeoutMs is meant to bound the whole stream or just the connect.
  const controller = new AbortController();
  const t = setTimeout(() => controller.abort(), c.timeoutMs);

  // Parse one SSE line. Returns the content delta ('' when there is none,
  // e.g. keep-alives or non-JSON chunks) or null for the [DONE] sentinel.
  const parseLine = (line) => {
    if (!line.startsWith('data:')) return '';
    const data = line.slice(5).trim();
    if (data === '[DONE]') return null;
    try {
      const obj = JSON.parse(data);
      return obj?.choices?.[0]?.delta?.content || '';
    } catch {
      return ''; // ignore non-JSON chunks
    }
  };

  try {
    // Best-effort panel logging; never let it break the request.
    try {
      let log = (globalThis && globalThis.__PANEL_LOG__);
      if (!log && globalThis) { globalThis.__PANEL_LOG__ = []; log = globalThis.__PANEL_LOG__; }
      if (log && Array.isArray(log)) {
        log.push({ ts: Date.now(), kind: 'chat:request', url, model: selectedModel, referer: referrer || null, meta: { endpoint: endpoint || 'openai', json: false, stream: true } });
      }
    } catch {}

    const resp = await c.fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Accept': 'text/event-stream' },
      body: JSON.stringify(body),
      signal: controller.signal,
    });
    if (!resp.ok) {
      const err = new Error(`HTTP ${resp.status}`);
      err.status = resp.status;
      err.statusText = resp.statusText;
      try { const log = (globalThis && globalThis.__PANEL_LOG__); if (log && Array.isArray(log)) log.push({ ts: Date.now(), kind: 'chat:error', url, model: selectedModel, ok: false, status: resp.status, meta: { stream: true } }); } catch {}
      throw err;
    }

    // Iterate SSE lines from the streaming body when the runtime supports it.
    const reader = resp.body && typeof resp.body.getReader === 'function' ? resp.body.getReader() : null;
    if (reader) {
      const decoder = new TextDecoder();
      let buf = '';
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });
        const parts = buf.split(/\r?\n/);
        buf = parts.pop() ?? ''; // keep the trailing partial line for next read
        for (const line of parts) {
          const content = parseLine(line);
          if (content === null) {
            // [DONE]: stop pulling from the socket instead of reading to EOF.
            try { await reader.cancel(); } catch {}
            return;
          }
          if (content) yield content;
        }
      }
      // Flush the decoder and any final line that arrived without a newline
      // (previously this tail was silently dropped).
      buf += decoder.decode();
      if (buf) {
        const content = parseLine(buf);
        if (content) yield content;
      }
    } else {
      // Fallback: parse the entire body if streaming is unsupported.
      const text = await resp.text();
      for (const line of String(text).split(/\r?\n/)) {
        const content = parseLine(line);
        if (content === null) break;
        if (content) yield content;
      }
    }
  } finally {
    clearTimeout(t); // always release the timeout, even on abort/throw
  }
}
337+
258338
export async function image(prompt, options, client) {
259339
const c = client instanceof PolliClient ? client : new PolliClient();
260340
const referrer = resolveReferrer();

src/main.js

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import './style.css';
22
import 'highlight.js/styles/github.css';
33
import { renderMarkdown, enhanceCodeBlocksHtml } from './lib/markdown.js';
44
import { looseJsonParse, repairModelOutput } from './lib/json-repair.js';
5-
import { chat, image, textModels } from '../Libs/pollilib/index.js';
5+
import { chat, chatStream, image, textModels } from '../Libs/pollilib/index.js';
66
import { generateSeed } from './seed.js';
77
import { createPollinationsClient } from './pollinations-client.js';
88
import {
@@ -281,6 +281,53 @@ function renderDebugPanel(extra = {}) {
281281
}
282282
}
283283

284+
// Fast-path streaming for text-only prompts to improve perceived latency.
// Pushes the user turn into state.conversation, streams the assistant reply
// token-by-token into a live message bubble, then commits the full reply to
// the conversation. On any failure the conversation is rolled back to its
// pre-call length; stream-level failures additionally fall back to the
// non-streaming sendPrompt() path.
async function sendPromptStreaming(prompt) {
  const selectedModel = getSelectedModel();
  if (!selectedModel) throw new Error('No model selected.');
  if (!client) throw new Error('Pollinations client is not ready.');
  const endpoints = buildEndpointSequence(selectedModel);
  if (!endpoints.length) throw new Error(`No endpoints available for model "${selectedModel.label ?? selectedModel.id}".`);

  // Remember the length so any failure path can truncate back to it.
  const startingLength = state.conversation.length;
  // Do NOT inject the JSON primer for streaming text-only turns
  state.conversation.push({ role: 'user', content: prompt });
  try {
    setStatus('Streaming response…');
    // Empty assistant bubble that fills in as deltas arrive.
    const assistantMsg = addMessage({ role: 'assistant', type: 'text', content: '' });
    const pinnedId = state.pinnedModelId || selectedModel.id;
    // Only the first endpoint is tried here; chatStream has no retry chain.
    const endpoint = endpoints[0] || 'openai';
    state.activeModel = { id: pinnedId, endpoint, info: selectedModel };
    if (!state.pinnedModelId) state.pinnedModelId = pinnedId;
    let streamed = '';
    try {
      for await (const chunk of chatStream({ model: pinnedId, endpoint, messages: state.conversation, seed: generateSeed() }, client)) {
        if (typeof chunk === 'string' && chunk) {
          streamed += chunk;
          assistantMsg.content = streamed;
          // NOTE(review): full re-render per delta; consider rAF-throttling
          // if renderMessages() proves expensive on long replies.
          renderMessages();
        }
      }
    } catch (e) {
      // Fallback to existing non-stream flow
      console.warn('Streaming failed; falling back to standard request', e);
      state.conversation.length = startingLength; // revert user injection
      // NOTE(review): the assistantMsg bubble created above is not removed
      // here — the fallback likely leaves an empty/partial bubble in the UI.
      // Confirm whether addMessage has a matching removal API.
      return await sendPrompt(prompt);
    }
    // Commit only non-blank replies; a blank stream leaves the user turn
    // in the conversation with no assistant entry.
    if (streamed.trim()) {
      state.conversation.push({ role: 'assistant', content: streamed });
      if (state.voicePlayback && els.voiceSelect.value) {
        // Fire-and-forget TTS; playback errors are handled inside speakMessage.
        void speakMessage(assistantMsg, { autoplay: true });
      }
    }
    resetStatusIfIdle();
  } catch (error) {
    console.error('Chat error (streaming)', error);
    // Roll back both the user turn and any committed assistant turn.
    state.conversation.length = startingLength;
    throw error;
  }
}
330+
284331
async function copyLogsToClipboard() {
285332
try {
286333
const data = (globalThis && globalThis.__PANEL_LOG__) || [];
@@ -1859,8 +1906,8 @@ async function generateImageAsset(prompt, { width, height, model: imageModel, se
18591906
}
18601907
const resolvedSeed = (typeof seed === 'number' || (typeof seed === 'string' && seed.trim().length)) ? seed : generateSeed();
18611908
const dims = [];
1862-
const w = Number(width) || 1024;
1863-
const h = Number(height) || 1024;
1909+
const w = Number(width) || 768;
1910+
const h = Number(height) || 768;
18641911
dims.push([w, h]);
18651912
if (w > 512 || h > 512) dims.push([512, 512]);
18661913

@@ -2338,7 +2385,13 @@ els.form.addEventListener('submit', async event => {
23382385
console.info('Generated Pollinations image with seed %s.', seed);
23392386
resetStatusIfIdle();
23402387
} else {
2341-
await sendPrompt(raw);
2388+
// Stream for non-image prompts to speed up perceived latency
2389+
const wantsImage = hasImageIntent(raw);
2390+
if (!wantsImage) {
2391+
await sendPromptStreaming(raw);
2392+
} else {
2393+
await sendPrompt(raw);
2394+
}
23422395
}
23432396
} catch (error) {
23442397
console.error('Submission error', error);

0 commit comments

Comments
 (0)