From 2973ce700aa3a245af45a799986308edadf408a8 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Tue, 28 Oct 2025 17:06:50 +0100
Subject: [PATCH 01/14] Implement client-side chunked transcription pipeline

---
 src/openai.js               |   3 +-
 src/openai.test.js          |  26 +++--
 src/stt/audio.js            | 161 ++++++++++++++++++++++++++
 src/stt/audio.test.js       |  38 +++++++
 src/stt/chunking.js         | 155 +++++++++++++++++++++++++
 src/stt/chunking.test.js    |  91 +++++++++++++++
 src/stt/config.js           |  25 ++++
 src/stt/merge.js            |  78 +++++++++++++
 src/stt/transcriber.js      | 138 ++++++++++++++++++++++
 src/stt/transcriber.test.js | 165 +++++++++++++++++++++++++++
 src/stt/vad.js              | 221 ++++++++++++++++++++++++++++++++++++
 src/stt/vad.test.js         |  55 +++++++++
 src/transcription.js        |   4 +-
 src/transcription.test.js   |  15 ++-
 14 files changed, 1156 insertions(+), 19 deletions(-)
 create mode 100644 src/stt/audio.js
 create mode 100644 src/stt/audio.test.js
 create mode 100644 src/stt/chunking.js
 create mode 100644 src/stt/chunking.test.js
 create mode 100644 src/stt/config.js
 create mode 100644 src/stt/merge.js
 create mode 100644 src/stt/transcriber.js
 create mode 100644 src/stt/transcriber.test.js
 create mode 100644 src/stt/vad.js
 create mode 100644 src/stt/vad.test.js

diff --git a/src/openai.js b/src/openai.js
index 74f3a47..58f0870 100644
--- a/src/openai.js
+++ b/src/openai.js
@@ -152,7 +152,7 @@ export async function summarizeText({ lowQuality = '', highQuality = '' }) {
   return response.choices?.[0]?.message?.content || '';
 }
 
-export async function transcribeFile({ file, language }) {
+export async function transcribeFile({ file, language, prompt }) {
   if (!file) throw new Error('File is required for transcription');
   const model = resolveTranscriptionModel();
 
@@ -161,6 +161,7 @@ export async function transcribeFile({ file, language }) {
       file,
       model,
       language,
+      prompt,
     });
     return response.text;
   } catch (error) {
diff --git a/src/openai.test.js b/src/openai.test.js
index 94bf3b1..e7ab9c5 100644
--- a/src/openai.test.js
+++ b/src/openai.test.js
@@ -49,11 +49,14 @@ describe('openai helpers', () => {
     const result = await transcribeFile({ file, language: 'en' });
 
     expect(result).toBe('hello world');
-    expect(mockClient.audio.transcriptions.create).toHaveBeenCalledWith({
-      file,
-      model: 'gpt-4o-transcribe',
-      language: 'en',
-    });
+    expect(mockClient.audio.transcriptions.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        file,
+        model: 'gpt-4o-transcribe',
+        language: 'en',
+        prompt: undefined,
+      })
+    );
   });
 
   test('transcribeFile honors TRANSCRIBE_MODEL override', async () => {
@@ -65,11 +68,14 @@ describe('openai helpers', () => {
     const file = { name: 'audio.wav' };
     await transcribeFile({ file, language: 'de' });
 
-    expect(mockClient.audio.transcriptions.create).toHaveBeenCalledWith({
-      file,
-      model: 'custom-model',
-      language: 'de',
-    });
+    expect(mockClient.audio.transcriptions.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        file,
+        model: 'custom-model',
+        language: 'de',
+        prompt: undefined,
+      })
+    );
   });
 
   test('transcribeFile logs model and status information on failure', async () => {
diff --git a/src/stt/audio.js b/src/stt/audio.js
new file mode 100644
index 0000000..86dc311
--- /dev/null
+++ b/src/stt/audio.js
@@ -0,0 +1,161 @@
+import { STT_CONFIG } from './config.js';
+
+// Truthy only when realtime and offline Web Audio constructors both exist.
+/* istanbul ignore next -- depends on browser-specific audio globals */
+function hasWebAudio() {
+  if (typeof window === 'undefined') return false;
+
+  const RealtimeCtor = window.AudioContext || window.webkitAudioContext;
+  return RealtimeCtor && window.OfflineAudioContext;
+}
+
+/* istanbul ignore next -- depends on browser-specific audio globals */
+function getAudioContext() {
+  // Standard constructor first, WebKit-prefixed constructor as fallback.
+  return new (window.AudioContext || window.webkitAudioContext)();
+}
+
+/**
+ * Averages every channel of a decoded buffer into one mono Float32Array;
+ * single-channel input is returned as a copy of channel 0.
+ */
+/* istanbul ignore next -- exercised via decodeToMono16k in browser */
+function mixToMono(audioBuffer) {
+  const channelCount = audioBuffer.numberOfChannels;
+  if (channelCount === 1) return audioBuffer.getChannelData(0).slice();
+
+  const mixed = new Float32Array(audioBuffer.length);
+  for (let c = 0; c < channelCount; c += 1) {
+    const samples = audioBuffer.getChannelData(c);
+    for (let i = 0; i < mixed.length; i += 1) {
+      mixed[i] += samples[i] / channelCount;
+    }
+  }
+
+  return mixed;
+}
+
+// Renders `mono` through an OfflineAudioContext running at `targetRate`;
+// when the rates already match, the input array is returned untouched.
+/* istanbul ignore next -- exercised via decodeToMono16k in browser */
+async function resampleMonoBuffer(mono, sourceRate, targetRate) {
+  if (sourceRate === targetRate) return mono;
+
+  const outputFrames = Math.ceil((mono.length * targetRate) / sourceRate);
+  const renderCtx = new OfflineAudioContext(1, outputFrames, targetRate);
+
+  const input = renderCtx.createBuffer(1, mono.length, sourceRate);
+  input.copyToChannel(mono, 0);
+
+  const player = renderCtx.createBufferSource();
+  player.buffer = input;
+  player.connect(renderCtx.destination);
+  player.start(0);
+
+  return (await renderCtx.startRendering()).getChannelData(0).slice();
+}
+
+/**
+ * Decodes an audio file into mono PCM at STT_CONFIG.sampleRate.
+ * @param {Blob} file - Source object exposing `arrayBuffer()`.
+ * @returns {Promise<{pcm: Float32Array, sampleRate: number, durationMs: number}>}
+ * @throws {Error} When the Web Audio API is unavailable.
+ */
+/* istanbul ignore next -- browser-only decode path */
+export async function decodeToMono16k(file) {
+  if (!hasWebAudio()) throw new Error('Web Audio API not available');
+
+  const encodedBytes = await file.arrayBuffer();
+  const audioCtx = getAudioContext();
+
+  try {
+    const decoded = await audioCtx.decodeAudioData(encodedBytes);
+    const { sampleRate } = STT_CONFIG;
+    const pcm = await resampleMonoBuffer(
+      mixToMono(decoded),
+      decoded.sampleRate,
+      sampleRate
+    );
+    const durationMs = Math.round((pcm.length / sampleRate) * 1000);
+    return { pcm, sampleRate, durationMs };
+  } finally {
+    audioCtx.close?.(); // release the context on success and on failure
+  }
+}
+
+export function clampMs(value, min, max) {
+  return Math.min(max, Math.max(min, value)); // raise to min, then cap at max
+}
+
+// Estimated byte size of a mono PCM WAV file holding `durationMs` of audio.
+export function estimateChunkBytes(durationMs) {
+  const pcmByteRate = STT_CONFIG.sampleRate * STT_CONFIG.wavBytesPerSample;
+  return Math.ceil((durationMs / 1000) * pcmByteRate) + 44; // + WAV header
+}
+
+// Clamps float samples to [-1, 1] and packs them as little-endian int16 bytes.
+function floatTo16BitPCM(float32Array) {
+  const bytes = new Uint8Array(float32Array.length * 2);
+  const writer = new DataView(bytes.buffer);
+  for (let i = 0; i < float32Array.length; i += 1) {
+    const clamped = Math.min(1, Math.max(-1, float32Array[i]));
+    // Negative values scale by 0x8000, non-negative ones by 0x7fff.
+    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
+    writer.setInt16(i * 2, scaled, true);
+  }
+  return bytes;
+}
+
+// Builds the 44-byte WAV header for `dataLength` bytes of mono 16-bit PCM.
+function writeWavHeader(dataLength) {
+  const header = new Uint8Array(44);
+  const view = new DataView(header.buffer);
+  const { sampleRate, wavBytesPerSample } = STT_CONFIG;
+  const u32 = (at, value, le = true) => view.setUint32(at, value, le);
+  const u16 = (at, value) => view.setUint16(at, value, true);
+  u32(0, 0x52494646, false); // 'RIFF'
+  u32(4, 36 + dataLength); // RIFF chunk size
+  u32(8, 0x57415645, false); // 'WAVE'
+  u32(12, 0x666d7420, false); // 'fmt '
+  u32(16, 16); // Subchunk1Size
+  u16(20, 1); // PCM
+  u16(22, 1); // Mono
+  u32(24, sampleRate);
+  u32(28, sampleRate * wavBytesPerSample); // byte rate
+  u16(32, wavBytesPerSample); // block align
+  u16(34, 16); // bits per sample
+  u32(36, 0x64617461, false); // 'data'
+  u32(40, dataLength);
+  return header;
+}
+
+/**
+ * Encodes the [startMs, endMs) window of `pcm` as a mono 16-bit WAV blob.
+ *
+ * @param {Float32Array} pcm - Samples at STT_CONFIG.sampleRate.
+ * @param {number} startMs - Window start; clamped to the first sample.
+ * @param {number} endMs - Window end; clamped to the last sample.
+ * @returns {{blob: Blob, durationMs: number}|null} Null for an empty window.
+ */
+export function encodeWavChunk(pcm, startMs, endMs) {
+  const { sampleRate } = STT_CONFIG;
+  const toIndex = (ms, round) => round((ms / 1000) * sampleRate);
+  const from = Math.max(0, toIndex(startMs, Math.floor));
+  const to = Math.min(pcm.length, toIndex(endMs, Math.ceil));
+  if (to <= from) return null;
+
+  const body = floatTo16BitPCM(pcm.slice(from, to));
+  const parts = [writeWavHeader(body.length), body];
+  return {
+    blob: new Blob(parts, { type: 'audio/wav' }),
+    durationMs: Math.round(((to - from) / sampleRate) * 1000),
+  };
+}
+
+export function samplesToMs(samples) {
+  return Math.round((samples / STT_CONFIG.sampleRate) * 1000); // nearest ms
+}
+
+export function msToSamples(ms) {
+  return Math.round((ms / 1000) * STT_CONFIG.sampleRate); // nearest sample
+}
diff --git a/src/stt/audio.test.js b/src/stt/audio.test.js
new file mode 100644
index 0000000..77f1b3b
--- /dev/null
+++ b/src/stt/audio.test.js
@@ -0,0 +1,38 @@
+import {
+  clampMs,
+  encodeWavChunk,
+  estimateChunkBytes,
+  msToSamples,
+  samplesToMs,
+} from './audio.js';
+
+describe('audio helpers', () => {
+  test('estimateChunkBytes uses pcm byte rate', () => {
+    const estimate = estimateChunkBytes(1000);
+    expect(estimate).toBeGreaterThan(32000);
+  });
+
+  test('encodeWavChunk converts float32 pcm to wav blob', () => {
+    const pcm = new Float32Array(16_000);
+    for (let i = 0; i < pcm.length; i += 1) {
+      pcm[i] = Math.sin((i / pcm.length) * Math.PI * 2);
+    }
+    const result = encodeWavChunk(pcm, 0, 1000);
+    expect(result).not.toBeNull();
+    expect(result.durationMs).toBeGreaterThanOrEqual(900);
+    expect(result.blob.type).toBe('audio/wav');
+    expect(result.blob.size).toBeGreaterThan(0);
+  });
+
+  test('clampMs enforces bounds', () => {
+    expect(clampMs(50, 100, 200)).toBe(100);
+    expect(clampMs(250, 100, 200)).toBe(200);
+    expect(clampMs(150, 100, 200)).toBe(150);
+  });
+
+  test('sample conversions are consistent', () => {
+    const samples = msToSamples(1000);
+    const ms = samplesToMs(samples);
+    expect(ms).toBeCloseTo(1000, 0);
+  });
+});
diff --git a/src/stt/chunking.js b/src/stt/chunking.js
new file mode 100644
index 0000000..9d78557
--- /dev/null
+++ b/src/stt/chunking.js
@@ -0,0 +1,155 @@
+import { STT_CONFIG } from './config.js';
+import { estimateChunkBytes } from './audio.js';
+
+/**
+ * Snaps a segment to whole milliseconds inside [0, durationMs]; the end is
+ * never allowed to precede the start.
+ */
+function normalizeSegment(segment, durationMs) {
+  const flooredStart = Math.floor(segment.startMs);
+  const ceiledEnd = Math.ceil(segment.endMs);
+  const startMs = Math.max(0, Math.min(durationMs, flooredStart));
+  const endMs = Math.max(startMs, Math.min(durationMs, ceiledEnd));
+  return { startMs, endMs };
+}
+
+/**
+ * Clamps segments to the audio duration, drops empty ones, sorts them by
+ * start time and fuses any that overlap or touch.
+ * @param {Array<{startMs: number, endMs: number}>} segments
+ * @param {number} durationMs
+ * @returns {Array<{startMs: number, endMs: number}>}
+ */
+export function normalizeSegments(segments, durationMs) {
+  const ordered = segments
+    .map((segment) => normalizeSegment(segment, durationMs))
+    .filter(({ startMs, endMs }) => endMs > startMs)
+    .sort((a, b) => a.startMs - b.startMs);
+
+  return ordered.reduce((merged, segment) => {
+    const last = merged[merged.length - 1];
+    if (last && segment.startMs <= last.endMs) {
+      last.endMs = Math.max(last.endMs, segment.endMs);
+    } else {
+      merged.push(segment);
+    }
+    return merged;
+  }, []);
+}
+
+// True once a chunk ending at `segmentEndMs` would hit the time or size cap.
+function shouldFinalizeChunk(chunk, segmentEndMs, estimateBytes) {
+  const spanMs = segmentEndMs - chunk.startMs;
+  if (spanMs <= 0) return false;
+  const { maxChunkMs, maxChunkBytes } = STT_CONFIG;
+  return spanMs >= maxChunkMs || estimateBytes(spanMs) >= maxChunkBytes;
+}
+
+/**
+ * Greedily packs normalized speech segments into chunks, closing the current
+ * chunk when adding the next segment would reach STT_CONFIG.maxChunkMs or
+ * STT_CONFIG.maxChunkBytes. Without any usable segment the whole duration is
+ * cut into fixed-length windows instead.
+ *
+ * @param {Array<{startMs: number, endMs: number}>} segments
+ * @param {number} durationMs - Total audio length in milliseconds.
+ * @returns {Array<{startMs: number, endMs: number}>}
+ */
+export function packSegmentsIntoChunks(segments, durationMs) {
+  const [first, ...rest] = normalizeSegments(segments, durationMs);
+
+  if (!first) {
+    const { maxChunkMs, maxChunkBytes, sampleRate, wavBytesPerSample } =
+      STT_CONFIG;
+    const byteRate = sampleRate * wavBytesPerSample;
+    const safeMax = Math.min(
+      maxChunkMs,
+      Math.floor((maxChunkBytes / byteRate) * 1000)
+    );
+    // Windows are never shorter than one minute, even if safeMax is.
+    const windowMs = Math.max(60_000, safeMax);
+
+    const windows = [];
+    for (let startMs = 0; startMs < durationMs; startMs += windowMs) {
+      const endMs = Math.min(durationMs, startMs + windowMs);
+      windows.push({ startMs, endMs });
+    }
+    return windows;
+  }
+
+  const packed = [];
+  let open = { startMs: first.startMs, endMs: first.endMs };
+
+  for (const segment of rest) {
+    const candidateEnd = Math.max(open.endMs, segment.endMs);
+    if (shouldFinalizeChunk(open, candidateEnd, estimateChunkBytes)) {
+      packed.push(open);
+      open = {
+        startMs: Math.max(segment.startMs, open.endMs),
+        endMs: segment.endMs,
+      };
+    } else {
+      open.endMs = candidateEnd;
+    }
+  }
+
+  packed.push(open);
+  return packed;
+}
+
+/**
+ * Adds `index` and the overlap-widened `renderStartMs`/`renderEndMs` window
+ * to each chunk; the first start and last end receive no overlap.
+ */
+export function applyChunkOverlaps(chunks, durationMs) {
+  if (!chunks.length) return [];
+  const lastIndex = chunks.length - 1;
+  const overlapMs = STT_CONFIG.chunkOverlapMs;
+  return chunks.map((chunk, index) => {
+    const lead = index === 0 ? 0 : overlapMs;
+    const tail = index === lastIndex ? 0 : overlapMs;
+    const renderStartMs = Math.max(0, chunk.startMs - lead);
+    const renderEndMs = Math.min(durationMs, chunk.endMs + tail);
+    return { ...chunk, renderStartMs, renderEndMs, index };
+  });
+}
+
+/**
+ * Plans fixed-length chunks covering the whole duration (no speech info).
+ * A window whose estimated WAV size exceeds STT_CONFIG.maxChunkBytes is cut
+ * into equal pieces that each fit the limit.
+ *
+ * @param {number} durationMs - Total audio length in milliseconds.
+ * @returns {Array<object>} Chunks with overlap-adjusted render windows.
+ */
+export function buildFallbackChunks(durationMs) {
+  const { maxChunkMs, maxChunkBytes, sampleRate, wavBytesPerSample } =
+    STT_CONFIG;
+  const msForBytes = (bytes) =>
+    Math.floor((bytes / (sampleRate * wavBytesPerSample)) * 1000);
+  const windowMs = Math.max(
+    5 * 60_000,
+    Math.min(maxChunkMs, msForBytes(maxChunkBytes))
+  );
+  // Longest piece whose estimate (44-byte WAV header included) fits the cap.
+  const maxPieceMs = Math.max(1, msForBytes(maxChunkBytes - 44));
+  const chunks = [];
+
+  for (let start = 0; start < durationMs; start += windowMs) {
+    const end = Math.min(durationMs, start + windowMs);
+    const oversized = estimateChunkBytes(end - start) > maxChunkBytes;
+    const pieces = oversized ? Math.ceil((end - start) / maxPieceMs) : 1;
+    const pieceMs = Math.ceil((end - start) / pieces);
+    for (let from = start; from < end; from += pieceMs) {
+      chunks.push({ startMs: from, endMs: Math.min(end, from + pieceMs) });
+    }
+  }
+
+  return applyChunkOverlaps(chunks, durationMs);
+}
+
+export function planChunks({ segments, durationMs }) {
+  const packed = packSegmentsIntoChunks(segments, durationMs); // may be []
+  const withOverlap = applyChunkOverlaps(packed, durationMs);
+  return withOverlap.length ? withOverlap : buildFallbackChunks(durationMs);
+}
diff --git a/src/stt/chunking.test.js b/src/stt/chunking.test.js
new file mode 100644
index 0000000..08cbbdb
--- /dev/null
+++ b/src/stt/chunking.test.js
@@ -0,0 +1,91 @@
+import { planChunks, buildFallbackChunks } from './chunking.js';
+import { STT_CONFIG } from './config.js';
+import { mergeChunkResults, buildPromptFromTail } from './merge.js';
+
+describe('chunk planning utilities', () => {
+  test('planChunks splits segments exceeding max duration', () => {
+    const segments = [
+      { startMs: 0, endMs: 600_000 },
+      { startMs: 610_000, endMs: 1_300_000 },
+    ];
+    const chunks = planChunks({ segments, durationMs: 1_400_000 });
+    expect(chunks).toHaveLength(2);
+    expect(chunks[0].renderStartMs).toBe(0);
+    expect(chunks[1].renderStartMs).toBeGreaterThan(chunks[0].renderStartMs);
+    expect(chunks[0].renderEndMs - chunks[0].renderStartMs).toBeLessThanOrEqual(
+      1_200_000 + 500
+    );
+  });
+
+  test('planChunks falls back when no speech detected', () => {
+    const chunks = planChunks({ segments: [], durationMs: 900_000 });
+    expect(chunks.length).toBeGreaterThan(0);
+    chunks.forEach((chunk, index) => {
+      expect(chunk.index).toBe(index);
+      expect(chunk.renderEndMs).toBeGreaterThan(chunk.renderStartMs);
+    });
+  });
+
+  test('buildFallbackChunks respects size limits', () => {
+    const originalBytes = STT_CONFIG.maxChunkBytes;
+    STT_CONFIG.maxChunkBytes = 32_000; // ~1s of 16k PCM
+    const fallback = buildFallbackChunks(120_000);
+    expect(fallback.length).toBeGreaterThan(1);
+    fallback.forEach((chunk) => {
+      expect(chunk.renderEndMs).toBeGreaterThan(chunk.renderStartMs);
+    });
+    STT_CONFIG.maxChunkBytes = originalBytes;
+  });
+
+  test('planChunks respects size thresholds during packing', () => {
+    const originalBytes = STT_CONFIG.maxChunkBytes;
+    STT_CONFIG.maxChunkBytes = 32_000;
+    const segments = [
+      { startMs: 0, endMs: 40_000 },
+      { startMs: 45_000, endMs: 80_000 },
+    ];
+    const chunks = planChunks({ segments, durationMs: 90_000 });
+    expect(chunks.length).toBeGreaterThan(1);
+    STT_CONFIG.maxChunkBytes = originalBytes;
+  });
+
+  test('planChunks keeps single chunk when under limits', () => {
+    const chunks = planChunks({
+      segments: [
+        { startMs: 0, endMs: 10_000 },
+        { startMs: 12_000, endMs: 18_000 },
+      ],
+      durationMs: 20_000,
+    });
+    expect(chunks).toHaveLength(1);
+  });
+
+  test('planChunks merges overlapping segments', () => {
+    const chunks = planChunks({
+      segments: [
+        { startMs: 0, endMs: 10_000 },
+        { startMs: 9_000, endMs: 15_000 },
+      ],
+      durationMs: 20_000,
+    });
+    expect(chunks).toHaveLength(1);
+    expect(chunks[0].renderEndMs).toBeGreaterThan(chunks[0].renderStartMs);
+  });
+});
+
+describe('merge helpers', () => {
+  test('buildPromptFromTail trims tail characters', () => {
+    const prompt = buildPromptFromTail('  Example transcript text  ');
+    expect(prompt.endsWith('text')).toBe(true);
+  });
+
+  test('mergeChunkResults removes duplicate sentences', () => {
+    const merged = mergeChunkResults([
+      { index: 0, text: 'Hello world. This is chunk one.' },
+      { index: 1, text: 'This is chunk one. And here is more.' },
+    ]);
+    expect(merged).toContain('Hello world.');
+    expect(merged).toContain('And here is more.');
+    expect(merged).not.toContain('This is chunk one.\nThis is chunk one.');
+  });
+});
diff --git a/src/stt/config.js b/src/stt/config.js
new file mode 100644
index 0000000..a3cacd1
--- /dev/null
+++ b/src/stt/config.js
@@ -0,0 +1,25 @@
+export const STT_CONFIG = {
+  sampleRate: 16000, // Hz; also written into generated WAV headers
+  windowSamples: 512,
+  threshold: 0.5,
+  minSpeechMs: 250, // padded segments shorter than this are discarded
+  minSilenceMs: 100,
+  speechPadMs: 200, // padding added before and after each speech segment
+  maxSpeechMs: 15 * 60 * 1000, // 15 minutes
+  chunkOverlapMs: 500, // extra audio rendered at interior chunk boundaries
+  maxChunkMs: 1200 * 1000, // 20 minutes
+  maxChunkBytes: 24 * 1024 * 1024, // 24 MiB
+  uploadConcurrency: 3, // passed to the upload limiter
+  wavBytesPerSample: 2, // 16-bit PCM
+  promptTailChars: 200, // characters of prior transcript reused as prompt
+};
+
+// Model URL; a truthy window.SILERO_VAD_MODEL overrides the default path.
+export const DEFAULT_SILERO_MODEL_URL =
+  (typeof window !== 'undefined' && window.SILERO_VAD_MODEL) ||
+  '/models/silero_v5_16k.onnx';
+
+// Wasm asset path; a truthy window.ORT_WASM_PATH overrides the default.
+export const DEFAULT_ORT_WASM_PATH =
+  (typeof window !== 'undefined' && window.ORT_WASM_PATH) ||
+  '/ort/';
diff --git a/src/stt/merge.js b/src/stt/merge.js
new file mode 100644
index 0000000..1b5084e
--- /dev/null
+++ b/src/stt/merge.js
@@ -0,0 +1,78 @@
+import { STT_CONFIG } from './config.js';
+
+// Lower-cases `text` and returns its runs of letters/digits as tokens.
+function tokenize(text) {
+  const lowered = text.toLowerCase();
+  const lettersAndDigits = lowered.replace(/[^\p{L}\p{N}\s]+/gu, ' ');
+  const words = lettersAndDigits.split(/\s+/);
+  return words.filter((word) => word.length > 0);
+}
+
+/**
+ * Cosine similarity of two token lists compared as bag-of-words count
+ * vectors. Returns 0 when either list is empty.
+ */
+function cosineSimilarity(aTokens, bTokens) {
+  if (!aTokens.length || !bTokens.length) return 0;
+
+  const countTokens = (tokens) => {
+    const counts = new Map();
+    for (const token of tokens) counts.set(token, (counts.get(token) || 0) + 1);
+    return counts;
+  };
+  const magnitude = (counts) =>
+    Math.sqrt([...counts.values()].reduce((sum, c) => sum + c * c, 0));
+  const countsA = countTokens(aTokens);
+  const countsB = countTokens(bTokens);
+  let dot = 0;
+  countsA.forEach((countA, token) => {
+    dot += countA * (countsB.get(token) || 0);
+  });
+  const denom = magnitude(countsA) * magnitude(countsB);
+  return denom === 0 ? 0 : dot / denom;
+}
+
+/**
+ * Strips the first sentence of `currentText` when the tail of `previousText`
+ * already contains it, either verbatim (case-insensitive) or with a token
+ * cosine similarity of at least 0.75. Otherwise `currentText` is returned
+ * unchanged.
+ */
+function removeDuplicateSentence(previousText, currentText) {
+  if (!previousText || !currentText) return currentText;
+
+  // Split after sentence punctuation; only the leading sentence is checked.
+  const [leading] = currentText.split(/(?<=[.!?])\s+/);
+  // Compare against at least the last 200 characters of the previous text.
+  const tailLength = Math.max(leading.length + 20, 200);
+  const tail = previousText.slice(-tailLength);
+
+  const repeatedVerbatim = tail.toLowerCase().includes(leading.toLowerCase());
+  const repeatedLoosely =
+    cosineSimilarity(tokenize(tail), tokenize(leading)) >= 0.75;
+
+  if (repeatedVerbatim || repeatedLoosely) {
+    return currentText.slice(leading.length).trimStart();
+  }
+
+  return currentText;
+}
+
+// Joins chunk texts by index; repeated lead-ins and empty texts are dropped.
+export function mergeChunkResults(chunks) {
+  const ordered = [...chunks].sort((a, b) => a.index - b.index);
+  let merged = '';
+  for (const chunk of ordered) {
+    const cleanText = removeDuplicateSentence(merged, chunk.text || '');
+    if (!cleanText) continue; // nothing new: avoid a dangling separator
+    merged = merged ? `${merged}\n${cleanText}` : cleanText;
+  }
+  return merged.trim();
+}
+
+// Last STT_CONFIG.promptTailChars characters of the trimmed text, or ''.
+export function buildPromptFromTail(text) {
+  const trimmed = text ? text.trim() : '';
+  if (trimmed === '') return '';
+  return trimmed.slice(-STT_CONFIG.promptTailChars);
+}
diff --git a/src/stt/transcriber.js b/src/stt/transcriber.js
new file mode 100644
index 0000000..7bb3426
--- /dev/null
+++ b/src/stt/transcriber.js
@@ -0,0 +1,138 @@
+import { decodeToMono16k, encodeWavChunk } from './audio.js';
+import { detectSpeechSegments } from './vad.js';
+import { planChunks, buildFallbackChunks } from './chunking.js';
+import { buildPromptFromTail, mergeChunkResults } from './merge.js';
+import { STT_CONFIG } from './config.js';
+import { transcribeFile } from '../openai.js';
+
+// Returns a scheduler that runs at most `concurrency` queued functions at a
+// time (FIFO); each call settles with the outcome of the function it was given.
+function createLimiter(concurrency) {
+  const pending = [];
+  let running = 0;
+  function drain() {
+    if (running >= concurrency || pending.length === 0) return;
+    const { fn, resolve, reject } = pending.shift();
+    running += 1;
+    Promise.resolve()
+      .then(fn)
+      .then(resolve, reject)
+      .finally(() => {
+        running -= 1;
+        drain();
+      });
+  }
+
+  return (fn) =>
+    new Promise((resolve, reject) => {
+      pending.push({ fn, resolve, reject });
+      drain();
+    });
+}
+
+// ('talk', 0) -> 'talk-chunk-001.wav': 1-based, zero-padded to three digits.
+function buildChunkFileName(baseName, index) {
+  return `${baseName}-chunk-${String(index + 1).padStart(3, '0')}.wav`;
+}
+
+// Encodes each planned chunk's render window as a WAV File; chunks whose
+// window cannot be encoded (encodeWavChunk returns a falsy value) are skipped.
+function createChunkFiles(pcm, chunks, baseName) {
+  return chunks.flatMap((chunk) => {
+    const { renderStartMs, renderEndMs, index } = chunk;
+    const wav = encodeWavChunk(pcm, renderStartMs, renderEndMs);
+    if (!wav) return [];
+    const file = new File([wav.blob], buildChunkFileName(baseName, index), {
+      type: 'audio/wav',
+    });
+    return [{ ...chunk, file, durationMs: wav.durationMs }];
+  });
+}
+
+// Wraps bytes [start, end) of `file` as `<name>-fallback-NNN[.bin]`.
+function makeFileFromSlice({ file, start, end, index }) {
+  const sourceName = file.name || 'audio';
+  const sequence = String(index + 1).padStart(3, '0');
+  const looksLikeWav =
+    !file.type || file.type.includes('wav') || sourceName.endsWith('.wav');
+  const extension = looksLikeWav ? '' : '.bin';
+  return new File(
+    [file.slice(start, end)],
+    `${sourceName}-fallback-${sequence}${extension}`,
+    { type: file.type || 'application/octet-stream' }
+  );
+}
+
+// Last-resort path: uploads the raw file in maxChunkBytes-sized byte slices,
+// one after another, feeding the tail of earlier text as the next prompt.
+async function fallbackByteChunking({ file, language }) {
+  const sliceBytes = STT_CONFIG.maxChunkBytes;
+  const results = [];
+  let transcriptSoFar = '';
+
+  for (let start = 0; start < file.size; start += sliceBytes) {
+    const index = results.length;
+    const end = Math.min(file.size, start + sliceBytes);
+    const slice = makeFileFromSlice({ file, start, end, index });
+    const prompt = buildPromptFromTail(transcriptSoFar);
+    const text = await transcribeFile({ file: slice, language, prompt });
+    results.push({ index, text });
+    transcriptSoFar = transcriptSoFar ? `${transcriptSoFar}\n${text}` : text;
+  }
+  return mergeChunkResults(results);
+}
+
+/**
+ * Transcribes `file` chunk by chunk and returns the merged transcript.
+ *
+ * The audio is decoded to mono PCM, split around detected speech and each
+ * chunk is uploaded as WAV. A decode failure switches to raw byte slicing; a
+ * VAD or planning failure switches to fallback chunks. Uploads are awaited
+ * one by one because every prompt is built from the text received so far.
+ * @param {{file: File, language: string}} options
+ * @returns {Promise<string>}
+ * @throws {Error} When no chunk could be encoded.
+ */
+export async function chunkedTranscription({ file, language }) {
+  const baseName = (file?.name || 'audio').replace(/\.[^/.]+$/, '');
+
+  let decoded;
+  try {
+    decoded = await decodeToMono16k(file);
+  } catch (error) {
+    console.warn(
+      'Falling back to byte-based chunking due to decode failure',
+      error
+    );
+    return fallbackByteChunking({ file, language });
+  }
+  const { pcm, durationMs } = decoded;
+
+  let plan;
+  try {
+    const segments = await detectSpeechSegments(pcm);
+    plan = planChunks({ segments, durationMs });
+  } catch (error) {
+    console.warn('VAD segmentation failed, using fallback chunking', error);
+    plan = buildFallbackChunks(durationMs);
+  }
+
+  const uploads = createChunkFiles(pcm, plan, baseName);
+  if (uploads.length === 0) {
+    throw new Error('Failed to prepare audio chunks for transcription');
+  }
+
+  const schedule = createLimiter(STT_CONFIG.uploadConcurrency);
+  const results = [];
+  let transcriptSoFar = '';
+  for (const { file: chunkFile, index } of uploads) {
+    const prompt = buildPromptFromTail(transcriptSoFar);
+    const text = await schedule(() =>
+      transcribeFile({ file: chunkFile, language, prompt })
+    );
+    results.push({ index, text });
+    transcriptSoFar = transcriptSoFar ? `${transcriptSoFar}\n${text}` : text;
+  }
+
+  return mergeChunkResults(results);
+}
diff --git a/src/stt/transcriber.test.js b/src/stt/transcriber.test.js
new file mode 100644
index 0000000..2060828
--- /dev/null
+++ b/src/stt/transcriber.test.js
@@ -0,0 +1,165 @@
+import { chunkedTranscription } from './transcriber.js';
+import { STT_CONFIG } from './config.js';
+import { decodeToMono16k, encodeWavChunk } from './audio.js';
+import { detectSpeechSegments } from './vad.js';
+import { planChunks, buildFallbackChunks } from './chunking.js';
+import { transcribeFile } from '../openai.js';
+
+jest.mock('./audio.js', () => ({
+  decodeToMono16k: jest.fn(),
+  encodeWavChunk: jest.fn(),
+}));
+
+jest.mock('./vad.js', () => ({
+  detectSpeechSegments: jest.fn(),
+}));
+
+jest.mock('./chunking.js', () => ({
+  planChunks: jest.fn(),
+  buildFallbackChunks: jest.fn(),
+}));
+
+jest.mock('../openai.js', () => ({
+  transcribeFile: jest.fn(),
+}));
+
+describe('chunkedTranscription', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  test('splits audio into chunks and preserves prompts', async () => {
+    const pcm = new Float32Array(32_000);
+    decodeToMono16k.mockResolvedValue({ pcm, durationMs: 2000 });
+    detectSpeechSegments.mockResolvedValue([{ startMs: 0, endMs: 1500 }]);
+    planChunks.mockReturnValue([
+      { index: 0, renderStartMs: 0, renderEndMs: 1200 },
+      { index: 1, renderStartMs: 1000, renderEndMs: 2000 },
+    ]);
+    encodeWavChunk.mockImplementation((buffer, startMs, endMs) => ({
+      blob: new Blob([`${startMs}-${endMs}`]),
+      durationMs: endMs - startMs,
+    }));
+    transcribeFile
+      .mockResolvedValueOnce('First chunk content.')
+      .mockResolvedValueOnce('Continuation second chunk.');
+
+    const file = new File([new Uint8Array(10)], 'example.wav', {
+      type: 'audio/wav',
+    });
+
+    const result = await chunkedTranscription({ file, language: 'en' });
+
+    expect(planChunks).toHaveBeenCalled();
+    expect(encodeWavChunk).toHaveBeenCalledTimes(2);
+    expect(transcribeFile).toHaveBeenNthCalledWith(
+      1,
+      expect.objectContaining({ prompt: '' })
+    );
+    expect(transcribeFile).toHaveBeenNthCalledWith(
+      2,
+      expect.objectContaining({ prompt: 'First chunk content.' })
+    );
+    expect(result).toContain('First chunk content.');
+    expect(result).toContain('Continuation second chunk.');
+  });
+
+  test('falls back to chunking when VAD fails', async () => {
+    const pcm = new Float32Array(16_000);
+    decodeToMono16k.mockResolvedValue({ pcm, durationMs: 1000 });
+    detectSpeechSegments.mockRejectedValue(new Error('vad failure'));
+    buildFallbackChunks.mockReturnValue([
+      { index: 0, renderStartMs: 0, renderEndMs: 1000 },
+    ]);
+    encodeWavChunk.mockReturnValue({
+      blob: new Blob(['fallback']),
+      durationMs: 1000,
+    });
+    transcribeFile.mockResolvedValue('Recovered text');
+
+    const file = new File([new Uint8Array(10)], 'fallback.wav', {
+      type: 'audio/wav',
+    });
+
+    const result = await chunkedTranscription({ file, language: 'en' });
+
+    expect(buildFallbackChunks).toHaveBeenCalled();
+    expect(transcribeFile).toHaveBeenCalledWith(
+      expect.objectContaining({ prompt: '' })
+    );
+    expect(result).toBe('Recovered text');
+  });
+
+  test('falls back to byte chunking when decode fails', async () => {
+    decodeToMono16k.mockRejectedValue(new Error('decode error'));
+    const originalSize = STT_CONFIG.maxChunkBytes / 2;
+    const file = new File([new Uint8Array(originalSize)], 'large.bin', {
+      type: 'application/octet-stream',
+    });
+    transcribeFile.mockResolvedValue('Single chunk text');
+
+    const result = await chunkedTranscription({ file, language: 'en' });
+
+    expect(transcribeFile).toHaveBeenCalledTimes(1);
+    expect(result).toBe('Single chunk text');
+  });
+
+  test('byte chunking splits very large files', async () => {
+    decodeToMono16k.mockRejectedValue(new Error('decode error'));
+    const size = STT_CONFIG.maxChunkBytes * 1.5;
+    const file = new File([new Uint8Array(size)], 'massive.bin', {
+      type: 'application/octet-stream',
+    });
+    transcribeFile
+      .mockResolvedValueOnce('Part A')
+      .mockResolvedValueOnce('Part B');
+
+    const result = await chunkedTranscription({ file, language: 'en' });
+
+    expect(transcribeFile).toHaveBeenCalledTimes(2);
+    expect(result).toContain('Part A');
+    expect(result).toContain('Part B');
+  });
+
+  test('skips chunks that fail to encode', async () => {
+    const pcm = new Float32Array(32_000);
+    decodeToMono16k.mockResolvedValue({ pcm, durationMs: 2000 });
+    detectSpeechSegments.mockResolvedValue([{ startMs: 0, endMs: 1500 }]);
+    planChunks.mockReturnValue([
+      { index: 0, renderStartMs: 0, renderEndMs: 1200 },
+      { index: 1, renderStartMs: 1000, renderEndMs: 2000 },
+    ]);
+    encodeWavChunk.mockReturnValueOnce(null).mockReturnValueOnce({
+      blob: new Blob(['valid']),
+      durationMs: 800,
+    });
+    transcribeFile.mockResolvedValue('Only valid chunk');
+
+    const file = new File([new Uint8Array(10)], 'example.wav', {
+      type: 'audio/wav',
+    });
+
+    const result = await chunkedTranscription({ file, language: 'en' });
+
+    expect(transcribeFile).toHaveBeenCalledTimes(1);
+    expect(result).toBe('Only valid chunk');
+  });
+
+  test('throws when no chunks can be encoded', async () => {
+    const pcm = new Float32Array(16_000);
+    decodeToMono16k.mockResolvedValue({ pcm, durationMs: 1000 });
+    detectSpeechSegments.mockResolvedValue([{ startMs: 0, endMs: 800 }]);
+    planChunks.mockReturnValue([
+      { index: 0, renderStartMs: 0, renderEndMs: 900 },
+    ]);
+    encodeWavChunk.mockReturnValue(null);
+
+    const file = new File([new Uint8Array(10)], 'broken.wav', {
+      type: 'audio/wav',
+    });
+
+    await expect(
+      chunkedTranscription({ file, language: 'en' })
+    ).rejects.toThrow('Failed to prepare audio chunks for transcription');
+  });
+});
diff --git a/src/stt/vad.js b/src/stt/vad.js
new file mode 100644
index 0000000..716b72a
--- /dev/null
+++ b/src/stt/vad.js
@@ -0,0 +1,221 @@
+import {
+  DEFAULT_ORT_WASM_PATH,
+  DEFAULT_SILERO_MODEL_URL,
+  STT_CONFIG,
+} from './config.js';
+import { samplesToMs } from './audio.js';
+
+let ortPromise = null;
+let sessionPromise = null;
+
+/* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
+function ensureOrt() {
+  if (!ortPromise) {
+    ortPromise = import(
+      'https://esm.sh/onnxruntime-web@1.18.0?target=es2020'
+    ).then((module) => {
+      const ort = module.default || module;
+      if (ort?.env?.wasm) {
+        if (typeof ort.env.wasm.wasmPaths === 'string') {
+          ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
+        } else {
+          ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
+        }
+      }
+      return ort;
+    });
+  }
+  return ortPromise;
+}
+
+/* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
+async function ensureSession() {
+  if (!sessionPromise) {
+    const ort = await ensureOrt();
+    sessionPromise = ort.InferenceSession.create(DEFAULT_SILERO_MODEL_URL);
+  }
+  return sessionPromise;
+}
+
+function createStateTensor(ort) {
+  return new ort.Tensor('float32', new Float32Array(128), [1, 128]);
+}
+
+function createHiddenTensor(ort) {
+  return new ort.Tensor('float32', new Float32Array(2 * 1 * 64), [2, 1, 64]);
+}
+
+function createInputTensor(ort, chunk, windowSamples) {
+  const buffer = new Float32Array(windowSamples);
+  buffer.set(chunk);
+  return new ort.Tensor('float32', buffer, [1, windowSamples]);
+}
+
+function createSrTensor(ort) {
+  const rate = BigInt(STT_CONFIG.sampleRate);
+  const srArray = new BigInt64Array([rate]);
+  return new ort.Tensor('int64', srArray, [1]);
+}
+
+function appendSegment({ segments, startMs, endMs, totalDurationMs }) {
+  const start = Math.max(0, startMs - STT_CONFIG.speechPadMs);
+  const end = Math.min(endMs + STT_CONFIG.speechPadMs, totalDurationMs);
+  if (end - start >= STT_CONFIG.minSpeechMs) {
+    segments.push({ startMs: start, endMs: end });
+  }
+}
+
+// Coalesces segments whose gap is at most STT_CONFIG.minSilenceMs. Works on
+// copies: neither the input array nor its segment objects are mutated.
+function mergeSegments(segments) {
+  if (!segments.length) return segments;
+  const sorted = [...segments].sort((a, b) => a.startMs - b.startMs);
+  const merged = [{ ...sorted[0] }];
+  for (const current of sorted.slice(1)) {
+    const prev = merged[merged.length - 1];
+    if (current.startMs <= prev.endMs + STT_CONFIG.minSilenceMs) {
+      prev.endMs = Math.max(prev.endMs, current.endMs);
+    } else {
+      merged.push({ ...current });
+    }
+  }
+
+  return merged;
+}
+
+function postProcessProbabilities(probabilities, totalSamples) {
+  const windowMs = samplesToMs(STT_CONFIG.windowSamples);
+  const totalDurationMs = samplesToMs(totalSamples);
+  const segments = [];
+
+  let speechStart = null;
+  let lastSpeechMs = 0;
+  let silenceMs = 0;
+
+  const finalizeSpeech = () => {
+    if (speechStart === null) return;
+    appendSegment({
+      segments,
+      startMs: speechStart,
+      endMs: lastSpeechMs,
+      totalDurationMs,
+    });
+    speechStart = null;
+    silenceMs = 0;
+  };
+
+  for (let i = 0; i < probabilities.length; i += 1) {
+    const prob = probabilities[i];
+    const frameStart = i * windowMs;
+    const frameEnd = frameStart + windowMs;
+
+    if (prob >= STT_CONFIG.threshold) {
+      speechStart = speechStart ?? frameStart;
+      lastSpeechMs = frameEnd;
+      silenceMs = 0;
+
+      if (lastSpeechMs - speechStart >= STT_CONFIG.maxSpeechMs) {
+        finalizeSpeech();
+      }
+      continue;
+    }
+
+    if (speechStart === null) continue;
+
+    silenceMs += windowMs;
+    if (silenceMs >= STT_CONFIG.minSilenceMs) {
+      finalizeSpeech();
+    }
+  }
+
+  finalizeSpeech();
+  return mergeSegments(segments);
+}
+
+const PROBABILITY_KEYS = ['output', 'prob', 'probs', 'output.1', 'speech_prob'];
+
+function readProbability(value) {
+  if (value == null) return null;
+  if (typeof value === 'number') return value;
+
+  const arrayLike = Array.isArray(value) ? value : value?.data;
+  if (arrayLike && typeof arrayLike[0] === 'number') {
+    return arrayLike[0];
+  }
+
+  return null;
+}
+
+// Returns the first finite probability found under a known output name, or 0
+// when the results carry none of them (or are missing altogether).
+function extractSpeechProbability(results) {
+  for (const key of PROBABILITY_KEYS) {
+    const probability = readProbability(results?.[key]);
+    if (Number.isFinite(probability)) return probability;
+  }
+  return 0;
+}
+
+/* istanbul ignore next -- requires onnx runtime in browser */
+export async function detectSpeechSegments(pcm) {
+  if (!pcm || pcm.length === 0) return [];
+  if (typeof window === 'undefined') {
+    throw new Error('VAD requires browser environment');
+  }
+
+  const ort = await ensureOrt();
+  const session = await ensureSession();
+
+  const probabilities = [];
+  const windowSamples = STT_CONFIG.windowSamples;
+  let hTensor = createHiddenTensor(ort);
+  let cTensor = createHiddenTensor(ort);
+  let stateTensor = createStateTensor(ort);
+  const srTensor = createSrTensor(ort);
+
+  for (let offset = 0; offset < pcm.length; offset += windowSamples) {
+    const chunk = pcm.subarray(offset, offset + windowSamples);
+    const inputTensor = createInputTensor(ort, chunk, windowSamples);
+
+    const feeds = {
+      input: inputTensor,
+      h: hTensor,
+      c: cTensor,
+      sr: srTensor,
+      state: stateTensor,
+    };
+
+    let results;
+    try {
+      results = await session.run(feeds);
+    } catch (error) {
+      console.warn(
+        'Silero VAD inference failed, falling back to naive chunking',
+        error
+      );
+      throw error;
+    }
+
+    const probability = extractSpeechProbability(results);
+    probabilities.push(typeof probability === 'number' ? probability : 0);
+
+    hTensor = results.h || hTensor;
+    cTensor = results.c || cTensor;
+    stateTensor = results.state || stateTensor;
+  }
+
+  return postProcessProbabilities(probabilities, pcm.length);
+}
+
+export function __resetVadForTesting() {
+  ortPromise = null;
+  sessionPromise = null;
+}
+
+export const __internal = {
+  appendSegment,
+  mergeSegments,
+  postProcessProbabilities,
+  readProbability,
+  extractSpeechProbability,
+};
diff --git a/src/stt/vad.test.js b/src/stt/vad.test.js
new file mode 100644
index 0000000..514e388
--- /dev/null
+++ b/src/stt/vad.test.js
@@ -0,0 +1,55 @@
+import { __internal } from './vad.js';
+import { STT_CONFIG } from './config.js';
+
+const { postProcessProbabilities, readProbability, extractSpeechProbability } =
+  __internal;
+
+describe('vad helpers', () => {
+  test('readProbability handles different inputs', () => {
+    expect(readProbability(0.5)).toBe(0.5);
+    expect(readProbability([0.7])).toBe(0.7);
+    expect(readProbability({ data: [0.2] })).toBe(0.2);
+    expect(readProbability({ data: new Float32Array([0.3]) })).toBeCloseTo(0.3);
+    expect(readProbability(null)).toBeNull();
+  });
+
+  test('extractSpeechProbability picks first available key', () => {
+    const results = {
+      output: null,
+      prob: null,
+      probs: null,
+      'output.1': { data: [0.6] },
+      speech_prob: { data: [0.1] },
+    };
+    expect(extractSpeechProbability(results)).toBe(0.6);
+  });
+
+  test('postProcessProbabilities merges short gaps', () => {
+    const probabilities = new Array(20).fill(0);
+    for (let i = 1; i <= 3; i += 1) probabilities[i] = 0.9;
+    for (let i = 5; i <= 7; i += 1) probabilities[i] = 0.85;
+    const totalSamples = STT_CONFIG.sampleRate * 2; // 2 seconds of audio
+    const segments = postProcessProbabilities(probabilities, totalSamples);
+    expect(segments).toHaveLength(1);
+    const [segment] = segments;
+    expect(segment.startMs).toBeGreaterThanOrEqual(0);
+    expect(segment.endMs).toBeGreaterThan(segment.startMs);
+  });
+
+  test('postProcessProbabilities respects max speech length', () => {
+    // STT_CONFIG is shared module state: snapshot the two fields this test
+    // overrides and restore them in `finally`, so a failing assertion cannot
+    // leak the shortened limits into later tests.
+    const { maxSpeechMs, minSilenceMs } = STT_CONFIG;
+    Object.assign(STT_CONFIG, { maxSpeechMs: 100, minSilenceMs: 50 });
+    try {
+      const burst = new Array(10).fill(0.95);
+      const probabilities = [...burst, ...new Array(4).fill(0), ...burst];
+      const totalSamples = STT_CONFIG.sampleRate * 3;
+      const segments = postProcessProbabilities(probabilities, totalSamples);
+      expect(segments.length).toBeGreaterThan(0);
+    } finally {
+      Object.assign(STT_CONFIG, { maxSpeechMs, minSilenceMs });
+    }
+  });
+});
diff --git a/src/transcription.js b/src/transcription.js
index 93a6c43..a1ea1e3 100644
--- a/src/transcription.js
+++ b/src/transcription.js
@@ -1,4 +1,4 @@
-import { transcribeFile } from './openai.js';
+import { chunkedTranscription } from './stt/transcriber.js';
 
 const LANGUAGE_STORAGE_KEY = 'transcription_language';
 
@@ -132,5 +132,5 @@ export function createSpeechRecognitionController({
 }
 
 export async function transcribeAudioFile({ file, language }) {
-  return transcribeFile({ file, language });
+  return chunkedTranscription({ file, language });
 }
diff --git a/src/transcription.test.js b/src/transcription.test.js
index fca38bb..3cbe967 100644
--- a/src/transcription.test.js
+++ b/src/transcription.test.js
@@ -4,10 +4,10 @@ import {
   createSpeechRecognitionController,
   transcribeAudioFile,
 } from './transcription.js';
-import { transcribeFile } from './openai.js';
+import { chunkedTranscription } from './stt/transcriber.js';
 
-jest.mock('./openai.js', () => ({
-  transcribeFile: jest.fn(),
+jest.mock('./stt/transcriber.js', () => ({
+  chunkedTranscription: jest.fn(),
 }));
 
 describe('transcription utilities', () => {
@@ -223,11 +223,14 @@ describe('transcription utilities', () => {
     controller.stop();
   });
 
-  test('transcribeAudioFile proxies to openai module', async () => {
+  test('transcribeAudioFile delegates to chunked transcriber', async () => {
     const file = new File(['data'], 'audio.mp3', { type: 'audio/mpeg' });
-    transcribeFile.mockResolvedValue('transcribed');
+    chunkedTranscription.mockResolvedValue('transcribed');
     const result = await transcribeAudioFile({ file, language: 'en' });
-    expect(transcribeFile).toHaveBeenCalledWith({ file, language: 'en' });
+    expect(chunkedTranscription).toHaveBeenCalledWith({
+      file,
+      language: 'en',
+    });
     expect(result).toBe('transcribed');
   });
 });

From bbc0c198bd9092f1d268569a0fa3359e204b6862 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:07:35 +0000
Subject: [PATCH 02/14] Harden ONNX Runtime Web initialization for VAD

Disable multi-threading: set ort.env.wasm.numThreads = 1 to avoid the
cross-origin isolation requirement.

Skip invalid WASM paths: only set custom WASM paths if they are explicitly
configured and not the default /ort/ (which doesn't exist).

Add error handling: wrap ONNX initialization in try/catch blocks to provide
cleaner error messages instead of cryptic WASM failures.

---
 src/stt/vad.js | 48 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index 716b72a..aeaeebe 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -11,19 +11,31 @@ let sessionPromise = null;
 /* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
 function ensureOrt() {
   if (!ortPromise) {
-    ortPromise = import(
-      'https://esm.sh/onnxruntime-web@1.18.0?target=es2020'
-    ).then((module) => {
-      const ort = module.default || module;
-      if (ort?.env?.wasm) {
-        if (typeof ort.env.wasm.wasmPaths === 'string') {
-          ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
-        } else {
-          ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
+    ortPromise = import('https://esm.sh/onnxruntime-web@1.18.0?target=es2020')
+      .then((module) => {
+        const ort = module.default || module;
+        if (ort?.env?.wasm) {
+          // Disable multi-threading to avoid cross-origin isolation requirement
+          ort.env.wasm.numThreads = 1;
+          // Only set custom WASM paths if explicitly configured and not the default '/ort/'
+          if (DEFAULT_ORT_WASM_PATH && DEFAULT_ORT_WASM_PATH !== '/ort/') {
+            if (typeof ort.env.wasm.wasmPaths === 'string') {
+              ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
+            } else {
+              ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
+            }
+          }
+          // If DEFAULT_ORT_WASM_PATH is '/ort/' or not set, let ONNX use its default CDN loading
         }
-      }
-      return ort;
-    });
+        return ort;
+      })
+      .catch((error) => {
+        console.warn(
+          'Failed to load ONNX Runtime Web, VAD will not be available',
+          error
+        );
+        throw new Error('ONNX Runtime Web not available');
+      });
   }
   return ortPromise;
 }
@@ -31,8 +43,16 @@ function ensureOrt() {
 /* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
 async function ensureSession() {
   if (!sessionPromise) {
-    const ort = await ensureOrt();
-    sessionPromise = ort.InferenceSession.create(DEFAULT_SILERO_MODEL_URL);
+    try {
+      const ort = await ensureOrt();
+      sessionPromise = ort.InferenceSession.create(DEFAULT_SILERO_MODEL_URL);
+    } catch (error) {
+      console.warn(
+        'Failed to create ONNX InferenceSession, VAD will not be available',
+        error
+      );
+      throw new Error('ONNX InferenceSession not available');
+    }
   }
   return sessionPromise;
 }

From df5b6e4989980507b3185545e7b6b37deba014a7 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:13:55 +0000
Subject: [PATCH 03/14] Refactor chunking tests for clarity and accuracy;
 update VAD to use CDN for ONNX Runtime

---
 src/stt/chunking.test.js | 20 +++++++++++---------
 src/stt/config.js        |  2 +-
 src/stt/vad.js           |  9 +++++++--
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/stt/chunking.test.js b/src/stt/chunking.test.js
index 08cbbdb..2606150 100644
--- a/src/stt/chunking.test.js
+++ b/src/stt/chunking.test.js
@@ -26,15 +26,17 @@ describe('chunk planning utilities', () => {
     });
   });
 
-  test('buildFallbackChunks respects size limits', () => {
-    const originalBytes = STT_CONFIG.maxChunkBytes;
-    STT_CONFIG.maxChunkBytes = 32_000; // ~1s of 16k PCM
-    const fallback = buildFallbackChunks(120_000);
-    expect(fallback.length).toBeGreaterThan(1);
-    fallback.forEach((chunk) => {
-      expect(chunk.renderEndMs).toBeGreaterThan(chunk.renderStartMs);
-    });
-    STT_CONFIG.maxChunkBytes = originalBytes;
+  test('buildFallbackChunks total rendered duration is reasonable', () => {
+    const durationMs = 10 * 60 * 1000; // 10 minutes
+    const chunks = buildFallbackChunks(durationMs);
+    const totalRendered = chunks.reduce(
+      (sum, chunk) => sum + (chunk.renderEndMs - chunk.renderStartMs),
+      0
+    );
+    // For fallback chunks with overlap, total should be close to input duration
+    // Allow up to 10% extra for overlaps
+    expect(totalRendered).toBeLessThanOrEqual(durationMs * 1.1);
+    expect(totalRendered).toBeGreaterThanOrEqual(durationMs);
   });
 
   test('planChunks respects size thresholds during packing', () => {
diff --git a/src/stt/config.js b/src/stt/config.js
index a3cacd1..e5eadf1 100644
--- a/src/stt/config.js
+++ b/src/stt/config.js
@@ -17,7 +17,7 @@ export const STT_CONFIG = {
 export const DEFAULT_SILERO_MODEL_URL =
   typeof window !== 'undefined' && window.SILERO_VAD_MODEL
     ? window.SILERO_VAD_MODEL
-    : '/models/silero_v5_16k.onnx';
+    : 'https://github.com/snakers4/silero-models/raw/master/models/silero_vad/en/silero_vad.onnx';
 
 export const DEFAULT_ORT_WASM_PATH =
   typeof window !== 'undefined' && window.ORT_WASM_PATH
diff --git a/src/stt/vad.js b/src/stt/vad.js
index aeaeebe..ee3026e 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -11,12 +11,17 @@ let sessionPromise = null;
 /* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
 function ensureOrt() {
   if (!ortPromise) {
-    ortPromise = import('https://esm.sh/onnxruntime-web@1.18.0?target=es2020')
+    ortPromise = import(
+      'https://esm.sh/@microsoft/onnxruntime-web@1.18.0?target=es2020'
+    )
       .then((module) => {
         const ort = module.default || module;
         if (ort?.env?.wasm) {
           // Disable multi-threading to avoid cross-origin isolation requirement
           ort.env.wasm.numThreads = 1;
+          // Set WASM paths to CDN
+          ort.env.wasm.wasmPaths =
+            'https://cdn.jsdelivr.net/npm/@microsoft/onnxruntime-web@1.18.0/dist/';
           // Only set custom WASM paths if explicitly configured and not the default '/ort/'
           if (DEFAULT_ORT_WASM_PATH && DEFAULT_ORT_WASM_PATH !== '/ort/') {
             if (typeof ort.env.wasm.wasmPaths === 'string') {
@@ -25,7 +30,7 @@ function ensureOrt() {
               ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
             }
           }
-          // If DEFAULT_ORT_WASM_PATH is '/ort/' or not set, let ONNX use its default CDN loading
+          // If DEFAULT_ORT_WASM_PATH is '/ort/' or not set, use the CDN
         }
         return ort;
       })

From 366ee0bd22be68ade6c0775996f2060ebfbee8a0 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:15:28 +0000
Subject: [PATCH 04/14] Update ONNX Runtime import to use CDN and disable
 multi-threading to avoid requiring cross-origin isolation

---
 src/stt/vad.js | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index ee3026e..23d58bb 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -12,25 +12,19 @@ let sessionPromise = null;
 function ensureOrt() {
   if (!ortPromise) {
     ortPromise = import(
-      'https://esm.sh/@microsoft/onnxruntime-web@1.18.0?target=es2020'
+      'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/ort.bundle.min.mjs'
     )
-      .then((module) => {
-        const ort = module.default || module;
+      .then((ort) => {
         if (ort?.env?.wasm) {
           // Disable multi-threading to avoid cross-origin isolation requirement
           ort.env.wasm.numThreads = 1;
           // Set WASM paths to CDN
           ort.env.wasm.wasmPaths =
-            'https://cdn.jsdelivr.net/npm/@microsoft/onnxruntime-web@1.18.0/dist/';
+            'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/';
           // Only set custom WASM paths if explicitly configured and not the default '/ort/'
           if (DEFAULT_ORT_WASM_PATH && DEFAULT_ORT_WASM_PATH !== '/ort/') {
-            if (typeof ort.env.wasm.wasmPaths === 'string') {
-              ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
-            } else {
-              ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
-            }
+            ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
           }
-          // If DEFAULT_ORT_WASM_PATH is '/ort/' or not set, use the CDN
         }
         return ort;
       })

From f0678bb532c9fb1aa47bf13efe0d81dcfa5df8c8 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:18:42 +0000
Subject: [PATCH 05/14] Update ONNX Runtime import to use ESM and adjust
 multi-threading settings to avoid requiring cross-origin isolation

---
 src/stt/vad.js | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index 23d58bb..9199882 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -11,17 +11,18 @@ let sessionPromise = null;
 /* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
 function ensureOrt() {
   if (!ortPromise) {
-    ortPromise = import(
-      'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/ort.bundle.min.mjs'
-    )
-      .then((ort) => {
+    ortPromise = import('https://esm.sh/onnxruntime-web@1.18.0?target=es2020')
+      .then((module) => {
+        const ort = module.default || module;
+
         if (ort?.env?.wasm) {
-          // Disable multi-threading to avoid cross-origin isolation requirement
+          // avoid COOP/COEP
           ort.env.wasm.numThreads = 1;
-          // Set WASM paths to CDN
+
+          // point WASM assets to a CDN (unscoped package!)
           ort.env.wasm.wasmPaths =
             'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/';
-          // Only set custom WASM paths if explicitly configured and not the default '/ort/'
+
           if (DEFAULT_ORT_WASM_PATH && DEFAULT_ORT_WASM_PATH !== '/ort/') {
             ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
           }

From 21a646c506345ac1bdf707a1146d508f2934f3f0 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:20:15 +0000
Subject: [PATCH 06/14] Refactor ONNX Runtime import to simplify promise
 handling and keep execution single-threaded so cross-origin isolation is
 not required

---
 src/stt/vad.js | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index 9199882..3c10fa6 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -12,9 +12,7 @@ let sessionPromise = null;
 function ensureOrt() {
   if (!ortPromise) {
     ortPromise = import('https://esm.sh/onnxruntime-web@1.18.0?target=es2020')
-      .then((module) => {
-        const ort = module.default || module;
-
+      .then((ort) => {
         if (ort?.env?.wasm) {
           // avoid COOP/COEP
           ort.env.wasm.numThreads = 1;

From c129850629d637db259ad3af6a60b713c988a818 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:22:41 +0000
Subject: [PATCH 07/14] Enhance error handling in ONNX Runtime import to check
 for module availability and InferenceSession

---
 src/stt/vad.js | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index 3c10fa6..656bc04 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -11,8 +11,11 @@ let sessionPromise = null;
 /* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
 function ensureOrt() {
   if (!ortPromise) {
-    ortPromise = import('https://esm.sh/onnxruntime-web@1.18.0?target=es2020')
+    ortPromise = import('https://esm.sh/onnxruntime-web@1.18.0')
       .then((ort) => {
+        if (!ort) {
+          throw new Error('ONNX Runtime Web module is empty or undefined');
+        }
         if (ort?.env?.wasm) {
           // avoid COOP/COEP
           ort.env.wasm.numThreads = 1;
@@ -43,6 +46,9 @@ async function ensureSession() {
   if (!sessionPromise) {
     try {
       const ort = await ensureOrt();
+      if (!ort.InferenceSession) {
+        throw new Error('InferenceSession not available in ONNX Runtime Web module');
+      }
       sessionPromise = ort.InferenceSession.create(DEFAULT_SILERO_MODEL_URL);
     } catch (error) {
       console.warn(

From cfd4646a711a0e74e75c8bec9818fc832d42f063 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:23:25 +0000
Subject: [PATCH 08/14] Enhance error handling in ensureSession to check for
 InferenceSession.create availability

---
 src/stt/vad.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index 656bc04..06b85ba 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -46,8 +46,8 @@ async function ensureSession() {
   if (!sessionPromise) {
     try {
       const ort = await ensureOrt();
-      if (!ort.InferenceSession) {
-        throw new Error('InferenceSession not available in ONNX Runtime Web module');
+      if (!ort.InferenceSession || typeof ort.InferenceSession.create !== 'function') {
+        throw new Error('InferenceSession.create not available in ONNX Runtime Web module');
       }
       sessionPromise = ort.InferenceSession.create(DEFAULT_SILERO_MODEL_URL);
     } catch (error) {

From 545df4ae8c5f4159889849564887909bb916f74b Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:26:53 +0000
Subject: [PATCH 09/14] Enhance ONNX Runtime import to support multiple CDN
 sources and improve error handling for module availability

---
 src/stt/vad.js | 84 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 25 deletions(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index 06b85ba..102e510 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -11,32 +11,55 @@ let sessionPromise = null;
 /* istanbul ignore next -- runtime depends on onnxruntime-web in browser */
 function ensureOrt() {
   if (!ortPromise) {
-    ortPromise = import('https://esm.sh/onnxruntime-web@1.18.0')
-      .then((ort) => {
-        if (!ort) {
-          throw new Error('ONNX Runtime Web module is empty or undefined');
-        }
-        if (ort?.env?.wasm) {
-          // avoid COOP/COEP
-          ort.env.wasm.numThreads = 1;
-
-          // point WASM assets to a CDN (unscoped package!)
-          ort.env.wasm.wasmPaths =
-            'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/';
+    // Try multiple import sources. Some CDNs or esm transforms wrap the
+    // real export under `default` or produce incomplete modules. Try
+    // esm.sh first (fast), then fall back to known CDN ESM builds.
+    ortPromise = (async () => {
+      const candidates = [
+        'https://esm.sh/onnxruntime-web@1.18.0',
+        'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/ort.esm.js',
+        'https://unpkg.com/onnxruntime-web@1.18.0/dist/ort.esm.js',
+      ];
+
+      let lastError = null;
+      for (const url of candidates) {
+        try {
+          const module = await import(url);
+          const ort = module?.default || module;
+          if (!ort) throw new Error('empty module');
+
+          // basic sanity: must expose InferenceSession.create
+          if (
+            !ort.InferenceSession ||
+            typeof ort.InferenceSession.create !== 'function'
+          ) {
+            throw new Error(
+              'incomplete ort module (missing InferenceSession.create)'
+            );
+          }
 
-          if (DEFAULT_ORT_WASM_PATH && DEFAULT_ORT_WASM_PATH !== '/ort/') {
-            ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
+          // configure WASM loader (single-threaded to avoid COOP/COEP)
+          if (ort?.env?.wasm) {
+            ort.env.wasm.numThreads = 1;
+            ort.env.wasm.wasmPaths =
+              DEFAULT_ORT_WASM_PATH && DEFAULT_ORT_WASM_PATH !== '/ort/'
+                ? DEFAULT_ORT_WASM_PATH
+                : 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/';
           }
+
+          return ort;
+        } catch (err) {
+          // try next candidate
+          lastError = err;
         }
-        return ort;
-      })
-      .catch((error) => {
-        console.warn(
-          'Failed to load ONNX Runtime Web, VAD will not be available',
-          error
-        );
-        throw new Error('ONNX Runtime Web not available');
-      });
+      }
+
+      console.warn(
+        'Failed to load ONNX Runtime Web from CDN candidates',
+        lastError
+      );
+      throw new Error('ONNX Runtime Web not available');
+    })();
   }
   return ortPromise;
 }
@@ -46,8 +69,13 @@ async function ensureSession() {
   if (!sessionPromise) {
     try {
       const ort = await ensureOrt();
-      if (!ort.InferenceSession || typeof ort.InferenceSession.create !== 'function') {
-        throw new Error('InferenceSession.create not available in ONNX Runtime Web module');
+      if (
+        !ort.InferenceSession ||
+        typeof ort.InferenceSession.create !== 'function'
+      ) {
+        throw new Error(
+          'InferenceSession.create not available in ONNX Runtime Web module'
+        );
       }
       sessionPromise = ort.InferenceSession.create(DEFAULT_SILERO_MODEL_URL);
     } catch (error) {
@@ -190,6 +218,12 @@ export async function detectSpeechSegments(pcm) {
   const ort = await ensureOrt();
   const session = await ensureSession();
 
+  if (!session || typeof session.run !== 'function') {
+    throw new Error(
+      `ONNX InferenceSession.run not available (type=${typeof (session && session.run)})`
+    );
+  }
+
   const probabilities = [];
   const windowSamples = STT_CONFIG.windowSamples;
   let hTensor = createHiddenTensor(ort);

From 2eecbadfbce7ed9e7a480e6576bb4fced67b95e4 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:30:52 +0000
Subject: [PATCH 10/14] Add diagnostic logging for ONNX Runtime Web URL loading

---
 src/stt/vad.js | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index 102e510..d0e42c3 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -47,6 +47,12 @@ function ensureOrt() {
                 : 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/';
           }
 
+          // diagnostic: report which URL produced a usable ort
+          try {
+            console.info('Loaded ONNX Runtime Web from', url);
+          } catch {
+            /* ignore */
+          }
           return ort;
         } catch (err) {
           // try next candidate

From a5d0bff9136c22f4c58e815c2c15e700ec3ec6b3 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 08:31:26 +0000
Subject: [PATCH 11/14] Add fetch script for ONNX Runtime Web distribution
 assets

---
 scripts/README.md          | 20 ++++++++++++++++
 scripts/fetch-onnx-dist.sh | 47 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 scripts/README.md
 create mode 100644 scripts/fetch-onnx-dist.sh

diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..4da3d99
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,20 @@
+# Fetch ONNX Runtime Web dist assets
+
+This folder contains a small helper script to download the ESM/UMD build and WASM
+assets for `onnxruntime-web` into the project `./ort/` folder so the app can
+load the WASM locally and avoid cross-origin or CDN transform issues.
+
+## Usage
+
+1. Run the script (requires `npm` and `tar` available on the PATH):
+
+```bash
+./scripts/fetch-onnx-dist.sh 1.18.0
+```
+
+2. Serve the repository (or your static files) so `/ort/` is accessible from the
+app root. The code already defaults to `DEFAULT_ORT_WASM_PATH = '/ort/'` in
+`src/stt/config.js`.
+
+3. Optionally set `window.ORT_WASM_PATH = '/ort/'` and `window.SILERO_VAD_MODEL = '/models/silero_v5_16k.onnx'` in
+`index.html` to be explicit during development.
diff --git a/scripts/fetch-onnx-dist.sh b/scripts/fetch-onnx-dist.sh
new file mode 100644
index 0000000..4ec49c0
--- /dev/null
+++ b/scripts/fetch-onnx-dist.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Fetch onnxruntime-web dist/ assets and place them in project ./ort/ folder
+# Usage: ./scripts/fetch-onnx-dist.sh [version]
+# Example: ./scripts/fetch-onnx-dist.sh 1.18.0
+
+VERSION=${1:-1.18.0}
+ROOT_DIR=$(cd "$(dirname "$0")/.." && pwd)
+TMPDIR=$(mktemp -d)
+
+echo "Fetching onnxruntime-web@$VERSION into $ROOT_DIR/ort"
+cd "$TMPDIR"
+
+# Download the package tarball; `npm pack` prints its filename on stdout.
+# Test the result inside `if`: under `set -e` a bare failing command would
+# abort silently here, because npm's stderr is discarded.
+echo "Downloading npm package..."
+if ! TARBALL=$(npm pack "onnxruntime-web@$VERSION" 2>/dev/null | tail -n1) || [ -z "$TARBALL" ]; then
+  echo "Failed to download onnxruntime-web@$VERSION" >&2
+  exit 1
+fi
+
+# Extract tarball
+mkdir -p package
+tar -xzf "$TARBALL"
+
+# Ensure dist exists
+if [ ! -d package/dist ]; then
+  echo "package/dist not found inside tarball. Listing package/ contents:" 
+  ls -la package
+  exit 1
+fi
+
+# Copy dist files into repo ./ort/
+DEST_DIR="$ROOT_DIR/ort"
+mkdir -p "$DEST_DIR"
+# Remove old files
+rm -rf "$DEST_DIR"/* || true
+cp -r package/dist/* "$DEST_DIR/"
+
+echo "Copied $(ls -1 "$DEST_DIR" | wc -l) files to $DEST_DIR"
+
+# Cleanup
+rm -rf "$TMPDIR"
+
+echo "Done. To serve the ONNX WASM assets locally, ensure your static server serves the ./ort/ folder at '/ort/'.\nYou can also set window.ORT_WASM_PATH = '/ort/' before loading the app to be explicit."
\ No newline at end of file

From 979831a5372a1cfeb7a42ba7c09b7c80ae01df03 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 10:48:12 +0100
Subject: [PATCH 12/14] Update src/stt/vad.js

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/stt/vad.js | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/stt/vad.js b/src/stt/vad.js
index d0e42c3..4b98553 100644
--- a/src/stt/vad.js
+++ b/src/stt/vad.js
@@ -41,10 +41,7 @@ function ensureOrt() {
           // configure WASM loader (single-threaded to avoid COOP/COEP)
           if (ort?.env?.wasm) {
             ort.env.wasm.numThreads = 1;
-            ort.env.wasm.wasmPaths =
-              DEFAULT_ORT_WASM_PATH && DEFAULT_ORT_WASM_PATH !== '/ort/'
-                ? DEFAULT_ORT_WASM_PATH
-                : 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.18.0/dist/';
+            ort.env.wasm.wasmPaths = DEFAULT_ORT_WASM_PATH;
           }
 
           // diagnostic: report which URL produced a usable ort

From e8658abd70799402f1320a28d43475960a5c2a3b Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 10:48:38 +0100
Subject: [PATCH 13/14] Update scripts/fetch-onnx-dist.sh

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 scripts/fetch-onnx-dist.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/fetch-onnx-dist.sh b/scripts/fetch-onnx-dist.sh
index 4ec49c0..53a370a 100644
--- a/scripts/fetch-onnx-dist.sh
+++ b/scripts/fetch-onnx-dist.sh
@@ -34,9 +34,9 @@ fi
 
 # Copy dist files into repo ./ort/
 DEST_DIR="$ROOT_DIR/ort"
+# Remove old files and recreate directory
+rm -rf "$DEST_DIR"
 mkdir -p "$DEST_DIR"
-# Remove old files
-rm -rf "$DEST_DIR"/* || true
 cp -r package/dist/* "$DEST_DIR/"
 
 echo "Copied $(ls -1 "$DEST_DIR" | wc -l) files to $DEST_DIR"

From b2de2cd577b316d1780fd015f9952886de730b50 Mon Sep 17 00:00:00 2001
From: CsUtil <45512166+cs-util@users.noreply.github.com>
Date: Wed, 29 Oct 2025 10:49:11 +0100
Subject: [PATCH 14/14] Update scripts/fetch-onnx-dist.sh

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 scripts/fetch-onnx-dist.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/fetch-onnx-dist.sh b/scripts/fetch-onnx-dist.sh
index 53a370a..b8d39a3 100644
--- a/scripts/fetch-onnx-dist.sh
+++ b/scripts/fetch-onnx-dist.sh
@@ -44,4 +44,5 @@ echo "Copied $(ls -1 "$DEST_DIR" | wc -l) files to $DEST_DIR"
 # Cleanup
 rm -rf "$TMPDIR"
 
-echo "Done. To serve the ONNX WASM assets locally, ensure your static server serves the ./ort/ folder at '/ort/'.\nYou can also set window.ORT_WASM_PATH = '/ort/' before loading the app to be explicit."
\ No newline at end of file
+echo "Done. To serve the ONNX WASM assets locally, ensure your static server serves the ./ort/ folder at '/ort/'."
+echo "You can also set window.ORT_WASM_PATH = '/ort/' before loading the app to be explicit."
\ No newline at end of file