From a4b821025af836be26c782c2e41b1ebec4d48a7c Mon Sep 17 00:00:00 2001 From: oratis Date: Sat, 27 Jun 2026 00:52:52 +0800 Subject: [PATCH 1/2] feat(voice): ElevenLabs Scribe as the primary ASR provider Co-Authored-By: Claude Opus 4.8 --- src/voice/transcribe.test.ts | 81 ++++++++++++++++++++++++++++++++++++ src/voice/transcribe.ts | 63 ++++++++++++++++++++++++---- 2 files changed, 137 insertions(+), 7 deletions(-) create mode 100644 src/voice/transcribe.test.ts diff --git a/src/voice/transcribe.test.ts b/src/voice/transcribe.test.ts new file mode 100644 index 0000000..58824ba --- /dev/null +++ b/src/voice/transcribe.test.ts @@ -0,0 +1,81 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { transcribeAudio } from "./transcribe.js"; + +async function withEnv( + key: string, + value: string | undefined, + fn: () => Promise, +): Promise { + const saved = process.env[key]; + if (value === undefined) delete process.env[key]; + else process.env[key] = value; + try { + await fn(); + } finally { + if (saved === undefined) delete process.env[key]; + else process.env[key] = saved; + } +} + +test("no key configured → error names BOTH providers", async () => { + await withEnv("ELEVENLABS_API_KEY", undefined, () => + withEnv("OPENAI_API_KEY", undefined, async () => { + await assert.rejects( + () => transcribeAudio({ audioPath: "/no/such/file.webm" }), + /ELEVENLABS_API_KEY[\s\S]*OPENAI_API_KEY/, + ); + }), + ); +}); + +test("ElevenLabs is preferred and POSTs the file with xi-api-key", async () => { + const tmp = path.join(os.tmpdir(), `lisa-asr-${process.pid}.webm`); + fs.writeFileSync(tmp, Buffer.from([0x1a, 0x45, 0xdf, 0xa3])); // a few bytes + const realFetch = globalThis.fetch; + let calledUrl = ""; + let sentKey: unknown; + let sentFile = false; + + globalThis.fetch = (async (url: unknown, init: { headers?: Record; body?: unknown }) => { + calledUrl = 
String(url); + sentKey = init?.headers?.["xi-api-key"]; + sentFile = init?.body instanceof FormData && (init.body as FormData).has("file"); + return new Response(JSON.stringify({ text: "hello world" }), { status: 200 }); + }) as typeof fetch; + + try { + await withEnv("ELEVENLABS_API_KEY", "sk_test_key", async () => { + const text = await transcribeAudio({ audioPath: tmp }); + assert.equal(text, "hello world"); + assert.match(calledUrl, /api\.elevenlabs\.io\/v1\/speech-to-text$/); + assert.equal(sentKey, "sk_test_key"); + assert.ok(sentFile, "posts a `file` field in multipart FormData"); + }); + } finally { + globalThis.fetch = realFetch; + fs.rmSync(tmp, { force: true }); + } +}); + +test("ElevenLabs non-2xx surfaces a useful error", async () => { + const tmp = path.join(os.tmpdir(), `lisa-asr-err-${process.pid}.webm`); + fs.writeFileSync(tmp, Buffer.from([1, 2, 3])); + const realFetch = globalThis.fetch; + globalThis.fetch = (async () => + new Response("invalid_api_key", { status: 401 })) as typeof fetch; + try { + await withEnv("ELEVENLABS_API_KEY", "sk_bad", async () => { + await assert.rejects( + () => transcribeAudio({ audioPath: tmp }), + /ElevenLabs transcription failed \(401\)/, + ); + }); + } finally { + globalThis.fetch = realFetch; + fs.rmSync(tmp, { force: true }); + } +}); diff --git a/src/voice/transcribe.ts b/src/voice/transcribe.ts index 353c345..8f6c498 100644 --- a/src/voice/transcribe.ts +++ b/src/voice/transcribe.ts @@ -1,22 +1,71 @@ import fs from "node:fs"; +import path from "node:path"; import OpenAI from "openai"; export interface TranscribeOptions { audioPath: string; + /** OpenAI Whisper model override (ignored by the ElevenLabs path). */ model?: string; + /** OpenAI key override (back-compat); ElevenLabs uses ELEVENLABS_API_KEY. */ apiKey?: string; } +/** + * Transcribe a recorded audio file to text. + * + * Provider order: ElevenLabs Scribe (ELEVENLABS_API_KEY) → OpenAI Whisper + * (OPENAI_API_KEY / opts.apiKey). 
The signature is unchanged so callers don't + * care which provider runs. + */ export async function transcribeAudio(opts: TranscribeOptions): Promise { - if (!process.env.OPENAI_API_KEY && !opts.apiKey) { - throw new Error( - "Voice transcription needs OPENAI_API_KEY (uses OpenAI Whisper).", - ); + const elevenKey = process.env.ELEVENLABS_API_KEY; + if (elevenKey) { + return transcribeWithElevenLabs(opts.audioPath, elevenKey); } - const client = new OpenAI({ apiKey: opts.apiKey }); + const openaiKey = opts.apiKey ?? process.env.OPENAI_API_KEY; + if (openaiKey) { + return transcribeWithOpenAI(opts.audioPath, openaiKey, opts.model); + } + throw new Error( + "Voice transcription needs ELEVENLABS_API_KEY (ElevenLabs Scribe) or OPENAI_API_KEY (OpenAI Whisper).", + ); +} + +async function transcribeWithOpenAI( + audioPath: string, + apiKey: string, + model?: string, +): Promise { + const client = new OpenAI({ apiKey }); const result = await client.audio.transcriptions.create({ - model: opts.model ?? "whisper-1", - file: fs.createReadStream(opts.audioPath), + model: model ?? "whisper-1", + file: fs.createReadStream(audioPath), }); return result.text; } + +/** + * ElevenLabs Scribe speech-to-text — POST /v1/speech-to-text, multipart `file` + + * `model_id`, authed with the `xi-api-key` header. Returns `{ text }`. + */ +async function transcribeWithElevenLabs(audioPath: string, apiKey: string): Promise { + const buf = await fs.promises.readFile(audioPath); + const form = new FormData(); + form.append("file", new Blob([buf]), path.basename(audioPath) || "audio.webm"); + form.append("model_id", process.env.ELEVENLABS_STT_MODEL || "scribe_v1"); + + const res = await fetch("https://api.elevenlabs.io/v1/speech-to-text", { + method: "POST", + headers: { "xi-api-key": apiKey }, + body: form, + }); + if (!res.ok) { + const detail = (await res.text().catch(() => "")).slice(0, 200); + throw new Error(`ElevenLabs transcription failed (${res.status})${detail ? 
`: ${detail}` : ""}`); + } + const json = (await res.json().catch(() => ({}))) as { text?: string }; + if (typeof json.text !== "string") { + throw new Error("ElevenLabs returned no transcript text."); + } + return json.text; +} From 165afc49d83b49dd2c5975bf67edcf01bcf6a26b Mon Sep 17 00:00:00 2001 From: oratis Date: Sat, 27 Jun 2026 00:52:58 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix(web):=20composer=20+/mic=20icons=20?= =?UTF-8?q?=E2=86=92=20line-style=20SVGs=20matching=20the=20function=20bar?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- src/web/lisa-css.ts | 6 +++--- src/web/lisa-html-snapshot.test.ts | 6 ++++-- src/web/lisa-html.ts | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/web/lisa-css.ts b/src/web/lisa-css.ts index 37572c2..7c8417b 100644 --- a/src/web/lisa-css.ts +++ b/src/web/lisa-css.ts @@ -1102,15 +1102,15 @@ export const MAIN_CSS = ` :root { justify-content: center; background: transparent; border: 0; - color: var(--fg-3); - font-size: 17px; + color: var(--fg-2); cursor: pointer; border-radius: 10px; transition: background 120ms ease, color 120ms ease; min-height: 44px; padding: 0; } - #plusBtn { font-size: 22px; } + /* Line-style icons matching the .fbtn function bar above. */ + #plusBtn svg, #recordBtn svg { width: 19px; height: 19px; display: block; } #plusBtn:hover, #recordBtn:hover { background: var(--bg-card); color: var(--fg); } #plusBtn.flash { background: var(--accent); color: var(--bg-deep); } diff --git a/src/web/lisa-html-snapshot.test.ts b/src/web/lisa-html-snapshot.test.ts index fc47fd5..4bd33af 100644 --- a/src/web/lisa-html-snapshot.test.ts +++ b/src/web/lisa-html-snapshot.test.ts @@ -35,10 +35,12 @@ import { MAIN_HTML } from "./lisa-html.js"; * (browser counterpart to `lisa pair`), with its .pair-row CSS. * Then: a scannable QR (server-rendered SVG from /api/pair/start) at the top of * that modal, with .pair-qr CSS. 
+ * Then: composer ＋ / 🎙 glyphs → line-style SVG icons matching the .fbtn + * function bar (+ #plusBtn/#recordBtn svg sizing; resting color → --fg-2). */ -const EXPECTED_LENGTH = 150733; +const EXPECTED_LENGTH = 151331; const EXPECTED_SHA256 = - "f7adb8e271d8a3984a41810783f5e9918ad9c8b78b9d460cb5e3f7b122a2d6d8"; + "72098d77767d32d5ca646a7aee675cc0cf4419f70956380cd18aa56d4ba9e19f"; test("MAIN_HTML length is byte-identical to the pre-split snapshot", () => { assert.equal(MAIN_HTML.length, EXPECTED_LENGTH); diff --git a/src/web/lisa-html.ts b/src/web/lisa-html.ts index 0339f8b..89b9866 100644 --- a/src/web/lisa-html.ts +++ b/src/web/lisa-html.ts @@ -170,13 +170,13 @@ ${MAIN_CSS}
- +
- +