From 2239c1773350accef382ee2e8317710c2c9864f5 Mon Sep 17 00:00:00 2001 From: ubinatus <51177379+ubinatus@users.noreply.github.com> Date: Mon, 11 May 2026 09:18:37 +0800 Subject: [PATCH] fix(core): don't mark FallbackAdapter primary unavailable on empty LLM turns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the LLM emits a turn with zero spoken text (e.g. a tool-only turn), `FallbackSynthesizeStream` would throw `APIConnectionError("TTS stream completed but no audio was received")`, mark the first provider unavailable, and cascade through the fallback chain — ultimately raising `all TTS instances failed`. The empty audio is the correct response because nothing was sent to synthesize. Track non-sentinel tokens received from the input LLM stream. In the `!sawRawAudio` branch, await the input stream so the count is settled and, if zero real tokens were received, treat the empty result as a clean no-op exit (emit END_OF_STREAM downstream and return) instead of throwing. Silent failures with real text still trigger fallback. Adds a unit test that closes the input without enqueuing any text and asserts no provider is marked unavailable. --- .changeset/fallback-tts-empty-token-stream.md | 5 ++ agents/src/tts/fallback_adapter.test.ts | 46 +++++++++++++++++++ agents/src/tts/fallback_adapter.ts | 16 +++++++ 3 files changed, 67 insertions(+) create mode 100644 .changeset/fallback-tts-empty-token-stream.md diff --git a/.changeset/fallback-tts-empty-token-stream.md b/.changeset/fallback-tts-empty-token-stream.md new file mode 100644 index 000000000..9bab7a8d8 --- /dev/null +++ b/.changeset/fallback-tts-empty-token-stream.md @@ -0,0 +1,5 @@ +--- +'@livekit/agents': patch +--- + +Prevent `FallbackAdapter` from marking the primary TTS unavailable and cascading through the fallback chain when the LLM emits a turn with zero text tokens (e.g. a tool-only turn). The empty audio response is the correct result when nothing was sent to synthesize, so it is now treated as a clean no-op exit instead of a silent provider failure. diff --git a/agents/src/tts/fallback_adapter.test.ts b/agents/src/tts/fallback_adapter.test.ts index 8c972acd5..79558f758 100644 --- a/agents/src/tts/fallback_adapter.test.ts +++ b/agents/src/tts/fallback_adapter.test.ts @@ -227,4 +227,50 @@ describe('TTS FallbackAdapter', () => { await adapter.close(); }); + + it('should not mark the primary unavailable when the LLM emits zero text tokens', async () => { + // A tool-only LLM turn closes the input stream without pushing any text. + // The primary returns no audio because there was nothing to synthesize. + // The adapter should treat this as a clean no-op exit and leave the + // primary available, rather than marking it unavailable and cascading + // through the fallback chain. + const primary = new MockTTS('primary'); + const secondary = new MockTTS('secondary'); + const adapter = new FallbackAdapter({ + ttsInstances: [primary, secondary], + maxRetryPerTTS: 0, + recoveryDelayMs: 60_000, + }); + + const stream = adapter.stream(); + stream.updateInputStream( + new ReadableStream({ + start(controller) { + controller.close(); + }, + }), + ); + + const iterate = (async () => { + let frameCount = 0; + for await (const event of stream) { + if (event === SynthesizeStream.END_OF_STREAM) break; + frameCount++; + } + return frameCount; + })(); + + const timeout = new Promise((_, reject) => + setTimeout(() => reject(new Error('fallback adapter deadlocked')), 3000), + ); + + const frameCount = await Promise.race([iterate, timeout]); + + expect(frameCount).toBe(0); + expect(adapter.status[0]!.available).toBe(true); + expect(adapter.status[1]!.available).toBe(true); + + stream.close(); + await adapter.close(); + }); }); diff --git a/agents/src/tts/fallback_adapter.ts b/agents/src/tts/fallback_adapter.ts index 83a6c3b00..26d389012 100644 --- a/agents/src/tts/fallback_adapter.ts +++ b/agents/src/tts/fallback_adapter.ts @@ -425,11 +425,15 @@ class FallbackSynthesizeStream extends SynthesizeStream { if (allTTSFailed) { this._logger.warn('All fallback TTS instances failed, retrying from first...'); } + let realTokensReceived = 0; const readInputLLMStream = (async () => { try { for await (const input of this.input) { if (this.abortController.signal.aborted) break; this.tokenBuffer.push(input); + if (input !== SynthesizeStream.FLUSH_SENTINEL) { + realTokensReceived += 1; + } } } catch (error) { this._logger.debug({ error }, 'Error reading input LLM stream'); @@ -565,6 +569,18 @@ class FallbackSynthesizeStream extends SynthesizeStream { // Silent failures must trigger fallback. See `sawRawAudio` above for // why we don't check `audioPushed` here. if (!sawRawAudio) { + // Wait for the input LLM stream to settle so we can distinguish + // "the LLM emitted no text" from "TTS failed before forward could + // push anything". If the LLM emitted zero non-sentinel tokens, + // empty audio is the correct response — nothing was synthesizable + // (e.g. an LLM turn whose only content was a tool call). Treat as + // a clean no-op exit instead of marking the provider unavailable + // and cascading through the fallback chain. + await readInputLLMStream.catch(() => {}); + if (realTokensReceived === 0) { + this.queue.put(SynthesizeStream.END_OF_STREAM); + return; + } throw new APIConnectionError({ message: 'TTS stream completed but no audio was received', });