Skip to content

Commit e95026a

Browse files
G-Fourteen and claude committed
Fix TTS cutting off long text - split into chunks and concatenate
kokoro-js truncates text longer than ~400-500 characters.

Fix:
- Split long text at sentence/clause boundaries (max 400 chars per chunk)
- Generate audio for each chunk sequentially
- Concatenate all audio chunks into a single buffer
- Send the complete audio to the main thread

Example: 953-char text → 3 chunks → 3 audio segments → 1 concatenated audio

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 7ee0eed commit e95026a

1 file changed

Lines changed: 90 additions & 17 deletions

File tree

web/kokoro-worker.js

Lines changed: 90 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -117,35 +117,108 @@ async function handleGenerate(id, data) {
117117
}
118118
}
119119

120+
// Split long text into chunks at sentence boundaries
121+
/**
 * Split text into chunks of at most `maxChars` UTF-16 code units, preferring
 * to break right after sentence/clause punctuation, then at a space, and only
 * as a last resort in the middle of a word.
 *
 * Text that already fits is returned unchanged as a single-element array.
 * Longer text is trimmed, and every emitted chunk is trimmed and non-empty.
 *
 * @param {string} text - Text to split.
 * @param {number} [maxChars=400] - Maximum chunk length; fractional values are floored.
 * @returns {string[]} Chunks in original order, each no longer than `maxChars`.
 * @throws {TypeError} If `text` is not a string.
 * @throws {RangeError} If `maxChars` is not a number >= 1.
 */
function splitTextIntoChunks(text, maxChars = 400) {
  if (typeof text !== 'string') {
    throw new TypeError('splitTextIntoChunks: text must be a string');
  }
  const limit = Math.floor(maxChars);
  // `!(limit >= 1)` also rejects NaN; a non-positive limit could never make progress.
  if (!(limit >= 1)) {
    throw new RangeError('splitTextIntoChunks: maxChars must be a number >= 1');
  }
  if (text.length <= limit) return [text];

  const breakChars = ['. ', '! ', '? ', ', ', '; ', ': ', ' - '];
  const chunks = [];
  let remaining = text.trim();

  while (remaining.length > limit) {
    // Find the latest clause boundary in the second half of the window.
    let breakPoint = -1;
    for (const bc of breakChars) {
      // Start the search at `limit - bc.length` so the whole delimiter fits
      // inside the window and the chunk can never exceed `limit`.
      const idx = remaining.lastIndexOf(bc, limit - bc.length);
      if (idx > limit * 0.5) {
        // Compare end offsets with end offsets so the latest boundary wins.
        breakPoint = Math.max(breakPoint, idx + bc.length);
      }
    }

    // If no good break point, break at space
    if (breakPoint === -1) {
      breakPoint = remaining.lastIndexOf(' ', limit);
    }

    // If still no break point (or it would leave a tiny chunk), force break
    if (breakPoint === -1 || breakPoint < limit * 0.3) {
      breakPoint = limit;
      // Do not cut between the two halves of a surrogate pair (e.g. emoji),
      // unless backing off would leave an empty chunk.
      const lastUnit = remaining.charCodeAt(breakPoint - 1);
      if (breakPoint > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        breakPoint -= 1;
      }
    }

    const head = remaining.substring(0, breakPoint).trim();
    if (head.length > 0) {
      chunks.push(head);
    }
    remaining = remaining.substring(breakPoint).trim();
  }

  if (remaining.length > 0) {
    chunks.push(remaining);
  }

  console.log(`[WORKER] Split text into ${chunks.length} chunks: ${chunks.map(c => c.length).join(', ')} chars`);
  return chunks;
}
161+
162+
// Concatenate multiple Float32Arrays
163+
/**
 * Join several audio sample arrays end-to-end into one new Float32Array.
 *
 * @param {Array<Float32Array>} audioArrays - Segments to join, in playback order.
 * @param {number} sampleRate - Not used by this function; kept so existing callers keep working.
 * @returns {Float32Array} Newly allocated array holding all samples back-to-back.
 */
function concatenateAudio(audioArrays, sampleRate) {
  // First pass: measure how many samples the merged array must hold.
  let sampleCount = 0;
  for (const segment of audioArrays) {
    sampleCount += segment.length;
  }

  // Second pass: copy each segment in, threading the write position through the fold.
  const merged = new Float32Array(sampleCount);
  audioArrays.reduce((writePos, segment) => {
    merged.set(segment, writePos);
    return writePos + segment.length;
  }, 0);

  return merged;
}
173+
120174
async function doGenerate(id, data) {
121175
const { text, voice, speed } = data;
122-
console.log(`[WORKER] Generating: text="${text?.substring(0, 50)}...", voice=${voice}, speed=${speed}`);
176+
console.log(`[WORKER] Generating: text="${text?.substring(0, 50)}...", voice=${voice}, speed=${speed}, length=${text?.length}`);
123177

124178
try {
125179
self.postMessage({ type: 'generating', id });
126180

127181
const startTime = Date.now();
128182

129-
// Add timeout to prevent infinite hang
130-
const timeoutPromise = new Promise((_, reject) => {
131-
setTimeout(() => reject(new Error('Generation timeout (60s)')), 60000);
132-
});
183+
// Split long text into chunks to avoid kokoro-js truncation
184+
const chunks = splitTextIntoChunks(text, 400);
185+
const audioChunks = [];
186+
let sampleRate = 24000;
187+
188+
for (let i = 0; i < chunks.length; i++) {
189+
const chunk = chunks[i];
190+
console.log(`[WORKER] Generating chunk ${i + 1}/${chunks.length}: ${chunk.length} chars`);
191+
192+
// Add timeout to prevent infinite hang (30s per chunk)
193+
const timeoutPromise = new Promise((_, reject) => {
194+
setTimeout(() => reject(new Error(`Chunk ${i + 1} timeout (30s)`)), 30000);
195+
});
133196

134-
const audio = await Promise.race([
135-
tts.generate(text, {
136-
voice: voice || DEFAULT_VOICE,
137-
speed: speed || 1.0
138-
}),
139-
timeoutPromise
140-
]);
197+
const audio = await Promise.race([
198+
tts.generate(chunk, {
199+
voice: voice || DEFAULT_VOICE,
200+
speed: speed || 1.0
201+
}),
202+
timeoutPromise
203+
]);
204+
205+
if (audio && audio.audio) {
206+
audioChunks.push(new Float32Array(audio.audio));
207+
sampleRate = audio.sampling_rate || 24000;
208+
console.log(`[WORKER] Chunk ${i + 1} done: ${audio.audio.length} samples`);
209+
} else {
210+
console.warn(`[WORKER] Chunk ${i + 1} produced no audio`);
211+
}
212+
}
141213

142214
const genTime = Date.now() - startTime;
143-
console.log(`[WORKER] Generation complete in ${genTime}ms, hasAudio=${!!audio}, hasAudioData=${!!(audio?.audio)}`);
215+
console.log(`[WORKER] All chunks complete in ${genTime}ms, ${audioChunks.length} audio segments`);
144216

145-
if (audio && audio.audio) {
146-
const audioBuffer = audio.audio.buffer.slice(0);
147-
const sampleRate = audio.sampling_rate || 24000;
148-
const duration = audioBuffer.byteLength / 4 / sampleRate; // Float32 = 4 bytes
217+
if (audioChunks.length > 0) {
218+
// Concatenate all audio chunks
219+
const fullAudio = concatenateAudio(audioChunks, sampleRate);
220+
const audioBuffer = fullAudio.buffer.slice(0);
221+
const duration = fullAudio.length / sampleRate;
149222

150223
console.log(`[WORKER] Audio: ${audioBuffer.byteLength} bytes, ${sampleRate}Hz, ${duration.toFixed(2)}s`);
151224

0 commit comments

Comments
 (0)