Skip to content

Commit 8d86530

Browse files
committed
feat: enhance TTS generation and voice resolution with support for custom models and improved audio format detection
1 parent e5addf6 commit 8d86530

2 files changed

Lines changed: 58 additions & 5 deletions

File tree

lib/audio/tts-providers.ts

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,21 @@ export async function generateTTS(
172172

173173
/**
174174
* OpenAI TTS implementation (direct API call with explicit UTF-8 encoding)
175+
* Supports both official OpenAI TTS and OpenAI-compatible TTS APIs
175176
*/
176177
async function generateOpenAITTS(
177178
config: TTSModelConfig,
178179
text: string,
179180
): Promise<TTSGenerationResult> {
180-
const baseUrl = config.baseUrl || TTS_PROVIDERS['openai-tts'].defaultBaseUrl;
181+
// Prefer an explicitly configured baseUrl (e.g. an OpenAI-compatible endpoint); otherwise fall back to the official OpenAI default
182+
let baseUrl: string;
183+
184+
if (config.baseUrl) {
185+
baseUrl = config.baseUrl;
186+
} else {
187+
// For official OpenAI TTS, use default baseUrl
188+
baseUrl = TTS_PROVIDERS['openai-tts'].defaultBaseUrl || 'https://api.openai.com/v1';
189+
}
181190

182191
// Use gpt-4o-mini-tts for best quality and intelligent realtime applications
183192
const response = await fetch(`${baseUrl}/audio/speech`, {
@@ -200,9 +209,30 @@ async function generateOpenAITTS(
200209
}
201210

202211
const arrayBuffer = await response.arrayBuffer();
212+
213+
// Extract audio format from Content-Type header
214+
const contentType = response.headers.get('content-type') || 'audio/mpeg';
215+
let format = 'mp3'; // default
216+
217+
if (contentType.includes('audio/wav')) {
218+
format = 'wav';
219+
} else if (contentType.includes('audio/mpeg') || contentType.includes('audio/mp3')) {
220+
format = 'mp3';
221+
} else if (contentType.includes('audio/ogg')) {
222+
format = 'ogg';
223+
} else if (contentType.includes('audio/aac')) {
224+
format = 'aac';
225+
} else if (contentType.includes('audio/flac')) {
226+
format = 'flac';
227+
} else if (contentType.includes('audio/webm')) {
228+
format = 'webm';
229+
} else if (contentType.includes('audio/opus')) {
230+
format = 'opus';
231+
}
232+
203233
return {
204234
audio: new Uint8Array(arrayBuffer),
205-
format: 'mp3',
235+
format,
206236
};
207237
}
208238

lib/audio/voice-resolver.ts

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,17 @@ export interface ProviderWithVoices {
7878
* Get all available providers and their voices for the voice picker UI.
7979
* A provider is available if it has an API key or is server-configured.
8080
* Browser-native-tts is excluded (no static voice list).
81+
* Includes both built-in models and custom models for OpenAI-compatible providers.
8182
*/
8283
export function getAvailableProvidersWithVoices(
8384
ttsProvidersConfig: Record<
8485
string,
85-
{ apiKey?: string; enabled?: boolean; isServerConfigured?: boolean }
86+
{
87+
apiKey?: string;
88+
enabled?: boolean;
89+
isServerConfigured?: boolean;
90+
customModels?: Array<{ id: string; name: string }>;
91+
}
8692
>,
8793
): ProviderWithVoices[] {
8894
const result: ProviderWithVoices[] = [];
@@ -101,6 +107,8 @@ export function getAvailableProvidersWithVoices(
101107

102108
// Build model groups
103109
const modelGroups: ModelVoiceGroup[] = [];
110+
111+
// Add built-in models
104112
if (config.models.length > 0) {
105113
for (const model of config.models) {
106114
const compatibleVoices = config.voices
@@ -112,8 +120,23 @@ export function getAvailableProvidersWithVoices(
112120
voices: compatibleVoices,
113121
});
114122
}
115-
} else {
116-
// Provider has no model concept (Azure, Browser Native, Doubao)
123+
}
124+
125+
// Add custom models for OpenAI-compatible TTS
126+
const customModels = providerConfig?.customModels || [];
127+
if (providerId === 'openai-compatible-tts' && customModels.length > 0) {
128+
for (const model of customModels) {
129+
// All voices are compatible with custom models
130+
modelGroups.push({
131+
modelId: model.id,
132+
modelName: model.name,
133+
voices: allVoices,
134+
});
135+
}
136+
}
137+
138+
// If no models at all, add default group (for providers without model concept)
139+
if (modelGroups.length === 0) {
117140
modelGroups.push({
118141
modelId: '',
119142
modelName: config.name,

0 commit comments

Comments
 (0)