-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtts_server.py
More file actions
88 lines (74 loc) · 2.76 KB
/
tts_server.py
File metadata and controls
88 lines (74 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
Kyutai TTS Backend Server for Chemini
Run with: python tts_server.py
Requires: pip install fastapi uvicorn torch moshi
"""
import os
import io
import wave
import struct
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response
from pydantic import BaseModel
import subprocess
import tempfile
app = FastAPI(title="Chemini TTS Server")

# Enable CORS for the local frontend dev servers only.
# A "*" wildcard is intentionally NOT listed: together with
# allow_credentials=True it would let any website the user visits make
# credentialed requests to this server, and the FastAPI CORS documentation
# requires origins to be listed explicitly when credentials are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class TTSRequest(BaseModel):
    """JSON request body accepted by the ``POST /tts`` endpoint."""

    # The text to be converted to speech.
    text: str
@app.post("/tts")
async def text_to_speech(request: TTSRequest):
    """
    Convert text to speech using Kyutai TTS.

    The text is written to a file in a per-request temporary directory,
    ``scripts/tts_pytorch.py`` (resolved relative to this file's directory) is
    run as a subprocess with the input and output paths, and the WAV file it
    produces is read back and returned.

    Args:
        request: Request body carrying the ``text`` to synthesize.

    Returns:
        A ``Response`` with media type ``audio/wav`` holding the audio bytes.

    Raises:
        HTTPException: 400 if the text is empty or whitespace-only; 504 if the
            subprocess runs longer than 60 seconds; 500 if the interpreter or
            the output file cannot be found, if the subprocess exits with a
            non-zero status, or on any other unexpected error.
    """
    import asyncio
    import functools
    import sys

    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    server_dir = os.path.dirname(os.path.abspath(__file__))

    try:
        # A private temporary directory replaces the deprecated, race-prone
        # tempfile.mktemp() and guarantees both files are removed on every
        # exit path (success, timeout, or failure).
        with tempfile.TemporaryDirectory(prefix="tts_") as work_dir:
            input_file = os.path.join(work_dir, "input.txt")
            output_file = os.path.join(work_dir, "output.wav")

            # NOTE(review): the file is written with the platform default
            # encoding, as before; confirm it matches how the TTS script
            # reads its input before pinning an explicit encoding.
            with open(input_file, "w") as f:
                f.write(request.text)

            # Use the interpreter running this server so the script sees the
            # same installed packages; fall back to "python" if unavailable.
            run_tts = functools.partial(
                subprocess.run,
                [sys.executable or "python", "scripts/tts_pytorch.py", input_file, output_file],
                cwd=server_dir,
                capture_output=True,
                text=True,
                timeout=60,
            )
            # subprocess.run blocks; run it in a worker thread so the event
            # loop keeps serving other requests while audio is generated.
            result = await asyncio.get_running_loop().run_in_executor(None, run_tts)

            if result.returncode != 0:
                # There is no fallback engine: surface the script's stderr.
                raise RuntimeError(f"Kyutai TTS failed: {result.stderr}")

            # Read the generated audio before the directory is cleaned up.
            with open(output_file, "rb") as f:
                audio_bytes = f.read()
    except subprocess.TimeoutExpired as exc:
        raise HTTPException(status_code=504, detail="TTS generation timed out") from exc
    except FileNotFoundError as exc:
        raise HTTPException(
            status_code=500,
            detail="TTS scripts not found. Ensure moshi package is installed.",
        ) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return Response(content=audio_bytes, media_type="audio/wav")
@app.get("/health")
async def health_check():
    """Return a fixed status payload that names the TTS model."""
    payload = {
        "status": "ok",
        "model": "kyutai/tts-1.6b-en_fr",
    }
    return payload
if __name__ == "__main__":
    # Imported here, so uvicorn is only needed when this file is run as a script.
    import uvicorn

    startup_messages = (
        "Starting Kyutai TTS Server on http://localhost:8000",
        'Frontend should call POST /tts with JSON body: {"text": "Hello world"}',
    )
    for message in startup_messages:
        print(message)

    # Serve the app on port 8000, listening on all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=8000)