-
Notifications
You must be signed in to change notification settings - Fork 6
Open
Labels
bugSomething isn't workingSomething isn't working
Description
There was a JSON decoder error in TTS. I fixed it by updating the generator and the FastAPI app: the generator now writes the audio data to a BytesIO buffer as a WAV file, and the API streams that buffer back as audio/wav.
Fix
generator.py
import io
import os
from pydantic import BaseModel
from fastapi import FastAPI, HTTPException, Form
from fastapi.responses import StreamingResponse
from TTS.utils.synthesizer import Synthesizer
from typing import Optional
import numpy as np
import soundfile as sf
class TTSResponse(BaseModel):
    """Outcome of one synthesis attempt.

    ``status_code`` stays 0 when nothing went wrong; otherwise it carries a
    non-zero code and ``error`` holds the accompanying message.
    """

    status_code: int = 0
    error: Optional[str] = None
class TTSModel(BaseModel):
    """Settings for the TTS engine: the text limit and model file locations."""

    # Maximum number of input characters. The TTS_MAX_TXT_LEN environment
    # variable is read once, when this class body is evaluated; 1000 is the
    # fallback when the variable is unset.
    MAX_TXT_LEN: int = int(os.environ.get('TTS_MAX_TXT_LEN', 1000))

    # Directory name for sound files.
    SOUNDS_DIR: str = "sounds"

    # Files handed to the Synthesizer constructor.
    MODEL_PATH: str = r"./model_files/model.pth"
    CONFIG_PATH: str = r"./model_files/config.json"
    SPEAKERS_PATH: str = r"./model_files/speakers.pth"
    ENCODER_CHECKPOINT_PATH: str = r"./model_files/SE_checkpoint.pth.tar"
    ENCODER_CONFIG: str = r"./model_files/config_se.json"

    # Reference recording passed to the engine as ``speaker_wav``.
    SPEAKER_WAV: str = r"./model_files/conditioning_audio.wav"
# Initiate the model.
# Both objects are created at import time, so every Generator instance shares
# the same settings and the same Synthesizer.
engine_specs = TTSModel()
engine = Synthesizer(
    engine_specs.MODEL_PATH,
    engine_specs.CONFIG_PATH,
    tts_speakers_file=engine_specs.SPEAKERS_PATH,
    encoder_checkpoint=engine_specs.ENCODER_CHECKPOINT_PATH,
    encoder_config=engine_specs.ENCODER_CONFIG,
)
class Generator:
    """Synthesize speech for a text and hold the result as an in-memory WAV.

    All work happens in the constructor. Afterwards inspect ``response``:
    ``status_code`` 0 means ``audio_buffer`` holds a complete WAV file
    positioned at its start; 10 means the text exceeded the length limit and
    nothing was synthesized; 500 means synthesis or encoding raised, with the
    exception text in ``response.error``.

    Attributes:
        MAX_TXT_LEN: Maximum accepted text length, taken from the shared
            engine settings (and therefore from ``TTS_MAX_TXT_LEN``).
        SPEAKER_WAV: Path of the reference audio passed to the engine.
        response: ``TTSResponse`` describing the outcome.
        audio_bytes: Raw samples returned by the engine, or ``None``.
        audio_buffer: ``io.BytesIO`` that receives the encoded WAV data.
    """

    def __init__(self, text: str) -> None:
        """Run synthesis for ``text`` and record the outcome.

        Args:
            text: The text to speak.
        """
        # Use the configured limit instead of a hard-coded 1000 so that the
        # TTS_MAX_TXT_LEN environment variable actually takes effect.
        self.MAX_TXT_LEN = engine_specs.MAX_TXT_LEN
        self.SPEAKER_WAV = engine_specs.SPEAKER_WAV
        self.response = TTSResponse()
        self.audio_bytes = None
        self.audio_buffer = io.BytesIO()

        if len(text) > self.MAX_TXT_LEN:
            # Over-long input is reported and not synthesized. The original
            # sliced the text here, but the slice was never used afterwards.
            self.response.status_code = 10
            self.response.error = f"Input text was cutoff since it went over the {self.MAX_TXT_LEN} character limit."
            return

        try:
            self.audio_bytes = engine.tts(text, speaker_wav=self.SPEAKER_WAV)
            self.save_audio()
        except Exception as e:
            # Boundary catch: any engine or encoding failure is surfaced to
            # the caller through the response object rather than raised.
            self.response.status_code = 500
            self.response.error = str(e)

    def save_audio(self) -> None:
        """Encode ``audio_bytes`` as WAV into ``audio_buffer`` and rewind it."""
        # A plain list of samples is converted to one float32 array first.
        if isinstance(self.audio_bytes, list):
            self.audio_bytes = np.array(self.audio_bytes, dtype=np.float32)

        # Prefer the sample rate reported by the loaded model; a fixed 22050
        # plays back at the wrong speed for models trained at another rate.
        sample_rate = getattr(engine, "output_sample_rate", None) or 22050

        # Write the audio data to the BytesIO buffer as a WAV file
        sf.write(self.audio_buffer, self.audio_bytes, samplerate=sample_rate, format='WAV')
        # Rewind so that a reader starts at the beginning of the file.
        self.audio_buffer.seek(0)

# app.py
from pydantic import BaseModel
from fastapi import FastAPI, HTTPException, Form
from fastapi.responses import StreamingResponse
from generator import Generator
# FastAPI setup: the application object that the route below is registered on
# and that uvicorn is pointed at.
app = FastAPI()
@app.post("/generate-audio/")
async def generate_audio(text: str = Form(...)):
    """Synthesize ``text`` and stream the result back as a WAV file.

    Args:
        text: Form field containing the text to speak.

    Returns:
        A ``StreamingResponse`` over the generator's WAV buffer with media
        type ``audio/wav``.

    Raises:
        HTTPException: When the generator reports a non-zero status. The
            generator's message is used as the detail.
    """
    generator = Generator(text)
    outcome = generator.response
    if outcome.status_code != 0:
        # Generator codes are internal (e.g. 10 for over-long input) and are
        # not always valid HTTP statuses. Pass real 4xx/5xx codes through and
        # report everything else as a client error.
        if 400 <= outcome.status_code <= 599:
            http_status = outcome.status_code
        else:
            http_status = 400
        raise HTTPException(status_code=http_status, detail=outcome.error)
    return StreamingResponse(generator.audio_buffer, media_type="audio/wav")
# Start a uvicorn server for the app when this file is executed as a script.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

## Dockerfile
# Use the official Python 3.7.4 image as the parent image
FROM python:3.7.4
# Set the working directory in the container; later instructions run from /app
WORKDIR /app
# Copy the build context (the project directory) into the container at /app
COPY . /app
# Install Cython and python-git before the requirements are resolved
# (presumably build prerequisites of the TTS package - TODO confirm)
RUN pip install --no-cache-dir cython
RUN pip install python-git
# Upgrade pip, then install the packages listed in requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
# Declare port 8000, the port uvicorn is started on below
EXPOSE 8000
# PYTHONUNBUFFERED=1 keeps Python's stdout/stderr unbuffered
ENV PYTHONUNBUFFERED=1
# Default text-length limit; generator.py reads TTS_MAX_TXT_LEN via os.getenv
ENV TTS_MAX_TXT_LEN=1000
# Serve the `app` object from app.py with uvicorn when the container launches
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
#Docker-compose.yml
# Compose file format version
version: '3.8'
services:
  # Single service, built from the Dockerfile in this directory
  fastapi-app:
    build: .
    ports:
      # host port : container port
      - "8000:8000"
    volumes:
      # Bind-mount the project directory over /app in the container
      - .:/app
    environment:
      - PYTHONUNBUFFERED=1
      # Takes precedence over the Dockerfile's ENV TTS_MAX_TXT_LEN=1000
      - TTS_MAX_TXT_LEN=4000
#requirements.txt
git+https://github.com/coqui-ai/TTS@0910cb76bcd85df56bf43654bb31427647cdfd0d#egg=TTS
fastapi==0.89.1
uvicorn==0.20.0
python-multipart==0.0.5
pydub==0.25.1
pyaudioconvert==0.0.5
pytest==7.2.1
httpx==0.23.3
websockets==10.4
pymongo==4.3.3
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bugSomething isn't workingSomething isn't working