Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,16 @@ You can run the following steps.
``` sh
cd runtime/python
docker build -t cosyvoice:v1.0 .
# For recent NVIDIA GPUs such as RTX 50-series cards with compute capability sm_120,
# build the CUDA 12.8 / PyTorch 2.8 runtime instead. Please make sure the host NVIDIA
# driver supports CUDA 12.8 or newer.
# docker build -f Dockerfile.cuda128 -t cosyvoice:v1.0-cu128 .
# change iic/CosyVoice-300M to iic/CosyVoice-300M-Instruct if you want to use instruct inference
# for grpc usage
docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/grpc && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"
docker run -d --gpus all -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/grpc && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"
cd grpc && python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
# for fastapi usage
docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/fastapi && python3 server.py --port 50000 --model_dir iic/CosyVoice-300M && sleep infinity"
docker run -d --gpus all -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/fastapi && python3 server.py --port 50000 --model_dir iic/CosyVoice-300M && sleep infinity"
cd fastapi && python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
```

Expand Down
43 changes: 43 additions & 0 deletions runtime/python/Dockerfile.cuda128
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# syntax=docker/dockerfile:1.7
# CosyVoice runtime image built on the CUDA 12.8 / PyTorch 2.8 base.
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime
# Build-time only: keeps apt non-interactive in the RUN steps of this stage
# without leaving DEBIAN_FRONTEND set in the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Relative paths used by the RUN steps in this file resolve against this directory.
WORKDIR /opt/CosyVoice

# Point apt at the Aliyun mirror for both the archive and the security hosts.
RUN sed -i \
        -e s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g \
        -e s@/security.ubuntu.com/@/mirrors.aliyun.com/@g \
        /etc/apt/sources.list
# `update` and `install` share one layer so a cached package index can never be
# reused with a changed package list, and the index is removed in the same
# layer so it does not add to the image size.
# ca-certificates is listed explicitly: it is only a Recommends of git, and the
# HTTPS clone below needs it once --no-install-recommends is in effect.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        g++ \
        git \
        git-lfs \
        unzip \
    && rm -rf /var/lib/apt/lists/*
# Register the git-lfs filters before cloning so LFS-tracked files are fetched.
RUN git lfs install
# Clones into ./CosyVoice (relative to WORKDIR), including submodules.
RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
# Use a CUDA 12.8 PyTorch runtime so RTX 50-series GPUs with sm_120 can run CUDA kernels.
# Bootstrap the packaging toolchain first: pin pip itself and cap setuptools
# below 80, then add wheel. The pip download cache lives in a BuildKit cache
# mount, so it is reused across builds and never stored in the image.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install \
        --index-url https://mirrors.aliyun.com/pypi/simple/ \
        --trusted-host mirrors.aliyun.com \
        "pip==25.3" \
        "setuptools<80" \
        wheel
# Install CosyVoice's Python requirements with torch/torchaudio held at 2.8.0.
#   1. Write two constraint files under /tmp: one for build environments
#      (setuptools<80) and one for the install itself (setuptools<80 plus
#      torch==2.8.0 and torchaudio==2.8.0). /tmp/pip-constraints.txt is left in
#      place because a later RUN step reads it.
#   2. Filter requirements.txt, dropping the --extra-index-url lines and the
#      torch, torchaudio, tensorrt-cu12, deepspeed, onnxruntime-gpu and
#      openai-whisper entries, and install what remains.
# The pip cache is kept in a BuildKit cache mount (same target as the toolchain
# bootstrap step) so downloaded wheels are reused across builds and are not
# written into this layer.
# `cd CosyVoice` is kept so pip runs from the repository root exactly as before.
RUN --mount=type=cache,target=/root/.cache/pip \
    printf 'setuptools<80\n' > /tmp/pip-build-constraints.txt \
    && printf 'setuptools<80\ntorch==2.8.0\ntorchaudio==2.8.0\n' > /tmp/pip-constraints.txt \
    && cd CosyVoice \
    && grep -Ev '^(--extra-index-url|torch==|torchaudio==|tensorrt-cu12|deepspeed==|onnxruntime-gpu==|openai-whisper==)' requirements.txt > /tmp/cosyvoice-runtime-requirements.txt \
    && python3 -m pip install \
        --constraint /tmp/pip-constraints.txt \
        --build-constraint /tmp/pip-build-constraints.txt \
        -r /tmp/cosyvoice-runtime-requirements.txt \
        -i https://mirrors.aliyun.com/pypi/simple/ \
        --trusted-host=mirrors.aliyun.com
# Install the packages handled outside the filtered requirements file.
# onnxruntime-gpu and tiktoken are installed under /tmp/pip-constraints.txt,
# which is written by an earlier RUN step.
# openai-whisper is installed with --no-deps and --no-build-isolation, so pip
# neither resolves its dependencies nor creates a separate build environment.
# NOTE(review): presumably this makes it build with the already-installed
# setuptools<80 and leaves the torch stack untouched; confirm.
# The pip cache is kept in a BuildKit cache mount so downloads are reused
# across builds and are not written into this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install \
        --constraint /tmp/pip-constraints.txt \
        -i https://mirrors.aliyun.com/pypi/simple/ \
        --trusted-host=mirrors.aliyun.com \
        "onnxruntime-gpu==1.22.0" \
        "tiktoken==0.5.2" \
    && python3 -m pip install \
        --no-deps \
        --no-build-isolation \
        -i https://mirrors.aliyun.com/pypi/simple/ \
        --trusted-host=mirrors.aliyun.com \
        "openai-whisper==20231117"
# Generate the Python protobuf and gRPC modules from cosyvoice.proto, writing
# them into the same directory as the .proto file.
RUN cd CosyVoice/runtime/python/grpc \
    && python3 -m grpc_tools.protoc \
        --proto_path=. \
        --python_out=. \
        --grpc_python_out=. \
        cosyvoice.proto