FunAudioLLM · LingLambda · May 30, 2026
diff --git a/README.md b/README.md
@@ -190,12 +190,16 @@ You can run the following steps.
 ``` sh
 cd runtime/python
 docker build -t cosyvoice:v1.0 .
+# For recent NVIDIA GPUs such as RTX 50-series cards (compute capability sm_120),
+# build the CUDA 12.8 / PyTorch 2.8 image instead and use the cosyvoice:v1.0-cu128 tag in the
+# docker run commands below. The host NVIDIA driver must support CUDA 12.8 or newer.
+# docker build -f Dockerfile.cuda128 -t cosyvoice:v1.0-cu128 .
 # change iic/CosyVoice-300M to iic/CosyVoice-300M-Instruct if you want to use instruct inference
 # for grpc usage
-docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/grpc && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"
+docker run -d --gpus all -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/grpc && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"
 cd grpc && python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
 # for fastapi usage
-docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/fastapi && python3 server.py --port 50000 --model_dir iic/CosyVoice-300M && sleep infinity"
+docker run -d --gpus all -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/fastapi && python3 server.py --port 50000 --model_dir iic/CosyVoice-300M && sleep infinity"
 cd fastapi && python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct>
 ```
 

diff --git a/runtime/python/Dockerfile.cuda128 b/runtime/python/Dockerfile.cuda128
@@ -0,0 +1,58 @@
+# syntax=docker/dockerfile:1.7
+# CosyVoice runtime image on CUDA 12.8 / PyTorch 2.8 so that RTX 50-series GPUs
+# (compute capability sm_120) can run CUDA kernels.
+FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime
+ENV DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /opt/CosyVoice
+
+# Point apt at the Aliyun mirror for both the archive and security repositories.
+RUN sed -i \
+    -e s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g \
+    -e s@/security.ubuntu.com/@/mirrors.aliyun.com/@g \
+    /etc/apt/sources.list
+# Refresh the package index and install in ONE layer so a cached `apt-get update`
+# layer is never reused with a later install; drop the index in the same layer.
+RUN apt-get update -y \
+    && apt-get -y install git unzip git-lfs g++ \
+    && rm -rf /var/lib/apt/lists/*
+RUN git lfs install
+RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
+# Pin the packaging toolchain first; pip 25.3 provides the --build-constraint option used below.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install \
+    -i https://mirrors.aliyun.com/pypi/simple/ \
+    --trusted-host=mirrors.aliyun.com \
+    "pip==25.3" "setuptools<80" wheel
+# Install the project requirements minus the lines removed by the grep filter
+# (extra index URLs, torch/torchaudio, tensorrt-cu12*, deepspeed, onnxruntime-gpu,
+# openai-whisper). The constraint files hold torch/torchaudio at 2.8.0 and keep
+# setuptools below 80, including for source builds via --build-constraint.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    printf 'setuptools<80\n' > /tmp/pip-build-constraints.txt \
+    && printf 'setuptools<80\ntorch==2.8.0\ntorchaudio==2.8.0\n' > /tmp/pip-constraints.txt \
+    && cd CosyVoice \
+    && grep -Ev '^(--extra-index-url|torch==|torchaudio==|tensorrt-cu12|deepspeed==|onnxruntime-gpu==|openai-whisper==)' requirements.txt > /tmp/cosyvoice-runtime-requirements.txt \
+    && python3 -m pip install \
+        --constraint /tmp/pip-constraints.txt \
+        --build-constraint /tmp/pip-build-constraints.txt \
+        -r /tmp/cosyvoice-runtime-requirements.txt \
+        -i https://mirrors.aliyun.com/pypi/simple/ \
+        --trusted-host=mirrors.aliyun.com
+# onnxruntime-gpu and tiktoken are installed at explicit pins under the same constraints;
+# openai-whisper is installed without its dependencies and without build isolation.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install \
+    --constraint /tmp/pip-constraints.txt \
+    -i https://mirrors.aliyun.com/pypi/simple/ \
+    --trusted-host=mirrors.aliyun.com \
+    "onnxruntime-gpu==1.22.0" \
+    "tiktoken==0.5.2" \
+    && python3 -m pip install \
+        --no-deps \
+        --no-build-isolation \
+        -i https://mirrors.aliyun.com/pypi/simple/ \
+        --trusted-host=mirrors.aliyun.com \
+        "openai-whisper==20231117"
+# Generate the gRPC Python stubs from cosyvoice.proto next to the server code.
+RUN cd CosyVoice/runtime/python/grpc && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto