From 685a66dd3bb19e7ea55e1fc8dd69d40eebab792c Mon Sep 17 00:00:00 2001 From: LingLambda Date: Sun, 31 May 2026 07:06:23 +0800 Subject: [PATCH] Fix Docker runtime for CUDA 12 GPUs --- README.md | 8 ++++-- runtime/python/Dockerfile.cuda128 | 43 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 runtime/python/Dockerfile.cuda128 diff --git a/README.md b/README.md index ef8829e0e..4a18692d2 100644 --- a/README.md +++ b/README.md @@ -190,12 +190,16 @@ You can run the following steps. ``` sh cd runtime/python docker build -t cosyvoice:v1.0 . +# For recent NVIDIA GPUs such as RTX 50-series cards with compute capability sm_120, +# build the CUDA 12.8 / PyTorch 2.8 runtime instead. Please make sure the host NVIDIA +# driver supports CUDA 12.8 or newer, and use the cosyvoice:v1.0-cu128 tag in the docker run commands below. +# docker build -f Dockerfile.cuda128 -t cosyvoice:v1.0-cu128 . # change iic/CosyVoice-300M to iic/CosyVoice-300M-Instruct if you want to use instruct inference # for grpc usage -docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/grpc && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity" +docker run -d --gpus all -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/grpc && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity" cd grpc && python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct> # for fastapi usage -docker run -d --runtime=nvidia -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/fastapi && python3 server.py --port 50000 --model_dir iic/CosyVoice-300M && sleep infinity" +docker run -d --gpus all -p 50000:50000 cosyvoice:v1.0 /bin/bash -c "cd /opt/CosyVoice/CosyVoice/runtime/python/fastapi && python3 server.py --port 50000 --model_dir iic/CosyVoice-300M && sleep infinity" cd fastapi && python3 client.py --port 50000 --mode <sft|zero_shot|cross_lingual|instruct> ``` diff --git 
a/runtime/python/Dockerfile.cuda128 b/runtime/python/Dockerfile.cuda128
new file mode 100644
index 000000000..ec60d41c0
--- /dev/null
+++ b/runtime/python/Dockerfile.cuda128
@@ -0,0 +1,87 @@
+# syntax=docker/dockerfile:1.7
+# CosyVoice runtime image built on a CUDA 12.8 / PyTorch 2.8 base so that
+# GPUs with compute capability sm_120 (e.g. RTX 50-series) can run CUDA kernels.
+FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime
+
+# Build-time only: keeps apt non-interactive during the build without
+# persisting the setting into the environment of running containers.
+ARG DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /opt/CosyVoice
+
+# Switch apt to the Aliyun mirror and install the fetch tools (git, git-lfs,
+# unzip) plus g++. "update" and "install" share one layer so a cached,
+# out-of-date package index is never reused, and the index is deleted in that
+# same layer so it does not end up in the image. ca-certificates is listed
+# explicitly because --no-install-recommends would otherwise not pull it in
+# for git's HTTPS clone.
+RUN sed -i \
+        -e s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g \
+        -e s@/security.ubuntu.com/@/mirrors.aliyun.com/@g \
+        /etc/apt/sources.list \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        g++ \
+        git \
+        git-lfs \
+        unzip \
+    && rm -rf /var/lib/apt/lists/* \
+    && git lfs install
+
+# Fetch CosyVoice together with its submodules.
+# NOTE(review): the clone is not pinned to a tag or commit, so a later rebuild
+# may pick up different sources.
+RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
+
+# pip reads PIP_INDEX_URL and PIP_TRUSTED_HOST from the environment, so every
+# "pip install" below uses this mirror; override with --build-arg if needed.
+# NOTE(review): a trusted host skips TLS verification for that host; with an
+# https index URL it may be unnecessary - confirm before removing.
+ARG PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/
+ARG PIP_TRUSTED_HOST=mirrors.aliyun.com
+
+# Pin the packaging toolchain before installing anything else.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install \
+        "pip==25.3" \
+        "setuptools<80" \
+        wheel
+
+# Install CosyVoice's requirements against the torch/torchaudio 2.8.0 pins.
+# The grep drops the requirement lines for --extra-index-url, torch,
+# torchaudio, tensorrt-cu12, deepspeed, onnxruntime-gpu and openai-whisper:
+# torch/torchaudio are pinned through the constraints file instead, and
+# onnxruntime-gpu / openai-whisper are installed separately in the next step.
+# NOTE(review): tensorrt-cu12 and deepspeed are dropped and never reinstalled,
+# presumably because they are not needed at runtime - confirm.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    printf 'setuptools<80\n' > /tmp/pip-build-constraints.txt \
+    && printf 'setuptools<80\ntorch==2.8.0\ntorchaudio==2.8.0\n' > /tmp/pip-constraints.txt \
+    && grep -Ev '^(--extra-index-url|torch==|torchaudio==|tensorrt-cu12|deepspeed==|onnxruntime-gpu==|openai-whisper==)' \
+        CosyVoice/requirements.txt > /tmp/cosyvoice-runtime-requirements.txt \
+    && python3 -m pip install \
+        --constraint /tmp/pip-constraints.txt \
+        --build-constraint /tmp/pip-build-constraints.txt \
+        -r /tmp/cosyvoice-runtime-requirements.txt
+
+# onnxruntime-gpu and tiktoken are installed at fixed versions under the same
+# constraints; openai-whisper is then installed without resolving its
+# dependencies and built against the setuptools already in the environment
+# (--no-build-isolation) rather than in an isolated build environment.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install \
+        --constraint /tmp/pip-constraints.txt \
+        "onnxruntime-gpu==1.22.0" \
+        "tiktoken==0.5.2" \
+    && python3 -m pip install \
+        --no-deps \
+        --no-build-isolation \
+        "openai-whisper==20231117"
+
+# Generate the gRPC Python stubs next to cosyvoice.proto.
+WORKDIR /opt/CosyVoice/CosyVoice/runtime/python/grpc
+RUN python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto
+
+# Containers start in the same directory as before.
+WORKDIR /opt/CosyVoice