11# syntax=docker/dockerfile:1.10.0
22# builder
3- ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.01-py3
3+ ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.03-py3
44
55# # build args
66FROM ${BASE_IMAGE} AS setup_env
77
8- ARG CODESPACE=/root/codespace
9-
10- ARG FLASH_ATTN_DIR=/tmp/flash-attn
11- ARG FLASH_ATTN3_DIR=/tmp/flash-attn3
12- ARG ADAPTIVE_GEMM_DIR=/tmp/adaptive_gemm
13- ARG GROUPED_GEMM_DIR=/tmp/grouped_gemm
14-
158ARG TORCH_VERSION
16-
179ARG PPA_SOURCE
1810
19- RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
20- RUN if [ -n "${TORCH_VERSION}" ]; then \
21- pip install torchvision torch==${TORCH_VERSION} --index-url https://download.pytorch.org/whl/cu126 --no-cache-dir; \
22- fi
23-
24- # set reasonable default for CUDA architectures when building ngc image
25- ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
26-
27- RUN sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
11+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
12+ sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
2813 apt update && \
2914 apt install --no-install-recommends ca-certificates -y && \
3015 apt install --no-install-recommends bc wget -y && \
3116 apt install --no-install-recommends build-essential sudo -y && \
3217 apt install --no-install-recommends git curl pkg-config tree unzip tmux \
33- openssh-server openssh-client nmap dnsutils iproute2 lsof net-tools -y && \
18+ openssh-server openssh-client dnsutils iproute2 lsof net-tools zsh rclone -y && \
3419 apt clean && rm -rf /var/lib/apt/lists/*
3520
36- RUN pip uninstall flash_attn -y
21+ RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
22+ RUN pip install pystack py-spy --no-cache-dir
23+ RUN git config --system --add safe.directory "*"
24+
25+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
26+ if [ -n "${TORCH_VERSION}" ]; then \
27+ pip install torchvision torch==${TORCH_VERSION} \
28+ --index-url https://download.pytorch.org/whl/cu128 \
29+ --extra-index-url https://download.pytorch.org/whl/cu126 \
30+ --no-cache-dir; \
31+ fi
32+
33+ # set reasonable default for CUDA architectures when building ngc image
34+ ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
35+
36+ RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
37+
38+ ARG FLASH_ATTN_DIR=/tmp/flash-attn
39+ ARG CODESPACE=/root/codespace
40+ ARG FLASH_ATTN3_DIR=/tmp/flash-attn3
41+ ARG ADAPTIVE_GEMM_DIR=/tmp/adaptive_gemm
42+ ARG GROUPED_GEMM_DIR=/tmp/grouped_gemm
43+ ARG DEEP_EP_DIR=/tmp/deep_ep
44+ ARG NVSHMEM_WHL_DIR=/tmp/nvshmem
45+
46+ RUN mkdir -p $CODESPACE
47+ WORKDIR ${CODESPACE}
3748
3849# compile flash-attn
3950FROM setup_env AS flash_attn
@@ -43,16 +54,14 @@ ARG FLASH_ATTN_DIR
4354ARG FLASH_ATTN3_DIR
4455ARG FLASH_ATTN_URL
4556
46- RUN mkdir -p $CODESPACE
47- WORKDIR ${CODESPACE}
48-
49- RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
57+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
58+ git clone $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
5059 cd ${CODESPACE}/flash-attention && \
51- git checkout $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 2)
60+ git checkout $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 2) && \
61+ git submodule update --init --recursive --force
5262
5363WORKDIR ${CODESPACE}/flash-attention
5464
55- RUN git submodule update --init --recursive --force
5665RUN cd hopper && FLASH_ATTENTION_FORCE_BUILD=TRUE pip wheel -w ${FLASH_ATTN3_DIR} -v --no-deps .
5766RUN FLASH_ATTENTION_FORCE_BUILD=TRUE pip wheel -w ${FLASH_ATTN_DIR} -v --no-deps .
5867
@@ -63,16 +72,14 @@ ARG CODESPACE
6372ARG ADAPTIVE_GEMM_DIR
6473ARG ADAPTIVE_GEMM_URL
6574
66- RUN mkdir -p $CODESPACE
67- WORKDIR ${CODESPACE}
68-
69- RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 1) && \
75+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
76+ git clone $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 1) && \
7077 cd ${CODESPACE}/AdaptiveGEMM && \
71- git checkout $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 2)
78+ git checkout $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 2) && \
79+ git submodule update --init --recursive --force
7280
7381WORKDIR ${CODESPACE}/AdaptiveGEMM
7482
75- RUN git submodule update --init --recursive --force
7683RUN pip wheel -w ${ADAPTIVE_GEMM_DIR} -v --no-deps .
7784
7885# compile grouped_gemm(permute and unpermute)
@@ -82,18 +89,37 @@ ARG CODESPACE
8289ARG GROUPED_GEMM_DIR
8390ARG GROUPED_GEMM_URL
8491
85- RUN mkdir -p $CODESPACE
86- WORKDIR ${CODESPACE}
87-
88- RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 1) && \
92+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
93+ git clone $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 1) && \
8994 cd ${CODESPACE}/GroupedGEMM && \
90- git checkout $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 2)
95+ git checkout $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 2) && \
96+ git submodule update --init --recursive --force
9197
9298WORKDIR ${CODESPACE}/GroupedGEMM
9399
94- RUN git submodule update --init --recursive --force
95100RUN pip wheel -w ${GROUPED_GEMM_DIR} -v --no-deps .
96101
102+ # pypi install nvshmem and compile deepep
103+ FROM setup_env AS deep_ep
104+
105+ ARG CODESPACE
106+ ARG DEEP_EP_DIR
107+ ARG DEEP_EP_URL
108+ ARG NVSHMEM_WHL_DIR
109+ # build sm90 and sm100 for deep_ep for now
110+ ARG TORCH_CUDA_ARCH_LIST="9.0 10.0"
111+
112+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
113+ pip wheel -w ${NVSHMEM_WHL_DIR} -v "nvidia-nvshmem-cu12>=3.4.5" && \
114+ pip install ${NVSHMEM_WHL_DIR}/*.whl && \
115+ git clone $(echo ${DEEP_EP_URL} | cut -d '@' -f 1) && \
116+ cd ${CODESPACE}/DeepEP && \
117+ git checkout $(echo ${DEEP_EP_URL} | cut -d '@' -f 2) && \
118+ git submodule update --init --recursive --force
119+
120+ WORKDIR ${CODESPACE}/DeepEP
121+
122+ RUN pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
97123
98124# integration xtuner
99125FROM setup_env AS xtuner_dev
@@ -105,55 +131,64 @@ ARG FLASH_ATTN_DIR
105131ARG FLASH_ATTN3_DIR
106132ARG ADAPTIVE_GEMM_DIR
107133ARG GROUPED_GEMM_DIR
134+ ARG DEEP_EP_DIR
135+ ARG NVSHMEM_WHL_DIR
108136
109137COPY --from=flash_attn ${FLASH_ATTN3_DIR} ${FLASH_ATTN3_DIR}
110138COPY --from=flash_attn ${FLASH_ATTN_DIR} ${FLASH_ATTN_DIR}
111139COPY --from=adaptive_gemm ${ADAPTIVE_GEMM_DIR} ${ADAPTIVE_GEMM_DIR}
112140COPY --from=grouped_gemm ${GROUPED_GEMM_DIR} ${GROUPED_GEMM_DIR}
141+ COPY --from=deep_ep ${DEEP_EP_DIR} ${DEEP_EP_DIR}
142+ COPY --from=deep_ep ${NVSHMEM_WHL_DIR} ${NVSHMEM_WHL_DIR}
113143
114144RUN unzip ${FLASH_ATTN_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
115145RUN unzip ${FLASH_ATTN3_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
116146RUN unzip ${ADAPTIVE_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
117147RUN unzip ${GROUPED_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
148+ RUN unzip ${DEEP_EP_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
149+ RUN unzip ${NVSHMEM_WHL_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
118150
119- ARG XTUNER_URL
120- ARG XTUNER_COMMIT
121- ARG LMDEPLOY_VERSION
122- ARG LMDEPLOY_URL
123-
124- # # install xtuner
125- RUN mkdir -p $CODESPACE
126- WORKDIR ${CODESPACE}
127-
128- # RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
129- # cd ${CODESPACE}/xtuner && \
130- # git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
131- COPY . ${CODESPACE}/xtuner
132-
133- WORKDIR ${CODESPACE}/xtuner
134- RUN export HTTPS_PROXY=$HTTPS_PROXY \
135- && export https_proxy=$HTTPS_PROXY \
136- && pip install liger-kernel parametrize --no-cache-dir \
137- && pip install . -v --no-cache-dir
151+ # install sglang and its runtime requirements
152+ ARG SGLANG_VERSION
138153
139- RUN pip install pystack py-spy --no-cache-dir
140- RUN git config --system --add safe.directory "*"
154+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
155+ pip install sglang==${SGLANG_VERSION} sgl_kernel pybase64 orjson uvloop setproctitle msgspec \
156+ compressed_tensors python-multipart torch_memory_saver \
157+ grpcio-tools==1.75.1 hf_transfer interegular llguidance==0.7.11 \
158+ xgrammar==0.1.24 blobfile==3.0.0 flashinfer_python==0.4.0 --no-cache-dir --no-deps
141159
142160# install lmdeploy and its missing runtime requirements
143- RUN pip install fastapi fire openai outlines \
144- partial_json_parser ray[default] shortuuid uvicorn \
145- 'numpy<2.0.0' \
146- python-sat[aiger,approxmc,cryptosat,pblib] distance Faker --no-cache-dir
147- WORKDIR ${CODESPACE}
148- RUN if [ -n "${LMDEPLOY_VERSION}" ]; then \
161+ ARG LMDEPLOY_VERSION
162+ ARG LMDEPLOY_URL
163+
164+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
165+ pip install fastapi fire openai outlines \
166+ partial_json_parser ray[default] shortuuid uvicorn \
167+ 'pydantic>2' openai_harmony --no-cache-dir && \
168+ if [ -n "${LMDEPLOY_VERSION}" ]; then \
149169 pip install lmdeploy==${LMDEPLOY_VERSION} --no-deps --no-cache-dir; \
150170 else \
151- git clone -c https.proxy=$HTTPS_PROXY $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
171+ git clone $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
152172 cd ${CODESPACE}/lmdeploy && \
153173 git checkout $(echo ${LMDEPLOY_URL} | cut -d '@' -f 2) && \
154174 pip install . -v --no-deps --no-cache-dir; \
155175 fi
156176
177+ # # install xtuner
178+ ARG XTUNER_URL
179+ ARG XTUNER_COMMIT
180+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
181+ # git clone $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
182+ # cd ${CODESPACE}/xtuner && \
183+ # git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
184+ COPY . ${CODESPACE}/xtuner
185+
186+ WORKDIR ${CODESPACE}/xtuner
187+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
188+ pip install .[all] -v --no-cache-dir
189+
190+ WORKDIR ${CODESPACE}
191+
157192# setup sysctl
158193RUN echo "fs.file-max=100000" >> /etc/sysctl.conf
159194RUN sysctl -p
0 commit comments