Skip to content

Commit 1f89ed3

Browse files
authored
[build] update Dockerfile with new lmdeploy and sglang (#1174)
* [build] update Dockerfile with new lmdeploy and sglang
* [build] refactor Dockerfile pip deps
* update lmdeploy tag to 0.10.2
1 parent 5e64570 commit 1f89ed3

File tree

3 files changed

+119
-73
lines changed

3 files changed

+119
-73
lines changed

Dockerfile

Lines changed: 101 additions & 66 deletions
Original file line number · Diff line number · Diff line change
@@ -1,39 +1,50 @@
11
# syntax=docker/dockerfile:1.10.0
22
# builder
3-
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.01-py3
3+
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.03-py3
44

55
## build args
66
FROM ${BASE_IMAGE} AS setup_env
77

8-
ARG CODESPACE=/root/codespace
9-
10-
ARG FLASH_ATTN_DIR=/tmp/flash-attn
11-
ARG FLASH_ATTN3_DIR=/tmp/flash-attn3
12-
ARG ADAPTIVE_GEMM_DIR=/tmp/adaptive_gemm
13-
ARG GROUPED_GEMM_DIR=/tmp/grouped_gemm
14-
158
ARG TORCH_VERSION
16-
179
ARG PPA_SOURCE
1810

19-
RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
20-
RUN if [ -n "${TORCH_VERSION}" ]; then \
21-
pip install torchvision torch==${TORCH_VERSION} --index-url https://download.pytorch.org/whl/cu126 --no-cache-dir; \
22-
fi
23-
24-
# set reasonable default for CUDA architectures when building ngc image
25-
ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
26-
27-
RUN sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
11+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
12+
sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
2813
apt update && \
2914
apt install --no-install-recommends ca-certificates -y && \
3015
apt install --no-install-recommends bc wget -y && \
3116
apt install --no-install-recommends build-essential sudo -y && \
3217
apt install --no-install-recommends git curl pkg-config tree unzip tmux \
33-
openssh-server openssh-client nmap dnsutils iproute2 lsof net-tools -y && \
18+
openssh-server openssh-client dnsutils iproute2 lsof net-tools zsh rclone -y && \
3419
apt clean && rm -rf /var/lib/apt/lists/*
3520

36-
RUN pip uninstall flash_attn -y
21+
RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
22+
RUN pip install pystack py-spy --no-cache-dir
23+
RUN git config --system --add safe.directory "*"
24+
25+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
26+
if [ -n "${TORCH_VERSION}" ]; then \
27+
pip install torchvision torch==${TORCH_VERSION} \
28+
--index-url https://download.pytorch.org/whl/cu128 \
29+
--extra-index-url https://download.pytorch.org/whl/cu126 \
30+
--no-cache-dir; \
31+
fi
32+
33+
# set reasonable default for CUDA architectures when building ngc image
34+
ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
35+
36+
RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
37+
38+
ARG FLASH_ATTN_DIR=/tmp/flash-attn
39+
ARG CODESPACE=/root/codespace
40+
ARG FLASH_ATTN3_DIR=/tmp/flash-attn3
41+
ARG ADAPTIVE_GEMM_DIR=/tmp/adaptive_gemm
42+
ARG GROUPED_GEMM_DIR=/tmp/grouped_gemm
43+
ARG DEEP_EP_DIR=/tmp/deep_ep
44+
ARG NVSHMEM_WHL_DIR=/tmp/nvshmem
45+
46+
RUN mkdir -p $CODESPACE
47+
WORKDIR ${CODESPACE}
3748

3849
# compile flash-attn
3950
FROM setup_env AS flash_attn
@@ -43,16 +54,14 @@ ARG FLASH_ATTN_DIR
4354
ARG FLASH_ATTN3_DIR
4455
ARG FLASH_ATTN_URL
4556

46-
RUN mkdir -p $CODESPACE
47-
WORKDIR ${CODESPACE}
48-
49-
RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
57+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
58+
git clone $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
5059
cd ${CODESPACE}/flash-attention && \
51-
git checkout $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 2)
60+
git checkout $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 2) && \
61+
git submodule update --init --recursive --force
5262

5363
WORKDIR ${CODESPACE}/flash-attention
5464

55-
RUN git submodule update --init --recursive --force
5665
RUN cd hopper && FLASH_ATTENTION_FORCE_BUILD=TRUE pip wheel -w ${FLASH_ATTN3_DIR} -v --no-deps .
5766
RUN FLASH_ATTENTION_FORCE_BUILD=TRUE pip wheel -w ${FLASH_ATTN_DIR} -v --no-deps .
5867

@@ -63,16 +72,14 @@ ARG CODESPACE
6372
ARG ADAPTIVE_GEMM_DIR
6473
ARG ADAPTIVE_GEMM_URL
6574

66-
RUN mkdir -p $CODESPACE
67-
WORKDIR ${CODESPACE}
68-
69-
RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 1) && \
75+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
76+
git clone $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 1) && \
7077
cd ${CODESPACE}/AdaptiveGEMM && \
71-
git checkout $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 2)
78+
git checkout $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 2) && \
79+
git submodule update --init --recursive --force
7280

7381
WORKDIR ${CODESPACE}/AdaptiveGEMM
7482

75-
RUN git submodule update --init --recursive --force
7683
RUN pip wheel -w ${ADAPTIVE_GEMM_DIR} -v --no-deps .
7784

7885
# compile grouped_gemm(permute and unpermute)
@@ -82,18 +89,37 @@ ARG CODESPACE
8289
ARG GROUPED_GEMM_DIR
8390
ARG GROUPED_GEMM_URL
8491

85-
RUN mkdir -p $CODESPACE
86-
WORKDIR ${CODESPACE}
87-
88-
RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 1) && \
92+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
93+
git clone $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 1) && \
8994
cd ${CODESPACE}/GroupedGEMM && \
90-
git checkout $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 2)
95+
git checkout $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 2) && \
96+
git submodule update --init --recursive --force
9197

9298
WORKDIR ${CODESPACE}/GroupedGEMM
9399

94-
RUN git submodule update --init --recursive --force
95100
RUN pip wheel -w ${GROUPED_GEMM_DIR} -v --no-deps .
96101

102+
# pypi install nvshmem and compile deepep
103+
FROM setup_env AS deep_ep
104+
105+
ARG CODESPACE
106+
ARG DEEP_EP_DIR
107+
ARG DEEP_EP_URL
108+
ARG NVSHMEM_WHL_DIR
109+
# build sm90 and sm100 for deep_ep for now
110+
ARG TORCH_CUDA_ARCH_LIST="9.0 10.0"
111+
112+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
113+
pip wheel -w ${NVSHMEM_WHL_DIR} -v "nvidia-nvshmem-cu12>=3.4.5" && \
114+
pip install ${NVSHMEM_WHL_DIR}/*.whl && \
115+
git clone $(echo ${DEEP_EP_URL} | cut -d '@' -f 1) && \
116+
cd ${CODESPACE}/DeepEP && \
117+
git checkout $(echo ${DEEP_EP_URL} | cut -d '@' -f 2) && \
118+
git submodule update --init --recursive --force
119+
120+
WORKDIR ${CODESPACE}/DeepEP
121+
122+
RUN pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
97123

98124
# integration xtuner
99125
FROM setup_env AS xtuner_dev
@@ -105,55 +131,64 @@ ARG FLASH_ATTN_DIR
105131
ARG FLASH_ATTN3_DIR
106132
ARG ADAPTIVE_GEMM_DIR
107133
ARG GROUPED_GEMM_DIR
134+
ARG DEEP_EP_DIR
135+
ARG NVSHMEM_WHL_DIR
108136

109137
COPY --from=flash_attn ${FLASH_ATTN3_DIR} ${FLASH_ATTN3_DIR}
110138
COPY --from=flash_attn ${FLASH_ATTN_DIR} ${FLASH_ATTN_DIR}
111139
COPY --from=adaptive_gemm ${ADAPTIVE_GEMM_DIR} ${ADAPTIVE_GEMM_DIR}
112140
COPY --from=grouped_gemm ${GROUPED_GEMM_DIR} ${GROUPED_GEMM_DIR}
141+
COPY --from=deep_ep ${DEEP_EP_DIR} ${DEEP_EP_DIR}
142+
COPY --from=deep_ep ${NVSHMEM_WHL_DIR} ${NVSHMEM_WHL_DIR}
113143

114144
RUN unzip ${FLASH_ATTN_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
115145
RUN unzip ${FLASH_ATTN3_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
116146
RUN unzip ${ADAPTIVE_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
117147
RUN unzip ${GROUPED_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
148+
RUN unzip ${DEEP_EP_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
149+
RUN unzip ${NVSHMEM_WHL_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
118150

119-
ARG XTUNER_URL
120-
ARG XTUNER_COMMIT
121-
ARG LMDEPLOY_VERSION
122-
ARG LMDEPLOY_URL
123-
124-
## install xtuner
125-
RUN mkdir -p $CODESPACE
126-
WORKDIR ${CODESPACE}
127-
128-
#RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
129-
#cd ${CODESPACE}/xtuner && \
130-
#git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
131-
COPY . ${CODESPACE}/xtuner
132-
133-
WORKDIR ${CODESPACE}/xtuner
134-
RUN export HTTPS_PROXY=$HTTPS_PROXY \
135-
&& export https_proxy=$HTTPS_PROXY \
136-
&& pip install liger-kernel parametrize --no-cache-dir \
137-
&& pip install . -v --no-cache-dir
151+
# install sglang and its runtime requirements
152+
ARG SGLANG_VERSION
138153

139-
RUN pip install pystack py-spy --no-cache-dir
140-
RUN git config --system --add safe.directory "*"
154+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
155+
pip install sglang==${SGLANG_VERSION} sgl_kernel pybase64 orjson uvloop setproctitle msgspec \
156+
compressed_tensors python-multipart torch_memory_saver \
157+
grpcio-tools==1.75.1 hf_transfer interegular llguidance==0.7.11 \
158+
xgrammar==0.1.24 blobfile==3.0.0 flashinfer_python==0.4.0 --no-cache-dir --no-deps
141159

142160
# install lmdeploy and its missing runtime requirements
143-
RUN pip install fastapi fire openai outlines \
144-
partial_json_parser ray[default] shortuuid uvicorn \
145-
'numpy<2.0.0' \
146-
python-sat[aiger,approxmc,cryptosat,pblib] distance Faker --no-cache-dir
147-
WORKDIR ${CODESPACE}
148-
RUN if [ -n "${LMDEPLOY_VERSION}" ]; then \
161+
ARG LMDEPLOY_VERSION
162+
ARG LMDEPLOY_URL
163+
164+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
165+
pip install fastapi fire openai outlines \
166+
partial_json_parser ray[default] shortuuid uvicorn \
167+
'pydantic>2' openai_harmony --no-cache-dir && \
168+
if [ -n "${LMDEPLOY_VERSION}" ]; then \
149169
pip install lmdeploy==${LMDEPLOY_VERSION} --no-deps --no-cache-dir; \
150170
else \
151-
git clone -c https.proxy=$HTTPS_PROXY $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
171+
git clone $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
152172
cd ${CODESPACE}/lmdeploy && \
153173
git checkout $(echo ${LMDEPLOY_URL} | cut -d '@' -f 2) && \
154174
pip install . -v --no-deps --no-cache-dir; \
155175
fi
156176

177+
## install xtuner
178+
ARG XTUNER_URL
179+
ARG XTUNER_COMMIT
180+
#RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
181+
# git clone $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
182+
# cd ${CODESPACE}/xtuner && \
183+
# git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
184+
COPY . ${CODESPACE}/xtuner
185+
186+
WORKDIR ${CODESPACE}/xtuner
187+
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
188+
pip install .[all] -v --no-cache-dir
189+
190+
WORKDIR ${CODESPACE}
191+
157192
# setup sysctl
158193
RUN echo "fs.file-max=100000" >> /etc/sysctl.conf
159194
RUN sysctl -p

image_build.sh

Lines changed: 12 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -6,32 +6,38 @@ export XTUNER_URL=https://github.com/InternLM/xtuner@${XTUNER_COMMIT}
66
export FLASH_ATTN_URL=https://github.com/Dao-AILab/flash-attention@060c9188beec3a8b62b33a3bfa6d5d2d44975fab
77
export ADAPTIVE_GEMM_URL=https://github.com/InternLM/AdaptiveGEMM@f0314fa6b6c54da0aa98b3718025ab8e860fdff4
88
export GROUPED_GEMM_URL=https://github.com/InternLM/GroupedGEMM@3ae328844bb13679ef2ae4f704a8eb615cca7571
9+
export DEEP_EP_URL=https://github.com/deepseek-ai/DeepEP@9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee # v1.2.1
10+
911
export TORCH_VERSION=${TORCH_VERSION:-"2.8.0"}
10-
export LMDEPLOY_VERSION="0.10.0"
11-
export LMDEPLOY_URL=https://github.com/InternLM/lmdeploy@11b9726de4cef1fca132c47a4bb98f4003c7ae27
12+
export LMDEPLOY_VERSION="0.10.2"
13+
# export LMDEPLOY_URL=https://github.com/InternLM/lmdeploy@a9a24fbd8985374cb01ecb6021d1ce9668253c9c
1214
export PPA_SOURCE="https://mirrors.aliyun.com"
15+
export SGLANG_VERSION="0.5.3"
1316

1417
image_name=${IMAGE_NAME:-"xtuner"}
15-
image_tag=${IMAGE_TAG:-"${XTUNER_COMMIT}"}
18+
image_tag=${IMAGE_TAG:-"pt$(echo ${TORCH_VERSION} | awk -F. '{print $1$2}')_$(date +%Y%m%d)_${XTUNER_COMMIT:0:7}"}
1619

1720
docker build . \
1821
-t "$image_name:$image_tag" \
19-
--build-arg HTTPS_PROXY=$HTTPS_PROXY \
22+
--secret id=HTTPS_PROXY \
2023
--build-arg TORCH_VERSION=$TORCH_VERSION\
2124
--build-arg BASE_IMAGE=$BASE_IMAGE \
2225
--build-arg PPA_SOURCE=$PPA_SOURCE \
2326
--build-arg ADAPTIVE_GEMM_URL=$ADAPTIVE_GEMM_URL \
2427
--build-arg FLASH_ATTN_URL=$FLASH_ATTN_URL \
2528
--build-arg GROUPED_GEMM_URL=$GROUPED_GEMM_URL \
29+
--build-arg DEEP_EP_URL=$DEEP_EP_URL \
2630
--build-arg XTUNER_URL=$XTUNER_URL \
2731
--build-arg XTUNER_COMMIT=$XTUNER_COMMIT \
2832
--build-arg LMDEPLOY_VERSION=$LMDEPLOY_VERSION \
29-
--build-arg LMDEPLOY_URL=${LMDEPLOY_URL}\
33+
--build-arg LMDEPLOY_URL=$LMDEPLOY_URL \
34+
--build-arg SGLANG_VERSION=$SGLANG_VERSION \
3035
--progress=plain \
3136
--label "BASE_IMAGE=$BASE_IMAGE" \
3237
--label "XTUNER_URL=$XTUNER_URL" \
3338
--label "XTUNER_COMMIT=$XTUNER_COMMIT" \
3439
--label "ADAPTIVE_GEMM_URL=$ADAPTIVE_GEMM_URL" \
3540
--label "FLASH_ATTN_URL=$FLASH_ATTN_URL" \
3641
--label "GROUPED_GEMM_URL=$GROUPED_GEMM_URL" \
37-
--label "LMDEPLOY_VERSION=$LMDEPLOY_VERSION"
42+
--label "LMDEPLOY_VERSION=$LMDEPLOY_VERSION" \
43+
--label "SGLANG_VERSION=$SGLANG_VERSION"

pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -23,6 +23,7 @@ classifiers = [
2323
"Topic :: Utilities",
2424
]
2525
dependencies = [
26+
"astor",
2627
"bitsandbytes==0.45.0",
2728
"datasets<4.0.0",
2829
"einops",
@@ -36,7 +37,7 @@ dependencies = [
3637
"tiktoken",
3738
"torch>=2.6.0",
3839
"torchvision",
39-
"transformers==4.56.0",
40+
"transformers==4.57.0",
4041
"cyclopts",
4142
"transformers_stream_generator",
4243
"opencv-python-headless",
@@ -46,6 +47,7 @@ dependencies = [
4647
"imageio",
4748
"timm",
4849
"codetiming",
50+
"GitPython",
4951
]
5052

5153
[project.urls]
@@ -70,11 +72,14 @@ video = [
7072
"decord",
7173
]
7274
all = [
75+
"jsonlines",
7376
"decord",
7477
"ray[default]",
7578
"httpx",
7679
"fastapi",
7780
"uvicorn",
81+
"liger-kernel",
82+
"parametrize",
7883
]
7984

8085
[tool.mypy]

0 commit comments

Comments (0)