11# syntax=docker/dockerfile:1.10.0
22# builder
3- ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.01-py3
3+ ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.03-py3
44
55# # build args
66FROM ${BASE_IMAGE} AS setup_env
77
8- ARG CODESPACE=/root/codespace
9-
10- ARG FLASH_ATTN_DIR=/tmp/flash-attn
11- ARG FLASH_ATTN3_DIR=/tmp/flash-attn3
12- ARG ADAPTIVE_GEMM_DIR=/tmp/adaptive_gemm
13- ARG GROUPED_GEMM_DIR=/tmp/grouped_gemm
14-
158ARG TORCH_VERSION
16-
179ARG PPA_SOURCE
1810
19- RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
20- RUN if [ -n "${TORCH_VERSION}" ]; then \
21- pip install torchvision torch==${TORCH_VERSION} --index-url https://download.pytorch.org/whl/cu126 --no-cache-dir; \
22- fi
23-
24- # set reasonable default for CUDA architectures when building ngc image
25- ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
26-
27- RUN sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
11+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
12+ sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
2813 apt update && \
2914 apt install --no-install-recommends ca-certificates -y && \
3015 apt install --no-install-recommends bc wget -y && \
3116 apt install --no-install-recommends build-essential sudo -y && \
3217 apt install --no-install-recommends git curl pkg-config tree unzip tmux \
33- openssh-server openssh-client nmap dnsutils iproute2 lsof net-tools -y && \
18+ openssh-server openssh-client dnsutils iproute2 lsof net-tools zsh rclone -y && \
3419 apt clean && rm -rf /var/lib/apt/lists/*
3520
36- RUN pip uninstall flash_attn -y
21+ RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
22+ RUN pip install pystack py-spy --no-cache-dir
23+ RUN git config --system --add safe.directory "*"
24+
25+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
26+ if [ -n "${TORCH_VERSION}" ]; then \
27+ pip install torchvision torch==${TORCH_VERSION} \
28+ --index-url https://download.pytorch.org/whl/cu128 \
29+ --extra-index-url https://download.pytorch.org/whl/cu126 \
30+ --no-cache-dir; \
31+ fi
32+
33+ # set reasonable default for CUDA architectures when building ngc image
34+ ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
35+
36+ RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
37+
38+ ARG FLASH_ATTN_DIR=/tmp/flash-attn
39+ ARG CODESPACE=/root/codespace
40+ ARG FLASH_ATTN3_DIR=/tmp/flash-attn3
41+ ARG ADAPTIVE_GEMM_DIR=/tmp/adaptive_gemm
42+ ARG GROUPED_GEMM_DIR=/tmp/grouped_gemm
43+ ARG DEEP_EP_DIR=/tmp/deep_ep
44+ ARG NVSHMEM_WHL_DIR=/tmp/nvshmem
45+
46+ RUN mkdir -p $CODESPACE
47+ WORKDIR ${CODESPACE}
3748
3849# compile flash-attn
3950FROM setup_env AS flash_attn
@@ -43,16 +54,14 @@ ARG FLASH_ATTN_DIR
4354ARG FLASH_ATTN3_DIR
4455ARG FLASH_ATTN_URL
4556
46- RUN mkdir -p $CODESPACE
47- WORKDIR ${CODESPACE}
48-
49- RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
57+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
58+ git clone $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
5059 cd ${CODESPACE}/flash-attention && \
51- git checkout $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 2)
60+ git checkout $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 2) && \
61+ git submodule update --init --recursive --force
5262
5363WORKDIR ${CODESPACE}/flash-attention
5464
55- RUN git submodule update --init --recursive --force
5665RUN cd hopper && FLASH_ATTENTION_FORCE_BUILD=TRUE pip wheel -w ${FLASH_ATTN3_DIR} -v --no-deps .
5766RUN FLASH_ATTENTION_FORCE_BUILD=TRUE pip wheel -w ${FLASH_ATTN_DIR} -v --no-deps .
5867
@@ -63,16 +72,14 @@ ARG CODESPACE
6372ARG ADAPTIVE_GEMM_DIR
6473ARG ADAPTIVE_GEMM_URL
6574
66- RUN mkdir -p $CODESPACE
67- WORKDIR ${CODESPACE}
68-
69- RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 1) && \
75+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
76+ git clone $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 1) && \
7077 cd ${CODESPACE}/AdaptiveGEMM && \
71- git checkout $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 2)
78+ git checkout $(echo ${ADAPTIVE_GEMM_URL} | cut -d '@' -f 2) && \
79+ git submodule update --init --recursive --force
7280
7381WORKDIR ${CODESPACE}/AdaptiveGEMM
7482
75- RUN git submodule update --init --recursive --force
7683RUN pip wheel -w ${ADAPTIVE_GEMM_DIR} -v --no-deps .
7784
7885# compile grouped_gemm(permute and unpermute)
@@ -82,18 +89,37 @@ ARG CODESPACE
8289ARG GROUPED_GEMM_DIR
8390ARG GROUPED_GEMM_URL
8491
85- RUN mkdir -p $CODESPACE
86- WORKDIR ${CODESPACE}
87-
88- RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 1) && \
92+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
93+ git clone $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 1) && \
8994 cd ${CODESPACE}/GroupedGEMM && \
90- git checkout $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 2)
95+ git checkout $(echo ${GROUPED_GEMM_URL} | cut -d '@' -f 2) && \
96+ git submodule update --init --recursive --force
9197
9298WORKDIR ${CODESPACE}/GroupedGEMM
9399
94- RUN git submodule update --init --recursive --force
95100RUN pip wheel -w ${GROUPED_GEMM_DIR} -v --no-deps .
96101
102+ # pypi install nvshmem and compile deepep
103+ FROM setup_env AS deep_ep
104+
105+ ARG CODESPACE
106+ ARG DEEP_EP_DIR
107+ ARG DEEP_EP_URL
108+ ARG NVSHMEM_WHL_DIR
109+ # build sm90 and sm100 for deep_ep for now
110+ ARG TORCH_CUDA_ARCH_LIST="9.0 10.0"
111+
112+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
113+ pip wheel -w ${NVSHMEM_WHL_DIR} -v "nvidia-nvshmem-cu12>=3.4.5" && \
114+ pip install ${NVSHMEM_WHL_DIR}/*.whl && \
115+ git clone $(echo ${DEEP_EP_URL} | cut -d '@' -f 1) && \
116+ cd ${CODESPACE}/DeepEP && \
117+ git checkout $(echo ${DEEP_EP_URL} | cut -d '@' -f 2) && \
118+ git submodule update --init --recursive --force
119+
120+ WORKDIR ${CODESPACE}/DeepEP
121+
122+ RUN pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
97123
98124# integration xtuner
99125FROM setup_env AS xtuner_dev
@@ -105,55 +131,64 @@ ARG FLASH_ATTN_DIR
105131ARG FLASH_ATTN3_DIR
106132ARG ADAPTIVE_GEMM_DIR
107133ARG GROUPED_GEMM_DIR
134+ ARG DEEP_EP_DIR
135+ ARG NVSHMEM_WHL_DIR
108136
109137COPY --from=flash_attn ${FLASH_ATTN3_DIR} ${FLASH_ATTN3_DIR}
110138COPY --from=flash_attn ${FLASH_ATTN_DIR} ${FLASH_ATTN_DIR}
111139COPY --from=adaptive_gemm ${ADAPTIVE_GEMM_DIR} ${ADAPTIVE_GEMM_DIR}
112140COPY --from=grouped_gemm ${GROUPED_GEMM_DIR} ${GROUPED_GEMM_DIR}
141+ COPY --from=deep_ep ${DEEP_EP_DIR} ${DEEP_EP_DIR}
142+ COPY --from=deep_ep ${NVSHMEM_WHL_DIR} ${NVSHMEM_WHL_DIR}
113143
114144RUN unzip ${FLASH_ATTN_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
115145RUN unzip ${FLASH_ATTN3_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
116146RUN unzip ${ADAPTIVE_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
117147RUN unzip ${GROUPED_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
148+ RUN unzip ${DEEP_EP_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
149+ RUN unzip ${NVSHMEM_WHL_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
118150
119- ARG XTUNER_URL
120- ARG XTUNER_COMMIT
121- ARG LMDEPLOY_VERSION
122- ARG LMDEPLOY_URL
123-
124- # # install xtuner
125- RUN mkdir -p $CODESPACE
126- WORKDIR ${CODESPACE}
127-
128- # RUN git clone -c https.proxy=$HTTPS_PROXY $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
129- # cd ${CODESPACE}/xtuner && \
130- # git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
131- COPY . ${CODESPACE}/xtuner
132-
133- WORKDIR ${CODESPACE}/xtuner
134- RUN export HTTPS_PROXY=$HTTPS_PROXY \
135- && export https_proxy=$HTTPS_PROXY \
136- && pip install liger-kernel parametrize --no-cache-dir \
137- && pip install . -v --no-cache-dir
151+ # install sglang and its runtime requirements
152+ ARG SGLANG_VERSION
138153
139- RUN pip install pystack py-spy --no-cache-dir
140- RUN git config --system --add safe.directory "*"
154+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
155+ pip install sglang==${SGLANG_VERSION} sgl_kernel pybase64 orjson uvloop setproctitle msgspec \
156+ compressed_tensors python-multipart torch_memory_saver \
157+ grpcio-tools==1.75.1 hf_transfer interegular llguidance==0.7.11 \
158+ xgrammar==0.1.24 blobfile==3.0.0 flashinfer_python==0.4.0 --no-cache-dir --no-deps
141159
142160# install lmdeploy and its missing runtime requirements
143- RUN pip install fastapi fire openai outlines \
144- partial_json_parser ray[default] shortuuid uvicorn \
145- 'numpy<2.0.0' \
146- python-sat[aiger,approxmc,cryptosat,pblib] distance Faker --no-cache-dir
147- WORKDIR ${CODESPACE}
148- RUN if [ -n "${LMDEPLOY_VERSION}" ]; then \
161+ ARG LMDEPLOY_VERSION
162+ ARG LMDEPLOY_URL
163+
164+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
165+ pip install fastapi fire openai outlines \
166+ partial_json_parser ray[default] shortuuid uvicorn \
167+ 'pydantic>2' openai_harmony --no-cache-dir && \
168+ if [ -n "${LMDEPLOY_VERSION}" ]; then \
149169 pip install lmdeploy==${LMDEPLOY_VERSION} --no-deps --no-cache-dir; \
150170 else \
151- git clone -c https.proxy=$HTTPS_PROXY $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
171+ git clone $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
152172 cd ${CODESPACE}/lmdeploy && \
153173 git checkout $(echo ${LMDEPLOY_URL} | cut -d '@' -f 2) && \
154174 pip install . -v --no-deps --no-cache-dir; \
155175 fi
156176
177+ # # install xtuner
178+ ARG XTUNER_URL
179+ ARG XTUNER_COMMIT
180+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
181+ # git clone $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
182+ # cd ${CODESPACE}/xtuner && \
183+ # git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
184+ COPY . ${CODESPACE}/xtuner
185+
186+ WORKDIR ${CODESPACE}/xtuner
187+ RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
188+ pip install .[all] -v --no-cache-dir
189+
190+ WORKDIR ${CODESPACE}
191+
157192# setup sysctl
158193RUN echo "fs.file-max=100000" >> /etc/sysctl.conf
159194RUN sysctl -p
0 commit comments