Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
linux-build:
strategy:
matrix:
pyver: [py39, py310, py311, py312, py313]
pyver: [py310, py311, py312, py313]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
windows-build:
strategy:
matrix:
pyver: ['3.9', '3.10', '3.11', '3.12', '3.13']
pyver: ['3.10', '3.11', '3.12', '3.13']
runs-on: windows-latest
steps:
- name: Set git for windows
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
cuda_version: ['cu12.8', cu12, cu11]
python_version: ['3.9', '3.10', '3.11', '3.12', '3.13']
cuda_version: [cu13, cu12]
python_version: ['3.10', '3.11', '3.12', '3.13']
env:
CUDA_VERSION: ${{ matrix.cuda_version }}
PYTHON_VERSION: ${{ matrix.python_version }}
Expand Down
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,9 @@ endif()

if(ARCH STREQUAL "x86_64")
if (NOT CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 70-real 75-real) # V100, 2080
# Default to native code for V100 (sm_70) and RTX 2080 (sm_75) only when the
# CUDA toolkit is older than 13.
# The bound must be `VERSION_LESS "13"`, not `VERSION_LESS_EQUAL "12"`: version
# comparison is component-wise with missing components treated as zero, so
# 12.1 ... 12.9 compare greater than "12" and would silently lose these archs.
# The expansion is quoted so an empty version cannot break the if() syntax.
if ("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "13")
  set(CMAKE_CUDA_ARCHITECTURES 70-real 75-real) # V100, 2080
endif()
if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11")
list(APPEND CMAKE_CUDA_ARCHITECTURES 80-real) # A100
endif ()
Expand Down
6 changes: 6 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
ARG IMAGE_TYPE=final
ARG CUDA_VERSION=cu12

FROM nvidia/cuda:13.0.2-devel-ubuntu22.04 AS cu13
ENV CUDA_VERSION_SHORT=cu130

FROM nvidia/cuda:12.8.1-devel-ubuntu22.04 AS cu12.8
ENV CUDA_VERSION_SHORT=cu128

Expand Down Expand Up @@ -44,6 +47,9 @@ RUN --mount=type=cache,target=/root/.cache \
docker/prepare_wheel.sh

# Runtime image
FROM nvidia/cuda:13.0.2-base-ubuntu22.04 AS cu13-base
ENV CUDA_VERSION_SHORT=cu130

FROM nvidia/cuda:12.8.1-base-ubuntu22.04 AS cu12.8-base
ENV CUDA_VERSION_SHORT=cu128

Expand Down
4 changes: 3 additions & 1 deletion docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

mkdir -p /wheels /nccl

if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then
if [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
pip install nvidia-nccl-cu13
elif [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then
pip install nvidia-nccl-cu12
else
NVCC_GENCODE="-gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90,code=sm_90 -gencode=arch=compute_90,code=compute_90"
Expand Down
6 changes: 5 additions & 1 deletion docker/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ elif [[ "${CUDA_VERSION_SHORT}" = "cu124" ]]; then
apt-get install -y --no-install-recommends cuda-minimal-build-12-4
elif [[ "${CUDA_VERSION_SHORT}" = "cu128" ]]; then
apt-get install -y --no-install-recommends cuda-minimal-build-12-8
elif [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
apt-get install -y --no-install-recommends cuda-minimal-build-13-0
fi

apt-get clean -y
Expand All @@ -42,7 +44,9 @@ fi

pip install -U pip wheel setuptools

if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then
# Install the NVSHMEM pip wheel that matches the CUDA major version:
#   cu130            -> nvidia-nvshmem-cu13
#   any other value  -> nvidia-nvshmem-cu12
#   cu118            -> nothing is installed
# The cu130 test must stay first; otherwise the `!= cu118` branch below
# would also match cu130 and install the cu12 wheel.
if [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
pip install nvidia-nvshmem-cu13
elif [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then
pip install nvidia-nvshmem-cu12
fi

Expand Down
8 changes: 7 additions & 1 deletion docker/prepare_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then
fi

DEEP_EP_VERSION=26cf250
pip install nvidia-nvshmem-cu12

# Install the NVSHMEM wheel matching the CUDA major version before the
# wheels below are built.
if [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
    # Put the toolkit's cccl include directory on the C++ header search path.
    # NOTE(review): presumably needed because the builds below include CCCL
    # headers that live under include/cccl on CUDA 13 -- confirm.
    # The `:+` expansion appends the previous value only when it is non-empty.
    # A bare trailing ':' would be an empty path element, which GCC treats as
    # "search the current working directory".
    export CPLUS_INCLUDE_PATH="/usr/local/cuda/include/cccl${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}"
    pip install nvidia-nvshmem-cu13
else
    pip install nvidia-nvshmem-cu12
fi

pip wheel -v --no-build-isolation --no-deps -w /wheels "git+https://github.com/deepseek-ai/DeepEP.git@${DEEP_EP_VERSION}"
pip wheel -v --no-build-isolation --no-deps -w /wheels "git+https://github.com/deepseek-ai/FlashMLA.git@${FLASH_MLA_VERSION}"
Expand Down
20 changes: 14 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,20 @@ def get_turbomind_deps():
CUDA_COMPILER = os.getenv('CUDACXX', os.getenv('CMAKE_CUDA_COMPILER', 'nvcc'))
nvcc_output = subprocess.check_output([CUDA_COMPILER, '--version'], stderr=subprocess.DEVNULL).decode()
CUDAVER, = re.search(r'release\s+(\d+).', nvcc_output).groups()
return [
f'nvidia-nccl-cu{CUDAVER}',
f'nvidia-cuda-runtime-cu{CUDAVER}',
f'nvidia-cublas-cu{CUDAVER}',
f'nvidia-curand-cu{CUDAVER}',
]
if int(CUDAVER) >= 13:
return [
f'nvidia-nccl-cu{CUDAVER}',
'nvidia-cuda-runtime',
'nvidia-cublas',
'nvidia-curand',
]
else:
return [
f'nvidia-nccl-cu{CUDAVER}',
f'nvidia-cuda-runtime-cu{CUDAVER}',
f'nvidia-cublas-cu{CUDAVER}',
f'nvidia-curand-cu{CUDAVER}',
]


def parse_requirements(fname='requirements.txt', with_version=True):
Expand Down
5 changes: 5 additions & 0 deletions src/turbomind/kernels/gemm/tma.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ namespace turbomind::gemm {

#if __CUDACC_VER_MAJOR__ >= 12

#if (CUDA_VERSION >= 13000) && (!defined(PFN_cuTensorMapEncodeTiled))
// Compatibility shim: PFN_cuTensorMapEncodeTiled is not defined in the CUDA 13
// headers, so alias the unversioned name to the versioned _v12000 name that
// the code below uses as a type.
// The !defined() guard makes this a no-op whenever the name already exists as
// a macro.
// NOTE(review): relies on CUDA_VERSION being defined by an earlier include;
// if it is undefined here the condition evaluates to false -- confirm.
#define PFN_cuTensorMapEncodeTiled PFN_cuTensorMapEncodeTiled_v12000
#endif

namespace {

PFN_cuTensorMapEncodeTiled get_cuTensorMapEncodeTiled()
Expand Down
27 changes: 19 additions & 8 deletions src/turbomind/python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.

cmake_minimum_required(VERSION 3.8)
project(_turbomind)
project(_turbomind LANGUAGES CXX CUDA)

find_package(pybind11 CONFIG)
if(NOT pybind11_FOUND)
Expand All @@ -21,13 +21,24 @@ target_link_libraries(_xgrammar PRIVATE core xgrammar)
target_compile_features(_xgrammar PRIVATE cxx_std_14)

if (CALL_FROM_SETUP_PY)
set(_INSTALL_CUDA_RPATH
"\$ORIGIN"
"\$ORIGIN/../../nvidia/nccl/lib/"
"\$ORIGIN/../../nvidia/cuda_runtime/lib/"
"\$ORIGIN/../../nvidia/cublas/lib/"
"\$ORIGIN/../../nvidia/curand/lib/"
)
# Extract the CUDA major version: split "major.minor.patch" into a list and
# take its first element.
string(REPLACE "." ";" _ver ${CMAKE_CUDA_COMPILER_VERSION})
list(GET _ver 0 CUDA_MAJOR)

# RPATH entries shared by every CUDA version: the module's own directory and
# the NCCL library directory.
set(_INSTALL_CUDA_RPATH
  "\$ORIGIN"
  "\$ORIGIN/../../nvidia/nccl/lib/"
)

if(CUDA_MAJOR GREATER_EQUAL "13")
  # CUDA >= 13: a single per-major library directory.
  list(APPEND _INSTALL_CUDA_RPATH
    "\$ORIGIN/../../nvidia/cu${CUDA_MAJOR}/lib/"
  )
else()
  # Older CUDA: one library directory per component.
  list(APPEND _INSTALL_CUDA_RPATH
    "\$ORIGIN/../../nvidia/cuda_runtime/lib/"
    "\$ORIGIN/../../nvidia/cublas/lib/"
    "\$ORIGIN/../../nvidia/curand/lib/"
  )
endif()
set_target_properties(${PROJECT_NAME} PROPERTIES
BUILD_RPATH "\$ORIGIN"
INSTALL_RPATH "${_INSTALL_CUDA_RPATH}"
Expand Down
Loading