Skip to content

Commit b23aaa8

Browse files
authored
Merge branch 'main' into feat/mrope-stage1
2 parents 5a458d3 + 7b15b19 commit b23aaa8

File tree

150 files changed

+2832
-1361
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

150 files changed

+2832
-1361
lines changed

.github/ISSUE_TEMPLATE/bug-report.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ Steps to reproduce the behavior:
3131
- Operating system (e.g. Ubuntu/Windows/MacOS/...):
3232
- Device (e.g. iPhone 12 Pro, PC+RTX 3090, ...)
3333
- How you installed MLC-LLM (`conda`, source):
34-
- How you installed TVM-Unity (`pip`, source):
34+
- How you installed TVM (`pip`, source):
3535
- Python version (e.g. 3.10):
3636
- GPU driver version (if applicable):
3737
- CUDA/cuDNN version (if applicable):
38-
- TVM Unity Hash Tag (`python -c "import tvm; print('\n'.join(f'{k}: {v}' for k, v in tvm.support.libinfo().items()))"`, applicable if you compile models):
38+
- TVM Hash Tag (`python -c "import tvm; print('\n'.join(f'{k}: {v}' for k, v in tvm.support.libinfo().items()))"`, applicable if you compile models):
3939
- Any other relevant information:
4040

4141
## Additional context

3rdparty/tokenizers-cpp

3rdparty/tvm

Submodule tvm updated 1834 files

CMakeLists.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ if(NOT CMAKE_BUILD_TYPE)
2424
endif(NOT CMAKE_BUILD_TYPE)
2525

2626
option(MLC_HIDE_PRIVATE_SYMBOLS "Hide private symbols" ON)
27+
option(MLC_LLM_BUILD_PYTHON_MODULE "Build Python module with scikit-build-core"
28+
OFF)
2729

2830
if(MLC_LLM_INSTALL_STATIC_LIB)
2931
set(BUILD_STATIC_RUNTIME ON)
@@ -184,3 +186,32 @@ else()
184186
RUNTIME DESTINATION bin
185187
LIBRARY DESTINATION lib${LIB_SUFFIX})
186188
endif()
189+
190+
# Python package installation configuration. This section ensures that all
191+
# necessary files are installed for the Python wheel
192+
if(MLC_LLM_BUILD_PYTHON_MODULE)
193+
message(STATUS "Configuring Python package installation")
194+
195+
# Set RPATH for mlc_llm and mlc_llm_module to find other libraries relatively
196+
if(APPLE)
197+
# macOS uses @loader_path
198+
set_target_properties(mlc_llm PROPERTIES INSTALL_RPATH "@loader_path")
199+
set_target_properties(mlc_llm_module PROPERTIES INSTALL_RPATH
200+
"@loader_path")
201+
elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
202+
# Linux uses $ORIGIN (test CMAKE_SYSTEM_NAME: the LINUX variable needs CMake >= 3.25)
203+
set_target_properties(mlc_llm PROPERTIES INSTALL_RPATH "\$ORIGIN")
204+
set_target_properties(mlc_llm_module PROPERTIES INSTALL_RPATH "\$ORIGIN")
205+
endif()
206+
207+
# Install compiled shared libraries, the cpp/ and web/ trees, and top-level docs
208+
install(TARGETS mlc_llm DESTINATION ".")
209+
install(TARGETS mlc_llm_module DESTINATION ".")
210+
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/cpp/" DESTINATION "cpp/")
211+
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/web/" DESTINATION "web/")
212+
install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
213+
"${CMAKE_CURRENT_SOURCE_DIR}/LICENSE"
214+
"${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" DESTINATION ".")
215+
216+
message(STATUS "Python package installation configured")
217+
endif()

android/MLCChat/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class AppViewModel(application: Application) : AndroidViewModel(application) {
5151
companion object {
5252
const val AppConfigFilename = "mlc-app-config.json"
5353
const val ModelConfigFilename = "mlc-chat-config.json"
54-
const val ParamsConfigFilename = "ndarray-cache.json"
54+
const val ParamsConfigFilename = "tensor-cache.json"
5555
const val ModelUrlSuffix = "resolve/main/"
5656
}
5757

android/mlc4j/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@ target_include_directories(
5757
PUBLIC ${JNI_INCLUDE_DIRS}
5858
${JNI_HEADER}
5959
${ANDROID_DIR}/src/cpp
60-
${TVM_SOURCE_DIR}/3rdparty/dlpack/include
60+
${TVM_SOURCE_DIR}/3rdparty/tvm-ffi/3rdparty/dlpack/include
6161
${TVM_SOURCE_DIR}/3rdparty/dmlc-core/include
6262
${TVM_SOURCE_DIR}/3rdparty/OpenCL-Headers
6363
${TVM_SOURCE_DIR}/3rdparty/picojson
6464
${TVM_SOURCE_DIR}/include
6565
${TVM_SOURCE_DIR}/src
66-
${TVM_SOURCE_DIR}/ffi/include
67-
${TVM_SOURCE_DIR}/ffi/src)
66+
${TVM_SOURCE_DIR}/3rdparty/tvm-ffi/include
67+
${TVM_SOURCE_DIR}/3rdparty/tvm-ffi/src)
6868
target_compile_definitions(tvm4j_runtime_packed PUBLIC ${MLC_LLM_COMPILE_DEFS})
6969
target_compile_definitions(
7070
tvm4j_runtime_packed

android/mlc4j/src/cpp/tvm_runtime.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,37 +6,41 @@
66
#include <dmlc/logging.h>
77
#include <dmlc/thread_local.h>
88

9+
#include <ffi/backtrace.cc>
910
#include <ffi/container.cc>
1011
#include <ffi/dtype.cc>
1112
#include <ffi/error.cc>
13+
#include <ffi/extra/env_c_api.cc>
14+
#include <ffi/extra/env_context.cc>
15+
#include <ffi/extra/library_module.cc>
16+
#include <ffi/extra/library_module_dynamic_lib.cc>
17+
#include <ffi/extra/library_module_system_lib.cc>
18+
#include <ffi/extra/module.cc>
1219
#include <ffi/function.cc>
1320
#include <ffi/object.cc>
14-
#include <ffi/traceback.cc>
1521
#include <runtime/cpu_device_api.cc>
1622
#include <runtime/device_api.cc>
1723
#include <runtime/file_utils.cc>
18-
#include <runtime/library_module.cc>
1924
#include <runtime/logging.cc>
2025
#include <runtime/memory/memory_manager.cc>
2126
#include <runtime/module.cc>
22-
#include <runtime/ndarray.cc>
2327
#include <runtime/nvtx.cc>
2428
#include <runtime/opencl/opencl_device_api.cc>
2529
#include <runtime/opencl/opencl_module.cc>
2630
#include <runtime/opencl/opencl_wrapper/opencl_wrapper.cc>
2731
#include <runtime/profiling.cc>
2832
#include <runtime/source_utils.cc>
29-
#include <runtime/system_library.cc>
33+
#include <runtime/tensor.cc>
3034
#include <runtime/thread_pool.cc>
3135
#include <runtime/threading_backend.cc>
3236
#include <runtime/vm/attn_backend.cc>
3337
#include <runtime/vm/builtin.cc>
3438
#include <runtime/vm/bytecode.cc>
3539
#include <runtime/vm/executable.cc>
3640
#include <runtime/vm/kv_state.cc>
37-
#include <runtime/vm/ndarray_cache_support.cc>
3841
#include <runtime/vm/paged_kv_cache.cc>
3942
#include <runtime/vm/rnn_state.cc>
43+
#include <runtime/vm/tensor_cache_support.cc>
4044
#include <runtime/vm/vm.cc>
4145
#include <runtime/workspace_pool.cc>
4246

ci/jenkinsfile.groovy

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ stage('Build') {
125125
sh(script: "ls -alh", label: 'Show work directory')
126126
sh(script: "${pkg_cuda} conda env export --name py312", label: 'Checkout version')
127127
sh(script: "${pkg_cuda} -j 8 -v \$HOME/.ccache /ccache conda run -n py312 ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
128-
sh(script: "${pkg_cuda} -j 8 conda run -n py312 ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
129128
sh(script: "${pkg_cuda} -j 1 conda run -n py312 ./ci/task/build_clean.sh", label: 'Clean up after build')
130129
sh(script: "ls -alh ./wheels/", label: 'Build artifact')
131130
pack_lib('mlc_wheel_cuda', 'wheels/*.whl')
@@ -139,7 +138,6 @@ stage('Build') {
139138
// sh(script: "ls -alh", label: 'Show work directory')
140139
// sh(script: "${pkg_rocm} conda env export --name py38", label: 'Checkout version')
141140
// sh(script: "${pkg_rocm} -j 8 conda run -n py38 ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
142-
// sh(script: "${pkg_rocm} -j 8 conda run -n py38 ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
143141
// sh(script: "${pkg_rocm} -j 1 conda run -n py38 ./ci/task/build_clean.sh", label: 'Clean up after build')
144142
// sh(script: "ls -alh ./wheels/", label: 'Build artifact')
145143
// pack_lib('mlc_wheel_rocm', 'wheels/*.whl')
@@ -153,7 +151,6 @@ stage('Build') {
153151
sh(script: "ls -alh", label: 'Show work directory')
154152
sh(script: "conda env export --name mlc-llm-ci", label: 'Checkout version')
155153
sh(script: "NUM_THREADS=6 GPU=metal conda run -n mlc-llm-ci ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
156-
sh(script: "NUM_THREADS=6 GPU=metal conda run -n mlc-llm-ci ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
157154
sh(script: "NUM_THREADS=6 GPU=metal conda run -n mlc-llm-ci ./ci/task/build_clean.sh", label: 'Clean up after build')
158155
sh(script: "ls -alh ./wheels/", label: 'Build artifact')
159156
pack_lib('mlc_wheel_metal', 'wheels/*.whl')
@@ -167,7 +164,6 @@ stage('Build') {
167164
sh(script: "ls -alh", label: 'Show work directory')
168165
sh(script: "${pkg_cpu} conda env export --name py312", label: 'Checkout version')
169166
sh(script: "${pkg_cpu} -j 8 conda run -n py312 ./ci/task/build_lib.sh", label: 'Build MLC LLM runtime')
170-
sh(script: "${pkg_cpu} -j 8 conda run -n py312 ./ci/task/build_wheel.sh", label: 'Build MLC LLM wheel')
171167
sh(script: "${pkg_cpu} -j 1 conda run -n py312 ./ci/task/build_clean.sh", label: 'Clean up after build')
172168
sh(script: "ls -alh ./wheels/", label: 'Build artifact')
173169
pack_lib('mlc_wheel_vulkan', 'wheels/*.whl')

ci/task/build_clean.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,4 @@ set -x
66
: ${GPU:="cpu"}
77

88
rm -rf ${WORKSPACE_CWD}/build/ \
9-
${WORKSPACE_CWD}/python/dist/ \
10-
${WORKSPACE_CWD}/python/build/ \
11-
${WORKSPACE_CWD}/python/mlc_llm.egg-info
9+
${WORKSPACE_CWD}/dist/

ci/task/build_lib.sh

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,18 @@ export CCACHE_NOHASHDIR=1
99
export CCACHE_DIR=/ccache
1010

1111
# Temporary workaround to install ccache.
12+
if [[ ${GPU} != metal ]]; then
13+
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
14+
conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
15+
fi
1216
conda install -c conda-forge ccache
1317

1418
if [[ ${GPU} != metal ]]; then
1519
source /multibuild/manylinux_utils.sh
1620
source /opt/rh/gcc-toolset-11/enable # GCC-11 is the highest GCC version compatible with NVCC < 12
1721
fi
1822

19-
mkdir -p $WORKSPACE_CWD/build/ && cd $WORKSPACE_CWD/build/
23+
mkdir -p $WORKSPACE_CWD/build
2024
if [[ ${GPU} == rocm* ]]; then
2125
echo set\(USE_VULKAN ON\) >>config.cmake
2226
echo set\(USE_ROCM ON\) >>config.cmake
@@ -29,17 +33,6 @@ elif [[ ${GPU} == cuda* ]]; then
2933
echo set\(USE_CUDA ON\) >>config.cmake
3034
echo set\(USE_CUBLAS ON\) >>config.cmake
3135
echo set\(USE_NCCL ON\) >>config.cmake
32-
echo set\(USE_FLASHINFER ON\) >>config.cmake
33-
echo set\(FLASHINFER_ENABLE_FP8 OFF\) >>config.cmake
34-
echo set\(FLASHINFER_ENABLE_BF16 OFF\) >>config.cmake
35-
echo set\(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8\) >>config.cmake
36-
echo set\(FLASHINFER_GEN_PAGE_SIZES 16\) >>config.cmake
37-
echo set\(FLASHINFER_GEN_HEAD_DIMS 128\) >>config.cmake
38-
echo set\(FLASHINFER_GEN_KV_LAYOUTS 0 1\) >>config.cmake
39-
echo set\(FLASHINFER_GEN_POS_ENCODING_MODES 0 1\) >>config.cmake
40-
echo set\(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false"\) >>config.cmake
41-
echo set\(FLASHINFER_GEN_CASUALS "false" "true"\) >>config.cmake
42-
echo set\(USE_CUTLASS ON\) >>config.cmake
4336
elif [[ ${GPU} == metal ]]; then
4437
export CCACHE_DIR=$HOME/ci/ccache
4538
echo set\(USE_METAL ON\) >>config.cmake
@@ -49,4 +42,26 @@ fi
4942

5043
cat config.cmake
5144

52-
cmake -DCMAKE_POLICY_VERSION_MINIMUM=3.5 .. && make -j${NUM_THREADS}
45+
AUDITWHEEL_OPTS="--plat ${AUDITWHEEL_PLAT} -w repaired_wheels/"
46+
AUDITWHEEL_OPTS="--exclude libtvm --exclude libtvm_runtime --exclude libtvm_ffi --exclude libvulkan ${AUDITWHEEL_OPTS}"
47+
if [[ ${GPU} == rocm* ]]; then
48+
AUDITWHEEL_OPTS="--exclude libamdhip64 --exclude libhsa-runtime64 --exclude librocm_smi64 --exclude librccl ${AUDITWHEEL_OPTS}"
49+
elif [[ ${GPU} == cuda* ]]; then
50+
AUDITWHEEL_OPTS="--exclude libcuda --exclude libcudart --exclude libnvrtc --exclude libcublas --exclude libcublasLt ${AUDITWHEEL_OPTS}"
51+
fi
52+
53+
rm -rf ${WORKSPACE_CWD}/dist
54+
cd ${WORKSPACE_CWD} && pip wheel --no-deps -w dist . -v
55+
56+
rm -rf ${WORKSPACE_CWD}/wheels/
57+
if [[ ${GPU} != metal ]]; then
58+
mkdir -p ${WORKSPACE_CWD}/repaired_wheels
59+
rm -rf ${WORKSPACE_CWD}/repaired_wheels/*
60+
auditwheel repair ${AUDITWHEEL_OPTS} dist/*.whl
61+
mv ${WORKSPACE_CWD}/repaired_wheels/ ${WORKSPACE_CWD}/wheels/
62+
else
63+
mkdir ${WORKSPACE_CWD}/wheels/
64+
mv dist/*.whl ${WORKSPACE_CWD}/wheels/
65+
fi
66+
67+
chown -R $ENV_USER_ID:$ENV_GROUP_ID ${WORKSPACE_CWD}/wheels/

0 commit comments

Comments
 (0)