diff --git a/.github/workflows/build_linux_arm64_wheels-gh.yml b/.github/workflows/build_linux_arm64_wheels-gh.yml index 7fa9fdf5fc2..9c9ee0bcbef 100644 --- a/.github/workflows/build_linux_arm64_wheels-gh.yml +++ b/.github/workflows/build_linux_arm64_wheels-gh.yml @@ -23,7 +23,7 @@ on: jobs: build_universal_wheel: - name: Build Universal Wheel (Linux ARM64) + name: Build Universal Wheel (Linux arm64) runs-on: GH-Linux-ARM64 if: ${{ !github.event.pull_request.draft }} steps: diff --git a/.github/workflows/build_linux_x86_wheels.yml b/.github/workflows/build_linux_x86_wheels.yml index 744ef0e0795..63789e89a41 100644 --- a/.github/workflows/build_linux_x86_wheels.yml +++ b/.github/workflows/build_linux_x86_wheels.yml @@ -1,4 +1,4 @@ -name: Build Linux X86 +name: Build Linux x86_64 on: workflow_dispatch: diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index 04cf1b2e4b6..b11389c2a75 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -21,10 +21,119 @@ on: - '**/*.md' jobs: - build_universal_wheel: - name: Build Universal Wheel (macOS ARM64) + build_universal_wheel_on_linux: + name: Build on Linux (cross-compile for macOS arm64) + runs-on: GH-Linux-ARM64 + if: ${{ !github.event.pull_request.draft }} + timeout-minutes: 600 + steps: + - name: Install Python build dependencies + run: | + sudo apt-get update + sudo apt-get install -y make build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \ + libffi-dev liblzma-dev p7zip-full + - name: Upgrade Rust toolchain + run: | + rustup toolchain install nightly-2025-07-07 + rustup default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version + - name: Install clang++ for Ubuntu + run: | + pwd + uname -a + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo 
./llvm.sh 19 + which clang++-19 + clang++-19 --version + sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget + # Install WebAssembly linker (wasm-ld) + sudo apt-get install -y lld-19 + # Create symlink for wasm-ld + if ! command -v wasm-ld &> /dev/null; then + sudo ln -sf /usr/bin/wasm-ld-19 /usr/bin/wasm-ld || true + fi + which wasm-ld || echo "wasm-ld not found in PATH" + ccache -s + - name: Update git + run: | + sudo add-apt-repository ppa:git-core/ppa -y + sudo apt-get update + sudo apt-get install -y git + git --version + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Update submodules + run: | + git submodule update --init --recursive --jobs 4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ubuntu-24.04-aarch64-cross-compile + max-size: 5G + append-timestamp: true + - name: remove old clang and link clang-19 to clang + run: | + sudo rm -f /usr/bin/clang || true + sudo ln -s /usr/bin/clang-19 /usr/bin/clang + sudo rm -f /usr/bin/clang++ || true + sudo ln -s /usr/bin/clang++-19 /usr/bin/clang++ + which clang++ + clang++ --version + - name: Run chdb/build_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build_mac_on_linux.sh arm64 + continue-on-error: false + - name: Run chdb/build/build_static_lib_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build/build_static_lib_mac_on_linux.sh arm64 + continue-on-error: false + - name: Check ccache statistics + run: | + ccache -s + ls -lh chdb + df -h + - name: Keep killall ccache and wait for ccache to finish + if: always() + run: | + sleep 60 + while ps -ef | grep ccache | grep -v grep; do \ + killall ccache; \ + sleep 10; \ + done + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: macos-arm64-build-artifacts + path: | + ./libchdb.so + ./libchdb.a + ./chdb/_chdb.abi3.so + ./chdb/libpybind11nonlimitedapi_stubs.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.8.dylib 
+ ./chdb/libpybind11nonlimitedapi_chdb_3.9.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.10.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.11.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.12.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.13.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.14.dylib + retention-days: 1 + + test_on_macos: + name: Test on macOS arm64 runs-on: macos-14-xlarge + needs: build_universal_wheel_on_linux if: ${{ !github.event.pull_request.draft }} + timeout-minutes: 600 steps: - name: Check machine architecture run: | @@ -39,17 +148,6 @@ jobs: else echo "This is an x86_64 (Intel) machine" fi - - name: Free up disk space (Initial) - run: | - # Clean Homebrew cache - brew cleanup -s 2>/dev/null || true - rm -rf "$(brew --cache)" 2>/dev/null || true - sudo rm -rf ~/Library/Developer/Xcode/DerivedData 2>/dev/null || true - sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode 2>/dev/null || true - sudo rm -rf /Users/runner/Library/Android 2>/dev/null || true - sudo rm -rf /tmp/* 2>/dev/null || true - echo "=== Disk usage after cleanup ===" - df -h - name: Setup pyenv run: | curl https://pyenv.run | bash @@ -97,39 +195,14 @@ jobs: - name: Remove /usr/local/bin/python3 run: | sudo rm -f /usr/local/bin/python3 - - name: Install clang++ for macOS + - name: Install go for macOS run: | - pwd - uname -a - export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 brew update - brew install ca-certificates lz4 mpdecimal readline sqlite xz z3 zstd - brew install openssl@3 || echo "OpenSSL install failed, continuing..." 
- brew install --ignore-dependencies llvm@19 - brew install git ninja libtool gettext binutils grep findutils nasm lld@19 libiconv - brew install ccache || echo "ccache installation failed, continuing without it" brew install go - cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:$PATH - which clang++ - clang++ --version - which wasm-ld || echo "wasm-ld not found in PATH" - which go go version - ccache -s | echo "ccache not available yet" - - name: Upgrade Rust toolchain - run: | - rustup toolchain install nightly-2025-07-07 - rustup default nightly-2025-07-07 - rustup component add rust-src - rustc --version - cargo --version - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Update submodules - run: | - git submodule update --init --recursive --jobs 4 - name: Update version for release if: startsWith(github.ref, 'refs/tags/v') run: | @@ -143,89 +216,34 @@ jobs: bump-my-version replace --new-version $TAG_NAME echo "Version files updated to $TAG_NAME" pyenv shell --unset - - name: Free up disk space (Before compilation) - run: | - echo "=== Disk usage before compilation cleanup ===" - df -h - brew cleanup -s 2>/dev/null || true - rm -rf "$(brew --cache)" 2>/dev/null || true - rm -rf ~/.cache/pip 2>/dev/null || true - rm -rf ~/.pyenv/.cache 2>/dev/null || true - rm -rf ~/.cargo/registry/cache 2>/dev/null || true - echo "=== Disk usage after cleanup ===" - df -h - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 + - name: Download build artifacts + uses: actions/download-artifact@v4 with: - key: ${{ runner.os }}-arm64-ccache - max-size: 5G - append-timestamp: true - - name: Run chdb/build.sh + name: macos-arm64-build-artifacts + path: ./artifacts + - name: Restore artifacts to original paths + run: | + mv ./artifacts/libchdb.so ./ + mv ./artifacts/libchdb.a ./ + mv ./artifacts/chdb/_chdb.abi3.so ./chdb/ + mv ./artifacts/chdb/libpybind11nonlimitedapi_stubs.dylib ./chdb/ 
+ for v in 8 9 10 11 12 13 14; do + mv ./artifacts/chdb/libpybind11nonlimitedapi_chdb_3.${v}.dylib ./chdb/ + done + ls -lh ./libchdb.so ./libchdb.a + ls -lh ./chdb/*.so ./chdb/*.dylib + - name: Run chdb/test_smoke.sh timeout-minutes: 600 run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - source ~/.cargo/env pyenv shell 3.8 - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ bash gen_manifest.sh - bash ./chdb/build.sh - pyenv shell 3.8 - bash -x ./chdb/test_smoke.sh - - name: Run chdb/build/build_static_lib.sh + bash -x ./chdb/test_smoke.sh cross-compile + - name: Run chdb/build/test_go_example.sh timeout-minutes: 600 run: | - export PATH=$HOME/.pyenv/bin:$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ - source ~/.cargo/env - eval "$(pyenv init -)" - pyenv shell 3.8 - bash ./chdb/build/build_static_lib.sh - pyenv shell --unset - continue-on-error: false - - name: Debug libchdb - run: | - ls -lh - llvm-nm libchdb.so | grep query_stable || true - echo "Global Symbol in libchdb.so:" - llvm-nm -g libchdb.so || true - echo "Global Symbol in libclickhouse-local-chdb.a:" - llvm-nm -g buildlib/programs/local/libclickhouse-local-chdb.a || true - echo "Global Symbol in libclickhouse-local-lib.a:" - llvm-nm -g buildlib/programs/local/libclickhouse-local-lib.a || true - echo "pychdb_cmd.sh:" - cat buildlib/pychdb_cmd.sh - echo "libchdb_cmd.sh:" - cat buildlib/libchdb_cmd.sh - - name: Scan chdb libraries with grype - run: | - echo "Scanning chdb libraries for vulnerabilities..." 
- # Files to scan - FILES_TO_SCAN="" - [ -f libchdb.so ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.so" - [ -f libchdb.a ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.a" - FILES_TO_SCAN="$FILES_TO_SCAN $(find chdb/ \( -name "*.dylib" -o -name "*.so" \) 2>/dev/null || true)" - SQLITE_VULNERABILITIES_FOUND=false - for file in $FILES_TO_SCAN; do - if [ -f "$file" ]; then - echo "=== Scanning $file ===" - SCAN_OUTPUT=$(grype "$file" 2>/dev/null || true) - echo "$SCAN_OUTPUT" - if echo "$SCAN_OUTPUT" | grep -qi sqlite; then - echo "❌ SQLite vulnerability found in $file" - SQLITE_VULNERABILITIES_FOUND=true - fi - fi - done - if [ "$SQLITE_VULNERABILITIES_FOUND" = true ]; then - echo "❌ SQLite vulnerabilities detected in chdb libraries!" - exit 1 - else - echo "✅ No SQLite vulnerabilities found in chdb libraries" - fi + bash ./chdb/build/test_go_example.sh ${{ github.workspace }}/libchdb.a continue-on-error: false - name: Run libchdb stub in examples dir run: | @@ -236,9 +254,6 @@ jobs: rm -rf chdb/build/ export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ pyenv shell 3.8 make wheel - name: Fix wheel platform tag @@ -257,12 +272,6 @@ jobs: sudo sysctl kern.corefile=$PWD/tmp/core/core.%P sudo sysctl kern.coredump=1 ulimit -c unlimited - - name: Free up disk space - run: | - # Clean more build artifacts - rm -rf buildlib/contrib 2>/dev/null || true - rm -rf buildlib/base 2>/dev/null || true - rm -rf buildlib/src 2>/dev/null || true - name: Test wheel on all Python versions run: | ulimit -c unlimited @@ -282,7 +291,7 @@ jobs: run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - pyenv shell 3.8 + pyenv shell 3.9 python -m pip install dist/*.whl --force-reinstall jupyter nbconvert --to 
notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb pyenv shell --unset @@ -300,14 +309,6 @@ jobs: echo "No core files found in tmp/core" fi continue-on-error: true - - name: Keep killall ccache and wait for ccache to finish - if: always() - run: | - sleep 60 - while ps -ef | grep ccache | grep -v grep; do \ - killall ccache; \ - sleep 10; \ - done - name: Upload core files artifact if: always() && env.CORE_FILES_FOUND == 'true' uses: actions/upload-artifact@v4 diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml index 1791a9fefa5..1a6577cf50c 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -1,4 +1,4 @@ -name: Build macOS X86 +name: Build macOS x86_64 on: workflow_dispatch: @@ -21,9 +21,117 @@ on: - '**/*.md' jobs: - build_universal_wheel: - name: Build Universal Wheel (macOS x86_64) + build_universal_wheel_on_linux: + name: Build on Linux (cross-compile for macOS x86_64) + runs-on: gh-64c + if: ${{ !github.event.pull_request.draft }} + timeout-minutes: 600 + steps: + - name: Install Python build dependencies + run: | + sudo apt-get update + sudo apt-get install -y make build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \ + libffi-dev liblzma-dev p7zip-full + - name: Upgrade Rust toolchain + run: | + rustup toolchain install nightly-2025-07-07 + rustup default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version + - name: Install clang++ for Ubuntu + run: | + pwd + uname -a + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 19 + which clang++-19 + clang++-19 --version + sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget + # Install WebAssembly linker (wasm-ld) + sudo apt-get install -y lld-19 + # Create symlink for wasm-ld + if ! 
command -v wasm-ld &> /dev/null; then + sudo ln -sf /usr/bin/wasm-ld-19 /usr/bin/wasm-ld || true + fi + which wasm-ld || echo "wasm-ld not found in PATH" + ccache -s + - name: Update git + run: | + sudo add-apt-repository ppa:git-core/ppa -y + sudo apt-get update + sudo apt-get install -y git + git --version + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Update submodules + run: | + git submodule update --init --recursive --jobs 4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ubuntu-22.04-x86_64-cross-compile + max-size: 5G + append-timestamp: true + - name: remove old clang and link clang-19 to clang + run: | + sudo rm -f /usr/bin/clang || true + sudo ln -s /usr/bin/clang-19 /usr/bin/clang + sudo rm -f /usr/bin/clang++ || true + sudo ln -s /usr/bin/clang++-19 /usr/bin/clang++ + which clang++ + clang++ --version + - name: Run chdb/build_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build_mac_on_linux.sh x86_64 + continue-on-error: false + - name: Run chdb/build/build_static_lib_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build/build_static_lib_mac_on_linux.sh x86_64 + continue-on-error: false + - name: Check ccache statistics + run: | + ccache -s + ls -lh chdb + df -h + - name: Keep killall ccache and wait for ccache to finish + if: always() + run: | + sleep 60 + while ps -ef | grep ccache | grep -v grep; do \ + killall ccache; \ + sleep 10; \ + done + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: macos-x86_64-build-artifacts + path: | + ./libchdb.so + ./libchdb.a + ./chdb/_chdb.abi3.so + ./chdb/libpybind11nonlimitedapi_stubs.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.8.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.9.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.10.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.11.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.12.dylib + 
./chdb/libpybind11nonlimitedapi_chdb_3.13.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.14.dylib + retention-days: 1 + + test_on_macos: + name: Test on macOS x86_64 runs-on: macos-15-intel + needs: build_universal_wheel_on_linux if: ${{ !github.event.pull_request.draft }} timeout-minutes: 600 steps: @@ -86,58 +194,14 @@ jobs: - name: Remove /usr/local/bin/python3 run: | sudo rm -f /usr/local/bin/python3 - - name: Install clang++ for macOS + - name: Install go for macOS run: | - pwd - uname -a - export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 brew update - brew install ca-certificates lz4 mpdecimal openssl@3 readline sqlite xz z3 zstd - brew install --ignore-dependencies llvm@19 - brew install git ninja libtool gettext gcc binutils grep findutils nasm lld@19 libiconv - brew install ccache || echo "ccache installation failed, continuing without it" brew install go - cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:$PATH - which clang++ - clang++ --version - which go go version - ccache -s || echo "ccache not available yet" - - name: Scan SQLite vulnerabilities with grype - run: | - # Install grype - curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin - # Update grype vulnerability database - grype db update - # Check SQLite vulnerabilities in Homebrew packages - echo "Scanning SQLite packages for vulnerabilities..." - GRYPE_RAW_OUTPUT=$(grype dir:/opt/homebrew --scope all-layers 2>/dev/null || true) - echo "Raw grype output:" - echo "$GRYPE_RAW_OUTPUT" - SQLITE_SCAN_OUTPUT=$(echo "$GRYPE_RAW_OUTPUT" | grep -i sqlite || true) - if [ -n "$SQLITE_SCAN_OUTPUT" ]; then - echo "❌ SQLite vulnerabilities found in packages! Build should be reviewed." 
- echo "SQLite vulnerability details:" - echo "$SQLITE_SCAN_OUTPUT" - exit 1 - else - echo "✅ No SQLite vulnerabilities found" - fi - continue-on-error: false - - name: Upgrade Rust toolchain - run: | - rustup toolchain install nightly-2025-07-07 - rustup default nightly-2025-07-07 - rustup component add rust-src - rustc --version - cargo --version - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Update submodules - run: | - git submodule update --init --recursive --jobs 4 - name: Update version for release if: startsWith(github.ref, 'refs/tags/v') run: | @@ -151,82 +215,35 @@ jobs: bump-my-version replace --new-version $TAG_NAME echo "Version files updated to $TAG_NAME" pyenv shell --unset - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 + - name: Download build artifacts + uses: actions/download-artifact@v4 with: - key: ${{ runner.os }}-x86_64-ccache - max-size: 10G - append-timestamp: true - env: - CCACHE_NOHASHDIR: "true" - - name: Run chdb/build.sh + name: macos-x86_64-build-artifacts + path: ./artifacts + - name: Restore artifacts to original paths + run: | + mv ./artifacts/libchdb.so ./ + mv ./artifacts/libchdb.a ./ + mv ./artifacts/chdb/_chdb.abi3.so ./chdb/ + mv ./artifacts/chdb/libpybind11nonlimitedapi_stubs.dylib ./chdb/ + for v in 8 9 10 11 12 13 14; do + mv ./artifacts/chdb/libpybind11nonlimitedapi_chdb_3.${v}.dylib ./chdb/ + done + ls -lh ./libchdb.so ./libchdb.a + ls -lh ./chdb/*.so ./chdb/*.dylib + - name: Run chdb/test_smoke.sh timeout-minutes: 600 run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - source ~/.cargo/env pyenv shell 3.9 - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ bash gen_manifest.sh - bash ./chdb/build.sh - pyenv shell 3.9 - bash -x ./chdb/test_smoke.sh + bash -x 
./chdb/test_smoke.sh cross-compile continue-on-error: false - - name: Run chdb/build/build_static_lib.sh + - name: Run chdb/build/test_go_example.sh timeout-minutes: 600 run: | - export PATH="$HOME/.pyenv/bin:$PATH" - eval "$(pyenv init -)" - source ~/.cargo/env - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ - pyenv shell 3.9 - bash ./chdb/build/build_static_lib.sh - pyenv shell --unset - continue-on-error: false - - name: Debug libchdb - run: | - ls -lh - llvm-nm libchdb.so | grep query_stable || true - echo "Global Symbol in libchdb.so:" - llvm-nm -g libchdb.so || true - echo "Global Symbol in libclickhouse-local-chdb.a:" - llvm-nm -g buildlib/programs/local/libclickhouse-local-chdb.a || true - echo "Global Symbol in libclickhouse-local-lib.a:" - llvm-nm -g buildlib/programs/local/libclickhouse-local-lib.a || true - echo "pychdb_cmd.sh:" - cat buildlib/pychdb_cmd.sh - echo "libchdb_cmd.sh:" - cat buildlib/libchdb_cmd.sh - - name: Scan chdb libraries with grype - run: | - echo "Scanning chdb libraries for vulnerabilities..." 
- # Files to scan - FILES_TO_SCAN="" - [ -f libchdb.so ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.so" - [ -f libchdb.a ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.a" - FILES_TO_SCAN="$FILES_TO_SCAN $(find chdb/ \( -name "*.dylib" -o -name "*.so" \) 2>/dev/null || true)" - SQLITE_VULNERABILITIES_FOUND=false - for file in $FILES_TO_SCAN; do - if [ -f "$file" ]; then - echo "=== Scanning $file ===" - SCAN_OUTPUT=$(grype "$file" 2>/dev/null || true) - echo "$SCAN_OUTPUT" - if echo "$SCAN_OUTPUT" | grep -qi sqlite; then - echo "❌ SQLite vulnerability found in $file" - SQLITE_VULNERABILITIES_FOUND=true - fi - fi - done - if [ "$SQLITE_VULNERABILITIES_FOUND" = true ]; then - echo "❌ SQLite vulnerabilities detected in chdb libraries!" - exit 1 - else - echo "✅ No SQLite vulnerabilities found in chdb libraries" - fi + bash ./chdb/build/test_go_example.sh ${{ github.workspace }}/libchdb.a continue-on-error: false - name: Run libchdb stub in examples dir run: | @@ -237,10 +254,6 @@ jobs: rm -rf chdb/build/ export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - source ~/.cargo/env - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ pyenv shell 3.9 make wheel - name: Fix wheel platform tag @@ -259,12 +272,6 @@ jobs: sudo sysctl kern.corefile=$PWD/tmp/core/core.%P sudo sysctl kern.coredump=1 ulimit -c unlimited - - name: Free up disk space - run: | - # Clean more build artifacts - rm -rf buildlib/contrib 2>/dev/null || true - rm -rf buildlib/base 2>/dev/null || true - rm -rf buildlib/src 2>/dev/null || true - name: Test wheel on all Python versions run: | export PATH="$HOME/.pyenv/bin:$PATH" @@ -300,14 +307,6 @@ jobs: echo "No core files found in tmp/core" fi continue-on-error: true - - name: Keep killall ccache and wait for ccache to finish - if: 
always() - run: | - sleep 60 - while ps -ef | grep ccache | grep -v grep; do \ - killall ccache; \ - sleep 10; \ - done - name: Upload core files artifact if: always() && env.CORE_FILES_FOUND == 'true' uses: actions/upload-artifact@v4 @@ -362,4 +361,4 @@ jobs: python -m twine upload dist/*.whl env: TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} \ No newline at end of file + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/build_musllinux_arm64_wheels.yml b/.github/workflows/build_musllinux_arm64_wheels.yml index 08e03e0eea5..9e7bfc32fab 100644 --- a/.github/workflows/build_musllinux_arm64_wheels.yml +++ b/.github/workflows/build_musllinux_arm64_wheels.yml @@ -1,4 +1,4 @@ -name: Build Linux(musllinux) ARM64 +name: Build Linux(musllinux) arm64 on: workflow_dispatch: @@ -22,7 +22,7 @@ on: jobs: build_musllinux_wheels: - name: Build musllinux wheels (Alpine Linux aarch64) + name: Build musllinux wheels (Alpine Linux arm64) runs-on: GH-Linux-ARM64 if: ${{ !github.event.pull_request.draft }} steps: diff --git a/.github/workflows/build_musllinux_x86_wheels.yml b/.github/workflows/build_musllinux_x86_wheels.yml index 0a753c6a493..11cc21dc6ac 100644 --- a/.github/workflows/build_musllinux_x86_wheels.yml +++ b/.github/workflows/build_musllinux_x86_wheels.yml @@ -1,4 +1,4 @@ -name: Build Linux(musllinux) x86 +name: Build Linux(musllinux) x86_64 on: workflow_dispatch: diff --git a/CMakeLists.txt b/CMakeLists.txt index f798d19698c..cf0dd320236 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -353,6 +353,11 @@ set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -fPI set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") if (OS_DARWIN) + # Set macOS deployment target if specified + if (CMAKE_OSX_DEPLOYMENT_TARGET) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-macosx_version_min,${CMAKE_OSX_DEPLOYMENT_TARGET}") + 
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-macosx_version_min,${CMAKE_OSX_DEPLOYMENT_TARGET}") + endif() if (USE_PYTHON) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-undefined,dynamic_lookup") else() diff --git a/chdb/build.sh b/chdb/build.sh index da078bff541..b1321882ad8 100755 --- a/chdb/build.sh +++ b/chdb/build.sh @@ -112,63 +112,6 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DCHDB_VERSION=${CHDB_VERSION} \ " -# # Generate libchdb.so linkage command: -# # 1. Use ar to delete the LocalChdb.cpp.o from libclickhouse-local-lib.a -# # `ar d programs/local/libclickhouse-local-lib.a LocalChdb.cpp.o` -# # 2. Change the entry point from `PyInit_chdb` to `query_stable` -# # `-Wl,-ePyInit_chdb` to `-Wl,-equery_stable` on Linux -# # `-Wl,-exported_symbol,_PyInit_${CHDB_PY_MOD}` to -# # `-Wl,-exported_symbol,_query_stable -Wl,-exported_symbol,_free_result` on Darwin -# # 3. Change the output file name from `_chdb.cpython-xx-x86_64-linux-gnu.s` to `libchdb.so` -# # `-o _chdb.cpython-39-x86_64-linux-gnu.so` to `-o libchdb.so` -# # 4. Write the command to a file for debug -# # 5. 
Run the command to generate libchdb.so - -# # Remove object from archive and save it to a new archive like: -# # path/to/oldname.a -> path/to/oldname-nopy.a -# remove_obj_from_archive() { -# local archive=$1 -# local obj=$2 -# local new_archive=$(echo ${archive} | sed 's/\.a$/-nopy.a/') -# cp -a ${archive} ${new_archive} -# ${AR} d ${new_archive} ${obj} -# echo "Old archive: ${archive}" -# ls -l ${archive} -# echo "New archive: ${new_archive}" -# ls -l ${new_archive} -# local oldfile=$(basename ${archive}) -# local newfile=$(basename ${new_archive}) -# LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed "s/${oldfile}/${newfile}/g") -# ${SED_INPLACE} "s/${oldfile}/${newfile}/g" CMakeFiles/libchdb.rsp -# } - - -# # Step 1, 2, 3: -# # Backup the libclickhouse-local-lib.a and restore it after ar d -# # LIBCHDB_SO="libchdb.so" -# # CLEAN_CHDB_A="libclickhouse-local-chdb.a" -# # cp -a ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a.bak -# # ${AR} d ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a LocalChdb.cpp.o -# # mv ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a ${BUILD_DIR}/programs/local/${CLEAN_CHDB_A} -# # mv ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a.bak ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a -# # ls -l ${BUILD_DIR}/programs/local/ -# LIBCHDB_SO="libchdb.so" -# LIBCHDB_CMD=${PYCHDB_CMD} -# if [ "${build_type}" == "Debug" ]; then -# remove_obj_from_archive ${BUILD_DIR}/programs/local/libclickhouse-local-libd.a LocalChdb.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libdbmsd.a StoragePython.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libdbmsd.a PythonSource.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libclickhouse_common_iod.a PythonUtils.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/TableFunctions/libclickhouse_table_functionsd.a TableFunctionPython.cpp.o -# else -# remove_obj_from_archive ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a LocalChdb.cpp.o 
-# remove_obj_from_archive ${BUILD_DIR}/src/libdbms.a StoragePython.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libdbms.a PythonSource.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libclickhouse_common_io.a PythonUtils.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/TableFunctions/libclickhouse_table_functions.a TableFunctionPython.cpp.o -# fi - - LIBCHDB_SO="libchdb.so" # Build libchdb.so cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 .. diff --git a/chdb/build/build_static_lib.sh b/chdb/build/build_static_lib.sh index d82459d28eb..71157755841 100755 --- a/chdb/build/build_static_lib.sh +++ b/chdb/build/build_static_lib.sh @@ -146,23 +146,7 @@ fi # Test with Go example -echo "Preparing go-example directory..." -cd ${MY_DIR}/go-example -cp ${MY_DIR}/libchdb_minimal.a ./libchdb.a -cp ${PROJ_DIR}/programs/local/chdb.h . -echo "Copied libchdb_minimal.a as libchdb.a and chdb.h to go-example directory" - -# Run Go test -echo "Running Go test..." -# export CGO_CFLAGS_ALLOW=".*" -# export CGO_LDFLAGS_ALLOW=".*" -go run . -if [ $? -ne 0 ]; then - echo "Error: Go test failed" - exit 1 -fi - -echo "Go test completed successfully!" +bash ${MY_DIR}/test_go_example.sh ${MY_DIR}/libchdb_minimal.a # Copy final library to project root echo "Copying libchdb_minimal.a to project root as libchdb.a..." diff --git a/chdb/build/build_static_lib_mac_on_linux.sh b/chdb/build/build_static_lib_mac_on_linux.sh new file mode 100755 index 00000000000..0370a153530 --- /dev/null +++ b/chdb/build/build_static_lib_mac_on_linux.sh @@ -0,0 +1,224 @@ +#!/bin/bash + +set -e + +TARGET_ARCH=${1:-x86_64} +build_type=${2:-Release} +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +. ${MY_DIR}/../vars.sh cross-compile + +# Validate architecture +if [[ "$TARGET_ARCH" != "x86_64" && "$TARGET_ARCH" != "arm64" ]]; then + echo "Error: Invalid architecture. 
Use 'x86_64' or 'arm64'" + echo "Usage: $0 [x86_64|arm64] [Release|Debug]" + exit 1 +fi + +echo "Cross-compiling chdb static library for macOS ${TARGET_ARCH} on Linux..." + +# Verify we're running on Linux +if [ "$(uname)" != "Linux" ]; then + echo "Error: This script must be run on Linux" + exit 1 +fi + +# Set architecture-specific variables +if [ "$TARGET_ARCH" == "x86_64" ]; then + DARWIN_TRIPLE="x86_64-apple-darwin" + TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" + BUILD_DIR_SUFFIX="darwin-x86_64" + MACOS_MIN_VERSION="10.15" + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" +else + # arm64 + DARWIN_TRIPLE="aarch64-apple-darwin" + TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" + BUILD_DIR_SUFFIX="darwin-arm64" + MACOS_MIN_VERSION="11.0" + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" +fi + +# Download macOS SDK +SDK_PATH="${PROJ_DIR}/cmake/toolchain/${SDK_DIR}" +echo "Downloading macOS SDK to ${SDK_PATH}..." +mkdir -p "${SDK_PATH}" +cd "${SDK_PATH}" +if ! curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz' | tar xJ --strip-components=1; then + echo "Error: Failed to download macOS SDK" + exit 1 +fi +echo "macOS SDK downloaded successfully" + +# Download Python headers +echo "Downloading Python headers..." +if ! bash "${DIR}/build/download_python_headers.sh"; then + echo "Error: Failed to download Python headers" + exit 1 +fi + +# Install cctools +if ! 
bash "${DIR}/build/install_cctools.sh" "${TARGET_ARCH}"; then + echo "Error: Failed to install cctools" + exit 1 +fi +# Set CCTOOLS path after installation +CCTOOLS_INSTALL_DIR="${HOME}/cctools" +CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" + +# Override tools with cross-compilation versions from cctools +export STRIP="llvm-strip-19" +export AR="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +export NM="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-nm" +export LDD="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool -L" + +echo "Using cross-compilation tools:" +echo " STRIP: ${STRIP}" +echo " AR: ${AR}" +echo " NM: ${NM}" +echo " LDD: ${LDD}" + +BUILD_DIR=${PROJ_DIR}/build-${BUILD_DIR_SUFFIX} + +# Set up cross-compilation tools +export CC=clang-19 +export CXX=clang++-19 + +# macOS-specific settings +GLIBC_COMPATIBILITY="-DGLIBC_COMPATIBILITY=0" +UNWIND="-DUSE_UNWIND=0" +HDFS="-DENABLE_HDFS=0 -DENABLE_GSASL_LIBRARY=0 -DENABLE_KRB5=0" +MYSQL="-DENABLE_MYSQL=0" +ICU="-DENABLE_ICU=0" +RUST_FEATURES="-DENABLE_RUST=0" +JEMALLOC="-DENABLE_JEMALLOC=0" +LLVM="-DENABLE_EMBEDDED_COMPILER=0 -DENABLE_DWARF_PARSER=0" +CMAKE_AR_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +CMAKE_INSTALL_NAME_TOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-install_name_tool" +CMAKE_RANLIB_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ranlib" +CMAKE_LINKER_NAME="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" + +if [ ! 
-d $BUILD_DIR ]; then + mkdir $BUILD_DIR +fi + +cd ${BUILD_DIR} + +CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_AR:FILEPATH=${CMAKE_AR_FILEPATH} \ + -DCMAKE_INSTALL_NAME_TOOL=${CMAKE_INSTALL_NAME_TOOL} \ + -DCMAKE_RANLIB:FILEPATH=${CMAKE_RANLIB_FILEPATH} \ + -DLINKER_NAME=${CMAKE_LINKER_NAME} \ + -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ + -DENABLE_CLICKHOUSE_KEEPER=0 -DENABLE_CLICKHOUSE_KEEPER_CONVERTER=0 -DENABLE_CLICKHOUSE_LOCAL=1 -DENABLE_CLICKHOUSE_SU=0 -DENABLE_CLICKHOUSE_BENCHMARK=0 \ + -DENABLE_AZURE_BLOB_STORAGE=1 -DENABLE_CLICKHOUSE_COPIER=0 -DENABLE_CLICKHOUSE_DISKS=0 -DENABLE_CLICKHOUSE_FORMAT=0 -DENABLE_CLICKHOUSE_GIT_IMPORT=0 \ + -DENABLE_AWS_S3=1 -DENABLE_HIVE=0 -DENABLE_AVRO=1 \ + -DENABLE_CLICKHOUSE_OBFUSCATOR=0 -DENABLE_CLICKHOUSE_ODBC_BRIDGE=0 -DENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER=0 \ + -DENABLE_KAFKA=1 -DENABLE_LIBPQXX=1 -DENABLE_NATS=0 -DENABLE_AMQPCPP=0 -DENABLE_NURAFT=0 \ + -DENABLE_CASSANDRA=0 -DENABLE_ODBC=0 -DENABLE_NLP=0 \ + -DENABLE_LDAP=0 \ + ${MYSQL} \ + ${HDFS} \ + -DENABLE_LIBRARIES=0 ${RUST_FEATURES} \ + ${GLIBC_COMPATIBILITY} \ + -DENABLE_UTILS=0 ${LLVM} ${UNWIND} \ + ${ICU} -DENABLE_UTF8PROC=1 ${JEMALLOC} \ + -DENABLE_PARQUET=1 -DENABLE_ROCKSDB=1 -DENABLE_SQLITE=1 -DENABLE_VECTORSCAN=1 \ + -DENABLE_PROTOBUF=1 -DENABLE_THRIFT=1 -DENABLE_MSGPACK=1 \ + -DENABLE_BROTLI=1 -DENABLE_H3=1 -DENABLE_CURL=1 \ + -DENABLE_CLICKHOUSE_ALL=0 -DUSE_STATIC_LIBRARIES=1 -DSPLIT_SHARED_LIBRARIES=0 \ + -DENABLE_SIMDJSON=1 -DENABLE_RAPIDJSON=1 \ + ${CPU_FEATURES} \ + -DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \ + -DENABLE_LIBFIU=1 \ + -DCHDB_VERSION=${CHDB_VERSION} \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + " + +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 -DCHDB_STATIC_LIBRARY_BUILD=1 .. 
+ninja -d keeprsp + +BINARY=${BUILD_DIR}/programs/clickhouse +rm -f ${BINARY} + +cd ${BUILD_DIR} +ninja -d keeprsp -v > build.log || true + +ccache -s || true + +cd ${MY_DIR} + +# Create static library +echo "Creating static library libchdb.a for macOS..." +python3 create_static_libchdb.py --cross-compile --build-dir=build-${BUILD_DIR_SUFFIX} --ar-cmd=${AR} +if [ $? -ne 0 ]; then + echo "Error: Failed to create static library" + exit 1 +fi + +# Prepare cpp-example directory and copy header file +echo "Preparing cpp-example directory..." +cd ${MY_DIR}/cpp-example +cp ${PROJ_DIR}/programs/local/chdb.h . +cp ${MY_DIR}/libchdb.a . +echo "Copied chdb.h and libchdb.a to cpp-example directory" + +# Compile example program +echo "Compiling chdb_example.cpp..." +if [ "$TARGET_ARCH" == "x86_64" ]; then + SYSROOT="${PROJ_DIR}/cmake/toolchain/darwin-x86_64" +else + SYSROOT="${PROJ_DIR}/cmake/toolchain/darwin-aarch64" +fi +clang-19 chdb_example.cpp -o chdb_example \ + --target=${DARWIN_TRIPLE} \ + -isysroot ${SYSROOT} \ + -mmacosx-version-min=${MACOS_MIN_VERSION} \ + -nostdinc++ \ + -I${PROJ_DIR}/contrib/llvm-project/libcxx/include \ + -I${PROJ_DIR}/contrib/llvm-project/libcxxabi/include \ + --ld-path=${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld \ + -L. -lchdb -liconv \ + -framework CoreFoundation \ + -Wl,-map,chdb_example.map +if [ $? -ne 0 ]; then + echo "Error: Failed to compile chdb_example.cpp" + exit 1 +fi + +# Copy map file to parent directory for analysis +echo "Copying chdb_example.map to parent directory..." +cp chdb_example.map ${MY_DIR}/ +cd ${MY_DIR} + +# Analyze map file to extract chdb objects +echo "Analyzing map file to extract chdb objects..." +python3 extract_chdb_objects.py +if [ $? -ne 0 ]; then + echo "Error: Failed to analyze map file" + exit 1 +fi + +# Create minimal libchdb.a based on extracted objects +echo "Creating minimal libchdb.a..." +python3 create_minimal_libchdb.py --ar-cmd=${AR} +if [ $? 
-ne 0 ]; then + echo "Error: Failed to create minimal libchdb.a" + exit 1 +fi + +# Strip the libchdb_minimal.a +if [ ${build_type} == "Debug" ]; then + echo -e "\nDebug build, skip strip" +else + echo -e "\nStrip the libchdb_minimal.a:" + ${STRIP} -S libchdb_minimal.a +fi + +# Copy final library to project root +echo "Copying libchdb_minimal.a to project root as libchdb.a..." +cp ${MY_DIR}/libchdb_minimal.a ${PROJ_DIR}/libchdb.a +echo "Final libchdb.a created at ${PROJ_DIR}/libchdb.a" + +# Print final library size +echo "Final libchdb.a size:" +ls -lh ${PROJ_DIR}/libchdb.a diff --git a/chdb/build/create_minimal_libchdb.py b/chdb/build/create_minimal_libchdb.py index 849bec0e766..6faf5a30004 100644 --- a/chdb/build/create_minimal_libchdb.py +++ b/chdb/build/create_minimal_libchdb.py @@ -4,22 +4,31 @@ Create minimized libchdb.a based on chdb_objects.txt """ +import argparse import os import platform import sys import subprocess -IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) AR_CMD = "" -if IS_MACOS_X86: - AR_CMD = "llvm-ar" - print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") -else: - AR_CMD = "ar" - print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") +def setup_ar_cmd(ar_cmd=None): + """Setup AR command based on arguments or platform""" + global AR_CMD + + if ar_cmd: + AR_CMD = ar_cmd + print(f"Using custom ar command: {AR_CMD}") + else: + IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) + if IS_MACOS_X86: + AR_CMD = "llvm-ar" + print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") + else: + AR_CMD = "ar" + print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") -print(f"Selected ar command: {AR_CMD}") + print(f"Selected ar command: {AR_CMD}") def read_required_objects(objects_file="chdb_objects.txt"): """Read list of required target files""" @@ 
-241,7 +250,20 @@ def create_minimal_library(extracted_files, temp_dir, output_lib="libchdb_minima return True +def parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser(description='Create minimized libchdb.a based on chdb_objects.txt') + parser.add_argument('--ar-cmd', type=str, default=None, + help='Path to ar command (for cross-compilation)') + return parser.parse_args() + def main(): + # Parse arguments + args = parse_args() + + # Setup AR command + setup_ar_cmd(ar_cmd=args.ar_cmd) + print("Starting creation of minimized libchdb.a") print("=" * 50) @@ -251,13 +273,6 @@ def main(): temp_dir = "libchdb_objects_tmp_dir" output_lib = "libchdb_minimal.a" - if len(sys.argv) > 1: - chdb_objects_file = sys.argv[1] - if len(sys.argv) > 2: - original_lib = sys.argv[2] - if len(sys.argv) > 3: - output_lib = sys.argv[3] - # Read required object files required_objects = read_required_objects(chdb_objects_file) if not required_objects: diff --git a/chdb/build/create_static_libchdb.py b/chdb/build/create_static_libchdb.py index c8746fd1c43..5f7fed8398b 100755 --- a/chdb/build/create_static_libchdb.py +++ b/chdb/build/create_static_libchdb.py @@ -4,6 +4,7 @@ Script to create libchdb.a static library """ +import argparse import os import platform import re @@ -11,33 +12,54 @@ import sys import shutil -# Detect if running on macOS x86 (where ar -d has problematic behavior) -IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) -IS_MACOS = platform.system() == "Darwin" +# Global variables (will be set based on arguments) +IS_MACOS_X86 = False +IS_MACOS = False +CROSS_COMPILE = False AR_CMD = "" +BUILD_DIR = "" + +def setup_platform(cross_compile=False, ar_cmd=None): + """Setup platform-specific variables""" + global IS_MACOS_X86, IS_MACOS, CROSS_COMPILE, AR_CMD + + if cross_compile: + # Cross-compiling for macOS on Linux + IS_MACOS = True + CROSS_COMPILE = True + if ar_cmd: + AR_CMD = ar_cmd + else: + AR_CMD = 
"ar" + print(f"Cross-compile mode: targeting macOS") + else: + # Native build + IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) + IS_MACOS = platform.system() == "Darwin" + if IS_MACOS_X86: + AR_CMD = "llvm-ar" + print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") + else: + AR_CMD = "ar" + print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") -# Choose ar command based on platform -if IS_MACOS_X86: - AR_CMD = "llvm-ar" - print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") -else: - AR_CMD = "ar" - print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") - -print(f"Selected ar command: {AR_CMD}") + print(f"Selected ar command: {AR_CMD}") + print(f"CROSS_COMPILE: {CROSS_COMPILE}, IS_MACOS: {IS_MACOS}") -def parse_libchdb_cmd(): +def parse_libchdb_cmd(build_dir_override=None): """Extract object files and static libraries""" + global BUILD_DIR # Get the directory containing this script, then go up two levels script_dir = os.path.dirname(os.path.abspath(__file__)) project_root = os.path.dirname(os.path.dirname(script_dir)) - if IS_MACOS_X86: - build_dir = 'buildlib' + if build_dir_override: + build_dir = build_dir_override else: build_dir = 'build-static-lib' + BUILD_DIR = build_dir print(f"Using build directory: {build_dir}") # First, check build.log to see if it contains @CMakeFiles/clickhouse.rsp @@ -153,7 +175,7 @@ def create_static_library(obj_files, lib_files): extracted_objects = [] # Add libiconv.a to the list of libraries to extract on macOS - if IS_MACOS: + if not CROSS_COMPILE and IS_MACOS: libiconv_path = "/opt/homebrew/opt/libiconv/lib/libiconv.a" if os.path.exists(libiconv_path): lib_files.append(libiconv_path) @@ -311,16 +333,6 @@ def create_static_library(obj_files, lib_files): print(f"Extracted {target_filename} → {unique_filename} (group #{file_index})") - # if IS_MACOS_X86: - # # 
Move the first occurrence to the end (changes extraction order) - # move_result = subprocess.run([AR_CMD, "-m", working_archive, target_filename], - # capture_output=True) - - # if move_result.returncode != 0: - # print(f"Warning: Failed to move {target_filename} in archive") - # print(f"STDERR: {move_result.stderr.decode() if move_result.stderr else 'No error message'}") - # return False - # else: # Delete this occurrence from working archive delete_result = subprocess.run([AR_CMD, "d", working_archive, target_filename], capture_output=True) @@ -470,12 +482,29 @@ def create_static_library(obj_files, lib_files): finally: pass +def parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser(description='Create libchdb.a static library') + parser.add_argument('--cross-compile', '-c', action='store_true', + help='Cross-compile mode (targeting macOS from Linux)') + parser.add_argument('--build-dir', '-b', type=str, default=None, + help='Build directory path (relative to project root or absolute)') + parser.add_argument('--ar-cmd', type=str, default=None, + help='Path to ar command (for cross-compilation)') + return parser.parse_args() + def main(): print("Creating libchdb.a static library...") + # Parse arguments + args = parse_args() + + # Setup platform based on arguments + setup_platform(cross_compile=args.cross_compile, ar_cmd=args.ar_cmd) + try: # Parse the command file - obj_files, lib_files = parse_libchdb_cmd() + obj_files, lib_files = parse_libchdb_cmd(build_dir_override=args.build_dir) # Create static library success = create_static_library(obj_files, lib_files) diff --git a/chdb/build/download_python_headers.sh b/chdb/build/download_python_headers.sh new file mode 100644 index 00000000000..aaeb543a650 --- /dev/null +++ b/chdb/build/download_python_headers.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +set -e + +TARGET_DIR="${HOME}/python_include" +TEMP_DIR="${TARGET_DIR}/tmp" + +VERSIONS=( + "3.8.10:3.8:3.8" + "3.9.13:3.9:3.9" + 
"3.10.11:3.10:3.10" + "3.11.9:3.11:3.11" + "3.12.10:3.12:3.12" + "3.13.9:3.13:3.13" + "3.14.0:3.14:3.14" +) + +cleanup() { + rm -rf "$TEMP_DIR" +} +trap cleanup EXIT + +mkdir -p "$TARGET_DIR" +mkdir -p "$TEMP_DIR" + +for entry in "${VERSIONS[@]}"; do + IFS=':' read -r FULL_VER SUBDIR MINOR_VER <<< "$entry" + + echo "==========================================" + echo "Processing Python ${FULL_VER}..." + echo "==========================================" + + # 检查目标目录是否已存在 + DEST_DIR="${TARGET_DIR}/${SUBDIR}" + if [ -d "$DEST_DIR" ] && [ -f "${DEST_DIR}/Python.h" ]; then + echo "✓ Python ${FULL_VER} headers already installed at ${DEST_DIR}" + echo " Skipping..." + continue + fi + + WORK_DIR="${TEMP_DIR}/${SUBDIR}" + mkdir -p "$WORK_DIR" + cd "$WORK_DIR" + + PKG_URL="https://www.python.org/ftp/python/${FULL_VER}/python-${FULL_VER}-macos11.pkg" + + echo "Downloading: $PKG_URL" + if wget -q --spider "$PKG_URL" 2>/dev/null; then + wget -q --show-progress -O python.pkg "$PKG_URL" + else + echo "ERROR: Failed to download Python ${FULL_VER}" + exit 1 + fi + + echo "Extracting pkg with 7z..." + 7z x -y python.pkg > /dev/null + + PAYLOAD_DIR="" + for dir in Python_Framework.pkg PythonFramework-*.pkg; do + if [ -d "$dir" ] || [ -f "$dir/Payload" ]; then + PAYLOAD_DIR="$dir" + break + fi + done + + if [ -z "$PAYLOAD_DIR" ]; then + PAYLOAD_DIR=$(find . -name "Payload" -type f | head -1 | xargs dirname) + fi + + if [ -z "$PAYLOAD_DIR" ] || [ ! -f "${PAYLOAD_DIR}/Payload" ]; then + echo "ERROR: Cannot find Payload for Python ${FULL_VER}" + exit 1 + fi + + echo "Extracting Payload from ${PAYLOAD_DIR}..." + cd "$PAYLOAD_DIR" + 7z x -y Payload -so 2>/dev/null | cpio -id 2>/dev/null || true + + HEADER_SRC="" + for path in \ + "Versions/${MINOR_VER}/Headers" \ + "Headers" + do + if [ -d "$path" ] && [ -f "$path/Python.h" ]; then + HEADER_SRC="$path" + break + fi + done + + if [ -z "$HEADER_SRC" ]; then + PYTHON_H=$(find . 
-name "Python.h" -type f | head -1) + if [ -n "$PYTHON_H" ]; then + HEADER_SRC=$(dirname "$PYTHON_H") + fi + fi + + if [ -z "$HEADER_SRC" ] || [ ! -f "${HEADER_SRC}/Python.h" ]; then + echo "ERROR: Cannot find headers for Python ${FULL_VER}" + exit 1 + fi + + mkdir -p "$DEST_DIR" + cp -r "${HEADER_SRC}/"* "$DEST_DIR/" + + echo "✓ Python ${FULL_VER} headers installed to ${DEST_DIR}" + echo " Files: $(ls "$DEST_DIR" | wc -l | tr -d ' ') items" +done + +echo "" +echo "==========================================" +echo "Done! Headers installed to: ${TARGET_DIR}" +echo "==========================================" +ls -la "$TARGET_DIR" \ No newline at end of file diff --git a/chdb/build/install_cctools.sh b/chdb/build/install_cctools.sh new file mode 100644 index 00000000000..bfc7fe193e2 --- /dev/null +++ b/chdb/build/install_cctools.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +set -e + +# Parse arguments +TARGET_ARCH="${1:-x86_64}" + +# Set Darwin triple based on architecture +if [ "$TARGET_ARCH" == "x86_64" ]; then + DARWIN_TRIPLE="x86_64-apple-darwin" +else + DARWIN_TRIPLE="aarch64-apple-darwin" +fi + +# Install cctools if not already installed +CCTOOLS_INSTALL_DIR="${HOME}/cctools" +CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" + +if [ -z "${CCTOOLS:-}" ]; then + echo "CCTOOLS environment variable not set, checking for installation..." >&2 + + # Check if cctools is already installed + if [ -f "${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" ]; then + echo "Found existing cctools installation at ${CCTOOLS_INSTALL_DIR}" >&2 + export CCTOOLS="${CCTOOLS_BIN}" + else + echo "cctools not found, installing..." >&2 + + mkdir -p ~/cctools + export CCTOOLS=$(cd ~/cctools && pwd) + cd ${CCTOOLS} + + git clone https://github.com/tpoechtrager/apple-libtapi.git + cd apple-libtapi + git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 + INSTALLPREFIX=${CCTOOLS} ./build.sh + ./install.sh + cd .. 
+ + git clone https://github.com/chdb-io/cctools-port.git + cd cctools-port/cctools + + # Set cctools target based on architecture + if [ "$TARGET_ARCH" == "x86_64" ]; then + CCTOOLS_TARGET="x86_64-apple-darwin" + else + CCTOOLS_TARGET="aarch64-apple-darwin" + fi + + ./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=${CCTOOLS_TARGET} + make install + fi +else + echo "Using CCTOOLS from environment variable: ${CCTOOLS}" >&2 +fi + +# Verify cctools installation +if [ ! -f "${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" ]; then + echo "Error: cctools linker not found at ${CCTOOLS}/${DARWIN_TRIPLE}-ld" >&2 + echo "Please verify cctools installation or set CCTOOLS environment variable correctly" >&2 + exit 1 +fi + +echo "cctools verified: ${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" >&2 diff --git a/chdb/build/test_go_example.sh b/chdb/build/test_go_example.sh new file mode 100644 index 00000000000..b64d938e470 --- /dev/null +++ b/chdb/build/test_go_example.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +# Get script directory +MY_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJ_DIR="$(cd "${MY_DIR}/../.." && pwd)" + +# Allow custom library path +LIBCHDB_PATH="${1:-${MY_DIR}/libchdb_minimal.a}" + +echo "Testing with Go example..." +echo "Using library: ${LIBCHDB_PATH}" + +# Prepare go-example directory +echo "Preparing go-example directory..." +cd ${MY_DIR}/go-example + +# Copy library and header +if [ -f "${LIBCHDB_PATH}" ]; then + cp "${LIBCHDB_PATH}" ./libchdb.a +else + echo "Error: Library not found: ${LIBCHDB_PATH}" + exit 1 +fi + +cp ${PROJ_DIR}/programs/local/chdb.h . +echo "Copied library as libchdb.a and chdb.h to go-example directory" + +# Run Go test +echo "Running Go test..." +go run . +if [ $? -ne 0 ]; then + echo "Error: Go test failed" + exit 1 +fi + +echo "Go test completed successfully!" 
diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh new file mode 100755 index 00000000000..b9b4625294b --- /dev/null +++ b/chdb/build_mac_on_linux.sh @@ -0,0 +1,314 @@ +#!/bin/bash + +set -e + +# Cross-compile chdb for macOS (x86_64 or arm64) on Linux +# Usage: ./build_mac_on_linux.sh [x86_64|arm64] [Release|Debug] + +# Parse arguments +TARGET_ARCH=${1:-x86_64} +build_type=${2:-Release} +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +. ${DIR}/vars.sh cross-compile + +# Validate architecture +if [[ "$TARGET_ARCH" != "x86_64" && "$TARGET_ARCH" != "arm64" ]]; then + echo "Error: Invalid architecture. Use 'x86_64' or 'arm64'" + echo "Usage: $0 [x86_64|arm64] [Release|Debug]" + exit 1 +fi + +# Verify we're running on Linux +if [ "$(uname)" != "Linux" ]; then + echo "Error: This script must be run on Linux" + exit 1 +fi + +echo "Cross-compiling chdb for macOS ${TARGET_ARCH} on Linux..." + +# Set architecture-specific variables first +if [ "$TARGET_ARCH" == "x86_64" ]; then + DARWIN_TRIPLE="x86_64-apple-darwin" + TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" + BUILD_DIR_SUFFIX="darwin-x86_64" + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" + SDK_DIR="darwin-x86_64" +else + # arm64 + DARWIN_TRIPLE="aarch64-apple-darwin" + TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" + BUILD_DIR_SUFFIX="darwin-arm64" + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" + SDK_DIR="darwin-aarch64" +fi + +# Download macOS SDK +SDK_PATH="${PROJ_DIR}/cmake/toolchain/${SDK_DIR}" +echo "Downloading macOS SDK to ${SDK_PATH}..." +mkdir -p "${SDK_PATH}" +cd "${SDK_PATH}" +if ! curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz' | tar xJ --strip-components=1; then + echo "Error: Failed to download macOS SDK" + exit 1 +fi + +echo "macOS SDK downloaded successfully" + +# Download Python headers +echo "Downloading Python headers..." +if ! 
bash "${DIR}/build/download_python_headers.sh"; then + echo "Error: Failed to download Python headers" + exit 1 +fi + +# Install cctools +if ! bash "${DIR}/build/install_cctools.sh" "${TARGET_ARCH}"; then + echo "Error: Failed to install cctools" + exit 1 +fi +# Set CCTOOLS path after installation +CCTOOLS_INSTALL_DIR="${HOME}/cctools" +CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" + +# Override tools with cross-compilation versions from cctools +# export STRIP="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-strip" +export STRIP="llvm-strip-19" +export AR="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +export NM="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-nm" +export LDD="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool -L" + +echo "Using cross-compilation tools:" +echo " STRIP: ${STRIP}" +echo " AR: ${AR}" +echo " NM: ${NM}" +echo " LDD: ${LDD}" + +BUILD_DIR=${PROJ_DIR}/build-${BUILD_DIR_SUFFIX} + +export CC=clang-19 +export CXX=clang++-19 + +RUST_FEATURES="-DENABLE_RUST=0" +GLIBC_COMPATIBILITY="-DGLIBC_COMPATIBILITY=0" +UNWIND="-DUSE_UNWIND=0" +JEMALLOC="-DENABLE_JEMALLOC=0" +PYINIT_ENTRY="-Wl,-exported_symbol,_PyInit_${CHDB_PY_MOD}" +HDFS="-DENABLE_HDFS=0 -DENABLE_GSASL_LIBRARY=0 -DENABLE_KRB5=0" +MYSQL="-DENABLE_MYSQL=0" +ICU="-DENABLE_ICU=0" +SED_INPLACE="sed -i" +LLVM="-DENABLE_EMBEDDED_COMPILER=0 -DENABLE_DWARF_PARSER=0" +CMAKE_AR_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +CMAKE_INSTALL_NAME_TOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-install_name_tool" +CMAKE_RANLIB_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ranlib" +CMAKE_LINKER_NAME="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" + +if [ ! 
-d $BUILD_DIR ]; then + mkdir $BUILD_DIR +fi + +cd ${BUILD_DIR} + +CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_AR:FILEPATH=${CMAKE_AR_FILEPATH} \ + -DCMAKE_INSTALL_NAME_TOOL=${CMAKE_INSTALL_NAME_TOOL} \ + -DCMAKE_RANLIB:FILEPATH=${CMAKE_RANLIB_FILEPATH} \ + -DLINKER_NAME=${CMAKE_LINKER_NAME} \ + -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ + -DENABLE_CLICKHOUSE_KEEPER=0 -DENABLE_CLICKHOUSE_KEEPER_CONVERTER=0 -DENABLE_CLICKHOUSE_LOCAL=1 -DENABLE_CLICKHOUSE_SU=0 -DENABLE_CLICKHOUSE_BENCHMARK=0 \ + -DENABLE_AZURE_BLOB_STORAGE=1 -DENABLE_CLICKHOUSE_COPIER=0 -DENABLE_CLICKHOUSE_DISKS=0 -DENABLE_CLICKHOUSE_FORMAT=0 -DENABLE_CLICKHOUSE_GIT_IMPORT=0 \ + -DENABLE_AWS_S3=1 -DENABLE_HIVE=0 -DENABLE_AVRO=1 \ + -DENABLE_CLICKHOUSE_OBFUSCATOR=0 -DENABLE_CLICKHOUSE_ODBC_BRIDGE=0 -DENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER=0 \ + -DENABLE_KAFKA=1 -DENABLE_LIBPQXX=1 -DENABLE_NATS=0 -DENABLE_AMQPCPP=0 -DENABLE_NURAFT=0 \ + -DENABLE_CASSANDRA=0 -DENABLE_ODBC=0 -DENABLE_NLP=0 \ + -DENABLE_LDAP=0 \ + ${MYSQL} \ + ${HDFS} \ + -DENABLE_LIBRARIES=0 ${RUST_FEATURES} \ + ${GLIBC_COMPATIBILITY} \ + -DENABLE_UTILS=0 ${LLVM} ${UNWIND} \ + ${ICU} -DENABLE_UTF8PROC=1 ${JEMALLOC} \ + -DENABLE_PARQUET=1 -DENABLE_ROCKSDB=1 -DENABLE_SQLITE=1 -DENABLE_VECTORSCAN=1 \ + -DENABLE_PROTOBUF=1 -DENABLE_THRIFT=1 -DENABLE_MSGPACK=1 \ + -DENABLE_BROTLI=1 -DENABLE_H3=1 -DENABLE_CURL=1 \ + -DENABLE_CLICKHOUSE_ALL=0 -DUSE_STATIC_LIBRARIES=1 -DSPLIT_SHARED_LIBRARIES=0 \ + -DENABLE_SIMDJSON=1 -DENABLE_RAPIDJSON=1 \ + ${CPU_FEATURES} \ + -DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \ + -DENABLE_LIBFIU=1 \ + -DCHDB_VERSION=${CHDB_VERSION} \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + " + +LIBCHDB_SO="libchdb.so" + +# Build libchdb.so +echo "Executing cmake..." +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 .. 
+ninja -d keeprsp + +BINARY=${BUILD_DIR}/programs/clickhouse +echo -e "\nBINARY: ${BINARY}" +ls -lh ${BINARY} +echo -e "\nfile info of ${BINARY}" +file ${BINARY} +rm -f ${BINARY} + +cd ${BUILD_DIR} +ninja -d keeprsp -v > build.log || true +USING_RESPONSE_FILE=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log | grep '@CMakeFiles/clickhouse.rsp' || true) + +if [ ! "${USING_RESPONSE_FILE}" == "" ]; then + if [ -f CMakeFiles/clickhouse.rsp ]; then + cp -a CMakeFiles/clickhouse.rsp CMakeFiles/libchdb.rsp + else + echo "CMakeFiles/clickhouse.rsp not found" + exit 1 + fi +fi + +LIBCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \ + | sed "s/-o programs\/clickhouse/-fPIC -shared -o ${LIBCHDB_SO}/" \ + | sed 's/^[^&]*&& //' | sed 's/&&.*//' \ + | sed 's/ -Wl,-undefined,error/ -Wl,-undefined,dynamic_lookup/g' \ + | sed 's/ -Xlinker --no-undefined//g' \ + | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/libchdb.rsp/g' \ + ) + +# Generate the command to generate libchdb.so +LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/ '${CHDB_PY_MODULE}'/ '${LIBCHDB_SO}'/g') + +if [ ! "${USING_RESPONSE_FILE}" == "" ]; then + ${SED_INPLACE} 's/ '${CHDB_PY_MODULE}'/ '${LIBCHDB_SO}'/g' CMakeFiles/libchdb.rsp +fi + +# For macOS, replace PyInit entry point with exported symbols for libchdb +LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/ '${PYINIT_ENTRY}'/ -Wl,-exported_symbol,_query_stable -Wl,-exported_symbol,_free_result -Wl,-exported_symbol,_query_stable_v2 -Wl,-exported_symbol,_free_result_v2/g') + +LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/libchdb.rsp/g') + +# Save the command to a file for debug +echo ${LIBCHDB_CMD} > libchdb_cmd.sh + +# Build libchdb.so +echo "Building libchdb.so..." 
+${LIBCHDB_CMD} + +LIBCHDB_DIR=${BUILD_DIR}/ +LIBCHDB=${LIBCHDB_DIR}/${LIBCHDB_SO} +ls -lh ${LIBCHDB} + +# Build chdb python module +CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DCHDB_CROSSCOMPILING=1 -DCHDB_PYTHON_INCLUDE_DIR_PREFIX=${CHDB_PYTHON_INCLUDE_DIR_PREFIX} -DPYBIND11_NOPYTHON=ON .. +ninja -d keeprsp || true + +# Delete the binary and run ninja -v again to capture the command +rm -f ${BINARY} +cd ${BUILD_DIR} +ninja -d keeprsp -v > build.log || true + +USING_RESPONSE_FILE=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log | grep '@CMakeFiles/clickhouse.rsp' || true) + +if [ ! "${USING_RESPONSE_FILE}" == "" ]; then + if [ -f CMakeFiles/clickhouse.rsp ]; then + cp -a CMakeFiles/clickhouse.rsp CMakeFiles/pychdb.rsp + else + echo "CMakeFiles/clickhouse.rsp not found" + exit 1 + fi +fi + +# Extract the command to generate CHDB_PY_MODULE +PYCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \ + | sed "s/-o programs\/clickhouse/-fPIC -Wl,-undefined,dynamic_lookup -shared ${PYINIT_ENTRY} -o ${CHDB_PY_MODULE}/" \ + | sed 's/^[^&]*&& //' | sed 's/&&.*//' \ + | sed 's/ -Wl,-undefined,error/ -Wl,-undefined,dynamic_lookup/g' \ + | sed 's/ -Xlinker --no-undefined//g' \ + | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/pychdb.rsp/g' \ + ) + +# For macOS, set rpath +PYCHDB_CMD=$(echo ${PYCHDB_CMD} | sed 's|-Wl,-rpath,/[^[:space:]]*/pybind11-cmake|-Wl,-rpath,@loader_path|g') + +# Save the command to a file for debug +echo ${PYCHDB_CMD} > pychdb_cmd.sh + +echo "Building Python module..." 
+${PYCHDB_CMD} + +ls -lh ${CHDB_PY_MODULE} + +## Check all the so files +LIBCHDB_DIR=${BUILD_DIR}/ + +PYCHDB=${LIBCHDB_DIR}/${CHDB_PY_MODULE} +LIBCHDB=${LIBCHDB_DIR}/${LIBCHDB_SO} + +if [ ${build_type} == "Debug" ]; then + echo -e "\nDebug build, skip strip" +else + echo -e "\nStrip the binary:" + ${STRIP} -x ${PYCHDB} + ${STRIP} -x ${LIBCHDB} +fi + +echo -e "\nPYCHDB: ${PYCHDB}" +ls -lh ${PYCHDB} +echo -e "\nLIBCHDB: ${LIBCHDB}" +ls -lh ${LIBCHDB} +echo -e "\nfile info of ${PYCHDB}" +file ${PYCHDB} +echo -e "\nfile info of ${LIBCHDB}" +file ${LIBCHDB} + +rm -f ${CHDB_DIR}/*.so +cp -a ${PYCHDB} ${CHDB_DIR}/${CHDB_PY_MODULE} +cp -a ${LIBCHDB} ${PROJ_DIR}/${LIBCHDB_SO} + +echo -e "\nSymbols:" +echo -e "\nPyInit in PYCHDB: ${PYCHDB}" +${NM} ${PYCHDB} | grep PyInit || true +echo -e "\nPyInit in LIBCHDB: ${LIBCHDB}" +${NM} ${LIBCHDB} | grep PyInit || echo "PyInit not found in ${LIBCHDB}, it's OK" +echo -e "\nquery_stable in PYCHDB: ${PYCHDB}" +${NM} ${PYCHDB} | grep query_stable || true +echo -e "\nquery_stable in LIBCHDB: ${LIBCHDB}" +${NM} ${LIBCHDB} | grep query_stable || true + +echo -e "\nAfter copy:" +cd ${PROJ_DIR} && pwd + +ccache -s || true + +if ! CMAKE_ARGS="${CMAKE_ARGS}" CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" bash ${DIR}/build_pybind11.sh --all --cross-compile --build-dir=${BUILD_DIR}; then + echo "Error: Failed to build pybind11 libraries" + exit 1 +fi + +# Fix LC_RPATH in _chdb.abi3.so for cross-compiled builds +echo -e "\nFixing LC_RPATH in ${CHDB_PY_MODULE}..." 
+INSTALL_NAME_TOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-install_name_tool" +OTOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool" + +echo -e "\nPre library dependencies:" +${OTOOL} -L ${CHDB_DIR}/${CHDB_PY_MODULE} + +STUBS_LIB="libpybind11nonlimitedapi_stubs.dylib" +OLD_STUBS_PATH=$(${OTOOL} -L ${CHDB_DIR}/${CHDB_PY_MODULE} | grep "${STUBS_LIB}" | awk '{print $1}') +if [ -n "${OLD_STUBS_PATH}" ]; then + echo "Changing ${STUBS_LIB} reference:" + echo " From: ${OLD_STUBS_PATH}" + echo " To: @loader_path/${STUBS_LIB}" + ${INSTALL_NAME_TOOL} -change "${OLD_STUBS_PATH}" "@loader_path/${STUBS_LIB}" ${CHDB_DIR}/${CHDB_PY_MODULE} +else + echo "${STUBS_LIB} not found in dependencies" +fi + +echo -e "\nPost library dependencies:" +${OTOOL} -L ${CHDB_DIR}/${CHDB_PY_MODULE} + +echo -e "\nCross-compilation for macOS ${TARGET_ARCH} completed successfully!" +echo -e "Generated files:" +echo -e " - ${PROJ_DIR}/${LIBCHDB_SO}" +echo -e " - ${CHDB_DIR}/${CHDB_PY_MODULE}" +echo -e "\nBuild directory: ${BUILD_DIR}" diff --git a/chdb/build_pybind11.sh b/chdb/build_pybind11.sh index 33066b916fe..ff7118b3867 100755 --- a/chdb/build_pybind11.sh +++ b/chdb/build_pybind11.sh @@ -4,6 +4,8 @@ set -e build_all=false py_version="" +cross_compile=false +custom_build_dir="" for arg in "$@"; do case $arg in @@ -15,11 +17,29 @@ for arg in "$@"; do py_version="${arg#*=}" shift ;; + --cross-compile) + cross_compile=true + shift + ;; + --build-dir=*) + custom_build_dir="${arg#*=}" + shift + ;; esac done DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. ${DIR}/vars.sh +if [ "$cross_compile" = true ]; then + . ${DIR}/vars.sh cross-compile +else + . 
${DIR}/vars.sh +fi + +# Override BUILD_DIR if custom build dir is specified +if [ -n "$custom_build_dir" ]; then + BUILD_DIR="$custom_build_dir" + echo "Using custom BUILD_DIR: ${BUILD_DIR}" +fi # Check if CMAKE_ARGS is passed from build.sh if [ -z "$CMAKE_ARGS" ]; then @@ -35,6 +55,14 @@ build_pybind11_nonlimitedapi() { local py_cmake_args="${CMAKE_ARGS} -DPYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION=${py_version}" + # Add cross-compile flags if needed + if [ "$cross_compile" = true ]; then + py_cmake_args="${py_cmake_args} -DCHDB_CROSSCOMPILING=1 -DPYBIND11_NOPYTHON=ON" + local python_include_dir="${CHDB_PYTHON_INCLUDE_DIR_PREFIX:-${HOME}/python_include}" + py_cmake_args="${py_cmake_args} -DCHDB_PYTHON_INCLUDE_DIR_PREFIX=${python_include_dir}" + echo "Cross-compiling mode enabled, using Python headers from ${python_include_dir}" + fi + cmake ${py_cmake_args} -DENABLE_PYTHON=1 .. # Build only the pybind11 targets @@ -45,7 +73,7 @@ build_pybind11_nonlimitedapi() { # Copy the built library to output directory local lib_name="pybind11nonlimitedapi_chdb_${py_version}" - if [ "$(uname)" == "Darwin" ]; then + if [ "$(uname)" == "Darwin" ] || [ "$cross_compile" = true ]; then local lib_file="lib${lib_name}.dylib" else local lib_file="lib${lib_name}.so" @@ -65,51 +93,61 @@ build_pybind11_nonlimitedapi() { build_all_pybind11_nonlimitedapi() { local python_versions=("3.8" "3.9" "3.10" "3.11" "3.12" "3.13" "3.14") - # Skip Python 3.8 for macOS x86_64 - if [ "$(uname)" == "Darwin" ] && [ "$(uname -m)" == "x86_64" ]; then - python_versions=("3.9" "3.10" "3.11" "3.12" "3.13" "3.14") - fi - echo "Building pybind11 nonlimitedapi libraries for all Python versions..." - # Check if pyenv is available - if [ -z "$(command -v pyenv)" ]; then - echo "Error: pyenv not found. Please install pyenv first." - exit 1 - fi - - for version in "${python_versions[@]}"; do - # Use pyenv to find specific version - local pyenv_version=$(pyenv versions --bare | grep "^${version}\." 
| head -1) - if [ -z "$pyenv_version" ]; then - echo "Error: Python ${version} not found in pyenv. Please install it with: pyenv install ${version}.x" + if [ "$cross_compile" = true ]; then + # For cross-compilation, use pre-downloaded headers + echo "Cross-compilation mode: using pre-downloaded Python headers" + for version in "${python_versions[@]}"; do + local python_include_dir="${CHDB_PYTHON_INCLUDE_DIR_PREFIX:-${HOME}/python_include}/${version}" + if [ -f "${python_include_dir}/Python.h" ]; then + echo " Found headers for Python ${version} at: ${python_include_dir}" + build_pybind11_nonlimitedapi "${version}" + else + echo "Error: Python.h not found for Python ${version} at ${python_include_dir}" + exit 1 + fi + done + else + # Check if pyenv is available + if [ -z "$(command -v pyenv)" ]; then + echo "Error: pyenv not found. Please install pyenv first." exit 1 fi - echo "Found pyenv Python ${pyenv_version}" - export PYENV_VERSION=$pyenv_version + for version in "${python_versions[@]}"; do + # Use pyenv to find specific version + local pyenv_version=$(pyenv versions --bare | grep "^${version}\." | head -1) + if [ -z "$pyenv_version" ]; then + echo "Error: Python ${version} not found in pyenv. 
Please install it with: pyenv install ${version}.x" + exit 1 + fi + + echo "Found pyenv Python ${pyenv_version}" + export PYENV_VERSION=$pyenv_version + + local python_include=$(python -c "import sysconfig; print(sysconfig.get_path('include'))" 2>/dev/null) + local active_version=$(python --version 2>&1) + echo " Active Python: $active_version" + + if [ -f "$python_include/Python.h" ]; then + echo " Headers found at: $python_include" + build_pybind11_nonlimitedapi "${version}" + else + echo "Error: Python.h not found for Python ${version} at $python_include" + unset PYENV_VERSION + exit 1 + fi - local python_include=$(python -c "import sysconfig; print(sysconfig.get_path('include'))" 2>/dev/null) - local active_version=$(python --version 2>&1) - echo " Active Python: $active_version" - - if [ -f "$python_include/Python.h" ]; then - echo " Headers found at: $python_include" - build_pybind11_nonlimitedapi "${version}" - else - echo "Error: Python.h not found for Python ${version} at $python_include" unset PYENV_VERSION - exit 1 - fi - - unset PYENV_VERSION - done + done + fi echo "Finished building pybind11 nonlimitedapi libraries" } copy_stubs() { - if [ "$(uname)" == "Darwin" ]; then + if [ "$(uname)" == "Darwin" ] || [ "$cross_compile" = true ]; then local lib_file="libpybind11nonlimitedapi_stubs.dylib" else local lib_file="libpybind11nonlimitedapi_stubs.so" diff --git a/chdb/test_smoke.sh b/chdb/test_smoke.sh index ddc1f97571d..e254efe93f0 100755 --- a/chdb/test_smoke.sh +++ b/chdb/test_smoke.sh @@ -4,7 +4,7 @@ set -e DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. ${DIR}/vars.sh +. 
${DIR}/vars.sh "$1" # test the pybind module cd ${CHDB_DIR} diff --git a/chdb/vars.sh b/chdb/vars.sh index b1b2100a5b6..6513e8e3e20 100755 --- a/chdb/vars.sh +++ b/chdb/vars.sh @@ -9,6 +9,10 @@ pushd ${PROJ_DIR} CHDB_VERSION=$(python3 -c 'import setup; print(setup.get_latest_git_tag())') popd +if [ "$1" == "cross-compile" ]; then + return +fi + # try to use largest llvm-strip version # if none of them are found, use llvm-strip or strip if [ -z "$STRIP" ]; then diff --git a/cmake/darwin/toolchain-aarch64.cmake b/cmake/darwin/toolchain-aarch64.cmake index 178153c1098..0243006c184 100644 --- a/cmake/darwin/toolchain-aarch64.cmake +++ b/cmake/darwin/toolchain-aarch64.cmake @@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "aarch64-apple-darwin") set (CMAKE_ASM_COMPILER_TARGET "aarch64-apple-darwin") set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-aarch64") +# Set minimum macOS deployment target to 11.0 (Big Sur - first version with Apple Silicon support) +set (CMAKE_OSX_DEPLOYMENT_TARGET "11.0") + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake diff --git a/cmake/darwin/toolchain-x86_64.cmake b/cmake/darwin/toolchain-x86_64.cmake index b9cbe72a2b6..ab060d6aa28 100644 --- a/cmake/darwin/toolchain-x86_64.cmake +++ b/cmake/darwin/toolchain-x86_64.cmake @@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-apple-darwin") set (CMAKE_ASM_COMPILER_TARGET "x86_64-apple-darwin") set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-x86_64") +# Set minimum macOS deployment target to 10.15 (Catalina) +set (CMAKE_OSX_DEPLOYMENT_TARGET "10.15") + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 13febb2d1f2..23e7d2d12e8 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -59,6 +59,8 @@ if (LINKER_NAME) message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") 
endif () set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}") + set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --ld-path=${LLD_PATH}") endif () if (LINKER_NAME) diff --git a/contrib/pybind11-cmake/CMakeLists.txt b/contrib/pybind11-cmake/CMakeLists.txt index 0427cf47b71..ea9a499448b 100644 --- a/contrib/pybind11-cmake/CMakeLists.txt +++ b/contrib/pybind11-cmake/CMakeLists.txt @@ -6,10 +6,32 @@ endif() string(REPLACE "-Wl,-z,defs" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}") -if (DEFINED PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION) - find_package(Python ${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION} EXACT REQUIRED COMPONENTS Interpreter Development) +if(CHDB_CROSSCOMPILING) + # For cross-compiling, manually set Python variables + if (DEFINED PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION) + # Parse version like "3.8" into major and minor + string(REPLACE "." 
";" VERSION_LIST ${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION}) + list(GET VERSION_LIST 0 Python_VERSION_MAJOR) + list(GET VERSION_LIST 1 Python_VERSION_MINOR) + set(Python_VERSION "${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION}") + + # Set include directory based on provided prefix and version + set(Python_INCLUDE_DIRS "${CHDB_PYTHON_INCLUDE_DIR_PREFIX}/${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}") + else() + # Default to Python 3.8 if not specified + set(Python_VERSION_MAJOR 3) + set(Python_VERSION_MINOR 8) + set(Python_VERSION "3.8") + set(Python_INCLUDE_DIRS "${CHDB_PYTHON_INCLUDE_DIR_PREFIX}/3.8") + endif() + + message(STATUS "Cross-compiling: Using Python ${Python_VERSION} include directory: ${Python_INCLUDE_DIRS}") else() - find_package(Python REQUIRED COMPONENTS Interpreter Development) + if (DEFINED PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION) + find_package(Python ${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION} EXACT REQUIRED COMPONENTS Interpreter Development) + else() + find_package(Python REQUIRED COMPONENTS Interpreter Development) + endif() endif() set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/pybind11/") @@ -72,7 +94,7 @@ message(STATUS "Python_VERSION_MAJOR: ${Python_VERSION_MAJOR}") message(STATUS "Python_VERSION_MINOR: ${Python_VERSION_MINOR}") message(STATUS "Resulting LIBNAME: ${PYBIND11_NONLIMITEDAPI_LIBNAME}") -if(Python_FOUND) +if(Python_FOUND OR CHDB_CROSSCOMPILING) target_include_directories(${PYBIND11_NONLIMITEDAPI_LIBNAME} PRIVATE ${PYBIND11_INCLUDE_DIR}) target_include_directories(${PYBIND11_NONLIMITEDAPI_LIBNAME} PRIVATE ${Python_INCLUDE_DIRS}) target_link_libraries(${PYBIND11_NONLIMITEDAPI_LIBNAME} PUBLIC ch_contrib::pybind11_stubs) diff --git a/examples/arrow_c_abi.h b/examples/arrow_c_abi.h new file mode 100644 index 00000000000..6abe866b5f6 --- /dev/null +++ b/examples/arrow_c_abi.h @@ -0,0 +1,233 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// \file abi.h Arrow C Data Interface +/// +/// The Arrow C Data interface defines a very small, stable set +/// of C definitions which can be easily copied into any project's +/// source code and vendored to be used for columnar data interchange +/// in the Arrow format. For non-C/C++ languages and runtimes, +/// it should be almost as easy to translate the C definitions into +/// the corresponding C FFI declarations. +/// +/// Applications and libraries can therefore work with Arrow memory +/// without necessarily using the Arrow libraries or reinventing +/// the wheel. Developers can choose between tight integration +/// with the Arrow software project or minimal integration with +/// the Arrow format only. 
+ +#pragma once + +#include + +// Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE + +#ifndef ARROW_C_DEVICE_DATA_INTERFACE +#define ARROW_C_DEVICE_DATA_INTERFACE + +// Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html + +// DeviceType for the allocated memory +typedef int32_t ArrowDeviceType; + +// CPU device, same as using ArrowArray directly +#define ARROW_DEVICE_CPU 1 +// CUDA GPU Device +#define ARROW_DEVICE_CUDA 2 +// Pinned CUDA CPU memory by cudaMallocHost +#define ARROW_DEVICE_CUDA_HOST 3 +// OpenCL Device +#define ARROW_DEVICE_OPENCL 4 +// Vulkan buffer for next-gen graphics +#define ARROW_DEVICE_VULKAN 7 +// Metal for Apple GPU +#define ARROW_DEVICE_METAL 8 +// Verilog simulator buffer +#define ARROW_DEVICE_VPI 9 +// ROCm GPUs for AMD GPUs +#define ARROW_DEVICE_ROCM 10 +// Pinned ROCm CPU memory allocated by hipMallocHost +#define ARROW_DEVICE_ROCM_HOST 11 +// Reserved for extension +#define ARROW_DEVICE_EXT_DEV 12 +// CUDA 
managed/unified memory allocated by cudaMallocManaged +#define ARROW_DEVICE_CUDA_MANAGED 13 +// unified shared memory allocated on a oneAPI non-partitioned device. +#define ARROW_DEVICE_ONEAPI 14 +// GPU support for next-gen WebGPU standard +#define ARROW_DEVICE_WEBGPU 15 +// Qualcomm Hexagon DSP +#define ARROW_DEVICE_HEXAGON 16 + +struct ArrowDeviceArray { + // the Allocated Array + // + // the buffers in the array (along with the buffers of any + // children) are what is allocated on the device. + struct ArrowArray array; + // The device id to identify a specific device + int64_t device_id; + // The type of device which can access this memory. + ArrowDeviceType device_type; + // An event-like object to synchronize on if needed. + void* sync_event; + // Reserved bytes for future expansion. + int64_t reserved[3]; +}; + +#endif // ARROW_C_DEVICE_DATA_INTERFACE + +#ifndef ARROW_C_STREAM_INTERFACE +#define ARROW_C_STREAM_INTERFACE + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. 
+ // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_STREAM_INTERFACE + +#ifndef ARROW_C_DEVICE_STREAM_INTERFACE +#define ARROW_C_DEVICE_STREAM_INTERFACE + +// Equivalent to ArrowArrayStream, but for ArrowDeviceArrays. +// +// This stream is intended to provide a stream of data on a single +// device, if a producer wants data to be produced on multiple devices +// then multiple streams should be provided. One per device. +struct ArrowDeviceArrayStream { + // The device that this stream produces data on. + ArrowDeviceType device_type; + + // Callback to get the stream schema + // (will be the same for all arrays in the stream). + // + // Return value 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + // The schema should be accessible via CPU memory. + int (*get_schema)(struct ArrowDeviceArrayStream* self, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowDeviceArray must be released independently from the stream. + int (*get_next)(struct ArrowDeviceArrayStream* self, struct ArrowDeviceArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. 
+ // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowDeviceArrayStream* self); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowDeviceArrayStream* self); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DEVICE_STREAM_INTERFACE + +#ifdef __cplusplus +} +#endif diff --git a/examples/chdbArrowTest.c b/examples/chdbArrowTest.c index f91d4d4c8c4..61b9a8dd603 100644 --- a/examples/chdbArrowTest.c +++ b/examples/chdbArrowTest.c @@ -6,7 +6,7 @@ #include #include "../programs/local/chdb.h" -#include "../contrib/arrow/cpp/src/arrow/c/abi.h" +#include "arrow_c_abi.h" // Custom ArrowArrayStream implementation data typedef struct CustomStreamData diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 2cade0b59be..19704ee5acb 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -61,11 +61,17 @@ if (USE_PYTHON) include_directories(${PYBIND11_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) # include Python.h - execute_process(COMMAND python3-config --includes - OUTPUT_VARIABLE PYTHON_INCLUDES - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(REGEX REPLACE ".*-I([^ ]+).*" "\\1" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDES}) + if(CHDB_CROSSCOMPILING) + # For cross-compiling, use the provided Python include directory + set(PYTHON_INCLUDE_DIR "${CHDB_PYTHON_INCLUDE_DIR_PREFIX}/3.8") + message(STATUS "Cross-compiling: Using Python include directory: ${PYTHON_INCLUDE_DIR}") + else() + execute_process(COMMAND python3-config --includes + OUTPUT_VARIABLE PYTHON_INCLUDES + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + string(REGEX REPLACE ".*-I([^ ]+).*" "\\1" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDES}) + endif() foreach(_file ${CHDB_SOURCES}) set_source_files_properties(${_file} @@ -74,14 +80,13 @@ if 
(USE_PYTHON) ) endforeach(_file) - # get python version, something like python3.x - execute_process(COMMAND python3 -c "import sys; print('python3.'+str(sys.version_info[1]))" - OUTPUT_VARIABLE PYTHON_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - # remove all warning, because pybind11 will generate a lot of warning if (OS_LINUX) + # get python version, something like python3.x + execute_process(COMMAND python3 -c "import sys; print('python3.'+str(sys.version_info[1]))" + OUTPUT_VARIABLE PYTHON_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ) # pybind11 will try to find x86_64-linux-gnu/${PYTHON_VERSION}/pyconfig.h # use -idirafter to make it find the right one and not polute the include path # set_source_files_properties(LocalChdb.cpp PROPERTIES COMPILE_FLAGS diff --git a/tests/test_data_insertion.ipynb b/tests/test_data_insertion.ipynb index 20bbb87bae2..e5c7114dbbe 100644 --- a/tests/test_data_insertion.ipynb +++ b/tests/test_data_insertion.ipynb @@ -3,32 +3,28 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "jupyter": { - "is_executing": true - } - }, + "metadata": {}, "outputs": [], "source": [ "from chdb import session\n", "import time\n", - "import tempfile\n", "import os\n", + "import shutil\n", "\n", "print(\"Connecting to chdb session...\")\n", - "chs = session.Session()\n", - "\n", - "temp_csv = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)\n", - "temp_csv.write(\"movieId,embedding\\n\") # Header\n", + "session_dir = os.path.join(os.getcwd(), \"chdb_test_data_insertion_ipynb\")\n", + "os.makedirs(session_dir, exist_ok=True)\n", + "chs = session.Session(session_dir)\n", "\n", - "# Generate 10,000 rows of test data\n", - "for i in range(1, 10001):\n", - " embedding = [float(i + j * 0.1) for j in range(10)]\n", - " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", - " temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n", + "csv_path = os.path.join(os.getcwd(), \"chdb_test_data_insertion_embedding.csv\")\n", 
+ "with open(csv_path, 'w') as temp_csv:\n", + " temp_csv.write(\"movieId,embedding\\n\") # Header\n", "\n", - "temp_csv.close()\n", - "csv_path = temp_csv.name\n", + " # Generate 10,000 rows of test data\n", + " for i in range(1, 10001):\n", + " embedding = [float(i + j * 0.1) for j in range(10)]\n", + " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", + " temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n", "\n", "# Setup database and table\n", "print(\"\\n=== Setup Phase ===\")\n", @@ -49,15 +45,15 @@ " result = chs.query(f\"INSERT INTO embeddings FROM INFILE '{csv_path}' FORMAT CSV\")\n", " infile_time = time.time() - start_time\n", " print(f\"✓ INFILE insertion successful! Time: {infile_time:.3f}s\")\n", - " \n", + "\n", " count = chs.query('SELECT COUNT(*) as count FROM embeddings')\n", " print(f\"Records inserted via INFILE: {count}\")\n", - " \n", + "\n", " if count != '0':\n", " print(\"Sample data from INFILE:\")\n", " sample = chs.query('SELECT movieId, embedding FROM embeddings ORDER BY movieId LIMIT 3')\n", " print(sample)\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"✗ INFILE insertion failed: {e}\")\n", " infile_time = 0\n", @@ -71,10 +67,10 @@ " embedding = [float(i + j * 0.1) for j in range(10)]\n", " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", " chs.query(f\"INSERT INTO embeddings VALUES ({i}, {embedding_str})\")\n", - " \n", + "\n", " values_time = time.time() - start_time\n", " print(f\"✓ VALUES insertion successful! 
Time: {values_time:.3f}s\")\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"✗ VALUES insertion failed: {e}\")\n", " values_time = 0\n", @@ -84,21 +80,21 @@ "try:\n", " total_count = chs.query('SELECT COUNT(*) as total FROM embeddings')\n", " print(f\"Total records in embeddings table: {total_count}\")\n", - " \n", + "\n", " # Count by range\n", " infile_count = chs.query('SELECT COUNT(*) as infile_count FROM embeddings WHERE movieId <= 10000')\n", " values_count = chs.query('SELECT COUNT(*) as values_count FROM embeddings WHERE movieId >= 20001')\n", - " \n", + "\n", " print(f\"Records from INFILE (movieId <= 10000): {infile_count}\")\n", " print(f\"Records from VALUES (movieId >= 20001): {values_count}\")\n", - " \n", + "\n", " # Sample from both ranges\n", " print(\"\\nSample from INFILE data:\")\n", " print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId <= 10000 ORDER BY movieId LIMIT 2'))\n", - " \n", + "\n", " print(\"Sample from VALUES data:\")\n", " print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId >= 20001 ORDER BY movieId LIMIT 2'))\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"Count verification error: {e}\")\n", "\n", @@ -106,30 +102,32 @@ "print(\"\\n=== Test 4: CSV Engine Direct Reading ===\")\n", "try:\n", " print(\"Reading generated CSV file directly using CSV engine:\")\n", - " \n", + "\n", " # Method 1: Using file() function\n", " csv_count1 = chs.query(f\"SELECT COUNT(*) as csv_count FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String')\")\n", " print(f\"CSV file rows (via file() function): {csv_count1}\")\n", - " \n", + "\n", " # Method 2: Using CSV table engine directly\n", " print(\"Sample rows from CSV file:\")\n", " csv_sample = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId LIMIT 3\")\n", " print(csv_sample)\n", - " \n", + "\n", " print(\"Last few rows from CSV file:\")\n", " csv_tail = 
chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId DESC LIMIT 3\")\n", " print(csv_tail)\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"CSV engine reading error: {e}\")\n", "\n", "# Cleanup\n", "print(\"\\n=== Cleanup ===\")\n", "try:\n", + " chs.close()\n", " os.unlink(csv_path)\n", - " print(\"✓ Temporary CSV file cleaned up\")\n", + " shutil.rmtree(session_dir, ignore_errors=True)\n", + " print(\"Temporary files cleaned up\")\n", "except Exception as e:\n", - " print(f\"Warning: Could not clean up temporary file: {e}\")\n", + " print(f\"Warning: Could not clean up temporary files: {e}\")\n", "\n", "print(f\"\\n=== Performance Summary ===\")\n", "if infile_time > 0:\n",