Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,19 @@ jobs:
--exclude cust
'

# The `llvm19` feature on `nvvm` / `cuda_builder` / `rustc_codegen_nvvm` requires
# an LLVM 19 toolchain that isn't in the CI image, so we can't run a single
# `--all-features` pass over the whole workspace. Document those three crates with
# default features (the LLVM 7 path the CI image already supports) and the rest
# of the workspace with `--all-features`.
- name: Check documentation
run: |
docker exec "$CONTAINER_NAME" bash -lc 'set -euo pipefail
export RUSTDOCFLAGS=-Dwarnings
cargo doc --workspace --all-features --document-private-items --no-deps
cargo doc --workspace --all-features --document-private-items --no-deps \
--exclude rustc_codegen_nvvm --exclude cuda_builder --exclude nvvm
cargo doc -p rustc_codegen_nvvm -p cuda_builder -p nvvm \
--document-private-items --no-deps
'

- name: Stop build container
Expand Down
10 changes: 9 additions & 1 deletion .github/workflows/ci_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,20 @@ jobs:
--exclude blastoff --exclude cudnn --exclude cudnn-sys --exclude cust

# Exclude crates that require cuDNN, not available on Windows CI: cudnn, cudnn-sys.
# The `llvm19` feature on `nvvm` / `cuda_builder` / `rustc_codegen_nvvm` requires
# an LLVM 19 toolchain that isn't in the CI image, so we can't run a single
# `--all-features` pass over the whole workspace. Document those three crates with
# default features (the LLVM 7 path the CI image already supports) and the rest
# of the workspace with `--all-features`.
- name: Check documentation
env:
RUSTDOCFLAGS: -Dwarnings
run: |
cargo doc --workspace --all-features --document-private-items --no-deps `
--exclude cudnn --exclude cudnn-sys
--exclude cudnn --exclude cudnn-sys `
--exclude rustc_codegen_nvvm --exclude cuda_builder --exclude nvvm
cargo doc -p rustc_codegen_nvvm -p cuda_builder -p nvvm `
--document-private-items --no-deps

# Disabled due to dll issues, someone with Windows knowledge needed
# - name: Compiletest
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@ book
/target
**/.vscode
.devcontainer
.codex
rustc-ice-*.txt
.nix-driver-libs
.claude
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions crates/cuda_builder/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ default = []
# HACK(see rust-gpu/spirv-builder): use `dep:` to avoid Cargo auto-creating a feature
# with the dependency name. Consumers must explicitly opt-in to compiling the backend.
rustc_codegen_nvvm = ["dep:rustc_codegen_nvvm"]
# Build the backend against LLVM 19 instead of LLVM 7. Propagates to `nvvm` (which
# uses it to flip the default `NvvmArch` to `Compute100`) and, when the optional
# `rustc_codegen_nvvm` dep is also enabled, to `rustc_codegen_nvvm` itself. Even
# when the optional dep is disabled, `cuda_builder` checks `cfg!(feature = "llvm19")`
# before running its nested `cargo build -p rustc_codegen_nvvm` and forwards the
# feature to that build via `--features llvm19`.
llvm19 = ["nvvm/llvm19", "rustc_codegen_nvvm?/llvm19"]

[dependencies]
rustc_codegen_nvvm = { version = "0.3", path = "../rustc_codegen_nvvm", optional = true }
Expand Down
17 changes: 12 additions & 5 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,13 +550,20 @@ fn build_backend_and_find(filename: &str) -> Option<PathBuf> {

let target_dir = workspace_dir.join("target").join("cuda-builder-codegen");

let status = Command::new("cargo")
.args(["build", "-p", "rustc_codegen_nvvm"])
let mut cmd = Command::new("cargo");
cmd.args(["build", "-p", "rustc_codegen_nvvm"])
.arg("--target-dir")
.arg(&target_dir)
.current_dir(&workspace_dir)
.status()
.ok()?;
.current_dir(&workspace_dir);

// Propagate the `llvm19` cargo feature to the nested backend build. Without this,
// `rustc_codegen_nvvm`'s build script falls through to the prebuilt LLVM 7
// download, which the LLVM 19 codegen path can't link against.
if cfg!(feature = "llvm19") {
cmd.args(["--features", "llvm19"]);
}

let status = cmd.status().ok()?;

if !status.success() {
return None;
Expand Down
8 changes: 8 additions & 0 deletions crates/cust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,12 @@ fn main() {
println!("cargo::rustc-cfg=cuGraphGetEdges_v2");
println!("cargo::rustc-cfg=cuCtxCreate_v4");
}

// In CUDA 13.2 the `id` field in `CUmemLocation_st` was placed inside an anonymous union.
// Bindgen renders this as `__bindgen_anon_1: CUmemLocation_st__bindgen_ty_1` instead of a
// direct `id` field. This cfg gates the struct initialization syntax accordingly.
println!("cargo::rustc-check-cfg=cfg(cuMemLocation_anon_id)");
if driver_version >= 13020 {
println!("cargo::rustc-cfg=cuMemLocation_anon_id");
}
}
15 changes: 15 additions & 0 deletions crates/cust/src/memory/unified.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is from #368

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LegNeato should I put that this depends on #368 to land and then I'd rebase off of that?

I also think that once this lands, I would look into... does it need to be LLVM 19 or can it be LLVM 21+?

__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -693,6 +696,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -735,6 +741,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down Expand Up @@ -777,6 +786,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand All @@ -801,6 +813,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down
1 change: 1 addition & 0 deletions crates/cust_raw/build/cuda_sdk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ impl CudaSdk {
vec![
cuda_root.join("nvvm").join("bin"),
cuda_root.join("nvvm").join("lib64"),
cuda_root.join("nvvm").join("lib"),
]
};
let library_dirs = Self::normalize_dirpaths(search_dirs);
Expand Down
9 changes: 6 additions & 3 deletions crates/cust_raw/build/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,12 @@ fn main() {
println!("cargo::rustc-link-search=native={}", libdir.display());
}
println!("cargo::rustc-link-lib=dylib=nvvm");
// Handle libdevice support.
fs::copy(sdk.libdevice_bitcode_path(), outdir.join("libdevice.bc"))
.expect("Cannot copy libdevice bitcode file.");
// `fs::copy` preserves source mode. When libdevice.10.bc comes from
// the Nix store (0444), re-running this build can't overwrite the
// previous copy in OUT_DIR. Drop it first.
let dest = outdir.join("libdevice.bc");
let _ = fs::remove_file(&dest);
fs::copy(sdk.libdevice_bitcode_path(), &dest).expect("Cannot copy libdevice bitcode file.");
}
}

Expand Down
8 changes: 8 additions & 0 deletions crates/nvvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ description = "High level bindings to libnvvm"
repository = "https://github.com/Rust-GPU/rust-cuda"
readme = "../../README.md"

[features]
default = []
# Match the `llvm19` feature on `rustc_codegen_nvvm`. Currently only flips the
# default `NvvmArch` to the lowest Blackwell capability, since the LLVM 7
# bitcode dialect can't target `compute_100+` and the LLVM 19 dialect can't
# target pre-Blackwell archs.
llvm19 = []

[dependencies]
cust_raw = { version = "0.11.3", path = "../cust_raw", default-features = false, features = ["nvvm"] }
strum = { version = "0.27", features = ["derive"] }
39 changes: 35 additions & 4 deletions crates/nvvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{
ffi::{CStr, CString},
fmt::Display,
mem::MaybeUninit,
ptr::null_mut,
str::FromStr,
};

Expand Down Expand Up @@ -311,12 +310,13 @@ pub enum NvvmArch {
Compute73,
/// This default value of 7.5 corresponds to Turing and later devices. We default to this
/// because it is the minimum supported by CUDA 13.0 while being in the middle of the range
/// supported by CUDA 12.x.
/// supported by CUDA 12.x. Selected as the default only when the `llvm19` feature is off;
/// the LLVM 19 NVVM dialect can't target pre-Blackwell archs.
// WARNING: If you change the default, consider updating:
// - The `--target-arch` values used for compiletests in
// `.github/workflows/ci_{linux,windows}.yml`.
// - The CUDA versions used in `setup_cuda_environment` in `compiletests`.
#[default]
#[cfg_attr(not(feature = "llvm19"), default)]
Compute75,
Compute80,
Compute86,
Expand All @@ -325,6 +325,12 @@ pub enum NvvmArch {
Compute89,
Compute90,
Compute90a,
/// First Blackwell arch and the cutoff for NVVM's modern IR dialect — everything at
/// or above this capability uses the LLVM 19-flavored bitcode accepted by CUDA 12.9+
/// `libnvvm`. See [`NvvmArch::uses_modern_ir_dialect`]. Selected as the default when
/// the `llvm19` feature is enabled, since the LLVM 7 dialect can't target this and
/// the LLVM 19 dialect can't target anything below it.
#[cfg_attr(feature = "llvm19", default)]
Compute100,
Compute100f,
Compute100a,
Expand Down Expand Up @@ -448,6 +454,14 @@ impl NvvmArch {
self.capability_value() % 10
}

/// Returns `true` when this architecture is compiled through NVVM's modern IR dialect and
/// `false` when it still uses the legacy LLVM 7 dialect.
///
/// CUDA 13.2 documents the modern dialect as Blackwell-and-later only; Blackwell starts at
/// `compute_100`, so the cutoff is a capability value of 100.
pub fn uses_modern_ir_dialect(&self) -> bool {
    // Everything at or above `compute_100` is on the modern dialect.
    let capability = self.capability_value();
    capability >= 100
}

/// Get the target feature string (e.g., "compute_50" for `Compute50`, "compute_90a" for
/// `Compute90a`).
pub fn target_feature(&self) -> &'static str {
Expand Down Expand Up @@ -739,7 +753,24 @@ impl NvvmProgram {
/// Verify the program without actually compiling it, passing no options to the verifier.
///
/// In the case of invalid IR, you can find more detailed error info by calling
/// [`compiler_log`](Self::compiler_log). To run the verifier with explicit options, use
/// [`verify_with_options`](Self::verify_with_options).
///
/// # Errors
///
/// Returns an [`NvvmError`] when verification does not succeed.
pub fn verify(&self) -> Result<(), NvvmError> {
unsafe { nvvm_sys::nvvmVerifyProgram(self.raw, 0, null_mut()).to_result() }
self.verify_with_options(&[])
}

/// Like [`verify`](Self::verify), but runs the verifier with the same `NvvmOption`s that will
/// be passed to [`compile`](Self::compile). Passing the user-selected `-arch=compute_XXX` in
/// particular matters for CUDA 12.9+ / LLVM 19 bitcode: without it the verifier can fall back
/// to the legacy LLVM 7 parser and reject modern-dialect bitcode that would otherwise compile
/// fine.
///
/// Each option is rendered through its `Display` impl and handed to the verifier as a
/// NUL-terminated string. An empty `options` slice verifies with no options at all.
///
/// # Errors
///
/// Returns an [`NvvmError`] when verification does not succeed; more detail is available from
/// [`compiler_log`](Self::compiler_log).
pub fn verify_with_options(&self, options: &[NvvmOption]) -> Result<(), NvvmError> {
    // Render every option with a trailing NUL so it can be passed as a C string. This vector
    // owns the string storage and must stay alive until the FFI call below has returned.
    let options = options.iter().map(|x| format!("{x}\0")).collect::<Vec<_>>();
    // One pointer per rendered option, in the same order.
    let mut options_ptr = options
        .iter()
        .map(|x| x.as_ptr().cast())
        .collect::<Vec<_>>();

    // SAFETY: every pointer in `options_ptr` points into a string owned by `options`, which is
    // NUL-terminated and outlives this call; the count passed equals the number of pointers
    // because `options_ptr` was built one-to-one from `options`. `self.raw` is assumed to be a
    // live program handle maintained by `NvvmProgram` (its construction is outside this excerpt).
    unsafe {
        nvvm_sys::nvvmVerifyProgram(self.raw, options.len() as i32, options_ptr.as_mut_ptr())
            .to_result()
    }
}
}

Expand Down
4 changes: 4 additions & 0 deletions crates/rustc_codegen_nvvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ readme = "../../README.md"
[lib]
crate-type = ["dylib"]

[features]
default = []
llvm19 = []

[dependencies]
nvvm = { version = "0.1", path = "../nvvm" }
rustc-demangle = "0.1.24"
Expand Down
Loading
Loading