From 8ec03af75fe61c2db0e962f94b755278d8afad84 Mon Sep 17 00:00:00 2001 From: kulst Date: Sat, 13 Jun 2026 19:08:06 +0200 Subject: [PATCH] Adapt NVPTX target-feature handling for LLVM 22 Previously, Rust only explicitly enabled a PTX version target-feature (ptx70) when no CPU or one of sm_70, sm_72 or sm_75 was selected; for all other CPUs it relied on LLVM implying one from the selected CPU. Starting with LLVM 22, the NVPTX backend still infers the minimum required PTX version from the selected CPU when none is selected explicitly, but no longer reflects that version in the enabled subtarget features. As a result, cfg could miss the PTX version of the emitted PTX. Also, if a PTX version below the required minimum is selected explicitly, LLVM errors. We missed this because the corresponding NVPTX target-feature test was mistakenly disabled. To keep cfg evaluation consistent and avoid LLVM errors, always select an explicit PTX version: whichever is higher between - Rust's minimum required PTX version, and - the CPU's minimum required PTX version. Re-enable the NVPTX target-feature test and adapt it to Rust's minimum required PTX and sm versions. Forbid PTX and sm versions below Rust's minimum required versions by marking the corresponding target-features as `Forbidden`. Add PTX and sm versions present in LLVM 22 but missing as Rust target-features. 
--- compiler/rustc_codegen_llvm/src/llvm_util.rs | 5 ++ .../rustc_codegen_ssa/src/target_features.rs | 47 +++++++++++++---- compiler/rustc_target/src/target_features.rs | 49 +++++++++++++++++-- tests/ui/check-cfg/target_feature.stderr | 15 ++++++ .../target-feature/implied-features-nvptx.rs | 36 +++++++------- 5 files changed, 121 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 73b7f699b606d..ee09a5258baf3 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -303,6 +303,11 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option Some(LLVMFeature::new(s)), } } + Arch::Nvptx64 => match s { + "sm_88" | "ptx90" if major < 22 => None, + s if s.starts_with("sm_110") && major < 22 => None, + s => Some(LLVMFeature::new(s)), + }, _ => Some(LLVMFeature::new(s)), } } diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs index 3c771f0eb7ec4..d42823255028e 100644 --- a/compiler/rustc_codegen_ssa/src/target_features.rs +++ b/compiler/rustc_codegen_ssa/src/target_features.rs @@ -418,15 +418,44 @@ pub fn target_spec_to_backend_features<'a>( ) { let mut rust_features = vec![]; - // This check handles SM versions that defaults (by LLVM) to unsupported (by Rust) PTX ISA versions. - // sm_70, sm_72 and sm_75 defaults to PTX ISA versions with major version 6, while sm_80 default to 7.0 - if sess.target.arch == Arch::Nvptx64 - && matches!( - sess.opts.cg.target_cpu.as_deref(), - None | Some("sm_70") | Some("sm_72") | Some("sm_75") - ) - { - rust_features.push((true, "ptx70")); + if sess.target.arch == Arch::Nvptx64 { + // Starting with LLVM 22, the NVPTX backend still infers the minimum + // required PTX version from the selected CPU when none is selected + // explicitly, but no longer reflects that in the enabled subtarget + // features. 
If a PTX version below the required minimum is selected + // explicitly, LLVM errors. + // To keep cfg evaluation consistent and avoid LLVM errors, always + // select an explicit PTX version: whichever is higher between + // - Rust's minimum required PTX version, and + // - the CPU's minimum required PTX version. + let min_ptx = match sess.opts.cg.target_cpu.as_deref() { + Some("sm_86") => "ptx71", + Some("sm_87") => "ptx74", + Some("sm_88") => "ptx90", + Some("sm_89") => "ptx78", + Some("sm_90") => "ptx78", + Some("sm_90a") => "ptx80", + Some("sm_100") => "ptx86", + Some("sm_100f") => "ptx88", + Some("sm_100a") => "ptx86", + Some("sm_101") => "ptx86", + Some("sm_101f") => "ptx88", + Some("sm_101a") => "ptx86", + Some("sm_103") => "ptx88", + Some("sm_103f") => "ptx88", + Some("sm_103a") => "ptx88", + Some("sm_110") => "ptx90", + Some("sm_110f") => "ptx90", + Some("sm_110a") => "ptx90", + Some("sm_120") => "ptx87", + Some("sm_120f") => "ptx88", + Some("sm_120a") => "ptx87", + Some("sm_121") => "ptx88", + Some("sm_121f") => "ptx88", + Some("sm_121a") => "ptx88", + _ => "ptx70", + }; + rust_features.push((true, min_ptx)); } // Compute implied features diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index 70e45c9684264..1314128df82d0 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -606,25 +606,62 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ // tidy-alphabetical-start + ("sm_20", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_21", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_30", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_32", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_35", Forbidden { reason: "unsupported compute capability", 
hard_error: true }, &[]), + ("sm_37", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_50", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_52", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_53", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_60", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_61", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), + ("sm_62", Forbidden { reason: "unsupported compute capability", hard_error: true }, &[]), ("sm_70", Unstable(sym::nvptx_target_feature), &[]), ("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]), ("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]), ("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]), ("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]), ("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]), - ("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]), + ("sm_88", Unstable(sym::nvptx_target_feature), &["sm_87"]), + ("sm_89", Unstable(sym::nvptx_target_feature), &["sm_88"]), ("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]), ("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]), // tidy-alphabetical-end // tidy-alphabetical-start ("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]), - ("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]), + ("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100f"]), + ("sm_100f", Unstable(sym::nvptx_target_feature), &["sm_100"]), ("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]), - ("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]), - ("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]), - ("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]), + ("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101f"]), + ("sm_101f", 
Unstable(sym::nvptx_target_feature), &["sm_101"]), + ("sm_103", Unstable(sym::nvptx_target_feature), &["sm_100"]), + ("sm_103a", Unstable(sym::nvptx_target_feature), &["sm_103f"]), + ("sm_103f", Unstable(sym::nvptx_target_feature), &["sm_103", "sm_100f"]), + ("sm_110", Unstable(sym::nvptx_target_feature), &["sm_103"]), + ("sm_110a", Unstable(sym::nvptx_target_feature), &["sm_110f"]), + ("sm_110f", Unstable(sym::nvptx_target_feature), &["sm_110"]), + ("sm_120", Unstable(sym::nvptx_target_feature), &["sm_110"]), + ("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120f"]), + ("sm_120f", Unstable(sym::nvptx_target_feature), &["sm_120"]), + ("sm_121", Unstable(sym::nvptx_target_feature), &["sm_120"]), + ("sm_121a", Unstable(sym::nvptx_target_feature), &["sm_121f"]), + ("sm_121f", Unstable(sym::nvptx_target_feature), &["sm_121", "sm_120f"]), // tidy-alphabetical-end // tidy-alphabetical-start + ("ptx32", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx40", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx41", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx42", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx43", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx50", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx60", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx61", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx62", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx63", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx64", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), + ("ptx65", Forbidden { reason: "unsupported PTX version", hard_error: true }, &[]), ("ptx70", Unstable(sym::nvptx_target_feature), &[]), ("ptx71", 
Unstable(sym::nvptx_target_feature), &["ptx70"]), ("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]), @@ -642,6 +679,8 @@ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ ("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]), ("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]), ("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]), + ("ptx88", Unstable(sym::nvptx_target_feature), &["ptx87"]), + ("ptx90", Unstable(sym::nvptx_target_feature), &["ptx88"]), // tidy-alphabetical-end ]; diff --git a/tests/ui/check-cfg/target_feature.stderr b/tests/ui/check-cfg/target_feature.stderr index bcc3abf4ff267..73e46c37c971f 100644 --- a/tests/ui/check-cfg/target_feature.stderr +++ b/tests/ui/check-cfg/target_feature.stderr @@ -262,6 +262,8 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `ptx85` `ptx86` `ptx87` +`ptx88` +`ptx90` `quadword-atomics` `rand` `ras` @@ -294,16 +296,29 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `sm4` `sm_100` `sm_100a` +`sm_100f` `sm_101` `sm_101a` +`sm_101f` +`sm_103` +`sm_103a` +`sm_103f` +`sm_110` +`sm_110a` +`sm_110f` `sm_120` `sm_120a` +`sm_120f` +`sm_121` +`sm_121a` +`sm_121f` `sm_70` `sm_72` `sm_75` `sm_80` `sm_86` `sm_87` +`sm_88` `sm_89` `sm_90` `sm_90a` diff --git a/tests/ui/target-feature/implied-features-nvptx.rs b/tests/ui/target-feature/implied-features-nvptx.rs index a51c22afaf956..b4df58c8e05e8 100644 --- a/tests/ui/target-feature/implied-features-nvptx.rs +++ b/tests/ui/target-feature/implied-features-nvptx.rs @@ -1,28 +1,30 @@ -//@ assembly-output: emit-asm -//@ compile-flags: --crate-type cdylib -C target-cpu=sm_80 -//@ only-nvptx64 -//@ build-pass -#![no_std] +//@ compile-flags: --target=nvptx64-nvidia-cuda --crate-type cdylib -C target-cpu=sm_90 +//@ needs-llvm-components: nvptx +//@ check-pass +//@ ignore-backends: gcc +#![feature(no_core, rustc_attrs)] +#![no_core] #![allow(dead_code)] -#[panic_handler] -pub fn panic(_info: &core::panic::PanicInfo) -> ! 
{ - loop {} +#[rustc_builtin_macro] +#[macro_export] +macro_rules! compile_error { + ($msg:expr $(,)?) => {{ /* compiler built-in */ }}; } -// -Ctarget-cpu=sm_80 directly enables sm_80 and ptx70 -#[cfg(not(all(target_feature = "sm_80", target_feature = "ptx70")))] +// -Ctarget-cpu=sm_90 directly enables sm_90 and ptx78 +#[cfg(not(all(target_feature = "sm_90", target_feature = "ptx78")))] compile_error!("direct target features not enabled"); -// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features. +// -Ctarget-cpu=sm_90 implies all earlier sm_* and ptx* features. #[cfg(not(all( - target_feature = "sm_60", target_feature = "sm_70", - target_feature = "ptx50", - target_feature = "ptx60", + target_feature = "sm_80", + target_feature = "ptx71", + target_feature = "ptx74", )))] compile_error!("implied target features not enabled"); -// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features. -#[cfg(target_feature = "ptx71")] -compile_error!("sm_80 requires only ptx70, but ptx71 enabled"); +// -Ctarget-cpu=sm_90 must not enable PTX versions newer than the ptx78 it requires. +#[cfg(target_feature = "ptx80")] +compile_error!("sm_90 requires only ptx78, but ptx80 enabled");