intel · bratpiorka · Dec 5, 2025 · Dec 5, 2025 · Dec 4, 2025 · Dec 4, 2025
@@ -167,7 +167,7 @@ enum class OffloadArch {
   LNL_M,
   LAST,
 
-  CudaDefault = OffloadArch::SM_52,
+  CudaDefault = OffloadArch::SM_75,
   HIPDefault = OffloadArch::GFX906,
 };
 

@@ -5927,11 +5927,11 @@ class OffloadingActionBuilder final {
 
       // Handle defaults architectures
       for (auto &Triple : SYCLTripleList) {
-        // For NVIDIA use SM_50 as a default
+        // For NVIDIA use SM_75 as a default
         if (Triple.isNVPTX() && llvm::none_of(GpuArchList, [&](auto &P) {
               return P.first.isNVPTX();
             })) {
-          const char *DefaultArch = OffloadArchToString(OffloadArch::SM_50);
+          const char *DefaultArch = OffloadArchToString(OffloadArch::SM_75);
           GpuArchList.emplace_back(Triple, DefaultArch);
         }
 
@@ -7650,7 +7650,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
       // The default arch is set for NVPTX if not provided.  For AMDGPU, emit
       // an error as the user is responsible to set the arch.
       if (TC.getTriple().isNVPTX())
-        Archs.insert(OffloadArchToString(OffloadArch::SM_50));
+        Archs.insert(OffloadArchToString(OffloadArch::SM_75));
       else if (TC.getTriple().isAMDGPU())
         C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch)
             << 1 << TC.getTriple().str();

@@ -0,0 +1,2 @@
+CUDA Version 10.0.130
+
@@ -0,0 +1,2 @@
+CUDA Version 10.0.130
+
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 10000
+
+//
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 10000
+
+//
@@ -0,0 +1,7 @@
+//
+// Placeholder file for testing CUDA version detection
+//
+
+#define CUDA_VERSION 7000
+
+//
@@ -60,7 +60,7 @@
 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON \
-// RUN:     -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30
+// RUN:     -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
 // RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON \
@@ -96,14 +96,14 @@
 
 
 // Verify that -nocudainc prevents adding include path to CUDA headers.
-// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
-// RUN:     -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
-// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
+// RUN:     -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
+// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \
-// RUN:     -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35
+// RUN:     -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10
 
 // We should not add any CUDA include paths if there's no valid CUDA installation
 // RUN: not %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
@@ -123,10 +123,10 @@
 // RUN:   | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
 
 // Verify that  -nocudalib prevents linking libdevice bitcode in.
-// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON
-// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \
+// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \
 // RUN:   -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix COMMON
 
@@ -152,10 +152,10 @@
 // RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix CUDA80
 
-// Verify that if no version file is found, we report the default of 7.0.
+// Verify that if no version file is found, we report the default of 10.0.
 // RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
-// RUN:   | FileCheck %s -check-prefix CUDA70
+// RUN:   | FileCheck %s -check-prefix CUDA100
 
 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
 // NO-LIBDEVICE: Found CUDA installation: {{.*}}/Inputs/CUDA-nolibdevice/usr/local/cuda
@@ -174,6 +174,7 @@
 // LIBDEVICE50-SAME: libdevice.compute_50.10.bc
 // PTX42-SAME: "-target-feature" "+ptx42"
 // PTX60-SAME: "-target-feature" "+ptx60"
+// PTX63-SAME: "-target-feature" "+ptx63"
 // CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h"
 // NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h"
 // CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include"
@@ -188,14 +189,20 @@
 // CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8"
 // CHECK-CXXINCLUDE: ld{{.*}}"
 
+// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA70-SAME: -target-sdk-version=7.0
+// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CUDA70-SAME: -target-sdk-version=7.0
+// CUDA70: ld{{.*}}"
+
 // CUDA80: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // CUDA80-SAME: -target-sdk-version=8.0
 // CUDA80: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 // CUDA80-SAME: -target-sdk-version=8.0
 // CUDA80: ld{{.*}}"
 
-// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
-// CUDA70-SAME: -target-sdk-version=7.0
-// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
-// CUDA70-SAME: -target-sdk-version=7.0
-// CUDA70: ld{{.*}}"
+// CUDA100: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA100-SAME: -target-sdk-version=10.0
+// CUDA100: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CUDA100-SAME: -target-sdk-version=10.0
+// CUDA100: ld{{.*}}"
@@ -2,14 +2,14 @@
 // -fgpu-flush-denormals-to-zero. This should be translated to
 // -fdenormal-fp-math-f32=preserve-sign
 
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test alias options -f[no-]cuda-flush-denormals-to-zero
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
 // Test explicit argument, with CUDA offload kind
 // RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s

@@ -5,12 +5,12 @@
 // RUN: %clang -### --target=x86_64-linux-gnu -c \
 // RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s
 // RUN: %clang -### --target=x86_64-linux-gnu -c \
-// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s
+// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_75 %s 2>&1 | FileCheck %s
 
 // CHECK: "-cc1"{{.*}} "-triple" "nvptx
-// CHECK-SAME: "-target-cpu" "sm_52"
+// CHECK-SAME: "-target-cpu" "sm_75"
 
 // CHECK: ptxas
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-SAME: "--gpu-name" "sm_75"
 
 // CHECK: "-cc1"{{.*}} "-target-cpu" "haswell"
@@ -104,12 +104,12 @@
 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
 
 // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
-//    we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
+//    we default to sm_75 -- same as if no --cuda-gpu-arch were passed.
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
 // RUN:   --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
 // RUN:   -c %s 2>&1 \
-// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
+// RUN: | FileCheck -check-prefixes ARCH-SM75,NOARCH-SM60,NOARCH-SM70 %s
 
 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
@@ -193,6 +193,8 @@
 // NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
 // ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
 // NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
+// ARCH-SM75: "-cc1"{{.*}}"-target-cpu" "sm_75"
+// NOARCH-SM75-NOT: "-cc1"{{.*}}"-target-cpu" "sm_75"
 // ARCHALLERROR: error: unsupported CUDA gpu architecture: all
 
 // Match device-side preprocessor and compiler phases with -save-temps.

@@ -1,8 +1,8 @@
 // RUN: %clang -### --target=i386-unknown-linux \
-// RUN:   --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN:   --ptxas-path=/some/path/to/ptxas %s 2>&1 \
 // RUN: | FileCheck %s
 
 // CHECK-NOT: "ptxas"
 // CHECK: "/some/path/to/ptxas"
-// CHECK-SAME: "--gpu-name" "sm_52"
+// CHECK-SAME: "--gpu-name" "sm_75"
@@ -1,6 +1,6 @@
 // Checks that cuda compilation does the right thing when passed -fcuda-short-ptr
 
-// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s
+// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck %s
 
 // CHECK: "-mllvm" "--nvptx-short-ptr"
 // CHECK-SAME: "-fcuda-short-ptr"
@@ -1,4 +1,4 @@
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
@@ -15,43 +15,43 @@
 // RUN:    --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=UNKNOWN_VERSION_CXX
 
-// The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60.
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// The installation at Inputs/CUDA_70 is CUDA 7.0, which doesn't support sm_60.
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // This should only complain about sm_60, not sm_35.
 // RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \
-// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35
 
 // We should get two errors here, one for sm_60 and one for sm_61.
 // RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \
-// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61
 
 // We should still get an error if we pass -nocudainc, because this compilation
 // would invoke ptxas, and we do a version check on that, too.
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // If with -nocudainc and -E, we don't touch the CUDA install, so we
 // shouldn't get an error.
 // RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \
-// RUN:    --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \
+// RUN:    --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 
 // --no-cuda-version-check should suppress all of these errors.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 \
 // RUN:    --no-cuda-version-check %s | \
 // RUN:    FileCheck %s --check-prefix=OK
 
 // We need to make sure the version check is done only for the device toolchain,
 // therefore we should not get an error in host-only mode. We use the -S here
 // to avoid the error being produced in case by the assembler tool, which does
 // the same check.
-// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
+// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=OK
-// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \
+// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \
 // RUN:    FileCheck %s --check-prefix=ERR_SM60
 
 // OK-NOT: error: GPU arch

@@ -1,6 +1,6 @@
-// RUN: %clang -v --target=i386-pc-windows-msvc \
+// RUN: %clang -v --target=i386-pc-windows-msvc --cuda-gpu-arch=sm_50 \
 // RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
-// RUN: %clang -v --target=i386-pc-windows-mingw32 \
+// RUN: %clang -v --target=i386-pc-windows-mingw32 --cuda-gpu-arch=sm_50 \
 // RUN:   --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s
 
 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0

@@ -26,7 +26,7 @@
 
 // llvm-bc and llvm-ll outputs need to match regular suffixes
 // (unfortunately).
-// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t
+// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA/usr/local/cuda -### 2> %t
 // RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s
 //
 // CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu"
-Original file line number
+Diff line change
@@ Expand Up / @@ -167,7 +167,7 @@ enum class OffloadArch { @@
       LNL_M,
       LAST,
-      CudaDefault = OffloadArch::SM_52,
+      CudaDefault = OffloadArch::SM_75,
       HIPDefault = OffloadArch::GFX906,
     };
@@ Expand Down @@