From 8c852f26c2829da63d0e5eca830cef30cfd477ae Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 27 May 2026 15:21:44 +0200 Subject: [PATCH 1/2] PTX: Fix regression in kernel metadata on LLVM 20. --- src/ptx.jl | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/ptx.jl b/src/ptx.jl index a3af659d..a7a01df6 100644 --- a/src/ptx.jl +++ b/src/ptx.jl @@ -186,11 +186,16 @@ function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), # entry instead of re-reading the metadata. annotations = Metadata[entry] - ## kernel metadata - append!(annotations, [MDString("kernel"), - ConstantInt(Int32(1))]) + # kernel metadata + # + # on LLVM >= 20 the `ptx_kernel` calling convention already marks the + # entry; the redundant "kernel" nvvm.annotation causes miscompilations. + if LLVM.version() < v"20" + append!(annotations, [MDString("kernel"), + ConstantInt(Int32(1))]) + end - ## expected CTA sizes + # expected CTA sizes if job.config.target.minthreads !== nothing bounds = ntuple(i -> i <= length(job.config.target.minthreads) ? job.config.target.minthreads[i] : 1, 3) @@ -234,7 +239,9 @@ function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), end end - push!(metadata(mod)["nvvm.annotations"], MDNode(annotations)) + if length(annotations) > 1 + push!(metadata(mod)["nvvm.annotations"], MDNode(annotations)) + end end # we emit properties (of the device and ptx isa) as private global constants, From 7856743575631cbdc827416fae80bd9a856b5aac Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 27 May 2026 17:08:27 +0200 Subject: [PATCH 2/2] Remove IR-level tests. --- test/ptx.jl | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/test/ptx.jl b/test/ptx.jl index e1426ff2..f852f58a 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -35,41 +35,6 @@ end end end -@testset "property_annotations" begin - @test @filecheck PTX.code_llvm(Tuple{}; dump_module=true) do - @check_not "nvvm.annotations" - return - end - @test @filecheck PTX.code_llvm(Tuple{}; dump_module=true, kernel=true) do - @check_not "maxntid" - @check_not "reqntid" - @check_not "minctasm" - @check_not "maxnreg" - @check "nvvm.annotations" - return - end - @test @filecheck PTX.code_llvm(Tuple{}; dump_module=true, kernel=true, maxthreads=42) do - @check "maxntidx\", i32 42" - @check "maxntidy\", i32 1" - @check "maxntidz\", i32 1" - return - end - @test @filecheck PTX.code_llvm(Tuple{}; dump_module=true, kernel=true, minthreads=42) do - @check "reqntidx\", i32 42" - @check "reqntidy\", i32 1" - @check "reqntidz\", i32 1" - return - end - @test @filecheck PTX.code_llvm(Tuple{}; dump_module=true, kernel=true, blocks_per_sm=42) do - @check "minctasm\", i32 42" - return - end - @test @filecheck PTX.code_llvm(Tuple{}; dump_module=true, kernel=true, maxregs=42) do - @check "maxnreg\", i32 42" - return - end -end - LLVM.version() >= v"8" && @testset "calling convention" begin @test @filecheck PTX.code_llvm(Tuple{}; dump_module=true) do @check_not "ptx_kernel"