From 247060010587ed297504ce8dbd4e337d1d814cc5 Mon Sep 17 00:00:00 2001
From: Tim Besard
Date: Thu, 21 May 2026 14:47:00 +0200
Subject: [PATCH] Make `gpu_*` runtime stubs CPU-AOT-safe via weak linkage.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Back-end-provided runtime symbols (`Runtime.compile(:name, ...)`) used to
emit `ccall("extern gpu_<name>", llvmcall, ...)` as the Julia stub body.
That made every AOT pipeline that materialized the stub on CPU — juliac,
sysimage `compile=all`, PrecompileTools — fail with
`JIT session error: Symbols not found: [ gpu_<name> ]`, because the
`gpu_*` symbols only exist inside the GPU runtime library.

The stub still needs to *reference* `gpu_<name>` somewhere so that, after
`link!(ir, runtime; only_needed=true)`, the kernel calls the back-end's
implementation (which `build_runtime` emits as `gpu_<name>` by renaming
`runtime_module(job).<name>`). Back-ends override at the LLVM-symbol level,
not via Julia method tables, so the stub has to produce that symbol
reference itself.

Emit the stub via `Base.llvmcall` with an inline
`define weak @gpu_<name>(...)` returning a sentinel, plus an entry that
calls it. LLVM linker semantics: the weak no-op satisfies CPU JIT
materialization, and the runtime library's strong definition replaces it
during the GPU link step. No method-table machinery, no post-codegen pass,
no registry — the fix is local to `Runtime.compile`.

IR is built with LLVM.jl's `create_function`/`IRBuilder`/`call_function`
rather than string IR, matching the pattern used by `Runtime.type_tag` in
the same file.
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/runtime.jl | 76 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/src/runtime.jl b/src/runtime.jl index 2b11d915..b7ebfc6f 100644 --- a/src/runtime.jl +++ b/src/runtime.jl @@ -76,19 +76,83 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n end methods[name] = meth - # FIXME: if the function is a symbol, implying it will be specified by the target, - # we won't be able to call this function here or we'll get UndefVarErrors. - # work around that by generating an llvmcall stub. can we do better by - # using the new nonrecursive codegen to handle function lookup ourselves? + # Symbolic `def` means the runtime symbol is provided by the back-end. Emit + # an `llvmcall` whose IR declares `gpu_` as a *weak* definition with + # a CPU-safe no-op body, and calls it. The weak body satisfies the JIT's + # symbol resolution on CPU (so AOT pipelines — juliac, sysimage + # `compile=all`, PrecompileTools — don't fail trying to link an undefined + # `gpu_`). The GPU runtime library, when linked in, provides the + # real strong definition; LLVM's linker semantics replace the weak with + # the strong. if def isa Symbol args = [gensym() for typ in types] - @eval @inline $def($(args...)) = - ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) + stub = LLVM.Context() do _ + build_runtime_stub(llvm_name, return_type, types, args) + end + @eval @inline $def($(args...)) = $stub end return end +# Build an `llvmcall` expression for a back-end-provided runtime symbol: +# +# define weak @gpu_() { ret } +# define @entry() { %r = call @gpu_(); ret %r } +# +# Returns the `Base.llvmcall(...)`-shaped quote produced by `LLVM.Interop.call_function`, +# suitable for splicing as the stub's body. 
+function build_runtime_stub(llvm_name::String, @nospecialize(return_type::Type),
+                            @nospecialize(types::Tuple), args::Vector)
+    # Lower the Julia signature to LLVM types; `allow_boxed=true` is forwarded
+    # to the conversion for both the return type and every argument type.
+    rt = convert(LLVMType, return_type; allow_boxed=true)
+    arg_tys = LLVMType[convert(LLVMType, t; allow_boxed=true) for t in types]
+
+    # Entry function that `call_function` wraps below; the weak definition is
+    # created in the entry's parent module so the two stay in the same IR.
+    entry, _ = create_function(rt, arg_tys)
+    mod = LLVM.parent(entry)
+
+    # Weak definition named `llvm_name`: a placeholder body for the CPU that a
+    # strong definition of the same symbol is expected to replace at link time.
+    extern = LLVM.Function(mod, llvm_name, LLVM.FunctionType(rt, arg_tys))
+    linkage!(extern, LLVM.API.LLVMWeakAnyLinkage)
+    @dispose builder=IRBuilder() begin
+        position!(builder, BasicBlock(extern, "entry"))
+        emit_fake_return!(builder, rt)
+    end
+
+    # Entry body: forward all parameters to the weak symbol, return its result.
+    @dispose builder=IRBuilder() begin
+        position!(builder, BasicBlock(entry, "entry"))
+        result = call!(builder, LLVM.function_type(extern), extern,
+                       collect(parameters(entry)))
+        rt isa LLVM.VoidType ? ret!(builder) : ret!(builder, result)
+    end
+
+    return call_function(entry, return_type, Tuple{types...}, args...)
+end
+
+# Terminate the current block with a placeholder `ret` matching `rt`: void, a
+# non-null pointer, integer 0 or floating-point 0.0; any other type errors.
+# The value is a sentinel only (the stub is not meant to actually run on CPU).
+function emit_fake_return!(builder::IRBuilder, rt::LLVMType)
+    if rt isa LLVM.VoidType
+        ret!(builder)
+    elseif rt isa LLVM.PointerType
+        # Use Int64(1), not 0, so `Ptr(Int64(...))` doesn't get lowered to C_NULL.
+        ret!(builder, const_inttoptr(ConstantInt(LLVM.IntType(64), 1), rt))
+    elseif rt isa LLVM.IntegerType
+        ret!(builder, ConstantInt(rt, 0))
+    elseif rt isa LLVM.FloatingPointType
+        # covers every floating-point width, not only `LLVMFloat`/`LLVMDouble`
+        ret!(builder, ConstantFP(rt, 0.0))
+    else
+        error("Unsupported runtime stub return type: $rt")
+    end
+end
+
 
 ## exception handling