From 27cf9bf49c190f778bca3885d3c839d89cd25569 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 16:36:57 +0200 Subject: [PATCH 01/20] wip: automatic therock artifact selection --- .pkg/platform_augmentation.jl | 49 +++++++++++++ .pkg/select_artifacts.jl | 134 ++++++++++++++++++++++++++++++++++ Artifacts.toml | 11 +++ Project.toml | 2 - src/discovery/discovery.jl | 16 ++-- src/discovery/utils.jl | 66 ++++++++++++----- 6 files changed, 249 insertions(+), 29 deletions(-) create mode 100644 .pkg/platform_augmentation.jl create mode 100644 .pkg/select_artifacts.jl create mode 100644 Artifacts.toml diff --git a/.pkg/platform_augmentation.jl b/.pkg/platform_augmentation.jl new file mode 100644 index 000000000..360cc24ab --- /dev/null +++ b/.pkg/platform_augmentation.jl @@ -0,0 +1,49 @@ +using Base.BinaryPlatforms + +function rocm_arch() + topology_root = "/sys/class/kfd/kfd/topology/nodes/" + isdir(topology_root) || return String[] + + arch = String[] + for dir in readdir(topology_root; join = true) + props = joinpath(dir, "properties") + isfile(props) || continue + + for s in eachline(props) + m = match(r"^gfx_target_version (\d+)$", s) + m === nothing && continue + + target = parse(Int, m[1]) + target == 0 && continue + + push!(arch, "gfx$(div(target, 10000))$(div(target, 100) % 100)$(target % 100)") + end + end + + unique!(arch) + sort!(arch; rev = true) + return arch +end + +function rocm_arch_comparison_strategy(a::String, b::String, a_requested::Bool, b_requested::Bool) + a == "none" && return false + b == "none" && return false + + a_arches = split(a, ',') + b_arches = split(b, ',') + for arch in a_arches + arch in b_arches && return true + end + return false +end + +function augment_platform!(platform::Platform) + if !haskey(platform, "rocm_arch") + arch = rocm_arch() + platform["rocm_arch"] = isempty(arch) ? "none" : join(arch, ',') + end + + BinaryPlatforms.set_compare_strategy!(platform, "rocm_arch", rocm_arch_comparison_strategy) + + return platform +end diff --git a/.pkg/select_artifacts.jl b/.pkg/select_artifacts.jl new file mode 100644 index 000000000..2b8b643c4 --- /dev/null +++ b/.pkg/select_artifacts.jl @@ -0,0 +1,134 @@ +push!(Base.LOAD_PATH, dirname(@__DIR__)) + +using TOML, Artifacts, Base.BinaryPlatforms +include("./platform_augmentation.jl") +artifacts_toml = joinpath(dirname(@__DIR__), "Artifacts.toml") + +# Update Base.parse to support riscv64, needed for Julia <1.12 +@static if !haskey(BinaryPlatforms.arch_mapping, "riscv64") + + BinaryPlatforms.arch_mapping["riscv64"] = "(rv64|riscv64)" + + function bbparse(::Type{Platform}, triplet::AbstractString; validate_strict::Bool = false) + arch_mapping = BinaryPlatforms.arch_mapping + os_mapping = BinaryPlatforms.os_mapping + libc_mapping = BinaryPlatforms.libc_mapping + call_abi_mapping = BinaryPlatforms.call_abi_mapping + libgfortran_version_mapping = BinaryPlatforms.libgfortran_version_mapping + cxxstring_abi_mapping = BinaryPlatforms.cxxstring_abi_mapping + libstdcxx_version_mapping = BinaryPlatforms.libstdcxx_version_mapping + + # Helper function to collapse dictionary of mappings down into a regex of + # named capture groups joined by "|" operators + c(mapping) = string("(",join(["(?<$k>$v)" for (k, v) in mapping], "|"), ")") + + # We're going to build a mondo regex here to parse everything: + triplet_regex = Regex(string( + "^", + # First, the core triplet; arch/os/libc/call_abi + c(arch_mapping), + c(os_mapping), + c(libc_mapping), + c(call_abi_mapping), + # Next, optional things, like libgfortran/libstdcxx/cxxstring abi + c(libgfortran_version_mapping), + c(cxxstring_abi_mapping), + c(libstdcxx_version_mapping), + # Finally, the catch-all for extended tags + "(?(?:-[^-]+\\+[^-]+)*)?", + "\$", + )) + + m = match(triplet_regex, triplet) + if m !== nothing + # Helper function to find the single named field within the giant regex + # that is not "nothing" for each mapping we give it. + get_field(m, mapping) = begin + for k in keys(mapping) + if m[k] !== nothing + # Convert our sentinel "nothing" values to actual "nothing" + if endswith(k, "_nothing") + return nothing + end + # Convert libgfortran/libstdcxx version numbers + if startswith(k, "libgfortran") + return VersionNumber(parse(Int,k[12:end])) + elseif startswith(k, "libstdcxx") + return VersionNumber(3, 4, parse(Int,m[k][11:end])) + else + return k + end + end + end + end + + # Extract the information we're interested in: + arch = get_field(m, arch_mapping) + os = get_field(m, os_mapping) + libc = get_field(m, libc_mapping) + call_abi = get_field(m, call_abi_mapping) + libgfortran_version = get_field(m, libgfortran_version_mapping) + libstdcxx_version = get_field(m, libstdcxx_version_mapping) + cxxstring_abi = get_field(m, cxxstring_abi_mapping) + function split_tags(tagstr) + tag_fields = filter(!isempty, split(tagstr, "-")) + if isempty(tag_fields) + return Pair{String,String}[] + end + return map(v -> Symbol(v[1]) => v[2], split.(tag_fields, "+")) + end + tags = split_tags(m["tags"]) + + # Special parsing of os version number, if any exists + function extract_os_version(os_name, pattern) + m_osvn = match(pattern, m[os_name]) + if m_osvn !== nothing + return VersionNumber(m_osvn.captures[1]) + end + return nothing + end + os_version = nothing + if os == "macos" + os_version = extract_os_version("macos", r".*darwin([\d.]+)"sa) + end + if os == "freebsd" + os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)"sa) + end + if os == "openbsd" + os_version = extract_os_version("openbsd", r".*openbsd([\d.]+)"sa) + end + + return Platform( + arch, os; + validate_strict, + libc, + call_abi, + libgfortran_version, + cxxstring_abi, + libstdcxx_version, + os_version, + tags..., + ) + end + throw(ArgumentError("Platform `$(triplet)` is not an officially supported platform")) + end + +else + # riscv64 is supported, all is fine + + const bbparse = parse + +end + + +# Get "target triplet" from ARGS, if given (defaulting to the host triplet otherwise) +target_triplet = get(ARGS, 1, Base.BinaryPlatforms.host_triplet()) + +# Augment this platform object with any special tags we require +platform = augment_platform!(HostPlatform(bbparse(Platform, target_triplet))) + +# Select all downloadable artifacts that match that platform +artifacts = select_downloadable_artifacts(artifacts_toml; platform, include_lazy=true) + +# Output the result to `stdout` as a TOML dictionary +TOML.print(stdout, artifacts) \ No newline at end of file diff --git a/Artifacts.toml b/Artifacts.toml new file mode 100644 index 000000000..0367f7091 --- /dev/null +++ b/Artifacts.toml @@ -0,0 +1,11 @@ +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "822a5a89b56180806f3c7e7d870254684fbeda00" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx1150" + + [[ROCm.download]] + sha256 = "49ab239f372901fbd2975285471cf5550de55a7bcd81602044cff426904c2ade" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1150-7.13.0.tar.gz" diff --git a/Project.toml b/Project.toml index d2ec25a65..eb2e9720d 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,6 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Preferences = "21216c6a-2e73-6563-6e65-726566657250" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -ROCmDeviceLibs_jll = "873c0968-716b-5aa7-bb8d-d1e2e2aeff2d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Random123 = "74087812-796a-5b5d-8853-05524746bad3" RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143" @@ -61,7 +60,6 @@ LLVM = "9" LLVM_jll = "15, 16, 17, 18, 19, 20" Preferences = "1" PrettyTables = "3" -ROCmDeviceLibs_jll = "=5.6.1, =6.2.1" Random123 = "1.6" RandomNumbers = "1.5" SpecialFunctions = "2" diff --git a/src/discovery/discovery.jl b/src/discovery/discovery.jl index a7998b4dc..0ef44dab1 100644 --- a/src/discovery/discovery.jl +++ b/src/discovery/discovery.jl @@ -5,10 +5,12 @@ export librocblas, librocsparse, librocsolver export librocrand, librocfft, libMIOpen_path using LLD_jll -using ROCmDeviceLibs_jll +using LazyArtifacts using Preferences using Libdl +Base.include(@__MODULE__, joinpath(@__DIR__, "..", "..", ".pkg", "platform_augmentation.jl")) + include("utils.jl") function get_artifact_library(pkg::Symbol, libname::Symbol)::String @@ -24,12 +26,8 @@ function get_ld_lld(rocm_path::String)::Tuple{String, Bool} return (LLD_jll.lld_path, true) end -function get_device_libs(from_artifact::Bool; rocm_path::String) - if from_artifact && ROCmDeviceLibs_jll.is_available() - ROCmDeviceLibs_jll.bitcode_path - else - find_device_libs(rocm_path) - end +function get_device_libs(rocm_path::String) + return find_device_libs(rocm_path) end function _hip_runtime_version() @@ -86,9 +84,7 @@ function __init__() global lld_artifact = lld_artifact global libhip = find_rocm_library(Sys.islinux() ? "libamdhip64" : "amdhip64"; rocm_path) - # Always load artifact device libraries. - from_artifact = true - global libdevice_libs = get_device_libs(from_artifact; rocm_path) + global libdevice_libs = get_device_libs(rocm_path) # HIP-based libraries. global librocblas = find_rocm_library(lib_prefix * "rocblas"; rocm_path) diff --git a/src/discovery/utils.jl b/src/discovery/utils.jl index cd85d0bc3..ecb250d9c 100644 --- a/src/discovery/utils.jl +++ b/src/discovery/utils.jl @@ -1,3 +1,21 @@ +# Resolve an artifact-backed ROCm root, if one has been installed. +function artifact_rocm_path()::String + try + return artifact"ROCm" + catch + return "" + end +end + +function rocm_search_dirs(rocm_path::String) + return unique(( + rocm_path, + joinpath(rocm_path, "lib"), + joinpath(rocm_path, "lib64"), + joinpath(rocm_path, "bin"), + )) +end + # use amdhip as query for a valid rocm_path function check_rocm_path(path::String) libname = (Sys.islinux() ? "libamdhip64" : "amdhip64") * "." * dlext @@ -24,6 +42,9 @@ function find_roc_path()::String rocm_path != "" && return rocm_path end + artifact_dir = artifact_rocm_path() + isdir(artifact_dir) && return artifact_dir + if Sys.islinux() hipconfig = Sys.which("hipconfig") if !isnothing(hipconfig) @@ -72,9 +93,11 @@ end function find_device_libs(rocm_path::String)::String env_dir = get(ENV, "ROCM_PATH", "") if isdir(env_dir) - path = joinpath(env_dir, "amdgcn", "bitcode") - path = check_device_libs(path) - isdir(path) && return path + for root in rocm_search_dirs(env_dir) + path = joinpath(root, "amdgcn", "bitcode") + path = check_device_libs(path) + isdir(path) && return path + end end # Might be set by tools like Spack or the user hip_devlibs_path = get(ENV, "HIP_DEVICE_LIB_PATH", "") @@ -83,19 +106,22 @@ function find_device_libs(rocm_path::String)::String devlibs_path !== "" && return devlibs_path # Try using hipconfig to find the device libraries. # Try the canonical location. - canonical_dir = joinpath(rocm_path, "amdgcn", "bitcode") - canonical_dir = check_device_libs(canonical_dir) - isdir(canonical_dir) && return canonical_dir + for root in rocm_search_dirs(rocm_path) + canonical_dir = joinpath(root, "amdgcn", "bitcode") + canonical_dir = check_device_libs(canonical_dir) + isdir(canonical_dir) && return canonical_dir + end # Fedora might put it in a weird place hipconfig = Sys.which("hipconfig") if !isnothing(hipconfig) clang_path = read(`$hipconfig --hipclangpath`, String) lib_path = joinpath(clang_path ,".." , "lib","clang") if isdir(lib_path) - lib_path = joinpath(lib_path, only(readdir(lib_path))) - lib_path = joinpath(lib_path, "amdgcn", "bitcode") - lib_path = check_device_libs(lib_path) - isdir(lib_path) && return lib_path + for version_dir in sort(readdir(lib_path; join=true)) + bitcode_path = joinpath(version_dir, "amdgcn", "bitcode") + bitcode_path = check_device_libs(bitcode_path) + isdir(bitcode_path) && return bitcode_path + end end end return "" @@ -110,12 +136,13 @@ function find_rocm_library(libs::Vector; rocm_path::String, ext::String = dlext) end function find_rocm_library(lib::String; rocm_path::String, ext::String = dlext)::String - libdir = joinpath(rocm_path, rel_libdir) - isdir(libdir) || return "" - for file in readdir(libdir; join=true) - fname = basename(file) - matched = startswith(fname, lib) && contains(fname, ext) - matched && return file + for libdir in rocm_search_dirs(rocm_path) + isdir(libdir) || continue + for file in readdir(libdir; join=true) + fname = basename(file) + matched = startswith(fname, lib) && contains(fname, ext) + matched && return file + end end return "" end @@ -123,7 +150,12 @@ end function find_ld_lld(rocm_path::String)::String lld_name = "ld.lld" * (Sys.iswindows() ? ".exe" : "") - dirs = (joinpath(rocm_path,"llvm", "bin"), joinpath(rocm_path,"bin")) + dirs = ( + joinpath(rocm_path, "llvm", "bin"), + joinpath(rocm_path, "bin"), + joinpath(rocm_path, "lib", "llvm", "bin"), + joinpath(rocm_path, "lib", "bin"), + ) hipconfig = Sys.which("hipconfig") if !isnothing(hipconfig) clang_path = read(`$hipconfig --hipclangpath`, String) From aad55e044cb796e3a63217dda5e665ba6e6fb52c Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 16:42:48 +0200 Subject: [PATCH 02/20] wip --- Artifacts.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Artifacts.toml b/Artifacts.toml index 0367f7091..fc0a780d8 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -1,6 +1,6 @@ [[ROCm]] arch = "x86_64" -git-tree-sha1 = "822a5a89b56180806f3c7e7d870254684fbeda00" +git-tree-sha1 = "4684f33015a110d83fcfe9f52a5aa9f6c6ceb0a8" lazy = true libc = "glibc" os = "linux" From d3922d58919e4ec364f5295c78860b831adb6448 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 17:19:00 +0200 Subject: [PATCH 03/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 46 ++++++++++++++++++++++ Artifacts.toml | 11 ++++++ Project.toml | 2 + 3 files changed, 59 insertions(+) create mode 100644 .github/workflows/rocm-artifact-hashes.yml diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml new file mode 100644 index 000000000..86947b66a --- /dev/null +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -0,0 +1,46 @@ +name: ROCm artifact hashes + +on: + workflow_dispatch: + +permissions: + contents: read + +jobs: + hash-rocm-artifacts: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: "1.12" + - name: Download tarballs and compute hashes + run: | + julia --project=. <<'JULIA' + import Pkg + Pkg.add("ArtifactUtils") + using ArtifactUtils, Base.BinaryPlatforms + + for rocm_arch in [ + "gfx908", + "gfx90a", + "gfx94X-dcgpu", + "gfx950-dcgpu", + "gfx101X-dcgpu", + "gfx103X-all", + "gfx110X-all", + "gfx1150", + "gfx1151", + "gfx1152", + "gfx120X-all", + ] + @info rocm_arch + platform = Platform("x86_64", "linux", Dict("libc" => "glibc", "rocm_arch" => rocm_arch)) + add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-linux-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) + end + JULIA + - name: Upload hash report + uses: actions/upload-artifact@v4 + with: + name: Artifacts.toml + path: Artifacts.toml diff --git a/Artifacts.toml b/Artifacts.toml index fc0a780d8..fb3650938 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -9,3 +9,14 @@ rocm_arch = "gfx1150" [[ROCm.download]] sha256 = "49ab239f372901fbd2975285471cf5550de55a7bcd81602044cff426904c2ade" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1150-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "4d8726c11c5fa4f406cb151a8c729cf593d99afe" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx1151" + + [[ROCm.download]] + sha256 = "6550da740ac8565ec2f1dc886f6ec6425af1df79588d773594163b0778832560" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-7.13.0.tar.gz" diff --git a/Project.toml b/Project.toml index eb2e9720d..42cabdeb5 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LLD_jll = "d55e3150-da41-5e91-b323-ecfd1eec6109" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" LLVM_jll = "86de99a1-58d6-5da7-8064-bd56ce2e322c" +LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" @@ -58,6 +59,7 @@ KernelAbstractions = "0.9.2" LLD_jll = "15, 16, 17, 18, 19, 20" LLVM = "9" LLVM_jll = "15, 16, 17, 18, 19, 20" +LazyArtifacts = "1.11.0" Preferences = "1" PrettyTables = "3" Random123 = "1.6" From 0cdeb9ea0cd4a4b2bbf38be309b7ae657969408c Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 17:31:13 +0200 Subject: [PATCH 04/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index 86947b66a..64b18063c 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -3,14 +3,10 @@ name: ROCm artifact hashes on: workflow_dispatch: -permissions: - contents: read - jobs: hash-rocm-artifacts: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: version: "1.12" From 1d45e282001dc3447cab2574dbc5da2bdcb730bb Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 17:40:52 +0200 Subject: [PATCH 05/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index 64b18063c..b7a652a8b 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -11,8 +11,8 @@ jobs: with: version: "1.12" - name: Download tarballs and compute hashes + shell: julia -e {0} run: | - julia --project=. <<'JULIA' import Pkg Pkg.add("ArtifactUtils") using ArtifactUtils, Base.BinaryPlatforms @@ -34,7 +34,6 @@ jobs: platform = Platform("x86_64", "linux", Dict("libc" => "glibc", "rocm_arch" => rocm_arch)) add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-linux-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) end - JULIA - name: Upload hash report uses: actions/upload-artifact@v4 with: From d363d9f0edfa9995f5b32c154b2dd0e40cae725f Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 17:43:10 +0200 Subject: [PATCH 06/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index b7a652a8b..cd30e620f 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -11,7 +11,7 @@ jobs: with: version: "1.12" - name: Download tarballs and compute hashes - shell: julia -e {0} + shell: julia {0} run: | import Pkg Pkg.add("ArtifactUtils") From 576e58af0009f4550cd5b53345973cfbb43e9f28 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 17:52:17 +0200 Subject: [PATCH 07/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index cd30e620f..469ac0504 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -31,7 +31,7 @@ jobs: "gfx120X-all", ] @info rocm_arch - platform = Platform("x86_64", "linux", Dict("libc" => "glibc", "rocm_arch" => rocm_arch)) + platform = Platform("x86_64", "linux", Dict("libc" => "glibc", "rocm_arch" => replace(rocm_arch, "-" => "_"))) add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-linux-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) end - name: Upload hash report From ed4b32e10d35512b98fd6d2ca1e393dc6f3477a1 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 19:03:15 +0200 Subject: [PATCH 08/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index 469ac0504..c2710bc7d 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -22,7 +22,7 @@ jobs: "gfx90a", "gfx94X-dcgpu", "gfx950-dcgpu", - "gfx101X-dcgpu", + "gfx101X-dgpu", "gfx103X-all", "gfx110X-all", "gfx1150", From f19ac76dd192e3e4932618c41d96158c19c7f4cf Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 19:15:24 +0200 Subject: [PATCH 09/20] introduce custom comparison strategy --- .pkg/platform_augmentation.jl | 33 ++++++++++++++++++++++++++++++--- test/runtests.jl | 14 ++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/.pkg/platform_augmentation.jl b/.pkg/platform_augmentation.jl index 360cc24ab..70e6f1b6a 100644 --- a/.pkg/platform_augmentation.jl +++ b/.pkg/platform_augmentation.jl @@ -16,7 +16,7 @@ function rocm_arch() target = parse(Int, m[1]) target == 0 && continue - push!(arch, "gfx$(div(target, 10000))$(div(target, 100) % 100)$(target % 100)") + push!(arch, rocm_arch_string(target)) end end @@ -25,18 +25,45 @@ function rocm_arch() return arch end +function rocm_arch_string(target::Integer) + patch = lowercase(string(target % 100, base = 16)) + return "gfx$(div(target, 10000))$(div(target, 100) % 100)$(patch)" +end + function rocm_arch_comparison_strategy(a::String, b::String, a_requested::Bool, b_requested::Bool) a == "none" && return false b == "none" && return false a_arches = split(a, ',') b_arches = split(b, ',') - for arch in a_arches - arch in b_arches && return true + for a_arch in a_arches + for b_arch in b_arches + rocm_arch_matches(a_arch, b_arch) && return true + rocm_arch_matches(b_arch, a_arch) && return true + end end return false end +function rocm_arch_core(arch::AbstractString) + return first(split(arch, r"[_-]", limit = 2)) +end + +function rocm_arch_matches(pattern::AbstractString, arch::AbstractString) + pattern = rocm_arch_core(pattern) + arch = rocm_arch_core(arch) + + length(pattern) == length(arch) || return false + for (pattern_char, arch_char) in zip(pattern, arch) + if pattern_char == 'X' + isxdigit(arch_char) || return false + elseif pattern_char != arch_char + return false + end + end + return true +end + function augment_platform!(platform::Platform) if !haskey(platform, "rocm_arch") arch = rocm_arch() diff --git a/test/runtests.jl b/test/runtests.jl index 884b7edbc..cfefaa211 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,6 +26,20 @@ end InteractiveUtils.versioninfo() AMDGPU.versioninfo() +include(joinpath(@__DIR__, "..", ".pkg", "platform_augmentation.jl")) + +@testset "platform augmentation" begin + @test rocm_arch_string(120000) == "gfx1200" + @test rocm_arch_string(120001) == "gfx1201" + @test rocm_arch_string(90010) == "gfx90a" + @test rocm_arch_comparison_strategy("gfx120X_all", "gfx1200", false, false) + @test rocm_arch_comparison_strategy("gfx120X_all", "gfx1201", false, false) + @test rocm_arch_comparison_strategy("gfx90X_dcgpu", "gfx90a", false, false) + @test rocm_arch_comparison_strategy("gfx94X_dcgpu", "gfx942", false, false) + @test !rocm_arch_comparison_strategy("gfx120X_all", "gfx1100", false, false) + @test !rocm_arch_comparison_strategy("none", "gfx1200", false, false) +end + # Autodiscovered tests testsuite = find_tests(@__DIR__) From ed394415c9779aa3d7b61f1783b29030d8262eb9 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 20 May 2026 19:20:22 +0200 Subject: [PATCH 10/20] wip --- .pkg/platform_augmentation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pkg/platform_augmentation.jl b/.pkg/platform_augmentation.jl index 70e6f1b6a..eac177afa 100644 --- a/.pkg/platform_augmentation.jl +++ b/.pkg/platform_augmentation.jl @@ -55,7 +55,7 @@ function rocm_arch_matches(pattern::AbstractString, arch::AbstractString) length(pattern) == length(arch) || return false for (pattern_char, arch_char) in zip(pattern, arch) - if pattern_char == 'X' + if lowercase(pattern_char) == 'x' isxdigit(arch_char) || return false elseif pattern_char != arch_char return false From 5253782de062e62a3a95209b5dc59a8fb37d9d99 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 10:04:51 +0200 Subject: [PATCH 11/20] add more artifacts --- Artifacts.toml | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/Artifacts.toml b/Artifacts.toml index fb3650938..203545bb1 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -1,5 +1,82 @@ [[ROCm]] arch = "x86_64" +git-tree-sha1 = "927a3fe95e9d836dbc762f6327159526dcaa5bc7" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx908" + + [[ROCm.download]] + sha256 = "5d84753a8d8895ff2f6137a2a922ee8f36ce9c2e01b60a99d3ee776a683bfc34" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx908-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "66ad1dc431ab06faff7baf8842e91f80548f6ea6" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx90a" + + [[ROCm.download]] + sha256 = "b2d3c49ef936b3b24b10a25bae3e60df7ccc9c5134095a080bbc721d5062b4c7" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx90a-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "148fb9883a4a104c807e2ec2448986e0f4dd40f4" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx94x_dcgpu" + + [[ROCm.download]] + sha256 = "db5543de096fb175ff2ece19dacc28b2a3201df48b38051cc505e508d84e35ab" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx94X-dcgpu-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "e32c6a1039d8704432105abb9e381892f6b66e33" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx950_dcgpu" + + [[ROCm.download]] + sha256 = "794e8292c843621f772df83bbcbb6a8d5926279803dbd273679a1b6e782e16c7" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx950-dcgpu-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "6e52a70a9284555a2eeb670077b58c484860816f" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx101x_dgpu" + + [[ROCm.download]] + sha256 = "0cb099ec837e1206d710467674a254b07119c72fec765a5316d3703161c7bfe1" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx101X-dgpu-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "472b6d4f2186b8363e4621b00d6e7a3d16d4a5ea" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx103x_all" + + [[ROCm.download]] + sha256 = "7049d3d934699226ea58ce821f64f7042f41467281f4d099ca123fd912592f23" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx103X-all-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "7c7e3170d31e5360030a4c8bfbab39cf7bdb230b" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx110x_all" + + [[ROCm.download]] + sha256 = "c28e6861763bd5282caf349908aef99116160d200fdbf56c3ce947033c1876c3" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx110X-all-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" git-tree-sha1 = "4684f33015a110d83fcfe9f52a5aa9f6c6ceb0a8" lazy = true libc = "glibc" @@ -20,3 +97,25 @@ rocm_arch = "gfx1151" [[ROCm.download]] sha256 = "6550da740ac8565ec2f1dc886f6ec6425af1df79588d773594163b0778832560" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "6060cf1f79c122f9ed5ccc27f43ddcac0203b7ce" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx1152" + + [[ROCm.download]] + sha256 = "125094beb4e780bf3494e3b36d334a93593207bdcce18c8afb8161c82b703e5d" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1152-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "8b8d69691b2469e8f890fe977fe9c01e33233b64" +lazy = true +libc = "glibc" +os = "linux" +rocm_arch = "gfx120x_all" + + [[ROCm.download]] + sha256 = "150c3ed6eb51cda451a96d588ca04a15af2c8724fd5e692b705cba30faf4efcf" + url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx120X-all-7.13.0.tar.gz" From f99017d49ce94f6cdb3abe1d15fbd337c1e66043 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 12:55:56 +0200 Subject: [PATCH 12/20] wip: cleanup discovery --- .pkg/platform_augmentation.jl | 3 ++- src/discovery/discovery.jl | 6 +---- src/discovery/utils.jl | 45 ++++++++++------------------------- 3 files changed, 16 insertions(+), 38 deletions(-) diff --git a/.pkg/platform_augmentation.jl b/.pkg/platform_augmentation.jl index eac177afa..d6bb70bc1 100644 --- a/.pkg/platform_augmentation.jl +++ b/.pkg/platform_augmentation.jl @@ -46,7 +46,7 @@ function rocm_arch_comparison_strategy(a::String, b::String, a_requested::Bool, end function rocm_arch_core(arch::AbstractString) - return first(split(arch, r"[_-]", limit = 2)) + return match(r"gfx(.*)", first(split(arch, r"[_-]", limit = 2)))[1] end function rocm_arch_matches(pattern::AbstractString, arch::AbstractString) @@ -71,6 +71,7 @@ function augment_platform!(platform::Platform) end BinaryPlatforms.set_compare_strategy!(platform, "rocm_arch", rocm_arch_comparison_strategy) + @info platform return platform end diff --git a/src/discovery/discovery.jl b/src/discovery/discovery.jl index 0ef44dab1..3909b47e3 100644 --- a/src/discovery/discovery.jl +++ b/src/discovery/discovery.jl @@ -26,10 +26,6 @@ function get_ld_lld(rocm_path::String)::Tuple{String, Bool} return (LLD_jll.lld_path, true) end -function get_device_libs(rocm_path::String) - return find_device_libs(rocm_path) -end - function _hip_runtime_version() v_ref = Ref{Cint}() res = ccall((:hipRuntimeGetVersion, libhip), UInt32, (Ptr{Cint},), v_ref) @@ -84,7 +80,7 @@ function __init__() global lld_artifact = lld_artifact global libhip = find_rocm_library(Sys.islinux() ? "libamdhip64" : "amdhip64"; rocm_path) - global libdevice_libs = get_device_libs(rocm_path) + global libdevice_libs = find_device_libs(rocm_path) # HIP-based libraries. global librocblas = find_rocm_library(lib_prefix * "rocblas"; rocm_path) diff --git a/src/discovery/utils.jl b/src/discovery/utils.jl index ecb250d9c..0194a5f82 100644 --- a/src/discovery/utils.jl +++ b/src/discovery/utils.jl @@ -7,15 +7,6 @@ function artifact_rocm_path()::String end end -function rocm_search_dirs(rocm_path::String) - return unique(( - rocm_path, - joinpath(rocm_path, "lib"), - joinpath(rocm_path, "lib64"), - joinpath(rocm_path, "bin"), - )) -end - # use amdhip as query for a valid rocm_path function check_rocm_path(path::String) libname = (Sys.islinux() ? "libamdhip64" : "amdhip64") * "." * dlext @@ -93,11 +84,9 @@ end function find_device_libs(rocm_path::String)::String env_dir = get(ENV, "ROCM_PATH", "") if isdir(env_dir) - for root in rocm_search_dirs(env_dir) - path = joinpath(root, "amdgcn", "bitcode") - path = check_device_libs(path) - isdir(path) && return path - end + path = joinpath(env_dir, "amdgcn", "bitcode") + path = check_device_libs(path) + isdir(path) && return path end # Might be set by tools like Spack or the user hip_devlibs_path = get(ENV, "HIP_DEVICE_LIB_PATH", "") @@ -106,22 +95,19 @@ function find_device_libs(rocm_path::String)::String devlibs_path !== "" && return devlibs_path # Try using hipconfig to find the device libraries. # Try the canonical location. - for root in rocm_search_dirs(rocm_path) - canonical_dir = joinpath(root, "amdgcn", "bitcode") - canonical_dir = check_device_libs(canonical_dir) - isdir(canonical_dir) && return canonical_dir - end + canonical_dir = joinpath(rocm_path, "amdgcn", "bitcode") + canonical_dir = check_device_libs(canonical_dir) + isdir(canonical_dir) && return canonical_dir # Fedora might put it in a weird place hipconfig = Sys.which("hipconfig") if !isnothing(hipconfig) clang_path = read(`$hipconfig --hipclangpath`, String) lib_path = joinpath(clang_path ,".." , "lib","clang") if isdir(lib_path) - for version_dir in sort(readdir(lib_path; join=true)) - bitcode_path = joinpath(version_dir, "amdgcn", "bitcode") - bitcode_path = check_device_libs(bitcode_path) - isdir(bitcode_path) && return bitcode_path - end + lib_path = joinpath(lib_path, only(readdir(lib_path))) + lib_path = joinpath(lib_path, "amdgcn", "bitcode") + lib_path = check_device_libs(lib_path) + isdir(lib_path) && return lib_path end end return "" @@ -129,14 +115,14 @@ end function find_rocm_library(libs::Vector; rocm_path::String, ext::String = dlext)::String for lib in libs - path = find_rocm_library(lib, rocm_path, ext) + path = find_rocm_library(lib; rocm_path, ext) isempty(path) || return path end return "" end function find_rocm_library(lib::String; rocm_path::String, ext::String = dlext)::String - for libdir in rocm_search_dirs(rocm_path) + for libdir in [joinpath(rocm_path, rel_libdir), joinpath(rocm_path, "lib")] isdir(libdir) || continue for file in readdir(libdir; join=true) fname = basename(file) @@ -150,12 +136,7 @@ end function find_ld_lld(rocm_path::String)::String lld_name = "ld.lld" * (Sys.iswindows() ? ".exe" : "") - dirs = ( - joinpath(rocm_path, "llvm", "bin"), - joinpath(rocm_path, "bin"), - joinpath(rocm_path, "lib", "llvm", "bin"), - joinpath(rocm_path, "lib", "bin"), - ) + dirs = (joinpath(rocm_path,"llvm", "bin"), joinpath(rocm_path,"bin")) hipconfig = Sys.which("hipconfig") if !isnothing(hipconfig) clang_path = read(`$hipconfig --hipclangpath`, String) From ce8f13aaf02d9699e54d8841fe03cd2908303174 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 13:11:44 +0200 Subject: [PATCH 13/20] fix augmentation --- .pkg/platform_augmentation.jl | 1 - src/discovery/utils.jl | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pkg/platform_augmentation.jl b/.pkg/platform_augmentation.jl index d6bb70bc1..d638e6c86 100644 --- a/.pkg/platform_augmentation.jl +++ b/.pkg/platform_augmentation.jl @@ -71,7 +71,6 @@ function augment_platform!(platform::Platform) end BinaryPlatforms.set_compare_strategy!(platform, "rocm_arch", rocm_arch_comparison_strategy) - @info platform return platform end diff --git a/src/discovery/utils.jl b/src/discovery/utils.jl index 0194a5f82..d99a4e144 100644 --- a/src/discovery/utils.jl +++ b/src/discovery/utils.jl @@ -1,7 +1,8 @@ # Resolve an artifact-backed ROCm root, if one has been installed. function artifact_rocm_path()::String try - return artifact"ROCm" + p = augment_platform!(HostPlatform()) + return @artifact_str("ROCm", p) catch return "" end From 0f091cba9b3d3515247c08f138dfb649b4052b42 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 13:17:02 +0200 Subject: [PATCH 14/20] fix whitespace --- .pkg/select_artifacts.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pkg/select_artifacts.jl b/.pkg/select_artifacts.jl index 2b8b643c4..1f150bc19 100644 --- a/.pkg/select_artifacts.jl +++ b/.pkg/select_artifacts.jl @@ -131,4 +131,4 @@ platform = augment_platform!(HostPlatform(bbparse(Platform, target_triplet))) artifacts = select_downloadable_artifacts(artifacts_toml; platform, include_lazy=true) # Output the result to `stdout` as a TOML dictionary -TOML.print(stdout, artifacts) \ No newline at end of file +TOML.print(stdout, artifacts) From ff97ae2112beb1cd6d57e843ad33838e2ba861bc Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 16:32:03 +0200 Subject: [PATCH 15/20] fix Project.toml --- Project.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 42cabdeb5..c73a8f69f 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ authors = ["Julian P Samaroo ", "Valentin Churavy Date: Thu, 21 May 2026 17:33:35 +0200 Subject: [PATCH 16/20] wip: Windows support --- .github/workflows/rocm-artifact-hashes.yml | 6 +- .pkg/platform_augmentation.jl | 221 ++++++++++++++++++++- Project.toml | 1 + test/Project.toml | 3 +- test/runtests.jl | 5 + 5 files changed, 228 insertions(+), 8 deletions(-) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index c2710bc7d..25742539c 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -30,9 +30,13 @@ jobs: "gfx1152", "gfx120X-all", ] - @info rocm_arch + @info "Linux, $rocm_arch" platform = Platform("x86_64", "linux", Dict("libc" => "glibc", "rocm_arch" => replace(rocm_arch, "-" => "_"))) add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-linux-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) + + @info "Windows, $rocm_arch" + platform = Platform("x86_64", "windows", Dict("libc" => "glibc", "rocm_arch" => replace(rocm_arch, "-" => "_"))) + add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-windows-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) end - name: Upload hash report uses: actions/upload-artifact@v4 diff --git a/.pkg/platform_augmentation.jl b/.pkg/platform_augmentation.jl index d638e6c86..1a4475cea 100644 --- a/.pkg/platform_augmentation.jl +++ b/.pkg/platform_augmentation.jl @@ -1,6 +1,68 @@ -using Base.BinaryPlatforms +using Base.BinaryPlatforms, Libdl -function rocm_arch() +function rocm_arch_string(target::Integer) + patch = lowercase(string(target % 100, base = 16)) + return "gfx$(div(target, 10000))$(div(target, 100) % 100)$(patch)" +end + +function rocm_arch_from_device_name(device_name::AbstractString) + device_lower = lowercase(device_name) + + if !occursin("radeon", device_lower) && !occursin("amd", device_lower) + return "" + end + + # STX Halo iGPUs (gfx1151 architecture) + # Radeon 8050S Graphics / Radeon 8060S Graphics + if occursin("8050s", device_lower) || + occursin("8060s", device_lower) || + occursin("device 1586", device_lower) + return "gfx1151" + end + + # STX Point iGPUs (gfx1150 architecture) + # Radeon 880M / 890M Graphics + if occursin("880m", device_lower) || + occursin("890m", device_lower) + return "gfx1150" + end + + # RDNA4 GPUs (gfx120X architecture) + # AMD Radeon AI PRO R9700, AMD Radeon RX 9070 XT, AMD Radeon RX 9070 GRE, + # AMD Radeon RX 9070, AMD Radeon RX 9060 XT + if occursin("r9700", device_lower) || + occursin("9060", device_lower) || + occursin("9070", device_lower) + return "gfx120X" + end + + # RDNA3 GPUs (gfx110X architecture) + # AMD Radeon PRO V710, AMD Radeon PRO W7900 Dual Slot, AMD Radeon PRO W7900, + # AMD Radeon PRO W7800 48GB, AMD Radeon PRO W7800, AMD Radeon PRO W7700, + # AMD Radeon RX 7900 XTX, AMD Radeon RX 7900 XT, AMD Radeon RX 7900 GRE, + # AMD Radeon RX 7800 XT, AMD Radeon RX 7700 XT + if occursin("7700", device_lower) || + occursin("7800", device_lower) || + occursin("7900", device_lower) || + occursin("v710", device_lower) + return "gfx110X" + end + + # RDNA2 GPUs (gfx103X architecture) + # AMD Radeon RX 6800 XT, AMD Radeon RX 6800, AMD Radeon RX 6700 XT, + # AMD Radeon RX 6700, AMD Radeon RX 6600 XT, AMD Radeon RX 6600, + # AMD Radeon RX 6500 XT, AMD Radeon RX 6500 + if occursin("6800", device_lower) || + occursin("6700", device_lower) || + occursin("6600", device_lower) || + occursin("6500", device_lower) + return "gfx103X" + end + + return "" +end + +function rocm_arch_linux() topology_root = "/sys/class/kfd/kfd/topology/nodes/" isdir(topology_root) || return String[] @@ -25,9 +87,156 @@ function rocm_arch() return arch end -function rocm_arch_string(target::Integer) - patch = lowercase(string(target % 100, base = 16)) - return "gfx$(div(target, 10000))$(div(target, 100) % 100)$(patch)" +function wmi_query_video_controllers() + ole32 = Libdl.dlopen("ole32.dll") + oleaut32 = Libdl.dlopen("oleaut32.dll") + + CoInitializeEx = Libdl.dlsym(ole32, :CoInitializeEx) + CoInitializeSecurity = Libdl.dlsym(ole32, :CoInitializeSecurity) + CoCreateInstance = Libdl.dlsym(ole32, :CoCreateInstance) + CoUninitialize = Libdl.dlsym(ole32, :CoUninitialize) + SysAllocString = Libdl.dlsym(oleaut32, :SysAllocString) + SysFreeString = Libdl.dlsym(oleaut32, :SysFreeString) + + # Helper: allocate a BSTR from a Julia string + function bstr(s::String) + ws = transcode(UInt16, s * "\0") + ccall(SysAllocString, Ptr{UInt16}, (Ptr{UInt16},), ws) + end + free_bstr(p) = ccall(SysFreeString, Cvoid, (Ptr{UInt16},), p) + + # Helper: read a BSTR pointer back to Julia String + function read_bstr(p::Ptr{UInt16}) + p == C_NULL && return "" + # BSTR length (in bytes) is stored 4 bytes before the pointer + nbytes = unsafe_load(Ptr{UInt32}(p - 4)) + nchars = nbytes ÷ 2 + buf = unsafe_wrap(Array, p, nchars; own=false) + transcode(String, buf) + end + + # CLSID_WbemLocator = {4590F811-1D3A-11D0-891F-00AA004B2E24} + # IID_IWbemLocator = {DC12A687-737F-11CF-884D-00AA004B2E24} + CLSID_WbemLocator = UInt8[ + 0x11, 0xF8, 0x90, 0x45, # Data1 (little-endian) + 0x3A, 0x1D, # Data2 (little-endian) + 0xD0, 0x11, # Data3 (little-endian) + 0x89, 0x1F, 0x00, 0xAA, 0x00, 0x4B, 0x2E, 0x24] + + IID_IWbemLocator = UInt8[ + 0x87, 0xA6, 0x12, 0xDC, + 0x7F, 0x73, + 0xCF, 0x11, + 0x88, 0x4D, 0x00, 0xAA, 0x00, 0x4B, 0x2E, 0x24] + + # Step 1: CoInitializeEx(0, COINIT_MULTITHREADED=0) + ccall(CoInitializeEx, Clong, (Ptr{Cvoid}, Culong), C_NULL, 0) + + # Step 2: CoInitializeSecurity(NULL, -1, NULL, NULL, + # RPC_C_AUTHN_LEVEL_DEFAULT=0, RPC_C_IMP_LEVEL_IMPERSONATE=3, + # NULL, EOAC_NONE=0, NULL) + ccall(CoInitializeSecurity, Clong, + (Ptr{Cvoid}, Clong, Ptr{Cvoid}, Ptr{Cvoid}, + Culong, Culong, Ptr{Cvoid}, Culong, Ptr{Cvoid}), + C_NULL, -1, C_NULL, C_NULL, 0, 3, C_NULL, 0, C_NULL) + + # Step 3: CoCreateInstance(&CLSID_WbemLocator, 0, CLSCTX_INPROC_SERVER=1, + # &IID_IWbemLocator, &locator) + locator = Ref{Ptr{Cvoid}}(C_NULL) + hr = ccall(CoCreateInstance, Clong, + (Ptr{UInt8}, Ptr{Cvoid}, Culong, Ptr{UInt8}, Ref{Ptr{Cvoid}}), + CLSID_WbemLocator, C_NULL, 1, IID_IWbemLocator, locator) + hr < 0 && error("CoCreateInstance failed: 0x$(string(hr % UInt32, base=16))") + + # vtable layout for IWbemLocator (inherits IUnknown): + # [0] QueryInterface [1] AddRef [2] Release [3] ConnectServer + vtbl_loc = unsafe_load(Ptr{Ptr{Ptr{Cvoid}}}(locator[])) + + resource = bstr("ROOT\\CIMV2") + services = Ref{Ptr{Cvoid}}(C_NULL) + # Step 4: locator->ConnectServer(locator, resource, NULL,NULL,NULL,0,NULL,NULL, &services) + hr = ccall(unsafe_load(vtbl_loc, 4), Clong, + (Ptr{Cvoid}, Ptr{UInt16}, Ptr{Cvoid}, Ptr{Cvoid}, + Ptr{Cvoid}, Clong, Ptr{Cvoid}, Ptr{Cvoid}, Ref{Ptr{Cvoid}}), + locator[], resource, C_NULL, C_NULL, C_NULL, 0, C_NULL, C_NULL, services) + hr < 0 && error("ConnectServer failed: 0x$(string(hr % UInt32, base=16))") + + # vtable layout for IWbemServices — ExecQuery is at index 20 (0-based) + vtbl_svc = unsafe_load(Ptr{Ptr{Ptr{Cvoid}}}(services[])) + + language = bstr("WQL") + query = bstr("SELECT Name FROM Win32_VideoController") + results = Ref{Ptr{Cvoid}}(C_NULL) + # Step 5: services->ExecQuery(services, language, query, + # WBEM_FLAG_BIDIRECTIONAL=0, NULL, &results) + hr = ccall(unsafe_load(vtbl_svc, 21), Clong, # 0-based index 20 -> 1-based 21 + (Ptr{Cvoid}, Ptr{UInt16}, Ptr{UInt16}, Clong, Ptr{Cvoid}, Ref{Ptr{Cvoid}}), + services[], language, query, 0, C_NULL, results) + hr < 0 && error("ExecQuery failed: 0x$(string(hr % UInt32, base=16))") + + # vtable layout for IEnumWbemClassObject: + # [0] QI [1] AddRef [2] Release [3] Reset [4] Next [5] NextAsync [6] Clone [7] Skip + vtbl_enum = unsafe_load(Ptr{Ptr{Ptr{Cvoid}}}(results[])) + + # vtable layout for IWbemClassObject::Get is at index 4 (0-based) + # [0] QI [1] AddRef [2] Release [3] GetQualifierSet [4] Get ... + + names = String[] + while true + result = Ref{Ptr{Cvoid}}(C_NULL) + returned = Ref{Culong}(0) + + # results->Next(results, WBEM_INFINITE=-1, 1, &result, &returned) + hr = ccall(unsafe_load(vtbl_enum, 5), Clong, # Next at 0-based 4 -> 1-based 5 + (Ptr{Cvoid}, Clong, Culong, Ref{Ptr{Cvoid}}, Ref{Culong}), + results[], -1, 1, result, returned) + (hr != 0 || returned[] == 0) && break # S_FALSE or no more objects + + vtbl_obj = unsafe_load(Ptr{Ptr{Ptr{Cvoid}}}(result[])) + + # VARIANT is 16 bytes: vt(2) + reserved(6) + value(8) + variant = zeros(UInt8, 16) + prop = bstr("Name") + + # result->Get(result, L"Name", 0, &variant, NULL, NULL) + hr = ccall(unsafe_load(vtbl_obj, 5), Clong, # Get at 0-based 4 -> 1-based 5 + (Ptr{Cvoid}, Ptr{UInt16}, Clong, Ptr{UInt8}, Ptr{Cvoid}, Ptr{Cvoid}), + result[], prop, 0, variant, C_NULL, C_NULL) + + if hr == 0 + vt = reinterpret(UInt16, variant[1:2])[1] + if vt == 8 # VT_BSTR + p = reinterpret(UInt64, variant[9:16])[1] + push!(names, read_bstr(Ptr{UInt16}(p))) + end + end + + free_bstr(prop) + ccall(unsafe_load(vtbl_obj, 3), Culong, (Ptr{Cvoid},), result[]) # Release + end + + # Cleanup — mirrors the C example exactly + ccall(unsafe_load(vtbl_enum, 3), Culong, (Ptr{Cvoid},), results[]) # results->Release + ccall(unsafe_load(vtbl_svc, 3), Culong, (Ptr{Cvoid},), services[]) # services->Release + ccall(unsafe_load(vtbl_loc, 3), Culong, (Ptr{Cvoid},), locator[]) # locator->Release + ccall(CoUninitialize, Cvoid, ()) + free_bstr(query); free_bstr(language); free_bstr(resource) + + return names +end + +function rocm_arch() + if Sys.islinux() + return rocm_arch_linux() + elseif Sys.iswindows() + arch = map(rocm_arch_from_device_name, wmi_query_video_controllers()) + filter!(!isempty, arch) + unique!(arch) + sort!(arch; rev = true) + return arch + end + + return String[] end function rocm_arch_comparison_strategy(a::String, b::String, a_requested::Bool, b_requested::Bool) @@ -56,7 +265,7 @@ function rocm_arch_matches(pattern::AbstractString, arch::AbstractString) length(pattern) == length(arch) || return false for (pattern_char, arch_char) in zip(pattern, arch) if lowercase(pattern_char) == 'x' - isxdigit(arch_char) || return false + isxdigit(arch_char) || lowercase(arch_char) == 'x' || return false elseif pattern_char != arch_char return false end diff --git a/Project.toml b/Project.toml index c73a8f69f..f90d6cfe4 100644 --- a/Project.toml +++ b/Project.toml @@ -62,6 +62,7 @@ LLD_jll = "15, 16, 17, 18, 19, 20" LLVM = "9" LLVM_jll = "15, 16, 17, 18, 19, 20" LazyArtifacts = "1.10" +Libdl = "1.10" Preferences = "1" PrettyTables = "3" Random123 = "1.6" diff --git a/test/Project.toml b/test/Project.toml index 6185fb8b9..7a93e7578 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -11,11 +11,12 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" +Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/test/runtests.jl b/test/runtests.jl index cfefaa211..075af829e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -32,8 +32,13 @@ include(joinpath(@__DIR__, "..", ".pkg", "platform_augmentation.jl")) @test rocm_arch_string(120000) == "gfx1200" @test rocm_arch_string(120001) == "gfx1201" @test rocm_arch_string(90010) == "gfx90a" + @test rocm_arch_from_device_name("AMD Radeon 8050S Graphics") == "gfx1151" + @test rocm_arch_from_device_name("AMD Radeon 890M Graphics") == "gfx1150" + @test rocm_arch_from_device_name("AMD Radeon RX 9070 XT") == "gfx120X" + @test rocm_arch_from_device_name("NVIDIA GeForce RTX 4090") == "" @test rocm_arch_comparison_strategy("gfx120X_all", "gfx1200", false, false) @test rocm_arch_comparison_strategy("gfx120X_all", "gfx1201", false, false) + @test rocm_arch_comparison_strategy("gfx120X_all", "gfx120X", false, false) @test rocm_arch_comparison_strategy("gfx90X_dcgpu", "gfx90a", false, false) @test rocm_arch_comparison_strategy("gfx94X_dcgpu", "gfx942", false, false) @test !rocm_arch_comparison_strategy("gfx120X_all", "gfx1100", false, false) From e2cf6e291352be3c582c3f3b4d010abd69e84b36 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 17:38:22 +0200 Subject: [PATCH 17/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index 25742539c..0e6a48cb6 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -35,7 +35,7 @@ jobs: add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-linux-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) @info "Windows, $rocm_arch" - platform = Platform("x86_64", "windows", Dict("libc" => "glibc", "rocm_arch" => replace(rocm_arch, "-" => "_"))) + platform = Platform("x86_64", "windows", Dict("rocm_arch" => replace(rocm_arch, "-" => "_"))) add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-windows-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) end - name: Upload hash report From e8651a068ad22713a7b3aaef625ef886b53a04b9 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 17:43:39 +0200 Subject: [PATCH 18/20] wip --- .github/workflows/rocm-artifact-hashes.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rocm-artifact-hashes.yml b/.github/workflows/rocm-artifact-hashes.yml index 0e6a48cb6..f7ed92949 100644 --- a/.github/workflows/rocm-artifact-hashes.yml +++ b/.github/workflows/rocm-artifact-hashes.yml @@ -34,6 +34,7 @@ jobs: platform = Platform("x86_64", "linux", Dict("libc" => "glibc", "rocm_arch" => replace(rocm_arch, "-" => "_"))) add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-linux-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) + startswith(rocm_arch, "gfx9") && continue @info "Windows, $rocm_arch" platform = Platform("x86_64", "windows", Dict("rocm_arch" => replace(rocm_arch, "-" => "_"))) add_artifact!("Artifacts.toml", "ROCm", "https://repo.amd.com/rocm/tarball/therock-dist-windows-$rocm_arch-7.13.0.tar.gz"; lazy = true, platform) From 724c791bb4431b68cc32b90a078643892fccafe7 Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 21 May 2026 21:01:30 +0200 Subject: [PATCH 19/20] update Artifacts.toml with Windows artifacts --- Artifacts.toml | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Artifacts.toml b/Artifacts.toml index 203545bb1..4bf93751f 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -55,6 +55,16 @@ rocm_arch = "gfx101x_dgpu" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx101X-dgpu-7.13.0.tar.gz" [[ROCm]] arch = "x86_64" +git-tree-sha1 = "2351804874d5724fa82a74007d46f9a15951a9be" +lazy = true +os = "windows" +rocm_arch = "gfx101x_dgpu" + + [[ROCm.download]] + sha256 = "0f13040ed76d00fafe0540d5e23bf7b06ee96c7f341edf13d637f176c24508cc" + url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx101X-dgpu-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" git-tree-sha1 = "472b6d4f2186b8363e4621b00d6e7a3d16d4a5ea" lazy = true libc = "glibc" @@ -66,6 +76,16 @@ rocm_arch = "gfx103x_all" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx103X-all-7.13.0.tar.gz" [[ROCm]] arch = "x86_64" +git-tree-sha1 = "5cc5dbbf08958113bec3e77b81ebf3254fc5e59b" +lazy = true +os = "windows" +rocm_arch = "gfx103x_all" + + [[ROCm.download]] + sha256 = "479ccc5318a092023338800cbac3fcc604afd6dd5a8582856705d558ae7f3035" + url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx103X-all-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" git-tree-sha1 = "7c7e3170d31e5360030a4c8bfbab39cf7bdb230b" lazy = true libc = "glibc" @@ -77,6 +97,16 @@ rocm_arch = "gfx110x_all" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx110X-all-7.13.0.tar.gz" [[ROCm]] arch = "x86_64" +git-tree-sha1 = "b502d69d1b873444498d3c398987e664212efffa" +lazy = true +os = "windows" +rocm_arch = "gfx110x_all" + + [[ROCm.download]] + sha256 = "3a27e7aa079c1c4ccc502be3aa1d89f9d2094726e4a75b731b9d590b075abf16" + url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx110X-all-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" git-tree-sha1 = "4684f33015a110d83fcfe9f52a5aa9f6c6ceb0a8" lazy = true libc = "glibc" @@ -88,6 +118,16 @@ rocm_arch = "gfx1150" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1150-7.13.0.tar.gz" [[ROCm]] arch = "x86_64" +git-tree-sha1 = "4221777696eb7833ca9fcfd4b56a7703a92f87c5" +lazy = true +os = "windows" +rocm_arch = "gfx1150" + + [[ROCm.download]] + sha256 = "7fae82e61375fdcd9c8eb83761e91d0603e26a5cf45ade62aa3aa92afd28495b" + url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx1150-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" git-tree-sha1 = "4d8726c11c5fa4f406cb151a8c729cf593d99afe" lazy = true libc = "glibc" @@ -99,6 +139,16 @@ rocm_arch = "gfx1151" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-7.13.0.tar.gz" [[ROCm]] arch = "x86_64" +git-tree-sha1 = "e6012913f4e5b177bb61862b7d56b6037683aa47" +lazy = true +os = "windows" +rocm_arch = "gfx1151" + + [[ROCm.download]] + sha256 = "9171d4bb2fdf9f7228c9658f5de93652d9c6e95974fe14de13b66ce77e8e0999" + url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx1151-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" git-tree-sha1 = "6060cf1f79c122f9ed5ccc27f43ddcac0203b7ce" lazy = true libc = "glibc" @@ -110,6 +160,16 @@ rocm_arch = "gfx1152" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1152-7.13.0.tar.gz" [[ROCm]] arch = "x86_64" +git-tree-sha1 = "d557c97f913bf700e8f6125a417fb29ce313621e" +lazy = true +os = "windows" +rocm_arch = "gfx1152" + + [[ROCm.download]] + sha256 = "3aeff35172973b174c379072c51a39cfce20a1695f2679691ea0567e68ba836a" + url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx1152-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" git-tree-sha1 = "8b8d69691b2469e8f890fe977fe9c01e33233b64" lazy = true libc = "glibc" @@ -119,3 +179,13 @@ rocm_arch = "gfx120x_all" [[ROCm.download]] sha256 = "150c3ed6eb51cda451a96d588ca04a15af2c8724fd5e692b705cba30faf4efcf" url = "https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx120X-all-7.13.0.tar.gz" +[[ROCm]] +arch = "x86_64" +git-tree-sha1 = "389c9194587a0fda638ac38ff71bb5e9c9e88f34" +lazy = true +os = "windows" +rocm_arch = "gfx120x_all" + + [[ROCm.download]] + sha256 = "4496f1e4667f162b1fc03dc94a9465d139f5a8dc1cc1dc472b43e594fb8db68d" + url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-gfx120X-all-7.13.0.tar.gz" From 963eb4ca5bc9ca96365f55e743487e9c69b0413e Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Tue, 26 May 2026 14:11:22 +0200 Subject: [PATCH 20/20] revert to using separate artifact for device libs --- Project.toml | 2 ++ src/discovery/discovery.jl | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f90d6cfe4..7b570f77c 100644 --- a/Project.toml +++ b/Project.toml @@ -26,6 +26,7 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Preferences = "21216c6a-2e73-6563-6e65-726566657250" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +ROCmDeviceLibs_jll = "873c0968-716b-5aa7-bb8d-d1e2e2aeff2d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Random123 = "74087812-796a-5b5d-8853-05524746bad3" RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143" @@ -65,6 +66,7 @@ LazyArtifacts = "1.10" Libdl = "1.10" Preferences = "1" PrettyTables = "3" +ROCmDeviceLibs_jll = "=5.6.1, =6.2.1, =7.0.2" Random123 = "1.6" RandomNumbers = "1.5" SpecialFunctions = "2" diff --git a/src/discovery/discovery.jl b/src/discovery/discovery.jl index 3909b47e3..d954bf569 100644 --- a/src/discovery/discovery.jl +++ b/src/discovery/discovery.jl @@ -5,6 +5,7 @@ export librocblas, librocsparse, librocsolver export librocrand, librocfft, libMIOpen_path using LLD_jll +using ROCmDeviceLibs_jll using LazyArtifacts using Preferences using Libdl @@ -26,6 +27,14 @@ function get_ld_lld(rocm_path::String)::Tuple{String, Bool} return (LLD_jll.lld_path, true) end +function get_device_libs(from_artifact::Bool; rocm_path::String) + if from_artifact && ROCmDeviceLibs_jll.is_available() + ROCmDeviceLibs_jll.bitcode_path + else + find_device_libs(rocm_path) + end +end + function _hip_runtime_version() v_ref = Ref{Cint}() res = ccall((:hipRuntimeGetVersion, libhip), UInt32, (Ptr{Cint},), v_ref) @@ -80,7 +89,9 @@ function __init__() global lld_artifact = lld_artifact global libhip = find_rocm_library(Sys.islinux() ? "libamdhip64" : "amdhip64"; rocm_path) - global libdevice_libs = find_device_libs(rocm_path) + # Always load artifact device libraries. + from_artifact = true + global libdevice_libs = get_device_libs(from_artifact; rocm_path) # HIP-based libraries. global librocblas = find_rocm_library(lib_prefix * "rocblas"; rocm_path)