From e8ddf082b7bd4e642087fc52eab4f6bbeefad3e4 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 16 Jun 2026 11:52:59 +0200 Subject: [PATCH 1/4] Attempt to fix AMD 1.10 conj --- ext/StridedGPUArraysExt.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/ext/StridedGPUArraysExt.jl b/ext/StridedGPUArraysExt.jl index 04728e8..4ee2510 100644 --- a/ext/StridedGPUArraysExt.jl +++ b/ext/StridedGPUArraysExt.jl @@ -105,6 +105,22 @@ function Strided._mapreduce( return Array(out)[1] end +@static if VERSION < v"1.11.0-rc" + function substitute_ops(ops) + # work around compiler issue on AMD on 1.10 + f_conj(x) = real(x)-imag(x)*im + return map(ops) do op + if op == conj + return f_conj + else + return op + end + end + end +else + substitute_ops(ops) = ops +end + function Strided._mapreduce_block!( f, op, initop, dims::Dims{N}, @@ -125,9 +141,8 @@ function Strided._mapreduce_block!( backend = KernelAbstractions.get_backend(parent(out)) kernel! = _mapreduce_gpu_kernel!(backend) - ops = getproperty.(arrays, :op) + ops = substitute_ops(getproperty.(arrays, :op)) kernel!(f, op, initop, dims_red, strides, offsets, ops, parent.(arrays); ndrange = dims_out) - return nothing end From 2a54e782bcc0aea6636b6f1c451459572f167167 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 16 Jun 2026 12:08:00 +0200 Subject: [PATCH 2/4] Formatter --- ext/StridedGPUArraysExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/StridedGPUArraysExt.jl b/ext/StridedGPUArraysExt.jl index 4ee2510..78e7dfb 100644 --- a/ext/StridedGPUArraysExt.jl +++ b/ext/StridedGPUArraysExt.jl @@ -108,7 +108,7 @@ end @static if VERSION < v"1.11.0-rc" function substitute_ops(ops) # work around compiler issue on AMD on 1.10 - f_conj(x) = real(x)-imag(x)*im + f_conj(x) = real(x) - imag(x) * im return map(ops) do op if op == conj return f_conj From aec065d535f419340c3b9245e4c90d71e3fb7e6f Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 17 Jun 2026 02:41:44 -0400 Subject: [PATCH 3/4] Cleanup --- ext/StridedAMDGPUExt.jl | 12 +++++++++++- ext/StridedGPUArraysExt.jl | 20 ++------------------ src/mapreduce.jl | 9 +++++++++ 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/ext/StridedAMDGPUExt.jl b/ext/StridedAMDGPUExt.jl index 627446e..6cdd281 100644 --- a/ext/StridedAMDGPUExt.jl +++ b/ext/StridedAMDGPUExt.jl @@ -1,7 +1,7 @@ module StridedAMDGPUExt using Strided, StridedViews, AMDGPU, AMDGPU.rocBLAS, LinearAlgebra -import Strided: blas_mul! +import Strided: blas_mul!, substitute_op const ROCStridedView{T, N, A <: ROCArray{T}} = StridedViews.StridedView{T, N, A} @@ -16,4 +16,14 @@ function Strided.blas_mul!(C::ROCStridedView{T, 2}, A::ROCStridedView{T, 2}, B:: return C end +_conj(x) = real(x) - imag(x) * im +@static if VERSION < v"1.11.0-rc" + function substitute_op(::Type{<:ROCStridedView}, op) + # work around compiler issue on AMD on 1.10 + return op == conj ? _conj : op + end +else + substitute_op(::Type{<:ROCStridedView}, op) = op +end + end diff --git a/ext/StridedGPUArraysExt.jl b/ext/StridedGPUArraysExt.jl index 78e7dfb..23a18b5 100644 --- a/ext/StridedGPUArraysExt.jl +++ b/ext/StridedGPUArraysExt.jl @@ -5,7 +5,7 @@ using GPUArrays: Adapt, KernelAbstractions using GPUArrays.KernelAbstractions: @kernel, @index using StridedViews: ParentIndex -import Strided: isblasmatrix +import Strided: isblasmatrix, substitute_op ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)} @@ -105,22 +105,6 @@ function Strided._mapreduce( return Array(out)[1] end -@static if VERSION < v"1.11.0-rc" - function substitute_ops(ops) - # work around compiler issue on AMD on 1.10 - f_conj(x) = real(x) - imag(x) * im - return map(ops) do op - if op == conj - return f_conj - else - return op - end - end - end -else - substitute_ops(ops) = ops -end - function Strided._mapreduce_block!( f, op, initop, dims::Dims{N}, @@ -141,7 +125,7 @@ function Strided._mapreduce_block!( backend = KernelAbstractions.get_backend(parent(out)) kernel! = _mapreduce_gpu_kernel!(backend) - ops = substitute_ops(getproperty.(arrays, :op)) + ops = map(Base.Fix1(substitute_op, typeof(out)), getproperty.(arrays, :op)) kernel!(f, op, initop, dims_red, strides, offsets, ops, parent.(arrays); ndrange = dims_out) return nothing end diff --git a/src/mapreduce.jl b/src/mapreduce.jl index 0db5dcf..36d6861 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -7,6 +7,15 @@ LinearAlgebra.transpose!(C::StridedView, A::StridedView) = copy!(C, transpose(A) Base.permutedims!(dst::StridedView, src::StridedView, p) = copy!(dst, permutedims(src, p)) Base.fill!(A::StridedView, val) = map!(Returns(val), A, A) +# This is a wrapper function intended to allow us to +# intercept "conj" and rewrite it in cases where the +# GPU compiler seemingly isn't compiling bare conj +# correctly (https://github.com/QuantumKitHub/Strided.jl/issues/63). +# It should be removed as soon as the underlying +# compilation problem is resolved. It uses the first +# argument to dispatch so that only AMD arrays are affected. +substitute_op(::Type{<:StridedView}, op) = op + function Base.mapreduce(f, op, A::StridedView; dims = :, kw...) return Base._mapreduce_dim(f, op, values(kw), A, dims) end From f1b70b67d57a39bb6b393341a2d3912ebfcce471 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 17 Jun 2026 08:02:56 -0400 Subject: [PATCH 4/4] Apply suggestion --- ext/StridedAMDGPUExt.jl | 10 ++++------ ext/StridedGPUArraysExt.jl | 4 ++-- src/mapreduce.jl | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/ext/StridedAMDGPUExt.jl b/ext/StridedAMDGPUExt.jl index 6cdd281..e14d804 100644 --- a/ext/StridedAMDGPUExt.jl +++ b/ext/StridedAMDGPUExt.jl @@ -1,7 +1,7 @@ module StridedAMDGPUExt using Strided, StridedViews, AMDGPU, AMDGPU.rocBLAS, LinearAlgebra -import Strided: blas_mul!, substitute_op +import Strided: blas_mul!, _get_op const ROCStridedView{T, N, A <: ROCArray{T}} = StridedViews.StridedView{T, N, A} @@ -18,12 +18,10 @@ end _conj(x) = real(x) - imag(x) * im @static if VERSION < v"1.11.0-rc" - function substitute_op(::Type{<:ROCStridedView}, op) - # work around compiler issue on AMD on 1.10 - return op == conj ? _conj : op - end + # work around compiler issue on AMD on 1.10 + _get_op(A::ROCStridedView) = A.op == conj ? _conj : A.op else - substitute_op(::Type{<:ROCStridedView}, op) = op + _get_op(A::ROCStridedView) = A.op end end diff --git a/ext/StridedGPUArraysExt.jl b/ext/StridedGPUArraysExt.jl index 23a18b5..b65313d 100644 --- a/ext/StridedGPUArraysExt.jl +++ b/ext/StridedGPUArraysExt.jl @@ -5,7 +5,7 @@ using GPUArrays: Adapt, KernelAbstractions using GPUArrays.KernelAbstractions: @kernel, @index using StridedViews: ParentIndex -import Strided: isblasmatrix, substitute_op +import Strided: isblasmatrix, _get_op ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)} @@ -125,7 +125,7 @@ function Strided._mapreduce_block!( backend = KernelAbstractions.get_backend(parent(out)) kernel! = _mapreduce_gpu_kernel!(backend) - ops = map(Base.Fix1(substitute_op, typeof(out)), getproperty.(arrays, :op)) + ops = _get_op.(arrays) kernel!(f, op, initop, dims_red, strides, offsets, ops, parent.(arrays); ndrange = dims_out) return nothing end diff --git a/src/mapreduce.jl b/src/mapreduce.jl index 36d6861..9182ee8 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -14,7 +14,7 @@ Base.fill!(A::StridedView, val) = map!(Returns(val), A, A) # It should be removed as soon as the underlying # compilation problem is resolved. It uses the first # argument to dispatch so that only AMD arrays are affected. -substitute_op(::Type{<:StridedView}, op) = op +_get_op(A::StridedView) = A.op function Base.mapreduce(f, op, A::StridedView; dims = :, kw...) return Base._mapreduce_dim(f, op, values(kw), A, dims)