Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions test/staticsize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,71 @@ end
@test sum2_10turbo(A) ≈ sum(A)
end
end

# Regression test for issue #543: nested VecUnroll store with W=1 on ARM.
# Covers the case where the vector width is 1 (i.e. scalar) combined with nested unrolling.
function issue543_noavx!(data_out, matrix, data_in)
for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)
res = zero(eltype(data_out))
for jj in axes(matrix, 2)
res += matrix[j, jj] * data_in[v, i, jj]
end
data_out[v, i, j] = res
end
return nothing
end

function issue543_turbo!(data_out, matrix, data_in)
@turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)
res = zero(eltype(data_out))
for jj in axes(matrix, 2)
res += matrix[j, jj] * data_in[v, i, jj]
end
data_out[v, i, j] = res
end
return nothing
end

@testset "Issue #543: W=1 Nested VecUnroll" begin
# Test with static first dimension
for v in 1:4, n in 2:8
data_out_ref = StrideArray(undef, StaticInt(v), StaticInt(n), StaticInt(n))
data_out_turbo = StrideArray(undef, StaticInt(v), StaticInt(n), StaticInt(n))
matrix = StrideArray(undef, StaticInt(n), StaticInt(n))
data_in = rand(v, n, n)

matrix .= rand.()

fill!(data_out_ref, 0.0)
fill!(data_out_turbo, 0.0)

issue543_noavx!(data_out_ref, matrix, data_in)

# This is broken on Apple ARM CPUs (Apple M series) for some reason.
# TODO: Fix the underlying issue!
if (v == 1) && Sys.isapple() && Sys.ARCH == :aarch64
@test_skip issue543_turbo!(data_out_turbo, matrix, data_in)
else
@test_nowarn issue543_turbo!(data_out_turbo, matrix, data_in)
@test data_out_turbo ≈ data_out_ref
end
end

# Test with non-static first but static other dimensions
for v in 1:4, n in 2:8
data_out_ref = StrideArray(undef, v, StaticInt(n), StaticInt(n))
data_out_turbo = StrideArray(undef, v, StaticInt(n), StaticInt(n))
matrix = StrideArray(undef, StaticInt(n), StaticInt(n))
data_in = rand(v, n, n)

matrix .= rand.()

fill!(data_out_ref, 0.0)
fill!(data_out_turbo, 0.0)

issue543_noavx!(data_out_ref, matrix, data_in)
issue543_turbo!(data_out_turbo, matrix, data_in)

@test data_out_turbo ≈ data_out_ref
end
end
Loading