Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "ForwardDiff"
uuid = "f6369f11-7733-5829-9624-2563aa707210"
version = "1.4.1"
version = "1.4.2"

[deps]
CommonSubexpressions = "bbf7d656-a473-5ed7-a52c-81e309532950"
Expand Down Expand Up @@ -28,6 +28,7 @@ DiffRules = "1.4"
DiffTests = "0.1"
IrrationalConstants = "0.1, 0.2"
JET = "0.9, 0.10, 0.11"
JLArrays = "0.1, 0.2"
LogExpFunctions = "0.3, 1"
NaNMath = "1"
Preferences = "1"
Expand All @@ -41,9 +42,10 @@ DiffTests = "de460e47-3fe3-5279-bb4a-814414816d5d"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Calculus", "DiffTests", "IrrationalConstants", "JET", "SparseArrays", "StaticArrays", "Test", "InteractiveUtils"]
test = ["Calculus", "DiffTests", "IrrationalConstants", "JET", "JLArrays", "SparseArrays", "StaticArrays", "Test", "InteractiveUtils"]
42 changes: 39 additions & 3 deletions src/apiutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,21 @@ function structural_eachindex(x::Diagonal, y::AbstractArray)
return diagind(x)
end

# Dense arrays with isbits values need no structural-index or unset-element
# handling, so they can be seeded with broadcast/`map!` over contiguous views.
# This keeps GPU arrays (`AbstractGPUArray <: DenseArray`) free of scalar
# indexing and avoids the O(index) `Iterators.drop` walk of the structural
# path. Broadcast is fastest for the bulk writes; the chunk writes use `map!`
# with an index range because slicing the seeds tuple at runtime allocates.
@inline function dense_seedable(duals, x, ::Type{V}) where {V}
duals isa DenseArray && isbitstype(V) && !Base.has_offset_axes(duals, x)
end

function seed!(duals::AbstractArray{Dual{T,V,N}}, x,
seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N}
if isbitstype(V)
if dense_seedable(duals, x, V) && axes(duals) == axes(x)
duals .= Dual{T,V,N}.(x, Ref(seed))
elseif isbitstype(V)
for idx in structural_eachindex(duals, x)
duals[idx] = Dual{T,V,N}(x[idx], seed)
end
Expand All @@ -90,7 +102,12 @@ end

function seed!(duals::AbstractArray{Dual{T,V,N}}, x,
seeds::NTuple{N,Partials{N,V}}) where {T,V,N}
if isbitstype(V)
if dense_seedable(duals, x, V)
length(duals) == length(x) || throw(DimensionMismatch())
dual_inds = 1:min(N, length(duals))
map!((xi, i) -> Dual{T,V,N}(xi, seeds[i]),
view(duals, dual_inds), view(x, dual_inds), dual_inds)
elseif isbitstype(V)
for (i, idx) in zip(1:N, structural_eachindex(duals, x))
duals[idx] = Dual{T,V,N}(x[idx], seeds[i])
end
Expand All @@ -106,10 +123,21 @@ function seed!(duals::AbstractArray{Dual{T,V,N}}, x,
return duals
end

# Writes at most N elements starting at `index`: chunk mode only ever needs to
# clear the N-wide chunk it just seeded, so writing through to the end of the
# array would be O(n) redundant work per chunk (O(n^2) per sweep).
function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index,
seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N}
if dense_seedable(duals, x, V)
length(duals) == length(x) || throw(DimensionMismatch())
dual_inds = index:min(index + N - 1, length(duals))
# map! rather than broadcast: the dotview allocates under
# --check-bounds=yes on Julia 1.10
map!(xi -> Dual{T,V,N}(xi, seed), view(duals, dual_inds), view(x, dual_inds))
return duals
end
offset = index - 1
idxs = Iterators.drop(structural_eachindex(duals, x), offset)
idxs = Iterators.take(Iterators.drop(structural_eachindex(duals, x), offset), N)
if isbitstype(V)
for idx in idxs
duals[idx] = Dual{T,V,N}(x[idx], seed)
Expand All @@ -128,6 +156,14 @@ end

function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index,
seeds::NTuple{N,Partials{N,V}}, chunksize = N) where {T,V,N}
if dense_seedable(duals, x, V)
length(duals) == length(x) || throw(DimensionMismatch())
shift = index - 1
dual_inds = (1 + shift):min(shift + chunksize, length(duals))
map!((xi, i) -> Dual{T,V,N}(xi, seeds[i - shift]),
view(duals, dual_inds), view(x, dual_inds), dual_inds)
return duals
end
offset = index - 1
idxs = Iterators.drop(structural_eachindex(duals, x), offset)
if isbitstype(V)
Expand Down
56 changes: 56 additions & 0 deletions test/GPUArraysTest.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
module GPUArraysTest

using ForwardDiff, Test
using JLArrays

# JLArrays emulates GPU array semantics (including the scalar-indexing ban) on
# the CPU, so the dense-array `seed!` fast paths can be exercised on a GPU-like
# array type without a physical GPU. GPU arrays are covered by the dense fast
# paths because `AbstractGPUArray <: DenseArray`.
JLArrays.allowscalar(false)

@testset "ForwardDiff seeding on GPU arrays" begin
f(x) = x .^ 2 .+ 2 .* x

@testset "jacobian, vector mode (length $n)" for n in (1, 4, 8)
x = collect(Float64, 1:n)
@test Array(ForwardDiff.jacobian(f, JLArray(x))) == ForwardDiff.jacobian(f, x)
end

# lengths above the chunk size exercise the chunked `seed!` methods
@testset "jacobian, chunk mode (length $n, chunk $c)" for n in (16, 20, 27), c in (4, 8)
x = collect(Float64, 1:n)
cfg = ForwardDiff.JacobianConfig(f, JLArray(x), ForwardDiff.Chunk{c}())
@test Array(ForwardDiff.jacobian(f, JLArray(x), cfg)) == ForwardDiff.jacobian(f, x)
end

@testset "jacobian! into a GPU array (length $n)" for n in (4, 16)
x = collect(Float64, 1:n)
out = JLArray(zeros(n, n))
ForwardDiff.jacobian!(out, f, JLArray(x))
@test Array(out) == ForwardDiff.jacobian(f, x)
end

@testset "jacobian of f! with GPU input and output" begin
f!(y, x) = (y .= x .^ 2 .+ 2 .* x; nothing)
x = collect(Float64, 1:8)
y = zeros(8)
J = ForwardDiff.jacobian(f!, JLArray(y), JLArray(x))
@test Array(J) == ForwardDiff.jacobian(f!, y, x)
end

@testset "jacobian with matrix input (chunk $c)" for c in (3, 6)
X = reshape(collect(Float64, 1:12), 4, 3)
g(x) = x .* sum(x)
cfg = ForwardDiff.JacobianConfig(g, JLArray(X), ForwardDiff.Chunk{c}())
@test Array(ForwardDiff.jacobian(g, JLArray(X), cfg)) ≈ ForwardDiff.jacobian(g, X)
end

@testset "jacobian with view input" begin
X = JLArray(reshape(collect(Float64, 1:18), 6, 3))
xv = view(X, :, 2)
@test Array(ForwardDiff.jacobian(f, xv)) == ForwardDiff.jacobian(f, Array(xv))
end
end

end # module
5 changes: 5 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ Random.seed!(SEED)
t = @elapsed include("ConfusionTest.jl")
println("##### done (took $t seconds).")
end
@testset "GPUArrays" begin
println("##### Testing seeding on GPU arrays...")
t = @elapsed include("GPUArraysTest.jl")
println("##### done (took $t seconds).")
end
@testset "Miscellaneous" begin
println("##### Testing miscellaneous functionality...")
t = @elapsed include("MiscTest.jl")
Expand Down
Loading