From 4f55dc8b89c9efa3884f68af9dcb0a7a0aac3c11 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Fri, 30 Dec 2022 20:02:51 -0500 Subject: [PATCH 1/5] faster path for GPU creation --- Project.toml | 2 +- src/onehot.jl | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 94da4fc..73ca990 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "OneHotArrays" uuid = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f" -version = "0.2.2" +version = "0.2.3" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" diff --git a/src/onehot.jl b/src/onehot.jl index ca2efa5..5f06878 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -110,6 +110,16 @@ function onehotbatch(data::AbstractArray{<:Integer}, labels::AbstractUnitRange{< return OneHotArray(indices, length(labels)) end +function onehotbatch(data::AbstractGPUArray{<:Integer}, labels::AbstractUnitRange{<:Integer}) + offset = 1 - first(labels) + # The bounds check with extrema synchronises, often 10x slower than rest of the function. 
+ indices = map(data) do datum + checkbounds(labels, datum) + UInt32(datum + offset) + end + return OneHotArray(indices, length(labels)) +end + """ onecold(y::AbstractArray, labels = 1:size(y,1)) From 6d02bbe42e996716349ed1e53a0c9cb08ed2fc0a Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Fri, 30 Dec 2022 20:36:55 -0500 Subject: [PATCH 2/5] fixup --- src/onehot.jl | 12 ++++++------ test/gpu.jl | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/onehot.jl b/src/onehot.jl index 5f06878..5cd85cb 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -101,21 +101,21 @@ function _onehotbatch(data, labels, default) end function onehotbatch(data::AbstractArray{<:Integer}, labels::AbstractUnitRange{<:Integer}) - # lo, hi = extrema(data) # fails on Julia 1.6 - lo, hi = minimum(data), maximum(data) + lo, hi = extrema(data) # fails on Julia 1.6 lo < first(labels) && error("Value $lo not found in labels") hi > last(labels) && error("Value $hi not found in labels") offset = 1 - first(labels) indices = UInt32.(data .+ offset) return OneHotArray(indices, length(labels)) end - +# That bounds check with extrema synchronises on GPU, much slower than rest of the function, +# hence add a special method, with a less helpful error message: function onehotbatch(data::AbstractGPUArray{<:Integer}, labels::AbstractUnitRange{<:Integer}) offset = 1 - first(labels) - # The bounds check with extrema synchronises, often 10x slower than rest of the function. 
indices = map(data) do datum - checkbounds(labels, datum) - UInt32(datum + offset) + i = UInt32(datum + offset) + checkbounds(labels, i) + i end return OneHotArray(indices, length(labels)) end diff --git a/test/gpu.jl b/test/gpu.jl index cd04815..85b94b8 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -30,7 +30,7 @@ end y1 = onehotbatch([1, 3, 0, 2], 0:9) |> cu y2 = onehotbatch([1, 3, 0, 2] |> cu, 0:9) @test y1.indices == y2.indices - @test_broken y1 == y2 + @test_broken y1 == y2 # issue 28 @test_throws Exception onehotbatch([1, 3, 0, 2] |> cu, 1:10) @test_throws Exception onehotbatch([1, 3, 0, 2] |> cu, -2:2) From 11b3feb39e131a953a47292d1fb10918a898fb39 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Fri, 30 Dec 2022 22:10:00 -0500 Subject: [PATCH 3/5] skip tests with real CUDA --- test/gpu.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/gpu.jl b/test/gpu.jl index 85b94b8..681353f 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -32,8 +32,12 @@ end @test y1.indices == y2.indices @test_broken y1 == y2 # issue 28 - @test_throws Exception onehotbatch([1, 3, 0, 2] |> cu, 1:10) - @test_throws Exception onehotbatch([1, 3, 0, 2] |> cu, -2:2) + if !CUDA.functional() + # Here CUDA gives an error which @test_throws does not notice, + # although with JLArrays @test_throws works fine. 
+ @test_throws Exception onehotbatch([1, 3, 0, 2] |> cu, 1:10) + @test_throws Exception onehotbatch([1, 3, 0, 2] |> cu, -2:2) + end end @testset "onecold gpu" begin From 93a0e36a7952ddc6afc0b61fa6c86d3a61959236 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sat, 31 Dec 2022 16:55:30 -0500 Subject: [PATCH 4/5] indent Co-authored-by: Brian Chen --- src/onehot.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/onehot.jl b/src/onehot.jl index 5cd85cb..5319810 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -113,10 +113,10 @@ end function onehotbatch(data::AbstractGPUArray{<:Integer}, labels::AbstractUnitRange{<:Integer}) offset = 1 - first(labels) indices = map(data) do datum - i = UInt32(datum + offset) - checkbounds(labels, i) - i - end + i = UInt32(datum + offset) + checkbounds(labels, i) + i + end return OneHotArray(indices, length(labels)) end From 49561f9b3648e713e28084561b8193bd7d5500d1 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Sat, 31 Dec 2022 16:56:12 -0500 Subject: [PATCH 5/5] rm comment --- src/onehot.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/onehot.jl b/src/onehot.jl index 5319810..d2d5e9d 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -101,7 +101,7 @@ function _onehotbatch(data, labels, default) end function onehotbatch(data::AbstractArray{<:Integer}, labels::AbstractUnitRange{<:Integer}) - lo, hi = extrema(data) # fails on Julia 1.6 + lo, hi = extrema(data) lo < first(labels) && error("Value $lo not found in labels") hi > last(labels) && error("Value $hi not found in labels") offset = 1 - first(labels)