Update tests and types (#28)

SCiarella · web-flow · commit d4c2768417dd · 2025-03-27T17:50:17.000+01:00
diff --git a/src/models.jl b/src/models.jl
@@ -294,10 +294,10 @@ function ((;)::CNO)(x, params, state)
     masks_down = state.masks_down
     masks_up = state.masks_up
     masks_bottlenecks = state.masks_bottlenecks
-    up_ch_ranges = Array(state.up_ch_ranges)
-    down_ch_ranges = Array(state.down_ch_ranges)
-    bottleneck_ranges = Array(state.bottleneck_ranges)
-    reversed_bottleneck_ranges = Array(state.reversed_bottleneck_ranges)
+    up_ch_ranges = Array(state.up_ch_ranges) |> Lux.cpu_device()
+    down_ch_ranges = Array(state.down_ch_ranges) |> Lux.cpu_device()
+    bottleneck_ranges = Array(state.bottleneck_ranges) |> Lux.cpu_device()
+    reversed_bottleneck_ranges = Array(state.reversed_bottleneck_ranges) |> Lux.cpu_device()
 
     # First thing to do is to crop the center of x along every dimension
     s0 = size(x)
diff --git a/test/test-activation.jl b/test/test-activation.jl
@@ -12,7 +12,6 @@ using CairoMakie: Figure, Axis, heatmap, save, heatmap!, GridLayout
 using Images: load
 using CUDA
 
-CUDA.allowscalar(false)
 
 # Setup initial image and parameters
 N0 = 512
@@ -94,6 +93,8 @@ if !CUDA.functional()
     @test "CUDA not functional, skipping GPU tests"
     return
 end
+CUDA.allowscalar(false)
+
 # Prepare for GPU tests
 u = CuArray(u)
 actlayer_identity = create_CNOactivation(T, D, N, cutoff, activation_function = identity)
diff --git a/test/test-convolution.jl b/test/test-convolution.jl
@@ -15,25 +15,19 @@ using FFTW: fft, ifft
 using ChainRulesCore
 
 rng = Random.Xoshiro(123)
-CUDA.allowscalar(false)
 
 function reference_convolve(x, k)
-    if k isa SubArray && parent(k) isa CuArray
-        k = CuArray(collect(k))
-    end
-    if x isa SubArray && parent(x) isa CuArray
-        x = CuArray(collect(x))
-    end
     fft_x = fft(x, (1, 2))
     fft_k = fft(k, (2, 3))
+    # For loops cannot be used here if the result has to be differentiable
     #    ffty = zeros(ComplexF32, size(x, 1), size(x, 2), size(k, 1), size(x, 4))
     #    for c = 1:size(k, 1)
     #        for ci = 1:size(x, 3)
     #            ffty[:,:,c,:] .+= fft_x[:, :, ci, :] .* fft_k[c, :, :]
     #        end
     #    end
 
-    # Can not use for loops if you want it to be differentiable
+    # Loop-free alternative
     ffty = [
         reduce(+, [fft_x[:, :, ci, :] .* fft_k[c, :, :] for ci = 1:size(x, 3)]) for
         c = 1:size(k, 1)
@@ -112,6 +106,8 @@ if !CUDA.functional()
     @test "CUDA not functional, skipping GPU tests"
     return
 end
+CUDA.allowscalar(false)
+
 @testset "Convolution (GPU)" begin
     @testset "Forward" begin
         x = CUDA.ones(Float32, 16, 16, 2, 1)
diff --git a/test/test-couplednode_posterior.jl b/test/test-couplednode_posterior.jl
@@ -168,6 +168,8 @@ if !CUDA.functional()
     @test "CUDA not functional, skipping GPU tests"
     return
 end
+CUDA.allowscalar(false)
+
 @testset "CoupledNODE integration (GPU)" begin
     # Create the model
     closure, θ_start, st = cno(
diff --git a/test/test-couplednode_prior.jl b/test/test-couplednode_prior.jl
@@ -0,0 +1,248 @@
+using Test
+using Adapt
+using Lux
+using LuxCUDA
+using JLD2
+using ConvolutionalNeuralOperators: create_CNOdownsampler, create_CNO
+using ComponentArrays: ComponentArray
+using Optimisers: Adam, ClipGrad, OptimiserChain
+using Optimization
+using Random
+using Zygote: Zygote
+using CUDA
+using CoupledNODE
+using IncompressibleNavierStokes
+using NeuralClosure
+using OrdinaryDiffEqTsit5
+
+rng = Random.Xoshiro(123)  # fixed seed; passed to `cno` as `rng`
+T = Float32  # element type of the model and of the test tensors
+N = 16  # size of the first two dimensions of the test input
+nles = 16  # number of grid intervals per dimension when building the setups
+D = 2  # size of the third dimension of the test input
+ch_ = [2, 2]  # passed to `cno` as `ch_sizes`
+act = [tanh_fast, identity]  # passed to `cno` as `activations`
+df = [2, 2]  # passed to `cno` as `down_factors`
+k_rad = [3, 3]  # passed to `cno` as `k_radii`
+bd = [2, 2, 2]  # passed to `cno` as `bottleneck_depths`
+cutoff = 10  # passed to `cno` as `cutoff`
+batch = 4  # size of the fourth (last) dimension of the test input
+
+@testset "CoupledNODE integration (CPU)" begin
+    # Build the CNO closure with its initial parameters and state (use_cuda = false)
+    closure, θ_start, st = cno(
+        T = T,
+        N = N,
+        D = D,
+        cutoff = cutoff,
+        ch_sizes = ch_,
+        activations = act,
+        down_factors = df,
+        k_radii = k_rad,
+        bottleneck_depths = bd,
+        rng = rng,
+        use_cuda = false,
+    )
+
+    # Smoke test: a random input batch must keep its shape through the model
+    input_tensor = rand(T, N, N, D, batch)
+    output = Lux.apply(closure, input_tensor, θ_start, st)[1]
+    @test size(output) == size(input_tensor)
+
+    # Read the configuration file and override its backend with the CPU one
+    NS = Base.get_extension(CoupledNODE, :NavierStokes)
+    conf = NS.read_config("./config.yaml")
+    conf["params"]["backend"] = CPU()
+
+    # Load the parameters; on the CPU, `device` is the identity
+    params = NS.load_params(conf)
+    device(x) = x
+
+    # Build the setup in the format expected by CoupledNODE
+    function getsetup(; params, nles)
+        Setup(;
+            x = ntuple(α -> range(params.lims..., nles + 1), params.D),
+            params.Re,
+            params.backend,
+            params.bodyforce,
+            params.issteadybodyforce,
+        )
+    end
+    setup = getsetup(; params, nles)
+    psolver = default_psolver(setup)
+    setup = []  # `setup` is rebound: from here on it is a list of `Setup`s
+    for nl in nles
+        x = ntuple(α -> LinRange(T(0.0), T(1.0), nl + 1), params.D)
+        push!(setup, Setup(; x = x, Re = params.Re, params.backend))
+    end
+
+    # Load the training data; each file is turned into a NamedTuple of its entries
+    function namedtupleload(file)
+        dict = load(file)
+        k, v = keys(dict), values(dict)
+        pairs = @. Symbol(k) => v
+        (; pairs...)
+    end
+    data_train = []
+    data_i = namedtupleload("data_train.jld2")
+    push!(data_train, hcat(data_i))
+
+    # Create the io arrays from the loaded data and the setups
+    NS = Base.get_extension(CoupledNODE, :NavierStokes)
+    io_train = NS.create_io_arrays_priori(data_train, setup)
+
+    # Create the dataloader and draw one batch from it
+    θ = device(copy(θ_start))
+    dataloader_prior = NS.create_dataloader_prior(
+        io_train[1];
+        batchsize = 4,
+        rng = Random.Xoshiro(24),
+        device = device,
+    )
+    train_data_priori = dataloader_prior()
+
+    l0 = CoupledNODE.loss_priori_lux(closure, θ, st, train_data_priori)[1]  # loss before training
+    @test isnan(l0) == false
+    loss = CoupledNODE.loss_priori_lux
+
+    # Final integration test of the entire train interface: the loss must drop below `l0`
+    l, trainstate = CoupledNODE.train(
+        closure,
+        θ,
+        st,
+        dataloader_prior,
+        loss;
+        nepochs = 20,
+        alg = OptimiserChain(Adam(T(1.0e-3)), ClipGrad(0.1)),
+        cpu = true,
+    )
+    @test isnan(l) == false
+    @test l < l0
+    @test trainstate.step == 20
+    @test any(isnan, trainstate.parameters) == false
+
+end
+
+if !CUDA.functional()
+    @test "CUDA not functional, skipping GPU tests"  # NOTE(review): `@test` expects a Bool, not a String — confirm intent
+    return  # NOTE(review): verify that a top-level `return` really skips the rest of this file
+end
+CUDA.allowscalar(false)  # disallow scalar indexing of GPU arrays in the GPU tests below
+
+@testset "CoupledNODE integration (GPU)" begin
+    # Build the CNO closure with its initial parameters and state (use_cuda = true)
+    closure, θ_start, st = cno(
+        T = T,
+        N = N,
+        D = D,
+        cutoff = cutoff,
+        ch_sizes = ch_,
+        activations = act,
+        down_factors = df,
+        k_radii = k_rad,
+        bottleneck_depths = bd,
+        rng = rng,
+        use_cuda = true,
+    )
+
+    # Smoke test: a random GPU batch must keep its shape and stay on the GPU
+    input_tensor = CUDA.rand(T, N, N, D, batch)
+    output = Lux.apply(closure, input_tensor, θ_start, st)[1]
+    @test size(output) == size(input_tensor)
+    @test isa(output, CuArray)
+
+    # Read the configuration file and override its backend with the CUDA one
+    NS = Base.get_extension(CoupledNODE, :NavierStokes)
+    conf = NS.read_config("./config.yaml")
+    conf["params"]["backend"] = CUDABackend()
+
+    # Load the parameters; `device` adapts its argument to the configured backend
+    params = NS.load_params(conf)
+    device(x) = adapt(params.backend, x)
+
+    # Build the setup in the format expected by CoupledNODE
+    function getsetup(; params, nles)
+        Setup(;
+            x = ntuple(α -> range(params.lims..., nles + 1), params.D),
+            params.Re,
+            params.backend,
+            params.bodyforce,
+            params.issteadybodyforce,
+        )
+    end
+    setup = getsetup(; params, nles)
+    psolver = default_psolver(setup)
+    setup = []  # `setup` is rebound: from here on it is a list of `Setup`s
+    for nl in nles
+        x = ntuple(α -> LinRange(T(0.0), T(1.0), nl + 1), params.D)
+        push!(setup, Setup(; x = x, Re = params.Re, params.backend))
+    end
+
+    # Load the training data; each file is turned into a NamedTuple of its entries
+    function namedtupleload(file)
+        dict = load(file)
+        k, v = keys(dict), values(dict)
+        pairs = @. Symbol(k) => v
+        (; pairs...)
+    end
+    data_train = []
+    data_i = namedtupleload("data_train.jld2")
+    push!(data_train, hcat(data_i))
+
+    # Create the io arrays from the loaded data and the setups
+    NS = Base.get_extension(CoupledNODE, :NavierStokes)
+    io_train = NS.create_io_arrays_priori(data_train, setup)
+
+    # Create the dataloader and check that the batch it yields lives on the GPU
+    θ = device(copy(θ_start))
+    dataloader_prior = NS.create_dataloader_prior(
+        io_train[1];
+        batchsize = 4,
+        rng = Random.Xoshiro(24),
+        device = device,
+    )
+    train_data_priori = dataloader_prior()
+    @test isa(train_data_priori[1], CuArray)
+    @test isa(train_data_priori[2], CuArray)
+
+    l0 = CoupledNODE.loss_priori_lux(closure, θ, st, train_data_priori)[1]  # loss before training
+    @test isnan(l0) == false
+    loss = CoupledNODE.loss_priori_lux
+    # Explicit Zygote pullback of a hand-written loss that is expected to match `l0`
+    function loss_pb(model, ps, st, (x, y), device = identity)
+        y_pred, st_ = Lux.apply(model, x, ps, st)[1:2]
+        return sum(abs2, y_pred - y) / sum(abs2, y)
+    end
+    y, back = Zygote.pullback(loss_pb, closure, θ, st, train_data_priori)
+    @test y == l0
+    y_bar = 1
+    _, θ_bar, _, _ = back(y_bar)
+    @test size(θ_bar) == size(θ)
+    @test !iszero(sum(θ_bar))  # value check; `!== 0.0` also passes on a mere type mismatch
+
+    # Single optimisation step through the Lux training API
+    tstate = Lux.Training.TrainState(closure, θ, st, Adam(T(1.0e-3))) |> Lux.gpu_device()
+    data = dataloader_prior()
+    _, l, _, tstate =
+        Lux.Training.single_train_step!(Optimization.AutoZygote(), loss, data, tstate) |>
+        Lux.gpu_device()
+    @test isnan(l) == false
+    @test l < 2 * l0
+    @test tstate.step == 1
+
+    # Final integration test of the entire train interface
+    l, trainstate = CoupledNODE.train(
+        closure,
+        θ,
+        st,
+        dataloader_prior,
+        loss;
+        nepochs = 20,
+        alg = Adam(T(1.0e-3)),
+    )
+    @test isnan(l) == false
+    @test l < 2 * l0
+    @test trainstate.step == 20
+    @test any(isnan, trainstate.parameters) == false
+
+end
diff --git a/test/test-fullmodel.jl b/test/test-fullmodel.jl
@@ -40,7 +40,6 @@ cutoff = 10
 
 
 @testset "Full model (CPU)" begin
-    return
 
     @testset "Full CNO model" begin
         model, θ, st = cno(
@@ -89,6 +88,7 @@ if !CUDA.functional()
     @test "CUDA not functional, skipping GPU tests"
     return
 end
+CUDA.allowscalar(false)
 @testset "Full model (GPU)" begin
 
     @testset "Full CNO model" begin
@@ -105,15 +105,11 @@ end
             rng = rng,
             use_cuda = true,
         )
-        @info typeof(model)
-        @info typeof(θ)
-        @info typeof(st)
 
         u_gpu = CuArray(u)
-        y, zz = model(u_gpu, θ, st)
-        @info typeof(y)
-        @info size(y)
-        #        @test size(model(u_gpu, θ, st)[1:1]) == size(u)
+        y, _ = model(u_gpu, θ, st)
+        @test size(y) == size(u)
+        @test isa(y, CuArray)
 
 
         return
diff --git a/test/test-maskedconvolution.jl b/test/test-maskedconvolution.jl
@@ -16,7 +16,6 @@ using FFTW: fft, ifft
 using ChainRulesCore
 
 rng = Random.Xoshiro(123)
-CUDA.allowscalar(false)
 
 
 @testset "Masked-Convolution (CPU)" begin
@@ -99,6 +98,7 @@ if !CUDA.functional()
     @test "CUDA not functional, skipping GPU tests"
     return
 end
+CUDA.allowscalar(false)
 
 @testset "Masked-Convolution (GPU)" begin
     @testset "Forward" begin
diff --git a/test/test-residualblock.jl b/test/test-residualblock.jl
@@ -7,7 +7,6 @@ using Zygote: Zygote
 using ComponentArrays: ComponentArray
 using CUDA
 
-CUDA.allowscalar(false)
 
 x = rand(Float32, 16, 16, 2, 1)
 k_bottlenecks = rand(Float32, 100, 16, 16)
@@ -52,6 +51,8 @@ if !CUDA.functional()
     @test "CUDA not functional, skipping GPU tests"
     return
 end
+CUDA.allowscalar(false)
+
 # Prepare for GPU tests
 x = CUDA.rand(Float32, 16, 16, 2, 1)
 k_bottlenecks = CUDA.rand(Float32, 100, 16, 16)
diff --git a/test/test-training.jl b/test/test-training.jl
diff --git a/test/test-updown.jl b/test/test-updown.jl