From a510ff64fb0ba7cad579317a105346cadad1f862 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Wed, 29 Oct 2025 18:16:30 +0100 Subject: [PATCH 1/2] Improve TLSPH performance on GPUs --- src/general/abstract_system.jl | 24 ++++--- .../fluid/weakly_compressible_sph/system.jl | 12 ++-- .../total_lagrangian_sph/penalty_force.jl | 17 ++--- .../structure/total_lagrangian_sph/rhs.jl | 27 ++++---- .../structure/total_lagrangian_sph/system.jl | 62 ++++++++++--------- .../structure/total_lagrangian_sph/rhs.jl | 15 +++-- test/systems/tlsph_system.jl | 5 +- 7 files changed, 91 insertions(+), 71 deletions(-) diff --git a/src/general/abstract_system.jl b/src/general/abstract_system.jl index 1d0bc2813f..91fb139470 100644 --- a/src/general/abstract_system.jl +++ b/src/general/abstract_system.jl @@ -53,26 +53,30 @@ end initialize!(system, semi) = system # This should not be dispatched by system type. We always expect to get a column of `A`. -@propagate_inbounds function extract_svector(A, system, i) - extract_svector(A, Val(ndims(system)), i) +@propagate_inbounds function extract_svector(A, system, i...) + extract_svector(A, Val(ndims(system)), i...) end # Return the `i`-th column of the array `A` as an `SVector`. -@inline function extract_svector(A, ::Val{NDIMS}, i) where {NDIMS} +@inline function extract_svector(A, ::Val{NDIMS}, i...) where {NDIMS} # Explicit bounds check, which can be removed by calling this function with `@inbounds` - @boundscheck checkbounds(A, NDIMS, i) + @boundscheck checkbounds(A, NDIMS, i...) # Assume inbounds access now - return SVector(ntuple(@inline(dim->@inbounds A[dim, i]), NDIMS)) + return SVector(ntuple(@inline(dim->@inbounds A[dim, i...]), NDIMS)) end # Return `A[:, :, i]` as an `SMatrix`. @inline function extract_smatrix(A, system, particle) + @boundscheck checkbounds(A, ndims(system), ndims(system), particle) + # Extract the matrix elements for this particle as a tuple to pass to SMatrix return SMatrix{ndims(system), - ndims(system)}(ntuple(@inline(i->A[mod(i - 1, ndims(system)) + 1, - div(i - 1, ndims(system)) + 1, - particle]), + ndims(system)}(ntuple(@inline(i->@inbounds A[mod(i - 1, + ndims(system)) + 1, + div(i - 1, + ndims(system)) + 1, + particle]), Val(ndims(system)^2))) end @@ -86,7 +90,7 @@ end @inline current_coordinates(u, system) = u # Specifically get the initial coordinates of a particle for all system types -@inline function initial_coords(system, particle) +@propagate_inbounds function initial_coords(system, particle) return extract_svector(initial_coordinates(system), system, particle) end @@ -102,7 +106,7 @@ end # By default, try to extract it from `v`. @inline current_velocity(v, system) = v -@inline function current_density(v, system::AbstractSystem, particle) +@propagate_inbounds function current_density(v, system::AbstractSystem, particle) return current_density(v, system)[particle] end diff --git a/src/schemes/fluid/weakly_compressible_sph/system.jl b/src/schemes/fluid/weakly_compressible_sph/system.jl index 5462a663aa..c03c1ecd75 100644 --- a/src/schemes/fluid/weakly_compressible_sph/system.jl +++ b/src/schemes/fluid/weakly_compressible_sph/system.jl @@ -244,7 +244,7 @@ end system_correction(system::WeaklyCompressibleSPHSystem) = system.correction -@inline function current_velocity(v, system::WeaklyCompressibleSPHSystem) +@propagate_inbounds function current_velocity(v, system::WeaklyCompressibleSPHSystem) return current_velocity(v, system.density_calculator, system) end @@ -254,14 +254,14 @@ end return v end -@inline function current_velocity(v, ::ContinuityDensity, - system::WeaklyCompressibleSPHSystem) +@propagate_inbounds function current_velocity(v, ::ContinuityDensity, + system::WeaklyCompressibleSPHSystem) # When using `ContinuityDensity`, the velocity is stored # in the first `ndims(system)` rows of `v`. return view(v, 1:ndims(system), :) end -@inline function current_density(v, system::WeaklyCompressibleSPHSystem) +@propagate_inbounds function current_density(v, system::WeaklyCompressibleSPHSystem) return current_density(v, system.density_calculator, system) end @@ -271,8 +271,8 @@ end return system.cache.density end -@inline function current_density(v, ::ContinuityDensity, - system::WeaklyCompressibleSPHSystem) +@propagate_inbounds function current_density(v, ::ContinuityDensity, + system::WeaklyCompressibleSPHSystem) # When using `ContinuityDensity`, the density is stored in the last row of `v` return view(v, size(v, 1), :) end diff --git a/src/schemes/structure/total_lagrangian_sph/penalty_force.jl b/src/schemes/structure/total_lagrangian_sph/penalty_force.jl index 94dbd70aa5..025f1ab660 100644 --- a/src/schemes/structure/total_lagrangian_sph/penalty_force.jl +++ b/src/schemes/structure/total_lagrangian_sph/penalty_force.jl @@ -21,10 +21,11 @@ end return zero(initial_pos_diff) end -@inline function dv_penalty_force(penalty_force::PenaltyForceGanzenmueller, - particle, neighbor, initial_pos_diff, initial_distance, - current_pos_diff, current_distance, - system, m_a, m_b, rho_a, rho_b) +@propagate_inbounds function dv_penalty_force(penalty_force::PenaltyForceGanzenmueller, + particle, neighbor, initial_pos_diff, + initial_distance, + current_pos_diff, current_distance, + system, m_a, m_b, rho_a, rho_b) volume_a = m_a / rho_a volume_b = m_b / rho_b @@ -33,15 +34,17 @@ end F_a = deformation_gradient(system, particle) F_b = deformation_gradient(system, neighbor) + inv_current_distance = 1 / current_distance + # Use the symmetry of epsilon to simplify computations eps_sum = (F_a + F_b) * initial_pos_diff - 2 * current_pos_diff - delta_sum = dot(eps_sum, current_pos_diff) / current_distance + delta_sum = dot(eps_sum, current_pos_diff) * inv_current_distance E = young_modulus(system, particle) f = (penalty_force.alpha / 2) * volume_a * volume_b * - kernel_weight / initial_distance^2 * E * delta_sum * current_pos_diff / - current_distance + kernel_weight / initial_distance^2 * E * delta_sum * current_pos_diff * + inv_current_distance # Divide force by mass to obtain acceleration return f / m_a diff --git a/src/schemes/structure/total_lagrangian_sph/rhs.jl b/src/schemes/structure/total_lagrangian_sph/rhs.jl index 3edad47a55..a1b6e63a84 100644 --- a/src/schemes/structure/total_lagrangian_sph/rhs.jl +++ b/src/schemes/structure/total_lagrangian_sph/rhs.jl @@ -26,7 +26,8 @@ end points=each_integrated_particle(system)) do particle, neighbor, initial_pos_diff, initial_distance - # Only consider particles with a distance > 0. See `src/general/smoothing_kernels.jl` for more details. + # Only consider particles with a distance > 0. + # See `src/general/smoothing_kernels.jl` for more details. initial_distance^2 < eps(initial_smoothing_length(system)^2) && return rho_a = @inbounds system.material_density[particle] @@ -38,22 +39,24 @@ end m_a = @inbounds system.mass[particle] m_b = @inbounds system.mass[neighbor] + # PK1 / rho^2 + pk1_rho2_a = @inbounds pk1_rho2(system, particle) + pk1_rho2_b = @inbounds pk1_rho2(system, neighbor) + current_pos_diff = @inbounds current_coords(system, particle) - current_coords(system, neighbor) - current_distance = norm(current_pos_diff) + current_distance = sqrt(dot(current_pos_diff, current_pos_diff)) - dv_stress = m_b * - (pk1_corrected(system, particle) / rho_a^2 + - pk1_corrected(system, neighbor) / rho_b^2) * grad_kernel + dv_stress = m_b * (pk1_rho2_a + pk1_rho2_b) * grad_kernel - dv_penalty_force_ = dv_penalty_force(penalty_force, particle, neighbor, - initial_pos_diff, initial_distance, - current_pos_diff, current_distance, - system, m_a, m_b, rho_a, rho_b) + dv_penalty_force_ = @inbounds dv_penalty_force(penalty_force, particle, neighbor, + initial_pos_diff, initial_distance, + current_pos_diff, current_distance, + system, m_a, m_b, rho_a, rho_b) - dv_viscosity = dv_viscosity_tlsph(system, v_system, particle, neighbor, - current_pos_diff, current_distance, - m_a, m_b, rho_a, rho_b, grad_kernel) + dv_viscosity = @inbounds dv_viscosity_tlsph(system, v_system, particle, neighbor, + current_pos_diff, current_distance, + m_a, m_b, rho_a, rho_b, grad_kernel) dv_particle = dv_stress + dv_penalty_force_ + dv_viscosity diff --git a/src/schemes/structure/total_lagrangian_sph/system.jl b/src/schemes/structure/total_lagrangian_sph/system.jl index 4bcf567874..83fcfac1ae 100644 --- a/src/schemes/structure/total_lagrangian_sph/system.jl +++ b/src/schemes/structure/total_lagrangian_sph/system.jl @@ -68,7 +68,7 @@ struct TotalLagrangianSPHSystem{BM, NDIMS, ELTYPE <: Real, IC, ARRAY1D, ARRAY2D, current_coordinates :: ARRAY2D # Array{ELTYPE, 2}: [dimension, particle] mass :: ARRAY1D # Array{ELTYPE, 1}: [particle] correction_matrix :: ARRAY3D # Array{ELTYPE, 3}: [i, j, particle] - pk1_corrected :: ARRAY3D # Array{ELTYPE, 3}: [i, j, particle] + pk1_rho2 :: ARRAY3D # PK1 corrected divided by rho^2: [i, j, particle] deformation_grad :: ARRAY3D # Array{ELTYPE, 3}: [i, j, particle] material_density :: ARRAY1D # Array{ELTYPE, 1}: [particle] n_integrated_particles :: Int64 @@ -115,7 +115,7 @@ function TotalLagrangianSPHSystem(initial_condition, smoothing_kernel, smoothing mass = copy(initial_condition.mass) material_density = copy(initial_condition.density) correction_matrix = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles) - pk1_corrected = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles) + pk1_rho2 = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles) deformation_grad = Array{ELTYPE, 3}(undef, NDIMS, NDIMS, n_particles) n_integrated_particles = n_particles - n_clamped_particles @@ -132,7 +132,7 @@ function TotalLagrangianSPHSystem(initial_condition, smoothing_kernel, smoothing return TotalLagrangianSPHSystem(initial_condition, initial_coordinates, current_coordinates, mass, correction_matrix, - pk1_corrected, deformation_grad, material_density, + pk1_rho2, deformation_grad, material_density, n_integrated_particles, young_modulus, poisson_ratio, lame_lambda, lame_mu, smoothing_kernel, smoothing_length, acceleration_, boundary_model, @@ -171,14 +171,14 @@ end return system.current_coordinates end -@inline function current_coords(system::TotalLagrangianSPHSystem, particle) +@propagate_inbounds function current_coords(system::TotalLagrangianSPHSystem, particle) # For this system, the current coordinates are stored in the system directly, # so we don't need a `u` array. This function is only to be used in this file # when no `u` is available. current_coords(nothing, system, particle) end -@inline function current_velocity(v, system::TotalLagrangianSPHSystem, particle) +@propagate_inbounds function current_velocity(v, system::TotalLagrangianSPHSystem, particle) if particle <= system.n_integrated_particles return extract_svector(v, system, particle) end @@ -204,58 +204,58 @@ end error("`current_velocity(v, system)` is not implemented for `TotalLagrangianSPHSystem`") end -@inline function viscous_velocity(v, system::TotalLagrangianSPHSystem, particle) +@propagate_inbounds function viscous_velocity(v, system::TotalLagrangianSPHSystem, particle) return extract_svector(system.boundary_model.cache.wall_velocity, system, particle) end -@inline function current_density(v, system::TotalLagrangianSPHSystem) +@propagate_inbounds function current_density(v, system::TotalLagrangianSPHSystem) return current_density(v, system.boundary_model, system) end # In fluid-structure interaction, use the "hydrodynamic pressure" of the structure particles # corresponding to the chosen boundary model. -@inline function current_pressure(v, system::TotalLagrangianSPHSystem) +@propagate_inbounds function current_pressure(v, system::TotalLagrangianSPHSystem) return current_pressure(v, system.boundary_model, system) end -@inline function hydrodynamic_mass(system::TotalLagrangianSPHSystem, particle) +@propagate_inbounds function hydrodynamic_mass(system::TotalLagrangianSPHSystem, particle) return system.boundary_model.hydrodynamic_mass[particle] end -@inline function correction_matrix(system, particle) +@propagate_inbounds function correction_matrix(system, particle) extract_smatrix(system.correction_matrix, system, particle) end -@inline function deformation_gradient(system, particle) +@propagate_inbounds function deformation_gradient(system, particle) extract_smatrix(system.deformation_grad, system, particle) end -@inline function pk1_corrected(system, particle) - extract_smatrix(system.pk1_corrected, system, particle) +@propagate_inbounds function pk1_rho2(system, particle) + extract_smatrix(system.pk1_rho2, system, particle) end -function young_modulus(system::TotalLagrangianSPHSystem, particle) +@propagate_inbounds function young_modulus(system::TotalLagrangianSPHSystem, particle) return young_modulus(system, system.young_modulus, particle) end -function young_modulus(::TotalLagrangianSPHSystem, young_modulus, particle) +@inline function young_modulus(::TotalLagrangianSPHSystem, young_modulus, particle) return young_modulus end -function young_modulus(::TotalLagrangianSPHSystem, - young_modulus::AbstractVector, particle) +@propagate_inbounds function young_modulus(::TotalLagrangianSPHSystem, + young_modulus::AbstractVector, particle) return young_modulus[particle] end -function poisson_ratio(system::TotalLagrangianSPHSystem, particle) +@propagate_inbounds function poisson_ratio(system::TotalLagrangianSPHSystem, particle) return poisson_ratio(system, system.poisson_ratio, particle) end -function poisson_ratio(::TotalLagrangianSPHSystem, poisson_ratio, particle) +@inline function poisson_ratio(::TotalLagrangianSPHSystem, poisson_ratio, particle) return poisson_ratio end -function poisson_ratio(::TotalLagrangianSPHSystem, - poisson_ratio::AbstractVector, particle) +@inline function poisson_ratio(::TotalLagrangianSPHSystem, + poisson_ratio::AbstractVector, particle) return poisson_ratio[particle] end @@ -316,16 +316,18 @@ function update_boundary_interpolation!(system::TotalLagrangianSPHSystem, v, u, end @inline function compute_pk1_corrected!(system, semi) - (; deformation_grad) = system + (; deformation_grad, pk1_rho2, material_density) = system calc_deformation_grad!(deformation_grad, system, semi) @threaded semi for particle in eachparticle(system) pk1_particle = pk1_stress_tensor(system, particle) pk1_particle_corrected = pk1_particle * correction_matrix(system, particle) + rho2_inv = 1 / material_density[particle]^2 - @inbounds for j in 1:ndims(system), i in 1:ndims(system) - system.pk1_corrected[i, j, particle] = pk1_particle_corrected[i, j] + for j in 1:ndims(system), i in 1:ndims(system) + # Precompute PK1 / rho^2 to avoid repeated divisions in the interaction loop + @inbounds pk1_rho2[i, j, particle] = pk1_particle_corrected[i, j] * rho2_inv end end end @@ -446,7 +448,7 @@ end # The von-Mises stress is one form of equivalent stress, where sigma is the deviatoric stress. # See pages 32 and 123. function von_mises_stress(system) - von_mises_stress_vector = zeros(eltype(system.pk1_corrected), nparticles(system)) + von_mises_stress_vector = zeros(eltype(system.pk1_rho2), nparticles(system)) @threaded default_backend(von_mises_stress_vector) for particle in each_integrated_particle(system) @@ -463,7 +465,7 @@ end @inline function von_mises_stress(system, particle::Integer) F = deformation_gradient(system, particle) J = det(F) - P = pk1_corrected(system, particle) + P = pk1_rho2(system, particle) * system.material_density[particle]^2 sigma = (1.0 / J) * P * F' # Calculate deviatoric stress tensor @@ -479,14 +481,14 @@ end function cauchy_stress(system::TotalLagrangianSPHSystem) NDIMS = ndims(system) - cauchy_stress_tensors = zeros(eltype(system.pk1_corrected), NDIMS, NDIMS, + cauchy_stress_tensors = zeros(eltype(system.pk1_rho2), NDIMS, NDIMS, nparticles(system)) @threaded default_backend(cauchy_stress_tensors) for particle in each_integrated_particle(system) F = deformation_gradient(system, particle) J = det(F) - P = pk1_corrected(system, particle) + P = pk1_rho2(system, particle) * system.material_density[particle]^2 sigma = (1.0 / J) * P * F' cauchy_stress_tensors[:, :, particle] = sigma end @@ -507,7 +509,7 @@ end end function system_data(system::TotalLagrangianSPHSystem, dv_ode, du_ode, v_ode, u_ode, semi) - (; mass, material_density, deformation_grad, pk1_corrected, young_modulus, + (; mass, material_density, deformation_grad, young_modulus, poisson_ratio, lame_lambda, lame_mu) = system dv = wrap_v(dv_ode, system, semi) @@ -518,6 +520,8 @@ function system_data(system::TotalLagrangianSPHSystem, dv_ode, du_ode, v_ode, u_ initial_coordinates_ = initial_coordinates(system) velocity = [current_velocity(v, system, particle) for particle in eachparticle(system)] acceleration = system_data_acceleration(dv, system, system.clamped_particles_motion) + pk1_corrected = [pk1_rho2(system, particle) * system.material_density[particle]^2 + for particle in eachparticle(system)] return (; coordinates, initial_coordinates=initial_coordinates_, velocity, mass, material_density, deformation_grad, pk1_corrected, young_modulus, poisson_ratio, diff --git a/test/schemes/structure/total_lagrangian_sph/rhs.jl b/test/schemes/structure/total_lagrangian_sph/rhs.jl index 555338187e..47f6467e8d 100644 --- a/test/schemes/structure/total_lagrangian_sph/rhs.jl +++ b/test/schemes/structure/total_lagrangian_sph/rhs.jl @@ -41,13 +41,18 @@ initial_coordinates[:, neighbor[i]] = initial_coordinates_neighbor[i] current_coordinates = zeros(2, 10) v_system = zeros(2, 10) - pk1_corrected = 2000 * ones(2, 2, 10) # Just something that's not zero to catch errors - pk1_corrected[:, :, particle[i]] = pk1_particle_corrected[i] - pk1_corrected[:, :, neighbor[i]] = pk1_neighbor_corrected[i] # Density equals the ID of the particle material_density = 1:10 + pk1_rho2 = 2000 * ones(2, 2, 10) # Just something that's not zero to catch errors + pk1_rho2[:, :, + particle[i]] = pk1_particle_corrected[i] / + material_density[particle[i]]^2 + pk1_rho2[:, :, + neighbor[i]] = pk1_neighbor_corrected[i] / + material_density[neighbor[i]]^2 + # Use the same setup as in the unit test above for calc_dv! mass = ones(Float64, 10) kernel_deriv = 1.0 @@ -67,8 +72,8 @@ return current_coordinates elseif f === :material_density return material_density - elseif f === :pk1_corrected - return pk1_corrected + elseif f === :pk1_rho2 + return pk1_rho2 elseif f === :mass return mass elseif f === :penalty_force diff --git a/test/systems/tlsph_system.jl b/test/systems/tlsph_system.jl index f8c587e3fa..370e0e187c 100644 --- a/test/systems/tlsph_system.jl +++ b/test/systems/tlsph_system.jl @@ -379,10 +379,11 @@ system = TotalLagrangianSPHSystem(initial_condition, smoothing_kernel, smoothing_length, E, nu) - # Initialize deformation_grad and pk1_corrected with arbitrary values + # Initialize deformation_grad and pk1_rho2 with arbitrary values for particle in TrixiParticles.eachparticle(system) system.deformation_grad[:, :, particle] = [1.0 0.2; 0.2 1.0] - system.pk1_corrected[:, :, particle] = [1.0 0.5; 0.5 1.0] + system.pk1_rho2[:, :, + particle] = [1.0 0.5; 0.5 1.0] / material_densities[particle]^2 end von_mises_stress = TrixiParticles.von_mises_stress(system) From dd28efcc43e11f503b1692a5ff990977d4d9fd77 Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:13:36 +0100 Subject: [PATCH 2/2] Undo varargs change --- src/general/abstract_system.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/general/abstract_system.jl b/src/general/abstract_system.jl index 91fb139470..f854d80d6b 100644 --- a/src/general/abstract_system.jl +++ b/src/general/abstract_system.jl @@ -53,17 +53,17 @@ end initialize!(system, semi) = system # This should not be dispatched by system type. We always expect to get a column of `A`. -@propagate_inbounds function extract_svector(A, system, i...) - extract_svector(A, Val(ndims(system)), i...) +@propagate_inbounds function extract_svector(A, system, i) + extract_svector(A, Val(ndims(system)), i) end # Return the `i`-th column of the array `A` as an `SVector`. -@inline function extract_svector(A, ::Val{NDIMS}, i...) where {NDIMS} +@inline function extract_svector(A, ::Val{NDIMS}, i) where {NDIMS} # Explicit bounds check, which can be removed by calling this function with `@inbounds` - @boundscheck checkbounds(A, NDIMS, i...) + @boundscheck checkbounds(A, NDIMS, i) # Assume inbounds access now - return SVector(ntuple(@inline(dim->@inbounds A[dim, i...]), NDIMS)) + return SVector(ntuple(@inline(dim->@inbounds A[dim, i]), NDIMS)) end # Return `A[:, :, i]` as an `SMatrix`.