From 37e7e8083689eee91a59aec91cde189f29d3956c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Crozet?= Date: Wed, 17 Jun 2026 11:43:16 +0200 Subject: [PATCH 1/3] feat: p2g optimizations by making it independent from in-chunk particle density variations --- shaders/slosh/grid/grid.slang | 4 +- shaders/slosh/grid/sort.slang | 49 ++++++-- shaders/slosh/solver/p2g_scatter_style.slang | 117 +++++++++++++++++++ src/grid/grid.rs | 3 +- src/grid/sort.rs | 2 +- src/pipeline.rs | 15 ++- src/solver/mod.rs | 2 +- src/solver/p2g.rs | 44 +++++++ src/solver/particle.rs | 2 +- 9 files changed, 221 insertions(+), 17 deletions(-) create mode 100644 shaders/slosh/solver/p2g_scatter_style.slang diff --git a/shaders/slosh/grid/grid.slang b/shaders/slosh/grid/grid.slang index 3cbc51c..1790642 100644 --- a/shaders/slosh/grid/grid.slang +++ b/shaders/slosh/grid/grid.slang @@ -216,7 +216,8 @@ public static const int OFF_BY_ONE = 1; public struct ActiveBlockHeaderGeneric { public BlockVirtualId virtual_id; // Needed to compute the world-space position of a block. public uint first_particle; - public MaybeAtomicUint num_particles; + public MaybeAtomicUint num_particles_with_extras; // Total count of particles contributing to this block. + public MaybeAtomicUint num_particles; // Count of particles assigned to this block exclusively. } public typealias ActiveBlockHeader = ActiveBlockHeaderGeneric; @@ -339,6 +340,7 @@ public func mark_block_as_active( let block_header_id = grid[0].num_active_blocks.add(1u); active_blocks[block_header_id].virtual_id = block; active_blocks[block_header_id].first_particle = 0u; + active_blocks[block_header_id].num_particles_with_extras = 0u; active_blocks[block_header_id].num_particles = 0u; hmap_entries[slot].value = BlockHeaderId(block_header_id); } diff --git a/shaders/slosh/grid/sort.slang b/shaders/slosh/grid/sort.slang index b0383b5..24c5f84 100644 --- a/shaders/slosh/grid/sort.slang +++ b/shaders/slosh/grid/sort.slang @@ -105,9 +105,22 @@ func update_block_particle_count( if (id < particles_len) { let cell_width = grid[0].cell_width; let particle = particles_pos[id]; - let block_id = block_associated_to_point(cell_width, particle.pt); - let active_block_id = find_block_header_id(grid, hmap_entries, block_id); - active_blocks[active_block_id.id].num_particles.add(1u); + + var blocks = blocks_associated_to_point(cell_width, particle.pt); + let active_block_id_0 = find_block_header_id(grid, hmap_entries, blocks[0]); + active_blocks[active_block_id_0.id].num_particles.add(1u); + active_blocks[active_block_id_0.id].num_particles_with_extras.add(1u); + + let assoc = associated_cell_index_in_block_off_by_one(particle, cell_width); + let mask = uint3(assoc >= 2); + + for (var i = 1u; i < NUM_ASSOC_BLOCKS; i += 1u) { + let bshift = blocks[i].id - blocks[0].id; + if (all((bshift * mask) == bshift)) { + let active_block_id_i = find_block_header_id(grid, hmap_entries, blocks[i]); + active_blocks[active_block_id_i.id].num_particles_with_extras.add(1u); + } + } } } @@ -121,13 +134,13 @@ func copy_particles_len_to_scan_value( ) { let id = invocation_id.x; if (id < grid[0].num_active_blocks) { - scan_values[id] = active_blocks[id].num_particles; + scan_values[id] = active_blocks[id].num_particles_with_extras; } } [shader("compute")] [numthreads(GRID_WORKGROUP_SIZE, 1, 1)] -func copy_scan_values_to_first_particles( +func copy_scan_values_to_first_particles_and_prepare_for_finalize( uint3 invocation_id: SV_DispatchThreadID, StructuredBuffer grid, StructuredBuffer scan_values, @@ -136,6 +149,8 @@ func copy_scan_values_to_first_particles( let id = invocation_id.x; if (id < grid[0].num_active_blocks) { active_blocks[id].first_particle = scan_values[id]; + active_blocks[id].num_particles_with_extras = active_blocks[id].num_particles; + active_blocks[id].num_particles = 0u; } } @@ -147,26 +162,40 @@ func finalize_particles_sort( StructuredBuffer hmap_entries, StructuredBuffer particles_pos, ConstantBuffer particles_len, - RWStructuredBuffer> scan_values, RWStructuredBuffer nodes_linked_lists, RWStructuredBuffer particle_node_linked_lists, RWStructuredBuffer sorted_particle_ids, + RWStructuredBuffer active_blocks, ) { let id = invocation_id.x; if (id < particles_len) { let cell_width = grid[0].cell_width; let particle = particles_pos[id]; - let block_id = block_associated_to_point(cell_width, particle.pt); // Place the particle to its sorted place. - let active_block_id = find_block_header_id(grid, hmap_entries, block_id); - let target_index = scan_values[active_block_id.id].add(1u); + var blocks = blocks_associated_to_point(cell_width, particle.pt); + let active_block_id_0 = find_block_header_id(grid, hmap_entries, blocks[0]); + let target_index = active_blocks[active_block_id_0.id].first_particle + + active_blocks[active_block_id_0.id].num_particles.add(1u); sorted_particle_ids[target_index] = id; + let assoc = associated_cell_index_in_block_off_by_one(particle, cell_width); + let mask = uint3(assoc >= 2); + + for (var i = 1u; i < NUM_ASSOC_BLOCKS; i += 1u) { + let bshift = blocks[i].id - blocks[0].id; + if (all((bshift * mask) == bshift)) { + let active_block_id_i = find_block_header_id(grid, hmap_entries, blocks[i]); + let target_index = active_blocks[active_block_id_i.id].first_particle + + active_blocks[active_block_id_i.id].num_particles_with_extras.add(1u); + sorted_particle_ids[target_index] = id; + } + } + // Setup the per-node particle linked-list. let node_local_id = associated_cell_index_in_block_off_by_one(particle, cell_width); - let node_global_id = node_id(block_header_id_to_physical_id(active_block_id), node_local_id); + let node_global_id = node_id(block_header_id_to_physical_id(active_block_id_0), node_local_id); let prev_head = nodes_linked_lists[node_global_id.id].head.exchange(id); nodes_linked_lists[node_global_id.id].len.add(1u); particle_node_linked_lists[id] = prev_head; diff --git a/shaders/slosh/solver/p2g_scatter_style.slang b/shaders/slosh/solver/p2g_scatter_style.slang new file mode 100644 index 0000000..b94000e --- /dev/null +++ b/shaders/slosh/solver/p2g_scatter_style.slang @@ -0,0 +1,117 @@ +module p2g; + +import slosh.solver.params; +import slosh.solver.particle; +import slosh.solver.boundary_condition; +import slosh.grid.kernel; +import slosh.grid.grid; +import slosh.solver.rigid_impulses; +import slosh.rbd.dynamics.body; +import slosh.aliases; + +#if DIM == 2 +static const uint WORKGROUP_SIZE_X = 8; +static const uint WORKGROUP_SIZE_Y = 8; +static const uint WORKGROUP_SIZE_Z = 1; +#else +static const uint WORKGROUP_SIZE_X = 4; +static const uint WORKGROUP_SIZE_Y = 4; +static const uint WORKGROUP_SIZE_Z = 4; +#endif +static const uint WORKGROUP_SIZE = WORKGROUP_SIZE_X * WORKGROUP_SIZE_Y * WORKGROUP_SIZE_Z; + +// Staging buffers for one workgroup-sized chunk of particles. The chunk is loaded +// cooperatively (one particle per thread) and then read by every cell-thread, so +// each particle is fetched from global memory exactly once per block. +#if DIM == 2 +groupshared float2 shared_pos[WORKGROUP_SIZE]; +groupshared float2 shared_momentum[WORKGROUP_SIZE]; +groupshared float2x2 shared_affine[WORKGROUP_SIZE]; +#else +groupshared float3 shared_pos[WORKGROUP_SIZE]; +groupshared float3 shared_momentum[WORKGROUP_SIZE]; +groupshared float3x3 shared_affine[WORKGROUP_SIZE]; +#endif +groupshared float shared_mass[WORKGROUP_SIZE]; + +[shader("compute")] +[numthreads(WORKGROUP_SIZE, 1, 1)] +func p2g_scatter_style( + uint3 block_id: SV_GroupID, + uint tid: SV_GroupIndex, + StructuredBuffer grid, + StructuredBuffer active_blocks, + StructuredBuffer particles_pos, + StructuredBuffer particles_kin, + RWStructuredBuffer nodes, + StructuredBuffer sorted_particle_ids, +) { + let cell_width = grid[0].cell_width; + let inv_cell_width = 1.0 / cell_width; + let bid = block_id.x; + + let first_particle = active_blocks[bid].first_particle; + let num_particles = active_blocks[bid].num_particles_with_extras; + let last_particle = first_particle + num_particles; + let block_vid = active_blocks[bid].virtual_id.id; + + // Each thread owns one cell (grid node) of this block and accumulates the + // contribution of every particle assigned to the block into a register. + // This avoids both global atomics and the per-cell workgroup reduction that + // the previous implementation used (which serialized the whole workgroup with + // ~7 barriers per cell, i.e. hundreds of barriers per particle batch). +#if DIM == 2 + let local_cell = int2(int(tid % 8u), int(tid / 8u)); + let cell_pos = float2(block_vid * 8 + local_cell) * cell_width; + var acc = float3(0.0); +#else + let local_cell = int3(int(tid % 4u), int((tid / 4u) % 4u), int(tid / 16u)); + let cell_pos = float3(block_vid * 4 + local_cell) * cell_width; + var acc = float4(0.0); +#endif + + // Stream the block's particles through shared memory one chunk at a time. + for (var chunk_base = first_particle; chunk_base < last_particle; chunk_base += WORKGROUP_SIZE) { + // Wait for the previous chunk's readers before overwriting shared memory. + GroupMemoryBarrierWithGroupSync(); + + let load_idx = chunk_base + tid; + if (load_idx < last_particle) { + let pid = sorted_particle_ids[load_idx]; + let kin = particles_kin[pid]; + shared_pos[tid] = particles_pos[pid].pt; + shared_mass[tid] = kin.mass; + shared_affine[tid] = kin.affine; + shared_momentum[tid] = kin.velocity * kin.mass + kin.force_dt; + } + + GroupMemoryBarrierWithGroupSync(); + + // `chunk_len` is uniform across the workgroup, so the barriers above stay in + // uniform control flow regardless of the total particle count. + let chunk_len = min(WORKGROUP_SIZE, last_particle - chunk_base); + for (var p = 0u; p < chunk_len; p += 1u) { + let dpt = cell_pos - shared_pos[p]; +#if DIM == 2 + let weight = QuadraticKernel::eval(dpt.x * inv_cell_width) + * QuadraticKernel::eval(dpt.y * inv_cell_width); +#else + let weight = QuadraticKernel::eval(dpt.x * inv_cell_width) + * QuadraticKernel::eval(dpt.y * inv_cell_width) + * QuadraticKernel::eval(dpt.z * inv_cell_width); +#endif + // The quadratic kernel is exactly zero outside the 3-node support, which + // is the common case for the dense cell x particle cross product. Skipping + // the affine matrix-vector product there is the bulk of the saved work. + if (weight != 0.0) { + let momentum = mul(dpt, shared_affine[p]) + shared_momentum[p]; + acc += vector(momentum, shared_mass[p]) * weight; + } + } + } + + // Write the accumulated node state to global memory. Every cell is written once + // (no atomics, no inter-block races), zeroing cells that received no contribution. + let global_chunk_id = block_header_id_to_physical_id(BlockHeaderId(bid)); + nodes[global_chunk_id.id + tid].momentum_velocity_mass = acc; +} diff --git a/src/grid/grid.rs b/src/grid/grid.rs index 7bb1838..e1a271b 100644 --- a/src/grid/grid.rs +++ b/src/grid/grid.rs @@ -164,7 +164,7 @@ impl WgGrid { prefix_sum_module.launch(backend, pass, prefix_sum, &grid.scan_values)?; sort_module - .copy_scan_values_to_first_particles + .copy_scan_values_to_first_particles_and_prepare_for_finalize .launch_indirect(backend, pass, &args, grid.indirect_n_blocks_groups.buffer())?; // Reset here so the linked list heads get reset before `finalize_particles_sort` which @@ -263,6 +263,7 @@ impl Default for GpuGridHashMapEntry { pub struct GpuActiveBlockHeader { virtual_id: BlockVirtualId, first_particle: u32, + num_particles_with_extras: u32, num_particles: u32, } diff --git a/src/grid/sort.rs b/src/grid/sort.rs index 031018d..4ba5d69 100644 --- a/src/grid/sort.rs +++ b/src/grid/sort.rs @@ -20,7 +20,7 @@ pub struct WgSort { pub(crate) mark_rigid_particles_needing_block: GpuFunction, pub(crate) update_block_particle_count: GpuFunction, pub(crate) copy_particles_len_to_scan_value: GpuFunction, - pub(crate) copy_scan_values_to_first_particles: GpuFunction, + pub(crate) copy_scan_values_to_first_particles_and_prepare_for_finalize: GpuFunction, pub(crate) finalize_particles_sort: GpuFunction, pub(crate) sort_rigid_particles: GpuFunction, } diff --git a/src/pipeline.rs b/src/pipeline.rs index 63ed4d9..f58f951 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -13,7 +13,7 @@ use crate::solver::{ GpuBoundaryCondition, GpuImpulses, GpuMaterials, GpuParticleModelData, GpuParticles, GpuRigidParticles, GpuSimulationParams, GpuTimestepBounds, Particle, SimulationParams, WgG2P, WgG2PCdf, WgGridUpdate, WgGridUpdateCdf, WgP2G, WgP2GCdf, WgParticleUpdate, WgRigidImpulses, - WgRigidParticleUpdate, WgTimestepBounds, + WgRigidParticleUpdate, WgTimestepBounds, WgP2GScatterStyle }; use rapier::dynamics::RigidBodySet; use rapier::geometry::{ColliderHandle, ColliderSet}; @@ -44,6 +44,7 @@ pub struct MpmPipeline { prefix_sum: WgPrefixSum, sort: WgSort, p2g: WgP2G, + p2g_scatter_style: WgP2GScatterStyle, p2g_cdf: WgP2GCdf, grid_update_cdf: WgGridUpdateCdf, grid_update: WgGridUpdate, @@ -342,6 +343,7 @@ impl MpmPipeline { prefix_sum: WgPrefixSum::from_backend(backend, compiler)?, sort: WgSort::from_backend(backend, compiler)?, p2g: WgP2G::from_backend(backend, compiler)?, + p2g_scatter_style: WgP2GScatterStyle::from_backend(backend, compiler)?, p2g_cdf: WgP2GCdf::from_backend(backend, compiler)?, grid_update: WgGridUpdate::from_backend(backend, compiler)?, grid_update_cdf: WgGridUpdateCdf::from_backend(backend, compiler)?, @@ -479,7 +481,16 @@ impl MpmPipeline { { let mut pass = encoder.begin_pass("p2g", timestamps.as_deref_mut()); - self.p2g.launch( + // self.p2g.launch( + // backend, + // &mut pass, + // &data.grid, + // &data.particles, + // &data.impulses, + // &data.bodies, + // &data.body_materials, + // )?; + self.p2g_scatter_style.launch( backend, &mut pass, &data.grid, diff --git a/src/solver/mod.rs b/src/solver/mod.rs index b335205..41e99ef 100644 --- a/src/solver/mod.rs +++ b/src/solver/mod.rs @@ -35,7 +35,7 @@ pub use g2p::WgG2P; pub use g2p_cdf::WgG2PCdf; -pub use p2g::WgP2G; +pub use p2g::{WgP2G, WgP2GScatterStyle}; pub use p2g_cdf::WgP2GCdf; pub use params::{GpuSimulationParams, SimulationParams}; pub use particle::*; diff --git a/src/solver/p2g.rs b/src/solver/p2g.rs index 90a50ee..8332963 100644 --- a/src/solver/p2g.rs +++ b/src/solver/p2g.rs @@ -27,6 +27,13 @@ pub struct WgP2G { pub p2g: GpuFunction, } +#[derive(Shader)] +#[shader(module = "slosh::solver::p2g_scatter_style")] +pub struct WgP2GScatterStyle { + /// Compiled P2G compute shader. + pub p2g_scatter_style: GpuFunction, +} + #[derive(ShaderArgs)] struct P2GArgs<'a, B: Backend> { grid: &'a GpuScalar, @@ -37,6 +44,7 @@ struct P2GArgs<'a, B: Backend> { particles_pos: &'a GpuVector, particles_kin: &'a GpuVector, nodes: &'a GpuVector, + sorted_particle_ids: &'a GpuVector, body_vels: &'a GpuVector, body_impulses: &'a GpuVector, body_materials: &'a GpuVector, @@ -60,6 +68,7 @@ impl WgP2G { active_blocks: &grid.active_blocks, nodes: &grid.nodes, nodes_linked_lists: &grid.nodes_linked_lists, + sorted_particle_ids: particles.sorted_ids(), particles_pos: particles.positions(), particles_kin: &particles.kinematics, particle_node_linked_lists: particles.node_linked_lists(), @@ -75,3 +84,38 @@ impl WgP2G { ) } } + +impl WgP2GScatterStyle { + /// Launches the P2G kernel to transfer particle data to grid nodes. + pub fn launch( + &self, + backend: &B, + pass: &mut B::Pass, + grid: &GpuGrid, + particles: &GpuParticles, + impulses: &GpuImpulses, + bodies: &GpuBodySet, + body_materials: &GpuMaterials, + ) -> Result<(), B::Error> { + let args = P2GArgs { + grid: &grid.meta, + hmap_entries: &grid.hmap_entries, + active_blocks: &grid.active_blocks, + nodes: &grid.nodes, + nodes_linked_lists: &grid.nodes_linked_lists, + sorted_particle_ids: particles.sorted_ids(), + particles_pos: particles.positions(), + particles_kin: &particles.kinematics, + particle_node_linked_lists: particles.node_linked_lists(), + body_vels: bodies.vels(), + body_impulses: &impulses.incremental_impulses, + body_materials: &body_materials.materials, + }; + self.p2g_scatter_style.launch_indirect( + backend, + pass, + &args, + grid.indirect_n_g2p_p2g_groups.buffer(), + ) + } +} diff --git a/src/solver/particle.rs b/src/solver/particle.rs index 2dfe944..e71965f 100644 --- a/src/solver/particle.rs +++ b/src/solver/particle.rs @@ -493,7 +493,7 @@ impl GpuParticles { def_grad: GpuTensor::vector_encased(backend, &data.def_grad, resizeable)?, properties: GpuTensor::vector_encased(backend, &data.properties, resizeable)?, models: GpuTensor::vector(backend, &data.models, resizeable)?, - sorted_ids: GpuTensor::vector_uninit(backend, particles.len() as u32, resizeable)?, + sorted_ids: GpuTensor::vector_uninit(backend, particles.len() as u32 * 8, resizeable)?, node_linked_lists: GpuTensor::vector_uninit( backend, particles.len() as u32, From 1f5a560435d8b3e2e846b359fd6348c2550889c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Crozet?= Date: Wed, 17 Jun 2026 11:43:28 +0200 Subject: [PATCH 2/3] Release v0.6.1 --- crates/slosh2d/Cargo.toml | 2 +- crates/slosh3d/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/slosh2d/Cargo.toml b/crates/slosh2d/Cargo.toml index e89fa9e..d2aaf61 100644 --- a/crates/slosh2d/Cargo.toml +++ b/crates/slosh2d/Cargo.toml @@ -3,7 +3,7 @@ name = "slosh2d" authors = ["Sébastien Crozet "] description = "Cross-platform GPU 2D Material Point Method implementation." repository = "https://github.com/dimforge/slosh" -version = "0.6.0" +version = "0.6.1" edition = "2024" license = "Apache-2.0" diff --git a/crates/slosh3d/Cargo.toml b/crates/slosh3d/Cargo.toml index 4e33719..7135efb 100644 --- a/crates/slosh3d/Cargo.toml +++ b/crates/slosh3d/Cargo.toml @@ -3,7 +3,7 @@ name = "slosh3d" authors = ["Sébastien Crozet "] description = "Cross-platform GPU 3D Material Point Method implementation." repository = "https://github.com/dimforge/slosh" -version = "0.6.0" +version = "0.6.1" edition = "2024" license = "Apache-2.0" From ed8c498c37787b3a51d67a1677204d99778cd97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Crozet?= Date: Wed, 17 Jun 2026 11:52:48 +0200 Subject: [PATCH 3/3] clippy fixes --- src/grid/sort.rs | 3 +++ src/pipeline.rs | 12 +++++++++--- src/rbd/mod.rs | 5 ----- src/solver/particle.rs | 4 ++-- src_testbed/step.rs | 2 +- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/grid/sort.rs b/src/grid/sort.rs index 4ba5d69..eb9f955 100644 --- a/src/grid/sort.rs +++ b/src/grid/sort.rs @@ -16,7 +16,10 @@ use stensor::tensor::GpuScalar; #[shader(module = "slosh::grid::sort")] pub struct WgSort { pub(crate) touch_particle_blocks: GpuFunction, + // Bound to GPU kernels; currently only used by commented-out rigid-particle code paths. + #[allow(dead_code)] pub(crate) touch_rigid_particle_blocks: GpuFunction, + #[allow(dead_code)] pub(crate) mark_rigid_particles_needing_block: GpuFunction, pub(crate) update_block_particle_count: GpuFunction, pub(crate) copy_particles_len_to_scan_value: GpuFunction, diff --git a/src/pipeline.rs b/src/pipeline.rs index f58f951..c6c34b7 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -12,8 +12,8 @@ use crate::rbd::dynamics::body::{BodyCoupling, BodyCouplingEntry}; use crate::solver::{ GpuBoundaryCondition, GpuImpulses, GpuMaterials, GpuParticleModelData, GpuParticles, GpuRigidParticles, GpuSimulationParams, GpuTimestepBounds, Particle, SimulationParams, WgG2P, - WgG2PCdf, WgGridUpdate, WgGridUpdateCdf, WgP2G, WgP2GCdf, WgParticleUpdate, WgRigidImpulses, - WgRigidParticleUpdate, WgTimestepBounds, WgP2GScatterStyle + WgG2PCdf, WgGridUpdate, WgGridUpdateCdf, WgP2G, WgP2GCdf, WgP2GScatterStyle, WgParticleUpdate, + WgRigidImpulses, WgRigidParticleUpdate, WgTimestepBounds, }; use rapier::dynamics::RigidBodySet; use rapier::geometry::{ColliderHandle, ColliderSet}; @@ -43,14 +43,20 @@ pub struct MpmPipeline { grid: WgGrid, prefix_sum: WgPrefixSum, sort: WgSort, + // Kept for the alternative/CDF code paths that are currently commented out in `step`. + #[allow(dead_code)] p2g: WgP2G, p2g_scatter_style: WgP2GScatterStyle, + #[allow(dead_code)] p2g_cdf: WgP2GCdf, + #[allow(dead_code)] grid_update_cdf: WgGridUpdateCdf, grid_update: WgGridUpdate, particles_update: WgParticleUpdate, g2p: WgG2P, + #[allow(dead_code)] g2p_cdf: WgG2PCdf, + #[allow(dead_code)] rigid_particles_update: WgRigidParticleUpdate, /// Maximum timestep bound calculation. pub timestep_bounds: WgTimestepBounds, @@ -571,7 +577,7 @@ impl MpmPipeline { )?; { - let mut pass = encoder.begin_pass("integrate_bodies", timestamps.as_deref_mut()); + let mut pass = encoder.begin_pass("integrate_bodies", timestamps); // TODO: should this be in a separate pipeline? Within impulse probably? self.impulses.launch( backend, diff --git a/src/rbd/mod.rs b/src/rbd/mod.rs index 7f22ded..07c250c 100644 --- a/src/rbd/mod.rs +++ b/src/rbd/mod.rs @@ -1,8 +1,3 @@ -use slang_hal::re_exports::include_dir; - -#[cfg(feature = "runtime")] -use slang_hal::re_exports::minislang::SlangCompiler; - /// GPU-accelerated rigid body dynamics simulation. /// /// This module provides structures and methods for managing physics bodies diff --git a/src/solver/particle.rs b/src/solver/particle.rs index e71965f..81d8860 100644 --- a/src/solver/particle.rs +++ b/src/solver/particle.rs @@ -101,7 +101,7 @@ impl ParticleDynamics { } /// Extracts the kinematic state for GPU upload. - fn to_kinematics(&self) -> Kinematics { + fn to_kinematics(self) -> Kinematics { Kinematics { affine: self.affine, velocity: self.velocity, @@ -113,7 +113,7 @@ impl ParticleDynamics { } /// Extracts the static properties for GPU upload. - fn to_properties(&self) -> ParticleProperties { + fn to_properties(self) -> ParticleProperties { ParticleProperties { init_volume: self.init_volume, init_radius: self.init_radius, diff --git a/src_testbed/step.rs b/src_testbed/step.rs index 9324e13..9f263f6 100644 --- a/src_testbed/step.rs +++ b/src_testbed/step.rs @@ -38,7 +38,7 @@ pub struct SimulationStepResult { /// Can be cast to the concrete model type using `bytemuck::cast_slice`. pub model_data_raw: Vec, /// Raw deformation gradient data read back from GPU, stored as f32 values. - /// Stride per particle is [`GPU_DEF_GRAD_STRIDE_F32`]: 4 in 2D (a + /// Stride per particle is `GPU_DEF_GRAD_STRIDE_F32`: 4 in 2D (a /// `mat2x2`), 12 in 3D (a `mat3x3` with `vec4`-aligned columns). /// In 3D only the first three entries of each column are meaningful; the /// fourth entry of each column is slang padding.