Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Unreleased
- Fix a GPU validation error / panic on simulations with more than ~4.19M particles, caused by
compute kernels dispatching more than 65535 workgroups along a single dimension. The affected
kernels now clamp the dispatch and grid-stride over the particles.

# v0.2.0 (27 Oct. 2025)
- Add support for dynamic particle insertion.
- Add support for specializing the particle update logic using Slang’s link-time specialization feature.
Expand Down
15 changes: 9 additions & 6 deletions shaders/slosh/grid/sort.slang
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ func touch_particle_blocks(
StructuredBuffer<Position> particles_pos,
ConstantBuffer<uint> particles_len,
) {
let id = invocation_id.x;
if (id < particles_len) {
let _gs_n = particles_len;
let _gs_total = min((_gs_n + (GRID_WORKGROUP_SIZE - 1u)) / GRID_WORKGROUP_SIZE, 65535u) * GRID_WORKGROUP_SIZE;
for (var id = invocation_id.x; id < _gs_n; id += _gs_total) {
let cell_width = grid[0].cell_width;
let particle = particles_pos[id];
var blocks = blocks_associated_to_point(cell_width, particle.pt);
Expand Down Expand Up @@ -101,8 +102,9 @@ func update_block_particle_count(
ConstantBuffer<uint> particles_len,
RWStructuredBuffer<AtomicActiveBlockHeader> active_blocks,
) {
let id = invocation_id.x;
if (id < particles_len) {
let _gs_n = particles_len;
let _gs_total = min((_gs_n + (GRID_WORKGROUP_SIZE - 1u)) / GRID_WORKGROUP_SIZE, 65535u) * GRID_WORKGROUP_SIZE;
for (var id = invocation_id.x; id < _gs_n; id += _gs_total) {
let cell_width = grid[0].cell_width;
let particle = particles_pos[id];

Expand Down Expand Up @@ -168,8 +170,9 @@ func finalize_particles_sort(
RWStructuredBuffer<AtomicActiveBlockHeader> active_blocks,

) {
let id = invocation_id.x;
if (id < particles_len) {
let _gs_n = particles_len;
let _gs_total = min((_gs_n + (GRID_WORKGROUP_SIZE - 1u)) / GRID_WORKGROUP_SIZE, 65535u) * GRID_WORKGROUP_SIZE;
for (var id = invocation_id.x; id < _gs_n; id += _gs_total) {
let cell_width = grid[0].cell_width;
let particle = particles_pos[id];

Expand Down
9 changes: 4 additions & 5 deletions shaders/slosh/solver/particle_update.slang
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,9 @@ func particle_update(
RWStructuredBuffer<ParticleProperties> particles_props,
ConstantBuffer<uint> particles_len,
) {
let particle_id = invocation_id.x;

if (particle_id >= particles_len) {
return;
}
let _gs_n = particles_len;
let _gs_total = min((_gs_n + 63u) / 64u, 65535u) * 64u;
for (var particle_id = invocation_id.x; particle_id < _gs_n; particle_id += _gs_total) {

let model = ParticleModel();
let flags = model.model_flags(particles_model, particle_id);
Expand Down Expand Up @@ -161,4 +159,5 @@ func particle_update(

particles_kin[particle_id] = kin;
particles_def_grad[particle_id] = def_grad;
}
}
11 changes: 5 additions & 6 deletions shaders/slosh/solver/timestep_bound.slang
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,12 @@ func estimate_timestep_bound(
ConstantBuffer<uint> particles_len,
RWStructuredBuffer<GpuTimestepBounds> result,
) {
let particle_id = invocation_id.x;

if (particle_id >= particles_len) {
return;
}
let _gs_n = particles_len;
let _gs_total = min((_gs_n + (WORKGROUP_SIZE - 1u)) / WORKGROUP_SIZE, 65535u) * WORKGROUP_SIZE;
for (var particle_id = invocation_id.x; particle_id < _gs_n; particle_id += _gs_total) {

if (particles_kin[particle_id].enabled == 0) {
return;
continue;
}

let cell_width = grid[0].cell_width;
Expand Down Expand Up @@ -91,4 +89,5 @@ func estimate_timestep_bound(

let candidate = GpuTimestepBounds::secs_to_int(dt);
result[0].computed_max_dt_as_uint.min(candidate);
}
}
12 changes: 6 additions & 6 deletions src/grid/grid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,11 @@ impl<B: Backend> WgGrid<B> {
self.reset_hmap
.launch(backend, pass, &args, [grid.cpu_meta.hmap_capacity, 1, 1])?;

sort_module.touch_particle_blocks.launch(
sort_module.touch_particle_blocks.launch_capped(
backend,
pass,
&args,
[particles.len() as u32, 1, 1],
particles.len() as u32,
)?;

// // Ensure blocks exist wherever we have rigid particles that might affect
Expand Down Expand Up @@ -151,11 +151,11 @@ impl<B: Backend> WgGrid<B> {
self.init_indirect_workgroups
.launch_grid(backend, pass, &args, [1, 1, 1])?;

sort_module.update_block_particle_count.launch(
sort_module.update_block_particle_count.launch_capped(
backend,
pass,
&args,
[particles.len() as u32, 1, 1],
particles.len() as u32,
)?;

sort_module
Expand All @@ -175,11 +175,11 @@ impl<B: Backend> WgGrid<B> {
&args,
grid.indirect_n_g2p_p2g_groups.buffer(),
)?;
sort_module.finalize_particles_sort.launch(
sort_module.finalize_particles_sort.launch_capped(
backend,
pass,
&args,
[particles.len() as u32, 1, 1],
particles.len() as u32,
)?;

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion src/solver/particle_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,6 @@ impl<B: Backend> WgParticleUpdate<B> {
particles_len: particles.gpu_len(),
};
self.particle_update
.launch(backend, pass, &args, [particles.len() as u32, 1, 1])
.launch_capped(backend, pass, &args, particles.len() as u32)
}
}
2 changes: 1 addition & 1 deletion src/solver/timestep_bound.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,6 @@ impl<B: Backend> WgTimestepBounds<B> {
self.reset_timestep_bound
.launch(backend, pass, &args, [1; 3])?;
self.estimate_timestep_bound
.launch(backend, pass, &args, [particles.len() as u32, 1, 1])
.launch_capped(backend, pass, &args, particles.len() as u32)
}
}
Loading