Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions xrspatial/rasterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,25 @@ def _should_write_last_gpu(is_first, new_idx, cur_idx):

_gpu_kernel_cache = {}

# Must stay in lock-step with the cuda.local.array(2048, ...) allocations in
# _scanline_fill_gpu. Raise both together if this is ever tuned.
_GPU_MAX_ISECT = 2048


def _check_gpu_edge_cap(row_ptr):
"""Raise if any raster row needs more active edges than the GPU kernel
can hold in its fixed-size local array. Without this guard the CUDA
scanline kernel would silently truncate and emit wrong pixels.
"""
max_edges_per_row = int(np.diff(row_ptr).max())
if max_edges_per_row > _GPU_MAX_ISECT:
raise ValueError(
f"rasterize CUDA kernel: {max_edges_per_row} active edges in a "
f"single row exceeds the limit of {_GPU_MAX_ISECT}. The kernel "
f"would silently drop excess edges and produce wrong output. "
f"Use the numpy/dask+numpy backend, or reduce polygon density."
)


def _ensure_gpu_kernels(merge_fn, should_write):
"""Compile CUDA kernels for the given merge + predicate pair and cache.
Expand Down Expand Up @@ -1480,17 +1499,10 @@ def _run_cupy(geometries, props_array, bounds, height, width, fill, dtype,
row_ptr, col_idx = _build_row_csr_numba(
edge_y_min, edge_y_max, height)

# Check for rows exceeding the GPU kernel's local array limit
_GPU_MAX_ISECT = 2048
max_edges_per_row = int(np.diff(row_ptr).max())
if max_edges_per_row > _GPU_MAX_ISECT:
warnings.warn(
f"rasterize CUDA kernel: {max_edges_per_row} active edges "
f"in a single row exceeds the limit of {_GPU_MAX_ISECT}. "
f"Excess edges will be silently dropped, causing incorrect "
f"results. Reduce polygon density or use the numpy backend.",
stacklevel=3,
)
# The CUDA scanline kernel allocates a fixed-size local array; rows
# exceeding _GPU_MAX_ISECT would have edges silently dropped and
# produce wrong output. Fail fast instead.
_check_gpu_edge_cap(row_ptr)

d_y_min = cupy.asarray(edge_y_min)
d_x_at_ymin = cupy.asarray(edge_x_at_ymin)
Expand Down Expand Up @@ -1924,6 +1936,9 @@ def _rasterize_tile_cupy(poly_wkb, poly_props_2d, poly_global_2d, tile_bounds,
edge_geom_id = edge_arrays[1:]
row_ptr, col_idx = _build_row_csr_numba(
edge_y_min, edge_y_max, tile_h)
# Same fail-fast as eager CuPy: per-tile truncation would be
# silent without this check.
_check_gpu_edge_cap(row_ptr)
tpb = 256
blocks = (tile_h + tpb - 1) // tpb
kernels['scanline_fill'][blocks, tpb](
Expand Down
26 changes: 26 additions & 0 deletions xrspatial/tests/test_rasterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,32 @@ def test_invalid_chunks(self, bad):
width=10, height=10, bounds=(0, 0, 1, 1),
chunks=bad)

def test_gpu_edge_cap_check_raises(self):
# Issue #2067: the CUDA scanline kernel allocates a fixed 2048-entry
# local array. Without a guard, exceeding it silently truncates
# active edges and produces wrong output. The validator runs on
# the host, so this is testable without a GPU.
from xrspatial.rasterize import _check_gpu_edge_cap, _GPU_MAX_ISECT
# row_ptr where row 0 has _GPU_MAX_ISECT + 1 edges
row_ptr = np.array([0, _GPU_MAX_ISECT + 1, _GPU_MAX_ISECT + 1],
dtype=np.int64)
with pytest.raises(ValueError, match="active edges"):
_check_gpu_edge_cap(row_ptr)

def test_gpu_edge_cap_check_passes_at_limit(self):
from xrspatial.rasterize import _check_gpu_edge_cap, _GPU_MAX_ISECT
row_ptr = np.array([0, _GPU_MAX_ISECT, _GPU_MAX_ISECT],
dtype=np.int64)
# exactly at the cap is permitted
_check_gpu_edge_cap(row_ptr)

def test_gpu_edge_cap_check_passes_below_limit(self):
from xrspatial.rasterize import _check_gpu_edge_cap, _GPU_MAX_ISECT
row_ptr = np.array([0, _GPU_MAX_ISECT - 1, _GPU_MAX_ISECT - 1],
dtype=np.int64)
# just under the cap, the common case
_check_gpu_edge_cap(row_ptr)

def test_empty_geometry_does_not_poison_inferred_bounds(self):
# Issue #2065: an empty geometry returns nan from .bounds; the
# caller used .min()/.max() unfiltered, so a single empty geom
Expand Down
Loading