From 118ee92756d354f2f96a214acd86ef63c8322a7e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 20 May 2026 07:57:52 -0700 Subject: [PATCH 1/2] reproject: validate source coords are monotonic and regularly spaced (#2184) `_source_bounds()` infers pixel resolution from only the first two samples on each axis, but downstream pixel math uses that single value for the whole grid. Irregular or non-monotonic coords silently produced wrong georeferencing. Add `_validate_source_coords()` at the top of `reproject()` and `merge()` (before CRS resolution or grid math). For each spatial axis it checks: - All values are finite. - Steps are strictly monotonic (all ascending or all descending). - |diff - median(diff)| <= 1e-6 * |median(diff)| for every step. On failure it raises `ValueError` naming the axis, the median step, the worst deviation, the offending index, and the two coord values at that index. Sub-ULP drift from real-world GeoTIFFs still passes. Tests cover regular ascending/descending coords, sub-ULP drift, single- pixel rasters, single-sample perturbation in x and y, non-monotonic coords, repeated values, NaN in coords, validation order vs CRS detection, merge() per-raster reporting, and dask / cupy / dask+cupy backends. Closes #2184 --- xrspatial/reproject/__init__.py | 92 ++++++++++ xrspatial/tests/test_reproject.py | 296 ++++++++++++++++++++++++++++++ 2 files changed, 388 insertions(+) diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index cfab24881..683b243a3 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -88,6 +88,86 @@ def _find_spatial_dims(raster): return dims[-2], dims[-1] +# Default tolerance for the regular-spacing check. Coordinates loaded from +# real GeoTIFFs can drift a few ULPs from perfectly uniform after pixel-to- +# world transforms, so 1e-6 (relative) is loose enough to accept those while +# still catching the single-pixel perturbation case in #2184. +_REGULAR_COORD_RTOL = 1e-6 + + +def _validate_regular_axis(coords, axis_name, func_name, rtol=_REGULAR_COORD_RTOL): + """Validate that 1-D coordinate array is strictly monotonic and regular. + + Pixel-resolution math in `_source_bounds` and the chunk workers assumes + a uniform grid. Without this check, irregular or non-monotonic coords + silently produce wrong georeferencing (see #2184). + + Parameters + ---------- + coords : array-like + 1-D coordinate values along one axis. + axis_name : str + Name of the axis ("x" or "y") for the error message. + func_name : str + Calling function name for the error prefix. + rtol : float + Relative tolerance for spacing regularity. + + Raises + ------ + ValueError + If coords contain non-finite values, are not strictly monotonic, + or have spacing that varies by more than ``rtol`` relative to the + median step. + """ + arr = np.asarray(coords) + if arr.ndim != 1: + raise ValueError( + f"{func_name}(): coordinate '{axis_name}' must be 1-D, " + f"got shape {arr.shape}." + ) + if arr.size < 2: + # A single-pixel raster has no spacing to validate; the caller + # will fall back to res=1.0 in _source_bounds, which is fine. + return + if not np.all(np.isfinite(arr)): + raise ValueError( + f"{func_name}(): coordinate '{axis_name}' contains non-finite " + f"values (NaN or inf)." + ) + diffs = np.diff(arr.astype(np.float64)) + # Strict monotonicity: every step has the same sign and is non-zero. + if not (np.all(diffs > 0) or np.all(diffs < 0)): + raise ValueError( + f"{func_name}(): coordinate '{axis_name}' must be strictly " + f"monotonic (all ascending or all descending). The reproject " + f"pipeline assumes a uniformly-spaced grid; see #2184." + ) + median_step = float(np.median(diffs)) + abs_med = abs(median_step) + deviation = np.abs(diffs - median_step) + worst = float(np.max(deviation)) + if worst > rtol * abs_med: + # Report the index of the worst step in the original coords so the + # caller can locate the offending sample without re-running diff. + worst_idx = int(np.argmax(deviation)) + raise ValueError( + f"{func_name}(): coordinate '{axis_name}' is not regularly " + f"spaced. Median step is {median_step!r}; worst deviation is " + f"{worst!r} at index {worst_idx} (between {axis_name}[{worst_idx}]" + f"={float(arr[worst_idx])!r} and {axis_name}[{worst_idx + 1}]" + f"={float(arr[worst_idx + 1])!r}). The reproject pipeline " + f"assumes a uniformly-spaced grid; see #2184." + ) + + +def _validate_source_coords(raster, func_name): + """Validate both spatial axes of a raster before any reproject work.""" + ydim, xdim = _find_spatial_dims(raster) + _validate_regular_axis(raster.coords[ydim].values, 'y', func_name) + _validate_regular_axis(raster.coords[xdim].values, 'x', func_name) + + def _source_bounds(raster): """Extract (left, bottom, right, top) from a DataArray's coordinates.""" ydim, xdim = _find_spatial_dims(raster) @@ -631,6 +711,13 @@ def reproject( _validate_raster(raster, func_name='reproject', name='raster', ndim=(2, 3)) + # Reject irregular / non-monotonic source coords before any CRS + # resolution or grid math. _source_bounds() infers pixel size from + # only the first two coord samples and downstream pixel math assumes + # uniform spacing, so an unchecked irregular input would silently + # produce wrong georeferencing (#2184). + _validate_source_coords(raster, 'reproject') + _validate_grid_params( resolution=resolution, bounds=bounds, @@ -1747,6 +1834,11 @@ def merge( # (#2027). Reject 3-D up front so callers get a clear error. _validate_raster(r, func_name='merge', name=f'rasters[{i}]', ndim=(2,)) + # Each input must have strictly monotonic, regularly spaced spatial + # coords. _source_bounds() infers pixel size from only the first + # two samples, so irregular inputs would otherwise silently produce + # wrong georeferencing (#2184). + _validate_source_coords(r, f'merge[rasters[{i}]]') _validate_grid_params( resolution=resolution, diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index 6edd7012f..20d36a20f 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -1852,6 +1852,302 @@ def test_merge_transform_precision_threaded_to_chunks(self): np.testing.assert_allclose(v0[valid], v16[valid], rtol=1e-10) +# ===================================================================== +# Issue #2184: irregular / non-monotonic source coords are rejected +# ===================================================================== + + +def _regular_raster(h=8, w=8): + """Strictly regular raster used as the baseline in coord-validation tests.""" + return _gradient_raster(h=h, w=w) + + +class TestValidateSourceCoords: + """reproject() and merge() reject irregular / non-monotonic source coords.""" + + # ------------------------------------------------------------------ + # Positive cases: well-formed inputs pass through. + # ------------------------------------------------------------------ + + def test_reproject_accepts_regular_descending_y(self): + from xrspatial.reproject import reproject + # _gradient_raster builds y descending (north-up), x ascending. + out = reproject(_regular_raster(), 'EPSG:4326', resolution=1.0) + assert out.shape[0] > 0 and out.shape[1] > 0 + + def test_reproject_accepts_regular_ascending_y(self): + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(-5, 5, h) # ascending + x = np.linspace(-5, 5, w) + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + out = reproject(raster, 'EPSG:4326', resolution=1.0) + assert out.shape[0] > 0 and out.shape[1] > 0 + + def test_reproject_accepts_tiny_floating_drift(self): + """Coords from real-world GeoTIFFs drift a few ULPs; that must pass.""" + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(5, -5, h) + x = np.linspace(-5, 5, w) + # Inject sub-ULP-scale drift well below the 1e-6 relative tolerance. + rng = np.random.default_rng(0) + x = x + rng.uniform(-1e-10, 1e-10, size=w) + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + out = reproject(raster, 'EPSG:4326', resolution=1.0) + assert out.shape[0] > 0 and out.shape[1] > 0 + + def test_reproject_accepts_single_pixel_raster(self): + """Single-pixel rasters have no spacing to validate.""" + from xrspatial.reproject import reproject + raster = xr.DataArray( + np.zeros((1, 1), dtype=np.float64), + dims=('y', 'x'), + coords={'y': [0.0], 'x': [0.0]}, + attrs={'crs': 'EPSG:4326'}, + ) + # Should not raise; output grid math falls back to res=1.0. + out = reproject(raster, 'EPSG:4326', resolution=1.0) + assert out.size >= 1 + + # ------------------------------------------------------------------ + # Irregular spacing. + # ------------------------------------------------------------------ + + def test_reproject_rejects_irregular_x(self): + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(5, -5, h) + x = np.linspace(-5, 5, w) + x[4] += 0.1 # perturb one sample + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"coordinate 'x' is not regularly"): + reproject(raster, 'EPSG:3857') + + def test_reproject_rejects_irregular_y(self): + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(5, -5, h) + y[3] += 0.05 + x = np.linspace(-5, 5, w) + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"coordinate 'y' is not regularly"): + reproject(raster, 'EPSG:3857') + + def test_reproject_irregular_error_names_index(self): + """The error message points at the offending sample index.""" + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(5, -5, h) + x = np.linspace(-5, 5, w) + x[5] += 0.2 + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError) as exc: + reproject(raster, 'EPSG:3857') + msg = str(exc.value) + # Step index 4 (x[4]->x[5]) or 5 (x[5]->x[6]) is the worst, + # both touch the perturbed sample. + assert "at index 4" in msg or "at index 5" in msg + assert "Median step" in msg + + # ------------------------------------------------------------------ + # Non-monotonic coords. + # ------------------------------------------------------------------ + + def test_reproject_rejects_non_monotonic_x(self): + from xrspatial.reproject import reproject + h, w = 4, 4 + y = np.linspace(5, -5, h) + x = np.array([0.0, 1.0, 0.5, 2.0]) + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"coordinate 'x' must be strictly"): + reproject(raster, 'EPSG:3857') + + def test_reproject_rejects_non_monotonic_y(self): + from xrspatial.reproject import reproject + h, w = 4, 4 + y = np.array([0.0, 1.0, 0.5, 2.0]) + x = np.linspace(-5, 5, w) + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"coordinate 'y' must be strictly"): + reproject(raster, 'EPSG:3857') + + def test_reproject_rejects_repeated_coord(self): + """Repeated values break strict monotonicity (zero step).""" + from xrspatial.reproject import reproject + h, w = 4, 4 + y = np.linspace(5, -5, h) + x = np.array([0.0, 1.0, 1.0, 2.0]) + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"strictly"): + reproject(raster, 'EPSG:3857') + + def test_reproject_rejects_nan_in_coord(self): + from xrspatial.reproject import reproject + h, w = 4, 4 + y = np.linspace(5, -5, h) + x = np.array([0.0, 1.0, np.nan, 3.0]) + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"non-finite"): + reproject(raster, 'EPSG:3857') + + # ------------------------------------------------------------------ + # Validation runs before expensive work. + # ------------------------------------------------------------------ + + def test_reproject_rejects_irregular_before_crs_resolution(self): + """Bad coords must be caught even when source_crs is unresolvable. + + If validation ran after CRS resolution, an irregular raster with no + CRS attribute would raise the "Could not detect source CRS" error + first, hiding the real defect. + """ + from xrspatial.reproject import reproject + h, w = 4, 4 + y = np.linspace(5, -5, h) + x = np.array([0.0, 1.0, 1.5, 2.0]) # irregular + raster = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + # NB: no crs attr -- detection would normally raise here. + ) + with pytest.raises(ValueError, match=r"not regularly"): + reproject(raster, 'EPSG:3857') + + # ------------------------------------------------------------------ + # merge() applies the same checks. + # ------------------------------------------------------------------ + + def test_merge_rejects_irregular_x(self): + from xrspatial.reproject import merge + good = _regular_raster() + h, w = 8, 8 + y = np.linspace(5, -5, h) + x = np.linspace(-5, 5, w) + x[4] += 0.1 + bad = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"rasters\[1\].*coordinate 'x'"): + merge([good, bad], resolution=1.0) + + def test_merge_rejects_non_monotonic_y(self): + from xrspatial.reproject import merge + h, w = 4, 4 + y = np.array([0.0, 1.0, 0.5, 2.0]) + x = np.linspace(-5, 5, w) + bad = xr.DataArray( + np.zeros((h, w), dtype=np.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"coordinate 'y' must be strictly"): + merge([bad], resolution=1.0) + + # ------------------------------------------------------------------ + # Backends: validation fires identically regardless of array type. + # ------------------------------------------------------------------ + + @pytest.mark.skipif(not HAS_DASK, reason="dask not installed") + def test_reproject_rejects_irregular_dask(self): + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(5, -5, h) + x = np.linspace(-5, 5, w) + x[4] += 0.1 + raster = xr.DataArray( + da.zeros((h, w), dtype=np.float64, chunks=(4, 4)), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"not regularly"): + reproject(raster, 'EPSG:3857') + + @pytest.mark.skipif(not HAS_CUPY, reason="cupy not installed") + def test_reproject_rejects_irregular_cupy(self): + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(5, -5, h) + x = np.linspace(-5, 5, w) + x[4] += 0.1 + raster = xr.DataArray( + cp.zeros((h, w), dtype=cp.float64), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"not regularly"): + reproject(raster, 'EPSG:3857') + + @pytest.mark.skipif(not (HAS_DASK and HAS_CUPY), + reason="dask and cupy required") + def test_reproject_rejects_irregular_dask_cupy(self): + from xrspatial.reproject import reproject + h, w = 8, 8 + y = np.linspace(5, -5, h) + x = np.linspace(-5, 5, w) + x[4] += 0.1 + raster = xr.DataArray( + da.from_array(cp.zeros((h, w), dtype=cp.float64), chunks=(4, 4)), + dims=('y', 'x'), + coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match=r"not regularly"): + reproject(raster, 'EPSG:3857') + + # ===================================================================== # Issue #1435: NaN/Inf rejection in scalar inputs # ===================================================================== From 15205c74e38e4f3d3012ab2d1fa9f5747e9b217e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 20 May 2026 08:09:23 -0700 Subject: [PATCH 2/2] Address review nits: document zero-step trap, tighten regex, drop copy (#2184) - Explain why the strict-monotonicity check uses two separate comparisons instead of a single sign test, so a future refactor does not accidentally accept zero steps from repeated coords. - Switch `arr.astype(np.float64)` to `np.asarray(arr, dtype=np.float64)` to skip the copy when coords are already float64. - Tighten `test_reproject_rejects_repeated_coord` to match the full "must be strictly monotonic" phrase instead of just "strictly". --- xrspatial/reproject/__init__.py | 9 ++++++++- xrspatial/tests/test_reproject.py | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index 683b243a3..d23e0d03c 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -135,8 +135,15 @@ def _validate_regular_axis(coords, axis_name, func_name, rtol=_REGULAR_COORD_RTO f"{func_name}(): coordinate '{axis_name}' contains non-finite " f"values (NaN or inf)." ) - diffs = np.diff(arr.astype(np.float64)) + # np.asarray skips the copy when arr is already float64; np.diff promotes + # ints to int64, which is fine but we want float steps for the median / + # tolerance math below. + diffs = np.diff(np.asarray(arr, dtype=np.float64)) # Strict monotonicity: every step has the same sign and is non-zero. + # `diffs > 0` AND `diffs < 0` are both False for zero steps (repeated + # coords), so the combined check rejects them. Do NOT replace this with + # a sign-only test like `np.all(np.sign(diffs) == np.sign(diffs[0]))` -- + # that variant accepts zero steps and lets a repeated coord through. if not (np.all(diffs > 0) or np.all(diffs < 0)): raise ValueError( f"{func_name}(): coordinate '{axis_name}' must be strictly " diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index 20d36a20f..5361f3948 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -2019,7 +2019,8 @@ def test_reproject_rejects_repeated_coord(self): coords={'y': y, 'x': x}, attrs={'crs': 'EPSG:4326'}, ) - with pytest.raises(ValueError, match=r"strictly"): + with pytest.raises(ValueError, + match=r"coordinate 'x' must be strictly monotonic"): reproject(raster, 'EPSG:3857') def test_reproject_rejects_nan_in_coord(self):