diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index 615fc9398..7abec7c04 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-13,1812,HIGH,3,"Re-audit 2026-05-13 (af588e03 worktree, branch deep-sweep-metadata-geotiff-2026-05-13-af588e03): 4-backend (numpy/cupy/dask+numpy/dask+cupy) parity verified for full reads, windowed reads, multi-band, miniswhite, nodata, extra_tags, gdal_metadata, x/y_resolution, image_description. All three writers (eager to_geotiff, dask streaming, write_geotiff_gpu) preserve rich tags through round-trip. VRT read attrs agree across open_geotiff / read_geotiff_dask / read_vrt (all 3 drop x/y_resolution -- VRT XML format limitation, LOW). NEW HIGH finding #1812: to_geotiff silently corrupted data when 3D dims[0] was not in _BAND_DIM_NAMES (e.g. dims=('time','y','x')); writer skipped moveaxis and treated leading axis as y -- round-trip produced shape (2,4,5) instead of (4,5,2) with swapped per-slice contents. Fix raises ValueError at all 3 writer entry points with an actionable message (transpose to (y,x,band) or rename to band/bands/channel). Existing LOW (int+nodata dtype dask always-promotes-to-float64 while eager/gpu keep int when sentinel absent) is documented and intentional per #1597." +geotiff,2026-05-15,1909,HIGH,4;5,"Re-audit 2026-05-15 (agent-a55b69cec1ef2a092 worktree, branch deep-sweep-metadata-geotiff-2026-05-15). 4-backend (numpy/cupy/dask+numpy/dask+cupy) parity reverified after the #1813 modular refactor: full reads, windowed reads, multi-band, band=N selection, no-georef integer pixel coords, crs/crs_wkt/transform/nodata/x_resolution/y_resolution/resolution_unit/image_description/gdal_metadata all agree across backends. DataArray .name and dims agree (y, x for 2D; y, x, band for 3D). NEW HIGH finding #1909: GDS chunked GPU path (_read_geotiff_gpu_chunked_gds) declared the dask graph dtype as float64 when source had an in-range integer nodata sentinel, matching the CPU dask path's #1597 contract, but the per-chunk _chunk_task did not cast its returned cupy array to declared_dtype -- chunks with no sentinel hit returned the raw uint16/int16 source dtype, producing a silent declared/actual dtype mismatch. Fix mirrors the #1597 + #1624 CPU dask pattern: compute declared_dtype before defining _chunk_task, cast inside the task only when arr.dtype != declared_dtype to skip the no-op astype(copy=True). 6 regression tests added in test_chunked_gpu_declared_dtype_1909.py covering declared vs computed parity, CPU/GPU dask declared-dtype agreement, eager paths preserve source dtype, no-nodata round-trip, explicit dtype= kwarg, and sentinel-hit float64 promotion. Pre-existing test failures in test_predictor2_big_endian_gpu_1517.py and test_size_param_validation_gpu_vrt_1776.py exist on main (read_to_array AttributeError after #1813 refactor, tile_size=4 rejected by stricter _validate_tile_size_arg) and are unrelated to this audit." reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch. diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 0d51dcf2e..849323562 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -1032,6 +1032,17 @@ def _read_geotiff_gpu_chunked_gds(source, ifd, geo_info, header, *, masked_fill = (_resolve_masked_fill(ifd.nodata_str, file_dtype) if compression == COMPRESSION_LERC else None) + # Determine declared dtype for the dask graph. Nodata masking + # promotes integer rasters to float64; mirror the CPU dask path. + declared_dtype = file_dtype + if nodata is not None and file_dtype.kind in ('u', 'i'): + if np.isfinite(nodata) and float(nodata).is_integer(): + info = np.iinfo(file_dtype) + if info.min <= int(nodata) <= info.max: + declared_dtype = np.dtype('float64') + if dtype is not None: + declared_dtype = np.dtype(dtype) + @dask.delayed def _chunk_task(meta, r0, c0, r1, c1): all_offsets, all_byte_counts = meta @@ -1050,19 +1061,23 @@ def _chunk_task(meta, r0, c0, r1, c1): arr = arr.astype(target) if band is not None and arr.ndim == 3: arr = arr[:, :, band] + # Cast to the declared graph dtype so every chunk matches the + # dask array's promised dtype. ``_apply_nodata_mask_gpu`` only + # promotes integer arrays to float64 when at least one sentinel + # pixel hits, mirroring the eager GPU semantics; the dask graph + # always-promotes (matching the CPU dask path's #1597 contract), + # so chunks with no sentinel hit need an explicit cast here. + # Without this, a uint16 file + declared nodata sentinel + # produces a graph that advertises float64 but emits uint16 + # buffers from any chunk that didn't hit the sentinel -- a + # silent declared/actual dtype mismatch (issue #1909). Skip + # the cast when the dtype already matches to avoid the + # ``astype(copy=True)`` per-chunk allocation that #1624 fixed + # for the CPU dask path. + if arr.dtype != declared_dtype: + arr = arr.astype(declared_dtype) return arr - # Determine declared dtype for the dask graph. Nodata masking - # promotes integer rasters to float64; mirror the CPU dask path. - declared_dtype = file_dtype - if nodata is not None and file_dtype.kind in ('u', 'i'): - if np.isfinite(nodata) and float(nodata).is_integer(): - info = np.iinfo(file_dtype) - if info.min <= int(nodata) <= info.max: - declared_dtype = np.dtype('float64') - if dtype is not None: - declared_dtype = np.dtype(dtype) - out_has_band_axis = band is None and n_bands_out > 0 blocks_rows = [] diff --git a/xrspatial/geotiff/tests/test_chunked_gpu_declared_dtype_1909.py b/xrspatial/geotiff/tests/test_chunked_gpu_declared_dtype_1909.py new file mode 100644 index 000000000..da6ef86a3 --- /dev/null +++ b/xrspatial/geotiff/tests/test_chunked_gpu_declared_dtype_1909.py @@ -0,0 +1,204 @@ +"""Regression test for issue #1909. + +The GDS chunked GPU read path (``_read_geotiff_gpu_chunked_gds``) +declares ``float64`` on the dask graph whenever the source file has an +integer nodata sentinel that round-trips through the source dtype, but +each chunk task returned the raw source dtype if no pixel hit the +sentinel. The result was a silent declared/actual dtype mismatch: the +dask array advertised float64 while ``.compute()`` produced uint16 +buffers from chunks where the sentinel didn't appear. + +The fix casts each chunk to the declared dtype before returning, gated +on ``arr.dtype != declared_dtype`` so the no-op case skips the +``astype(copy=True)`` allocation (same #1624 optimisation as the CPU +dask path). + +These tests drive ``_read_geotiff_gpu_chunked_gds`` directly because +``read_geotiff_gpu(chunks=...)`` only routes to the GDS path when +``_gds_chunk_path_available`` qualifies (tiled file, kvikio present, +etc.); otherwise it falls back to CPU dask + upload, which already +has the correct dtype contract via #1597 and would not exercise the +#1909 fix. The mmap fallback inside ``gpu_decode_tiles_from_file`` +keeps the test runnable without KvikIO on CI. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +def _parse_for_gds(path: str): + """Return ``(ifd, geo_info, header)`` for the GDS entry point.""" + from xrspatial.geotiff._reader import _FileSource + from xrspatial.geotiff._header import ( + parse_header, parse_all_ifds, select_overview_ifd, + ) + from xrspatial.geotiff._geotags import ( + extract_geo_info_with_overview_inheritance, + ) + + fs = _FileSource(path) + try: + raw = fs.read_all() + finally: + fs.close() + header = parse_header(raw) + ifds = parse_all_ifds(raw, header) + ifd = select_overview_ifd(ifds, None) + geo_info = extract_geo_info_with_overview_inheritance( + ifd, ifds, raw, header.byte_order, + ) + return ifd, geo_info, header + + +@pytest.fixture +def uint16_no_sentinel_path(tmp_path): + """A tiled uint16 GeoTIFF with declared nodata=9999 and no sentinel pixels.""" + from xrspatial.geotiff import to_geotiff + + arr = (np.arange(64, dtype=np.uint16) + 100).reshape(8, 8) + path = tmp_path / "uint16_no_sentinel.tif" + to_geotiff(arr, str(path), compression="none", nodata=9999, tile_size=16) + return str(path) + + +@pytest.fixture +def uint16_sentinel_hit_path(tmp_path): + """A tiled uint16 GeoTIFF whose first chunk hits the nodata sentinel.""" + from xrspatial.geotiff import to_geotiff + + arr = (np.arange(64, dtype=np.uint16) + 100).reshape(8, 8) + arr[0, 0] = 9999 + path = tmp_path / "uint16_sentinel_hit.tif" + to_geotiff(arr, str(path), compression="none", nodata=9999, tile_size=16) + return str(path) + + +@pytest.fixture +def uint16_no_nodata_path(tmp_path): + """A tiled uint16 GeoTIFF with no declared nodata.""" + from xrspatial.geotiff import to_geotiff + + arr = (np.arange(64, dtype=np.uint16) + 100).reshape(8, 8) + path = tmp_path / "uint16_no_nodata.tif" + to_geotiff(arr, str(path), compression="none", tile_size=16) + return str(path) + + +@_gpu_only +def test_chunked_gpu_declared_dtype_matches_computed(uint16_no_sentinel_path): + """Declared dask graph dtype must equal the computed chunk dtype.""" + from xrspatial.geotiff._backends.gpu import _read_geotiff_gpu_chunked_gds + + ifd, geo_info, header = _parse_for_gds(uint16_no_sentinel_path) + da = _read_geotiff_gpu_chunked_gds( + uint16_no_sentinel_path, ifd, geo_info, header, + dtype=None, chunks=4, window=None, band=None, + name=None, max_pixels=None, + ) + declared = da.data.dtype + computed = da.data.compute() + assert declared == np.float64 + assert declared == computed.dtype, ( + f"GDS chunked path declared {declared} but computed {computed.dtype}; " + f"this is the issue #1909 silent declared/actual dtype mismatch." + ) + + +@_gpu_only +def test_chunked_gpu_dtype_matches_cpu_dask(uint16_no_sentinel_path): + """The dask+cupy declared dtype must match the dask+numpy path.""" + from xrspatial.geotiff import read_geotiff_dask + from xrspatial.geotiff._backends.gpu import _read_geotiff_gpu_chunked_gds + + cpu = read_geotiff_dask(uint16_no_sentinel_path, chunks=4) + ifd, geo_info, header = _parse_for_gds(uint16_no_sentinel_path) + gpu = _read_geotiff_gpu_chunked_gds( + uint16_no_sentinel_path, ifd, geo_info, header, + dtype=None, chunks=4, window=None, band=None, + name=None, max_pixels=None, + ) + assert cpu.data.dtype == gpu.data.dtype, ( + f"CPU dask declared {cpu.data.dtype} but GPU dask declared " + f"{gpu.data.dtype}; backends must agree on graph dtype." + ) + + +@_gpu_only +def test_chunked_gpu_no_nodata_keeps_source_dtype(uint16_no_nodata_path): + """No nodata declared => declared dtype stays at source dtype.""" + from xrspatial.geotiff._backends.gpu import _read_geotiff_gpu_chunked_gds + + ifd, geo_info, header = _parse_for_gds(uint16_no_nodata_path) + da = _read_geotiff_gpu_chunked_gds( + uint16_no_nodata_path, ifd, geo_info, header, + dtype=None, chunks=4, window=None, band=None, + name=None, max_pixels=None, + ) + assert da.data.dtype == np.uint16 + assert da.data.compute().dtype == np.uint16 + + +@_gpu_only +def test_chunked_gpu_explicit_dtype_kwarg_threads_through(uint16_no_sentinel_path): + """Explicit dtype= overrides nodata promotion and chunks land in it.""" + from xrspatial.geotiff._backends.gpu import _read_geotiff_gpu_chunked_gds + + ifd, geo_info, header = _parse_for_gds(uint16_no_sentinel_path) + da = _read_geotiff_gpu_chunked_gds( + uint16_no_sentinel_path, ifd, geo_info, header, + dtype="float32", chunks=4, window=None, band=None, + name=None, max_pixels=None, + ) + assert da.data.dtype == np.float32 + assert da.data.compute().dtype == np.float32 + + +@_gpu_only +def test_chunked_gpu_sentinel_hit_still_promotes(uint16_sentinel_hit_path): + """A chunk that hits the sentinel still NaN-masks and lands in float64.""" + from xrspatial.geotiff._backends.gpu import _read_geotiff_gpu_chunked_gds + + ifd, geo_info, header = _parse_for_gds(uint16_sentinel_hit_path) + da = _read_geotiff_gpu_chunked_gds( + uint16_sentinel_hit_path, ifd, geo_info, header, + dtype=None, chunks=4, window=None, band=None, + name=None, max_pixels=None, + ) + assert da.data.dtype == np.float64 + computed = da.data.compute() + assert computed.dtype == np.float64 + # ``.get()`` brings the cupy array to host before NumPy's ``isnan``. + host = computed.get() + assert np.isnan(host[0, 0]) + + +def test_chunked_gpu_eager_paths_keep_source_dtype(uint16_no_sentinel_path): + """Eager paths (no chunks) keep the source uint16 dtype. + + Pinned to lock the contract: the eager paths only promote when an + actual sentinel pixel hits; the dask paths always promote when a + sentinel is declared. Both contracts are valid; the fix only ties + the GDS chunked path to the dask contract. CPU-only so this runs + in every CI configuration. + """ + from xrspatial.geotiff import open_geotiff + + np_da = open_geotiff(uint16_no_sentinel_path) + assert np_da.dtype == np.uint16