From dd0140bd4f237c3f1728ab87fe7eed9df49a8d3a Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 08:11:44 -0700 Subject: [PATCH 1/2] geotiff: golden corpus phase 3 PR 3, GPU backend (#1930) Mirrors phase 3 PR 1 (#2036) and PR 2 (#2038) but reads each fixture through ``open_geotiff(str(path), gpu=True, on_gpu_failure='strict')``, returning a CuPy-backed DataArray. The oracle's ``_candidate_pixels`` pulls the device array back to host via ``.get()`` before comparing. The whole module ``pytest.importorskip``s cupy and skips if no CUDA device is reachable, so CI matrices without a GPU collect zero tests here. The strict on-gpu-failure mode is on so a silent CPU fallback surfaces as an exception rather than masking GPU coverage. 23 fixtures pass, 7 xfailed (shared codec/attrs gaps), 2 skipped (MinIsWhite + the example_ manifest entry). ``_GPU_SKIPS`` is reserved for GPU-specific gaps and is empty for now. A ``test_gpu_candidate_is_actually_on_device`` belt-and-braces check confirms the result is a ``cupy.ndarray`` rather than a CPU-fallback numpy array. --- .../tests/test_golden_corpus_gpu_1930.py | 197 ++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py diff --git a/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py new file mode 100644 index 000000000..e1d281a48 --- /dev/null +++ b/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py @@ -0,0 +1,197 @@ +"""GPU (cupy) backend cells against the golden-corpus oracle (issue #1930). + +Phase 3 PR 3 of the corpus plan. Mirrors the eager and dask parity layers +but reads each fixture through ``open_geotiff(str(path), gpu=True)``, +returning a CuPy-backed DataArray. The oracle's ``_candidate_pixels`` +pulls the CuPy array back to host via ``.get()`` before comparing, so +the comparison machinery is unchanged. + +The whole module ``pytest.importorskip``s cupy and skips if no CUDA +device is reachable. CI matrices without CUDA collect zero tests from +this module; runs with a GPU exercise every fixture the eager and dask +backends already do. + +The shared codec / attrs parity gaps (``_PARITY_GAPS``) carry over from +the eager and dask modules verbatim: they live in the decode / attrs +layer that all three backends share. ``_GPU_SKIPS`` is reserved for +GPU-only gaps (e.g. a codec the GPU pipeline can't decode and that the +strict-mode would normally fall back to CPU for). It starts empty; add +entries only when a fixture is GPU-specific. + +The GPU read is configured with ``on_gpu_failure='strict'`` so a codec +that would silently CPU-fall-back instead surfaces as an xfail / fail +the corpus can act on. +""" +from __future__ import annotations + +import pathlib + +import pytest + +pytest.importorskip("yaml") +pytest.importorskip("rasterio") +cupy = pytest.importorskip("cupy") + +try: + if cupy.cuda.runtime.getDeviceCount() < 1: + pytest.skip( + "no CUDA device available", allow_module_level=True + ) +except Exception as exc: # pragma: no cover - CI without CUDA + pytest.skip( + f"cupy is importable but CUDA is not usable: {exc}", + allow_module_level=True, + ) + +from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 +from xrspatial.geotiff.tests.golden_corpus._oracle import ( # noqa: E402 + compare_to_oracle, +) + + +FIXTURES_DIR = ( + pathlib.Path(generate.__file__).resolve().parent / "fixtures" +) + + +_NODATA_MASKING_REASON = ( + "integer nodata masking: xrspatial masks sentinel pixels to NaN and " + "upcasts to float64 per #1988 (attrs['masked_nodata']=True). The oracle " + "compares raw integer pixels; needs an oracle extension that consults " + "attrs['masked_nodata']." +) + +_PARITY_GAPS: dict[str, str] = { + "compression_jpeg_uint8_ycbcr": ( + "RGB band axis order divergence: rasterio reads (bands, y, x) while " + "xrspatial reads (y, x, band). The oracle does not yet normalise " + "multi-band axis order." + ), + "crs_citation_only": ( + "citation-only CRS: xrspatial decodes the citation into deprecated " + "attrs['geog_citation'] but does not emit a canonical attrs['crs'] " + "or attrs['crs_wkt']. Real parity gap; needs a fix in _crs.py." + ), + "nodata_int_sentinel_uint16": _NODATA_MASKING_REASON, + "stripped_le_uint16": _NODATA_MASKING_REASON, + "stripped_be_uint16": _NODATA_MASKING_REASON, + "tiled_le_uint16": _NODATA_MASKING_REASON, + "tiled_be_uint16": _NODATA_MASKING_REASON, +} + +# GPU-only gaps go here. Empty in the first pass; add entries when a +# fixture is GPU-specific (i.e. eager and dask pass, GPU does not). +_GPU_SKIPS: dict[str, str] = {} + +_INTENTIONAL_SKIPS: dict[str, str] = { + "nodata_miniswhite_uint8": ( + "MinIsWhite photometric inversion: xrspatial inverts pixels per " + "#1797; rasterio leaves them raw. Covered by " + "test_miniswhite_backend_parity_1797.py." + ), +} + + +def _resolved_fixtures() -> list[dict]: + manifest = generate.load_manifest() + entries = generate.validate(manifest) + entries.sort(key=lambda e: e["id"]) + return entries + + +def _fixture_path(entry: dict) -> pathlib.Path: + return FIXTURES_DIR / f"{entry['id']}.tif" + + +def _is_lossy(entry: dict) -> bool: + tol = entry.get("tolerance") or {} + return bool(tol.get("lossy", False)) + + +def _build_param(entry: dict) -> pytest.param: + fid = entry["id"] + if fid in _PARITY_GAPS: + return pytest.param( + entry, + id=fid, + marks=pytest.mark.xfail(reason=_PARITY_GAPS[fid], strict=True), + ) + if fid in _GPU_SKIPS: + return pytest.param( + entry, + id=fid, + marks=pytest.mark.xfail(reason=_GPU_SKIPS[fid], strict=True), + ) + if fid in _INTENTIONAL_SKIPS: + return pytest.param( + entry, + id=fid, + marks=pytest.mark.skip(reason=_INTENTIONAL_SKIPS[fid]), + ) + return pytest.param(entry, id=fid) + + +_FIXTURES = _resolved_fixtures() +_PARAMS = [_build_param(e) for e in _FIXTURES] + + +@pytest.mark.parametrize("manifest_entry", _PARAMS) +def test_gpu_parity(manifest_entry: dict) -> None: + """``open_geotiff(path, gpu=True)`` agrees with the rasterio oracle. + + The GPU path uses nvCOMP for supported codecs and falls back to CPU + otherwise. ``on_gpu_failure='strict'`` is set so a silent CPU + fallback surfaces as an exception rather than masking GPU coverage. + """ + fixture_id = manifest_entry["id"] + path = _fixture_path(manifest_entry) + if not path.exists(): + pytest.skip( + f"fixture {fixture_id!r} has no .tif on disk; run " + f"`python -m xrspatial.geotiff.tests.golden_corpus.generate` " + f"to materialise the full corpus" + ) + candidate = open_geotiff( + str(path), gpu=True, on_gpu_failure="strict" + ) + compare_to_oracle(path, candidate, lossy=_is_lossy(manifest_entry)) + + +def test_taxonomy_ids_are_in_manifest() -> None: + """Every id in the parity-gap, GPU-skip, or intentional-skip tables + must exist in the manifest. + """ + manifest_ids = {e["id"] for e in _FIXTURES} + tagged = set(_PARITY_GAPS) | set(_GPU_SKIPS) | set(_INTENTIONAL_SKIPS) + stale = tagged - manifest_ids + assert not stale, ( + f"taxonomy references unknown fixture ids: {sorted(stale)}" + ) + + +def test_gpu_candidate_is_actually_on_device() -> None: + """Sanity check: the GPU backend returns a CuPy-backed array. + + Catches the failure mode where ``gpu=True`` silently CPU-falls-back + and ``open_geotiff`` returns a numpy array instead. ``strict`` mode + should already raise rather than fall back, but this is a belt-and- + braces check against a regression in the fallback policy. + """ + plain_fixtures = [ + e for e in _FIXTURES + if e["id"] not in _PARITY_GAPS + and e["id"] not in _GPU_SKIPS + and e["id"] not in _INTENTIONAL_SKIPS + and _fixture_path(e).exists() + ] + if not plain_fixtures: + pytest.skip("no eligible fixtures on disk") + entry = plain_fixtures[0] + da = open_geotiff( + str(_fixture_path(entry)), gpu=True, on_gpu_failure="strict" + ) + assert isinstance(da.data, cupy.ndarray), ( + f"expected a cupy.ndarray for {entry['id']!r}, " + f"got {type(da.data).__name__}" + ) From 86951c62d07e8359285d7898482e37d180c84814 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 08:55:17 -0700 Subject: [PATCH 2/2] geotiff: move JPEG cell to GPU-only skip table (#1930) Address phase 3 PR 3 review: ``compression_jpeg_uint8_ycbcr`` lived in ``_PARITY_GAPS`` with the reason "RGB band axis order divergence", which describes the failure mode on the eager and dask backends. On the GPU path with ``on_gpu_failure='strict'`` the JPEG-YCbCr decoder raises ``OSError: broken data stream`` before the oracle gets to compare anything, so the recorded reason no longer matches what actually happens. Move the entry to ``_GPU_SKIPS`` with a GPU-accurate reason. The xfail mechanics are unchanged; this is documentation that now matches reality. Update the module docstring to match. --- .../tests/test_golden_corpus_gpu_1930.py | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py index e1d281a48..1c061b8aa 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py @@ -11,12 +11,13 @@ this module; runs with a GPU exercise every fixture the eager and dask backends already do. -The shared codec / attrs parity gaps (``_PARITY_GAPS``) carry over from -the eager and dask modules verbatim: they live in the decode / attrs -layer that all three backends share. ``_GPU_SKIPS`` is reserved for -GPU-only gaps (e.g. a codec the GPU pipeline can't decode and that the -strict-mode would normally fall back to CPU for). It starts empty; add -entries only when a fixture is GPU-specific. +``_PARITY_GAPS`` carries over the codec / attrs gaps that all three +backends share (citation CRS, integer nodata masking). ``_GPU_SKIPS`` +holds GPU-only failures, currently the JPEG-YCbCr fixture: the GPU +decoder does not handle it and ``on_gpu_failure='strict'`` raises +rather than falling back, so the read fails before the oracle can +compare. On eager / dask the same fixture exposes the RGB axis-order +divergence; on GPU strict mode it never gets that far. The GPU read is configured with ``on_gpu_failure='strict'`` so a codec that would silently CPU-fall-back instead surfaces as an xfail / fail @@ -63,11 +64,6 @@ ) _PARITY_GAPS: dict[str, str] = { - "compression_jpeg_uint8_ycbcr": ( - "RGB band axis order divergence: rasterio reads (bands, y, x) while " - "xrspatial reads (y, x, band). The oracle does not yet normalise " - "multi-band axis order." - ), "crs_citation_only": ( "citation-only CRS: xrspatial decodes the citation into deprecated " "attrs['geog_citation'] but does not emit a canonical attrs['crs'] " @@ -80,9 +76,19 @@ "tiled_be_uint16": _NODATA_MASKING_REASON, } -# GPU-only gaps go here. Empty in the first pass; add entries when a -# fixture is GPU-specific (i.e. eager and dask pass, GPU does not). -_GPU_SKIPS: dict[str, str] = {} +# GPU-only gaps. Failures here are GPU-specific (the eager and dask +# backends decode the same fixture cleanly). +_GPU_SKIPS: dict[str, str] = { + "compression_jpeg_uint8_ycbcr": ( + "JPEG-YCbCr decode is not implemented on the GPU read path. " + "With on_gpu_failure='strict' the read raises rather than " + "CPU-falling-back, so the test fails before reaching the " + "oracle. On the eager and dask backends this fixture exposes " + "the RGB band axis order divergence (rasterio is (bands, y, " + "x), xrspatial is (y, x, band)); on the GPU backend that " + "comparison never runs." + ), +} _INTENTIONAL_SKIPS: dict[str, str] = { "nodata_miniswhite_uint8": (