From 4e992ed22579ad152f24d0f33edf9d0f569fa459 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 04:26:10 -0700 Subject: [PATCH 1/3] Add release-gate tests for stable GeoTIFF features (#2340) PR 6 of 6 in the GeoTIFF release contract epic. Adds a small, deterministic release-gate test per stable feature so a release engineer can run `pytest -m release_gate` and know the next release does not silently regress a stable promise. Coverage: - Local read: pixels, crs, transform, nodata round-trip. - Local write: pixels, crs, transform, nodata round-trip via the public to_geotiff API. - Stable lossless codecs (none, deflate, lzw, packbits, zstd) on uint16 and float32, plus a cross-file parity check against SUPPORTED_FEATURES. - COG write/read for every stable lossless codec. - Canonical attrs contract: canonical keys present, georef_status='full', contract version stamp shape, and a full write-read-write-read cycle. - Windowed reads: subset returned, crs preserved, transform origin shifts to the window, full-extent matches unwindowed. - Dask read parity: pixels and canonical attrs match eager, lazy reads stay dask-backed. Registers the `release_gate` pytest marker in setup.cfg. Tests run by default; release engineers can filter with `pytest -m release_gate`. 
--- setup.cfg | 1 + .../tests/test_release_gate_attrs_contract.py | 154 +++++++++++++++ .../geotiff/tests/test_release_gate_codecs.py | 134 +++++++++++++ .../geotiff/tests/test_release_gate_cog.py | 157 ++++++++++++++++ .../tests/test_release_gate_dask_parity.py | 146 +++++++++++++++ .../tests/test_release_gate_local_read.py | 177 ++++++++++++++++++ .../tests/test_release_gate_local_write.py | 156 +++++++++++++++ .../tests/test_release_gate_windowed_read.py | 162 ++++++++++++++++ 8 files changed, 1087 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_release_gate_attrs_contract.py create mode 100644 xrspatial/geotiff/tests/test_release_gate_codecs.py create mode 100644 xrspatial/geotiff/tests/test_release_gate_cog.py create mode 100644 xrspatial/geotiff/tests/test_release_gate_dask_parity.py create mode 100644 xrspatial/geotiff/tests/test_release_gate_local_read.py create mode 100644 xrspatial/geotiff/tests/test_release_gate_local_write.py create mode 100644 xrspatial/geotiff/tests/test_release_gate_windowed_read.py diff --git a/setup.cfg b/setup.cfg index dcfdbbfa..3b87b7bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -112,6 +112,7 @@ filterwarnings = ignore:'asyncio.AbstractEventLoopPolicy' is deprecated:DeprecationWarning:pytest_asyncio markers = slow: long-running test cell (typical: golden-corpus fixtures behind a heavy codec or large pixel count). PR CI can skip with `-m "not slow"`; nightly / release runs use no filter. See xrspatial/geotiff/tests/golden_corpus/_marks.py for the corpus-side helper. + release_gate: locks a single stable feature in the GeoTIFF release contract (epic #2340). Always runs by default in CI; the marker exists so release engineers can run only these gates with `pytest -m release_gate` before tagging a release. Tests in this marker should be small, deterministic, and fail loudly if the contract breaks. 
[isort] line_length = 100 diff --git a/xrspatial/geotiff/tests/test_release_gate_attrs_contract.py b/xrspatial/geotiff/tests/test_release_gate_attrs_contract.py new file mode 100644 index 00000000..c997ff9a --- /dev/null +++ b/xrspatial/geotiff/tests/test_release_gate_attrs_contract.py @@ -0,0 +1,154 @@ +"""Release gate: CRS / transform / nodata attrs contract (epic #2340). + +The canonical attrs after a GeoTIFF read are tagged ``stable`` in the +release gate checklist. The contract: every georeferenced read produces +a DataArray whose ``attrs`` carry, at minimum, ``crs``, ``crs_wkt``, +``transform``, ``georef_status``, the contract version stamp, and (when +declared) ``nodata``. These attrs survive a write -> read round trip. + +This file is the single-shot release gate. Deep canonicalisation, +alias handling, contract version bumps, and pass-through semantics are +each covered by their own ``test_attrs_contract_*_1984.py`` files; here +we lock the user-facing names and round-trip stability so the release +notes can quote the canonical attrs without caveats. + +Out of scope: +* Alias handling (``test_attrs_contract_aliases_1984.py``). +* Attrs pass-through for user-supplied keys + (``test_attrs_contract_passthrough_1984.py``). +* Contract version stamp bump policy + (``test_attrs_contract_version_1984.py``). +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff, to_geotiff +from xrspatial.geotiff._geotags import GeoTransform +from xrspatial.geotiff._writer import write + + +# Keys that release notes are allowed to promise on every georeferenced +# read. Adding a new key to the canonical set is a contract-version +# bump (see issue #1984); removing one is a breaking change. Anything +# else in the attrs (``masked_nodata``, ``nodata_pixels_present``, +# ``raster_type``, etc.) is additive and not pinned here. 
+CANONICAL_KEYS = ( + "_xrspatial_geotiff_contract", + "crs", + "crs_wkt", + "transform", + "georef_status", +) + + +def _write_known_good(path: str, *, nodata: float | None = None) -> None: + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + gt = GeoTransform( + origin_x=500000.0, + origin_y=4000000.0, + pixel_width=30.0, + pixel_height=-30.0, + ) + write( + arr, + path, + geo_transform=gt, + crs_epsg=32610, + nodata=nodata, + compression="none", + tiled=False, + ) + + +@pytest.mark.release_gate +def test_release_gate_attrs_canonical_keys_present(tmp_path) -> None: + """A georeferenced read carries every canonical attrs key.""" + path = str(tmp_path / "release_gate_attrs_canonical_2340.tif") + _write_known_good(path) + + da = open_geotiff(path) + missing = [k for k in CANONICAL_KEYS if k not in da.attrs] + assert not missing, ( + "release gate: canonical attrs keys missing from a georeferenced " + f"read: {missing}; release notes promise every key in " + f"{list(CANONICAL_KEYS)}" + ) + + +@pytest.mark.release_gate +def test_release_gate_attrs_georef_status_full(tmp_path) -> None: + """A fully-georeferenced read reports ``georef_status='full'``.""" + path = str(tmp_path / "release_gate_attrs_georef_status_2340.tif") + _write_known_good(path) + + da = open_geotiff(path) + status = da.attrs.get("georef_status") + assert status == "full", ( + f"release gate: a CRS+transform read should report " + f"``georef_status='full'``; got {status!r}. The five canonical " + "georef_status values are the contract downstream code branches on" + ) + + +@pytest.mark.release_gate +def test_release_gate_attrs_contract_version_is_int(tmp_path) -> None: + """``attrs['_xrspatial_geotiff_contract']`` is an int. + + The contract version is the downstream signal for which attrs + shape the array carries. A drift from int to string (or to a + Python object) would silently break callers that compare versions. 
+ """ + path = str(tmp_path / "release_gate_attrs_contract_version_2340.tif") + _write_known_good(path) + + da = open_geotiff(path) + version = da.attrs.get("_xrspatial_geotiff_contract") + assert isinstance(version, int), ( + f"release gate: contract version stamp is not int: type=" + f"{type(version).__name__}, value={version!r}" + ) + assert version >= 1, ( + f"release gate: contract version stamp is non-positive: {version!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_attrs_round_trip_preserves_crs_transform_nodata( + tmp_path, +) -> None: + """Canonical attrs survive a full ``write -> read -> write -> read`` cycle.""" + src = str(tmp_path / "release_gate_attrs_rt_src_2340.tif") + _write_known_good(src, nodata=-9999.0) + + first = open_geotiff(src) + crs_first = int(first.attrs["crs"]) + transform_first = tuple(first.attrs["transform"]) + nodata_first = float(first.attrs["nodata"]) + + # Round-trip through the public writer. + rewrite = str(tmp_path / "release_gate_attrs_rt_rewrite_2340.tif") + to_geotiff(first, rewrite, compression="none", tiled=False) + + second = open_geotiff(rewrite) + assert int(second.attrs["crs"]) == crs_first, ( + f"release gate: CRS drifted across round-trip: {crs_first} -> " + f"{second.attrs['crs']!r}" + ) + transform_second = tuple(second.attrs["transform"]) + assert len(transform_second) == 6, ( + f"release gate: transform reshaped across round-trip: " + f"{transform_second!r}" + ) + for got, want in zip(transform_second, transform_first): + assert got == pytest.approx(want, abs=1e-12, rel=1e-12), ( + f"release gate: transform drifted across round-trip: " + f"{transform_first!r} -> {transform_second!r}" + ) + assert float(second.attrs["nodata"]) == pytest.approx( + nodata_first, abs=0.0 + ), ( + f"release gate: nodata drifted across round-trip: " + f"{nodata_first} -> {second.attrs['nodata']!r}" + ) diff --git a/xrspatial/geotiff/tests/test_release_gate_codecs.py b/xrspatial/geotiff/tests/test_release_gate_codecs.py 
new file mode 100644 index 00000000..88fdc850 --- /dev/null +++ b/xrspatial/geotiff/tests/test_release_gate_codecs.py @@ -0,0 +1,134 @@ +"""Release gate: stable lossless codec round-trip (epic #2340). + +The release contract for the GeoTIFF module names a specific set of +lossless codecs as ``stable``: ``none``, ``deflate``, ``lzw``, +``packbits``, ``zstd``. Every one of them must round-trip pixels +byte-for-byte through ``to_geotiff`` -> ``open_geotiff`` on both +integer and float dtypes. + +This file is the per-codec gate: one parametrized test per dtype that +walks every stable codec. The fine-grained codec internals (LZW +dictionary edge cases, PackBits boundary cases, deflate stream framing, +etc.) live in their dedicated test files; here we only assert the +end-to-end public-API promise. + +Out of scope: experimental codecs (``lerc``, ``jpeg2000``, ``j2k``, +``lz4``), the internal-only ``jpeg`` codec, and the COG layout gate +(see ``test_release_gate_cog.py``). +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._geotags import GeoTransform +from xrspatial.geotiff._writer import write + + +# The stable lossless codec set. Keep this list in lockstep with the +# ``codec.*`` entries tiered ``stable`` in +# :data:`xrspatial.geotiff.SUPPORTED_FEATURES`. If a codec is promoted +# into or out of stable, add or remove it here -- the gate is meant +# to lock the public-facing list. 
+STABLE_LOSSLESS_CODECS = ("none", "deflate", "lzw", "packbits", "zstd") + + +def _gt() -> GeoTransform: + return GeoTransform( + origin_x=500000.0, + origin_y=4000000.0, + pixel_width=30.0, + pixel_height=-30.0, + ) + + +@pytest.mark.release_gate +@pytest.mark.parametrize("codec", STABLE_LOSSLESS_CODECS) +def test_release_gate_codec_round_trip_uint16(tmp_path, codec) -> None: + """Integer pixel bytes survive every stable lossless codec.""" + arr = np.arange(64, dtype=np.uint16).reshape(8, 8) + path = str(tmp_path / f"release_gate_codec_{codec}_uint16_2340.tif") + write( + arr, + path, + geo_transform=_gt(), + crs_epsg=32610, + compression=codec, + tiled=False, + ) + + out = open_geotiff(path) + assert out.dtype == np.uint16, ( + f"release gate: codec {codec!r} promoted uint16 to {out.dtype!r}; " + "the lossless contract is that integer dtypes survive every " + "stable codec" + ) + np.testing.assert_array_equal( + np.asarray(out.values), + arr, + err_msg=( + f"release gate: codec {codec!r} did not round-trip uint16 " + "pixels byte-for-byte; the release contract names this codec " + "as lossless" + ), + ) + + +@pytest.mark.release_gate +@pytest.mark.parametrize("codec", STABLE_LOSSLESS_CODECS) +def test_release_gate_codec_round_trip_float32(tmp_path, codec) -> None: + """Float pixel bytes survive every stable lossless codec.""" + # Use a deterministic but non-trivial pattern so a per-axis flip + # or per-row stride bug still fails. 
+ arr = np.linspace(-100.0, 100.0, 64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / f"release_gate_codec_{codec}_float32_2340.tif") + write( + arr, + path, + geo_transform=_gt(), + crs_epsg=32610, + compression=codec, + tiled=False, + ) + + out = open_geotiff(path) + assert out.dtype == np.float32, ( + f"release gate: codec {codec!r} promoted float32 to " + f"{out.dtype!r}" + ) + np.testing.assert_array_equal( + np.asarray(out.values), + arr, + err_msg=( + f"release gate: codec {codec!r} did not round-trip float32 " + "pixels byte-for-byte; the release contract names this codec " + "as lossless" + ), + ) + + +@pytest.mark.release_gate +def test_release_gate_codec_stable_set_matches_supported_features() -> None: + """The stable codec list in this file matches ``SUPPORTED_FEATURES``. + + If a codec is promoted into ``stable`` (or demoted out) in + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` without updating this + file, the release gate is out of sync with the runtime contract. + Fail loudly here so the PR that changes the tier also updates the + gate. + """ + from xrspatial.geotiff import SUPPORTED_FEATURES + + stable_from_constant = { + key.split(".", 1)[1] + for key, tier in SUPPORTED_FEATURES.items() + if key.startswith("codec.") and tier == "stable" + } + assert stable_from_constant == set(STABLE_LOSSLESS_CODECS), ( + "release gate: STABLE_LOSSLESS_CODECS drifted from " + "SUPPORTED_FEATURES; the gate and the runtime tier table must " + "agree on which codecs are stable. " + f"constant: {set(STABLE_LOSSLESS_CODECS)!r}; " + f"SUPPORTED_FEATURES: {stable_from_constant!r}" + ) diff --git a/xrspatial/geotiff/tests/test_release_gate_cog.py b/xrspatial/geotiff/tests/test_release_gate_cog.py new file mode 100644 index 00000000..fabdb386 --- /dev/null +++ b/xrspatial/geotiff/tests/test_release_gate_cog.py @@ -0,0 +1,157 @@ +"""Release gate: COG write and read for stable lossless codecs (epic #2340). 
+ +The release contract tags ``writer.cog`` and ``reader.local_cog`` as +``stable`` in :data:`xrspatial.geotiff.SUPPORTED_FEATURES`. The promise +is: ``to_geotiff(cog=True, compression=<stable codec>)`` writes a +file that ``open_geotiff`` reads back bit-exact, with CRS, transform, +and (when declared) nodata preserved across every stable codec. + +This gate parametrizes the codec axis so a single regression in any +stable codec on the COG path fails noisily. The COG layout itself +(IFD-first, tiled, internal overviews) is exhaustively pinned by +``test_cog_writer_compliance.py`` and ``test_cog_parity_2286.py``; the +release gate is the small end-to-end shape every release needs. + +Out of scope here: +* COG spec compliance details -- see ``test_cog_writer_compliance.py``. +* HTTP COG range reads -- ``reader.http_cog`` is ``advanced`` (not + stable), so it is not part of this gate. +* BigTIFF COG -- ``writer.bigtiff_cog`` is ``advanced``. +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +# Same stable lossless set as ``test_release_gate_codecs.py``. The +# parity check in that file only guards its own copy against +# ``SUPPORTED_FEATURES``; update this tuple by hand alongside it. +STABLE_LOSSLESS_CODECS = ("none", "deflate", "lzw", "packbits", "zstd") + +# COG requires a tiled internal layout and benefits from a slightly +# larger raster than the plain-file gate so the writer can emit a real +# tile grid rather than a single 1-tile file. Sticking to 32x32 keeps +# the test fast (well under 1 ms for the codec loop) while still +# exercising multiple tiles. +_W = 32 +_H = 32 + + +def _make_data_array(*, nodata: float | None = None) -> xr.DataArray: + pixels = np.arange(_H * _W, dtype=np.float32).reshape(_H, _W) + # Pixel-center coords, 30 m pixels, top-left at (500000, 4000000).
+ y = np.array( + [4000000.0 - 15.0 - 30.0 * i for i in range(_H)], + dtype=np.float64, + ) + x = np.array( + [500000.0 + 15.0 + 30.0 * i for i in range(_W)], + dtype=np.float64, + ) + attrs: dict = {"crs": 32610} + if nodata is not None: + attrs["nodata"] = nodata + return xr.DataArray( + pixels, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs=attrs, + ) + + +@pytest.mark.release_gate +@pytest.mark.parametrize("codec", STABLE_LOSSLESS_CODECS) +def test_release_gate_cog_round_trips_pixels(tmp_path, codec) -> None: + """COG write -> read returns the same pixels under every stable codec.""" + da = _make_data_array() + path = str(tmp_path / f"release_gate_cog_{codec}_pixels_2340.tif") + to_geotiff( + da, + path, + compression=codec, + cog=True, + tiled=True, + tile_size=16, + ) + + out = open_geotiff(path) + assert out.dtype == np.float32, ( + f"release gate: COG with codec {codec!r} promoted dtype to " + f"{out.dtype!r}" + ) + np.testing.assert_array_equal( + np.asarray(out.values), + np.asarray(da.values), + err_msg=( + f"release gate: COG with codec {codec!r} did not round-trip " + "pixels byte-for-byte" + ), + ) + + +@pytest.mark.release_gate +@pytest.mark.parametrize("codec", STABLE_LOSSLESS_CODECS) +def test_release_gate_cog_preserves_crs_transform(tmp_path, codec) -> None: + """CRS and transform survive the COG write -> read for every stable codec.""" + da = _make_data_array() + path = str(tmp_path / f"release_gate_cog_{codec}_attrs_2340.tif") + to_geotiff( + da, + path, + compression=codec, + cog=True, + tiled=True, + tile_size=16, + ) + + out = open_geotiff(path) + crs = out.attrs.get("crs") + assert crs is not None and int(crs) == 32610, ( + f"release gate: COG with codec {codec!r} dropped or drifted " + f"``attrs['crs']``: got {crs!r}" + ) + transform = out.attrs.get("transform") + assert transform is not None and len(transform) == 6, ( + f"release gate: COG with codec {codec!r} dropped or reshaped " + f"``attrs['transform']``: got {transform!r}" + ) 
+ assert transform[0] == pytest.approx(30.0, abs=1e-9), ( + f"release gate: COG pixel_width drifted under {codec!r}: " + f"{transform!r}" + ) + assert transform[4] == pytest.approx(-30.0, abs=1e-9), ( + f"release gate: COG pixel_height drifted under {codec!r}: " + f"{transform!r}" + ) + + +@pytest.mark.release_gate +@pytest.mark.parametrize("codec", STABLE_LOSSLESS_CODECS) +def test_release_gate_cog_preserves_nodata(tmp_path, codec) -> None: + """A declared nodata sentinel survives COG write -> read under every codec.""" + sentinel = -9999.0 + da = _make_data_array(nodata=sentinel) + path = str(tmp_path / f"release_gate_cog_{codec}_nodata_2340.tif") + to_geotiff( + da, + path, + compression=codec, + nodata=sentinel, + cog=True, + tiled=True, + tile_size=16, + ) + + out = open_geotiff(path) + nodata = out.attrs.get("nodata") + assert nodata is not None, ( + f"release gate: COG with codec {codec!r} dropped declared nodata" + ) + assert float(nodata) == pytest.approx(sentinel, abs=0.0), ( + f"release gate: COG with codec {codec!r} drifted nodata from " + f"{sentinel} to {nodata!r}" + ) diff --git a/xrspatial/geotiff/tests/test_release_gate_dask_parity.py b/xrspatial/geotiff/tests/test_release_gate_dask_parity.py new file mode 100644 index 00000000..a2e199ae --- /dev/null +++ b/xrspatial/geotiff/tests/test_release_gate_dask_parity.py @@ -0,0 +1,146 @@ +"""Release gate: dask read parity vs eager (epic #2340). + +Dask reads of a local GeoTIFF must return the same pixels and the same +canonical attrs as the eager (numpy) read. This is the +``reader.local_file`` stable promise extended to the dask backend. + +The release gate locks the small, deterministic case a release engineer +can run before tagging: write a known-good file, read it both eagerly +and through the dask backend, and assert the pixel-level and attrs +parity. 
The wide backend matrix +(``test_backend_pixel_parity_matrix_1813.py``, +``test_backend_parity_matrix.py``) exercises every codec / chunk-size / +dtype combination -- those stay the canonical parity suite. The +release-gate test is the one-shot the release notes can quote without +caveats. + +Out of scope: +* GPU / cupy parity (``reader.gpu`` is ``experimental``, not stable). +* VRT lazy reads (``reader.vrt`` is ``advanced``). +* COG dask reads (covered by ``test_release_gate_cog.py`` via the + eager reader; the dask parity for COG is part of the canonical + parity matrix). +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._geotags import GeoTransform +from xrspatial.geotiff._writer import write + + +def _write_known_good(path: str) -> np.ndarray: + """Write a small tiled GeoTIFF and return the source array.""" + arr = np.arange(256, dtype=np.float32).reshape(16, 16) + gt = GeoTransform( + origin_x=500000.0, + origin_y=4000000.0, + pixel_width=30.0, + pixel_height=-30.0, + ) + write( + arr, + path, + geo_transform=gt, + crs_epsg=32610, + compression="deflate", + tiled=True, + tile_size=16, + ) + return arr + + +@pytest.mark.release_gate +def test_release_gate_dask_read_matches_eager_pixels(tmp_path) -> None: + """The dask backend returns the same pixels as the eager backend.""" + path = str(tmp_path / "release_gate_dask_parity_pixels_2340.tif") + _write_known_good(path) + + eager = open_geotiff(path) + lazy = open_geotiff(path, chunks=8) + + # The dask backend returns a lazy DataArray; materialise it once + # so the equality check is comparing concrete numpy arrays. 
+ lazy_values = np.asarray(lazy.values) + eager_values = np.asarray(eager.values) + np.testing.assert_array_equal( + lazy_values, + eager_values, + err_msg=( + "release gate: dask backend returned different pixels than " + "the eager backend; the release contract promises dask read " + "parity for the local-file stable path" + ), + ) + assert lazy.dtype == eager.dtype, ( + f"release gate: dask backend changed dtype from {eager.dtype!r} " + f"to {lazy.dtype!r}" + ) + assert lazy.shape == eager.shape, ( + f"release gate: dask backend changed shape from {eager.shape!r} " + f"to {lazy.shape!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_dask_read_matches_eager_attrs(tmp_path) -> None: + """The dask backend produces the same canonical attrs as eager.""" + path = str(tmp_path / "release_gate_dask_parity_attrs_2340.tif") + _write_known_good(path) + + eager = open_geotiff(path) + lazy = open_geotiff(path, chunks=8) + + # The canonical attrs the release contract pins; backend-specific + # additive attrs (chunk shape, source URI, etc.) are allowed to + # differ between backends and are not part of this gate. 
+ canonical = ("crs", "transform", "georef_status") + for key in canonical: + assert key in eager.attrs, ( + f"release gate: eager read is missing canonical attr " + f"{key!r}; cannot compare backends" + ) + assert key in lazy.attrs, ( + f"release gate: dask read is missing canonical attr " + f"{key!r}; the release contract requires backend parity on " + "canonical attrs" + ) + eager_v = eager.attrs[key] + lazy_v = lazy.attrs[key] + if key == "transform": + assert len(eager_v) == len(lazy_v) == 6 + for a, b in zip(eager_v, lazy_v): + assert a == pytest.approx(b, abs=1e-12, rel=1e-12), ( + f"release gate: transform drifted across backends: " + f"eager={eager_v!r} lazy={lazy_v!r}" + ) + else: + assert eager_v == lazy_v, ( + f"release gate: ``attrs[{key!r}]`` drifted across " + f"backends: eager={eager_v!r} lazy={lazy_v!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_dask_read_is_lazy(tmp_path) -> None: + """A ``chunks=`` read produces a dask-backed DataArray. + + Without this assertion, a regression that silently materialised + the dask path into numpy could pass the pixel-parity test above + without anyone noticing. The dask backend's defining property is + laziness; pin it. + """ + pytest.importorskip("dask") + import dask.array as da_mod + + path = str(tmp_path / "release_gate_dask_parity_lazy_2340.tif") + _write_known_good(path) + + lazy = open_geotiff(path, chunks=8) + assert isinstance(lazy.data, da_mod.Array), ( + f"release gate: chunks= read returned a non-dask array of type " + f"{type(lazy.data).__name__}; the release contract promises a " + "dask-backed DataArray when chunks= is set" + ) diff --git a/xrspatial/geotiff/tests/test_release_gate_local_read.py b/xrspatial/geotiff/tests/test_release_gate_local_read.py new file mode 100644 index 00000000..3a5b3522 --- /dev/null +++ b/xrspatial/geotiff/tests/test_release_gate_local_read.py @@ -0,0 +1,177 @@ +"""Release gate: local GeoTIFF read (epic #2340). 
+ +This test pins the most basic promise the GeoTIFF module makes to a user: +``open_geotiff`` reads a local GeoTIFF and the result carries the pixels, +the CRS, the transform, and the nodata sentinel from the file. + +Why a dedicated release gate +---------------------------- +``reader.local_file`` is tagged ``stable`` in +:data:`xrspatial.geotiff.SUPPORTED_FEATURES`. Per epic #2340 every stable +feature needs a release-gate test that fails loudly if the contract +breaks, so a release engineer can run ``pytest -m release_gate`` and +know the next release does not silently regress a stable promise. + +This file is intentionally small. The surrounding test suite already +covers dtype variants, compression codecs, planar layouts, COG layouts, +fuzz cases, and golden-corpus parity. The release gate locks the single +contract a release note can quote without caveats: + +* Pixel bytes survive the read. +* ``attrs['crs']`` round-trips as the source EPSG. +* ``attrs['transform']`` is the 6-tuple GeoTransform the file carried. +* ``attrs['nodata']`` reflects the on-disk sentinel. + +Out of scope: alternative codecs (see ``test_release_gate_codecs.py``), +COG layouts (see ``test_release_gate_cog.py``), windowed reads (see +``test_release_gate_windowed_read.py``), and dask parity (see +``test_release_gate_dask_parity.py``). +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._geotags import GeoTransform +from xrspatial.geotiff._writer import write + + +# A tiny axis-aligned grid is enough to lock the contract. Using a +# distinctive pixel pattern (not a constant) means a single-axis drift +# in the writer or reader still fails the equality check. +_PIXELS = np.array( + [ + [10.0, 20.0, 30.0, 40.0], + [11.0, 21.0, 31.0, 41.0], + [12.0, 22.0, 32.0, 42.0], + [13.0, 23.0, 33.0, 43.0], + ], + dtype=np.float32, +) + +# Web Mercator (EPSG:3857) is a common real-world CRS. 
The transform +# uses positive pixel width and negative pixel height so the y axis +# decreases with row index, which is the convention every reader in +# this project assumes for axis-aligned grids. +_EPSG = 3857 +_ORIGIN_X = 500000.0 +_ORIGIN_Y = 4000000.0 +_PIXEL_W = 30.0 +_PIXEL_H = -30.0 +_EXPECTED_TRANSFORM = (_PIXEL_W, 0.0, _ORIGIN_X, 0.0, _PIXEL_H, _ORIGIN_Y) + + +def _write_known_good(path: str, *, nodata: float | None = None) -> None: + """Write a known-good GeoTIFF with an explicit GeoTransform. + + Uses the lower-level :func:`xrspatial.geotiff._writer.write` so the + transform is emitted from the explicit ``geo_transform`` argument + rather than derived from xarray coords. The release gate locks the + read side; the writer-side coord-to-transform derivation is covered + elsewhere. + """ + gt = GeoTransform( + origin_x=_ORIGIN_X, + origin_y=_ORIGIN_Y, + pixel_width=_PIXEL_W, + pixel_height=_PIXEL_H, + ) + write( + _PIXELS, + path, + geo_transform=gt, + crs_epsg=_EPSG, + nodata=nodata, + compression="none", + tiled=False, + ) + + +@pytest.mark.release_gate +def test_release_gate_local_read_pixels(tmp_path) -> None: + """Pixel bytes survive the read.""" + path = str(tmp_path / "release_gate_local_read_2340.tif") + _write_known_good(path) + + da = open_geotiff(path) + + assert da.dtype == np.float32, ( + f"release gate: local read promoted dtype to {da.dtype!r}; the " + "release contract is that float32 stays float32 unless a " + "nodata sentinel forces promotion" + ) + np.testing.assert_array_equal( + np.asarray(da.values), + _PIXELS, + err_msg=( + "release gate: local read returned different pixels than the " + "writer emitted; the byte-for-byte round trip is the most " + "basic promise the release notes make" + ), + ) + + +@pytest.mark.release_gate +def test_release_gate_local_read_crs(tmp_path) -> None: + """``attrs['crs']`` round-trips as the source EPSG.""" + path = str(tmp_path / "release_gate_local_read_crs_2340.tif") + _write_known_good(path) + + 
da = open_geotiff(path) + crs = da.attrs.get("crs") + assert crs is not None, ( + "release gate: local read dropped ``attrs['crs']``; the release " + "contract promises that an EPSG-coded source surfaces its CRS" + ) + assert int(crs) == _EPSG, ( + f"release gate: ``attrs['crs']`` drifted from {_EPSG} to " + f"{crs!r}; this changes the release notes contract for " + "``reader.local_file``" + ) + + +@pytest.mark.release_gate +def test_release_gate_local_read_transform(tmp_path) -> None: + """``attrs['transform']`` is the 6-tuple GeoTransform the file carried.""" + path = str(tmp_path / "release_gate_local_read_transform_2340.tif") + _write_known_good(path) + + da = open_geotiff(path) + transform = da.attrs.get("transform") + assert transform is not None, ( + "release gate: local read dropped ``attrs['transform']``; the " + "release contract promises a 6-tuple GeoTransform on every " + "georeferenced read" + ) + assert len(transform) == 6, ( + f"release gate: transform tuple is no longer length 6: " + f"{transform!r}; release notes promise the rasterio-style 6-tuple" + ) + for got, want in zip(transform, _EXPECTED_TRANSFORM): + # Floats compared to float precision because the writer encodes + # the transform as doubles in the GeoTIFF tags. 
+ assert got == pytest.approx(want, abs=1e-12, rel=1e-12), ( + f"release gate: transform tuple drifted: got {transform!r} " + f"want {_EXPECTED_TRANSFORM!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_local_read_nodata(tmp_path) -> None: + """``attrs['nodata']`` reflects the on-disk sentinel.""" + path = str(tmp_path / "release_gate_local_read_nodata_2340.tif") + sentinel = -9999.0 + _write_known_good(path, nodata=sentinel) + + da = open_geotiff(path) + nodata = da.attrs.get("nodata") + assert nodata is not None, ( + "release gate: declared nodata sentinel was dropped on read; " + "the release contract promises that a declared sentinel " + "surfaces in ``attrs['nodata']``" + ) + assert float(nodata) == pytest.approx(sentinel, abs=0.0), ( + f"release gate: ``attrs['nodata']`` drifted from {sentinel} to " + f"{nodata!r}" + ) diff --git a/xrspatial/geotiff/tests/test_release_gate_local_write.py b/xrspatial/geotiff/tests/test_release_gate_local_write.py new file mode 100644 index 00000000..95d00e0a --- /dev/null +++ b/xrspatial/geotiff/tests/test_release_gate_local_write.py @@ -0,0 +1,156 @@ +"""Release gate: local GeoTIFF write (epic #2340). + +``writer.local_file`` is tagged ``stable`` in +:data:`xrspatial.geotiff.SUPPORTED_FEATURES`. The release contract is: +``to_geotiff`` writes a file that ``open_geotiff`` reads back bit-exact, +with the CRS, transform, and nodata sentinel preserved. + +This gate is small on purpose. The byte-equivalent pixel contract, +attrs canonicalisation, and dtype handling each have their own deep +test files (``test_round_trip_invariants.py``, +``test_attrs_contract_canonical_1984.py``, the matrix tests). The +release-gate test is the one-shot a release engineer can run to know +the most common public-API write -> read flow still works end-to-end. + +Out of scope here: +* Compression codec coverage -- see ``test_release_gate_codecs.py``. +* COG layout -- see ``test_release_gate_cog.py``. 
+* Detailed attrs canonicalisation -- see + ``test_release_gate_attrs_contract.py``. +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +def _make_data_array(*, nodata: float | None = None) -> xr.DataArray: + """Build a small DataArray with explicit y/x coords. + + The release contract for ``to_geotiff`` is the public-API path: a + user passes a DataArray with coords, gets back a file whose + GeoTransform reproduces those coords. We keep the grid small (4x4) + so the gate is fast even when run alongside the full release-gate + suite. + """ + pixels = np.array( + [ + [1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0], + [13.0, 14.0, 15.0, 16.0], + ], + dtype=np.float32, + ) + # Pixel-center y/x with width 30 m, origin (500000, 4000000), + # descending y. The writer turns these into a GeoTransform with + # origin at the top-left pixel corner. + y = np.array([3999985.0, 3999955.0, 3999925.0, 3999895.0]) + x = np.array([500015.0, 500045.0, 500075.0, 500105.0]) + attrs: dict = {"crs": 32610} + if nodata is not None: + attrs["nodata"] = nodata + return xr.DataArray( + pixels, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs=attrs, + ) + + +@pytest.mark.release_gate +def test_release_gate_local_write_round_trips_pixels(tmp_path) -> None: + """``to_geotiff`` writes a file that reads back bit-exact.""" + da = _make_data_array() + path = str(tmp_path / "release_gate_local_write_pixels_2340.tif") + to_geotiff(da, path, compression="none", tiled=False) + + out = open_geotiff(path) + assert out.dtype == np.float32, ( + f"release gate: write -> read flipped dtype to {out.dtype!r}; " + "the release contract promises float32 stays float32 absent a " + "nodata sentinel" + ) + np.testing.assert_array_equal( + np.asarray(out.values), + np.asarray(da.values), + err_msg=( + "release gate: write -> read changed pixel values; " + "to_geotiff is promised 
to be lossless for the default " + "'none' codec" + ), + ) + + +@pytest.mark.release_gate +def test_release_gate_local_write_preserves_crs(tmp_path) -> None: + """The CRS survives the write -> read round trip.""" + da = _make_data_array() + path = str(tmp_path / "release_gate_local_write_crs_2340.tif") + to_geotiff(da, path, compression="none", tiled=False) + + out = open_geotiff(path) + crs = out.attrs.get("crs") + assert crs is not None, ( + "release gate: write -> read dropped ``attrs['crs']``; the " + "release contract requires the CRS to survive" + ) + assert int(crs) == 32610, ( + f"release gate: ``attrs['crs']`` drifted from 32610 to {crs!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_local_write_preserves_transform(tmp_path) -> None: + """The GeoTransform survives the write -> read round trip.""" + da = _make_data_array() + path = str(tmp_path / "release_gate_local_write_transform_2340.tif") + to_geotiff(da, path, compression="none", tiled=False) + + out = open_geotiff(path) + transform = out.attrs.get("transform") + assert transform is not None, ( + "release gate: write -> read dropped ``attrs['transform']``; " + "the release contract requires the GeoTransform to survive" + ) + assert len(transform) == 6, ( + f"release gate: transform tuple is no longer length 6: " + f"{transform!r}" + ) + # Origin = top-left corner: first pixel center shifted by half a pixel. + assert transform[2] == pytest.approx(500000.0, abs=1e-9), transform + assert transform[5] == pytest.approx(4000000.0, abs=1e-9), transform
+ assert transform[0] == pytest.approx(30.0, abs=1e-9), ( + f"release gate: pixel_width drifted: {transform!r}" + ) + assert transform[4] == pytest.approx(-30.0, abs=1e-9), ( + f"release gate: pixel_height sign or magnitude drifted: " + f"{transform!r}" + ) + assert transform[1] == 0.0 and transform[3] == 0.0, ( + f"release gate: shear terms appeared in axis-aligned write: " + f"{transform!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_local_write_preserves_nodata(tmp_path) -> None: + """A declared nodata sentinel survives the write -> read round trip.""" + sentinel = -9999.0 + da = _make_data_array(nodata=sentinel) + path = str(tmp_path / "release_gate_local_write_nodata_2340.tif") + to_geotiff(da, path, compression="none", tiled=False, nodata=sentinel) + + out = open_geotiff(path) + nodata = out.attrs.get("nodata") + assert nodata is not None, ( + "release gate: declared nodata was dropped on write -> read; " + "the release contract promises the sentinel survives" + ) + assert float(nodata) == pytest.approx(sentinel, abs=0.0), ( + f"release gate: ``attrs['nodata']`` drifted from {sentinel} to " + f"{nodata!r}" + ) diff --git a/xrspatial/geotiff/tests/test_release_gate_windowed_read.py b/xrspatial/geotiff/tests/test_release_gate_windowed_read.py new file mode 100644 index 00000000..c3b69231 --- /dev/null +++ b/xrspatial/geotiff/tests/test_release_gate_windowed_read.py @@ -0,0 +1,162 @@ +"""Release gate: windowed reads (epic #2340). + +``open_geotiff(path, window=...)`` is part of the stable surface. The +release contract: + +* A ``(row_start, col_start, row_stop, col_stop)`` window returns the + exact subset of the source pixels. +* The result keeps ``attrs['crs']`` and produces a transform whose + origin shifts to the window's top-left pixel corner. +* Reading the full extent via ``window=(0, 0, H, W)`` matches an + unwindowed read. 
+ +Out of bounds and degenerate windows are covered by +``test_window_out_of_bounds_1634.py``; the release-gate test only +locks the supported, in-bounds use case so a release engineer knows +the user-facing API behaves end to end. +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._geotags import GeoTransform +from xrspatial.geotiff._writer import write + + +_H = 10 +_W = 10 +# A distinctive per-pixel value (row * 100 + col) means any row / col +# stride confusion in the windowed path fails the equality check. +_PIXELS = ( + np.arange(_H, dtype=np.int32).reshape(-1, 1) * 100 + + np.arange(_W, dtype=np.int32).reshape(1, -1) +).astype(np.int32) +_ORIGIN_X = 500000.0 +_ORIGIN_Y = 4000000.0 +_PIXEL_W = 30.0 +_PIXEL_H = -30.0 + + +def _write_known_good(path: str) -> None: + gt = GeoTransform( + origin_x=_ORIGIN_X, + origin_y=_ORIGIN_Y, + pixel_width=_PIXEL_W, + pixel_height=_PIXEL_H, + ) + write( + _PIXELS, + path, + geo_transform=gt, + crs_epsg=32610, + compression="none", + tiled=False, + ) + + +@pytest.mark.release_gate +def test_release_gate_windowed_read_returns_subset(tmp_path) -> None: + """A windowed read returns exactly the requested subset.""" + path = str(tmp_path / "release_gate_windowed_read_subset_2340.tif") + _write_known_good(path) + + # Take an interior 4x5 window so the test fails if the window + # logic confuses row- and column-order. 
+ row_start, col_start = 2, 3 + row_stop, col_stop = 6, 8 + out = open_geotiff(path, window=(row_start, col_start, row_stop, col_stop)) + + expected = _PIXELS[row_start:row_stop, col_start:col_stop] + assert out.shape == expected.shape, ( + f"release gate: windowed read shape {out.shape} does not match " + f"the requested window shape {expected.shape}" + ) + np.testing.assert_array_equal( + np.asarray(out.values), + expected, + err_msg=( + "release gate: windowed read returned different pixels than " + "the same rows / cols of the source array; this would silently " + "break every downstream caller that relies on window= for " + "subsetting" + ), + ) + + +@pytest.mark.release_gate +def test_release_gate_windowed_read_preserves_crs(tmp_path) -> None: + """A windowed read carries ``attrs['crs']`` over from the source.""" + path = str(tmp_path / "release_gate_windowed_read_crs_2340.tif") + _write_known_good(path) + + out = open_geotiff(path, window=(1, 1, 5, 5)) + crs = out.attrs.get("crs") + assert crs is not None and int(crs) == 32610, ( + f"release gate: windowed read dropped or drifted " + f"``attrs['crs']``: got {crs!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_windowed_read_shifts_transform_origin(tmp_path) -> None: + """The transform origin shifts to the window's top-left pixel. + + Concretely: for a window starting at ``(row, col) = (2, 3)`` on a + grid with pixel width ``+30`` and pixel height ``-30``, the new + origin is ``(origin_x + 3 * 30, origin_y + 2 * -30)``. + """ + path = str(tmp_path / "release_gate_windowed_read_transform_2340.tif") + _write_known_good(path) + + row_start, col_start = 2, 3 + out = open_geotiff(path, window=(row_start, col_start, 6, 8)) + transform = out.attrs.get("transform") + assert transform is not None and len(transform) == 6, ( + f"release gate: windowed read dropped or reshaped transform: " + f"{transform!r}" + ) + # Pixel size must not change. 
+ assert transform[0] == pytest.approx(_PIXEL_W, abs=1e-9), ( + f"release gate: windowed read changed pixel_width: {transform!r}" + ) + assert transform[4] == pytest.approx(_PIXEL_H, abs=1e-9), ( + f"release gate: windowed read changed pixel_height: {transform!r}" + ) + expected_origin_x = _ORIGIN_X + col_start * _PIXEL_W + expected_origin_y = _ORIGIN_Y + row_start * _PIXEL_H + assert transform[2] == pytest.approx(expected_origin_x, abs=1e-9), ( + f"release gate: windowed read origin_x did not shift to the " + f"window's left edge: got {transform[2]!r} expected " + f"{expected_origin_x!r}" + ) + assert transform[5] == pytest.approx(expected_origin_y, abs=1e-9), ( + f"release gate: windowed read origin_y did not shift to the " + f"window's top edge: got {transform[5]!r} expected " + f"{expected_origin_y!r}" + ) + + +@pytest.mark.release_gate +def test_release_gate_windowed_read_full_extent_matches_unwindowed( + tmp_path, +) -> None: + """``window=(0, 0, H, W)`` returns the same pixels as no window.""" + path = str(tmp_path / "release_gate_windowed_read_full_2340.tif") + _write_known_good(path) + + full = open_geotiff(path) + windowed = open_geotiff(path, window=(0, 0, _H, _W)) + assert windowed.shape == full.shape, ( + f"release gate: full-extent window shape drift: " + f"{windowed.shape} vs {full.shape}" + ) + np.testing.assert_array_equal( + np.asarray(windowed.values), + np.asarray(full.values), + err_msg=( + "release gate: full-extent window returned different pixels " + "than the unwindowed read" + ), + ) From 61767a853e4a77cfa2fcbc8d5a65459eff48b302 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 04:29:23 -0700 Subject: [PATCH 2/3] Address review nits: hoist dask skip and SUPPORTED_FEATURES import (#2340) - test_release_gate_dask_parity.py: move pytest.importorskip("dask") to module scope so all three tests skip uniformly when dask is absent. 
Previously only the lazy-shape test was protected; the two parity tests would error at collection in a dask-less environment. - test_release_gate_codecs.py: hoist `SUPPORTED_FEATURES` import to the top of the module to match the convention used by the other release-gate files. --- xrspatial/geotiff/tests/test_release_gate_codecs.py | 4 +--- .../geotiff/tests/test_release_gate_dask_parity.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/xrspatial/geotiff/tests/test_release_gate_codecs.py b/xrspatial/geotiff/tests/test_release_gate_codecs.py index 88fdc850..b7e96ee0 100644 --- a/xrspatial/geotiff/tests/test_release_gate_codecs.py +++ b/xrspatial/geotiff/tests/test_release_gate_codecs.py @@ -21,7 +21,7 @@ import numpy as np import pytest -from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff import SUPPORTED_FEATURES, open_geotiff from xrspatial.geotiff._geotags import GeoTransform from xrspatial.geotiff._writer import write @@ -118,8 +118,6 @@ def test_release_gate_codec_stable_set_matches_supported_features() -> None: Fail loudly here so the PR that changes the tier also updates the gate. """ - from xrspatial.geotiff import SUPPORTED_FEATURES - stable_from_constant = { key.split(".", 1)[1] for key, tier in SUPPORTED_FEATURES.items() diff --git a/xrspatial/geotiff/tests/test_release_gate_dask_parity.py b/xrspatial/geotiff/tests/test_release_gate_dask_parity.py index a2e199ae..6b557206 100644 --- a/xrspatial/geotiff/tests/test_release_gate_dask_parity.py +++ b/xrspatial/geotiff/tests/test_release_gate_dask_parity.py @@ -26,9 +26,14 @@ import numpy as np import pytest -from xrspatial.geotiff import open_geotiff -from xrspatial.geotiff._geotags import GeoTransform -from xrspatial.geotiff._writer import write +# Every test in this file exercises the ``chunks=`` dask backend. Skip +# the whole file if dask is not installed -- the parity claim is +# vacuous without the backend it compares against. 
+pytest.importorskip("dask") + +from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff._geotags import GeoTransform # noqa: E402 +from xrspatial.geotiff._writer import write # noqa: E402 def _write_known_good(path: str) -> np.ndarray: @@ -132,7 +137,6 @@ def test_release_gate_dask_read_is_lazy(tmp_path) -> None: without anyone noticing. The dask backend's defining property is laziness; pin it. """ - pytest.importorskip("dask") import dask.array as da_mod path = str(tmp_path / "release_gate_dask_parity_lazy_2340.tif") From 49f46db89407b1ca0e35a2ad780de4ae00087d2c Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 04:34:23 -0700 Subject: [PATCH 3/3] Reuse STABLE_LOSSLESS_CODECS from sibling release-gate file (#2340) Self-review fix on PR #2353. test_release_gate_cog.py previously redefined ``STABLE_LOSSLESS_CODECS`` as a local copy of the same tuple in test_release_gate_codecs.py. The cross-check against ``SUPPORTED_FEATURES`` only runs against the codecs-file copy, so a future tier change that updated the codecs file but forgot the COG file would leave the COG gate silently parametrized on a stale list. Import the tuple from the sibling file so the two files cannot drift. --- xrspatial/geotiff/tests/test_release_gate_cog.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/xrspatial/geotiff/tests/test_release_gate_cog.py b/xrspatial/geotiff/tests/test_release_gate_cog.py index fabdb386..e58c878e 100644 --- a/xrspatial/geotiff/tests/test_release_gate_cog.py +++ b/xrspatial/geotiff/tests/test_release_gate_cog.py @@ -26,11 +26,14 @@ from xrspatial.geotiff import open_geotiff, to_geotiff - -# Same stable lossless set as ``test_release_gate_codecs.py``; the -# cross-file parity check in that file keeps the list in sync with -# ``SUPPORTED_FEATURES``. 
-STABLE_LOSSLESS_CODECS = ("none", "deflate", "lzw", "packbits", "zstd") +# Import the stable lossless set from the sibling release-gate file +# rather than redefining it. The cross-check against +# ``SUPPORTED_FEATURES`` lives in that file; reusing the same tuple +# here means a tier change in ``_attrs.py`` cannot leave the COG gate +# parametrized on a stale list. +from xrspatial.geotiff.tests.test_release_gate_codecs import ( # noqa: E402 + STABLE_LOSSLESS_CODECS, +) # COG requires a tiled internal layout and benefits from a slightly # larger raster than the plain-file gate so the writer can emit a real