Skip to content

Commit dfc500f

Browse files
authored
Add release-gate tests for stable GeoTIFF features (#2340) (#2353)
1 parent c4878d6 commit dfc500f

8 files changed

Lines changed: 1092 additions & 0 deletions

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ filterwarnings =
112112
ignore:'asyncio.AbstractEventLoopPolicy' is deprecated:DeprecationWarning:pytest_asyncio
113113
markers =
114114
slow: long-running test cell (typical: golden-corpus fixtures behind a heavy codec or large pixel count). PR CI can skip with `-m "not slow"`; nightly / release runs use no filter. See xrspatial/geotiff/tests/golden_corpus/_marks.py for the corpus-side helper.
115+
release_gate: locks a single stable feature in the GeoTIFF release contract (epic #2340). Always runs by default in CI; the marker exists so release engineers can run only these gates with `pytest -m release_gate` before tagging a release. Tests in this marker should be small, deterministic, and fail loudly if the contract breaks.
115116

116117
[isort]
117118
line_length = 100
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
"""Release gate: CRS / transform / nodata attrs contract (epic #2340).
2+
3+
The canonical attrs after a GeoTIFF read are tagged ``stable`` in the
4+
release gate checklist. The contract: every georeferenced read produces
5+
a DataArray whose ``attrs`` carry, at minimum, ``crs``, ``crs_wkt``,
6+
``transform``, ``georef_status``, the contract version stamp, and (when
7+
declared) ``nodata``. These attrs survive a write -> read round trip.
8+
9+
This file is the single-shot release gate. Deep canonicalisation,
10+
alias handling, contract version bumps, and pass-through semantics are
11+
each covered by their own ``test_attrs_contract_*_1984.py`` files; here
12+
we lock the user-facing names and round-trip stability so the release
13+
notes can quote the canonical attrs without caveats.
14+
15+
Out of scope:
16+
* Alias handling (``test_attrs_contract_aliases_1984.py``).
17+
* Attrs pass-through for user-supplied keys
18+
(``test_attrs_contract_passthrough_1984.py``).
19+
* Contract version stamp bump policy
20+
(``test_attrs_contract_version_1984.py``).
21+
"""
22+
from __future__ import annotations
23+
24+
import numpy as np
25+
import pytest
26+
27+
from xrspatial.geotiff import open_geotiff, to_geotiff
28+
from xrspatial.geotiff._geotags import GeoTransform
29+
from xrspatial.geotiff._writer import write
30+
31+
32+
# Keys that release notes are allowed to promise on every georeferenced
33+
# read. Adding a new key to the canonical set is a contract-version
34+
# bump (see issue #1984); removing one is a breaking change. Anything
35+
# else in the attrs (``masked_nodata``, ``nodata_pixels_present``,
36+
# ``raster_type``, etc.) is additive and not pinned here.
37+
CANONICAL_KEYS = (
38+
"_xrspatial_geotiff_contract",
39+
"crs",
40+
"crs_wkt",
41+
"transform",
42+
"georef_status",
43+
)
44+
45+
46+
def _write_known_good(path: str, *, nodata: float | None = None) -> None:
    """Write the small reference raster shared by the attrs release gates.

    The raster is a 4x4 ``float32`` ramp holding ``0..15``. It is written
    with a geotransform whose origin is ``(500000, 4000000)`` and whose
    pixel size is ``30`` by ``-30``, with EPSG code 32610,
    ``compression="none"`` and ``tiled=False``.

    ``path`` is the destination file. ``nodata`` is forwarded unchanged to
    the writer's ``nodata`` argument; the default ``None`` is passed through
    as-is.
    """
    transform = GeoTransform(
        origin_x=500000.0,
        origin_y=4000000.0,
        pixel_width=30.0,
        pixel_height=-30.0,
    )
    pixels = np.arange(16, dtype=np.float32).reshape(4, 4)
    write(
        pixels,
        path,
        geo_transform=transform,
        crs_epsg=32610,
        nodata=nodata,
        compression="none",
        tiled=False,
    )
63+
64+
65+
@pytest.mark.release_gate
def test_release_gate_attrs_canonical_keys_present(tmp_path) -> None:
    """Every key in ``CANONICAL_KEYS`` is present on a georeferenced read."""
    tif = str(tmp_path / "release_gate_attrs_canonical_2340.tif")
    _write_known_good(tif)

    attrs = open_geotiff(tif).attrs
    # Collect all absent keys (in declaration order) so one failure
    # reports the complete gap instead of just the first missing key.
    absent = [key for key in CANONICAL_KEYS if key not in attrs]
    assert not absent, (
        f"release gate: canonical attrs keys missing from a georeferenced read: {absent}; "
        f"release notes promise every key in {list(CANONICAL_KEYS)}"
    )
78+
79+
80+
@pytest.mark.release_gate
def test_release_gate_attrs_georef_status_full(tmp_path) -> None:
    """``georef_status`` reads back as ``'full'`` for a CRS+transform file."""
    tif = str(tmp_path / "release_gate_attrs_georef_status_2340.tif")
    _write_known_good(tif)

    # ``.get`` keeps a missing key on the assertion path (status is None)
    # instead of surfacing as a bare KeyError.
    status = open_geotiff(tif).attrs.get("georef_status")
    assert status == "full", (
        "release gate: a CRS+transform read should report "
        f"``georef_status='full'``; got {status!r}. The five canonical "
        "georef_status values are the contract downstream code branches on"
    )
93+
94+
95+
@pytest.mark.release_gate
def test_release_gate_attrs_contract_version_is_int(tmp_path) -> None:
    """``attrs['_xrspatial_geotiff_contract']`` is a positive, genuine int.

    The contract version is the downstream signal for which attrs
    shape the array carries. A drift from int to string (or to a
    Python object) would silently break callers that compare versions.

    ``bool`` is rejected explicitly: it is a subclass of ``int``, so a
    stamp of ``True`` would otherwise satisfy both the type check and
    the ``>= 1`` check below.
    """
    path = str(tmp_path / "release_gate_attrs_contract_version_2340.tif")
    _write_known_good(path)

    da = open_geotiff(path)
    # ``.get`` so a missing stamp fails the type assertion (type=NoneType)
    # with the release-gate message rather than raising KeyError.
    version = da.attrs.get("_xrspatial_geotiff_contract")
    assert isinstance(version, int) and not isinstance(version, bool), (
        "release gate: contract version stamp is not int: type="
        f"{type(version).__name__}, value={version!r}"
    )
    assert version >= 1, (
        f"release gate: contract version stamp is non-positive: {version!r}"
    )
115+
116+
117+
@pytest.mark.release_gate
def test_release_gate_attrs_round_trip_preserves_crs_transform_nodata(
    tmp_path,
) -> None:
    """Canonical attrs survive a full ``write -> read -> write -> read`` cycle.

    The source file is written with ``nodata=-9999.0``, read, re-written
    through ``to_geotiff`` and read again. ``crs`` (compared as int),
    ``transform`` (compared element-wise) and ``nodata`` (compared exactly)
    from the second read must match the first read.
    """
    src = str(tmp_path / "release_gate_attrs_rt_src_2340.tif")
    _write_known_good(src, nodata=-9999.0)

    first = open_geotiff(src)
    crs_first = int(first.attrs["crs"])
    transform_first = tuple(first.attrs["transform"])
    nodata_first = float(first.attrs["nodata"])
    # Pin the baseline length as well: ``zip`` below stops at the shorter
    # sequence, so a truncated first-read transform would otherwise let
    # the element-wise comparison pass on fewer than six coefficients.
    assert len(transform_first) == 6, (
        "release gate: transform from the first read is not 6 elements: "
        f"{transform_first!r}"
    )

    # Round-trip through the public writer.
    rewrite = str(tmp_path / "release_gate_attrs_rt_rewrite_2340.tif")
    to_geotiff(first, rewrite, compression="none", tiled=False)

    second = open_geotiff(rewrite)
    assert int(second.attrs["crs"]) == crs_first, (
        f"release gate: CRS drifted across round-trip: {crs_first} -> "
        f"{second.attrs['crs']!r}"
    )
    transform_second = tuple(second.attrs["transform"])
    assert len(transform_second) == 6, (
        "release gate: transform reshaped across round-trip: "
        f"{transform_second!r}"
    )
    for got, want in zip(transform_second, transform_first):
        assert got == pytest.approx(want, abs=1e-12, rel=1e-12), (
            "release gate: transform drifted across round-trip: "
            f"{transform_first!r} -> {transform_second!r}"
        )
    # ``abs=0.0`` with no ``rel`` makes this an exact comparison: the
    # nodata sentinel must not move at all.
    assert float(second.attrs["nodata"]) == pytest.approx(
        nodata_first, abs=0.0
    ), (
        "release gate: nodata drifted across round-trip: "
        f"{nodata_first} -> {second.attrs['nodata']!r}"
    )
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
"""Release gate: stable lossless codec round-trip (epic #2340).
2+
3+
The release contract for the GeoTIFF module names a specific set of
4+
lossless codecs as ``stable``: ``none``, ``deflate``, ``lzw``,
5+
``packbits``, ``zstd``. Every one of them must round-trip pixels
6+
byte-for-byte through ``to_geotiff`` -> ``open_geotiff`` on both
7+
integer and float dtypes.
8+
9+
This file is the per-codec gate: one parametrized test per dtype that
10+
walks every stable codec. The fine-grained codec internals (LZW
11+
dictionary edge cases, PackBits boundary cases, deflate stream framing,
12+
etc.) live in their dedicated test files; here we only assert the
13+
end-to-end public-API promise.
14+
15+
Out of scope: experimental codecs (``lerc``, ``jpeg2000``, ``j2k``,
16+
``lz4``), the internal-only ``jpeg`` codec, and the COG layout gate
17+
(see ``test_release_gate_cog.py``).
18+
"""
19+
from __future__ import annotations
20+
21+
import numpy as np
22+
import pytest
23+
24+
from xrspatial.geotiff import SUPPORTED_FEATURES, open_geotiff
25+
from xrspatial.geotiff._geotags import GeoTransform
26+
from xrspatial.geotiff._writer import write
27+
28+
29+
# The stable lossless codec set. Keep this list in lockstep with the
30+
# ``codec.*`` entries tiered ``stable`` in
31+
# :data:`xrspatial.geotiff.SUPPORTED_FEATURES`. If a codec is promoted
32+
# into or out of stable, add or remove it here -- the gate is meant
33+
# to lock the public-facing list.
34+
STABLE_LOSSLESS_CODECS = ("none", "deflate", "lzw", "packbits", "zstd")
35+
36+
37+
def _gt() -> GeoTransform:
    """Build the geotransform used by every codec round-trip fixture.

    Origin is ``(500000, 4000000)`` and the pixel size is ``30`` by
    ``-30``. A fresh instance is returned on each call.
    """
    params = {
        "origin_x": 500000.0,
        "origin_y": 4000000.0,
        "pixel_width": 30.0,
        "pixel_height": -30.0,
    }
    return GeoTransform(**params)
44+
45+
46+
@pytest.mark.release_gate
@pytest.mark.parametrize("codec", STABLE_LOSSLESS_CODECS)
def test_release_gate_codec_round_trip_uint16(tmp_path, codec) -> None:
    """An 8x8 ``uint16`` ramp reads back unchanged for each stable codec.

    Both the dtype and the pixel values of the decoded array must match
    what was written.
    """
    expected = np.arange(64, dtype=np.uint16).reshape(8, 8)
    tif = str(tmp_path / f"release_gate_codec_{codec}_uint16_2340.tif")
    write(
        expected,
        tif,
        geo_transform=_gt(),
        crs_epsg=32610,
        compression=codec,
        tiled=False,
    )

    decoded = open_geotiff(tif)
    # Check dtype first: ``assert_array_equal`` compares values only, so
    # a silent promotion would not be caught by the pixel comparison.
    assert decoded.dtype == np.uint16, (
        f"release gate: codec {codec!r} promoted uint16 to {decoded.dtype!r}; "
        "the lossless contract is that integer dtypes survive every "
        "stable codec"
    )
    mismatch_msg = (
        f"release gate: codec {codec!r} did not round-trip uint16 "
        "pixels byte-for-byte; the release contract names this codec "
        "as lossless"
    )
    np.testing.assert_array_equal(
        np.asarray(decoded.values),
        expected,
        err_msg=mismatch_msg,
    )
76+
77+
78+
@pytest.mark.release_gate
@pytest.mark.parametrize("codec", STABLE_LOSSLESS_CODECS)
def test_release_gate_codec_round_trip_float32(tmp_path, codec) -> None:
    """An 8x8 ``float32`` ramp reads back unchanged for each stable codec.

    Both the dtype and the pixel values of the decoded array must match
    what was written.
    """
    # 64 evenly spaced samples over [-100, 100]: every pixel is distinct,
    # so a per-axis flip or a per-row stride bug still fails the
    # comparison.
    expected = np.linspace(-100.0, 100.0, 64, dtype=np.float32).reshape(8, 8)
    tif = str(tmp_path / f"release_gate_codec_{codec}_float32_2340.tif")
    write(
        expected,
        tif,
        geo_transform=_gt(),
        crs_epsg=32610,
        compression=codec,
        tiled=False,
    )

    decoded = open_geotiff(tif)
    # Check dtype first: ``assert_array_equal`` compares values only.
    assert decoded.dtype == np.float32, (
        f"release gate: codec {codec!r} promoted float32 to "
        f"{decoded.dtype!r}"
    )
    mismatch_msg = (
        f"release gate: codec {codec!r} did not round-trip float32 "
        "pixels byte-for-byte; the release contract names this codec "
        "as lossless"
    )
    np.testing.assert_array_equal(
        np.asarray(decoded.values),
        expected,
        err_msg=mismatch_msg,
    )
109+
110+
111+
@pytest.mark.release_gate
def test_release_gate_codec_stable_set_matches_supported_features() -> None:
    """``STABLE_LOSSLESS_CODECS`` agrees with ``SUPPORTED_FEATURES``.

    The set of ``codec.*`` keys tiered ``"stable"`` in
    :data:`xrspatial.geotiff.SUPPORTED_FEATURES` (with the ``codec.``
    prefix removed) must equal the set of names in
    ``STABLE_LOSSLESS_CODECS``. A tier change that is not mirrored in
    this file fails here, so the PR that changes the tier also has to
    update the gate.
    """
    prefix = "codec."
    stable_in_features = set()
    for key, tier in SUPPORTED_FEATURES.items():
        if key.startswith(prefix) and tier == "stable":
            # Keep everything after the first "codec." as the codec name.
            stable_in_features.add(key[len(prefix):])

    assert stable_in_features == set(STABLE_LOSSLESS_CODECS), (
        "release gate: STABLE_LOSSLESS_CODECS drifted from "
        "SUPPORTED_FEATURES; the gate and the runtime tier table must "
        "agree on which codecs are stable. "
        f"constant: {set(STABLE_LOSSLESS_CODECS)!r}; "
        f"SUPPORTED_FEATURES: {stable_in_features!r}"
    )

0 commit comments

Comments
 (0)