|
| 1 | +"""Locking test for the best-effort pass-through tier of the attrs contract. |
| 2 | +
|
| 3 | +Issue #1984, PR 6 of 7. |
| 4 | +
|
| 5 | +The attrs contract has three tiers: |
| 6 | +
|
| 7 | +1. Canonical: writers consume the attr; round-trip is guaranteed. |
| 8 | +2. Pass-through: writers do not consume the attr. The reader rebuilds |
| 9 | + the value from the GeoKey directory (or another TIFF tag) on read. |
| 10 | + Round-trip is best-effort: it works only when the writer happens to |
| 11 | + emit a tag the reader can rebuild the attr from. |
| 12 | +3. Ignored: writer never touches; attr is dropped silently. |
| 13 | +
|
| 14 | +This file pins the *current* behaviour of every key in the pass-through |
| 15 | +tier so future writer changes have to decide whether to promote a key to |
| 16 | +canonical or to keep it dropping. The split between "reconstructible" |
| 17 | +and "dropped on round-trip" is captured in the parametrisation below and |
| 18 | +mirrored in PR 6's body so the next PR (canonical promotion) has a |
| 19 | +shopping list. |
| 20 | +
|
| 21 | +Current state of every pass-through key, as locked here: |
| 22 | +
|
| 23 | +* Reconstructible (writer puts a TIFF tag the reader rebuilds from): |
| 24 | + - ``image_description`` -> tag 270 (ImageDescription) |
| 25 | + - ``extra_samples`` -> tag 338 (ExtraSamples) |
| 26 | + - ``colormap`` -> tag 320 (ColorMap, raw uint16 triples) |
| 27 | +
|
| 28 | +* Dropped on round-trip (writer never emits the GeoKey, reader has |
| 29 | + nothing to rebuild from): |
| 30 | + - ``crs_name`` (GTCitationGeoKey / ProjCitationGeoKey) |
| 31 | + - ``geog_citation`` (GeogCitationGeoKey) |
| 32 | + - ``datum_code`` (GeogGeodeticDatumGeoKey) |
| 33 | + - ``angular_units`` (GeogAngularUnitsGeoKey) |
| 34 | + - ``linear_units`` (ProjLinearUnitsGeoKey) |
| 35 | + - ``semi_major_axis`` (GeogSemiMajorAxisGeoKey) |
| 36 | + - ``inv_flattening`` (GeogInvFlatteningGeoKey) |
| 37 | + - ``projection_code`` (ProjectionGeoKey) |
| 38 | + - ``vertical_crs`` (VerticalCSTypeGeoKey) |
| 39 | + - ``vertical_citation`` (VerticalCitationGeoKey) |
| 40 | + - ``vertical_units`` (VerticalUnitsGeoKey) |
| 41 | + - ``colormap_rgba`` (only set on read when Photometric == 3) |
| 42 | + - ``cmap`` (matplotlib ListedColormap; same gate) |
| 43 | +""" |
| 44 | +from __future__ import annotations |
| 45 | + |
| 46 | +import warnings |
| 47 | + |
| 48 | +import numpy as np |
| 49 | +import pytest |
| 50 | +import xarray as xr |
| 51 | + |
| 52 | +from xrspatial.geotiff import open_geotiff, to_geotiff |
| 53 | + |
| 54 | + |
| 55 | +# Full set of pass-through keys defined by the contract. |
| 56 | +_ALL_PASSTHROUGH_KEYS = ( |
| 57 | + 'crs_name', |
| 58 | + 'geog_citation', |
| 59 | + 'datum_code', |
| 60 | + 'angular_units', |
| 61 | + 'linear_units', |
| 62 | + 'semi_major_axis', |
| 63 | + 'inv_flattening', |
| 64 | + 'projection_code', |
| 65 | + 'vertical_crs', |
| 66 | + 'vertical_citation', |
| 67 | + 'vertical_units', |
| 68 | + 'image_description', |
| 69 | + 'extra_samples', |
| 70 | + 'colormap', |
| 71 | + 'colormap_rgba', |
| 72 | + 'cmap', |
| 73 | +) |
| 74 | + |
| 75 | + |
| 76 | +def _make_da(crs=None, attrs=None, shape=(4, 4), dtype=np.float32): |
| 77 | + """Build a minimal georeferenced DataArray with optional CRS and attrs.""" |
| 78 | + data = np.ones(shape, dtype=dtype) |
| 79 | + h, w = shape |
| 80 | + coords = { |
| 81 | + 'y': np.arange(h, 0, -1, dtype=np.float64), |
| 82 | + 'x': np.arange(w, dtype=np.float64), |
| 83 | + } |
| 84 | + a = dict(attrs) if attrs else {} |
| 85 | + if crs is not None: |
| 86 | + a['crs'] = crs |
| 87 | + return xr.DataArray(data, dims=('y', 'x'), coords=coords, attrs=a) |
| 88 | + |
| 89 | + |
| 90 | +def _roundtrip(tmp_path, da, name='roundtrip.tif'): |
| 91 | + """Write ``da`` to ``tmp_path/name`` and read it back.""" |
| 92 | + path = str(tmp_path / name) |
| 93 | + to_geotiff(da, path) |
| 94 | + return open_geotiff(path) |
| 95 | + |
| 96 | + |
| 97 | +# (key, crs_to_use, value_set_on_write_or_None, expected_outcome) |
| 98 | +# |
| 99 | +# ``crs_to_use``: 4326 for geographic GeoKey-derived keys, 32633 for |
| 100 | +# projected ones. The CRS pins which GeoKeys the writer would emit if it |
| 101 | +# emitted any; today it emits only the EPSG code, but the test brackets |
| 102 | +# both branches anyway. |
| 103 | +# |
| 104 | +# ``value_set_on_write``: the value the test sets on ``da.attrs`` before |
| 105 | +# write. ``None`` means "do not set this key" (the test only needs the |
| 106 | +# CRS to be present so any reconstruction would have a path to fire). |
| 107 | +# |
| 108 | +# ``expected``: one of ``'reconstructible'`` or ``'dropped'``. |
| 109 | +# - reconstructible: key must be present in read-back attrs AND, when a |
| 110 | +# value was supplied, must equal that value. |
| 111 | +# - dropped: key must be absent from read-back attrs. |
| 112 | +_PASSTHROUGH_CASES = [ |
| 113 | + # GeoKey-derived geographic CRS attrs -- writer emits only EPSG, so |
| 114 | + # the reader has no GeoKey to reconstruct these from. |
| 115 | + ('crs_name', 4326, 'WGS 84', 'dropped'), |
| 116 | + ('geog_citation', 4326, 'WGS 84', 'dropped'), |
| 117 | + ('datum_code', 4326, 6326, 'dropped'), |
| 118 | + ('angular_units', 4326, 'degree', 'dropped'), |
| 119 | + ('semi_major_axis', 4326, 6378137.0, 'dropped'), |
| 120 | + ('inv_flattening', 4326, 298.257223563, 'dropped'), |
| 121 | + # GeoKey-derived projected CRS attrs. |
| 122 | + ('linear_units', 32633, 'metre', 'dropped'), |
| 123 | + ('projection_code', 32633, 16033, 'dropped'), |
| 124 | + # Vertical CRS attrs -- writer never emits the vertical GeoKey block. |
| 125 | + ('vertical_crs', 4326, 5703, 'dropped'), |
| 126 | + ('vertical_citation', 4326, 'NAVD88', 'dropped'), |
| 127 | + ('vertical_units', 4326, 'metre', 'dropped'), |
| 128 | + # Non-GeoKey tag passthroughs. The writer folds these into extra_tags |
| 129 | + # via _merge_friendly_extra_tags, so the reader can rebuild them. |
| 130 | + ('image_description', 4326, 'pr1984 fixture', 'reconstructible'), |
| 131 | + ('extra_samples', 4326, (1,), 'reconstructible'), |
| 132 | + # ``colormap`` round-trips as the raw uint16 triple list; the |
| 133 | + # derived ``colormap_rgba`` / ``cmap`` attrs are only emitted when |
| 134 | + # Photometric == 3 on read, which the writer does not set just |
| 135 | + # because attrs carries a colormap. |
| 136 | + # The TIFF ColorMap tag (320) stores RGB triples as uint16 values in |
| 137 | + # the 0-65535 range. Values below are written as-is and compared |
| 138 | + # by-equality after the round-trip; if the writer ever rescales 8-bit |
| 139 | + # input to 16-bit (or vice versa), update this fixture rather than |
| 140 | + # the contract. |
| 141 | + ('colormap', 4326, tuple([0] * 256 + [128] * 256 + [255] * 256), |
| 142 | + 'reconstructible'), |
| 143 | + ('colormap_rgba', 4326, None, 'dropped'), |
| 144 | + ('cmap', 4326, None, 'dropped'), |
| 145 | +] |
| 146 | + |
| 147 | + |
| 148 | +def test_passthrough_cases_cover_all_keys(): |
| 149 | + """``_PASSTHROUGH_CASES`` and ``_ALL_PASSTHROUGH_KEYS`` carry the |
| 150 | + same set in two forms. Pin them so a key added to one list and |
| 151 | + forgotten on the other fails here rather than silently skipping |
| 152 | + coverage in ``test_passthrough_dropped_when_no_crs``.""" |
| 153 | + case_keys = {c[0] for c in _PASSTHROUGH_CASES} |
| 154 | + assert case_keys == set(_ALL_PASSTHROUGH_KEYS), ( |
| 155 | + f"_PASSTHROUGH_CASES and _ALL_PASSTHROUGH_KEYS diverge.\n" |
| 156 | + f" only in cases: {sorted(case_keys - set(_ALL_PASSTHROUGH_KEYS))}\n" |
| 157 | + f" only in keys : {sorted(set(_ALL_PASSTHROUGH_KEYS) - case_keys)}" |
| 158 | + ) |
| 159 | + |
| 160 | + |
| 161 | +@pytest.mark.parametrize( |
| 162 | + 'key,crs,value,expected', |
| 163 | + _PASSTHROUGH_CASES, |
| 164 | + ids=[c[0] for c in _PASSTHROUGH_CASES], |
| 165 | +) |
| 166 | +def test_passthrough_key_roundtrip(tmp_path, key, crs, value, expected): |
| 167 | + """Lock per-key round-trip outcome for the pass-through attr tier.""" |
| 168 | + attrs = {} |
| 169 | + if value is not None: |
| 170 | + attrs[key] = value |
| 171 | + # For colormap-derived keys we still need an attrs payload that |
| 172 | + # actually puts a colormap tag on disk; setting ``colormap`` itself |
| 173 | + # does that, so probe ``colormap_rgba`` / ``cmap`` via a separate |
| 174 | + # write that does NOT include a colormap. The contract for those two |
| 175 | + # keys is "absent on round-trip unless Photometric=3", and the |
| 176 | + # writer never selects Photometric=3 from attrs alone. |
| 177 | + da = _make_da(crs=crs, attrs=attrs) |
| 178 | + # Single-band uint8 needed for the colormap tag to be valid in TIFF. |
| 179 | + if key == 'colormap': |
| 180 | + da = _make_da(crs=crs, attrs=attrs, dtype=np.uint8) |
| 181 | + |
| 182 | + rd = _roundtrip(tmp_path, da, name=f'{key}.tif') |
| 183 | + |
| 184 | + if expected == 'reconstructible': |
| 185 | + assert key in rd.attrs, ( |
| 186 | + f"pass-through key {key!r} was expected to round-trip but is " |
| 187 | + f"absent. attrs keys present: {sorted(rd.attrs.keys())}" |
| 188 | + ) |
| 189 | + if value is not None: |
| 190 | + got = rd.attrs[key] |
| 191 | + if isinstance(value, tuple): |
| 192 | + assert tuple(got) == value, ( |
| 193 | + f"{key!r}: value mismatch on round-trip\n" |
| 194 | + f" written: {value}\n" |
| 195 | + f" read : {got}" |
| 196 | + ) |
| 197 | + else: |
| 198 | + assert got == value, ( |
| 199 | + f"{key!r}: value mismatch on round-trip\n" |
| 200 | + f" written: {value!r}\n" |
| 201 | + f" read : {got!r}" |
| 202 | + ) |
| 203 | + else: # 'dropped' |
| 204 | + assert key not in rd.attrs, ( |
| 205 | + f"pass-through key {key!r} was expected to drop on round-trip " |
| 206 | + f"(writer does not emit a tag the reader can rebuild it from) " |
| 207 | + f"but it is present with value {rd.attrs[key]!r}. If a writer " |
| 208 | + f"change started emitting this key, decide whether to promote " |
| 209 | + f"the key to canonical (issue #1984) and update this test." |
| 210 | + ) |
| 211 | + |
| 212 | + |
| 213 | +def test_passthrough_dropped_when_no_crs(tmp_path): |
| 214 | + """Files without a CRS do not surface any pass-through attrs.""" |
| 215 | + da = _make_da(crs=None) |
| 216 | + rd = _roundtrip(tmp_path, da, name='no_crs.tif') |
| 217 | + |
| 218 | + present = sorted(k for k in _ALL_PASSTHROUGH_KEYS if k in rd.attrs) |
| 219 | + assert present == [], ( |
| 220 | + f"pass-through keys leaked into a no-CRS round-trip: {present}. " |
| 221 | + f"All keys present: {sorted(rd.attrs.keys())}" |
| 222 | + ) |
| 223 | + # Sanity: no CRS attrs either. |
| 224 | + assert 'crs' not in rd.attrs |
| 225 | + assert 'crs_wkt' not in rd.attrs |
| 226 | + |
| 227 | + |
| 228 | +def test_passthrough_does_not_promote_to_canonical(tmp_path): |
| 229 | + """Setting pass-through attrs without a CRS must not inject a CRS.""" |
| 230 | + # Mix of GeoKey-derived and tag-derived pass-through keys, but no |
| 231 | + # ``crs`` / ``crs_wkt``. If the writer ever started inferring a CRS |
| 232 | + # from these (e.g. picking 4326 because angular_units == 'degree') |
| 233 | + # this test would fail. |
| 234 | + attrs = { |
| 235 | + 'crs_name': 'WGS 84', |
| 236 | + 'geog_citation': 'WGS 84', |
| 237 | + 'angular_units': 'degree', |
| 238 | + 'linear_units': 'metre', |
| 239 | + 'semi_major_axis': 6378137.0, |
| 240 | + 'inv_flattening': 298.257223563, |
| 241 | + 'datum_code': 6326, |
| 242 | + } |
| 243 | + da = _make_da(crs=None, attrs=attrs) |
| 244 | + |
| 245 | + with warnings.catch_warnings(): |
| 246 | + # The writer warns on user-defined-CRS WKT writes; we are not on |
| 247 | + # that path here, but suppress generously so a future warning |
| 248 | + # tweak does not turn this test into a warning regression test. |
| 249 | + warnings.simplefilter('ignore') |
| 250 | + rd = _roundtrip(tmp_path, da, name='no_crs_with_attrs.tif') |
| 251 | + |
| 252 | + assert 'crs' not in rd.attrs, ( |
| 253 | + f"pass-through attrs caused the writer to synthesise a CRS: " |
| 254 | + f"crs={rd.attrs.get('crs')!r}. The contract says pass-through " |
| 255 | + f"attrs are advisory only; the writer must rely on attrs['crs'] " |
| 256 | + f"or attrs['crs_wkt'] to emit georeferencing." |
| 257 | + ) |
| 258 | + assert 'crs_wkt' not in rd.attrs, ( |
| 259 | + f"pass-through attrs caused the writer to synthesise a CRS WKT: " |
| 260 | + f"crs_wkt={rd.attrs.get('crs_wkt')!r}." |
| 261 | + ) |
0 commit comments