Skip to content

Commit 139e8f2

Browse files
authored
geotiff: stamp _xrspatial_geotiff_contract=1 on every read (#1984) (#2003)
* geotiff: stamp _xrspatial_geotiff_contract=1 on every read (#1984)

PR 3 of 7 on issue #1984. Adds a contract-version marker attr (``_xrspatial_geotiff_contract``) to every DataArray returned by an xrspatial geotiff read path so downstream code can identify which attrs-contract revision produced an array. The value lives as a module-level constant ``_ATTRS_CONTRACT_VERSION`` in ``_attrs.py``. The eager numpy, dask+numpy, GPU, and dask+GPU backends, as well as the COG/HTTP path, all funnel through ``_populate_attrs_from_geo_info``, so one stamp there covers all four of those backends. The VRT backends in ``_backends/vrt.py`` build their attrs dict directly and stamp the version inline; both the eager and chunked VRT paths reuse the same constant so the value stays in lockstep when it is later bumped. Adds ``test_attrs_contract_version_1984.py`` with one assertion per read path (eager, dask, GPU, dask+GPU, VRT eager, VRT chunked) plus a pin on the constant value.

* geotiff: address #2003 review nits on contract version stamp

- _attrs.py docstring: noted that the stamp overwrites any pre-existing value on the passed-in attrs dict; callers pass freshly built dicts.
- _backends/vrt.py (eager + chunked): added inline comments pointing at _populate_attrs_from_geo_info as the canonical stamp site, so future maintainers know why the helper is bypassed in the VRT path.
1 parent 4bd3e9b commit 139e8f2

3 files changed

Lines changed: 169 additions & 2 deletions

File tree

xrspatial/geotiff/_attrs.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@
114114
_TIFF_SHORT = 3
115115

116116

117+
# Contract version emitted on every read; bumped when the attrs contract
118+
# changes. Downstream code reads ``attrs['_xrspatial_geotiff_contract']``
119+
# to learn which attrs-contract revision produced the array. See issue
120+
# #1984 and ``docs/source/user_guide/attrs_contract.rst``.
121+
_ATTRS_CONTRACT_VERSION = 1
122+
123+
117124
# String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids.
118125
_RESOLUTION_UNIT_IDS = {'none': 1, 'inch': 2, 'centimeter': 3}
119126

@@ -162,7 +169,19 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None
162169
the outer window through this helper so the resulting DataArray
163170
advertises the windowed transform. The GPU path does not currently
164171
expose a windowed read, so it passes ``window=None``.
172+
173+
``attrs['_xrspatial_geotiff_contract']`` is stamped unconditionally
174+
as the first step. Any pre-existing value on the passed-in dict is
175+
overwritten with the current ``_ATTRS_CONTRACT_VERSION``; callers
176+
pass freshly built dicts, so this is the intended behaviour.
165177
"""
178+
# Stamp the contract version first so every read path that funnels
179+
# through this helper carries the marker. The VRT backends build
180+
# their attrs dict directly and stamp the version there (see
181+
# ``_backends/vrt.py``); keep both sites in sync via the constant
182+
# rather than the bare literal.
183+
attrs['_xrspatial_geotiff_contract'] = _ATTRS_CONTRACT_VERSION
184+
166185
if geo_info.crs_epsg is not None:
167186
attrs['crs'] = geo_info.crs_epsg
168187
if geo_info.crs_wkt is not None:

xrspatial/geotiff/_backends/vrt.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import numpy as np
1414
import xarray as xr
1515

16+
from .._attrs import _ATTRS_CONTRACT_VERSION
1617
from .._coords import (
1718
coords_from_pixel_geometry as _coords_from_pixel_geometry,
1819
transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry,
@@ -193,7 +194,10 @@ def read_vrt(source: str, *,
193194
else:
194195
coords = {}
195196

196-
attrs = {}
197+
# VRT builds its attrs dict inline rather than going through
198+
# ``_populate_attrs_from_geo_info``; stamp the contract version here
199+
# so both code paths emit the same marker.
200+
attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION}
197201
if vrt.crs_wkt:
198202
epsg = _wkt_to_epsg(vrt.crs_wkt)
199203
if epsg is not None:
@@ -562,7 +566,10 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype,
562566
# eager reads share the same x/y arrays.
563567
gt = vrt.geo_transform
564568
coords = {}
565-
attrs = {}
569+
# Mirrors the eager VRT branch: this code path bypasses
570+
# ``_populate_attrs_from_geo_info``, so the contract version is
571+
# stamped inline using the shared constant to stay in lockstep.
572+
attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION}
566573
if gt is not None:
567574
origin_x, res_x, _, origin_y, _, res_y = gt
568575
coord_window = (win_r0, win_c0, win_r0 + full_h, win_c0 + full_w)
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
"""Contract-version marker tests for issue #1984.
2+
3+
PR 3 of the 7-PR plan attached to issue #1984 stamps every DataArray
4+
returned by an xrspatial geotiff read path with
5+
``attrs['_xrspatial_geotiff_contract'] = 1``. Downstream code reads
6+
this marker to learn which attrs-contract revision produced the array.
7+
8+
The stamp must appear on every backend:
9+
10+
* eager numpy (``open_geotiff``)
11+
* dask + numpy (``open_geotiff(chunks=...)`` / ``read_geotiff_dask``)
12+
* cupy / GPU (``open_geotiff(gpu=True)`` / ``read_geotiff_gpu``)
13+
* dask + cupy (``open_geotiff(gpu=True, chunks=...)``)
14+
* VRT eager (``read_vrt``)
15+
* VRT dask chunked (``read_vrt(chunks=...)``)
16+
17+
The fixture style mirrors ``test_attrs_parity_1548.py``: build a small
18+
on-disk TIFF (and a small VRT pointing at one) inside ``tmp_path``,
19+
open it through each backend, and assert on the resulting attrs.
20+
"""
21+
from __future__ import annotations
22+
23+
import importlib.util
24+
import os
25+
26+
import numpy as np
27+
import pytest
28+
29+
from xrspatial.geotiff import open_geotiff, read_vrt
30+
from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION
31+
32+
# Skip this whole module when tifffile is not installed; the TIFF fixture
# helper below writes its test raster with ``tifffile.imwrite``.
tifffile = pytest.importorskip("tifffile")
33+
34+
35+
# Attr key that every read-path test in this module looks up on the
# returned DataArray's ``attrs``.
_CONTRACT_KEY = '_xrspatial_geotiff_contract'
36+
37+
38+
def _gpu_available() -> bool:
39+
if importlib.util.find_spec("cupy") is None:
40+
return False
41+
try:
42+
import cupy
43+
return bool(cupy.cuda.is_available())
44+
except Exception:
45+
return False
46+
47+
48+
# Probe once at import time; the result feeds the ``_gpu_only`` skip marker.
_HAS_GPU = _gpu_available()
49+
# Decorator for tests that need a GPU: skipped when ``_HAS_GPU`` is false.
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
50+
51+
52+
def _write_small_tiff(path):
    """Write the shared 64x64 float32 fixture TIFF to ``path``.

    The raster is tiled (32x32) and deflate-compressed. The pixel array
    that was written is returned to the caller.
    """
    side = 64
    pixels = np.arange(side * side, dtype=np.float32).reshape(side, side)
    write_options = dict(
        photometric='minisblack',
        planarconfig='contig',
        tile=(32, 32),
        compression='deflate',
        metadata=None,
    )
    tifffile.imwrite(path, pixels, **write_options)
    return pixels
60+
61+
62+
def _write_minimal_vrt(vrt_path, source_name, *, height, width):
63+
"""Write a VRT that references ``source_name`` as a single-band source."""
64+
vrt_path.write_text(
65+
f'<VRTDataset rasterXSize="{width}" rasterYSize="{height}">\n'
66+
' <VRTRasterBand dataType="Float32" band="1">\n'
67+
' <SimpleSource>\n'
68+
f' <SourceFilename relativeToVRT="1">{source_name}'
69+
'</SourceFilename>\n'
70+
' <SourceBand>1</SourceBand>\n'
71+
f' <SrcRect xOff="0" yOff="0" xSize="{width}" ySize="{height}"/>\n'
72+
f' <DstRect xOff="0" yOff="0" xSize="{width}" ySize="{height}"/>\n'
73+
' </SimpleSource>\n'
74+
' </VRTRasterBand>\n'
75+
'</VRTDataset>\n'
76+
)
77+
78+
79+
def test_attrs_contract_version_constant_is_one():
    """Fail loudly if the contract version drifts from its pinned value of 1."""
    pinned_value = 1
    assert _ATTRS_CONTRACT_VERSION == pinned_value
82+
83+
84+
def test_eager_numpy_stamps_contract_version(tmp_path):
    """A plain ``open_geotiff`` read carries the contract-version marker."""
    path = str(tmp_path / "contract_v1_eager.tif")
    _write_small_tiff(path)

    da = open_geotiff(path)

    # Compare against the shared constant rather than a bare literal so a
    # contract bump only has to touch the constant and
    # ``test_attrs_contract_version_constant_is_one``.
    assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION
91+
92+
93+
def test_dask_numpy_stamps_contract_version(tmp_path):
    """A chunked ``open_geotiff(chunks=...)`` read carries the contract marker."""
    path = str(tmp_path / "contract_v1_dask.tif")
    _write_small_tiff(path)

    da = open_geotiff(path, chunks=32)

    # Compare against the shared constant rather than a bare literal so a
    # contract bump only has to touch the constant and
    # ``test_attrs_contract_version_constant_is_one``.
    assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION
100+
101+
102+
@_gpu_only
def test_gpu_stamps_contract_version(tmp_path):
    """An ``open_geotiff(gpu=True)`` read carries the contract marker."""
    path = str(tmp_path / "contract_v1_gpu.tif")
    _write_small_tiff(path)

    da = open_geotiff(path, gpu=True)

    # Compare against the shared constant rather than a bare literal so a
    # contract bump only has to touch the constant and
    # ``test_attrs_contract_version_constant_is_one``.
    assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION
110+
111+
112+
@_gpu_only
def test_dask_gpu_stamps_contract_version(tmp_path):
    """An ``open_geotiff(gpu=True, chunks=...)`` read carries the contract marker."""
    path = str(tmp_path / "contract_v1_dask_gpu.tif")
    _write_small_tiff(path)

    da = open_geotiff(path, gpu=True, chunks=32)

    # Compare against the shared constant rather than a bare literal so a
    # contract bump only has to touch the constant and
    # ``test_attrs_contract_version_constant_is_one``.
    assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION
120+
121+
122+
def test_vrt_eager_stamps_contract_version(tmp_path):
    """An eager ``read_vrt`` read carries the contract marker."""
    src = tmp_path / "contract_v1_vrt_source.tif"
    _write_small_tiff(str(src))
    vrt = tmp_path / "contract_v1_vrt_eager.vrt"
    # The VRT sits next to the TIFF, so only the bare file name is written.
    _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64)

    da = read_vrt(str(vrt))

    # Compare against the shared constant rather than a bare literal so a
    # contract bump only has to touch the constant and
    # ``test_attrs_contract_version_constant_is_one``.
    assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION
131+
132+
133+
def test_vrt_chunked_stamps_contract_version(tmp_path):
    """A chunked ``read_vrt(chunks=...)`` read carries the contract marker."""
    src = tmp_path / "contract_v1_vrt_chunked_source.tif"
    _write_small_tiff(str(src))
    vrt = tmp_path / "contract_v1_vrt_chunked.vrt"
    # The VRT sits next to the TIFF, so only the bare file name is written.
    _write_minimal_vrt(vrt, os.path.basename(src), height=64, width=64)

    da = read_vrt(str(vrt), chunks=32)

    # Compare against the shared constant rather than a bare literal so a
    # contract bump only has to touch the constant and
    # ``test_attrs_contract_version_constant_is_one``.
    assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION

0 commit comments

Comments
 (0)