Skip to content

Commit 9cd54f6

Browse files
authored
Add consolidated VRT missing_sources policy matrix tests (#2367) (#2372)
* Add consolidated missing_sources policy matrix tests (#2367) Cover the four-by-two contract matrix for VRT missing_sources in one file: default / explicit-raise / warn / invalid, each exercised against both the eager read_vrt path and the dask open_geotiff(.vrt, chunks=) path. Assertions cover exception types, warning class, message text, and output array values (NaN fill on the missing half, PRESENT_FILL on the present half). Complements the existing 1799 / 1843 / 1860 / 2265 tests by keeping the full matrix together so a future kwarg refactor that drops eager/dask parity regresses a single focused file. Work item for epic #2342. * Address review nit: use pytest.warns(match=) for warn-path style parity with #1799 (#2367)
1 parent c6f671e commit 9cd54f6

1 file changed

Lines changed: 267 additions & 0 deletions

File tree

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
"""Consolidated VRT ``missing_sources`` policy matrix (#2367, work item of #2342).
2+
3+
This file complements ``test_vrt_missing_sources_policy_1799.py`` and
4+
``test_vrt_chunked_missing_sources_1799.py`` by covering the full
5+
release contract in one place: every policy value (default,
6+
``'raise'``, ``'warn'``, invalid) is exercised against both read paths
7+
(eager ``read_vrt`` and dask ``open_geotiff(..., chunks=...)``), with
8+
assertions on the exception or warning type, the message text, and the
9+
actual output array values where applicable.
10+
11+
The existing 1799 / 1843 / 2265 tests pin individual cases. This file
12+
keeps the four-by-two matrix together so a future kwarg refactor that
13+
silently drops parity between the eager and chunked paths regresses a
14+
single, focused test file.
15+
16+
Release contract (see ``_backends/vrt.py:206`` docstring):
17+
18+
* ``'raise'`` is the default since #1860.
19+
* ``'raise'`` fails fast with ``FileNotFoundError`` naming the missing
20+
source path. The chunked path raises at build time (#2265) so a
21+
partial mosaic never surfaces silently from a delayed compute.
22+
* ``'warn'`` is the explicit opt-in. It emits
23+
``GeoTIFFFallbackWarning`` naming the missing source and returns the
24+
mosaic with NaN (or the band's nodata sentinel) in the corresponding
25+
region. ``attrs['vrt_holes']`` records the affected source(s).
26+
* Any other value raises ``ValueError`` naming the bad kwarg.
27+
"""
28+
from __future__ import annotations
29+
30+
import os
31+
import warnings
32+
33+
import numpy as np
34+
import pytest
35+
import xarray as xr
36+
37+
from xrspatial.geotiff import (
38+
GeoTIFFFallbackWarning,
39+
open_geotiff,
40+
read_vrt,
41+
to_geotiff,
42+
)
43+
44+
45+
# Constant written into every cell of the real (present) source raster.
# The warn-policy tests compare the left half of the mosaic against it.
PRESENT_FILL = 7.0
46+
47+
48+
def _build_partial_vrt(tmp_path) -> tuple[str, str, str]:
    """Build a 2-source VRT: left half is real, right half points at a
    non-existent file.

    The present source is a 4x4 float32 GeoTIFF filled with
    ``PRESENT_FILL``; the VRT declares an 8x4 raster whose second
    ``SimpleSource`` references a file that is never created. Filenames
    embed issue #2367 to keep parallel test runs from colliding on
    shared tmp roots.

    Parameters
    ----------
    tmp_path
        Directory in which the source GeoTIFF and the VRT are written.

    Returns
    -------
    tuple[str, str, str]
        ``(vrt_path, present_src_path, missing_path)``.
    """
    # Function-scope import: only this helper needs XML escaping.
    from xml.sax.saxutils import escape

    src = os.path.join(tmp_path, "src_2367_present.tif")
    arr = np.full((4, 4), PRESENT_FILL, dtype=np.float32)
    da = xr.DataArray(
        arr, dims=("y", "x"),
        attrs={"transform": (1.0, 0.0, 0.0, 0.0, -1.0, 0.0)},
    )
    to_geotiff(da, src)

    missing = os.path.join(tmp_path, "missing_2367.tif")
    vrt_path = os.path.join(tmp_path, "partial_2367.vrt")

    # Paths are interpolated into XML text nodes, so escape ``&``, ``<``
    # and ``>``; a temp root containing one of them would otherwise make
    # the VRT malformed. For ordinary paths ``escape`` is a no-op.
    vrt_xml = (
        '<VRTDataset rasterXSize="8" rasterYSize="4">\n'
        '<GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>\n'
        '<VRTRasterBand dataType="Float32" band="1">\n'
        '<SimpleSource>\n'
        f'<SourceFilename relativeToVRT="0">{escape(src)}</SourceFilename>\n'
        '<SourceBand>1</SourceBand>\n'
        '<SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>\n'
        '<DstRect xOff="0" yOff="0" xSize="4" ySize="4"/>\n'
        '</SimpleSource>\n'
        '<SimpleSource>\n'
        f'<SourceFilename relativeToVRT="0">{escape(missing)}</SourceFilename>\n'
        '<SourceBand>1</SourceBand>\n'
        '<SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>\n'
        '<DstRect xOff="4" yOff="0" xSize="4" ySize="4"/>\n'
        '</SimpleSource>\n'
        '</VRTRasterBand>\n'
        '</VRTDataset>\n'
    )
    # The document carries no XML declaration, so write it as UTF-8
    # (the XML default) rather than the platform's locale encoding.
    with open(vrt_path, "w", encoding="utf-8") as f:
        f.write(vrt_xml)
    return vrt_path, src, missing
87+
88+
89+
# ---------------------------------------------------------------------------
90+
# Reader-path fixtures. Each "reader" callable accepts ``(source,
91+
# **kwargs)`` and returns a DataArray. The eager reader returns a numpy-
92+
# backed array; the dask reader returns a chunked DataArray that still
93+
# needs ``.compute()`` to materialise values.
94+
# ---------------------------------------------------------------------------
95+
96+
def _eager_reader(source, **kwargs):
    """Eager read path: call ``read_vrt`` on ``source``, forwarding all
    keyword arguments unchanged, and hand back whatever it returns."""
    result = read_vrt(source, **kwargs)
    return result
98+
99+
100+
def _dask_reader(source, **kwargs):
    """Chunked read path: call ``open_geotiff`` with ``chunks=4`` plus
    any extra keyword arguments supplied by the test."""
    # The VRT fixture is 8 columns wide, so a chunk size of 4 splits the
    # mosaic across more than one chunk and the lazy path is genuinely
    # exercised rather than collapsing to a single task.
    lazy = open_geotiff(source, chunks=4, **kwargs)
    return lazy
106+
107+
108+
# Parametrisation table shared by the reader-parity tests: one entry per
# read path, each carrying a stable, human-readable test id.
READERS = [
    pytest.param(reader_fn, id=label)
    for reader_fn, label in (
        (_eager_reader, "eager_read_vrt"),
        (_dask_reader, "dask_open_geotiff_chunks"),
    )
]
112+
113+
114+
# ---------------------------------------------------------------------------
115+
# Default policy: no kwarg -> raises.
116+
# ---------------------------------------------------------------------------
117+
118+
class TestDefaultPolicyRaises:
    """No ``missing_sources`` kwarg -> ``FileNotFoundError`` naming the
    missing source. This is the public default since #1860 and the
    release matrix in #2342 calls it out as a hard contract."""

    @pytest.mark.parametrize("reader", READERS)
    def test_default_raises_filenotfound_naming_source(
        self, reader, tmp_path,
    ):
        vrt_path, _, missing = _build_partial_vrt(str(tmp_path))
        with pytest.raises(FileNotFoundError) as excinfo:
            reader(vrt_path)
        # The basename of the missing source must appear in the
        # message. The chunked path quotes the full path; the eager
        # path may quote just the source filename or the resolved
        # absolute path depending on which guard fires first. Match on
        # the basename to stay portable across both. Derive it from the
        # path the fixture returned so the test cannot drift from the
        # fixture's filename.
        missing_name = os.path.basename(missing)
        assert missing_name in str(excinfo.value), (
            f"Default policy raise must name the missing source. "
            f"Got: {excinfo.value!r}"
        )
139+
140+
141+
# ---------------------------------------------------------------------------
142+
# Explicit raise: same shape as default.
143+
# ---------------------------------------------------------------------------
144+
145+
class TestExplicitRaisePolicy:
    """Passing ``missing_sources='raise'`` explicitly must fail the same
    way as omitting the kwarg: ``FileNotFoundError`` whose message
    contains the missing source's filename, on both read paths."""

    @pytest.mark.parametrize("reader", READERS)
    def test_explicit_raise_matches_default(self, reader, tmp_path):
        # Only the VRT path is needed; the other two tuple members are
        # the present and missing source paths.
        vrt_path = _build_partial_vrt(str(tmp_path))[0]

        with pytest.raises(FileNotFoundError) as raised:
            reader(vrt_path, missing_sources="raise")

        message = str(raised.value)
        assert "missing_2367.tif" in message
156+
157+
158+
# ---------------------------------------------------------------------------
159+
# Warn opt-in: warning class, message, and output values all pinned.
160+
# ---------------------------------------------------------------------------
161+
162+
class TestWarnPolicyEmitsWarningAndFillsNodata:
    """``missing_sources='warn'`` is the lenient opt-in.

    Three things to lock in:

    1. The warning class is ``GeoTIFFFallbackWarning`` (not a bare
       ``UserWarning``) and the message names the missing source.
    2. ``attrs['vrt_holes']`` records the affected source.
    3. The returned array shows ``PRESENT_FILL`` on the present half
       and NaN on the missing half. The eager path materialises this
       immediately; the chunked path needs ``.compute()`` and emits the
       warning at compute time rather than build time, but the
       resulting array values must match.
    """

    @staticmethod
    def _assert_present_left_nan_right(out):
        """Check the materialised mosaic: full 8x4 extent, left half
        equal to ``PRESENT_FILL``, right half all NaN."""
        # Pin the shape first. Without it, a reader that returned only
        # the present 4x4 half would make ``out[:, 4:]`` empty, and
        # ``np.all`` over an empty array is vacuously True.
        assert out.shape == (4, 8), (
            f"Mosaic must keep the full VRT extent (4, 8); "
            f"got {out.shape!r}"
        )
        np.testing.assert_array_equal(
            out[:, :4], np.full((4, 4), PRESENT_FILL, dtype=np.float32),
        )
        assert np.all(np.isnan(out[:, 4:])), (
            "Lenient policy must leave the missing region as NaN on "
            "float bands."
        )

    def test_eager_warn_emits_and_fills(self, tmp_path):
        vrt_path, _, _ = _build_partial_vrt(str(tmp_path))
        # Use ``match=`` for the class + message check in one step,
        # matching the sibling 1799 test's style. ``match`` is a regex,
        # so the dot is escaped to require the literal filename.
        with pytest.warns(
            GeoTIFFFallbackWarning, match=r"missing_2367\.tif",
        ):
            da = read_vrt(vrt_path, missing_sources="warn")

        # vrt_holes attr is populated and points at the missing file.
        assert "vrt_holes" in da.attrs
        sources = [h["source"] for h in da.attrs["vrt_holes"]]
        assert any(s.endswith("missing_2367.tif") for s in sources)

        # Output values: present half == 7.0, missing half == NaN.
        self._assert_present_left_nan_right(np.asarray(da))

    def test_dask_warn_emits_at_compute_and_fills(self, tmp_path):
        vrt_path, _, _ = _build_partial_vrt(str(tmp_path))
        # Build the lazy DataArray. The parse-time sweep populates
        # ``vrt_holes`` here without forcing a decode.
        da = open_geotiff(
            vrt_path, chunks=4, missing_sources="warn",
        )
        assert "vrt_holes" in da.attrs, (
            "Chunked warn path must populate vrt_holes at build so "
            "callers can branch on partial mosaics without computing."
        )

        # Record every warning raised during compute; "always" stops
        # the once-per-location filter from hiding a repeat emission.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            computed = da.compute()

        msgs = [
            str(w.message) for w in caught
            if isinstance(w.message, GeoTIFFFallbackWarning)
        ]
        assert any("missing_2367.tif" in m for m in msgs), (
            f"Chunked warn path must emit GeoTIFFFallbackWarning at "
            f"compute naming the missing source; got: {msgs!r}"
        )

        self._assert_present_left_nan_right(np.asarray(computed))
231+
232+
233+
# ---------------------------------------------------------------------------
234+
# Invalid policy strings.
235+
# ---------------------------------------------------------------------------
236+
237+
class TestInvalidPolicyRejected:
    """Unrecognised ``missing_sources`` values raise ``ValueError``.

    The message must mention the kwarg name and echo the offending
    value, so typos like ``'raises'`` surface clearly. Both read paths
    are parametrised because the eager and chunked invocations are
    expected to reject identically.
    """

    @pytest.mark.parametrize("reader", READERS)
    @pytest.mark.parametrize(
        "bad_value",
        ["ignore", "RAISE", "raises", "", "warn ", "1"],
    )
    def test_invalid_policy_raises_value_error_naming_value(
        self, reader, bad_value, tmp_path,
    ):
        vrt_path = _build_partial_vrt(str(tmp_path))[0]

        with pytest.raises(ValueError) as raised:
            reader(vrt_path, missing_sources=bad_value)

        text = str(raised.value)
        assert "missing_sources" in text, (
            f"ValueError must name the kwarg; got {text!r}"
        )

        # Compare against repr() of the value: it gives a quoted form
        # that stays visible in the message even for the empty string.
        quoted = repr(bad_value)
        assert quoted in text, (
            f"ValueError must echo the bad value back to the caller; "
            f"got {text!r}"
        )

0 commit comments

Comments
 (0)