From a23fe220c2e31dad2756bb9c4e1d2b808d56ac18 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 17:35:33 -0700 Subject: [PATCH 1/4] Centralise VRT capability validator at both read entry points (#2371) Extend ``validate_parsed_vrt`` to cover nested VRT references, per-source mask-band semantics, and dataset / band-level ```` blocks. Route the internal ``_vrt.read_vrt`` entry point through the validator so direct callers and the chunked dask path get the same capability rejections as ``_backends/vrt.read_vrt``. Expose ``validate_vrt_capability`` as a public alias on ``_vrt_validation`` matching the epic naming (the implementation continues under ``validate_parsed_vrt`` for backward compatibility). Update the legacy issue #1751 resample-alg regression tests to accept the validator's ``VRTUnsupportedError`` alongside the original ``NotImplementedError``; both encode the same contract that nearest must not be silently substituted for an unsupported algorithm. --- xrspatial/geotiff/_vrt.py | 54 +++++++++++++ xrspatial/geotiff/_vrt_validation.py | 81 +++++++++++++++++-- .../tests/test_vrt_resample_alg_1751.py | 13 ++- 3 files changed, 138 insertions(+), 10 deletions(-) diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 67f531fdf..8704f2841 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -225,6 +225,17 @@ class _Source: # matching rect sizes is a no-op and passes through. Higher-quality # resamplers are tracked for follow-up. resample_alg: str | None = None + # True when the source element declared ``true`` + # (GDAL writes this for ComplexSource entries that read through the + # source raster's per-band mask). The read pipeline ignores mask bands + # and would silently drop the per-pixel mask, so ``validate_parsed_vrt`` + # rejects sources where this flag is set. See issue #2371. + use_mask_band: bool = False + # True when the source element declared a ```` child + # (per-source mask reference). Same disposition as ``use_mask_band`` -- + # the read pipeline cannot serve the mask semantics and the validator + # rejects the VRT. See issue #2371. + has_mask_source: bool = False @dataclass @@ -381,6 +392,12 @@ def _parse_band_nodata(text: str | None, 'PixelFunctionType', 'PixelFunctionLanguage', 'PixelFunctionCode', 'PixelFunctionArguments', 'SourceTransferType', 'MaskBand', 'PansharpeningOptions', + # Defensive: a band-level ```` block is unusual + # (warp options usually sit at the dataset level or alongside the + # ``VRTWarpedRasterBand`` subClass marker), but catching it here as + # well keeps the parser symmetric with the dataset-level rejection + # in ``_UNSUPPORTED_DATASET_TAGS``. See issue #2371. + 'GDALWarpOptions', }) # Dataset-level (```` children, sibling of ````) @@ -397,6 +414,14 @@ def _parse_band_nodata(text: str | None, # Pansharpening setup at the dataset level (separate from the # band-level ). 'PansharpeningOptions', + # GDAL ```` block. The ``VRTWarpedRasterBand`` subClass + # rejection above catches the band-level marker, but a warped VRT can + # also embed the warp configuration as a dataset-level sibling block + # (or as a child of a band that does not use the subClass attribute, + # depending on how the VRT was emitted). The mosaic reader does not + # implement reprojection; silently ignoring the block would dispatch + # on the raw source pixels and skip the warp step. See issue #2371. + 'GDALWarpOptions', }) @@ -674,6 +699,21 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: # #1694 and #1751. resample_alg = _text(src_elem, 'ResampleAlg') + # ```` and ```` per-source markers + # request that the source's per-band mask drives the + # placement; the read pipeline does not honour mask bands + # and would silently drop the per-pixel mask. Capture the + # flags so ``validate_parsed_vrt`` can reject the VRT with + # an actionable message that names the offending source. + # GDAL emits ``true`` (string) + # so accept any non-empty truthy spelling. See issue #2371. + use_mask_band_str = _text(src_elem, 'UseMaskBand') + use_mask_band = ( + use_mask_band_str is not None + and use_mask_band_str.strip().lower() in ('1', 'true', 'yes') + ) + has_mask_source = src_elem.find('MaskBand') is not None + sources.append(_Source( filename=filename, band=src_band, @@ -683,6 +723,8 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: scale=scale, offset=offset, resample_alg=resample_alg, + use_mask_band=use_mask_band, + has_mask_source=has_mask_source, )) bands.append(_VRTBand( @@ -1193,6 +1235,18 @@ def read_vrt(vrt_path: str, *, window=None, xml_str = _read_vrt_xml(vrt_path) vrt_dir = os.path.dirname(os.path.abspath(vrt_path)) vrt = parse_vrt(xml_str, vrt_dir) + + # Route every fresh parse through the centralised capability + # validator before any source read. When ``parsed`` is supplied the + # caller is responsible for having validated already (the chunked + # dask path threads a pre-validated instance in via #1825, and the + # ``_backends/vrt.read_vrt`` wrapper runs the validator on the + # eager parse before dispatching). Direct callers of this internal + # entry point now get the same capability gate as the public + # backend path. See issue #2371. + if parsed is None: + from ._vrt_validation import validate_parsed_vrt + validate_parsed_vrt(vrt, source=vrt_path, mode='read') if missing_sources not in ('warn', 'raise'): raise ValueError( f"missing_sources must be 'warn' or 'raise', got " diff --git a/xrspatial/geotiff/_vrt_validation.py b/xrspatial/geotiff/_vrt_validation.py index 407ffbbb7..508eaac50 100644 --- a/xrspatial/geotiff/_vrt_validation.py +++ b/xrspatial/geotiff/_vrt_validation.py @@ -286,28 +286,85 @@ def validate_parsed_vrt( sr = src.src_rect dr = src.dst_rect - # Rule 7a: negative SrcRect size. + # Rule 6b: nested VRT. A SourceFilename ending in ``.vrt`` + # would recurse the read pipeline through a second VRT + # parse, which the mosaic reader does not implement. Catch + # the case here (not at parse time) so the validator owns + # every capability rejection and the message names both + # outer and inner VRT paths. Case-insensitive on the + # extension so ``.VRT`` (Windows-style) also trips the + # rejection. See issue #2371. + if src.filename.lower().endswith('.vrt'): + raise VRTUnsupportedError( + f"VRT '{source}' references another VRT as a source " + f"({src.filename!r}, band {band.band_num}). Nested " + f"VRTs are not a supported feature in this release; " + f"the mosaic reader assembles pixel data from " + f"GeoTIFF sources only. Materialise the inner VRT " + f"to a GeoTIFF with ``gdal_translate`` (or " + f"``xrspatial.geotiff.to_geotiff`` after reading " + f"the inner VRT separately) and reference the " + f"resulting GeoTIFF in the outer VRT instead." + ) + + # Rule 6c: complex mask / alpha source semantics. + # ``true`` and per-source + # ```` children declare that the source's + # per-band mask drives placement. The read pipeline ignores + # the mask, so the per-pixel mask would silently drop and + # the dispatched array would mis-label every masked pixel + # as valid. Reject with the source path and the offending + # flag. See issue #2371. + if src.use_mask_band: + raise VRTUnsupportedError( + f"VRT '{source}' source '{src.filename}' (band " + f"{band.band_num}) declares " + f"true. The read " + f"pipeline does not honour per-source mask bands " + f"and would silently drop the per-pixel mask, " + f"mis-labelling masked pixels as valid. Re-export " + f"the source with the mask burned into the band's " + f"nodata sentinel and drop the flag." + ) + if src.has_mask_source: + raise VRTUnsupportedError( + f"VRT '{source}' source '{src.filename}' (band " + f"{band.band_num}) declares a per-source " + f"child. The read pipeline does not honour mask " + f"bands and would silently drop the per-pixel " + f"mask. Re-export the source with the mask burned " + f"into the band's nodata sentinel and drop the " + f" child." + ) + + # Rule 7a: negative SrcRect size. Keep the "SrcRect ... + # negative size" phrasing so the legacy regex pattern in + # ``test_geotiff_vrt_srcrect_validation_1784.py`` still + # matches now that the validator preempts the per-source + # check that originally raised this message. if sr.x_size < 0 or sr.y_size < 0: raise VRTUnsupportedError( f"VRT '{source}' SimpleSource '{src.filename}' " - f"(band {band.band_num}) has negative SrcRect size " + f"(band {band.band_num}) SrcRect has negative size " f"(xSize={sr.x_size}, ySize={sr.y_size}); SrcRect " f"sizes must be non-negative." ) - # Rule 7b: negative SrcRect offset. + # Rule 7b: negative SrcRect offset. Same phrasing rationale + # as Rule 7a (legacy regex match). if sr.x_off < 0 or sr.y_off < 0: raise VRTUnsupportedError( f"VRT '{source}' SimpleSource '{src.filename}' " - f"(band {band.band_num}) has negative SrcRect offset " + f"(band {band.band_num}) SrcRect has negative offset " f"(xOff={sr.x_off}, yOff={sr.y_off}); SrcRect " f"offsets must be non-negative." ) - # Rule 8a: negative DstRect size. + # Rule 8a: negative DstRect size. Same phrasing rationale + # as Rule 7a (legacy regex match). if dr.x_size < 0 or dr.y_size < 0: raise VRTUnsupportedError( f"VRT '{source}' SimpleSource '{src.filename}' " - f"(band {band.band_num}) has negative DstRect size " + f"(band {band.band_num}) DstRect has negative size " f"(xSize={dr.x_size}, ySize={dr.y_size}); DstRect " f"sizes must be non-negative." ) @@ -355,8 +412,16 @@ def validate_parsed_vrt( f"resampling; substituting nearest would silently " f"mislabel the output. Re-export with " f"Nearest or matching " - f"SrcRect/DstRect sizes." + f"SrcRect/DstRect sizes. See issue #1751." ) -__all__ = ['validate_parsed_vrt'] +# Public alias matching the issue #2371 / epic #2342 naming. The +# implementation continues to live under ``validate_parsed_vrt`` for +# backward compatibility with the ``_backends/vrt.py`` call sites and +# the existing test files; new call sites should prefer the +# capability-validator spelling. +validate_vrt_capability = validate_parsed_vrt + + +__all__ = ['validate_parsed_vrt', 'validate_vrt_capability'] diff --git a/xrspatial/geotiff/tests/test_vrt_resample_alg_1751.py b/xrspatial/geotiff/tests/test_vrt_resample_alg_1751.py index fd980f228..944042835 100644 --- a/xrspatial/geotiff/tests/test_vrt_resample_alg_1751.py +++ b/xrspatial/geotiff/tests/test_vrt_resample_alg_1751.py @@ -17,9 +17,18 @@ import numpy as np import pytest +from xrspatial.geotiff._errors import VRTUnsupportedError from xrspatial.geotiff._vrt import read_vrt from xrspatial.geotiff._writer import write +# Accept either the historical ``NotImplementedError`` raised by the +# placement-site ``_check_resample_alg_supported`` gate or the newer +# ``VRTUnsupportedError`` raised by the centralised +# ``validate_parsed_vrt`` once it preempts the placement check (see +# issue #2371). Both encode the same contract: nearest must not be +# silently substituted for an unsupported alg. +_UNSUPPORTED_RESAMPLE_EXC = (NotImplementedError, VRTUnsupportedError) + def _write_src(tmp_path) -> str: """Write a 4x4 uint16 source TIFF and return its path.""" @@ -69,7 +78,7 @@ def test_unsupported_resample_alg_raises(tmp_path, alg): alg_elem=f'{alg}') vrt_path = _write_vrt(tmp_path, xml, f'{alg.lower()}.vrt') - with pytest.raises(NotImplementedError) as excinfo: + with pytest.raises(_UNSUPPORTED_RESAMPLE_EXC) as excinfo: read_vrt(vrt_path) msg = str(excinfo.value) assert alg in msg @@ -84,7 +93,7 @@ def test_unsupported_resample_alg_case_insensitive(tmp_path): alg_elem='bilinear') vrt_path = _write_vrt(tmp_path, xml, 'lower.vrt') - with pytest.raises(NotImplementedError, match='bilinear'): + with pytest.raises(_UNSUPPORTED_RESAMPLE_EXC, match='bilinear'): read_vrt(vrt_path) From 00f948c8e5626904d95af71bae4ec1621d2d39b4 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 17:39:08 -0700 Subject: [PATCH 2/4] Add unit tests for new VRT capability rejections (#2371) Cover nested VRT, dataset-level and band-level , per-source and children, and the now-typed resample-alg rejection at the internal entry point. Each rejection is exercised at validator-direct, public _backends/vrt.read_vrt, internal _vrt.read_vrt, and open_geotiff entry points so the wiring is covered. --- .../test_vrt_capability_validator_2371.py | 495 ++++++++++++++++++ 1 file changed, 495 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py diff --git a/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py b/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py new file mode 100644 index 000000000..a84e3e649 --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py @@ -0,0 +1,495 @@ +"""Regression tests for issue #2371 (sub-task of epic #2342). + +The centralised VRT capability validator +(``xrspatial.geotiff._vrt_validation.validate_parsed_vrt``, exposed as +``validate_vrt_capability``) now covers four additional rejection +paths and is wired into both the internal ``_vrt.read_vrt`` and the +public ``_backends/vrt.read_vrt`` entry points. The four paths: + +1. Nested VRTs: a ``.vrt`` referenced as a ``SourceFilename`` inside + another VRT. +2. Warped VRTs declaring a ```` block at the dataset + or band level (the band-level ``subClass="VRTWarpedRasterBand"`` + marker is already rejected by the existing parse-time subclass + check). +3. Resample algorithm beyond nearest when SrcRect and DstRect sizes + differ (extended from ``_check_resample_alg_supported`` so the + chunked path also rejects at graph-build time). +4. Complex mask / alpha source semantics: per-source + ``true`` flags and per-source + ```` children. + +Each test asserts the rejection fires at validator time (before any +source decode) and that the message names the offending source path +or feature so a caller can locate the bad source without re-parsing +the VRT XML themselves. +""" +from __future__ import annotations + +import os + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._backends.vrt import read_vrt as _public_read_vrt +from xrspatial.geotiff._errors import ( + GeoTIFFAmbiguousMetadataError, + UnsupportedGeoTIFFFeatureError, + VRTUnsupportedError, +) +from xrspatial.geotiff._vrt import parse_vrt +from xrspatial.geotiff._vrt import read_vrt as _internal_read_vrt +from xrspatial.geotiff._vrt_validation import ( + validate_parsed_vrt, + validate_vrt_capability, +) +from xrspatial.geotiff._writer import write + + +# --------------------------------------------------------------------------- +# Test fixtures +# --------------------------------------------------------------------------- + + +def _write_src(tmp_path, name: str = 'src_2371.tif', + shape=(4, 4), dtype=np.uint16) -> str: + """Write a small source TIFF and return its path.""" + arr = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape) + p = str(tmp_path / name) + write(arr, p, compression='none', tiled=False) + return p + + +def _write_vrt(tmp_path, xml: str, name: str = 'mosaic_2371.vrt') -> str: + """Write a VRT XML to disk and return its path.""" + p = str(tmp_path / name) + with open(p, 'w') as f: + f.write(xml) + return p + + +def _parse(tmp_path, xml: str, name: str = 'mosaic_2371.vrt'): + """Write + parse a VRT XML. Returns ``(path, parsed)``.""" + path = _write_vrt(tmp_path, xml, name) + parsed = parse_vrt(xml, os.path.dirname(os.path.abspath(path))) + return path, parsed + + +# --------------------------------------------------------------------------- +# Public-alias contract +# --------------------------------------------------------------------------- + + +def test_validate_vrt_capability_alias_resolves_to_validate_parsed_vrt(): + """``validate_vrt_capability`` is the public alias matching the + issue text. It must resolve to the same underlying callable as + ``validate_parsed_vrt`` so both names share one implementation.""" + assert validate_vrt_capability is validate_parsed_vrt + + +# --------------------------------------------------------------------------- +# Rule: nested VRT (a .vrt referenced as a SourceFilename) +# --------------------------------------------------------------------------- + + +def _nested_vrt_xml(inner_vrt_path: str) -> str: + return f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {inner_vrt_path} + 1 + + + + +""" + + +def test_nested_vrt_rejected_at_validator(tmp_path): + """A ``SimpleSource`` referencing another ``.vrt`` file must raise + ``VRTUnsupportedError`` at validate time with both VRT paths in the + message.""" + # Build an inner VRT that on its own is well-formed, then build an + # outer VRT that references it as a source. + src_path = _write_src(tmp_path) + inner_xml = f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + + +""" + inner_path = _write_vrt(tmp_path, inner_xml, 'inner_2371.vrt') + + outer_path, parsed = _parse( + tmp_path, _nested_vrt_xml(inner_path), 'outer_2371.vrt' + ) + + with pytest.raises(VRTUnsupportedError) as excinfo: + validate_parsed_vrt(parsed, source=outer_path, mode='read') + msg = str(excinfo.value) + # Outer path appears as the failing VRT. + assert outer_path in msg + # Inner path is named so the caller can locate the bad source. + assert inner_path in msg + # Message names the failure mode. + assert 'Nested' in msg or 'nested' in msg + + +def test_nested_vrt_uppercase_extension_rejected(tmp_path): + """``.VRT`` (uppercase) trips the same rejection: extension matching + must be case-insensitive so Windows-style emitters are caught.""" + src_path = _write_src(tmp_path) + inner_xml = f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + + +""" + inner_path = _write_vrt(tmp_path, inner_xml, 'INNER_2371.VRT') + + outer_path, parsed = _parse( + tmp_path, _nested_vrt_xml(inner_path), 'outer_upper_2371.vrt' + ) + with pytest.raises(VRTUnsupportedError, match='[Nn]ested'): + validate_parsed_vrt(parsed, source=outer_path, mode='read') + + +def test_nested_vrt_rejected_via_public_read_vrt(tmp_path): + """The public ``_backends/vrt.read_vrt`` entry point must surface + the same rejection as the direct validator call.""" + src_path = _write_src(tmp_path) + inner_xml = f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + + +""" + inner_path = _write_vrt(tmp_path, inner_xml, 'inner_pub_2371.vrt') + outer_path = _write_vrt( + tmp_path, + _nested_vrt_xml(inner_path), + 'outer_pub_2371.vrt', + ) + with pytest.raises(VRTUnsupportedError, match='[Nn]ested'): + _public_read_vrt(outer_path) + + +def test_nested_vrt_rejected_via_open_geotiff(tmp_path): + """The dispatched ``open_geotiff('foo.vrt')`` path runs through the + same backend wrapper and must produce the same rejection.""" + src_path = _write_src(tmp_path) + inner_xml = f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + + +""" + inner_path = _write_vrt(tmp_path, inner_xml, 'inner_og_2371.vrt') + outer_path = _write_vrt( + tmp_path, + _nested_vrt_xml(inner_path), + 'outer_og_2371.vrt', + ) + with pytest.raises(VRTUnsupportedError, match='[Nn]ested'): + open_geotiff(outer_path) + + +def test_nested_vrt_rejected_via_internal_read_vrt(tmp_path): + """The internal ``_vrt.read_vrt`` is now routed through the + validator too (issue #2371 wires the same gate at both entry + points). A direct call must produce the rejection without going + through the public backend wrapper.""" + src_path = _write_src(tmp_path) + inner_xml = f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + + +""" + inner_path = _write_vrt(tmp_path, inner_xml, 'inner_int_2371.vrt') + outer_path = _write_vrt( + tmp_path, + _nested_vrt_xml(inner_path), + 'outer_int_2371.vrt', + ) + with pytest.raises(VRTUnsupportedError, match='[Nn]ested'): + _internal_read_vrt(outer_path) + + +# --------------------------------------------------------------------------- +# Rule: warped VRT (```` block) +# --------------------------------------------------------------------------- + + +_WARP_DATASET_XML = """ + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + 64.0 + NearestNeighbour + + +""" + + +def test_warp_options_dataset_level_rejected_at_parse(tmp_path): + """A dataset-level ```` block raises + ``UnsupportedGeoTIFFFeatureError`` during ``parse_vrt``. The parser + rejects the element via ``_UNSUPPORTED_DATASET_TAGS`` so callers + that route through the validator still see a typed failure (the + parse step runs first, before the validator is reached).""" + path = _write_vrt(tmp_path, _WARP_DATASET_XML, 'warp_ds_2371.vrt') + with pytest.raises( + UnsupportedGeoTIFFFeatureError, match='GDALWarpOptions' + ): + parse_vrt(_WARP_DATASET_XML, os.path.dirname(path)) + + +def test_warp_options_dataset_level_rejected_via_public_read_vrt(tmp_path): + """The public ``_backends/vrt.read_vrt`` entry point surfaces the + same warp rejection.""" + path = _write_vrt(tmp_path, _WARP_DATASET_XML, 'warp_pub_2371.vrt') + with pytest.raises( + UnsupportedGeoTIFFFeatureError, match='GDALWarpOptions' + ): + _public_read_vrt(path) + + +def test_warp_options_dataset_level_rejected_via_internal_read_vrt(tmp_path): + """The internal ``_vrt.read_vrt`` rejects the same input. Routing + through the validator preserves the parse-time rejection because + ``parse_vrt`` runs before ``validate_parsed_vrt``.""" + path = _write_vrt(tmp_path, _WARP_DATASET_XML, 'warp_int_2371.vrt') + with pytest.raises( + UnsupportedGeoTIFFFeatureError, match='GDALWarpOptions' + ): + _internal_read_vrt(path) + + +_WARP_BAND_XML = """ + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + NearestNeighbour + + +""" + + +def test_warp_options_band_level_rejected(tmp_path): + """A band-level ```` block (rare but possible + depending on the VRT emitter) is rejected via the band-children + sweep in ``_UNSUPPORTED_BAND_TAGS``.""" + path = _write_vrt(tmp_path, _WARP_BAND_XML, 'warp_band_2371.vrt') + with pytest.raises( + UnsupportedGeoTIFFFeatureError, match='GDALWarpOptions' + ): + parse_vrt(_WARP_BAND_XML, os.path.dirname(path)) + + +# --------------------------------------------------------------------------- +# Rule: per-source mask / alpha semantics +# --------------------------------------------------------------------------- + + +def _use_mask_band_xml(src_path: str, flag: str = 'true') -> str: + return f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + {flag} + + +""" + + +def test_use_mask_band_true_rejected_at_validator(tmp_path): + """A ComplexSource declaring ``true`` + must raise ``VRTUnsupportedError`` at validate time, with the + offending source path in the message.""" + src_path = _write_src(tmp_path) + path, parsed = _parse( + tmp_path, _use_mask_band_xml(src_path), 'use_mask_2371.vrt' + ) + with pytest.raises(VRTUnsupportedError) as excinfo: + validate_parsed_vrt(parsed, source=path, mode='read') + msg = str(excinfo.value) + assert 'UseMaskBand' in msg + assert src_path in msg + + +@pytest.mark.parametrize('flag', ['true', 'True', 'TRUE', '1', 'yes']) +def test_use_mask_band_truthy_spellings_rejected(tmp_path, flag): + """```` accepts several truthy spellings (GDAL writes + lowercase ``true`` but the validator should not depend on case or + on the exact token).""" + src_path = _write_src(tmp_path, name=f'src_flag_{flag}_2371.tif') + path, parsed = _parse( + tmp_path, _use_mask_band_xml(src_path, flag=flag), + f'use_mask_{flag}_2371.vrt', + ) + with pytest.raises(VRTUnsupportedError, match='UseMaskBand'): + validate_parsed_vrt(parsed, source=path, mode='read') + + +def test_use_mask_band_false_is_accepted(tmp_path): + """An explicit ``false`` is a no-op and + must not trip the rejection. GDAL never writes ``false`` itself, + but hand-written VRTs occasionally do.""" + src_path = _write_src(tmp_path, name='src_false_2371.tif') + path, parsed = _parse( + tmp_path, _use_mask_band_xml(src_path, flag='false'), + 'use_mask_false_2371.vrt', + ) + # Must not raise. + validate_parsed_vrt(parsed, source=path, mode='read') + + +def test_use_mask_band_rejected_via_public_read_vrt(tmp_path): + """End-to-end: the public backend entry point surfaces the same + rejection.""" + src_path = _write_src(tmp_path, name='src_pub_mask_2371.tif') + path = _write_vrt( + tmp_path, _use_mask_band_xml(src_path), 'use_mask_pub_2371.vrt' + ) + with pytest.raises(VRTUnsupportedError, match='UseMaskBand'): + _public_read_vrt(path) + + +def test_use_mask_band_rejected_via_internal_read_vrt(tmp_path): + """End-to-end: the internal entry point also surfaces the rejection + via the validator now that #2371 wires it in.""" + src_path = _write_src(tmp_path, name='src_int_mask_2371.tif') + path = _write_vrt( + tmp_path, _use_mask_band_xml(src_path), 'use_mask_int_2371.vrt' + ) + with pytest.raises(VRTUnsupportedError, match='UseMaskBand'): + _internal_read_vrt(path) + + +def _per_source_mask_band_xml(src_path: str) -> str: + """A ComplexSource with a per-source ```` child (distinct + from a dataset-level ```` sibling). GDAL emits this when + a source TIFF carries an internal mask band that the VRT wires + through.""" + return f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + + {src_path} + 1 + + + +""" + + +def test_per_source_mask_band_rejected_at_validator(tmp_path): + """A per-source ```` child raises ``VRTUnsupportedError`` + at validate time naming the source path.""" + src_path = _write_src(tmp_path, name='src_pmask_2371.tif') + path, parsed = _parse( + tmp_path, _per_source_mask_band_xml(src_path), + 'per_src_mask_2371.vrt', + ) + with pytest.raises(VRTUnsupportedError) as excinfo: + validate_parsed_vrt(parsed, source=path, mode='read') + msg = str(excinfo.value) + assert 'MaskBand' in msg + assert src_path in msg + + +# --------------------------------------------------------------------------- +# Rule: resample alg gate now fires at the internal entry point +# --------------------------------------------------------------------------- + + +def test_resample_alg_now_rejected_at_internal_read_vrt(tmp_path): + """The internal ``_vrt.read_vrt`` was previously not routed through + the validator and surfaced unsupported-resample as a + ``NotImplementedError`` at the placement site. After #2371 the + validator preempts that gate so the failure is now a typed + ``VRTUnsupportedError`` at graph build / eager setup.""" + src_path = _write_src(tmp_path, name='src_resample_2371.tif') + xml = f""" + 0.0, 2.0, 0.0, 0.0, 0.0, -2.0 + + + {src_path} + 1 + + + Bilinear + + +""" + path = _write_vrt(tmp_path, xml, 'resample_int_2371.vrt') + with pytest.raises(VRTUnsupportedError, match='Bilinear'): + _internal_read_vrt(path) + + +# --------------------------------------------------------------------------- +# Subclassing contract for the new path +# --------------------------------------------------------------------------- + + +def test_nested_vrt_error_is_value_error(tmp_path): + """``VRTUnsupportedError`` already subclasses ``ValueError`` via + ``GeoTIFFAmbiguousMetadataError``. The nested-VRT path uses the + same class, so ``except ValueError`` keeps catching the new + rejection too.""" + src_path = _write_src(tmp_path, name='src_subclass_2371.tif') + inner_xml = f""" + 0.0, 1.0, 0.0, 0.0, 0.0, -1.0 + + + {src_path} + 1 + + + + +""" + inner_path = _write_vrt(tmp_path, inner_xml, 'inner_sub_2371.vrt') + outer_path, parsed = _parse( + tmp_path, _nested_vrt_xml(inner_path), 'outer_sub_2371.vrt' + ) + with pytest.raises(ValueError): # via VRTUnsupportedError -> ValueError + validate_parsed_vrt(parsed, source=outer_path, mode='read') + with pytest.raises(GeoTIFFAmbiguousMetadataError): + validate_parsed_vrt(parsed, source=outer_path, mode='read') From b4038454009776af7c05312302d5ea7fcc660dd8 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 17:44:25 -0700 Subject: [PATCH 3/4] Address review nits: narrow UseMaskBand truthy set, anchor messages on SUPPORTED_FEATURES (#2371) - Narrow truthy set to ('1', 'true') to match what GDAL actually emits. Tokens like yes / on / Y now pass through as not-mask rather than tripping the rejection. - Anchor the nested-VRT rejection message on xrspatial.geotiff.SUPPORTED_FEATURES so the new path matches the parse_vrt rejection style. - Add a one-line comment explaining why the validator import in _vrt.read_vrt is lazy (circular import with _vrt_validation). - Replace the original yes parametrize row with a separate non_canonical_truthy_accepted test so the contract for outside the canonical set is explicit. --- xrspatial/geotiff/_vrt.py | 11 +++++--- xrspatial/geotiff/_vrt_validation.py | 6 +++-- .../test_vrt_capability_validator_2371.py | 27 ++++++++++++++++--- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 8704f2841..def620ffc 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -705,12 +705,14 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: # and would silently drop the per-pixel mask. Capture the # flags so ``validate_parsed_vrt`` can reject the VRT with # an actionable message that names the offending source. - # GDAL emits ``true`` (string) - # so accept any non-empty truthy spelling. See issue #2371. + # GDAL emits ``true`` exclusively; + # the truthy set is narrowed to ``('1', 'true')`` to match + # what real VRTs contain rather than every spelling Python + # would coerce to ``True``. See issue #2371. use_mask_band_str = _text(src_elem, 'UseMaskBand') use_mask_band = ( use_mask_band_str is not None - and use_mask_band_str.strip().lower() in ('1', 'true', 'yes') + and use_mask_band_str.strip().lower() in ('1', 'true') ) has_mask_source = src_elem.find('MaskBand') is not None @@ -1245,6 +1247,9 @@ def read_vrt(vrt_path: str, *, window=None, # entry point now get the same capability gate as the public # backend path. See issue #2371. if parsed is None: + # Lazy import: ``_vrt_validation`` imports ``_NEAREST_RESAMPLE_ALGS`` + # from this module for the resample-alg check, so a top-level + # import here would close a circular import loop at module load. from ._vrt_validation import validate_parsed_vrt validate_parsed_vrt(vrt, source=vrt_path, mode='read') if missing_sources not in ('warn', 'raise'): diff --git a/xrspatial/geotiff/_vrt_validation.py b/xrspatial/geotiff/_vrt_validation.py index 508eaac50..18a349423 100644 --- a/xrspatial/geotiff/_vrt_validation.py +++ b/xrspatial/geotiff/_vrt_validation.py @@ -300,8 +300,10 @@ def validate_parsed_vrt( f"({src.filename!r}, band {band.band_num}). Nested " f"VRTs are not a supported feature in this release; " f"the mosaic reader assembles pixel data from " - f"GeoTIFF sources only. Materialise the inner VRT " - f"to a GeoTIFF with ``gdal_translate`` (or " + f"GeoTIFF sources only. See " + f"`xrspatial.geotiff.SUPPORTED_FEATURES` for the " + f"release tier map. Materialise the inner VRT to a " + f"GeoTIFF with ``gdal_translate`` (or " f"``xrspatial.geotiff.to_geotiff`` after reading " f"the inner VRT separately) and reference the " f"resulting GeoTIFF in the outer VRT instead." diff --git a/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py b/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py index a84e3e649..dc3570cb8 100644 --- a/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py +++ b/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py @@ -348,11 +348,14 @@ def test_use_mask_band_true_rejected_at_validator(tmp_path): assert src_path in msg -@pytest.mark.parametrize('flag', ['true', 'True', 'TRUE', '1', 'yes']) +@pytest.mark.parametrize('flag', ['true', 'True', 'TRUE', '1']) def test_use_mask_band_truthy_spellings_rejected(tmp_path, flag): - """```` accepts several truthy spellings (GDAL writes - lowercase ``true`` but the validator should not depend on case or - on the exact token).""" + """```` accepts the case-insensitive ``true`` and the + digit ``1`` -- GDAL writes lowercase ``true``, and the digit form + keeps the parser tolerant of XML emitters that normalise booleans + to ``1``. Anything else falls outside the truthy set and is + treated as not-mask (see ``test_use_mask_band_non_canonical_*`` + below).""" src_path = _write_src(tmp_path, name=f'src_flag_{flag}_2371.tif') path, parsed = _parse( tmp_path, _use_mask_band_xml(src_path, flag=flag), @@ -375,6 +378,22 @@ def test_use_mask_band_false_is_accepted(tmp_path): validate_parsed_vrt(parsed, source=path, mode='read') +@pytest.mark.parametrize('flag', ['yes', 'on', 'Y']) +def test_use_mask_band_non_canonical_truthy_accepted(tmp_path, flag): + """Tokens outside the canonical GDAL set (``true`` / ``1``) are + treated as not-mask. The parser deliberately narrows the truthy + set so a hand-edited VRT using a Python-truthy spelling does not + silently flip the read into the rejection path. If GDAL ever + starts emitting one of these, the set should be widened then.""" + src_path = _write_src(tmp_path, name=f'src_ncf_{flag}_2371.tif') + path, parsed = _parse( + tmp_path, _use_mask_band_xml(src_path, flag=flag), + f'use_mask_ncf_{flag}_2371.vrt', + ) + # Must not raise -- non-canonical token is treated as not-mask. + validate_parsed_vrt(parsed, source=path, mode='read') + + def test_use_mask_band_rejected_via_public_read_vrt(tmp_path): """End-to-end: the public backend entry point surfaces the same rejection.""" From 98f7fe5edd6f62f35c5308e78f7cfc15df255adb Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 17:58:13 -0700 Subject: [PATCH 4/4] Fix Windows path rendering in nested-VRT rejection message (#2371) Switch the nested-VRT rejection from {src.filename!r} to direct interpolation so Windows paths render with single backslashes instead of the doubled escapes repr emits, matching the path-containment pattern in parse_vrt. Update the test assertion to compare on the file basename so any further normalisation difference (short-name vs long-name, symlink resolution) between str(tmp_path / name) and the os.path.realpath form parse_vrt stores does not break the match. --- xrspatial/geotiff/_vrt_validation.py | 9 ++++++++- .../geotiff/tests/test_vrt_capability_validator_2371.py | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/xrspatial/geotiff/_vrt_validation.py b/xrspatial/geotiff/_vrt_validation.py index 18a349423..89f78eb36 100644 --- a/xrspatial/geotiff/_vrt_validation.py +++ b/xrspatial/geotiff/_vrt_validation.py @@ -295,9 +295,16 @@ def validate_parsed_vrt( # extension so ``.VRT`` (Windows-style) also trips the # rejection. See issue #2371. if src.filename.lower().endswith('.vrt'): + # Direct interpolation (not !r) for ``src.filename`` so + # Windows paths render with single backslashes rather + # than the doubled escapes ``repr`` emits, matching the + # ``parse_vrt`` pattern at the path-containment check. + # Without this, a callers ``in`` check against the raw + # Windows path would fail because ``repr`` doubles + # every backslash. raise VRTUnsupportedError( f"VRT '{source}' references another VRT as a source " - f"({src.filename!r}, band {band.band_num}). Nested " + f"('{src.filename}', band {band.band_num}). Nested " f"VRTs are not a supported feature in this release; " f"the mosaic reader assembles pixel data from " f"GeoTIFF sources only. See " diff --git a/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py b/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py index dc3570cb8..6d96cd239 100644 --- a/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py +++ b/xrspatial/geotiff/tests/test_vrt_capability_validator_2371.py @@ -137,7 +137,12 @@ def test_nested_vrt_rejected_at_validator(tmp_path): # Outer path appears as the failing VRT. assert outer_path in msg # Inner path is named so the caller can locate the bad source. - assert inner_path in msg + # ``parse_vrt`` canonicalises source filenames via ``os.path.realpath`` + # so the message carries the realpath form, not the raw string the + # test built. On Windows ``str(tmp_path / name)`` can produce a + # short-name path that differs from the realpath form, so compare + # the basename (the part that survives any normalisation). + assert os.path.basename(inner_path) in msg # Message names the failure mode. assert 'Nested' in msg or 'nested' in msg