From 7dca389330b7e188aefa6be91593ccc87ddbd9ab Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 04:28:18 -0700 Subject: [PATCH 1/4] Fail loudly on unsupported GeoTIFF feature combinations (#2349) Epic #2340 PR 5: detect unsupported GeoTIFF / VRT feature combinations at the entry point and raise an actionable typed error naming the feature and pointing at SUPPORTED_FEATURES. Cases newly rejected: * VRTDataset subClass attributes (VRTWarpedDataset, VRTPansharpenedDataset, etc.) rejected at parse time. The reader has no warp / pansharpen pipeline and silent dispatch on whatever simple sources happen to be embedded would drop the subclass semantics. * VRTRasterBand subClass attribute (VRTDerivedRasterBand etc.) rejected at parse time. No pixel-function evaluator exists. * Unknown VRTRasterBand child elements raise rather than silently skip. Known informational children (Description, UnitType, Offset, Scale, Metadata, ColorTable, ...) keep passing; known output-altering children (KernelFilteredSource, MaskBand, PansharpeningOptions, PixelFunction*, ...) and any unrecognised tag raise. * write_vrt now refuses sources whose declared transform carries a non-zero skew term (rotated_affine), refuses cross-source AREA_OR_POINT mismatch, and refuses cross-source nodata mismatch unless the caller pins the mosaic nodata via the nodata kwarg. New UnsupportedGeoTIFFFeatureError exported from xrspatial.geotiff. Subclasses ValueError so existing ``except ValueError`` callers keep catching the cases. Regression test suite covers each gate plus the existing rotated 6-tuple writer refusal and the rotated VRT GeoTransform read refusal so future refactors cannot regress them back to silent fallback. Backend coverage: numpy / dask+numpy / cupy / dask+cupy share the parse_vrt and write_vrt entry points; the gates run before the backend dispatch, so all four hit the same error. --- xrspatial/geotiff/__init__.py | 4 +- xrspatial/geotiff/_errors.py | 17 + xrspatial/geotiff/_vrt.py | 173 ++++++++- .../tests/test_unsupported_features_2349.py | 361 ++++++++++++++++++ 4 files changed, 553 insertions(+), 2 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_unsupported_features_2349.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 73c3145e..ecc2e5a1 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -73,7 +73,8 @@ from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg # noqa: F401 from ._errors import (ConflictingCRSError, ConflictingNodataError, GeoTIFFAmbiguousMetadataError, InvalidCRSCodeError, MixedBandMetadataError, NonUniformCoordsError, - RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError) + RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError, + UnsupportedGeoTIFFFeatureError) from ._geotags import RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT, GeoTransform # noqa: F401 from ._reader import _MAX_CLOUD_BYTES_SENTINEL, UnsafeURLError from ._reader import read_to_array as _read_to_array @@ -115,6 +116,7 @@ 'UnknownCRSModelTypeError', 'UnparseableCRSError', 'UnsafeURLError', + 'UnsupportedGeoTIFFFeatureError', 'open_geotiff', 'read_geotiff_gpu', 'read_geotiff_dask', diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py index 47f2492e..c6495461 100644 --- a/xrspatial/geotiff/_errors.py +++ b/xrspatial/geotiff/_errors.py @@ -118,6 +118,22 @@ class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): """ +class UnsupportedGeoTIFFFeatureError(ValueError): + """Caller asked for a feature this release does not implement (#2349). + + Raised at the read or write entry point when the input declares a + feature the GeoTIFF module does not support (warped / reprojection + VRTs, pansharpened / processed / derived VRT subclasses, unknown + VRT band children, source transforms with non-zero skew on a VRT + mosaic). The message names the feature and points the caller at + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. + + Subclasses ``ValueError`` so existing ``except ValueError`` callers + keep catching the case; new code can ``except`` this class to + distinguish "we refuse this input" from "the input is malformed". + """ + + __all__ = [ "GeoTIFFAmbiguousMetadataError", "InvalidCRSCodeError", @@ -128,4 +144,5 @@ class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): "ConflictingCRSError", "ConflictingNodataError", "UnknownCRSModelTypeError", + "UnsupportedGeoTIFFFeatureError", ] diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 3bd370ce..ea22b5dc 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -17,6 +17,7 @@ import numpy as np +from ._errors import UnsupportedGeoTIFFFeatureError from ._safe_xml import safe_fromstring @@ -378,6 +379,28 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: vrt_root = os.path.realpath(vrt_dir) allowed_roots = _allowed_source_roots() + # Refuse any GDAL VRT subclass beyond the plain dataset. The mosaic + # reader implements the simple ```` case only; warped / + # pansharpened / processed / derived subclasses depend on a separate + # pipeline (reprojection, kernel filters, derived expressions) that + # this release does not ship. A silently-ignored ``subClass`` would + # produce wrong output: the reader would dispatch on whatever simple + # sources the subclassed VRT happens to embed and skip the subclass + # semantics entirely. Name the offending attribute in the error and + # point at the release tier map. See epic #2340 / PR 5 (#2349). + _sub_class = root.get('subClass') + if _sub_class: + raise UnsupportedGeoTIFFFeatureError( + f"VRTDataset declares subClass={_sub_class!r}, which is not " + f"a supported feature in this release. read_vrt implements " + f"plain ```` mosaics only; warped / pansharpened " + f"/ processed / derived VRT subclasses are listed as " + f"unsupported in :data:`xrspatial.geotiff.SUPPORTED_FEATURES` " + f"(see epic #2340). Materialise the warped or derived output " + f"with an external tool (e.g. ``gdalwarp``) and read the " + f"resulting GeoTIFF instead." + ) + width = int(root.get('rasterXSize', 0)) height = int(root.get('rasterYSize', 0)) @@ -409,6 +432,23 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: bands = [] for band_elem in root.findall('VRTRasterBand'): band_num = int(band_elem.get('band', 1)) + # ``subClass="VRTDerivedRasterBand"`` (and other future subclasses) + # signal a band-level computation that read_vrt does not + # implement. The plain ```` mosaic case has no + # ``subClass`` attribute. Reject so the derived expression is + # not silently dropped down to whatever sources happen to be + # listed. Epic #2340 / PR 5. + _band_sub_class = band_elem.get('subClass') + if _band_sub_class: + raise UnsupportedGeoTIFFFeatureError( + f"VRTRasterBand band={band_num} declares " + f"subClass={_band_sub_class!r}, which is not supported " + f"by read_vrt. Only the plain ```` " + f"mosaic case is implemented; derived / pixel-function " + f"bands are listed as unsupported in " + f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` " + f"(epic #2340)." + ) # Distinguish "attribute missing" (GDAL default: Float32) from # "attribute present but unsupported". The previous # ``_DTYPE_MAP.get(dtype_name, np.float32)`` collapsed both cases @@ -440,11 +480,58 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: nodata = _parse_band_nodata(nodata_str, dtype) color_interp = _text(band_elem, 'ColorInterp') + # GDAL VRT allows several band sub-elements that this reader + # silently ignored before #2349. The pure-metadata ones + # (informational, no effect on the array) stay ignored; the + # ones that alter the computed output (kernel filters, derived + # band expressions, mask bands, pansharpening) get rejected so + # a VRT relying on them cannot read as a plain mosaic and lose + # its declared computation. See epic #2340 / PR 5. + _INFORMATIONAL_BAND_TAGS = frozenset({ + 'NoDataValue', 'ColorInterp', 'Description', 'Offset', + 'Scale', 'Metadata', 'UnitType', 'CategoryNames', + 'ColorTable', 'Histograms', 'GDALRasterAttributeTable', + 'ColorEntry', 'HideNoDataValue', + }) + _SOURCE_BAND_TAGS = frozenset({'SimpleSource', 'ComplexSource'}) + _UNSUPPORTED_BAND_TAGS = frozenset({ + 'KernelFilteredSource', 'AveragedSource', 'NoDataFromMaskSource', + 'PixelFunctionType', 'PixelFunctionLanguage', + 'PixelFunctionCode', 'PixelFunctionArguments', + 'SourceTransferType', 'MaskBand', 'PansharpeningOptions', + }) + sources = [] for src_elem in band_elem: tag = src_elem.tag - if tag not in ('SimpleSource', 'ComplexSource'): + if tag in _INFORMATIONAL_BAND_TAGS: continue + if tag in _UNSUPPORTED_BAND_TAGS: + raise UnsupportedGeoTIFFFeatureError( + f"VRTRasterBand band={band_num} contains a " + f"<{tag}> element, which declares a VRT feature " + f"(kernel-filtered / derived / pansharpened / " + f"mask-band source) that read_vrt does not " + f"implement. Only ```` and " + f"```` are supported. See " + f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` for " + f"the release tier map (epic #2340)." + ) + if tag not in _SOURCE_BAND_TAGS: + # Unknown band child that is neither informational nor a + # known unsupported-feature marker. Refuse rather than + # silently dropping the element: a future GDAL VRT + # extension that alters pixel computation must not pass + # through this reader as if it were a no-op. + raise UnsupportedGeoTIFFFeatureError( + f"VRTRasterBand band={band_num} contains an unknown " + f"<{tag}> element. read_vrt only recognises " + f"```` and ```` sources " + f"plus informational metadata children; an unknown " + f"element could change the declared pixel output. " + f"See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` " + f"for the release tier map (epic #2340)." + ) filename = _text(src_elem, 'SourceFilename') or '' relative = src_elem.find('SourceFilename') @@ -1583,6 +1670,7 @@ def write_vrt(vrt_path: str, source_files: list[str], *, 'transform': geo.transform, 'crs_wkt': geo.crs_wkt, 'nodata': geo.nodata, + 'raster_type': geo.raster_type, }) first = sources_meta[0] @@ -1617,7 +1705,42 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: return abs(a - b) > 0.0 return abs(a - b) / denom > _PIXEL_SIZE_RTOL + # Reject sources whose declared transform carries non-zero skew + # terms (rotated / sheared affine). The writer emits an axis-aligned + # GeoTransform with ``0.0`` skew slots; a rotated source would be + # placed at the wrong pixel offset in the mosaic and the on-disk + # VRT would point at the source with the rotation silently dropped. + # Refuse rather than write a mosaic with mis-located tiles. + # Epic #2340 / PR 5. + for m in sources_meta: + ra = getattr(m['transform'], 'rotated_affine', None) + if ra is None: + continue + # ``rotated_affine`` is the rasterio 6-tuple + # ``(pixel_width, b, origin_x, d, pixel_height, origin_y)``; + # positions 1 and 3 are the skew terms. + try: + b = float(ra[1]) + d = float(ra[3]) + except (IndexError, TypeError, ValueError): + b = d = 0.0 + if b != 0.0 or d != 0.0: + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} has a rotated / sheared " + f"affine transform (b={b!r}, d={d!r}); rotated source " + f"transforms are not a supported feature in this " + f"release. write_vrt emits an axis-aligned mosaic and " + f"would silently drop the skew terms, mis-placing the " + f"source on the virtual raster. Reproject the source " + f"to an axis-aligned grid before adding it to the " + f"mosaic. See " + f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the " + f"release tier map (epic #2340)." + ) + first_crs = first.get('crs_wkt') + first_raster_type = first.get('raster_type') + first_nodata = first.get('nodata') for m in sources_meta[1:]: t = m['transform'] if _pixel_size_mismatch(t.pixel_width, res_x) \ @@ -1656,6 +1779,54 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: f"match the first source. All sources in a VRT must " f"share the same CRS." ) + # Pixel registration must match across sources. The mosaic + # writes a single dataset-level ``AREA_OR_POINT``; flattening to + # the first source's value would silently shift downstream pixel + # coords by half a pixel for every source that disagrees. + # Epic #2340 / PR 5. + m_raster_type = m.get('raster_type') + if (m_raster_type is not None and first_raster_type is not None + and m_raster_type != first_raster_type): + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} declares raster_type=" + f"{m_raster_type!r} which does not match the first " + f"source ({first_raster_type!r}). Silent flattening of " + f"mixed AREA_OR_POINT registration across stacked " + f"sources is not supported; the mosaic would emit a " + f"single dataset-level value and shift the disagreeing " + f"source by half a pixel. Re-tag the sources so they " + f"share the same pixel registration before mosaicing. " + f"See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` " + f"(epic #2340)." + ) + # Refuse mixed per-source nodata sentinels when the caller did + # not pin one via the ``nodata`` kwarg. The legacy writer + # silently picked ``first['nodata']`` for every band of every + # source, which means a tile whose own sentinel was different + # got its "valid" pixels masked once the consumer applied the + # mosaic-level ````. The fail-closed surface + # mirrors the read-side ``MixedBandMetadataError`` opt-out: + # callers who really want the first-source value override the + # check by passing ``nodata=`` explicitly. Epic #2340 / + # PR 5. + m_nodata = m.get('nodata') + if nodata is None and m_nodata != first_nodata: + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} declares nodata=" + f"{m_nodata!r} which does not match the first source " + f"({first_nodata!r}). Silent flattening of mixed " + f"per-source nodata sentinels across stacked sources " + f"is not supported; the writer would emit the first " + f"source's value as a single dataset-level " + f"```` and the disagreeing source's " + f"sentinel would become a " + f"valid-looking pixel on read. Either pin the mosaic " + f"nodata explicitly with ``write_vrt(..., " + f"nodata=)`` or re-tag the sources so they " + f"agree. See " + f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` " + f"(epic #2340)." + ) # Compute the bounding box of all sources all_x0, all_y0, all_x1, all_y1 = [], [], [], [] diff --git a/xrspatial/geotiff/tests/test_unsupported_features_2349.py b/xrspatial/geotiff/tests/test_unsupported_features_2349.py new file mode 100644 index 00000000..8975ac00 --- /dev/null +++ b/xrspatial/geotiff/tests/test_unsupported_features_2349.py @@ -0,0 +1,361 @@ +"""Regression tests for issue #2349. + +Epic #2340 PR 5: unsupported GeoTIFF feature combinations fail loudly +with an actionable error message that names the feature, names the +offending input, and points the caller at the release tier map. + +Unsupported features covered here: + +* Full GDAL VRT parity (warped / pansharpened / processed / derived + VRT subclasses, derived raster bands, kernel-filtered sources, + unknown band sub-elements). +* Warped / reprojection VRTs (the GeoTransform check already lands on + ``RotatedTransformError``; the regression here pins the existing + behaviour so a future refactor cannot regress it back to a silent + no-georef path). +* Rotated / sheared GeoTIFF write (the eager writer already rejects + rotated ``attrs['transform']`` 6-tuples; the regression pins the + message wording so callers can still match on it). +* Silent mixed-metadata flattening across stacked VRT sources + (mismatched per-source nodata or AREA_OR_POINT registration). +""" +from __future__ import annotations + +import os +import uuid + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import (RotatedTransformError, UnsupportedGeoTIFFFeatureError, + open_geotiff, to_geotiff) +from xrspatial.geotiff._vrt import parse_vrt, write_vrt + + +# --------------------------------------------------------------------------- +# VRT parse-time gates: subClass, derived raster bands, unknown band children. +# --------------------------------------------------------------------------- + + +def test_warped_vrt_subclass_rejected_at_parse(): + """A ```` is not a plain mosaic. + + Without an explicit refusal the reader would dispatch on whatever + simple sources the warped VRT happens to embed and drop the warping + semantics silently. Pin the typed error and the feature-naming + substring so the message stays actionable for callers grepping for + "warped" / "subClass". + """ + xml = ( + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="subClass"): + parse_vrt(xml, '.') + + +def test_pansharpened_vrt_subclass_rejected_at_parse(): + """A ```` is rejected too. + + The subClass check covers every GDAL VRT subclass uniformly, not + just the warped one. Pin the pansharpened case so a caller who + points read_vrt at a pansharpened VRT sees the same actionable + failure rather than silently mis-reading. + """ + xml = ( + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="VRTPansharpened"): + parse_vrt(xml, '.') + + +def test_derived_rasterband_subclass_rejected_at_parse(): + """A ```` is rejected. + + Derived raster bands declare a pixel-function expression evaluated + over the sources. read_vrt has no pixel-function evaluator and + would drop straight to the simple-source path, producing wrong + output. Pin the typed error and the band number in the message. + """ + xml = ( + '' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match=r"band=1.*VRTDerivedRasterBand"): + parse_vrt(xml, '.') + + +def test_kernel_filtered_source_rejected_at_parse(): + """```` is a known unsupported source type. + + The previous parser silently skipped every non-Simple/Complex tag + inside ````. The new gate enumerates the known + output-altering children and raises on each. Pin the substring so + the caller can match on "KernelFilteredSource" specifically. + """ + xml = ( + '' + ' ' + ' ' + ' src.tif' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match="KernelFilteredSource"): + parse_vrt(xml, '.') + + +def test_pansharpening_options_rejected_at_parse(): + """```` inside a band is rejected. + + PansharpeningOptions sits under VRTRasterBand for the pansharpened + subClass case. The dataset-level subClass check fires first when + the subClass attribute is present, but a malformed VRT that omits + the subClass attribute still has to be rejected. Pin that path + here. + """ + xml = ( + '' + ' ' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match="PansharpeningOptions"): + parse_vrt(xml, '.') + + +def test_unknown_band_child_rejected_at_parse(): + """An unknown ```` child element is rejected. + + The previous parser silently skipped any unknown tag. A future GDAL + VRT extension that introduces a new pixel-altering element must + not slip past this reader as a no-op; the catch-all branch raises + rather than guess at semantics. + """ + xml = ( + '' + ' ' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match="FutureVRTPixelMutator"): + parse_vrt(xml, '.') + + +def test_informational_band_children_still_pass(tmp_path): + """```` / ```` / ```` / ```` skip. + + The gate enumerates known informational children that have no + effect on the array bytes and must still be ignored silently. + Pin the allow-list so a future refactor cannot regress it to + "raise on everything" and break legitimate VRTs. + """ + src = tmp_path / f'src_2349_info_{uuid.uuid4().hex[:6]}.tif' + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + y = np.arange(4, dtype=np.float64) + x = np.arange(4, dtype=np.float64) + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}) + to_geotiff(da, str(src), compression='none') + xml = ( + f'' + f' EPSG:4326' + f' 0.0, 1.0, 0.0, 0.0, 0.0, 1.0' + f' ' + f' test' + f' m' + f' -9999' + f' ' + f' {src.name}' + f' 1' + f' ' + f' ' + f' ' + f' ' + f'' + ) + parsed = parse_vrt(xml, str(tmp_path)) + assert len(parsed.bands) == 1 + assert len(parsed.bands[0].sources) == 1 + + +# --------------------------------------------------------------------------- +# VRT writer cross-source mixed-metadata gates. +# --------------------------------------------------------------------------- + + +def _unique_dir(tmp_path, label: str) -> str: + d = tmp_path / f"vrt_2349_{label}_{uuid.uuid4().hex[:8]}" + d.mkdir() + return str(d) + + +def _write_source(path: str, *, px: float = 1.0, py: float = -1.0, + origin_x: float = 0.0, origin_y: float = 100.0, + nodata: float | int | None = -9999.0, + raster_type: str = 'area', + crs: int = 4326, + dtype=np.float32, h: int = 4, w: int = 4) -> None: + arr = np.arange(h * w, dtype=dtype).reshape(h, w) + y = origin_y + (np.arange(h) + 0.5) * py + x = origin_x + (np.arange(w) + 0.5) * px + attrs = {'crs': crs, 'raster_type': raster_type} + if nodata is not None: + attrs['nodata'] = nodata + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs=attrs) + to_geotiff(da, path, compression='none', nodata=nodata) + + +def test_mixed_per_source_nodata_rejected(tmp_path): + """Two sources with different nodata sentinels fail the write. + + Legacy ``write_vrt`` picked ``first['nodata']`` for every band and + silently dropped the second source's sentinel. The fail-closed + surface refuses; the caller can override by pinning the mosaic + nodata via ``write_vrt(..., nodata=)``. Pin the typed error + and a substring naming the kwarg so the message stays actionable. + """ + d = _unique_dir(tmp_path, "nodata") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, nodata=-9999.0) + _write_source(b, origin_x=4.0, nodata=-1.0) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match=r"mixed.*nodata|nodata=\-9999"): + write_vrt(vrt, [a, b]) + + +def test_mixed_nodata_override_via_kwarg_passes(tmp_path): + """``write_vrt(..., nodata=)`` opts back into flatten-to-kwarg. + + The fail-closed default is the strict path; explicit caller intent + via the ``nodata`` kwarg overrides it. Pin that the opt-out keeps + working so the message is actionable rather than a dead-end. + """ + d = _unique_dir(tmp_path, "nodata_ok") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, nodata=-9999.0) + _write_source(b, origin_x=4.0, nodata=-1.0) + vrt = os.path.join(d, "out.vrt") + write_vrt(vrt, [a, b], nodata=-9999.0) + assert os.path.exists(vrt) + + +def test_mixed_raster_type_rejected(tmp_path): + """Sources disagreeing on AREA_OR_POINT registration fail the write. + + The mosaic writes a single dataset-level AREA_OR_POINT, so silently + flattening to the first source's value would shift the disagreeing + source by half a pixel on read. Pin the typed error and the + substring naming the mismatch. + """ + d = _unique_dir(tmp_path, "raster_type") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, raster_type='area') + _write_source(b, origin_x=4.0, raster_type='point') + vrt = os.path.join(d, "out.vrt") + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match=r"raster_type|AREA_OR_POINT"): + write_vrt(vrt, [a, b]) + + +# --------------------------------------------------------------------------- +# Rotated / sheared write gates: regression pins for the existing +# refusal at the eager writer entry point. +# --------------------------------------------------------------------------- + + +def test_eager_writer_rejects_rotated_6tuple_transform(tmp_path): + """``attrs['transform']`` 6-tuple with non-zero ``b`` or ``d`` is refused. + + The eager writer emits an axis-aligned GeoTIFF; silently dropping + the skew terms would place the raster at the wrong location. Pin + the message wording so the existing match patterns in other + fail-closed regressions keep matching. + """ + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + dims=['y', 'x'], + attrs={'transform': (1.0, 0.5, 0.0, 0.0, -1.0, 0.0)}, + ) + path = tmp_path / f"rotated_2349_{uuid.uuid4().hex[:6]}.tif" + with pytest.raises(ValueError, match=r"rotation/shear"): + to_geotiff(da, str(path)) + + +def test_eager_writer_rejects_rotated_affine_attr(tmp_path): + """``attrs['rotated_affine']`` (set by reader on ``allow_rotated``) refused. + + The reader stamps the rotated 6-tuple on this attr when called with + ``allow_rotated=True``. The writer has no ModelTransformationTag + emit path, so a read-then-write round-trip would silently lose the + rotation. Pin the refusal so the regression cannot regress into a + silent identity-affine output. + """ + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + dims=['y', 'x'], + attrs={'rotated_affine': (1.0, 0.5, 0.0, 0.0, -1.0, 0.0)}, + ) + path = tmp_path / f"rotated_affine_2349_{uuid.uuid4().hex[:6]}.tif" + with pytest.raises(ValueError, match=r"rotated_affine"): + to_geotiff(da, str(path)) + + +# --------------------------------------------------------------------------- +# Warped / reprojection VRT gate: regression pin for the existing +# RotatedTransformError on a VRT with non-zero GeoTransform skew terms. +# --------------------------------------------------------------------------- + + +def test_vrt_with_skewed_geotransform_rejected(tmp_path): + """A VRT GeoTransform with non-zero skew is rejected on read. + + The GDAL GeoTransform skew terms (positions 2 and 4 in the + GDAL ordering) flag a warped / reprojection VRT or a rotated + source. read_vrt has no resampler for the warped case; pin the + existing typed error so a future refactor cannot regress to the + silent no-georef fallback. + """ + src = tmp_path / f'flat_2349_{uuid.uuid4().hex[:6]}.tif' + arr = np.zeros((4, 4), dtype=np.float32) + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': np.arange(4, dtype=np.float64), + 'x': np.arange(4, dtype=np.float64)}, + attrs={'crs': 4326}) + to_geotiff(da, str(src), compression='none') + + vrt = tmp_path / f'rotated_2349_{uuid.uuid4().hex[:6]}.vrt' + vrt.write_text( + f'' + f' EPSG:4326' + f' 0.0, 1.0, 0.5, 0.0, 0.0, -1.0' + f' ' + f' ' + f' {src.name}' + f' 1' + f' ' + f' ' + f' ' + f' ' + f'' + ) + with pytest.raises(RotatedTransformError, match=r"rotated affine"): + open_geotiff(str(vrt)) From 510e515fd09125a93b68452d6df42e95bcdf78ba Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 04:36:01 -0700 Subject: [PATCH 2/4] Address review: dataset-level sweep, hoist frozensets, drop :data: role (#2349) Review fixes for PR 5 of epic #2340: * Add dataset-level sweep for ````, ````, and dataset-level ````. The band-children loop never saw these because they sit as siblings of ````; without the new sweep a VRT carrying a dataset-level mask band silently dropped the mask. * Hoist the three band-children classification frozensets (``_INFORMATIONAL_BAND_TAGS`` / ``_SOURCE_BAND_TAGS`` / ``_UNSUPPORTED_BAND_TAGS``) to module scope so parse_vrt does not rebuild them on every band iteration. * Add ``OverviewList`` and ``Overview`` to the informational set so the new "raise on unknown band child" branch does not regress VRTs with band-level overview declarations -- the previous parser ignored these and the new gate now ignores them too, matching real-world GDAL-emitted VRTs. * Group the new write-side cross-source checks (rotated source transform, mixed AREA_OR_POINT registration, mixed nodata) into three module-level helpers (``_check_no_rotated_source_transforms`` / ``_check_no_mixed_raster_type`` / ``_check_no_mixed_nodata``) so the cross-source policy is easy to read end-to-end. The pre-#2349 pixel-size / dtype / band-count / CRS checks stay inline because they predate this PR. * Drop the ``:data:`` Sphinx role from error messages. Callers see these strings in tracebacks, not rendered docs, so the plain ``\`xrspatial.geotiff.SUPPORTED_FEATURES\``` reads better. Three new tests pin the dataset-level MaskBand / GCPList refusals and the OverviewList allow-list. --- xrspatial/geotiff/_vrt.py | 324 ++++++++++++------ .../tests/test_unsupported_features_2349.py | 75 ++++ 2 files changed, 289 insertions(+), 110 deletions(-) diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index ea22b5dc..83370589 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -346,6 +346,60 @@ def _parse_band_nodata(text: str | None, return None +# VRTRasterBand child elements that read_vrt explicitly supports or +# tolerates. Hoisted to module scope so the parse_vrt band loop does +# not rebuild the frozensets on every iteration (epic #2340 / PR 5). +# +# ``_INFORMATIONAL_BAND_TAGS`` -- pure-metadata children that have no +# effect on the assembled array. Silently ignored. +# +# ``_SOURCE_BAND_TAGS`` -- the source types whose pixel-placement +# semantics this reader implements. +# +# ``_UNSUPPORTED_BAND_TAGS`` -- known GDAL VRT children that alter the +# computed output (kernel filters, derived band expressions, mask / +# pansharpening sources). Rejected with ``UnsupportedGeoTIFFFeatureError`` +# so a VRT relying on them cannot read as a plain mosaic and lose its +# declared computation. Anything that is none of the three -- e.g. a +# future GDAL VRT extension -- also raises, via the catch-all branch in +# ``parse_vrt``. +_INFORMATIONAL_BAND_TAGS = frozenset({ + 'NoDataValue', 'ColorInterp', 'Description', 'Offset', + 'Scale', 'Metadata', 'UnitType', 'CategoryNames', + 'ColorTable', 'Histograms', 'GDALRasterAttributeTable', + 'ColorEntry', 'HideNoDataValue', + # VRT band-level overview declarations. The reader does not load + # VRT-declared overviews (overview_level= is honoured by the + # underlying GeoTIFF source reader, not by VRT-level overview + # tags), so keeping these informational preserves pre-#2349 + # behaviour for VRTs emitted by GDAL with declared overviews. + 'OverviewList', 'Overview', +}) +_SOURCE_BAND_TAGS = frozenset({'SimpleSource', 'ComplexSource'}) +_UNSUPPORTED_BAND_TAGS = frozenset({ + 'KernelFilteredSource', 'AveragedSource', 'NoDataFromMaskSource', + 'PixelFunctionType', 'PixelFunctionLanguage', + 'PixelFunctionCode', 'PixelFunctionArguments', + 'SourceTransferType', 'MaskBand', 'PansharpeningOptions', +}) + +# Dataset-level (```` children, sibling of ````) +# elements that signal a feature this reader does not implement. The band +# loop never sees these because they sit at the dataset level; sweep the +# root for them after the ``subClass`` check. See epic #2340 / PR 5. +_UNSUPPORTED_DATASET_TAGS = frozenset({ + # Per-dataset alpha / mask band. GDAL writes this as a sibling of + # the VRTRasterBand entries; the previous parser ignored it and + # silently dropped the per-pixel mask. + 'MaskBand', + # Ground-control-point list for warped reprojection. + 'GCPList', + # Pansharpening setup at the dataset level (separate from the + # band-level ). + 'PansharpeningOptions', +}) + + def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: """Parse a VRT XML string into a VRTDataset. @@ -395,12 +449,31 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: f"a supported feature in this release. read_vrt implements " f"plain ```` mosaics only; warped / pansharpened " f"/ processed / derived VRT subclasses are listed as " - f"unsupported in :data:`xrspatial.geotiff.SUPPORTED_FEATURES` " + f"unsupported in `xrspatial.geotiff.SUPPORTED_FEATURES` " f"(see epic #2340). Materialise the warped or derived output " f"with an external tool (e.g. ``gdalwarp``) and read the " f"resulting GeoTIFF instead." ) + # Sweep the dataset root for known unsupported sibling elements. + # The band loop below only walks ```` children, so a + # ```` / ```` / dataset-level + # ```` would otherwise slip through silently + # and drop its declared semantics (per-pixel mask, ground control + # points, pansharpening setup). See epic #2340 / PR 5. + for _ds_child in root: + _ds_tag = _ds_child.tag + if isinstance(_ds_tag, str) and _ds_tag in _UNSUPPORTED_DATASET_TAGS: + raise UnsupportedGeoTIFFFeatureError( + f"VRTDataset declares a <{_ds_tag}> element, which is " + f"not a supported feature in this release. read_vrt " + f"implements plain ```` mosaics over " + f"```` children only; mask bands, GCP " + f"lists, and dataset-level pansharpening setup are " + f"listed as unsupported in " + f"`xrspatial.geotiff.SUPPORTED_FEATURES` (epic #2340)." + ) + width = int(root.get('rasterXSize', 0)) height = int(root.get('rasterYSize', 0)) @@ -446,8 +519,7 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: f"by read_vrt. Only the plain ```` " f"mosaic case is implemented; derived / pixel-function " f"bands are listed as unsupported in " - f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` " - f"(epic #2340)." + f"`xrspatial.geotiff.SUPPORTED_FEATURES` (epic #2340)." ) # Distinguish "attribute missing" (GDAL default: Float32) from # "attribute present but unsupported". The previous @@ -480,27 +552,13 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: nodata = _parse_band_nodata(nodata_str, dtype) color_interp = _text(band_elem, 'ColorInterp') - # GDAL VRT allows several band sub-elements that this reader - # silently ignored before #2349. The pure-metadata ones - # (informational, no effect on the array) stay ignored; the - # ones that alter the computed output (kernel filters, derived - # band expressions, mask bands, pansharpening) get rejected so - # a VRT relying on them cannot read as a plain mosaic and lose + # Band-children classification is driven by the module-scope + # ``_INFORMATIONAL_BAND_TAGS`` / ``_SOURCE_BAND_TAGS`` / + # ``_UNSUPPORTED_BAND_TAGS`` frozensets above. The pure-metadata + # ones stay ignored; the output-altering ones (kernel filters, + # derived band expressions, pansharpening) get rejected so a + # VRT relying on them cannot read as a plain mosaic and lose # its declared computation. See epic #2340 / PR 5. - _INFORMATIONAL_BAND_TAGS = frozenset({ - 'NoDataValue', 'ColorInterp', 'Description', 'Offset', - 'Scale', 'Metadata', 'UnitType', 'CategoryNames', - 'ColorTable', 'Histograms', 'GDALRasterAttributeTable', - 'ColorEntry', 'HideNoDataValue', - }) - _SOURCE_BAND_TAGS = frozenset({'SimpleSource', 'ComplexSource'}) - _UNSUPPORTED_BAND_TAGS = frozenset({ - 'KernelFilteredSource', 'AveragedSource', 'NoDataFromMaskSource', - 'PixelFunctionType', 'PixelFunctionLanguage', - 'PixelFunctionCode', 'PixelFunctionArguments', - 'SourceTransferType', 'MaskBand', 'PansharpeningOptions', - }) - sources = [] for src_elem in band_elem: tag = src_elem.tag @@ -514,8 +572,8 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: f"mask-band source) that read_vrt does not " f"implement. Only ```` and " f"```` are supported. See " - f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` for " - f"the release tier map (epic #2340)." + f"`xrspatial.geotiff.SUPPORTED_FEATURES` for the " + f"release tier map (epic #2340)." ) if tag not in _SOURCE_BAND_TAGS: # Unknown band child that is neither informational nor a @@ -529,8 +587,8 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: f"```` and ```` sources " f"plus informational metadata children; an unknown " f"element could change the declared pixel output. " - f"See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` " - f"for the release tier map (epic #2340)." + f"See `xrspatial.geotiff.SUPPORTED_FEATURES` for " + f"the release tier map (epic #2340)." ) filename = _text(src_elem, 'SourceFilename') or '' @@ -1585,6 +1643,112 @@ def _vrt_dtype_name_for(bps, sample_format): ) +def _check_no_rotated_source_transforms(sources_meta: list[dict]) -> None: + """Reject VRT writer sources whose transform has non-zero skew (#2349). + + The writer emits an axis-aligned mosaic GeoTransform with ``0.0`` + skew slots. A source whose own GeoTIFF declared a rotated / + sheared affine cannot be placed correctly on that mosaic; the + on-disk VRT would point at the source with the rotation silently + dropped, mis-locating the tile. Refuse rather than write a + mis-aligned mosaic. + + The rotated 6-tuple is stamped on ``GeoTransform.rotated_affine`` + by the reader when ``allow_rotated=True`` -- see issue #2129. A + plain axis-aligned source has ``rotated_affine=None`` and skips + the check. + """ + for m in sources_meta: + ra = getattr(m['transform'], 'rotated_affine', None) + if ra is None: + continue + # ``rotated_affine`` is the rasterio 6-tuple + # ``(pixel_width, b, origin_x, d, pixel_height, origin_y)``; + # positions 1 and 3 are the skew terms. + try: + b = float(ra[1]) + d = float(ra[3]) + except (IndexError, TypeError, ValueError): + b = d = 0.0 + if b != 0.0 or d != 0.0: + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} has a rotated / sheared " + f"affine transform (b={b!r}, d={d!r}); rotated source " + f"transforms are not a supported feature in this " + f"release. write_vrt emits an axis-aligned mosaic and " + f"would silently drop the skew terms, mis-placing the " + f"source on the virtual raster. Reproject the source " + f"to an axis-aligned grid before adding it to the " + f"mosaic. See `xrspatial.geotiff.SUPPORTED_FEATURES` " + f"for the release tier map (epic #2340)." + ) + + +def _check_no_mixed_raster_type(sources_meta: list[dict]) -> None: + """Reject VRT writer sources whose AREA_OR_POINT disagrees (#2349). + + The mosaic VRT emits a single dataset-level ``AREA_OR_POINT`` + value. Silent flattening to ``first['raster_type']`` would shift + the disagreeing source by half a pixel on read, because + PixelIsArea origin is the pixel's edge and PixelIsPoint origin is + the pixel's centre. + """ + if not sources_meta: + return + first = sources_meta[0] + first_raster_type = first.get('raster_type') + for m in sources_meta[1:]: + m_raster_type = m.get('raster_type') + if (m_raster_type is not None and first_raster_type is not None + and m_raster_type != first_raster_type): + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} declares raster_type=" + f"{m_raster_type!r} which does not match the first " + f"source ({first_raster_type!r}). Silent flattening of " + f"mixed AREA_OR_POINT registration across stacked " + f"sources is not supported; the mosaic would emit a " + f"single dataset-level value and shift the disagreeing " + f"source by half a pixel. Re-tag the sources so they " + f"share the same pixel registration before mosaicing. " + f"See `xrspatial.geotiff.SUPPORTED_FEATURES` " + f"(epic #2340)." + ) + + +def _check_no_mixed_nodata(sources_meta: list[dict], *, + caller_nodata) -> None: + """Reject VRT writer sources whose nodata sentinels disagree (#2349). + + The legacy writer silently picked ``first['nodata']`` for every + band of every source, so a tile whose own sentinel was different + had its "valid" pixels masked by the mosaic-level + ```` on read. Fail closed unless the caller pinned + the mosaic nodata explicitly via the ``nodata`` kwarg. Mirrors + the read-side ``MixedBandMetadataError`` opt-out pattern. + """ + if caller_nodata is not None or not sources_meta: + return + first = sources_meta[0] + first_nodata = first.get('nodata') + for m in sources_meta[1:]: + m_nodata = m.get('nodata') + if m_nodata != first_nodata: + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} declares nodata=" + f"{m_nodata!r} which does not match the first source " + f"({first_nodata!r}). Silent flattening of mixed " + f"per-source nodata sentinels across stacked sources " + f"is not supported; the writer would emit the first " + f"source's value as a single dataset-level " + f"```` and the disagreeing source's " + f"sentinel would become a valid-looking pixel on " + f"read. Either pin the mosaic nodata explicitly with " + f"``write_vrt(..., nodata=)`` or re-tag the " + f"sources so they agree. See " + f"`xrspatial.geotiff.SUPPORTED_FEATURES` (epic #2340)." + ) + + def write_vrt(vrt_path: str, source_files: list[str], *, relative: bool = True, crs_wkt: str | None = None, @@ -1705,42 +1869,30 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: return abs(a - b) > 0.0 return abs(a - b) / denom > _PIXEL_SIZE_RTOL - # Reject sources whose declared transform carries non-zero skew - # terms (rotated / sheared affine). The writer emits an axis-aligned - # GeoTransform with ``0.0`` skew slots; a rotated source would be - # placed at the wrong pixel offset in the mosaic and the on-disk - # VRT would point at the source with the rotation silently dropped. - # Refuse rather than write a mosaic with mis-located tiles. - # Epic #2340 / PR 5. - for m in sources_meta: - ra = getattr(m['transform'], 'rotated_affine', None) - if ra is None: - continue - # ``rotated_affine`` is the rasterio 6-tuple - # ``(pixel_width, b, origin_x, d, pixel_height, origin_y)``; - # positions 1 and 3 are the skew terms. - try: - b = float(ra[1]) - d = float(ra[3]) - except (IndexError, TypeError, ValueError): - b = d = 0.0 - if b != 0.0 or d != 0.0: - raise UnsupportedGeoTIFFFeatureError( - f"VRT source {m['path']!r} has a rotated / sheared " - f"affine transform (b={b!r}, d={d!r}); rotated source " - f"transforms are not a supported feature in this " - f"release. write_vrt emits an axis-aligned mosaic and " - f"would silently drop the skew terms, mis-placing the " - f"source on the virtual raster. Reproject the source " - f"to an axis-aligned grid before adding it to the " - f"mosaic. See " - f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the " - f"release tier map (epic #2340)." - ) - + # Group the new "fail-loudly on unsupported feature combinations" + # gates (epic #2340 / PR 5) in one block so the cross-source policy + # is easy to read end-to-end: + # + # 1. Reject any source whose declared transform carries a + # non-zero skew term (rotated / sheared affine). write_vrt + # emits an axis-aligned GeoTransform, so a rotated source + # would be silently mis-placed on the mosaic. + # 2. Reject mixed per-source AREA_OR_POINT registration. The + # mosaic writes a single dataset-level value; flattening to + # ``first`` would shift the disagreeing source by half a + # pixel on read. + # 3. Reject mixed per-source nodata sentinels unless the caller + # pinned the mosaic value via the ``nodata`` kwarg. Mirrors + # the read-side ``MixedBandMetadataError`` opt-out pattern. + _check_no_rotated_source_transforms(sources_meta) + _check_no_mixed_raster_type(sources_meta) + _check_no_mixed_nodata(sources_meta, caller_nodata=nodata) + + # Pixel size, sample format, band count, and CRS share the + # documented "all sources must agree with first" contract (#1733). + # These remain inline here because they predate #2349 and have + # their own match-pattern tests. first_crs = first.get('crs_wkt') - first_raster_type = first.get('raster_type') - first_nodata = first.get('nodata') for m in sources_meta[1:]: t = m['transform'] if _pixel_size_mismatch(t.pixel_width, res_x) \ @@ -1779,54 +1931,6 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: f"match the first source. All sources in a VRT must " f"share the same CRS." ) - # Pixel registration must match across sources. The mosaic - # writes a single dataset-level ``AREA_OR_POINT``; flattening to - # the first source's value would silently shift downstream pixel - # coords by half a pixel for every source that disagrees. - # Epic #2340 / PR 5. - m_raster_type = m.get('raster_type') - if (m_raster_type is not None and first_raster_type is not None - and m_raster_type != first_raster_type): - raise UnsupportedGeoTIFFFeatureError( - f"VRT source {m['path']!r} declares raster_type=" - f"{m_raster_type!r} which does not match the first " - f"source ({first_raster_type!r}). Silent flattening of " - f"mixed AREA_OR_POINT registration across stacked " - f"sources is not supported; the mosaic would emit a " - f"single dataset-level value and shift the disagreeing " - f"source by half a pixel. Re-tag the sources so they " - f"share the same pixel registration before mosaicing. " - f"See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` " - f"(epic #2340)." - ) - # Refuse mixed per-source nodata sentinels when the caller did - # not pin one via the ``nodata`` kwarg. The legacy writer - # silently picked ``first['nodata']`` for every band of every - # source, which means a tile whose own sentinel was different - # got its "valid" pixels masked once the consumer applied the - # mosaic-level ````. The fail-closed surface - # mirrors the read-side ``MixedBandMetadataError`` opt-out: - # callers who really want the first-source value override the - # check by passing ``nodata=`` explicitly. Epic #2340 / - # PR 5. - m_nodata = m.get('nodata') - if nodata is None and m_nodata != first_nodata: - raise UnsupportedGeoTIFFFeatureError( - f"VRT source {m['path']!r} declares nodata=" - f"{m_nodata!r} which does not match the first source " - f"({first_nodata!r}). Silent flattening of mixed " - f"per-source nodata sentinels across stacked sources " - f"is not supported; the writer would emit the first " - f"source's value as a single dataset-level " - f"```` and the disagreeing source's " - f"sentinel would become a " - f"valid-looking pixel on read. Either pin the mosaic " - f"nodata explicitly with ``write_vrt(..., " - f"nodata=)`` or re-tag the sources so they " - f"agree. See " - f":data:`xrspatial.geotiff.SUPPORTED_FEATURES` " - f"(epic #2340)." - ) # Compute the bounding box of all sources all_x0, all_y0, all_x1, all_y1 = [], [], [], [] diff --git a/xrspatial/geotiff/tests/test_unsupported_features_2349.py b/xrspatial/geotiff/tests/test_unsupported_features_2349.py index 8975ac00..87bee0e0 100644 --- a/xrspatial/geotiff/tests/test_unsupported_features_2349.py +++ b/xrspatial/geotiff/tests/test_unsupported_features_2349.py @@ -153,6 +153,81 @@ def test_unknown_band_child_rejected_at_parse(): parse_vrt(xml, '.') +def test_dataset_level_maskband_rejected_at_parse(): + """A dataset-level ```` sibling of VRTRasterBand is rejected. + + Per the GDAL VRT spec, ```` lives at the ```` + level (not inside a band). The band-children loop never sees it, + so without the dataset-root sweep the mask gets silently dropped. + Pin the typed error and the substring naming the offending tag. + """ + xml = ( + '' + ' ' + ' ' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="MaskBand"): + parse_vrt(xml, '.') + + +def test_dataset_level_gcplist_rejected_at_parse(): + """A dataset-level ```` (ground-control points) is rejected. + + GCPList signals a non-axis-aligned georeferencing model that + read_vrt cannot honour. Pin the rejection so a future refactor + cannot regress to the silent no-op pre-#2349 behaviour. + """ + xml = ( + '' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="GCPList"): + parse_vrt(xml, '.') + + +def test_overview_list_band_child_still_passes(tmp_path): + """```` and ```` band children are informational. + + GDAL emits these on VRTs whose source GeoTIFFs carry external + overviews. read_vrt does not consume VRT-level overview + declarations (the source-side reader handles overviews via + ``overview_level=``), so the elements were and remain + no-ops. Pin the allow-list so the catch-all "unknown element" + branch added in #2349 does not regress this case. + """ + src = tmp_path / f'src_2349_ov_{uuid.uuid4().hex[:6]}.tif' + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + y = np.arange(4, dtype=np.float64) + x = np.arange(4, dtype=np.float64) + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}) + to_geotiff(da, str(src), compression='none') + xml = ( + f'' + f' EPSG:4326' + f' 0.0, 1.0, 0.0, 0.0, 0.0, 1.0' + f' ' + f' 2 4' + f' ' + f' {src.name}' + f' 1' + f' ' + f' ' + f' ' + f' ' + f'' + ) + parsed = parse_vrt(xml, str(tmp_path)) + assert len(parsed.bands) == 1 + assert len(parsed.bands[0].sources) == 1 + + def test_informational_band_children_still_pass(tmp_path): """```` / ```` / ```` / ```` skip. From 0ff6f064bfe40aac62711ba3d8ea9fa3b73e0a24 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 04:40:42 -0700 Subject: [PATCH 3/4] Add UnsupportedGeoTIFFFeatureError to the pinned __all__ inventory (#2349) test_features.TestPublicAPI.test_all_lists_supported_functions pins the exact public surface of ``xrspatial.geotiff.__all__``. The new ``UnsupportedGeoTIFFFeatureError`` added in this PR's earlier commit needs to be in the expected set too. --- xrspatial/geotiff/tests/test_features.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 4d5b54bd..272ee146 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2785,6 +2785,12 @@ def test_all_lists_supported_functions(self): 'RotatedTransformError', 'UnknownCRSModelTypeError', 'UnparseableCRSError', + # Epic #2340 / PR 5 (#2349): typed error raised at the read + # or write entry point when the caller asks for a feature + # the GeoTIFF module does not implement (warped / + # pansharpened / derived VRT subclasses, unknown VRT band + # children, rotated source transforms on a VRT mosaic). + 'UnsupportedGeoTIFFFeatureError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', # Canonical georef_status constants (issue #2136). Exposed From 88e590468a7f46db02dd97c20a43dfd34318b5bb Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 04:41:48 -0700 Subject: [PATCH 4/4] Treat matching NaN nodata as agreement, not mismatch (#2349) ``float('nan') != float('nan')`` evaluates True in plain Python, so the cross-source nodata check would flag two perfectly consistent NaN-sentinel sources as a mismatch. NaN nodata is the standard sentinel for float32 / float64 GeoTIFFs, so this would have made the new fail-closed gate too aggressive on real-world inputs. Compare via ``math.isnan`` so two NaNs are equal. Add a regression test that mosaics two sources both carrying NaN nodata and pins that the writer no longer rejects them. --- xrspatial/geotiff/_vrt.py | 28 ++++++++++++++++++- .../tests/test_unsupported_features_2349.py | 20 +++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 83370589..e2737616 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -1715,6 +1715,32 @@ def _check_no_mixed_raster_type(sources_meta: list[dict]) -> None: ) +def _nodata_values_agree(a, b) -> bool: + """Return True iff two nodata sentinels are the same value. + + Float NaN must be compared via :func:`math.isnan` because + ``float('nan') != float('nan')`` is ``True`` and would otherwise + flag two sources that both carry the standard float NaN sentinel + as a mismatch. ``None`` (no sentinel declared) is symmetric: two + Nones agree. Integer / float cross-type equality + (``-9999 == -9999.0``) is fine as-is. + """ + if a is None and b is None: + return True + if a is None or b is None: + return False + try: + a_is_nan = isinstance(a, float) and math.isnan(a) + b_is_nan = isinstance(b, float) and math.isnan(b) + except (TypeError, ValueError): + a_is_nan = b_is_nan = False + if a_is_nan and b_is_nan: + return True + if a_is_nan or b_is_nan: + return False + return a == b + + def _check_no_mixed_nodata(sources_meta: list[dict], *, caller_nodata) -> None: """Reject VRT writer sources whose nodata sentinels disagree (#2349). @@ -1732,7 +1758,7 @@ def _check_no_mixed_nodata(sources_meta: list[dict], *, first_nodata = first.get('nodata') for m in sources_meta[1:]: m_nodata = m.get('nodata') - if m_nodata != first_nodata: + if not _nodata_values_agree(m_nodata, first_nodata): raise UnsupportedGeoTIFFFeatureError( f"VRT source {m['path']!r} declares nodata=" f"{m_nodata!r} which does not match the first source " diff --git a/xrspatial/geotiff/tests/test_unsupported_features_2349.py b/xrspatial/geotiff/tests/test_unsupported_features_2349.py index 87bee0e0..842451ae 100644 --- a/xrspatial/geotiff/tests/test_unsupported_features_2349.py +++ b/xrspatial/geotiff/tests/test_unsupported_features_2349.py @@ -315,6 +315,26 @@ def test_mixed_per_source_nodata_rejected(tmp_path): write_vrt(vrt, [a, b]) +def test_matching_nan_nodata_passes(tmp_path): + """Two sources both declaring NaN nodata are not a mismatch. + + ``float('nan') != float('nan')`` evaluates True in plain Python, + so the naive cross-source equality check would flag a perfectly + consistent pair of NaN-sentinel sources as a mismatch. The + helper compares via ``math.isnan`` to keep two NaNs equal. Pin + the round-trip so a refactor cannot regress to the naive + comparator. + """ + d = _unique_dir(tmp_path, "nan_nodata") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, nodata=float('nan')) + _write_source(b, origin_x=4.0, nodata=float('nan')) + vrt = os.path.join(d, "out.vrt") + write_vrt(vrt, [a, b]) + assert os.path.exists(vrt) + + def test_mixed_nodata_override_via_kwarg_passes(tmp_path): """``write_vrt(..., nodata=)`` opts back into flatten-to-kwarg.