diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 73c3145e..ecc2e5a1 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -73,7 +73,8 @@ from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg # noqa: F401 from ._errors import (ConflictingCRSError, ConflictingNodataError, GeoTIFFAmbiguousMetadataError, InvalidCRSCodeError, MixedBandMetadataError, NonUniformCoordsError, - RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError) + RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError, + UnsupportedGeoTIFFFeatureError) from ._geotags import RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT, GeoTransform # noqa: F401 from ._reader import _MAX_CLOUD_BYTES_SENTINEL, UnsafeURLError from ._reader import read_to_array as _read_to_array @@ -115,6 +116,7 @@ 'UnknownCRSModelTypeError', 'UnparseableCRSError', 'UnsafeURLError', + 'UnsupportedGeoTIFFFeatureError', 'open_geotiff', 'read_geotiff_gpu', 'read_geotiff_dask', diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py index 47f2492e..c6495461 100644 --- a/xrspatial/geotiff/_errors.py +++ b/xrspatial/geotiff/_errors.py @@ -118,6 +118,22 @@ class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): """ +class UnsupportedGeoTIFFFeatureError(ValueError): + """Caller asked for a feature this release does not implement (#2349). + + Raised at the read or write entry point when the input declares a + feature the GeoTIFF module does not support (warped / reprojection + VRTs, pansharpened / processed / derived VRT subclasses, unknown + VRT band children, source transforms with non-zero skew on a VRT + mosaic). The message names the feature and points the caller at + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. + + Subclasses ``ValueError`` so existing ``except ValueError`` callers + keep catching the case; new code can ``except`` this class to + distinguish "we refuse this input" from "the input is malformed". + """ + + __all__ = [ "GeoTIFFAmbiguousMetadataError", "InvalidCRSCodeError", @@ -128,4 +144,5 @@ class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): "ConflictingCRSError", "ConflictingNodataError", "UnknownCRSModelTypeError", + "UnsupportedGeoTIFFFeatureError", ] diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 3bd370ce..e2737616 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -17,6 +17,7 @@ import numpy as np +from ._errors import UnsupportedGeoTIFFFeatureError from ._safe_xml import safe_fromstring @@ -345,6 +346,60 @@ def _parse_band_nodata(text: str | None, return None +# VRTRasterBand child elements that read_vrt explicitly supports or +# tolerates. Hoisted to module scope so the parse_vrt band loop does +# not rebuild the frozensets on every iteration (epic #2340 / PR 5). +# +# ``_INFORMATIONAL_BAND_TAGS`` -- pure-metadata children that have no +# effect on the assembled array. Silently ignored. +# +# ``_SOURCE_BAND_TAGS`` -- the source types whose pixel-placement +# semantics this reader implements. +# +# ``_UNSUPPORTED_BAND_TAGS`` -- known GDAL VRT children that alter the +# computed output (kernel filters, derived band expressions, mask / +# pansharpening sources). Rejected with ``UnsupportedGeoTIFFFeatureError`` +# so a VRT relying on them cannot read as a plain mosaic and lose its +# declared computation. Anything that is none of the three -- e.g. a +# future GDAL VRT extension -- also raises, via the catch-all branch in +# ``parse_vrt``. +_INFORMATIONAL_BAND_TAGS = frozenset({ + 'NoDataValue', 'ColorInterp', 'Description', 'Offset', + 'Scale', 'Metadata', 'UnitType', 'CategoryNames', + 'ColorTable', 'Histograms', 'GDALRasterAttributeTable', + 'ColorEntry', 'HideNoDataValue', + # VRT band-level overview declarations. The reader does not load + # VRT-declared overviews (overview_level= is honoured by the + # underlying GeoTIFF source reader, not by VRT-level overview + # tags), so keeping these informational preserves pre-#2349 + # behaviour for VRTs emitted by GDAL with declared overviews. + 'OverviewList', 'Overview', +}) +_SOURCE_BAND_TAGS = frozenset({'SimpleSource', 'ComplexSource'}) +_UNSUPPORTED_BAND_TAGS = frozenset({ + 'KernelFilteredSource', 'AveragedSource', 'NoDataFromMaskSource', + 'PixelFunctionType', 'PixelFunctionLanguage', + 'PixelFunctionCode', 'PixelFunctionArguments', + 'SourceTransferType', 'MaskBand', 'PansharpeningOptions', +}) + +# Dataset-level (```` children, sibling of ````) +# elements that signal a feature this reader does not implement. The band +# loop never sees these because they sit at the dataset level; sweep the +# root for them after the ``subClass`` check. See epic #2340 / PR 5. +_UNSUPPORTED_DATASET_TAGS = frozenset({ + # Per-dataset alpha / mask band. GDAL writes this as a sibling of + # the VRTRasterBand entries; the previous parser ignored it and + # silently dropped the per-pixel mask. + 'MaskBand', + # Ground-control-point list for warped reprojection. + 'GCPList', + # Pansharpening setup at the dataset level (separate from the + # band-level ). + 'PansharpeningOptions', +}) + + def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: """Parse a VRT XML string into a VRTDataset. @@ -378,6 +433,47 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: vrt_root = os.path.realpath(vrt_dir) allowed_roots = _allowed_source_roots() + # Refuse any GDAL VRT subclass beyond the plain dataset. The mosaic + # reader implements the simple ```` case only; warped / + # pansharpened / processed / derived subclasses depend on a separate + # pipeline (reprojection, kernel filters, derived expressions) that + # this release does not ship. A silently-ignored ``subClass`` would + # produce wrong output: the reader would dispatch on whatever simple + # sources the subclassed VRT happens to embed and skip the subclass + # semantics entirely. Name the offending attribute in the error and + # point at the release tier map. See epic #2340 / PR 5 (#2349). + _sub_class = root.get('subClass') + if _sub_class: + raise UnsupportedGeoTIFFFeatureError( + f"VRTDataset declares subClass={_sub_class!r}, which is not " + f"a supported feature in this release. read_vrt implements " + f"plain ```` mosaics only; warped / pansharpened " + f"/ processed / derived VRT subclasses are listed as " + f"unsupported in `xrspatial.geotiff.SUPPORTED_FEATURES` " + f"(see epic #2340). Materialise the warped or derived output " + f"with an external tool (e.g. ``gdalwarp``) and read the " + f"resulting GeoTIFF instead." + ) + + # Sweep the dataset root for known unsupported sibling elements. + # The band loop below only walks ```` children, so a + # ```` / ```` / dataset-level + # ```` would otherwise slip through silently + # and drop its declared semantics (per-pixel mask, ground control + # points, pansharpening setup). See epic #2340 / PR 5. + for _ds_child in root: + _ds_tag = _ds_child.tag + if isinstance(_ds_tag, str) and _ds_tag in _UNSUPPORTED_DATASET_TAGS: + raise UnsupportedGeoTIFFFeatureError( + f"VRTDataset declares a <{_ds_tag}> element, which is " + f"not a supported feature in this release. read_vrt " + f"implements plain ```` mosaics over " + f"```` children only; mask bands, GCP " + f"lists, and dataset-level pansharpening setup are " + f"listed as unsupported in " + f"`xrspatial.geotiff.SUPPORTED_FEATURES` (epic #2340)." + ) + width = int(root.get('rasterXSize', 0)) height = int(root.get('rasterYSize', 0)) @@ -409,6 +505,22 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: bands = [] for band_elem in root.findall('VRTRasterBand'): band_num = int(band_elem.get('band', 1)) + # ``subClass="VRTDerivedRasterBand"`` (and other future subclasses) + # signal a band-level computation that read_vrt does not + # implement. The plain ```` mosaic case has no + # ``subClass`` attribute. Reject so the derived expression is + # not silently dropped down to whatever sources happen to be + # listed. Epic #2340 / PR 5. + _band_sub_class = band_elem.get('subClass') + if _band_sub_class: + raise UnsupportedGeoTIFFFeatureError( + f"VRTRasterBand band={band_num} declares " + f"subClass={_band_sub_class!r}, which is not supported " + f"by read_vrt. Only the plain ```` " + f"mosaic case is implemented; derived / pixel-function " + f"bands are listed as unsupported in " + f"`xrspatial.geotiff.SUPPORTED_FEATURES` (epic #2340)." + ) # Distinguish "attribute missing" (GDAL default: Float32) from # "attribute present but unsupported". The previous # ``_DTYPE_MAP.get(dtype_name, np.float32)`` collapsed both cases @@ -440,11 +552,44 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset: nodata = _parse_band_nodata(nodata_str, dtype) color_interp = _text(band_elem, 'ColorInterp') + # Band-children classification is driven by the module-scope + # ``_INFORMATIONAL_BAND_TAGS`` / ``_SOURCE_BAND_TAGS`` / + # ``_UNSUPPORTED_BAND_TAGS`` frozensets above. The pure-metadata + # ones stay ignored; the output-altering ones (kernel filters, + # derived band expressions, pansharpening) get rejected so a + # VRT relying on them cannot read as a plain mosaic and lose + # its declared computation. See epic #2340 / PR 5. sources = [] for src_elem in band_elem: tag = src_elem.tag - if tag not in ('SimpleSource', 'ComplexSource'): + if tag in _INFORMATIONAL_BAND_TAGS: continue + if tag in _UNSUPPORTED_BAND_TAGS: + raise UnsupportedGeoTIFFFeatureError( + f"VRTRasterBand band={band_num} contains a " + f"<{tag}> element, which declares a VRT feature " + f"(kernel-filtered / derived / pansharpened / " + f"mask-band source) that read_vrt does not " + f"implement. Only ```` and " + f"```` are supported. See " + f"`xrspatial.geotiff.SUPPORTED_FEATURES` for the " + f"release tier map (epic #2340)." + ) + if tag not in _SOURCE_BAND_TAGS: + # Unknown band child that is neither informational nor a + # known unsupported-feature marker. Refuse rather than + # silently dropping the element: a future GDAL VRT + # extension that alters pixel computation must not pass + # through this reader as if it were a no-op. + raise UnsupportedGeoTIFFFeatureError( + f"VRTRasterBand band={band_num} contains an unknown " + f"<{tag}> element. read_vrt only recognises " + f"```` and ```` sources " + f"plus informational metadata children; an unknown " + f"element could change the declared pixel output. " + f"See `xrspatial.geotiff.SUPPORTED_FEATURES` for " + f"the release tier map (epic #2340)." + ) filename = _text(src_elem, 'SourceFilename') or '' relative = src_elem.find('SourceFilename') @@ -1498,6 +1643,138 @@ def _vrt_dtype_name_for(bps, sample_format): ) +def _check_no_rotated_source_transforms(sources_meta: list[dict]) -> None: + """Reject VRT writer sources whose transform has non-zero skew (#2349). + + The writer emits an axis-aligned mosaic GeoTransform with ``0.0`` + skew slots. A source whose own GeoTIFF declared a rotated / + sheared affine cannot be placed correctly on that mosaic; the + on-disk VRT would point at the source with the rotation silently + dropped, mis-locating the tile. Refuse rather than write a + mis-aligned mosaic. + + The rotated 6-tuple is stamped on ``GeoTransform.rotated_affine`` + by the reader when ``allow_rotated=True`` -- see issue #2129. A + plain axis-aligned source has ``rotated_affine=None`` and skips + the check. + """ + for m in sources_meta: + ra = getattr(m['transform'], 'rotated_affine', None) + if ra is None: + continue + # ``rotated_affine`` is the rasterio 6-tuple + # ``(pixel_width, b, origin_x, d, pixel_height, origin_y)``; + # positions 1 and 3 are the skew terms. + try: + b = float(ra[1]) + d = float(ra[3]) + except (IndexError, TypeError, ValueError): + b = d = 0.0 + if b != 0.0 or d != 0.0: + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} has a rotated / sheared " + f"affine transform (b={b!r}, d={d!r}); rotated source " + f"transforms are not a supported feature in this " + f"release. write_vrt emits an axis-aligned mosaic and " + f"would silently drop the skew terms, mis-placing the " + f"source on the virtual raster. Reproject the source " + f"to an axis-aligned grid before adding it to the " + f"mosaic. See `xrspatial.geotiff.SUPPORTED_FEATURES` " + f"for the release tier map (epic #2340)." + ) + + +def _check_no_mixed_raster_type(sources_meta: list[dict]) -> None: + """Reject VRT writer sources whose AREA_OR_POINT disagrees (#2349). + + The mosaic VRT emits a single dataset-level ``AREA_OR_POINT`` + value. Silent flattening to ``first['raster_type']`` would shift + the disagreeing source by half a pixel on read, because + PixelIsArea origin is the pixel's edge and PixelIsPoint origin is + the pixel's centre. + """ + if not sources_meta: + return + first = sources_meta[0] + first_raster_type = first.get('raster_type') + for m in sources_meta[1:]: + m_raster_type = m.get('raster_type') + if (m_raster_type is not None and first_raster_type is not None + and m_raster_type != first_raster_type): + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} declares raster_type=" + f"{m_raster_type!r} which does not match the first " + f"source ({first_raster_type!r}). Silent flattening of " + f"mixed AREA_OR_POINT registration across stacked " + f"sources is not supported; the mosaic would emit a " + f"single dataset-level value and shift the disagreeing " + f"source by half a pixel. Re-tag the sources so they " + f"share the same pixel registration before mosaicing. " + f"See `xrspatial.geotiff.SUPPORTED_FEATURES` " + f"(epic #2340)." + ) + + +def _nodata_values_agree(a, b) -> bool: + """Return True iff two nodata sentinels are the same value. + + Float NaN must be compared via :func:`math.isnan` because + ``float('nan') != float('nan')`` is ``True`` and would otherwise + flag two sources that both carry the standard float NaN sentinel + as a mismatch. ``None`` (no sentinel declared) is symmetric: two + Nones agree. Integer / float cross-type equality + (``-9999 == -9999.0``) is fine as-is. + """ + if a is None and b is None: + return True + if a is None or b is None: + return False + try: + a_is_nan = isinstance(a, float) and math.isnan(a) + b_is_nan = isinstance(b, float) and math.isnan(b) + except (TypeError, ValueError): + a_is_nan = b_is_nan = False + if a_is_nan and b_is_nan: + return True + if a_is_nan or b_is_nan: + return False + return a == b + + +def _check_no_mixed_nodata(sources_meta: list[dict], *, + caller_nodata) -> None: + """Reject VRT writer sources whose nodata sentinels disagree (#2349). + + The legacy writer silently picked ``first['nodata']`` for every + band of every source, so a tile whose own sentinel was different + had its "valid" pixels masked by the mosaic-level + ```` on read. Fail closed unless the caller pinned + the mosaic nodata explicitly via the ``nodata`` kwarg. Mirrors + the read-side ``MixedBandMetadataError`` opt-out pattern. + """ + if caller_nodata is not None or not sources_meta: + return + first = sources_meta[0] + first_nodata = first.get('nodata') + for m in sources_meta[1:]: + m_nodata = m.get('nodata') + if not _nodata_values_agree(m_nodata, first_nodata): + raise UnsupportedGeoTIFFFeatureError( + f"VRT source {m['path']!r} declares nodata=" + f"{m_nodata!r} which does not match the first source " + f"({first_nodata!r}). Silent flattening of mixed " + f"per-source nodata sentinels across stacked sources " + f"is not supported; the writer would emit the first " + f"source's value as a single dataset-level " + f"```` and the disagreeing source's " + f"sentinel would become a valid-looking pixel on " + f"read. Either pin the mosaic nodata explicitly with " + f"``write_vrt(..., nodata=)`` or re-tag the " + f"sources so they agree. See " + f"`xrspatial.geotiff.SUPPORTED_FEATURES` (epic #2340)." + ) + + def write_vrt(vrt_path: str, source_files: list[str], *, relative: bool = True, crs_wkt: str | None = None, @@ -1583,6 +1860,7 @@ def write_vrt(vrt_path: str, source_files: list[str], *, 'transform': geo.transform, 'crs_wkt': geo.crs_wkt, 'nodata': geo.nodata, + 'raster_type': geo.raster_type, }) first = sources_meta[0] @@ -1617,6 +1895,29 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: return abs(a - b) > 0.0 return abs(a - b) / denom > _PIXEL_SIZE_RTOL + # Group the new "fail-loudly on unsupported feature combinations" + # gates (epic #2340 / PR 5) in one block so the cross-source policy + # is easy to read end-to-end: + # + # 1. Reject any source whose declared transform carries a + # non-zero skew term (rotated / sheared affine). write_vrt + # emits an axis-aligned GeoTransform, so a rotated source + # would be silently mis-placed on the mosaic. + # 2. Reject mixed per-source AREA_OR_POINT registration. The + # mosaic writes a single dataset-level value; flattening to + # ``first`` would shift the disagreeing source by half a + # pixel on read. + # 3. Reject mixed per-source nodata sentinels unless the caller + # pinned the mosaic value via the ``nodata`` kwarg. Mirrors + # the read-side ``MixedBandMetadataError`` opt-out pattern. + _check_no_rotated_source_transforms(sources_meta) + _check_no_mixed_raster_type(sources_meta) + _check_no_mixed_nodata(sources_meta, caller_nodata=nodata) + + # Pixel size, sample format, band count, and CRS share the + # documented "all sources must agree with first" contract (#1733). + # These remain inline here because they predate #2349 and have + # their own match-pattern tests. first_crs = first.get('crs_wkt') for m in sources_meta[1:]: t = m['transform'] diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 4d5b54bd..272ee146 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2785,6 +2785,12 @@ def test_all_lists_supported_functions(self): 'RotatedTransformError', 'UnknownCRSModelTypeError', 'UnparseableCRSError', + # Epic #2340 / PR 5 (#2349): typed error raised at the read + # or write entry point when the caller asks for a feature + # the GeoTIFF module does not implement (warped / + # pansharpened / derived VRT subclasses, unknown VRT band + # children, rotated source transforms on a VRT mosaic). + 'UnsupportedGeoTIFFFeatureError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', # Canonical georef_status constants (issue #2136). Exposed diff --git a/xrspatial/geotiff/tests/test_unsupported_features_2349.py b/xrspatial/geotiff/tests/test_unsupported_features_2349.py new file mode 100644 index 00000000..842451ae --- /dev/null +++ b/xrspatial/geotiff/tests/test_unsupported_features_2349.py @@ -0,0 +1,456 @@ +"""Regression tests for issue #2349. + +Epic #2340 PR 5: unsupported GeoTIFF feature combinations fail loudly +with an actionable error message that names the feature, names the +offending input, and points the caller at the release tier map. + +Unsupported features covered here: + +* Full GDAL VRT parity (warped / pansharpened / processed / derived + VRT subclasses, derived raster bands, kernel-filtered sources, + unknown band sub-elements). +* Warped / reprojection VRTs (the GeoTransform check already lands on + ``RotatedTransformError``; the regression here pins the existing + behaviour so a future refactor cannot regress it back to a silent + no-georef path). +* Rotated / sheared GeoTIFF write (the eager writer already rejects + rotated ``attrs['transform']`` 6-tuples; the regression pins the + message wording so callers can still match on it). +* Silent mixed-metadata flattening across stacked VRT sources + (mismatched per-source nodata or AREA_OR_POINT registration). +""" +from __future__ import annotations + +import os +import uuid + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import (RotatedTransformError, UnsupportedGeoTIFFFeatureError, + open_geotiff, to_geotiff) +from xrspatial.geotiff._vrt import parse_vrt, write_vrt + + +# --------------------------------------------------------------------------- +# VRT parse-time gates: subClass, derived raster bands, unknown band children. +# --------------------------------------------------------------------------- + + +def test_warped_vrt_subclass_rejected_at_parse(): + """A ```` is not a plain mosaic. + + Without an explicit refusal the reader would dispatch on whatever + simple sources the warped VRT happens to embed and drop the warping + semantics silently. Pin the typed error and the feature-naming + substring so the message stays actionable for callers grepping for + "warped" / "subClass". + """ + xml = ( + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="subClass"): + parse_vrt(xml, '.') + + +def test_pansharpened_vrt_subclass_rejected_at_parse(): + """A ```` is rejected too. + + The subClass check covers every GDAL VRT subclass uniformly, not + just the warped one. Pin the pansharpened case so a caller who + points read_vrt at a pansharpened VRT sees the same actionable + failure rather than silently mis-reading. + """ + xml = ( + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="VRTPansharpened"): + parse_vrt(xml, '.') + + +def test_derived_rasterband_subclass_rejected_at_parse(): + """A ```` is rejected. + + Derived raster bands declare a pixel-function expression evaluated + over the sources. read_vrt has no pixel-function evaluator and + would drop straight to the simple-source path, producing wrong + output. Pin the typed error and the band number in the message. + """ + xml = ( + '' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match=r"band=1.*VRTDerivedRasterBand"): + parse_vrt(xml, '.') + + +def test_kernel_filtered_source_rejected_at_parse(): + """```` is a known unsupported source type. + + The previous parser silently skipped every non-Simple/Complex tag + inside ````. The new gate enumerates the known + output-altering children and raises on each. Pin the substring so + the caller can match on "KernelFilteredSource" specifically. + """ + xml = ( + '' + ' ' + ' ' + ' src.tif' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match="KernelFilteredSource"): + parse_vrt(xml, '.') + + +def test_pansharpening_options_rejected_at_parse(): + """```` inside a band is rejected. + + PansharpeningOptions sits under VRTRasterBand for the pansharpened + subClass case. The dataset-level subClass check fires first when + the subClass attribute is present, but a malformed VRT that omits + the subClass attribute still has to be rejected. Pin that path + here. + """ + xml = ( + '' + ' ' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match="PansharpeningOptions"): + parse_vrt(xml, '.') + + +def test_unknown_band_child_rejected_at_parse(): + """An unknown ```` child element is rejected. + + The previous parser silently skipped any unknown tag. A future GDAL + VRT extension that introduces a new pixel-altering element must + not slip past this reader as a no-op; the catch-all branch raises + rather than guess at semantics. + """ + xml = ( + '' + ' ' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match="FutureVRTPixelMutator"): + parse_vrt(xml, '.') + + +def test_dataset_level_maskband_rejected_at_parse(): + """A dataset-level ```` sibling of VRTRasterBand is rejected. + + Per the GDAL VRT spec, ```` lives at the ```` + level (not inside a band). The band-children loop never sees it, + so without the dataset-root sweep the mask gets silently dropped. + Pin the typed error and the substring naming the offending tag. + """ + xml = ( + '' + ' ' + ' ' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="MaskBand"): + parse_vrt(xml, '.') + + +def test_dataset_level_gcplist_rejected_at_parse(): + """A dataset-level ```` (ground-control points) is rejected. + + GCPList signals a non-axis-aligned georeferencing model that + read_vrt cannot honour. Pin the rejection so a future refactor + cannot regress to the silent no-op pre-#2349 behaviour. + """ + xml = ( + '' + ' ' + ' ' + '' + ) + with pytest.raises(UnsupportedGeoTIFFFeatureError, match="GCPList"): + parse_vrt(xml, '.') + + +def test_overview_list_band_child_still_passes(tmp_path): + """```` and ```` band children are informational. + + GDAL emits these on VRTs whose source GeoTIFFs carry external + overviews. read_vrt does not consume VRT-level overview + declarations (the source-side reader handles overviews via + ``overview_level=``), so the elements were and remain + no-ops. Pin the allow-list so the catch-all "unknown element" + branch added in #2349 does not regress this case. + """ + src = tmp_path / f'src_2349_ov_{uuid.uuid4().hex[:6]}.tif' + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + y = np.arange(4, dtype=np.float64) + x = np.arange(4, dtype=np.float64) + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}) + to_geotiff(da, str(src), compression='none') + xml = ( + f'' + f' EPSG:4326' + f' 0.0, 1.0, 0.0, 0.0, 0.0, 1.0' + f' ' + f' 2 4' + f' ' + f' {src.name}' + f' 1' + f' ' + f' ' + f' ' + f' ' + f'' + ) + parsed = parse_vrt(xml, str(tmp_path)) + assert len(parsed.bands) == 1 + assert len(parsed.bands[0].sources) == 1 + + +def test_informational_band_children_still_pass(tmp_path): + """```` / ```` / ```` / ```` skip. + + The gate enumerates known informational children that have no + effect on the array bytes and must still be ignored silently. + Pin the allow-list so a future refactor cannot regress it to + "raise on everything" and break legitimate VRTs. + """ + src = tmp_path / f'src_2349_info_{uuid.uuid4().hex[:6]}.tif' + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + y = np.arange(4, dtype=np.float64) + x = np.arange(4, dtype=np.float64) + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}) + to_geotiff(da, str(src), compression='none') + xml = ( + f'' + f' EPSG:4326' + f' 0.0, 1.0, 0.0, 0.0, 0.0, 1.0' + f' ' + f' test' + f' m' + f' -9999' + f' ' + f' {src.name}' + f' 1' + f' ' + f' ' + f' ' + f' ' + f'' + ) + parsed = parse_vrt(xml, str(tmp_path)) + assert len(parsed.bands) == 1 + assert len(parsed.bands[0].sources) == 1 + + +# --------------------------------------------------------------------------- +# VRT writer cross-source mixed-metadata gates. +# --------------------------------------------------------------------------- + + +def _unique_dir(tmp_path, label: str) -> str: + d = tmp_path / f"vrt_2349_{label}_{uuid.uuid4().hex[:8]}" + d.mkdir() + return str(d) + + +def _write_source(path: str, *, px: float = 1.0, py: float = -1.0, + origin_x: float = 0.0, origin_y: float = 100.0, + nodata: float | int | None = -9999.0, + raster_type: str = 'area', + crs: int = 4326, + dtype=np.float32, h: int = 4, w: int = 4) -> None: + arr = np.arange(h * w, dtype=dtype).reshape(h, w) + y = origin_y + (np.arange(h) + 0.5) * py + x = origin_x + (np.arange(w) + 0.5) * px + attrs = {'crs': crs, 'raster_type': raster_type} + if nodata is not None: + attrs['nodata'] = nodata + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs=attrs) + to_geotiff(da, path, compression='none', nodata=nodata) + + +def test_mixed_per_source_nodata_rejected(tmp_path): + """Two sources with different nodata sentinels fail the write. + + Legacy ``write_vrt`` picked ``first['nodata']`` for every band and + silently dropped the second source's sentinel. The fail-closed + surface refuses; the caller can override by pinning the mosaic + nodata via ``write_vrt(..., nodata=)``. Pin the typed error + and a substring naming the kwarg so the message stays actionable. + """ + d = _unique_dir(tmp_path, "nodata") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, nodata=-9999.0) + _write_source(b, origin_x=4.0, nodata=-1.0) + vrt = os.path.join(d, "out.vrt") + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match=r"mixed.*nodata|nodata=\-9999"): + write_vrt(vrt, [a, b]) + + +def test_matching_nan_nodata_passes(tmp_path): + """Two sources both declaring NaN nodata are not a mismatch. + + ``float('nan') != float('nan')`` evaluates True in plain Python, + so the naive cross-source equality check would flag a perfectly + consistent pair of NaN-sentinel sources as a mismatch. The + helper compares via ``math.isnan`` to keep two NaNs equal. Pin + the round-trip so a refactor cannot regress to the naive + comparator. + """ + d = _unique_dir(tmp_path, "nan_nodata") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, nodata=float('nan')) + _write_source(b, origin_x=4.0, nodata=float('nan')) + vrt = os.path.join(d, "out.vrt") + write_vrt(vrt, [a, b]) + assert os.path.exists(vrt) + + +def test_mixed_nodata_override_via_kwarg_passes(tmp_path): + """``write_vrt(..., nodata=)`` opts back into flatten-to-kwarg. + + The fail-closed default is the strict path; explicit caller intent + via the ``nodata`` kwarg overrides it. Pin that the opt-out keeps + working so the message is actionable rather than a dead-end. + """ + d = _unique_dir(tmp_path, "nodata_ok") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, nodata=-9999.0) + _write_source(b, origin_x=4.0, nodata=-1.0) + vrt = os.path.join(d, "out.vrt") + write_vrt(vrt, [a, b], nodata=-9999.0) + assert os.path.exists(vrt) + + +def test_mixed_raster_type_rejected(tmp_path): + """Sources disagreeing on AREA_OR_POINT registration fail the write. + + The mosaic writes a single dataset-level AREA_OR_POINT, so silently + flattening to the first source's value would shift the disagreeing + source by half a pixel on read. Pin the typed error and the + substring naming the mismatch. + """ + d = _unique_dir(tmp_path, "raster_type") + a = os.path.join(d, "a.tif") + b = os.path.join(d, "b.tif") + _write_source(a, origin_x=0.0, raster_type='area') + _write_source(b, origin_x=4.0, raster_type='point') + vrt = os.path.join(d, "out.vrt") + with pytest.raises(UnsupportedGeoTIFFFeatureError, + match=r"raster_type|AREA_OR_POINT"): + write_vrt(vrt, [a, b]) + + +# --------------------------------------------------------------------------- +# Rotated / sheared write gates: regression pins for the existing +# refusal at the eager writer entry point. +# --------------------------------------------------------------------------- + + +def test_eager_writer_rejects_rotated_6tuple_transform(tmp_path): + """``attrs['transform']`` 6-tuple with non-zero ``b`` or ``d`` is refused. + + The eager writer emits an axis-aligned GeoTIFF; silently dropping + the skew terms would place the raster at the wrong location. Pin + the message wording so the existing match patterns in other + fail-closed regressions keep matching. + """ + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + dims=['y', 'x'], + attrs={'transform': (1.0, 0.5, 0.0, 0.0, -1.0, 0.0)}, + ) + path = tmp_path / f"rotated_2349_{uuid.uuid4().hex[:6]}.tif" + with pytest.raises(ValueError, match=r"rotation/shear"): + to_geotiff(da, str(path)) + + +def test_eager_writer_rejects_rotated_affine_attr(tmp_path): + """``attrs['rotated_affine']`` (set by reader on ``allow_rotated``) refused. + + The reader stamps the rotated 6-tuple on this attr when called with + ``allow_rotated=True``. The writer has no ModelTransformationTag + emit path, so a read-then-write round-trip would silently lose the + rotation. Pin the refusal so the regression cannot regress into a + silent identity-affine output. + """ + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + dims=['y', 'x'], + attrs={'rotated_affine': (1.0, 0.5, 0.0, 0.0, -1.0, 0.0)}, + ) + path = tmp_path / f"rotated_affine_2349_{uuid.uuid4().hex[:6]}.tif" + with pytest.raises(ValueError, match=r"rotated_affine"): + to_geotiff(da, str(path)) + + +# --------------------------------------------------------------------------- +# Warped / reprojection VRT gate: regression pin for the existing +# RotatedTransformError on a VRT with non-zero GeoTransform skew terms. +# --------------------------------------------------------------------------- + + +def test_vrt_with_skewed_geotransform_rejected(tmp_path): + """A VRT GeoTransform with non-zero skew is rejected on read. + + The GDAL GeoTransform skew terms (positions 2 and 4 in the + GDAL ordering) flag a warped / reprojection VRT or a rotated + source. read_vrt has no resampler for the warped case; pin the + existing typed error so a future refactor cannot regress to the + silent no-georef fallback. + """ + src = tmp_path / f'flat_2349_{uuid.uuid4().hex[:6]}.tif' + arr = np.zeros((4, 4), dtype=np.float32) + da = xr.DataArray(arr, dims=['y', 'x'], + coords={'y': np.arange(4, dtype=np.float64), + 'x': np.arange(4, dtype=np.float64)}, + attrs={'crs': 4326}) + to_geotiff(da, str(src), compression='none') + + vrt = tmp_path / f'rotated_2349_{uuid.uuid4().hex[:6]}.vrt' + vrt.write_text( + f'' + f' EPSG:4326' + f' 0.0, 1.0, 0.5, 0.0, 0.0, -1.0' + f' ' + f' ' + f' {src.name}' + f' 1' + f' ' + f' ' + f' ' + f' ' + f'' + ) + with pytest.raises(RotatedTransformError, match=r"rotated affine"): + open_geotiff(str(vrt))