Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,17 @@ class _Source:
# matching rect sizes is a no-op and passes through. Higher-quality
# resamplers are tracked for follow-up.
resample_alg: str | None = None
# True when the source element declared ``<UseMaskBand>true</UseMaskBand>``
# (GDAL writes this for ComplexSource entries that read through the
# source raster's per-band mask). The read pipeline ignores mask bands
# and would silently drop the per-pixel mask, so ``validate_parsed_vrt``
# rejects sources where this flag is set. See issue #2371.
use_mask_band: bool = False
# True when the source element declared a ``<MaskBand>`` child
# (per-source mask reference). Same disposition as ``use_mask_band`` --
# the read pipeline cannot serve the mask semantics and the validator
# rejects the VRT. See issue #2371.
has_mask_source: bool = False


@dataclass
Expand Down Expand Up @@ -381,6 +392,12 @@ def _parse_band_nodata(text: str | None,
'PixelFunctionType', 'PixelFunctionLanguage',
'PixelFunctionCode', 'PixelFunctionArguments',
'SourceTransferType', 'MaskBand', 'PansharpeningOptions',
# Defensive: a band-level ``<GDALWarpOptions>`` block is unusual
# (warp options usually sit at the dataset level or alongside the
# ``VRTWarpedRasterBand`` subClass marker), but catching it here as
# well keeps the parser symmetric with the dataset-level rejection
# in ``_UNSUPPORTED_DATASET_TAGS``. See issue #2371.
'GDALWarpOptions',
})

# Dataset-level (``<VRTDataset>`` children, sibling of ``<VRTRasterBand>``)
Expand All @@ -397,6 +414,14 @@ def _parse_band_nodata(text: str | None,
# Pansharpening setup at the dataset level (separate from the
# band-level <PansharpeningOptions>).
'PansharpeningOptions',
# GDAL ``<GDALWarpOptions>`` block. The ``VRTWarpedRasterBand`` subClass
# rejection above catches the band-level marker, but a warped VRT can
# also embed the warp configuration as a dataset-level sibling block
# (or as a child of a band that does not use the subClass attribute,
# depending on how the VRT was emitted). The mosaic reader does not
# implement reprojection; silently ignoring the block would dispatch
# on the raw source pixels and skip the warp step. See issue #2371.
'GDALWarpOptions',
})


Expand Down Expand Up @@ -674,6 +699,23 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
# #1694 and #1751.
resample_alg = _text(src_elem, 'ResampleAlg')

# ``<UseMaskBand>`` and ``<MaskBand>`` per-source markers
# request that the source's per-band mask drives the
# placement; the read pipeline does not honour mask bands
# and would silently drop the per-pixel mask. Capture the
# flags so ``validate_parsed_vrt`` can reject the VRT with
# an actionable message that names the offending source.
# GDAL emits ``<UseMaskBand>true</UseMaskBand>`` exclusively;
# the truthy set is narrowed to ``('1', 'true')`` to match
# what real VRTs contain rather than every spelling Python
# would coerce to ``True``. See issue #2371.
use_mask_band_str = _text(src_elem, 'UseMaskBand')
use_mask_band = (
use_mask_band_str is not None
and use_mask_band_str.strip().lower() in ('1', 'true')
)
has_mask_source = src_elem.find('MaskBand') is not None

sources.append(_Source(
filename=filename,
band=src_band,
Expand All @@ -683,6 +725,8 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
scale=scale,
offset=offset,
resample_alg=resample_alg,
use_mask_band=use_mask_band,
has_mask_source=has_mask_source,
))

bands.append(_VRTBand(
Expand Down Expand Up @@ -1193,6 +1237,21 @@ def read_vrt(vrt_path: str, *, window=None,
xml_str = _read_vrt_xml(vrt_path)
vrt_dir = os.path.dirname(os.path.abspath(vrt_path))
vrt = parse_vrt(xml_str, vrt_dir)

# Route every fresh parse through the centralised capability
# validator before any source read. When ``parsed`` is supplied the
# caller is responsible for having validated already (the chunked
# dask path threads a pre-validated instance in via #1825, and the
# ``_backends/vrt.read_vrt`` wrapper runs the validator on the
# eager parse before dispatching). Direct callers of this internal
# entry point now get the same capability gate as the public
# backend path. See issue #2371.
if parsed is None:
# Lazy import: ``_vrt_validation`` imports ``_NEAREST_RESAMPLE_ALGS``
# from this module for the resample-alg check, so a top-level
# import here would close a circular import loop at module load.
from ._vrt_validation import validate_parsed_vrt
validate_parsed_vrt(vrt, source=vrt_path, mode='read')
if missing_sources not in ('warn', 'raise'):
raise ValueError(
f"missing_sources must be 'warn' or 'raise', got "
Expand Down
90 changes: 82 additions & 8 deletions xrspatial/geotiff/_vrt_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,28 +286,94 @@ def validate_parsed_vrt(
sr = src.src_rect
dr = src.dst_rect

# Rule 7a: negative SrcRect size.
# Rule 6b: nested VRT. A SourceFilename ending in ``.vrt``
# would recurse the read pipeline through a second VRT
# parse, which the mosaic reader does not implement. Catch
# the case here (not at parse time) so the validator owns
# every capability rejection and the message names both
# outer and inner VRT paths. Case-insensitive on the
# extension so ``.VRT`` (Windows-style) also trips the
# rejection. See issue #2371.
if src.filename.lower().endswith('.vrt'):
# Direct interpolation (not !r) for ``src.filename`` so
# Windows paths render with single backslashes rather
# than the doubled escapes ``repr`` emits, matching the
# ``parse_vrt`` pattern at the path-containment check.
# Without this, a caller's ``in`` check against the raw
# Windows path would fail because ``repr`` doubles
# every backslash.
raise VRTUnsupportedError(
f"VRT '{source}' references another VRT as a source "
f"('{src.filename}', band {band.band_num}). Nested "
f"VRTs are not a supported feature in this release; "
f"the mosaic reader assembles pixel data from "
f"GeoTIFF sources only. See "
f"`xrspatial.geotiff.SUPPORTED_FEATURES` for the "
f"release tier map. Materialise the inner VRT to a "
f"GeoTIFF with ``gdal_translate`` (or "
f"``xrspatial.geotiff.to_geotiff`` after reading "
f"the inner VRT separately) and reference the "
f"resulting GeoTIFF in the outer VRT instead."
)

# Rule 6c: complex mask / alpha source semantics.
# ``<UseMaskBand>true</UseMaskBand>`` and per-source
# ``<MaskBand>`` children declare that the source's
# per-band mask drives placement. The read pipeline ignores
# the mask, so the per-pixel mask would silently drop and
# the dispatched array would mis-label every masked pixel
# as valid. Reject with the source path and the offending
# flag. See issue #2371.
if src.use_mask_band:
raise VRTUnsupportedError(
f"VRT '{source}' source '{src.filename}' (band "
f"{band.band_num}) declares "
f"<UseMaskBand>true</UseMaskBand>. The read "
f"pipeline does not honour per-source mask bands "
f"and would silently drop the per-pixel mask, "
f"mis-labelling masked pixels as valid. Re-export "
f"the source with the mask burned into the band's "
f"nodata sentinel and drop the <UseMaskBand> flag."
)
if src.has_mask_source:
raise VRTUnsupportedError(
f"VRT '{source}' source '{src.filename}' (band "
f"{band.band_num}) declares a per-source <MaskBand> "
f"child. The read pipeline does not honour mask "
f"bands and would silently drop the per-pixel "
f"mask. Re-export the source with the mask burned "
f"into the band's nodata sentinel and drop the "
f"<MaskBand> child."
)

# Rule 7a: negative SrcRect size. Keep the "SrcRect ...
# negative size" phrasing so the legacy regex pattern in
# ``test_geotiff_vrt_srcrect_validation_1784.py`` still
# matches now that the validator preempts the per-source
# check that originally raised this message.
if sr.x_size < 0 or sr.y_size < 0:
raise VRTUnsupportedError(
f"VRT '{source}' SimpleSource '{src.filename}' "
f"(band {band.band_num}) has negative SrcRect size "
f"(band {band.band_num}) SrcRect has negative size "
f"(xSize={sr.x_size}, ySize={sr.y_size}); SrcRect "
f"sizes must be non-negative."
)
# Rule 7b: negative SrcRect offset.
# Rule 7b: negative SrcRect offset. Same phrasing rationale
# as Rule 7a (legacy regex match).
if sr.x_off < 0 or sr.y_off < 0:
raise VRTUnsupportedError(
f"VRT '{source}' SimpleSource '{src.filename}' "
f"(band {band.band_num}) has negative SrcRect offset "
f"(band {band.band_num}) SrcRect has negative offset "
f"(xOff={sr.x_off}, yOff={sr.y_off}); SrcRect "
f"offsets must be non-negative."
)

# Rule 8a: negative DstRect size.
# Rule 8a: negative DstRect size. Same phrasing rationale
# as Rule 7a (legacy regex match).
if dr.x_size < 0 or dr.y_size < 0:
raise VRTUnsupportedError(
f"VRT '{source}' SimpleSource '{src.filename}' "
f"(band {band.band_num}) has negative DstRect size "
f"(band {band.band_num}) DstRect has negative size "
f"(xSize={dr.x_size}, ySize={dr.y_size}); DstRect "
f"sizes must be non-negative."
)
Expand Down Expand Up @@ -355,8 +421,16 @@ def validate_parsed_vrt(
f"resampling; substituting nearest would silently "
f"mislabel the output. Re-export with "
f"<ResampleAlg>Nearest</ResampleAlg> or matching "
f"SrcRect/DstRect sizes."
f"SrcRect/DstRect sizes. See issue #1751."
)


__all__ = ['validate_parsed_vrt']
# Public alias matching the issue #2371 / epic #2342 naming. The
# implementation continues to live under ``validate_parsed_vrt`` for
# backward compatibility with the ``_backends/vrt.py`` call sites and
# the existing test files; new call sites should prefer the
# capability-validator spelling.
validate_vrt_capability = validate_parsed_vrt


__all__ = ['validate_parsed_vrt', 'validate_vrt_capability']
Loading
Loading