From ec44e2beaad574251e97feeb24109b263e4fdb24 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 22 May 2026 19:40:31 -0700 Subject: [PATCH 1/2] Publish the VRT support contract (#2327) Docs-only. The previous docs implied general-purpose VRT reads. They weren't, and the gap left readers guessing about which VRTs are expected to work. Lock down the supported subset in one place and echo it in the three public entry points. - docs/source/reference/geotiff.rst: new "VRT support matrix" section that calls out the supported subset (simple GDAL VRT mosaics over GeoTIFF sources; compatible CRS, transform orientation, pixel size, dtype, band count; windowed and dask reads over the same subset; explicit nodata with mixed-band rejection by default; missing_sources='raise' as the default) and the explicit non-goals (warped VRTs, arbitrary resampling, mixed metadata without an opt-in, nested VRTs, complex / mask / alpha bands, full GDAL parity), plus one safe-usage example and one intentionally-raises example. - xrspatial/geotiff/__init__.py: open_geotiff's docstring echoes the same subset / non-goals summary and carries the two examples. - xrspatial/geotiff/_backends/vrt.py: read_vrt's docstring echoes the same summary and carries the two examples. - xrspatial/geotiff/_writers/vrt.py: write_vrt's docstring echoes the same summary and carries the two examples (noting the failure mode lives on the read side). VRT was already at the 'advanced' tier in SUPPORTED_FEATURES ('reader.vrt'); no change there. Tier text in each docstring is unchanged. Closes #2327. Parent: #2321. 
--- docs/source/reference/geotiff.rst | 96 ++++++++++++++++++++++++++++++ xrspatial/geotiff/__init__.py | 41 ++++++++++++- xrspatial/geotiff/_backends/vrt.py | 48 +++++++++++++++ xrspatial/geotiff/_writers/vrt.py | 48 +++++++++++++++ 4 files changed, 232 insertions(+), 1 deletion(-) diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst index 0c6b184c..6b609366 100644 --- a/docs/source/reference/geotiff.rst +++ b/docs/source/reference/geotiff.rst @@ -187,6 +187,102 @@ with spatial coords on both axes but no explicit transform raises Multi-row / multi-column writes are unaffected. 1x1 inputs still require ``attrs['transform']`` because neither axis has a step. +VRT support matrix (issue #2321) +================================ + +VRT reads sit at the ``advanced`` tier in +:data:`xrspatial.geotiff.SUPPORTED_FEATURES` (``reader.vrt``). +``open_geotiff``, ``read_vrt``, and ``write_vrt`` all target the same +narrow subset of GDAL's VRT spec. The reference below is the canonical +contract; the three docstrings echo it. + +Supported +--------- + +* Simple GDAL VRT mosaics whose ``<SimpleSource>`` entries point at + GeoTIFF files. The VRT XML must resolve to source paths under the + VRT's own directory (or under a root listed in + ``XRSPATIAL_VRT_ALLOWED_ROOTS``); see the source-path containment + note on ``read_vrt`` (#1671). +* Sources that agree on CRS, transform orientation (axis-aligned, + same sign on the y step), pixel size, dtype, and band count. The + read rejects mismatch with ``MixedBandMetadataError`` / + ``ValueError`` rather than silently flattening. +* Windowed reads via ``window=(row_start, col_start, row_stop, + col_stop)``. Eager and dask paths shift coords and + ``attrs['transform']`` together so a windowed eager read and a + windowed dask read agree on metadata. +* Lazy / dask reads over the same subset via ``chunks=``.
Construction + parses the VRT XML and runs a parse-time existence sweep over every + referenced source so a missing file is surfaced at graph build, not + at ``compute()`` time (#2265). +* Explicit ``nodata``. The default (``band_nodata=None``) rejects a VRT + whose bands declare disagreeing per-band ``<NoDataValue>`` sentinels + with ``MixedBandMetadataError``. ``band_nodata='first'`` opts back + into the legacy flatten-to-band-0 behaviour explicitly (#1987). +* ``missing_sources='raise'`` (the default since #1860). Pass + ``missing_sources='warn'`` to opt into the lenient partial-mosaic + path; see "VRT missing sources" below. + +Non-goals (intentionally unsupported) +------------------------------------- + +* Warped / reprojection VRTs (``<VRTWarpedDataset>``). +* Arbitrary resampling beyond the tested subset. The VRT reader honours + only the small set of resampling rules its test corpus covers; other + modes raise rather than silently picking a default. +* Mixed CRS, resolution, dtype, or band metadata across sources without + an explicit opt-in. The default behaviour is to fail closed. +* Nested VRTs (a ``<SourceFilename>`` that itself points at a ``.vrt``). +* Complex source / mask band / alpha band structures + (``<ComplexSource>`` with arbitrary scale and offset, + ``<MaskBand>``, ``<AlphaBand>``). +* Full GDAL VRT parity. The contract above is the supported surface; + anything outside it is on a best-effort basis at most and is allowed + to raise. + +Safe usage +---------- + +A simple mosaic over two compatible GeoTIFF tiles, read eagerly with +the fail-closed defaults: + +.. code-block:: python + + from xrspatial.geotiff import open_geotiff, write_vrt + + # Write a VRT that mosaics two tiles. Both tiles share CRS, + # pixel size, dtype, and band count. + vrt_path = write_vrt( + 'mosaic.vrt', + source_files=['tile_west.tif', 'tile_east.tif'], + ) + + # Read with the defaults: missing_sources='raise', + # band_nodata=None (fail closed on disagreeing per-band sentinels).
+ da = open_geotiff(vrt_path) + +Intentionally raises +-------------------- + +Pointing the read at a VRT whose source tiles disagree on their +per-band nodata sentinels triggers the fail-closed check: + +.. code-block:: python + + from xrspatial.geotiff import open_geotiff + from xrspatial.geotiff import MixedBandMetadataError + + # tile_a.tif declares nodata=-9999, tile_b.tif declares nodata=0. + # The default band_nodata=None rejects the mosaic rather than + # flattening to one sentinel. + try: + open_geotiff('mixed_nodata.vrt') + except MixedBandMetadataError: + # Pass band_nodata='first' to opt back into the legacy + # flatten-to-band-0 semantics, or fix the source tiles. + pass + VRT missing sources =================== diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index aacf380d..73c3145e 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -357,7 +357,22 @@ def open_geotiff(source: str | BinaryIO, *, - ``gpu=True, chunks=N``: Dask+CuPy for out-of-core GPU pipelines - Default: NumPy eager read - VRT files are auto-detected by extension. + VRT files are auto-detected by extension. The supported VRT subset + is narrow on purpose (issue #2321; see the "VRT support matrix" + section in ``docs/source/reference/geotiff.rst`` for the canonical + contract). In short: + + * Supported: simple GDAL VRT mosaics over GeoTIFF sources; + compatible CRS, transform orientation, pixel size, dtype, and + band count across sources; clean windowed reads; lazy / dask + reads over the same subset; explicit nodata with mixed-band + rejection by default; ``missing_sources='raise'`` as the + default. + * Non-goals (allowed to raise): warped / reprojection VRTs, + arbitrary resampling beyond the tested subset, mixed CRS / + resolution / dtype / band metadata without an opt-in, nested + VRTs, complex source / mask band / alpha band structures, full + GDAL VRT parity. 
Parameters ---------- @@ -517,6 +532,30 @@ def open_geotiff(source: str | BinaryIO, *, then raises ``ValueError`` (float-to-int is lossy in a way users rarely intend). When the file has no in-range sentinel match, the promotion is skipped and ``dtype=`` works either way. + + Examples + -------- + Safe VRT usage. Mosaic two compatible tiles and read with the + fail-closed defaults: + + >>> from xrspatial.geotiff import open_geotiff, write_vrt + >>> vrt_path = write_vrt( # doctest: +SKIP + ... 'mosaic.vrt', + ... source_files=['tile_west.tif', 'tile_east.tif'], + ... ) + >>> da = open_geotiff(vrt_path) # doctest: +SKIP + + Intentionally raises. A VRT whose source tiles disagree on their + per-band nodata sentinels is rejected by the default + ``band_nodata=None``: + + >>> from xrspatial.geotiff import MixedBandMetadataError + >>> try: # doctest: +SKIP + ... open_geotiff('mixed_nodata.vrt') + ... except MixedBandMetadataError: + ... pass # pass band_nodata='first' to opt back into the + ... # legacy flatten-to-band-0 semantics, or fix the + ... # source tiles. """ from ._reader import _coerce_path diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 087c59b9..af0f7b72 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -139,9 +139,57 @@ def read_vrt(source: str, *, raises a typed error rather than silently flattening. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. + Supported subset (issue #2321; see the "VRT support matrix" section + in ``docs/source/reference/geotiff.rst`` for the canonical + contract): + + * Simple GDAL VRT mosaics whose ``<SimpleSource>`` entries point + at GeoTIFF files (sources must resolve under the VRT's own + directory or an ``XRSPATIAL_VRT_ALLOWED_ROOTS`` root; #1671). + * Sources that agree on CRS, transform orientation, pixel size, + dtype, and band count. Mismatch raises rather than flattening.
+ * Windowed reads via ``window=``; eager and dask paths shift + coords and ``attrs['transform']`` together. + * Lazy / dask reads via ``chunks=`` over the same subset, with a + parse-time missing-source sweep (#2265). + * Explicit ``nodata``; ``band_nodata=None`` (the default) rejects + disagreeing per-band sentinels with ``MixedBandMetadataError`` + (#1987). + * ``missing_sources='raise'`` is the default (#1860). + + Non-goals (intentionally unsupported, allowed to raise): warped / + reprojection VRTs, arbitrary resampling beyond the tested subset, + mixed CRS / resolution / dtype / band metadata without an opt-in, + nested VRTs, complex source / mask band / alpha band structures, + full GDAL VRT parity. + The VRT's source GeoTIFFs are read via windowed reads and assembled into a single array. + Examples + -------- + Safe usage. Mosaic two compatible tiles and read with the + fail-closed defaults: + + >>> from xrspatial.geotiff import open_geotiff, write_vrt + >>> vrt_path = write_vrt( # doctest: +SKIP + ... 'mosaic.vrt', + ... source_files=['tile_west.tif', 'tile_east.tif'], + ... ) + >>> da = read_vrt(vrt_path) # doctest: +SKIP + + Intentionally raises. A VRT whose source tiles disagree on their + per-band nodata sentinels is rejected by the default + ``band_nodata=None``: + + >>> from xrspatial.geotiff import MixedBandMetadataError + >>> try: # doctest: +SKIP + ... read_vrt('mixed_nodata.vrt') + ... except MixedBandMetadataError: + ... pass # pass band_nodata='first' to opt back into the + ... # legacy flatten-to-band-0 semantics, or fix the + ... # source tiles. + Parameters ---------- source : str diff --git a/xrspatial/geotiff/_writers/vrt.py b/xrspatial/geotiff/_writers/vrt.py index 0b0b81e6..3422043c 100644 --- a/xrspatial/geotiff/_writers/vrt.py +++ b/xrspatial/geotiff/_writers/vrt.py @@ -31,6 +31,24 @@ def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, disagreement. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. 
+ Output targets the same narrow subset of GDAL's VRT spec that the + reader supports (issue #2321; see the "VRT support matrix" section + in ``docs/source/reference/geotiff.rst`` for the canonical + contract): + + * Supported: simple GDAL VRT mosaics over GeoTIFF sources; + compatible CRS, transform orientation, pixel size, dtype, and + band count across sources; clean windowed reads on the + consumer side; lazy / dask reads over the same subset on the + consumer side; explicit nodata; ``missing_sources='raise'`` as + the read-side default. + * Non-goals (the writer does not emit these and the reader is + allowed to raise on them): warped / reprojection VRTs, + arbitrary resampling beyond the tested subset, mixed CRS / + resolution / dtype / band metadata without an opt-in, nested + VRTs, complex source / mask band / alpha band structures, full + GDAL VRT parity. + Parameters ---------- path : str @@ -73,6 +91,36 @@ def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, ------- str Path to the written VRT file. + + Examples + -------- + Safe usage. Mosaic two compatible tiles; the consumer can then + read the resulting VRT with the fail-closed defaults: + + >>> from xrspatial.geotiff import write_vrt, open_geotiff + >>> vrt_path = write_vrt( # doctest: +SKIP + ... 'mosaic.vrt', + ... source_files=['tile_west.tif', 'tile_east.tif'], + ... ) + >>> da = open_geotiff(vrt_path) # doctest: +SKIP + + Intentionally raises (on the read side). If the source tiles + disagree on their per-band nodata sentinels, the default + ``band_nodata=None`` on ``open_geotiff`` / ``read_vrt`` rejects + the mosaic with ``MixedBandMetadataError``. The writer does not + pre-validate cross-tile metadata; the failure mode lives on the + read side: + + >>> from xrspatial.geotiff import MixedBandMetadataError + >>> # tile_a.tif declares nodata=-9999; tile_b.tif declares nodata=0 + >>> bad_path = write_vrt( # doctest: +SKIP + ... 'mixed_nodata.vrt', + ... 
source_files=['tile_a.tif', 'tile_b.tif'], + ... ) + >>> try: # doctest: +SKIP + ... open_geotiff(bad_path) + ... except MixedBandMetadataError: + ... pass # fix the source tiles or pass band_nodata='first'. """ # Explicit signature (previously ``**kwargs``) so ``inspect.signature``, # IDE autocomplete, and ``mypy --strict`` can see the accepted kwargs From 7c34495b799d2e16668d205c74af712400a1da24 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 22 May 2026 19:44:07 -0700 Subject: [PATCH 2/2] Address review: numpydoc section order, tighter snippet (#2327) Follow-up to PR #2334 review: - xrspatial/geotiff/_backends/vrt.py: move the Examples block from before the Parameters section to after the Notes section so read_vrt's docstring matches the section order in open_geotiff and write_vrt (Suggestion). - docs/source/reference/geotiff.rst: collapse the two ``from xrspatial.geotiff import ...`` lines in the "Intentionally raises" snippet into one combined import (Nit). - xrspatial/geotiff/_writers/vrt.py: note that the example source_files paths are illustrative (Nit). No code paths touched. Existing VRT tests still pass (665 passed, 27 skipped). --- docs/source/reference/geotiff.rst | 3 +- xrspatial/geotiff/_backends/vrt.py | 48 +++++++++++++++--------------- xrspatial/geotiff/_writers/vrt.py | 4 ++- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst index 6b609366..6f05ce7f 100644 --- a/docs/source/reference/geotiff.rst +++ b/docs/source/reference/geotiff.rst @@ -270,8 +270,7 @@ per-band nodata sentinels triggers the fail-closed check: .. code-block:: python - from xrspatial.geotiff import open_geotiff - from xrspatial.geotiff import MixedBandMetadataError + from xrspatial.geotiff import open_geotiff, MixedBandMetadataError # tile_a.tif declares nodata=-9999, tile_b.tif declares nodata=0. 
# The default band_nodata=None rejects the mosaic rather than diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index af0f7b72..c828df04 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -166,30 +166,6 @@ def read_vrt(source: str, *, The VRT's source GeoTIFFs are read via windowed reads and assembled into a single array. - Examples - -------- - Safe usage. Mosaic two compatible tiles and read with the - fail-closed defaults: - - >>> from xrspatial.geotiff import open_geotiff, write_vrt - >>> vrt_path = write_vrt( # doctest: +SKIP - ... 'mosaic.vrt', - ... source_files=['tile_west.tif', 'tile_east.tif'], - ... ) - >>> da = read_vrt(vrt_path) # doctest: +SKIP - - Intentionally raises. A VRT whose source tiles disagree on their - per-band nodata sentinels is rejected by the default - ``band_nodata=None``: - - >>> from xrspatial.geotiff import MixedBandMetadataError - >>> try: # doctest: +SKIP - ... read_vrt('mixed_nodata.vrt') - ... except MixedBandMetadataError: - ... pass # pass band_nodata='first' to opt back into the - ... # legacy flatten-to-band-0 semantics, or fix the - ... # source tiles. - Parameters ---------- source : str @@ -318,6 +294,30 @@ def read_vrt(source: str, *, failures, which surface as per-task ``GeoTIFFFallbackWarning`` instead. Each worker still emits ``GeoTIFFFallbackWarning`` for missing sources at execution time as well. + + Examples + -------- + Safe usage. Mosaic two compatible tiles and read with the + fail-closed defaults: + + >>> from xrspatial.geotiff import open_geotiff, write_vrt + >>> vrt_path = write_vrt( # doctest: +SKIP + ... 'mosaic.vrt', + ... source_files=['tile_west.tif', 'tile_east.tif'], + ... ) + >>> da = read_vrt(vrt_path) # doctest: +SKIP + + Intentionally raises. 
A VRT whose source tiles disagree on their + per-band nodata sentinels is rejected by the default + ``band_nodata=None``: + + >>> from xrspatial.geotiff import MixedBandMetadataError + >>> try: # doctest: +SKIP + ... read_vrt('mixed_nodata.vrt') + ... except MixedBandMetadataError: + ... pass # pass band_nodata='first' to opt back into the + ... # legacy flatten-to-band-0 semantics, or fix the + ... # source tiles. """ from .._reader import _coerce_path from .._vrt import _apply_integer_sentinel_mask_with_presence as _vrt_mask_with_presence diff --git a/xrspatial/geotiff/_writers/vrt.py b/xrspatial/geotiff/_writers/vrt.py index 3422043c..13f985ce 100644 --- a/xrspatial/geotiff/_writers/vrt.py +++ b/xrspatial/geotiff/_writers/vrt.py @@ -95,7 +95,9 @@ def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, Examples -------- Safe usage. Mosaic two compatible tiles; the consumer can then - read the resulting VRT with the fail-closed defaults: + read the resulting VRT with the fail-closed defaults. Paths + below are illustrative; replace with paths to real GeoTIFF + files on disk: >>> from xrspatial.geotiff import write_vrt, open_geotiff >>> vrt_path = write_vrt( # doctest: +SKIP