diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 73c3145e..099fc721 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -342,14 +342,35 @@ def open_geotiff(source: str | BinaryIO, *, ) -> xr.DataArray: """Read a GeoTIFF, COG, or VRT file into an xarray.DataArray. - Tier: Stable for local-file reads on axis-aligned grids with an - EPSG CRS in ``attrs['crs']``. Cloud / fsspec URIs, HTTP range - reads, ``.vrt`` mosaics, external ``.tif.ovr`` sidecars, - ``allow_rotated=True``, and ``allow_unparseable_crs=True`` are - Advanced (work, but each carries a specific failure mode named on - the parameter doc). ``gpu=True`` is Experimental. See - :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier - map (issue #2137). + Release-contract tier (epic #2340; see + ``docs/source/reference/release_gate_geotiff.rst`` for the audited + matrix and ``docs/source/reference/geotiff_release_contract.rst`` + for the prose contract once that page lands): + + * [stable] Local-file reads on axis-aligned grids with an EPSG CRS + in ``attrs['crs']``; Tier 1 codecs (``none`` / ``deflate`` / + ``lzw`` / ``packbits`` / ``zstd``); windowed reads via ``window=``. + * [advanced] Cloud / fsspec URIs, HTTP range reads, ``.vrt`` + mosaics, external ``.tif.ovr`` sidecars, ``allow_rotated=True``, + ``allow_unparseable_crs=True``, ``overview_level=`` selection. + These paths work and are tested, but each carries a specific + failure mode named on the parameter doc. + * [experimental] ``gpu=True``; LERC / JPEG2000 / J2K / LZ4 decode. + No cross-backend numerical parity claim. JPEG-in-TIFF on the + read side decodes best-effort with no parity claim against + libtiff / GDAL / rasterio; the write side is ``[internal-only]`` + (the encoder omits the required JPEGTables tag, so round-trips + hold only for files this library itself wrote). 
+ * Out of scope for this release (allowed to raise): full GDAL VRT + parity, warped / reprojection VRTs, rotated/sheared write + support. + + See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier + map (issue #2137). Per-parameter tier markers below describe the + tier the parameter itself carries; a parameter's effective tier + is bounded by the function-level surface above (e.g. ``[stable]`` + ``mask_nodata`` is still only stable when combined with a + ``[stable]`` source, codec, and options). Automatically dispatches to the best backend: - ``gpu=True``: GPU-accelerated read via nvCOMP (returns CuPy) @@ -358,9 +379,11 @@ def open_geotiff(source: str | BinaryIO, *, - Default: NumPy eager read VRT files are auto-detected by extension. The supported VRT subset - is narrow on purpose (issue #2321; see the "VRT support matrix" - section in ``docs/source/reference/geotiff.rst`` for the canonical - contract). In short: + is narrow on purpose (issue #2321; epic #2340). See the "VRT + support matrix" section in ``docs/source/reference/geotiff.rst`` + and the audited matrix in + ``docs/source/reference/release_gate_geotiff.rst`` for the + canonical contract. In short: * Supported: simple GDAL VRT mosaics over GeoTIFF sources; compatible CRS, transform orientation, pixel size, dtype, and @@ -377,37 +400,46 @@ def open_geotiff(source: str | BinaryIO, *, Parameters ---------- source : str or binary file-like - File path, HTTP URL, cloud URI (s3://, gs://, az://), or a - binary file-like object (e.g. ``io.BytesIO``) with read+seek. - VRT, dask-chunked, GPU, and remote-URL paths require a string; - in-memory file-like buffers go through the eager numpy reader. 
+ [stable for local file paths; advanced for HTTP/fsspec URIs, + ``.vrt`` paths, and in-memory file-like buffers (the file-like + path is restricted to the eager numpy reader -- dask, GPU, + VRT, and remote-URL paths require a string)] File path, HTTP + URL, cloud URI (s3://, gs://, az://), or a binary file-like + object (e.g. ``io.BytesIO``) with read+seek. dtype : str, numpy.dtype, or None - Cast the result to this dtype after reading. None keeps the - file's native dtype. Float-to-int casts raise ValueError to - prevent accidental data loss. + [stable] Cast the result to this dtype after reading. None + keeps the file's native dtype. Float-to-int casts raise + ValueError to prevent accidental data loss. window : tuple or None - (row_start, col_start, row_stop, col_stop) for windowed reading. + [stable] ``(row_start, col_start, row_stop, col_stop)`` for + windowed reading. overview_level : int or None - Overview level (0 = full resolution). Must be a non-negative int - or ``None``; passing ``bool`` or any other type raises - ``TypeError``. + [advanced] Overview level (0 = full resolution). Must be a + non-negative int or ``None``; passing ``bool`` or any other + type raises ``TypeError``. External ``.tif.ovr`` sidecars are + also [advanced] and are tested but not load-bearing for + release-gate parity. band : int or None - Band index (0-based). None returns all bands. + [stable] Band index (0-based). None returns all bands. name : str or None - Name for the DataArray. + [stable] Name for the DataArray. chunks : int, tuple, or None - Chunk size for Dask lazy reading. + [stable] Chunk size for Dask lazy reading. Dask reads are + gated against the eager reader by the cross-backend parity + suite for the Tier 1 codec set. gpu : bool - Experimental: requires cupy + nvCOMP for the codec the file - carries; the reader falls back to CPU when the optional - libraries are unavailable unless ``on_gpu_failure='strict'`` is - also set. Use GPU-accelerated decompression. 
+ [experimental] Use GPU-accelerated decompression. Requires + cupy + numba CUDA plus optional nvCOMP / nvJPEG / nvJPEG2K + libraries for codec-specific acceleration. The reader falls + back to CPU when those libraries are unavailable unless + ``on_gpu_failure='strict'`` is also set. No cross-backend + numerical parity claim outside the Tier 1 codec set. max_pixels : int or None - Maximum allowed pixel count (width * height * samples). None - uses the default (~1 billion). Raise to read legitimately - large files. + [stable] Maximum allowed pixel count (width * height * + samples). None uses the default (~1 billion). Raise to read + legitimately large files. max_cloud_bytes : int or None, optional - Advanced: fsspec cloud reads can run up cost on large objects; + [advanced] fsspec cloud reads can run up cost on large objects; the budget defends against accidental large downloads but the eager path still pulls the full object once the budget allows. Byte ceiling for eager reads from fsspec sources (``s3://``, @@ -423,14 +455,14 @@ def open_geotiff(source: str | BinaryIO, *, backends do not apply the cloud-byte budget. See issue #1928 (eager path) and issue #1974 (rejection guard). on_gpu_failure : {'auto', 'strict'}, optional - Forwarded to ``read_geotiff_gpu`` when ``gpu=True``. Controls - whether GPU decode failures fall back to CPU (``'auto'``, - default) or re-raise the original exception (``'strict'``). - Passing this kwarg with ``gpu=False`` raises ``ValueError`` - because the policy only applies to the GPU pipeline. See - ``read_geotiff_gpu`` for the full description. + [experimental] Forwarded to ``read_geotiff_gpu`` when + ``gpu=True``. Controls whether GPU decode failures fall back + to CPU (``'auto'``, default) or re-raise the original exception + (``'strict'``). Passing this kwarg with ``gpu=False`` raises + ``ValueError`` because the policy only applies to the GPU + pipeline. See ``read_geotiff_gpu`` for the full description. 
missing_sources : {'raise', 'warn'}, optional - Advanced: VRT mosaics can return partial output under + [advanced] VRT mosaics can return partial output under ``missing_sources='warn'`` when a backing source is unreadable; the ``attrs['vrt_holes']`` entry records which sources were skipped so downstream code can detect the partial mosaic. @@ -444,9 +476,10 @@ def open_geotiff(source: str | BinaryIO, *, source raises ``ValueError`` because the policy only applies to the VRT pipeline. See ``read_vrt`` for the full description. band_nodata : {'first', None}, optional - VRT-only. Opt-out for the fail-closed check that rejects VRT - sources whose bands declare disagreeing per-band nodata - sentinels (issue #1987 PR 5). When ``None`` (the default), a VRT + [advanced] VRT-only. Opt-out for the fail-closed check that + rejects VRT sources whose bands declare disagreeing per-band + nodata sentinels (issue #1987 PR 5). When ``None`` (the + default), a VRT that mosaics bands with different sentinels raises ``MixedBandMetadataError``; flattening to one value would let one band's valid pixels collide with another band's sentinel. @@ -455,18 +488,18 @@ def open_geotiff(source: str | BinaryIO, *, kwarg with a non-VRT source raises ``ValueError`` because the policy only applies to the VRT pipeline. mask_nodata : bool, default True - If True (the default), replace the nodata sentinel with ``NaN``; - integer rasters get promoted to ``float64`` first so NaN can be - represented. If False, skip the sentinel-to-NaN step and keep - the source dtype. ``attrs['nodata']`` still carries the raw - sentinel either way, so downstream code can mask explicitly. - Pass ``mask_nodata=False`` when you want to preserve an integer - source dtype via ``dtype=``: the default ``mask_nodata=True`` - promotes to ``float64`` whenever the sentinel matches an actual - pixel, and ``dtype=`` then raises ``ValueError`` on the - float-to-int cast. 
+ [stable] If True (the default), replace the nodata sentinel + with ``NaN``; integer rasters get promoted to ``float64`` first + so NaN can be represented. If False, skip the sentinel-to-NaN + step and keep the source dtype. ``attrs['nodata']`` still + carries the raw sentinel either way, so downstream code can + mask explicitly. Pass ``mask_nodata=False`` when you want to + preserve an integer source dtype via ``dtype=``: the default + ``mask_nodata=True`` promotes to ``float64`` whenever the + sentinel matches an actual pixel, and ``dtype=`` then + raises ``ValueError`` on the float-to-int cast. allow_rotated : bool, default False - Advanced: read-only opt-in. ``to_geotiff`` does not currently + [advanced] Read-only opt-in. ``to_geotiff`` does not currently emit ``rotated_affine``; it rejects DataArrays that carry the attr (``ValueError`` naming the attr) unless the caller passes ``drop_rotation=True`` to accept the loss explicitly (#2216). @@ -492,9 +525,9 @@ def open_geotiff(source: str | BinaryIO, *, ``ModelTransformationTag`` emit path is tracked separately (issue #2115). allow_unparseable_crs : bool, default False - Read-side opt-in for CRS strings that pyproj cannot resolve and - that do not parse as WKT. When ``False`` (the default since - #1929), an unrecognised CRS payload raises + [advanced] Read-side opt-in for CRS strings that pyproj cannot + resolve and that do not parse as WKT. When ``False`` (the + default since #1929), an unrecognised CRS payload raises ``UnparseableCRSError`` instead of landing in ``attrs['crs_wkt']`` verbatim. Set to ``True`` to keep the pre-#1929 permissive behaviour where the citation field passes through unchanged. 
diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 6da17cb3..bfa889cc 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -46,84 +46,99 @@ def read_geotiff_dask(source: str, *, mask_nodata: bool = True) -> xr.DataArray: """Read a GeoTIFF as a dask-backed DataArray for out-of-core processing. - Tier: Stable for local-file reads on axis-aligned grids with the - Tier 1 codec set. ``allow_rotated`` / ``allow_unparseable_crs`` - are Advanced (read-only opt-ins; round-trip semantics are listed - on the parameter docs). See - :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map - (issue #2137). + Release-contract tier (epic #2340; see + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst``): + + * [stable] Local-file dask reads on axis-aligned grids with the + Tier 1 codec set (``none`` / ``deflate`` / ``lzw`` / + ``packbits`` / ``zstd``). Cross-backend parity against the + eager reader is gated by CI for this surface. + * [advanced] HTTP / fsspec dask reads via windowed range GETs; + ``.vrt`` dispatch; ``overview_level=``; ``allow_rotated`` / + ``allow_unparseable_crs`` opt-ins. + * [experimental] Tier 3 codecs (LERC / JPEG2000 / J2K / LZ4) when + the file happens to use them. No cross-backend parity claim. + + See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier + map (issue #2137). Each chunk is loaded lazily via windowed reads. Parameters ---------- source : str - File path. + [stable for local file paths; advanced for HTTP/fsspec URIs + and ``.vrt`` paths] File path. dtype : str, numpy.dtype, or None - Cast each chunk to this dtype after reading. None keeps the - file's native dtype. Float-to-int casts raise ValueError. + [stable] Cast each chunk to this dtype after reading. None + keeps the file's native dtype. Float-to-int casts raise + ValueError. 
chunks : int or (row_chunk, col_chunk) tuple - Chunk size in pixels. Default 512. + [stable] Chunk size in pixels. Default 512. overview_level : int or None - Overview level (0 = full resolution). + [advanced] Overview level (0 = full resolution). window : tuple or None - ``(row_start, col_start, row_stop, col_stop)`` to restrict - chunking to a sub-region of the file. Chunks are laid out - relative to the window origin. None reads the full raster. + [stable] ``(row_start, col_start, row_stop, col_stop)`` to + restrict chunking to a sub-region of the file. Chunks are laid + out relative to the window origin. None reads the full raster. band : int or None - Zero-based band index. None returns all bands (3D for + [stable] Zero-based band index. None returns all bands (3D for multi-band files, 2D for single-band). Selecting a single band produces a 2D DataArray. max_pixels : int or None - Maximum allowed pixel count (width * height * samples) for the - windowed region. None uses the reader default (~1 billion). - The cap is checked once up-front against the lazy region; each - chunk task also re-checks against ``max_pixels`` so windowed - reads stay bounded even when ``read_to_array`` is invoked - directly. + [stable] Maximum allowed pixel count (width * height * + samples) for the windowed region. None uses the reader default + (~1 billion). The cap is checked once up-front against the + lazy region; each chunk task also re-checks against + ``max_pixels`` so windowed reads stay bounded even when + ``read_to_array`` is invoked directly. name : str or None - Name for the DataArray. + [stable] Name for the DataArray. band_nodata : {'first', None}, optional - VRT-only opt-out for the fail-closed mixed-band-metadata check - (issue #1987 PR 5). Forwarded verbatim to ``read_vrt`` when the - source is a ``.vrt`` file. Passing it with a non-VRT GeoTIFF - source raises ``ValueError``. 
+ [advanced] VRT-only opt-out for the fail-closed + mixed-band-metadata check (issue #1987 PR 5). Forwarded + verbatim to ``read_vrt`` when the source is a ``.vrt`` file. + Passing it with a non-VRT GeoTIFF source raises ``ValueError``. mask_nodata : bool, default True - If True, replace the nodata sentinel with NaN per chunk (integer - rasters get promoted to ``float64``). If False, skip the - sentinel-to-NaN step so the source dtype survives. The raw - sentinel is still carried on ``attrs['nodata']`` either way. - Pass ``mask_nodata=False`` together with ``dtype=`` to - keep an integer source dtype; the default promotes to - ``float64`` and the cast then raises. See issue #2052. + [stable] If True, replace the nodata sentinel with NaN per + chunk (integer rasters get promoted to ``float64``). If False, + skip the sentinel-to-NaN step so the source dtype survives. + The raw sentinel is still carried on ``attrs['nodata']`` + either way. Pass ``mask_nodata=False`` together with + ``dtype=`` to keep an integer source dtype; the + default promotes to ``float64`` and the cast then raises. See + issue #2052. allow_rotated : bool, default False - Read-side opt-in for rotated / sheared ``ModelTransformationTag`` - files. Forwarded to every per-chunk read so a rotated source - yields an ungeoreferenced pixel grid instead of raising - ``NotImplementedError``. See ``open_geotiff`` for the full - contract; the dask path honours the same attrs (``crs`` / - ``crs_wkt`` dropped, ``rotated_affine`` set). + [advanced] Read-side opt-in for rotated / sheared + ``ModelTransformationTag`` files. Forwarded to every per-chunk + read so a rotated source yields an ungeoreferenced pixel grid + instead of raising ``NotImplementedError``. See + ``open_geotiff`` for the full contract; the dask path honours + the same attrs (``crs`` / ``crs_wkt`` dropped, + ``rotated_affine`` set). 
allow_unparseable_crs : bool, default False - Read-side opt-in for CRS strings that pyproj cannot resolve and - do not parse as WKT. When ``False`` (the default since #1929) - the chunk task raises ``UnparseableCRSError`` instead of - carrying the unrecognised payload through ``attrs['crs_wkt']``. - See ``open_geotiff`` for the full description. + [advanced] Read-side opt-in for CRS strings that pyproj cannot + resolve and do not parse as WKT. When ``False`` (the default + since #1929) the chunk task raises ``UnparseableCRSError`` + instead of carrying the unrecognised payload through + ``attrs['crs_wkt']``. See ``open_geotiff`` for the full + description. on_gpu_failure : str, optional - Accepted for cross-backend signature symmetry only. The dask - path runs CPU decoders, so passing this kwarg raises - ``ValueError`` at dispatch. See ``read_geotiff_gpu`` for the - kwarg's meaning on the GPU reader. + [internal-only] Accepted for cross-backend signature symmetry + only. The dask path runs CPU decoders, so passing this kwarg + raises ``ValueError`` at dispatch. See ``read_geotiff_gpu`` for + the kwarg's meaning on the GPU reader. missing_sources : {'raise', 'warn'}, optional - VRT-only. Forwarded to ``read_vrt`` when the source ends in - ``.vrt``; otherwise raises ``ValueError`` at dispatch. See - ``read_vrt`` for the full description. + [advanced] VRT-only. Forwarded to ``read_vrt`` when the source + ends in ``.vrt``; otherwise raises ``ValueError`` at dispatch. + See ``read_vrt`` for the full description. max_cloud_bytes : int or None, optional - Accepted for cross-backend signature symmetry only. The dask - reader uses bounded range GETs and does not consume the - cloud-byte budget, so passing this kwarg raises ``ValueError`` - at dispatch. See ``open_geotiff`` for the eager-path - description (issue #1974). + [internal-only] Accepted for cross-backend signature symmetry + only. 
The dask reader uses bounded range GETs and does not + consume the cloud-byte budget, so passing this kwarg raises + ``ValueError`` at dispatch. See ``open_geotiff`` for the + eager-path description (issue #1974). Returns ------- diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index cdab4946..41287b20 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -81,11 +81,16 @@ def read_geotiff_gpu(source: str, *, ) -> xr.DataArray: """Read a GeoTIFF with GPU-accelerated decompression via Numba CUDA. - Tier: Experimental (issue #2137). Requires cupy + numba CUDA plus - optional nvCOMP / nvJPEG / nvJPEG2K libraries for codec-specific - acceleration; cross-backend numerical parity with the CPU reader - is tested for the Tier 1 codec set only. See - :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. + Release-contract tier (epic #2340; see + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst``): the + entire entry point is [experimental]. The surface may shift + without a deprecation window and the CPU fallback is the canonical + reader. Requires cupy + numba CUDA plus optional nvCOMP / nvJPEG / + nvJPEG2K libraries for codec-specific acceleration; cross-backend + numerical parity with the CPU reader is tested for the Tier 1 + codec set only. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` + for the full tier map (issue #2137). Decompresses all tiles in parallel on the GPU and returns a CuPy-backed DataArray that stays on device memory. No CPU->GPU @@ -104,43 +109,45 @@ def read_geotiff_gpu(source: str, *, Parameters ---------- source : str - Local file path, ``http://`` / ``https://`` URL, or fsspec URI - (``s3://``, ``gs://``, ``memory://``, ...). URL and fsspec sources - use a CPU decode + GPU upload internally (matching the chunked - path's HTTP/fsspec fallback); the result is still a CuPy-backed - DataArray. 
+ [experimental] Local file path, ``http://`` / ``https://`` + URL, or fsspec URI (``s3://``, ``gs://``, ``memory://``, ...). + URL and fsspec sources use a CPU decode + GPU upload + internally (matching the chunked path's HTTP/fsspec fallback); + the result is still a CuPy-backed DataArray. dtype : str, numpy.dtype, or None - Cast the result to this dtype after reading. None keeps the - file's native dtype. Float-to-int casts raise ValueError, mirroring - ``open_geotiff`` / ``read_geotiff_dask``. + [experimental] Cast the result to this dtype after reading. + None keeps the file's native dtype. Float-to-int casts raise + ValueError, mirroring ``open_geotiff`` / ``read_geotiff_dask``. overview_level : int or None - Overview level (0 = full resolution). + [experimental] Overview level (0 = full resolution). window : tuple or None - ``(row_start, col_start, row_stop, col_stop)`` for windowed - reading. None reads the full raster. The GPU pipeline currently - decodes all tiles and slices on device after assembly, so the - kwarg restores API parity with ``open_geotiff`` and - ``read_geotiff_dask`` but does not yet skip I/O for partial - windows. The returned coords, ``attrs['transform']``, and - shape match the eager numpy path. + [experimental] ``(row_start, col_start, row_stop, col_stop)`` + for windowed reading. None reads the full raster. The GPU + pipeline currently decodes all tiles and slices on device + after assembly, so the kwarg restores API parity with + ``open_geotiff`` and ``read_geotiff_dask`` but does not yet + skip I/O for partial windows. The returned coords, + ``attrs['transform']``, and shape match the eager numpy path. band : int or None - Zero-based band index. None returns all bands (3D output for - multi-band files, 2D for single-band). Selecting a single band - yields a 2D DataArray. + [experimental] Zero-based band index. None returns all bands + (3D output for multi-band files, 2D for single-band). 
+ Selecting a single band yields a 2D DataArray. chunks : int, tuple, or None - If set, return a Dask-chunked CuPy DataArray decoded one chunk - at a time. int for square chunks, (row, col) tuple for - rectangular. Each chunk task reads only the tiles overlapping - its window (CPU decode) and uploads the result to the device, - so peak GPU memory is bounded by chunk size. ``chunks=None`` - (default) decodes the full raster on the GPU in one pass. + [experimental] If set, return a Dask-chunked CuPy DataArray + decoded one chunk at a time. int for square chunks, + (row, col) tuple for rectangular. Each chunk task reads only + the tiles overlapping its window (CPU decode) and uploads the + result to the device, so peak GPU memory is bounded by chunk + size. ``chunks=None`` (default) decodes the full raster on the + GPU in one pass. name : str or None - Name for the DataArray. + [experimental] Name for the DataArray. max_pixels : int or None - Maximum allowed pixel count (width * height * samples). None - uses the default (~1 billion). + [experimental] Maximum allowed pixel count + (width * height * samples). None uses the default (~1 billion). on_gpu_failure : {'auto', 'strict'}, default 'auto' - Behaviour when any GPU decode stage raises an exception. + [experimental] Behaviour when any GPU decode stage raises an + exception. The GPU pipeline has two stages: first ``gpu_decode_tiles_from_file`` (GDS-style direct read), then ``gpu_decode_tiles`` over CPU-mmap @@ -167,45 +174,49 @@ def read_geotiff_gpu(source: str, *, ``cupy.asarray(...)`` upload (e.g. device OOM) still propagate unchanged in both modes. gpu : str, optional - Deprecated alias for ``on_gpu_failure``. Emits ``DeprecationWarning`` - when used. Passing both ``gpu`` and ``on_gpu_failure`` raises - ``TypeError``. The old name shipped with values ``'auto'`` / - ``'strict'`` and was easy to confuse with the boolean ``gpu=`` - kwarg on ``open_geotiff`` / ``to_geotiff`` / ``read_vrt``. 
+ [experimental] Deprecated alias for ``on_gpu_failure``. Emits + ``DeprecationWarning`` when used. Passing both ``gpu`` and + ``on_gpu_failure`` raises ``TypeError``. The old name shipped + with values ``'auto'`` / ``'strict'`` and was easy to confuse + with the boolean ``gpu=`` kwarg on ``open_geotiff`` / + ``to_geotiff`` / ``read_vrt``. mask_nodata : bool, default True - If True, replace the nodata sentinel with NaN (integer rasters - get promoted to ``float64`` first). If False, keep the source - dtype and leave the raw sentinel in the data. ``attrs['nodata']`` - carries the sentinel either way. Pass ``mask_nodata=False`` - together with ``dtype=`` to preserve an integer source - dtype on a file with a matching sentinel. See issue #2052. + [experimental] If True, replace the nodata sentinel with NaN + (integer rasters get promoted to ``float64`` first). If False, + keep the source dtype and leave the raw sentinel in the data. + ``attrs['nodata']`` carries the sentinel either way. Pass + ``mask_nodata=False`` together with ``dtype=`` to + preserve an integer source dtype on a file with a matching + sentinel. See issue #2052. allow_rotated : bool, default False - Read-side opt-in for rotated / sheared ``ModelTransformationTag`` - files. Forwarded through both GPU decode stages and the CPU - fallback so the rotated branch behaves the same regardless of - which stage produces the bytes. See ``open_geotiff`` for the - full contract; on the GPU path the result still lands as a - CuPy-backed DataArray. + [experimental] Read-side opt-in for rotated / sheared + ``ModelTransformationTag`` files. Forwarded through both GPU + decode stages and the CPU fallback so the rotated branch + behaves the same regardless of which stage produces the bytes. + See ``open_geotiff`` for the full contract; on the GPU path + the result still lands as a CuPy-backed DataArray. 
allow_unparseable_crs : bool, default False - Read-side opt-in for CRS strings that pyproj cannot resolve and - do not parse as WKT. ``False`` (the default since #1929) raises - ``UnparseableCRSError``; ``True`` keeps the pre-#1929 permissive - behaviour. See ``open_geotiff`` for the full description. + [experimental] Read-side opt-in for CRS strings that pyproj + cannot resolve and do not parse as WKT. ``False`` (the default + since #1929) raises ``UnparseableCRSError``; ``True`` keeps + the pre-#1929 permissive behaviour. See ``open_geotiff`` for + the full description. band_nodata : {'first', None}, optional - VRT-only. Accepted at the signature level for parity with - ``open_geotiff``; passing it to ``read_geotiff_gpu`` raises - ``ValueError`` because the GPU dispatcher rejects ``.vrt`` - sources up front and the kwarg only applies to VRT. See - ``read_vrt`` for the kwarg's meaning. + [internal-only] VRT-only. Accepted at the signature level for + parity with ``open_geotiff``; passing it to ``read_geotiff_gpu`` + raises ``ValueError`` because the GPU dispatcher rejects + ``.vrt`` sources up front and the kwarg only applies to VRT. + See ``read_vrt`` for the kwarg's meaning. missing_sources : {'raise', 'warn'}, optional - VRT-only. Same shape as ``band_nodata`` above: accepted for - signature parity, rejected at dispatch with ``ValueError`` for - non-VRT sources. See ``read_vrt`` for the full description. + [internal-only] VRT-only. Same shape as ``band_nodata`` above: + accepted for signature parity, rejected at dispatch with + ``ValueError`` for non-VRT sources. See ``read_vrt`` for the + full description. max_cloud_bytes : int or None, optional - Accepted for cross-backend signature symmetry only. The GPU - reader does not consume the cloud-byte budget; passing this - kwarg raises ``ValueError`` at dispatch (issue #1974). See - ``open_geotiff`` for the eager-path description. + [internal-only] Accepted for cross-backend signature symmetry + only. 
The GPU reader does not consume the cloud-byte budget; + passing this kwarg raises ``ValueError`` at dispatch (issue + #1974). See ``open_geotiff`` for the eager-path description. Returns ------- diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index c828df04..c67027bd 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -132,16 +132,23 @@ def read_vrt(source: str, *, mask_nodata: bool = True) -> xr.DataArray: """Read a GDAL Virtual Raster Table (.vrt) into an xarray.DataArray. - Tier: Advanced (issue #2137). VRT mosaics work and are tested, but - the caller should know the failure modes: cross-source nodata can - disagree (gated by ``band_nodata``), backing files can be missing - (gated by ``missing_sources``), and per-band metadata mismatch - raises a typed error rather than silently flattening. See - :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. + Release-contract tier (epic #2340; see + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst``): the + entry point is [advanced]. VRT mosaics work and are tested for a + narrow subset, but the caller should know the failure modes: + cross-source nodata can disagree (gated by ``band_nodata``), + backing files can be missing (gated by ``missing_sources``), and + per-band metadata mismatch raises a typed error rather than + silently flattening. Full GDAL VRT parity, warped / reprojection + VRTs, and arbitrary resampling are out of scope for this release. + See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier + map (issue #2137). 
Supported subset (issue #2321; see the "VRT support matrix" section - in ``docs/source/reference/geotiff.rst`` for the canonical - contract): + in ``docs/source/reference/geotiff.rst`` and the audited matrix in + ``docs/source/reference/release_gate_geotiff.rst`` for the + canonical contract): * Simple GDAL VRT mosaics whose source entries point at GeoTIFF files (sources must resolve under the VRT's own @@ -169,28 +176,34 @@ def read_vrt(source: str, *, Parameters ---------- source : str - Path to the .vrt file. + [advanced] Path to the .vrt file. dtype : str, numpy.dtype, or None - Cast the result to this dtype after reading. None keeps the - file's native dtype. Float-to-int casts raise ValueError. + [advanced] Cast the result to this dtype after reading. None + keeps the file's native dtype. Float-to-int casts raise + ValueError. window : tuple or None - (row_start, col_start, row_stop, col_stop) for windowed reading. + [advanced] (row_start, col_start, row_stop, col_stop) for + windowed reading. band : int or None - Band index (0-based). None returns all bands. + [advanced] Band index (0-based). None returns all bands. name : str or None - Name for the DataArray. + [advanced] Name for the DataArray. chunks : int, tuple, or None - If set, return a Dask-chunked DataArray. int for square chunks, - (row, col) tuple for rectangular. + [advanced] If set, return a Dask-chunked DataArray. int for + square chunks, (row, col) tuple for rectangular. gpu : bool - If True, return a CuPy-backed DataArray on GPU. + [experimental] If True, return a CuPy-backed DataArray on GPU. + Carries the same caveats as ``gpu=True`` on ``open_geotiff``: + cross-backend numerical parity is tested for the Tier 1 codec + set only. max_pixels : int or None - Maximum allowed pixel count (width * height * samples) for the - assembled VRT region. None uses the reader default (~1 billion). - Matches ``open_geotiff`` / ``read_geotiff_dask`` / - ``read_geotiff_gpu``. 
+ [advanced] Maximum allowed pixel count + (width * height * samples) for the assembled VRT region. None + uses the reader default (~1 billion). Matches ``open_geotiff`` + / ``read_geotiff_dask`` / ``read_geotiff_gpu``. missing_sources : {'raise', 'warn'}, default 'raise' - Policy for unreadable source files referenced by the VRT. + [advanced] Policy for unreadable source files referenced by + the VRT. ``'raise'`` (the default since #1860) fails immediately on an unreadable backing source so a partial mosaic never surfaces silently. This matches the internal ``_vrt.read_vrt`` default @@ -210,8 +223,9 @@ def read_vrt(source: str, *, ``XRSPATIAL_GEOTIFF_STRICT=1`` forces a raise across the whole module regardless of this kwarg. band_nodata : {'first', None}, optional - Opt-out for the fail-closed mixed-band-metadata check (issue - #1987 PR 5). ``None`` (the default) rejects a VRT whose bands + [advanced] Opt-out for the fail-closed mixed-band-metadata + check (issue #1987 PR 5). ``None`` (the default) rejects a VRT + whose bands declare disagreeing per-band ``<NoDataValue>`` sentinels with ``MixedBandMetadataError``; flattening to one value would otherwise let one band's valid pixels collide with another @@ -220,9 +234,9 @@ def read_vrt(source: str, *, other value raises ``ValueError`` at the boundary so typos surface up front instead of degrading silently into strict mode. mask_nodata : bool, default True - If True, run the integer-sentinel-to-NaN promotion on the - assembled mosaic. If False, skip it and keep the source dtype - with the raw sentinel still in the data. ``attrs['nodata']`` + [advanced] If True, run the integer-sentinel-to-NaN promotion + on the assembled mosaic. If False, skip it and keep the source + dtype with the raw sentinel still in the data. ``attrs['nodata']`` carries the sentinel either way.
Pass ``mask_nodata=False`` together with ``dtype=`` when you need to preserve an integer source dtype on a VRT whose declared sentinel matches @@ -230,32 +244,34 @@ def read_vrt(source: str, *, by virtue of how the internal reader handles them, so this kwarg is most useful for integer-dtype mosaics. allow_rotated : bool, default False - Read-side opt-in for rotated / sheared ``ModelTransformationTag`` - files referenced by the VRT. Forwarded to the per-source reader - for each ````. See ``open_geotiff`` for the full + [advanced] Read-side opt-in for rotated / sheared + ``ModelTransformationTag`` files referenced by the VRT. + Forwarded to the per-source reader for each + ````. See ``open_geotiff`` for the full contract. allow_unparseable_crs : bool, default False - Read-side opt-in for CRS strings that pyproj cannot resolve and - do not parse as WKT. ``False`` (the default since #1929) raises - ``UnparseableCRSError`` rather than carrying the unrecognised - payload through. See ``open_geotiff`` for the full description. + [advanced] Read-side opt-in for CRS strings that pyproj cannot + resolve and do not parse as WKT. ``False`` (the default since + #1929) raises ``UnparseableCRSError`` rather than carrying the + unrecognised payload through. See ``open_geotiff`` for the + full description. overview_level : int or None - Not supported for VRT sources. The VRT XML references its own - source files, so overview selection would need to apply to each - of them. Accepted at the signature level for cross-backend - symmetry; any value other than ``None`` or ``0`` raises - ``ValueError`` (issue #1685). + [internal-only] Not supported for VRT sources. The VRT XML + references its own source files, so overview selection would + need to apply to each of them. Accepted at the signature level + for cross-backend symmetry; any value other than ``None`` or + ``0`` raises ``ValueError`` (issue #1685). 
on_gpu_failure : str, optional - Accepted for cross-backend signature symmetry only. VRT reads - do not go through the GPU decoder pipeline, so passing this - kwarg raises ``ValueError`` at dispatch. See + [internal-only] Accepted for cross-backend signature symmetry + only. VRT reads do not go through the GPU decoder pipeline, so + passing this kwarg raises ``ValueError`` at dispatch. See ``read_geotiff_gpu`` for the kwarg's meaning on the GPU reader. max_cloud_bytes : int or None, optional - Accepted for cross-backend signature symmetry only. The VRT - reader does not consume the cloud-byte budget; passing this - kwarg raises ``ValueError`` at dispatch (issue #1974). See - ``open_geotiff`` for the eager-path description. + [internal-only] Accepted for cross-backend signature symmetry + only. The VRT reader does not consume the cloud-byte budget; + passing this kwarg raises ``ValueError`` at dispatch (issue + #1974). See ``open_geotiff`` for the eager-path description. Returns ------- diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 3bd370ce..77629458 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -963,19 +963,34 @@ def read_vrt(vrt_path: str, *, window=None, ) -> tuple[np.ndarray, VRTDataset]: """Read a VRT file by assembling pixel data from its source files. + Do not call this symbol directly from external code. Release-contract + tier (epic #2340): this is the [internal-only] pixel-assembly + helper. The public surface lives in + ``xrspatial.geotiff.read_vrt`` (re-exported from + ``_backends/vrt.py``) and carries the [advanced] tier; this + function is what that public wrapper calls into. See + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst`` for the + contract. Direct calls into this symbol bypass the dispatcher-level + validation in ``_validate_dispatch_kwargs`` and are not part of the + public API. 
+ Parameters ---------- vrt_path : str - Path to the .vrt file. + [internal-only] Path to the .vrt file. window : tuple or None - (row_start, col_start, row_stop, col_stop) for windowed read. + [internal-only] (row_start, col_start, row_stop, col_stop) for + windowed read. band : int or None - Band index (0-based). None returns all bands. + [internal-only] Band index (0-based). None returns all bands. max_pixels : int or None - Maximum allowed pixel count (width * height * samples) for the - assembled VRT region. None uses the reader default. + [internal-only] Maximum allowed pixel count + (width * height * samples) for the assembled VRT region. None + uses the reader default. missing_sources : {'raise', 'warn'}, default 'raise' - Policy for unreadable source files referenced by the VRT. + [internal-only] Policy for unreadable source files referenced + by the VRT. ``'raise'`` (the default) fails immediately on an unreadable source so a partial mosaic never surfaces silently. This matches the rest of the geotiff module's up-front rejection of malformed @@ -991,14 +1006,16 @@ def read_vrt(vrt_path: str, *, window=None, ``XRSPATIAL_GEOTIFF_STRICT=1`` forces a raise across the whole module regardless of this kwarg (see issue #1662). parsed : VRTDataset or None - Pre-parsed VRT structure. When supplied, ``vrt_path`` is not + [internal-only] Pre-parsed VRT structure. When supplied, + ``vrt_path`` is not re-read or re-parsed and the source-path containment check is skipped (the supplied ``VRTDataset`` is assumed to have been produced by :func:`parse_vrt` already, which performs the check). Used by the chunked dask path (issue #1825) so each per-chunk task can skip the redundant XML parse and allowlist validation. 
mask_nodata : bool, default True - If True (the default), float source bands have their declared + [internal-only] If True (the default), float source bands have + their declared nodata sentinel rewritten to NaN inline during assembly, and integer sources feeding a float-dataType VRT have their sentinel rewritten to NaN as part of the int->float @@ -1504,6 +1521,15 @@ def write_vrt(vrt_path: str, source_files: list[str], *, nodata: float | int | None = None) -> str: """Generate a VRT file that mosaics multiple GeoTIFF tiles. + Do not call this symbol directly from external code. Release-contract + tier (epic #2340): this is the [internal-only] VRT XML emitter. + The public surface lives in ``xrspatial.geotiff.write_vrt`` + (re-exported from ``_writers/vrt.py``) and carries the [advanced] + tier; this function is what that public wrapper calls into. See + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst`` for the + contract. + Each source file is placed in the virtual raster based on its geo transform. All sources must share the same pixel size, dtype (sample format + bits-per-sample), band count, and CRS. Mismatches @@ -1513,20 +1539,21 @@ def write_vrt(vrt_path: str, source_files: list[str], *, Parameters ---------- vrt_path : str - Output .vrt file path. + [internal-only] Output .vrt file path. source_files : list of str - Paths to the source GeoTIFF files. + [internal-only] Paths to the source GeoTIFF files. relative : bool - Store source paths relative to the VRT file. + [internal-only] Store source paths relative to the VRT file. crs_wkt : str or None - CRS as WKT string. If None, taken from the first source. + [internal-only] CRS as WKT string. If None, taken from the + first source. nodata : float, int, or None - NoData value applied to every band of the mosaic. Caller-supplied - value takes precedence; when ``None``, the first source's - per-band nodata is used. Integer sentinels (e.g. 
``65535`` for - uint16, ``-9999`` for int32) are accepted so the surface lines up - with the ``nodata`` kwarg on ``to_geotiff`` and - ``write_geotiff_gpu``. + [internal-only] NoData value applied to every band of the + mosaic. Caller-supplied value takes precedence; when ``None``, + the first source's per-band nodata is used. Integer sentinels + (e.g. ``65535`` for uint16, ``-9999`` for int32) are accepted + so the surface lines up with the ``nodata`` kwarg on + ``to_geotiff`` and ``write_geotiff_gpu``. Returns ------- diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 8824b640..352ad1b0 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -62,16 +62,36 @@ def to_geotiff(data: xr.DataArray | np.ndarray, drop_rotation: bool = False) -> str | BinaryIO: """Write data as a GeoTIFF or Cloud Optimized GeoTIFF. - Tier: Stable for local-file output with ``compression`` in - ``{'none', 'deflate', 'lzw', 'packbits', 'zstd'}`` on an axis-aligned - grid. ``cog=True`` / overviews / BigTIFF are Advanced (work, but the - caller should know the failure modes). GPU output, GDAL XML metadata - pass-through, and ``extra_tags`` are Experimental. ``compression`` in - ``{'lerc', 'jpeg2000', 'j2k', 'lz4'}`` is Experimental and requires - ``allow_experimental_codecs=True``. ``compression='jpeg'`` is - Internal-only and requires the dedicated ``allow_internal_only_jpeg`` - flag. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full - tier map (issue #2137). + Release-contract tier (epic #2340; see + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst``): + + * [stable] Local-file output on an axis-aligned grid with + ``compression`` in ``{'none', 'deflate', 'lzw', 'packbits', + 'zstd'}``; CRS / transform / nodata attrs round-trip; ``bigtiff`` + auto-promotion. 
+ * [advanced] ``cog=True`` and overview generation; explicit + ``bigtiff=True``; ``photometric=`` overrides; ``extra_tags`` + pass-through. + * [experimental] GPU dispatch via ``gpu=True``; + ``compression`` in ``{'lerc', 'jpeg2000', 'j2k', 'lz4'}`` behind + the explicit ``allow_experimental_codecs=True`` opt-in; + ``allow_unparseable_crs=True``. + * [internal-only] ``compression='jpeg'`` behind + ``allow_internal_only_jpeg=True``. The produced files do not + round-trip through libtiff / GDAL / rasterio; the path exists for + xrspatial's own use and is not part of the externally + interoperable surface. + * Out of scope for this release (allowed to raise): rotated / + sheared write support (``ModelTransformationTag`` emit, tracked + separately in #2115); silent mixed-metadata flattening. + + See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier + map (issue #2137). Per-parameter tier markers below describe the + tier the parameter itself carries; a parameter's effective tier + is bounded by the function-level surface above (e.g. ``[stable]`` + ``nodata`` is still only stable when combined with a ``[stable]`` + codec and options). Dask-backed DataArrays are written in streaming mode: one tile-row at a time, without materialising the full array into RAM. Peak @@ -89,13 +109,16 @@ def to_geotiff(data: xr.DataArray | np.ndarray, Parameters ---------- data : xr.DataArray or np.ndarray - 2D raster data. + [stable] 2D raster data. path : str or binary file-like - Output file path, or any object exposing a ``write`` method - (e.g. ``io.BytesIO``). When a file-like is passed, the encoded - TIFF bytes are written to that object once assembly completes. - ``cog=True`` and ``.vrt`` outputs require a string path. + [stable for local file paths; advanced for ``io.BytesIO`` and + other in-memory file-likes] Output file path, or any object + exposing a ``write`` method (e.g. ``io.BytesIO``). 
When a + file-like is passed, the encoded TIFF bytes are written to + that object once assembly completes. ``cog=True`` and ``.vrt`` + outputs require a string path. crs : int, numpy.integer, str, or None + [stable for int EPSG codes; advanced for WKT/PROJ strings] EPSG code (int or numpy integer scalar), WKT string, or PROJ string. If None and data is a DataArray, tries to read from attrs ('crs' for EPSG, 'crs_wkt' for WKT). @@ -108,11 +131,15 @@ def to_geotiff(data: xr.DataArray | np.ndarray, ``UserWarning`` is emitted when the WKT-only path is taken. See issue #1768. nodata : float, int, or None - NoData value. + [stable] NoData value. compression : str - Codec name. One of ``'none'``, ``'deflate'``, ``'lzw'``, - ``'jpeg'``, ``'packbits'``, ``'zstd'``, ``'lz4'``, - ``'jpeg2000'`` (alias ``'j2k'``), or ``'lerc'``. + [stable for ``{'none', 'deflate', 'lzw', 'packbits', 'zstd'}``; + experimental for ``{'lerc', 'jpeg2000', 'j2k', 'lz4'}`` behind + ``allow_experimental_codecs=True``; internal-only for + ``'jpeg'`` behind ``allow_internal_only_jpeg=True``] Codec + name. One of ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``, + ``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias + ``'j2k'``), or ``'lerc'``. Stable codecs (Tier 1, lossless, byte-for-byte round-trip): ``'none'``, ``'deflate'``, ``'lzw'``, ``'packbits'``, @@ -136,22 +163,23 @@ def to_geotiff(data: xr.DataArray | np.ndarray, internal-only is a stricter tier than experimental, and the two flags do not collapse into one switch. compression_level : int or None - Compression effort level. None uses each codec's default (6 for - deflate/zstd). Valid ranges: deflate 1-9, zstd 1-22, lz4 0-16. - Codecs without a level concept (lzw, packbits, jpeg) accept any - value and ignore it. + [stable] Compression effort level. None uses each codec's + default (6 for deflate/zstd). Valid ranges: deflate 1-9, + zstd 1-22, lz4 0-16. 
Codecs without a level concept (lzw, + packbits, jpeg) accept any value and ignore it. tiled : bool - Use tiled layout (default True). Incompatible with ``cog=True`` - because the COG specification requires a tiled internal layout; - passing ``cog=True, tiled=False`` raises ``ValueError`` (#2312). + [stable] Use tiled layout (default True). Incompatible with + ``cog=True`` because the COG specification requires a tiled + internal layout; passing ``cog=True, tiled=False`` raises + ``ValueError`` (#2312). tile_size : int - Tile size in pixels (default 256). Must be a positive multiple - of 16 when ``tiled=True``; this is a TIFF 6 spec requirement - on TileWidth and TileLength for broad reader compatibility. - Ignored when ``tiled=False``; a warning is emitted if a - non-default value is passed alongside strip mode. + [stable] Tile size in pixels (default 256). Must be a positive + multiple of 16 when ``tiled=True``; this is a TIFF 6 spec + requirement on TileWidth and TileLength for broad reader + compatibility. Ignored when ``tiled=False``; a warning is + emitted if a non-default value is passed alongside strip mode. predictor : bool or int - TIFF predictor. Accepted values: + [stable] TIFF predictor. Accepted values: * ``False``, ``0``, or ``1`` -> no predictor. * ``True`` or ``2`` -> horizontal differencing (good for integer @@ -159,15 +187,15 @@ def to_geotiff(data: xr.DataArray | np.ndarray, * ``3`` -> floating-point predictor (float dtypes only; typically gives better deflate/zstd ratios on float data than predictor 2). cog : bool - Advanced: COG output materialises the full array because - overview pyramids need it, and the all-IFDs-at-file-start layout - only round-trips through readers that honour the COG layout - contract. Write as Cloud Optimized GeoTIFF. 
Requires + [advanced] COG output materialises the full array because + overview pyramids need it, and the all-IFDs-at-file-start + layout only round-trips through readers that honour the COG + layout contract. Write as Cloud Optimized GeoTIFF. Requires ``tiled=True`` (the default): the COG specification mandates a tiled internal layout, so ``cog=True, tiled=False`` raises ``ValueError`` (#2312). overview_levels : list[int] or None - Advanced: overview pyramids are an optional COG feature; the + [advanced] Overview pyramids are an optional COG feature; the decimation factors and resampling choice affect downstream analytics in ways that are not byte-for-byte reproducible across backends. Overview decimation factors relative to full @@ -180,34 +208,40 @@ def to_geotiff(data: xr.DataArray | np.ndarray, ``[2, 4, 8, ...]`` until the next halving would fall below ``tile_size`` (capped at 8 levels). overview_resampling : str - Resampling method for overviews: 'mean' (default), 'nearest', - 'min', 'max', 'median', 'mode', or 'cubic'. + [advanced] Resampling method for overviews: 'mean' (default), + 'nearest', 'min', 'max', 'median', 'mode', or 'cubic'. bigtiff : bool or None - Advanced: BigTIFF uses 64-bit offsets; older readers that only + [advanced] BigTIFF uses 64-bit offsets; older readers that only speak classic TIFF cannot open the output. Force BigTIFF (64-bit offsets). None (default) auto-promotes when the estimated file size would exceed the classic-TIFF 4 GB limit. Matches the same kwarg on ``write_geotiff_gpu``. gpu : bool or None - Experimental: requires cupy + numba CUDA, plus the optional + [experimental] Requires cupy + numba CUDA, plus the optional nvCOMP / nvJPEG / nvJPEG2K libraries for codec-specific acceleration; backend parity with the CPU writer is tested for the Tier 1 codec set only. Force GPU compression. None (default) auto-detects CuPy data. 
streaming_buffer_bytes : int - Soft cap on bytes materialised per dask compute call when - streaming a dask-backed DataArray. Defaults to 256 MB. Wide - rasters whose tile-row exceeds this budget are split into - horizontal segments. Ignored for numpy / CuPy / COG paths. + [stable] Soft cap on bytes materialised per dask compute call + when streaming a dask-backed DataArray. Defaults to 256 MB. + Wide rasters whose tile-row exceeds this budget are split into + horizontal segments. Only relevant for dask-backed inputs; the + kwarg is a no-op for numpy / CuPy / COG paths (the COG path + materialises the full array because the overview pyramid + needs it). max_z_error : float - Per-pixel error budget for LERC compression. ``0.0`` (default) - is lossless; larger values let the encoder approximate values - within the bound, producing smaller files at the cost of accuracy - bounded by ``abs(decoded - original) <= max_z_error``. Only used - when ``compression='lerc'``; passing a non-zero value with any - other codec raises ``ValueError``. + [experimental] Per-pixel error budget for LERC compression. + ``0.0`` (default) is lossless; larger values let the encoder + approximate values within the bound, producing smaller files + at the cost of accuracy bounded by + ``abs(decoded - original) <= max_z_error``. Only used when + ``compression='lerc'`` (which itself requires + ``allow_experimental_codecs=True``); passing a non-zero value + with any other codec raises ``ValueError``. photometric : str or int - Photometric interpretation for the TIFF Photometric tag (262). + [advanced] Photometric interpretation for the TIFF Photometric + tag (262). * ``'auto'`` (default) -- MinIsBlack (1) for any band count. ExtraSamples for every band beyond the first is tagged ``0`` @@ -238,7 +272,8 @@ def to_geotiff(data: xr.DataArray | np.ndarray, auto-emitted tags such as ``ImageWidth`` or ``StripOffsets`` remain protected. 
allow_experimental_codecs : bool - Opt in to the Tier 3 experimental codecs ``'lerc'``, + [experimental] Opt in to the Tier 3 experimental codecs + ``'lerc'``, ``'jpeg2000'`` / ``'j2k'``, and ``'lz4'`` (default ``False``). Setting ``compression=`` to one of those codecs without this flag raises ``ValueError`` whose message names the flag. With @@ -253,20 +288,23 @@ def to_geotiff(data: xr.DataArray | np.ndarray, unchanged to ``write_geotiff_gpu`` on the GPU dispatch path. See issue #2137. allow_internal_only_jpeg : bool - Opt in to the experimental ``compression='jpeg'`` encode path - (default ``False``). The encoder writes self-contained JFIF - tiles without the TIFF JPEGTables tag (347); the file decodes - through this library's reader but not through libtiff, GDAL, - or rasterio. With the flag set, the write proceeds and a + [internal-only] Opt in to the ``compression='jpeg'`` encode + path (default ``False``). The encoder writes self-contained + JFIF tiles without the TIFF JPEGTables tag (347); the file + decodes through this library's reader but not through libtiff, + GDAL, or rasterio. This codec is internal-only for the release + contract: it is not externally interoperable and the path + exists so xrspatial can round-trip its own JPEG output. With the flag set, the write proceeds and a ``GeoTIFFFallbackWarning`` is emitted at call time. Without the flag, ``compression='jpeg'`` raises ``ValueError``. The kwarg is forwarded unchanged to ``write_geotiff_gpu`` on the GPU dispatch path so callers can reach the same experimental encode via ``to_geotiff(..., gpu=True)``. See issue #1845. allow_unparseable_crs : bool - Opt in to writing an unvalidatable CRS string into - ``GTCitationGeoKey`` (default ``False``). When ``False`` (the - default since #1929), a ``crs=`` value that is neither an EPSG + [experimental] Opt in to writing an unvalidatable CRS string + into ``GTCitationGeoKey`` (default ``False``). 
When ``False`` + (the default since #1929), a ``crs=`` value that is neither an + EPSG int nor a string that pyproj can resolve and is not structurally WKT (no ``PROJCS`` / ``GEOGCS`` / ``PROJCRS`` / ``GEOGCRS`` root) raises ``ValueError`` instead of landing @@ -276,7 +314,7 @@ def to_geotiff(data: xr.DataArray | np.ndarray, ``"EPSG:4326"`` token on a host without pyproj produces a citation that most readers cannot interpret. See issue #1929. drop_rotation : bool, default False - Opt in to writing a DataArray that carries + [advanced] Opt in to writing a DataArray that carries ``attrs['rotated_affine']`` (issue #2216). The reader sets that attr when called with ``allow_rotated=True`` on a file whose ``ModelTransformationTag`` contains rotation, shear, or diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 60a4ccc5..5dbe8d5d 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -81,16 +81,21 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, ) -> str | BinaryIO: """Write a CuPy-backed DataArray as a GeoTIFF with GPU compression. - Tier: Experimental (issue #2137). The GPU writer requires cupy + - numba CUDA plus optional nvCOMP / nvJPEG / nvJPEG2K libraries for - codec-specific acceleration; cross-backend numerical parity with - ``to_geotiff`` is tested for the Tier 1 codec set only. Tier 3 - codecs (``'lerc'``, ``'jpeg2000'`` / ``'j2k'``, ``'lz4'``) require - the explicit ``allow_experimental_codecs=True`` opt-in; the - internal-only ``'jpeg'`` codec keeps its own dedicated + Release-contract tier (epic #2340; see + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst``): the + entire entry point is [experimental]. The surface may shift + without a deprecation window and ``to_geotiff`` is the canonical + writer. 
Requires cupy + numba CUDA plus optional nvCOMP / nvJPEG / + nvJPEG2K libraries for codec-specific acceleration; cross-backend + numerical parity with ``to_geotiff`` is tested for the Tier 1 + codec set only. Tier 3 codecs (``'lerc'``, ``'jpeg2000'`` / + ``'j2k'``, ``'lz4'``) require the explicit + ``allow_experimental_codecs=True`` opt-in; the [internal-only] + ``'jpeg'`` codec keeps its own dedicated ``allow_internal_only_jpeg`` flag. See - :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier - map. + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map + (issue #2137). Tiles are extracted and compressed on the GPU via nvCOMP, then assembled into a TIFF file on CPU. The CuPy array stays on device @@ -106,29 +111,34 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, Parameters ---------- data : xr.DataArray (CuPy- or NumPy-backed), cupy.ndarray, or np.ndarray - 2D or 3D raster. CuPy-backed inputs stay on device; NumPy/Dask - inputs are uploaded via ``cupy.asarray(np.asarray(data))`` - before compression (matches ``to_geotiff`` parity). + [experimental] 2D or 3D raster. CuPy-backed inputs stay on + device; NumPy/Dask inputs are uploaded via + ``cupy.asarray(np.asarray(data))`` before compression (matches + ``to_geotiff`` parity). path : str or binary file-like - Output file path or any object with a ``write`` method - (e.g. ``io.BytesIO``). ``cog=True`` requires a string path: - the auto-dispatch path through ``to_geotiff(gpu=True, cog=True)`` - rejects file-like destinations, and the explicit GPU writer - mirrors that rule (issue #1652). + [experimental] Output file path or any object with a ``write`` + method (e.g. ``io.BytesIO``). ``cog=True`` requires a string + path: the auto-dispatch path through + ``to_geotiff(gpu=True, cog=True)`` rejects file-like + destinations, and the explicit GPU writer mirrors that rule + (issue #1652). 
crs : int, numpy.integer, str, or None - EPSG code (int or numpy integer scalar) or WKT string. EPSG - codes are strongly preferred for interop; the WKT-only path - emits a user-defined CRS (32767) with the WKT stored in - ``GTCitationGeoKey``, which many non-libgeotiff readers - ignore. A ``UserWarning`` is emitted when the WKT-only path - is taken. See issue #1768. + [experimental] EPSG code (int or numpy integer scalar) or WKT + string. EPSG codes are strongly preferred for interop; the + WKT-only path emits a user-defined CRS (32767) with the WKT + stored in ``GTCitationGeoKey``, which many non-libgeotiff + readers ignore. A ``UserWarning`` is emitted when the WKT-only + path is taken. See issue #1768. nodata : float, int, or None - NoData value. + [experimental] NoData value. compression : str - Codec name. Accepts the same set ``to_geotiff`` lists in its - own signature: ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``, - ``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias - ``'j2k'``), or ``'lerc'``. + [experimental for Tier 1 codecs on this path; experimental + gated by ``allow_experimental_codecs=True`` for Tier 3 codecs; + internal-only gated by ``allow_internal_only_jpeg=True`` for + ``'jpeg'``] Codec name. Accepts the same set ``to_geotiff`` + lists in its own signature: ``'none'``, ``'deflate'``, + ``'lzw'``, ``'jpeg'``, ``'packbits'``, ``'zstd'``, ``'lz4'``, + ``'jpeg2000'`` (alias ``'j2k'``), or ``'lerc'``. Routing per codec: @@ -154,27 +164,31 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, nvCOMP/CUDA accelerator, so these fall through to the CPU encoder for byte-stable parity with ``to_geotiff``. compression_level : int or None - Compression effort level. Accepted for API compatibility but - currently ignored -- nvCOMP does not expose level control. + [experimental] Compression effort level. Accepted for API + compatibility but currently ignored -- nvCOMP does not expose + level control. 
tiled : bool - Must be True (default). The GPU writer is tiled-only because - nvCOMP batch compression operates on per-tile streams; passing - ``tiled=False`` raises ``ValueError`` rather than silently - producing a tiled file. Accepted for API parity with - ``to_geotiff``. + [experimental] Must be True (default). The GPU writer is + tiled-only because nvCOMP batch compression operates on + per-tile streams; passing ``tiled=False`` raises ``ValueError`` + rather than silently producing a tiled file. Accepted for API + parity with ``to_geotiff``. tile_size : int - Tile size in pixels (default 256). Must be a positive multiple - of 16; this is a TIFF 6 spec requirement on TileWidth and - TileLength for broad reader compatibility. ``write_geotiff_gpu`` - is always tiled, so the check fires for every call. + [experimental] Tile size in pixels (default 256). Must be a + positive multiple of 16; this is a TIFF 6 spec requirement on + TileWidth and TileLength for broad reader compatibility. + ``write_geotiff_gpu`` is always tiled, so the check fires for + every call. predictor : bool or int - TIFF predictor. ``False``/``0``/``1`` -> none, ``True``/``2`` -> - horizontal differencing, ``3`` -> floating-point predictor - (float dtypes only). + [experimental] TIFF predictor. ``False``/``0``/``1`` -> none, + ``True``/``2`` -> horizontal differencing, ``3`` -> + floating-point predictor (float dtypes only). cog : bool - Write as Cloud Optimized GeoTIFF with overviews. + [experimental] Write as Cloud Optimized GeoTIFF with + overviews. overview_levels : list[int] or None - Overview decimation factors relative to full resolution. + [experimental] Overview decimation factors relative to full + resolution. Each entry must be a power-of-two integer >= 2, and the list must be strictly increasing (e.g. ``[2, 4, 8]`` writes overviews at 1/2, 1/4 and 1/8 of the full resolution). 
@@ -182,33 +196,37 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, If None and ``cog=True``, auto-generates ``[2, 4, 8, ...]`` by halving until the smallest overview fits in a single tile. overview_resampling : str - Resampling method for overviews: 'mean' (default), 'nearest', - 'min', 'max', 'median', 'mode', or 'cubic'. ``mode`` and - ``cubic`` fall back to the CPU implementation in - ``xrspatial.geotiff._writer`` so the GPU writer produces the - same overview bytes as the CPU writer. + [experimental] Resampling method for overviews: 'mean' + (default), 'nearest', 'min', 'max', 'median', 'mode', or + 'cubic'. ``mode`` and ``cubic`` fall back to the CPU + implementation in ``xrspatial.geotiff._writer`` so the GPU + writer produces the same overview bytes as the CPU writer. bigtiff : bool or None - Force BigTIFF (64-bit offsets). None auto-promotes when the - estimated file size would exceed the classic-TIFF 4 GB limit. + [experimental] Force BigTIFF (64-bit offsets). None + auto-promotes when the estimated file size would exceed the + classic-TIFF 4 GB limit. streaming_buffer_bytes : int - Accepted for API parity with ``to_geotiff``. The GPU writer - materialises the entire array on device and has no streaming - concept, so this kwarg is a no-op. Default matches + [internal-only] Accepted for API parity with ``to_geotiff``. + The GPU writer materialises the entire array on device and has + no streaming concept, so this kwarg is a no-op. Default matches ``to_geotiff`` (256 MB) so callers passing the same kwargs to either entry point see the same default and the same type. max_z_error : float - Per-pixel error budget for LERC compression. The GPU writer - does not implement LERC (nvCOMP has no LERC backend), so any - non-zero value raises ``ValueError``. Accepted at the signature - level for API parity with ``to_geotiff``. + [internal-only] Per-pixel error budget for LERC compression. 
+ The GPU writer does not implement LERC (nvCOMP has no LERC + backend), so any non-zero value raises ``ValueError``. + Accepted at the signature level for API parity with + ``to_geotiff``. photometric : str or int - Photometric interpretation for the TIFF Photometric tag (262). - See :func:`to_geotiff` for the full set of accepted values; the - GPU writer forwards this kwarg unchanged. Default ``'auto'`` - writes MinIsBlack for any band count, so a 4-band raster is - not silently tagged as RGB+alpha (issue #1769). + [experimental] Photometric interpretation for the TIFF + Photometric tag (262). See :func:`to_geotiff` for the full set + of accepted values; the GPU writer forwards this kwarg + unchanged. Default ``'auto'`` writes MinIsBlack for any band + count, so a 4-band raster is not silently tagged as RGB+alpha + (issue #1769). allow_experimental_codecs : bool - Opt in to the Tier 3 experimental codecs ``'lerc'``, + [experimental] Opt in to the Tier 3 experimental codecs + ``'lerc'``, ``'jpeg2000'`` / ``'j2k'``, and ``'lz4'`` (default ``False``). Mirrors the same kwarg on ``to_geotiff`` so the two writers expose a consistent surface; the GPU dispatch path through @@ -220,21 +238,21 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, the internal-only JPEG path keeps its own dedicated ``allow_internal_only_jpeg`` flag. See issue #2137. allow_internal_only_jpeg : bool - Opt in to the experimental ``compression='jpeg'`` encode path - (default ``False``). The encoder emits self-contained JFIF - tiles without the TIFF JPEGTables tag (347); the file decodes - through this library's reader but not through libtiff, GDAL, - or rasterio. With the flag set, the write proceeds and a + [internal-only] Opt in to the ``compression='jpeg'`` encode + path (default ``False``). The encoder emits self-contained + JFIF tiles without the TIFF JPEGTables tag (347); the file + decodes through this library's reader but not through libtiff, + GDAL, or rasterio. 
With the flag set, the write proceeds and a ``GeoTIFFFallbackWarning`` is emitted at call time. Without the flag, ``compression='jpeg'`` raises ``ValueError`` for parity with ``to_geotiff``. See issue #1845. allow_unparseable_crs : bool - Opt in to writing an unvalidatable CRS string into - ``GTCitationGeoKey`` (default ``False``). See + [experimental] Opt in to writing an unvalidatable CRS string + into ``GTCitationGeoKey`` (default ``False``). See :func:`to_geotiff` for the full description; the GPU writer applies the same fail-closed default. See issue #1929. drop_rotation : bool, default False - Opt in to writing a DataArray that carries + [experimental] Opt in to writing a DataArray that carries ``attrs['rotated_affine']``. Mirrors the same kwarg on ``to_geotiff`` so the two writers share one gate. Default ``False`` refuses the write with ``ValueError``; the GPU diff --git a/xrspatial/geotiff/_writers/vrt.py b/xrspatial/geotiff/_writers/vrt.py index 13f985ce..2c3529f2 100644 --- a/xrspatial/geotiff/_writers/vrt.py +++ b/xrspatial/geotiff/_writers/vrt.py @@ -24,17 +24,24 @@ def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, nodata: float | int | None = None) -> str: """Generate a VRT file that mosaics multiple GeoTIFF tiles. - Tier: Advanced (issue #2137). VRT mosaic output is supported but - the caller should know the failure modes on the read side: a - consumer reading the resulting ``.vrt`` may hit cross-source - nodata mismatch, missing backing files, or per-band metadata - disagreement. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for - the full tier map. + Release-contract tier (epic #2340; see + ``docs/source/reference/release_gate_geotiff.rst`` and + ``docs/source/reference/geotiff_release_contract.rst``): the + entry point is [advanced]. VRT mosaic output is supported but + targets a narrow subset of GDAL's VRT spec; the caller should + know the failure modes on the read side. 
A consumer reading the + resulting ``.vrt`` may hit cross-source nodata mismatch, missing + backing files, or per-band metadata disagreement. Full GDAL VRT + parity, warped / reprojection VRTs, and nested VRTs are out of + scope for this release. See + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map + (issue #2137). Output targets the same narrow subset of GDAL's VRT spec that the reader supports (issue #2321; see the "VRT support matrix" section - in ``docs/source/reference/geotiff.rst`` for the canonical - contract): + in ``docs/source/reference/geotiff.rst`` and the audited matrix in + ``docs/source/reference/release_gate_geotiff.rst`` for the + canonical contract): * Supported: simple GDAL VRT mosaics over GeoTIFF sources; compatible CRS, transform orientation, pixel size, dtype, and @@ -52,37 +59,41 @@ def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, Parameters ---------- path : str - Output .vrt file path. Mirrors the ``path`` kwarg on - ``to_geotiff`` and ``write_geotiff_gpu`` so the writer trio + [advanced] Output .vrt file path. Mirrors the ``path`` kwarg + on ``to_geotiff`` and ``write_geotiff_gpu`` so the writer trio shares a single destination-arg name (issue #1946). source_files : list of str - Paths to the source GeoTIFF files. + [advanced] Paths to the source GeoTIFF files. vrt_path : str, optional - Deprecated alias for ``path``. Emits ``DeprecationWarning`` when - supplied; passing both ``path`` and ``vrt_path`` raises - ``TypeError``. Kept so existing callers (``write_vrt(vrt_path, - sources)`` positional or ``write_vrt(vrt_path=...)`` keyword) - keep working through the deprecation window. New code should - use ``path``. See issue #1946. + [internal-only] Deprecated alias for ``path``. Emits + ``DeprecationWarning`` when supplied; passing both ``path`` + and ``vrt_path`` raises ``TypeError``. 
Kept so existing + callers (``write_vrt(vrt_path, sources)`` positional or + ``write_vrt(vrt_path=...)`` keyword) keep working through the + deprecation window. New code should use ``path``. See issue + #1946. relative : bool, optional - Store source paths relative to the VRT file (default True). + [advanced] Store source paths relative to the VRT file + (default True). crs : int, str, or None, optional - EPSG code (int), WKT string, or PROJ string. If None, the CRS - is taken from the first source GeoTIFF. Mirrors the ``crs`` - kwarg on ``to_geotiff`` and ``write_geotiff_gpu`` so the same - value can be forwarded to whichever writer the caller picked - without per-writer special-casing (issue #1715). + [advanced] EPSG code (int), WKT string, or PROJ string. If + None, the CRS is taken from the first source GeoTIFF. Mirrors + the ``crs`` kwarg on ``to_geotiff`` and ``write_geotiff_gpu`` + so the same value can be forwarded to whichever writer the + caller picked without per-writer special-casing (issue #1715). crs_wkt : str or None, optional - Deprecated alias for ``crs``. Emits ``DeprecationWarning`` when - supplied (including ``crs_wkt=None``); passing both ``crs`` and - ``crs_wkt`` raises ``TypeError``. The value is forwarded through - the same ``_resolve_crs_to_wkt`` path as ``crs``, so any string - the resolver accepts (WKT root keyword, PROJ string, + [internal-only] Deprecated alias for ``crs``. Emits + ``DeprecationWarning`` when supplied (including + ``crs_wkt=None``); passing both ``crs`` and ``crs_wkt`` raises + ``TypeError``. The value is forwarded through the same + ``_resolve_crs_to_wkt`` path as ``crs``, so any string the + resolver accepts (WKT root keyword, PROJ string, ``"EPSG:NNNN"``) and ``None`` work here. The historic - ``str | None`` surface is preserved; new code should use ``crs`` - instead, which additionally accepts ``int`` EPSG codes. 
+ ``str | None`` surface is preserved; new code should use + ``crs`` instead, which additionally accepts ``int`` EPSG codes. nodata : float, int, or None, optional - NoData value. If None, taken from the first source GeoTIFF. + [advanced] NoData value. If None, taken from the first source + GeoTIFF. Integer sentinels (e.g. ``65535`` for uint16, ``-9999`` for int32) are accepted so the surface lines up with the ``nodata`` kwarg on ``to_geotiff`` and ``write_geotiff_gpu``.