diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 4644637a..6eb04594 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -241,6 +241,7 @@ def _read_geo_info(source, *, overview_level: int | None = None, # binding it into the chunk closure (#1809). geo_info._ifd_photometric = _ifd.photometric geo_info._ifd_samples_per_pixel = _ifd.samples_per_pixel + geo_info._ifd_compression = _ifd.compression return geo_info, _ifd.height, _ifd.width, file_dtype, n_bands if _is_file_like(source): # File-like: read its full bytes; we don't try to mmap arbitrary @@ -316,6 +317,11 @@ def _read_geo_info(source, *, overview_level: int | None = None, # binding it into the chunk closure (#1809). geo_info._ifd_photometric = ifd.photometric geo_info._ifd_samples_per_pixel = ifd.samples_per_pixel + # Stash compression so the dask graph builder can fire the + # experimental / internal-only codec opt-in gate at graph build + # rather than waiting for the per-chunk task to fail (PR 4 of + # epic #2340). + geo_info._ifd_compression = ifd.compression return geo_info, ifd.height, ifd.width, file_dtype, n_bands finally: if close_data: @@ -339,6 +345,8 @@ def open_geotiff(source: str | BinaryIO, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True, ) -> xr.DataArray: @@ -535,6 +543,23 @@ def open_geotiff(source: str | BinaryIO, *, behaviour where the citation field passes through unchanged. Matches the same kwarg on ``to_geotiff`` / ``write_geotiff_gpu`` so a value the reader accepted can survive a round-trip. + allow_experimental_codecs : bool, default False + Read-side opt-in for sources compressed with the Tier 3 + experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). + Default ``False`` rejects the read with ``ValueError`` naming + the flag; cross-backend numerical parity is not claimed and + reader support across GDAL versions is uneven. Matches the + same kwarg on the writers so a round-trip through a Tier 3 + codec stays opt-in on both sides. See SUPPORTED_FEATURES tier + ``'experimental'`` (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + Read-side opt-in for JPEG-in-TIFF sources. The encoder writes + self-contained JFIF tiles without the TIFF JPEGTables tag + (347), so the read path is not interoperable with libtiff / + GDAL / rasterio. ``allow_experimental_codecs=True`` does NOT + cover this codec; the dedicated flag is its only gate. See + SUPPORTED_FEATURES tier ``'internal_only'`` for ``codec.jpeg`` + (epic #2340 PR 4, original writer gate #1845). Returns ------- @@ -665,6 +690,8 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, mask_nodata=mask_nodata, **vrt_kwargs) @@ -685,6 +712,10 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=( + allow_experimental_codecs), + allow_internal_only_jpeg=( + allow_internal_only_jpeg), mask_nodata=mask_nodata, **gpu_kwargs) @@ -696,6 +727,10 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=( + allow_experimental_codecs), + allow_internal_only_jpeg=( + allow_internal_only_jpeg), mask_nodata=mask_nodata) kwargs = {} @@ -714,6 +749,8 @@ def open_geotiff(source: str | BinaryIO, *, source, window=window, overview_level=overview_level, band=band, allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, **kwargs, ) diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 369dac84..783bed64 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -354,6 +354,166 @@ ) +# Map TIFF compression tag values to codec names so the read-side opt-in +# gate (PR 4 of epic #2340) can name the codec in the rejection message +# without each call site repeating the integer-to-name table. The keys +# are the TIFF 6 Compression tag values (tag 259) used inside +# ``_compression.py``; the values match the codec names that appear on +# the ``SUPPORTED_FEATURES`` keys (``codec.``). +_COMPRESSION_TAG_TO_NAME = { + 1: 'none', + 5: 'lzw', + 7: 'jpeg', + 8: 'deflate', + 32773: 'packbits', + # Adobe Deflate (32946) decodes through the same zlib path as + # plain Deflate (8) and is collapsed onto the same codec name on + # purpose: both tags share the stable-tier classification in + # ``SUPPORTED_FEATURES`` (``codec.deflate``). A future Adobe- + # Deflate-specific tier would need its own ``codec.`` entry + # AND its own mapping line here; the collapse is deliberate, not + # a passthrough. + 32946: 'deflate', + 34712: 'jpeg2000', + 34887: 'lerc', + 50000: 'zstd', + 50004: 'lz4', +} + + +def _validate_read_codec_optin( + compression: int, + *, + allow_experimental_codecs: bool, + allow_internal_only_jpeg: bool, + entry_point: str = "open_geotiff", +) -> None: + """Reject experimental / internal-only codecs on the read side. + + Mirrors the writer-side gate in ``_writers/eager.py`` / + ``_writers/gpu.py`` so a caller cannot decode a file produced with + an experimental or internal-only codec without naming the matching + opt-in flag at the call site. The flag and feature both appear in + the rejection message so the caller learns the name from the error + rather than the docs. + + Part of PR 4 of epic #2340 (the GeoTIFF release contract). The + writer side has carried these gates since #2137 / #1845; this + helper extends the same shape to the read entry points. + + Parameters + ---------- + compression : int + TIFF Compression tag value (tag 259) from the parsed IFD. + allow_experimental_codecs : bool + Opt-in for Tier 3 read paths (LERC, JPEG2000 / J2K, LZ4). + allow_internal_only_jpeg : bool + Opt-in for Tier 4 read path (JPEG-in-TIFF). Does not collapse + into ``allow_experimental_codecs`` for the same reason as on + the writer: internal-only is a stricter tier and keeps its own + dedicated flag. + entry_point : str + Name of the public read function for the rejection message. + """ + codec_name = _COMPRESSION_TAG_TO_NAME.get(int(compression)) + if codec_name is None: + # Unknown compression tags are validated separately by the + # decoder; the opt-in gate only fires for codecs the reader + # otherwise accepts. + return + if codec_name == 'jpeg' and not allow_internal_only_jpeg: + raise ValueError( + f"{entry_point}: source uses compression='jpeg' (TIFF " + "tag 259 = 7), which is internal-only: the encoder writes " + "self-contained JFIF tiles without the TIFF JPEGTables tag " + "(347), so the read path is not interoperable with libtiff " + "/ GDAL / rasterio. Pass allow_internal_only_jpeg=True to " + "opt in to the internal-reader-only decode path. See " + "SUPPORTED_FEATURES tier 'internal_only' for codec.jpeg " + "(epic #2340, original gate #1845).") + if codec_name in _EXPERIMENTAL_CODECS and not allow_experimental_codecs: + raise ValueError( + f"{entry_point}: source uses compression={codec_name!r} " + "which is experimental on the read side: cross-backend " + "numerical parity is not claimed and reader support across " + "GDAL versions is uneven. Pass allow_experimental_codecs=" + "True to opt in, or re-encode the source with a stable " + "lossless codec ('deflate', 'zstd', or 'lzw'). See " + f"SUPPORTED_FEATURES tier 'experimental' for codec.{codec_name} " + "(epic #2340, original writer gate #2137).") + + +# Writer rich-tag attrs that ride the Experimental tier (PR 4 of epic +# #2340). ``writer.gdal_metadata_xml`` and ``writer.extra_tags`` carry +# free-form payloads through to the on-disk TIFF; their interop with +# other readers (rasterio, libtiff, GDAL) depends on the payload and is +# not part of the release promise. The opt-in keeps the surface narrow +# without removing the capability. +# +# Round-trip exemption: when the attrs carry the +# ``_xrspatial_geotiff_contract`` marker, they came from a previous +# xrspatial read. The reader populated ``gdal_metadata_xml`` / +# ``extra_tags`` from the source file; gating the write would force +# every read-then-write caller to opt in. Skip the gate on +# round-tripped attrs so the canonical contract from #1984 stays a +# no-flag operation. The gate still fires when a caller adds those +# attrs to a fresh DataArray that did not come from a read. +def _validate_write_rich_tag_optin( + attrs: dict, + *, + gdal_metadata_xml_kwarg: object = None, + extra_tags_kwarg: object = None, + allow_experimental_codecs: bool, + entry_point: str = "to_geotiff", +) -> None: + """Reject writes that include ``gdal_metadata_xml`` or ``extra_tags`` + unless the caller opted in via ``allow_experimental_codecs=True``. + + Part of PR 4 of epic #2340. Mirrors the existing codec-flag shape + so the rejection names the same opt-in the caller already learned + from the LERC / J2K / LZ4 paths. Round-tripped attrs (carrying + the ``_xrspatial_geotiff_contract`` marker) are exempt so the + canonical attrs round-trip (#1984) stays a no-flag operation; the + gate fires only when a caller constructs a fresh DataArray with + one of the rich-tag attrs set. + """ + if allow_experimental_codecs: + return + # Round-trip exemption: a DataArray that came from + # ``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` + # carries the contract marker. Writing it back is the canonical + # round-trip and should not require a new flag (issue #1984). + # + # This is a soft gate by design: a caller who hand-builds an + # attrs dict with the contract marker could bypass it. Forging + # the marker is a deliberate act, and the alternative (gating + # every read-then-write call) would break the canonical attrs + # round-trip that downstream code already depends on. The hard + # guarantee is "fresh DataArrays carrying these attrs need the + # opt-in"; the soft exemption keeps round-trips frictionless. + if '_xrspatial_geotiff_contract' in attrs: + return + triggered: list[str] = [] + if attrs.get('gdal_metadata_xml') is not None: + triggered.append("attrs['gdal_metadata_xml']") + if attrs.get('extra_tags') is not None: + triggered.append("attrs['extra_tags']") + if gdal_metadata_xml_kwarg is not None: + triggered.append('gdal_metadata_xml kwarg') + if extra_tags_kwarg is not None: + triggered.append('extra_tags kwarg') + if not triggered: + return + raise ValueError( + f"{entry_point}: {', '.join(triggered)} pass-through is " + "experimental: the on-disk bytes are written verbatim and " + "interop with other readers (rasterio, libtiff, GDAL) depends " + "on the payload. Pass allow_experimental_codecs=True to opt " + "in to the rich-tag write path, or drop the attr before the " + "write. See SUPPORTED_FEATURES tier 'experimental' for " + "writer.gdal_metadata_xml / writer.extra_tags (epic #2340).") + + # TIFF type ids needed when synthesizing extra_tags entries from attrs. _TIFF_BYTE = 1 _TIFF_ASCII = 2 diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 5e9ac360..61881109 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -42,6 +42,8 @@ def read_geotiff_dask(source: str, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True) -> xr.DataArray: """Read a GeoTIFF as a dask-backed DataArray for out-of-core processing. @@ -124,6 +126,16 @@ def read_geotiff_dask(source: str, *, instead of carrying the unrecognised payload through ``attrs['crs_wkt']``. See ``open_geotiff`` for the full description. + allow_experimental_codecs : bool, default False + [advanced] Read-side opt-in for Tier 3 experimental codecs + (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). Fires at graph + build, before any chunk task is scheduled. See ``open_geotiff`` + for the full description (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + [advanced] Read-side opt-in for JPEG-in-TIFF sources. Not + covered by ``allow_experimental_codecs``. See ``open_geotiff`` + for the full description (epic #2340 PR 4, original writer gate + #1845). on_gpu_failure : str, optional [internal-only] Accepted for cross-backend signature symmetry only. The dask path runs CPU decoders, so passing this kwarg @@ -315,6 +327,7 @@ def read_geotiff_dask(source: str, *, # Stash IFD photometric for the MinIsWhite nodata-inversion check below. geo_info._ifd_photometric = http_ifd.photometric geo_info._ifd_samples_per_pixel = http_ifd.samples_per_pixel + geo_info._ifd_compression = http_ifd.compression else: # Metadata-only read: O(1) memory via mmap, no pixel decompression. # Lazy import for the same circular-import reason as ``read_vrt`` @@ -323,6 +336,28 @@ def read_geotiff_dask(source: str, *, geo_info, full_h, full_w, file_dtype, n_bands = _read_geo_info( source, overview_level=overview_level, allow_rotated=allow_rotated) + + # Reject experimental / internal-only codecs at graph build, before + # any chunk task is scheduled. The compression tag is stashed on + # ``geo_info`` by ``_read_geo_info`` (local / fsspec) and by the + # HTTP / fsspec branch above. PR 4 of epic #2340. + # + # ``getattr(..., None)`` is intentional: a synthesised geo_info + # (non-TIFF source) carries no compression tag, so the gate must + # skip rather than reject. Every TIFF source path stashes + # ``_ifd_compression`` in lockstep with ``_ifd_photometric`` and + # ``_ifd_samples_per_pixel`` so the skip never silently bypasses + # a real TIFF read. + _compression_tag = getattr(geo_info, '_ifd_compression', None) + if _compression_tag is not None: + from .._attrs import _validate_read_codec_optin + _validate_read_codec_optin( + _compression_tag, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="read_geotiff_dask", + ) + # PR-C #2226: centralize the nodata lifecycle in one value object. # ``raw_sentinel`` carries the pre-inversion sentinel that # ``attrs['nodata']`` must preserve; ``effective_sentinel`` is what @@ -553,7 +588,11 @@ def read_geotiff_dask(source: str, *, target_dtype=target_dtype, http_meta_key=http_meta_key, max_pixels=max_pixels, - allow_rotated=allow_rotated), + allow_rotated=allow_rotated, + allow_experimental_codecs=( + allow_experimental_codecs), + allow_internal_only_jpeg=( + allow_internal_only_jpeg)), shape=block_shape, dtype=target_dtype, ) @@ -575,7 +614,9 @@ def read_geotiff_dask(source: str, *, def _delayed_read_window(source, r0, c0, r1, c1, overview_level, nodata, band, *, target_dtype=None, http_meta_key=None, - max_pixels=None, allow_rotated=False): + max_pixels=None, allow_rotated=False, + allow_experimental_codecs=False, + allow_internal_only_jpeg=False): """Dask-delayed function to read a single window. *http_meta_key* is an optional ``Delayed[(TIFFHeader, IFD)]`` parsed @@ -631,11 +672,14 @@ def _read(http_meta): _r2a_kwargs = {} if max_pixels is not None: _r2a_kwargs['max_pixels'] = max_pixels - arr, _ = _read_to_array(source, window=(r0, c0, r1, c1), - overview_level=overview_level, - band=band, - allow_rotated=allow_rotated, - **_r2a_kwargs) + arr, _ = _read_to_array( + source, window=(r0, c0, r1, c1), + overview_level=overview_level, + band=band, + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + **_r2a_kwargs) if nodata is not None: # ``arr`` was just decoded by ``_fetch_decode_cog_http_tiles`` # or ``read_to_array``; both return freshly-allocated buffers diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 89b2b266..72fb99d4 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -75,6 +75,8 @@ def read_geotiff_gpu(source: str, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True, gpu: str = _GPU_DEPRECATED_SENTINEL, @@ -201,6 +203,17 @@ def read_geotiff_gpu(source: str, *, since #1929) raises ``UnparseableCRSError``; ``True`` keeps the pre-#1929 permissive behaviour. See ``open_geotiff`` for the full description. + allow_experimental_codecs : bool, default False + [experimental] Read-side opt-in for Tier 3 experimental codecs + (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). The GPU read path + mirrors the CPU eager and dask paths so all three readers agree + on the opt-in contract. See ``open_geotiff`` for the full + description (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + [internal-only] Read-side opt-in for JPEG-in-TIFF sources. Not + covered by ``allow_experimental_codecs``. See ``open_geotiff`` + for the full description (epic #2340 PR 4, original writer gate + #1845). band_nodata : {'first', None}, optional [internal-only] VRT-only. Accepted at the signature level for parity with ``open_geotiff``; passing it to ``read_geotiff_gpu`` @@ -317,6 +330,8 @@ def read_geotiff_gpu(source: str, *, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, ) @@ -366,6 +381,8 @@ def read_geotiff_gpu(source: str, *, overview_level=overview_level, band=band, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, ) @@ -401,6 +418,18 @@ def read_geotiff_gpu(source: str, *, # Skip mask IFDs (NewSubfileType bit 2) ifd = select_overview_ifd(ifds, overview_level) + # Reject experimental / internal-only codecs on the GPU read + # path unless the caller opted in. Mirrors the CPU eager and + # dask paths so all three read backends agree on the opt-in + # contract. PR 4 of epic #2340. + from .._attrs import _validate_read_codec_optin + _validate_read_codec_optin( + ifd.compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="read_geotiff_gpu", + ) + # Keep ``data`` / ``header`` bound to the base file's buffers so # the georef extractor below resolves base-IFD tag offsets # against the right bytes. Introduce ``ifd_data`` / ``ifd_header`` @@ -524,7 +553,9 @@ def read_geotiff_gpu(source: str, *, arr_cpu, _stripped_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) if orientation != 1: geo_info = _apply_orientation_geo_info( @@ -726,7 +757,9 @@ def _read_once(): arr_cpu, _cpu_fallback_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) arr_was_cpu_decoded = True else: @@ -742,7 +775,9 @@ def _read_once(): arr_cpu, _cpu_fallback_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) arr_was_cpu_decoded = True else: @@ -820,7 +855,9 @@ def _read_once(): arr_cpu, _cpu_fallback_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) arr_was_cpu_decoded = True @@ -981,6 +1018,8 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, band, name, max_pixels, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): """Eager CPU decode + GPU upload for HTTP / fsspec sources (issue #2161). @@ -1021,6 +1060,8 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, arr_cpu, geo_info = _read_to_array( source, window=window, overview_level=overview_level, band=band, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) arr_gpu = cupy.asarray(arr_cpu) @@ -1209,6 +1250,8 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, window, band, name, max_pixels, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): """Lazy Dask+CuPy backend for ``read_geotiff_gpu(chunks=...)``. @@ -1336,6 +1379,8 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, ) diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 8f68c071..80eb21f3 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -128,6 +128,8 @@ def read_vrt(source: str, *, missing_sources: str = 'raise', allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True) -> xr.DataArray: """Read a GDAL Virtual Raster Table (.vrt) into an xarray.DataArray. @@ -255,6 +257,16 @@ def read_vrt(source: str, *, #1929) raises ``UnparseableCRSError`` rather than carrying the unrecognised payload through. See ``open_geotiff`` for the full description. + allow_experimental_codecs : bool, default False + [advanced] Read-side opt-in for Tier 3 experimental codecs in + any source file referenced by the VRT. Forwarded to the + per-source reader for each ````. See + ``open_geotiff`` for the full description (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + [advanced] Read-side opt-in for JPEG-in-TIFF sources referenced + by the VRT. Forwarded to the per-source reader. See + ``open_geotiff`` for the full description (epic #2340 PR 4, + original writer gate #1845). overview_level : int or None [internal-only] Not supported for VRT sources. The VRT XML references its own source files, so overview selection would @@ -426,6 +438,8 @@ def read_vrt(source: str, *, missing_sources=missing_sources, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, mask_nodata=mask_nodata, ) @@ -498,6 +512,8 @@ def read_vrt(source: str, *, source, window=window, band=band, max_pixels=max_pixels, missing_sources=missing_sources, parsed=_parsed_vrt, mask_nodata=mask_nodata, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) if name is None: @@ -707,7 +723,9 @@ def read_vrt(source: str, *, def _vrt_chunk_read(source, r0, c0, r1, c1, *, band, max_pixels, missing_sources, declared_dtype, gpu, parsed_vrt, - mask_nodata: bool = True): + mask_nodata: bool = True, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False): """Decode a single chunk window from a VRT. Called by ``dask.delayed`` from :func:`_read_vrt_chunked`. The @@ -739,6 +757,8 @@ def _vrt_chunk_read(source, r0, c0, r1, c1, *, source, window=(r0, c0, r1, c1), band=band, max_pixels=max_pixels, missing_sources=missing_sources, parsed=parsed_vrt, mask_nodata=mask_nodata, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) # Mirror the eager post-decode integer-sentinel masking via the @@ -767,6 +787,8 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, max_pixels, missing_sources, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True): """Lazy ``read_vrt`` dispatch when ``chunks=`` is set (issue #1814). @@ -1010,6 +1032,8 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, gpu=gpu, parsed_vrt=parsed_vrt_key, mask_nodata=mask_nodata, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) block = da.from_delayed(d, shape=block_shape, dtype=declared_dtype, meta=meta) diff --git a/xrspatial/geotiff/_cog_http.py b/xrspatial/geotiff/_cog_http.py index 6cd7cfb3..801da956 100644 --- a/xrspatial/geotiff/_cog_http.py +++ b/xrspatial/geotiff/_cog_http.py @@ -296,6 +296,8 @@ def _read_cog_http(url: str, overview_level: int | None = None, window: tuple[int, int, int, int] | None = None, *, allow_rotated: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, GeoInfo]: """Read a COG via HTTP range requests. @@ -365,6 +367,22 @@ def _read_cog_http(url: str, overview_level: int | None = None, source.close() source = _reader._HTTPSource(route_path) + # Reject experimental and internal-only codecs on the HTTP read + # path unless the caller opted in. Mirrors the gate in + # ``_read_to_array`` so HTTP and local reads agree on the + # opt-in contract. See PR 4 of epic #2340. + from ._attrs import _validate_read_codec_optin + try: + _validate_read_codec_optin( + ifd.compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="open_geotiff", + ) + except ValueError: + source.close() + raise + # Mirror the local-path orientation guard in ``read_to_array``: a # windowed read against a non-default Orientation tag (274) has # ambiguous semantics (does the window refer to file pixels or to diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 1963b6d1..91e55ca1 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -110,6 +110,8 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, max_pixels: int = MAX_PIXELS_DEFAULT, max_cloud_bytes=_MAX_CLOUD_BYTES_SENTINEL, allow_rotated: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, GeoInfo]: """Read a GeoTIFF/COG to a numpy array (module-private). @@ -144,9 +146,12 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, """ source = _coerce_path(source) if _is_http_source(source): - return _read_cog_http(source, overview_level=overview_level, band=band, - max_pixels=max_pixels, window=window, - allow_rotated=allow_rotated) + return _read_cog_http( + source, overview_level=overview_level, band=band, + max_pixels=max_pixels, window=window, + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) # Local file, cloud storage, or file-like buffer: read all bytes then parse # Resolve the cloud byte budget once so both the base-file ``_CloudSource`` @@ -226,6 +231,19 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, # Select IFD, skipping any mask IFDs ifd = select_overview_ifd(ifds, overview_level) + # Reject experimental and internal-only codecs on the read side + # unless the caller opted in. Mirrors the writer-side gate so the + # two surfaces stay consistent. Fires before any tile/strip work + # so the caller learns the missing flag from the rejection, not + # from a deeper decode-time failure. See PR 4 of epic #2340. + from ._attrs import _validate_read_codec_optin + _validate_read_codec_optin( + ifd.compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="open_geotiff", + ) + # If the selected IFD came from the sidecar, swap the data / # header used for strip / tile reads below so byte offsets # resolve against the right buffer. diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 7ff5b974..67f531fd 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -1105,6 +1105,8 @@ def read_vrt(vrt_path: str, *, window=None, missing_sources: str = 'raise', parsed: VRTDataset | None = None, mask_nodata: bool = True, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, VRTDataset]: """Read a VRT file by assembling pixel data from its source files. @@ -1443,6 +1445,8 @@ def read_vrt(vrt_path: str, *, window=None, window=(read_r0, read_c0, read_r1, read_c1), band=src.band - 1, # convert 1-based to 0-based max_pixels=max_pixels, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) except ( OSError, ValueError, struct.error, diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 352ad1b0..d843ae91 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -608,6 +608,22 @@ def to_geotiff(data: xr.DataArray | np.ndarray, stacklevel=2, ) + # Reject ``gdal_metadata_xml`` / ``extra_tags`` pass-through writes + # unless the caller opted in via ``allow_experimental_codecs=True``. + # Both surfaces ride the Experimental tier in ``SUPPORTED_FEATURES`` + # because the on-disk bytes are written verbatim and downstream + # interop with rasterio / libtiff / GDAL depends on the payload. + # PR 4 of epic #2340. + _data_attrs_for_optin = ( + data.attrs if isinstance(data, xr.DataArray) else {} + ) + from .._attrs import _validate_write_rich_tag_optin + _validate_write_rich_tag_optin( + _data_attrs_for_optin, + allow_experimental_codecs=allow_experimental_codecs, + entry_point="to_geotiff", + ) + # Issue #2312: ``cog=True`` requires a tiled internal layout per the # COG spec. The writer used to accept ``cog=True, tiled=False``, warn # that ``tile_size`` was ignored, and then write strips via diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 5dbe8d5d..54756fe2 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -389,6 +389,18 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, entry_point="write_geotiff_gpu", ) + # Reject ``gdal_metadata_xml`` / ``extra_tags`` pass-through writes + # unless the caller opted in via ``allow_experimental_codecs=True``. + # Mirrors ``to_geotiff`` so the two writers expose the same surface. + # PR 4 of epic #2340. + from .._attrs import _validate_write_rich_tag_optin + _attrs_for_optin = getattr(data, 'attrs', None) or {} + _validate_write_rich_tag_optin( + _attrs_for_optin, + allow_experimental_codecs=allow_experimental_codecs, + entry_point="write_geotiff_gpu", + ) + # Issue #1987 ambiguous-metadata checks; mirrors ``to_geotiff`` so the # GPU writer enforces the same crs/crs_wkt consistency rule. Alias # resolution (issue #2215) keeps the validator consistent across diff --git a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py index cfc01b28..59f314f6 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py @@ -172,7 +172,13 @@ def canonical_roundtrip(tmp_path): """ da, expected_transform = _make_canonical_da() path = str(tmp_path / 'attrs_contract_canonical.tif') - to_geotiff(da, path) + # The canonical fixture carries ``extra_tags``; the PR 4 rich-tag + # gate (#2340) exempts attrs carrying ``_xrspatial_geotiff_contract`` + # (set by the readers), but this fixture builds a fresh DataArray + # without the marker, so the initial write must opt in. The + # round-trip's subsequent open->write would NOT need the flag -- + # this fixture exercises the initial write only. + to_geotiff(da, path, allow_experimental_codecs=True) rd = open_geotiff(path) return rd, expected_transform @@ -355,7 +361,9 @@ def test_canonical_keys_present_per_backend(tmp_path, opener): """ da, _ = _make_canonical_da() path = str(tmp_path / f'canonical_{opener.__name__}.tif') - to_geotiff(da, path) + # Fresh DataArray with ``extra_tags`` -> rich-tag opt-in required + # (PR 4 of epic #2340; see ``canonical_roundtrip`` for details). + to_geotiff(da, path, allow_experimental_codecs=True) rd = opener(path) missing = sorted(k for k in _CANONICAL_KEYS if k not in rd.attrs) @@ -380,7 +388,7 @@ def test_raster_type_area_omitted_on_roundtrip(tmp_path): da, _ = _make_canonical_da() assert 'raster_type' not in da.attrs path = str(tmp_path / 'raster_type_area.tif') - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) rd = open_geotiff(path) assert 'raster_type' not in rd.attrs, ( f"area is the implicit default but the reader emitted " @@ -394,6 +402,6 @@ def test_raster_type_point_roundtrip(tmp_path): da, _ = _make_canonical_da() da.attrs['raster_type'] = 'point' path = str(tmp_path / 'raster_type_point.tif') - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) rd = open_geotiff(path) assert rd.attrs.get('raster_type') == 'point' diff --git a/xrspatial/geotiff/tests/test_backend_full_parity_2211.py b/xrspatial/geotiff/tests/test_backend_full_parity_2211.py index d1e525d5..cd42e91e 100644 --- a/xrspatial/geotiff/tests/test_backend_full_parity_2211.py +++ b/xrspatial/geotiff/tests/test_backend_full_parity_2211.py @@ -240,21 +240,34 @@ class _Backend: skips: dict[str, str] = field(default_factory=dict) +# PR 4 of epic #2340: experimental and internal-only codecs require an +# explicit opt-in on the read side. The full parity matrix tests every +# fixture including ``compression_lerc_float32``; the gate's purpose is +# orthogonal to the parity check, so pass both flags through every +# opener. The matrix continues to test what it was meant to test. +_OPTIN = { + "allow_experimental_codecs": True, + "allow_internal_only_jpeg": True, +} + + def _read_eager_numpy(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: - return open_geotiff(str(path)) + return open_geotiff(str(path), **_OPTIN) def _read_dask_numpy(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: - return open_geotiff(str(path), chunks=_CHUNK_SIZE) + return open_geotiff(str(path), chunks=_CHUNK_SIZE, **_OPTIN) def _read_gpu(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: - return open_geotiff(str(path), gpu=True, on_gpu_failure="strict") + return open_geotiff( + str(path), gpu=True, on_gpu_failure="strict", **_OPTIN) def _read_dask_gpu(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: return open_geotiff( str(path), gpu=True, chunks=_CHUNK_SIZE, on_gpu_failure="strict", + **_OPTIN, ) @@ -282,7 +295,7 @@ def _read_vrt_eager(path: pathlib.Path, fixture_id: str) -> xr.DataArray: vrt_path = cache_dir / f"{fixture_id}.vrt" if not vrt_path.exists(): write_vrt(str(vrt_path), [str(local_src)]) - return open_geotiff(str(vrt_path)) + return open_geotiff(str(vrt_path), **_OPTIN) def _read_http_fsspec(path: pathlib.Path, fixture_id: str) -> xr.DataArray: @@ -307,7 +320,7 @@ def _read_http_fsspec(path: pathlib.Path, fixture_id: str) -> xr.DataArray: with open(path, "rb") as f: fs.pipe(key, f.read()) try: - da = open_geotiff(f"memory://{key}") + da = open_geotiff(f"memory://{key}", **_OPTIN) finally: # Best-effort cleanup; fsspec memory store deletions are # idempotent. The cloud-eager path has already pulled the @@ -718,7 +731,7 @@ def _reference_for( ) -> xr.DataArray: fid = entry["id"] if fid not in cache: - cache[fid] = open_geotiff(str(_fixture_path(entry))) + cache[fid] = open_geotiff(str(_fixture_path(entry)), **_OPTIN) return cache[fid] diff --git a/xrspatial/geotiff/tests/test_decompression_caps.py b/xrspatial/geotiff/tests/test_decompression_caps.py index fc4e2c52..5954c8e4 100644 --- a/xrspatial/geotiff/tests/test_decompression_caps.py +++ b/xrspatial/geotiff/tests/test_decompression_caps.py @@ -278,8 +278,11 @@ def test_lz4_bomb_rejected(tmp_path): width=_DECLARED_W, height=_DECLARED_H) path = tmp_path / "lz4_bomb.tif" path.write_bytes(tiff) + # LZ4 is the Experimental read tier (PR 4 of epic #2340); pass the + # opt-in so the test exercises the bomb cap rather than the codec + # gate. with pytest.raises(ValueError, match="exceed"): - read_to_array(str(path)) + read_to_array(str(path), allow_experimental_codecs=True) def test_packbits_bomb_rejected(tmp_path): diff --git a/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py b/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py index 45ab772a..e8c4259e 100644 --- a/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py +++ b/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py @@ -112,7 +112,7 @@ def test_eager_writer_round_trip_with_large_gdal_metadata(tmp_path): ) da = _make_4x4_float32(gdal_metadata_xml=metadata_xml) path = str(tmp_path / "large_metadata_1905.tif") - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) rt = open_geotiff(path) np.testing.assert_array_equal(rt.values, da.values) @@ -153,7 +153,7 @@ def _huge_overhead(tags): writer_mod, "_compute_classic_ifd_overhead", _huge_overhead, ) - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) with open(path, "rb") as f: head = f.read(8) assert head[:2] == b"II" @@ -165,7 +165,7 @@ def test_eager_writer_keeps_classic_when_overhead_fits(tmp_path): """Sanity check: the default 4x4 file fits classic comfortably.""" da = _make_4x4_float32() path = str(tmp_path / "classic_1905.tif") - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) with open(path, "rb") as f: head = f.read(8) magic = struct.unpack_from("" da = _make_4x4_float32(gdal_metadata_xml=metadata_xml) path = str(tmp_path / "match_actual_1905.tif") - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) # Parse the file to find IFD offset, then measure the bytes between # IFD start and the first pixel-data offset to confirm the writer diff --git a/xrspatial/geotiff/tests/test_experimental_internal_optin_2352.py b/xrspatial/geotiff/tests/test_experimental_internal_optin_2352.py new file mode 100644 index 00000000..84375cca --- /dev/null +++ b/xrspatial/geotiff/tests/test_experimental_internal_optin_2352.py @@ -0,0 +1,479 @@ +"""Opt-in gates for experimental and internal-only GeoTIFF paths (#2352). + +Background +---------- +Issue #2340 tiers the GeoTIFF release contract into Stable / Advanced / +Experimental / Internal-only. PR 1 of the epic (#2348) lined up the +``SUPPORTED_FEATURES`` constant with that tier shape. PR 4 (this issue, +#2352) extends the writer-side opt-in shape onto every Experimental / +Internal-only path that did not yet have one. + +What this file pins +------------------- +* Read-side codec gate (LERC / JPEG2000 / J2K / LZ4 / JPEG-in-TIFF): + ``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` + reject a source whose Compression tag selects an experimental or + internal-only codec unless the caller passes the matching flag + (``allow_experimental_codecs=True`` or ``allow_internal_only_jpeg= + True``). The writer already enforces these flags; the read side + matches the same shape. +* Writer rich-tag gate: ``to_geotiff`` / ``write_geotiff_gpu`` reject + a DataArray whose attrs carry ``gdal_metadata_xml`` or ``extra_tags`` + unless the caller passes ``allow_experimental_codecs=True``. Both + attrs ride the Experimental tier in ``SUPPORTED_FEATURES`` because + the bytes are written verbatim and downstream interop depends on the + payload. +* Each rejection message names the missing flag, the feature, and the + tier so the call site can be fixed in one line. +* Signature checks pin the new kwargs on the public entry points. +""" +from __future__ import annotations + +import inspect +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import (open_geotiff, read_geotiff_dask, read_geotiff_gpu, + to_geotiff, write_geotiff_gpu) +from xrspatial.geotiff._attrs import (_COMPRESSION_TAG_TO_NAME, _validate_read_codec_optin, + _validate_write_rich_tag_optin) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_float32_da(h: int = 32, w: int = 32) -> xr.DataArray: + """Small float32 raster used for the write-side gate.""" + rng = np.random.RandomState(0) + arr = rng.standard_normal((h, w)).astype(np.float32) + return xr.DataArray( + arr, + dims=("y", "x"), + coords={ + "y": np.arange(h, dtype=np.float64), + "x": np.arange(w, dtype=np.float64), + }, + attrs={'crs': 4326}, + ) + + +def _write_test_tif(tmp_path, compression: str, + *, allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + dtype=np.float32): + """Write a small file with the requested codec so the read side has + a real target. Returns the file path. Skips when the optional + encoder dependency is missing.""" + h = w = 32 + rng = np.random.RandomState(0) + if dtype == np.uint8: + arr = rng.randint(0, 256, size=(h, w), dtype=np.uint8) + else: + arr = rng.standard_normal((h, w)).astype(dtype) + da = xr.DataArray( + arr, + dims=("y", "x"), + coords={ + "y": np.arange(h, dtype=np.float64), + "x": np.arange(w, dtype=np.float64), + }, + attrs={'crs': 4326}, + ) + path = os.path.join(str(tmp_path), f'src_{compression}.tif') + try: + to_geotiff( + da, path, compression=compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + ) + except (ImportError, ModuleNotFoundError) as e: + pytest.skip(f"optional encoder missing for {compression}: {e}") + return path + + +# --------------------------------------------------------------------------- +# Signature tests: every public read entry point exposes the new flags. +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "fn", [open_geotiff, read_geotiff_dask, read_geotiff_gpu]) +def test_read_signature_has_codec_optin(fn): + """``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` + expose ``allow_experimental_codecs=False`` and + ``allow_internal_only_jpeg=False``. The default is ``False`` so + accidental removal of the gate would surface here. + """ + params = inspect.signature(fn).parameters + assert 'allow_experimental_codecs' in params, fn.__name__ + assert params['allow_experimental_codecs'].default is False + assert 'allow_internal_only_jpeg' in params, fn.__name__ + assert params['allow_internal_only_jpeg'].default is False + + +# --------------------------------------------------------------------------- +# Helper unit tests: the validators raise on the codec / attrs surface +# without an opt-in and accept the call with one. These do not require +# disk IO. +# --------------------------------------------------------------------------- + + +def test_validate_read_codec_optin_accepts_stable_codecs(): + """A stable codec (deflate / none / lzw / zstd / packbits) does not + require any opt-in regardless of the flag values. + """ + for tag in (1, 5, 8, 32773, 50000): # none, lzw, deflate, packbits, zstd + _validate_read_codec_optin( + tag, + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + + +@pytest.mark.parametrize("codec_name", ['lerc', 'jpeg2000', 'lz4']) +def test_validate_read_codec_optin_rejects_experimental(codec_name): + """LERC / JPEG2000 / LZ4 raise ``ValueError`` whose message names + ``allow_experimental_codecs`` so the caller can find the flag from + the error itself. + """ + tag = { + v: k for k, v in _COMPRESSION_TAG_TO_NAME.items() + }[codec_name] + with pytest.raises(ValueError, match='allow_experimental_codecs'): + _validate_read_codec_optin( + tag, + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + + +def test_validate_read_codec_optin_rejects_jpeg(): + """JPEG-in-TIFF raises ``ValueError`` whose message names + ``allow_internal_only_jpeg`` -- the dedicated flag, NOT + ``allow_experimental_codecs``. The two flags do not collapse. + """ + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + _validate_read_codec_optin( + 7, # COMPRESSION_JPEG + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + # ``allow_experimental_codecs=True`` does NOT cover JPEG. + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + _validate_read_codec_optin( + 7, + allow_experimental_codecs=True, + allow_internal_only_jpeg=False, + ) + + +def test_validate_read_codec_optin_accepts_jpeg_with_flag(): + """With ``allow_internal_only_jpeg=True`` the read-side gate lets + JPEG-in-TIFF through. + """ + _validate_read_codec_optin( + 7, + allow_experimental_codecs=False, + allow_internal_only_jpeg=True, + ) + + +@pytest.mark.parametrize("codec_name", ['lerc', 'jpeg2000', 'lz4']) +def test_validate_read_codec_optin_accepts_experimental_with_flag(codec_name): + """With ``allow_experimental_codecs=True`` the read-side gate lets + LERC / JPEG2000 / LZ4 through. + """ + tag = { + v: k for k, v in _COMPRESSION_TAG_TO_NAME.items() + }[codec_name] + _validate_read_codec_optin( + tag, + allow_experimental_codecs=True, + allow_internal_only_jpeg=False, + ) + + +def test_validate_read_codec_optin_message_names_feature_and_tier(): + """The rejection message names the codec, the missing flag, the + SUPPORTED_FEATURES tier, and the parent epic so a reader can fix + the call site without grepping the source. + """ + with pytest.raises(ValueError) as exc: + _validate_read_codec_optin( + 34887, # LERC + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + msg = str(exc.value) + assert 'lerc' in msg + assert 'allow_experimental_codecs' in msg + assert 'experimental' in msg + assert '#2340' in msg + + +def test_validate_write_rich_tag_optin_accepts_empty_attrs(): + """No rich-tag attrs and no opt-in: the writer gate is a no-op.""" + _validate_write_rich_tag_optin( + {}, allow_experimental_codecs=False) + + +def test_validate_write_rich_tag_optin_rejects_gdal_metadata_xml(): + """``attrs['gdal_metadata_xml']`` triggers the gate; rejection + message names the attr and the opt-in flag. + """ + with pytest.raises(ValueError, match='gdal_metadata_xml'): + _validate_write_rich_tag_optin( + {'gdal_metadata_xml': ''}, + allow_experimental_codecs=False, + ) + + +def test_validate_write_rich_tag_optin_rejects_extra_tags(): + """``attrs['extra_tags']`` triggers the gate; rejection message + names the attr and the opt-in flag. + """ + with pytest.raises(ValueError, match='extra_tags'): + _validate_write_rich_tag_optin( + {'extra_tags': [(700, 1, 0, b'')]}, + allow_experimental_codecs=False, + ) + + +def test_validate_write_rich_tag_optin_accepts_with_flag(): + """``allow_experimental_codecs=True`` accepts both rich-tag attrs.""" + _validate_write_rich_tag_optin( + {'gdal_metadata_xml': '', + 'extra_tags': [(700, 1, 0, b'')]}, + allow_experimental_codecs=True, + ) + + +def test_validate_write_rich_tag_optin_exempts_round_trip(): + """An attrs dict carrying the ``_xrspatial_geotiff_contract`` marker + came from an xrspatial read; round-tripping it back through + ``to_geotiff`` is the canonical contract from #1984 and must not + require a new flag. The marker is the gate's exemption signal. + """ + _validate_write_rich_tag_optin( + {'gdal_metadata_xml': '', + 'extra_tags': [(700, 1, 0, b'')], + '_xrspatial_geotiff_contract': 2}, + allow_experimental_codecs=False, + ) + + +# --------------------------------------------------------------------------- +# Read end-to-end: write an experimental-codec file via the existing +# writer opt-in, then assert open_geotiff refuses to read it without the +# matching opt-in and succeeds with it. +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("codec", ['lerc', 'lz4']) +def test_open_geotiff_rejects_experimental_codec(tmp_path, codec): + """A file written with LERC or LZ4 raises ``ValueError`` on read + by default; the message names ``allow_experimental_codecs``. + """ + path = _write_test_tif( + tmp_path, codec, allow_experimental_codecs=True) + with pytest.raises(ValueError, match='allow_experimental_codecs'): + open_geotiff(path) + + +@pytest.mark.parametrize("codec", ['lerc', 'lz4']) +def test_open_geotiff_accepts_experimental_codec_with_flag(tmp_path, codec): + """``allow_experimental_codecs=True`` lets the read through and + returns a DataArray with the expected shape. + """ + path = _write_test_tif( + tmp_path, codec, allow_experimental_codecs=True) + try: + da = open_geotiff(path, allow_experimental_codecs=True) + except (ImportError, ModuleNotFoundError) as e: + pytest.skip(f"optional decoder missing for {codec}: {e}") + assert da.shape == (32, 32) + + +def test_open_geotiff_rejects_jpeg2000(tmp_path): + """JPEG2000 is experimental and requires the same opt-in as LERC / + LZ4. ``j2k`` is an alias the writer maps to the same codec, so + only one source file is needed. + """ + path = _write_test_tif( + tmp_path, 'jpeg2000', allow_experimental_codecs=True, + dtype=np.uint8) + with pytest.raises(ValueError, match='allow_experimental_codecs'): + open_geotiff(path) + + +def test_open_geotiff_rejects_jpeg_internal_only(tmp_path): + """JPEG-in-TIFF is internal-only; the dedicated flag + ``allow_internal_only_jpeg`` is the gate. Mirrors the writer side + where ``allow_experimental_codecs`` does NOT cover JPEG. + """ + path = _write_test_tif( + tmp_path, 'jpeg', allow_internal_only_jpeg=True, + dtype=np.uint8) + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + open_geotiff(path) + # ``allow_experimental_codecs=True`` does NOT unlock JPEG-in-TIFF + # on the read side either. + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + open_geotiff(path, allow_experimental_codecs=True) + + +def test_open_geotiff_accepts_jpeg_internal_only_with_flag(tmp_path): + """``allow_internal_only_jpeg=True`` lets the read through.""" + path = _write_test_tif( + tmp_path, 'jpeg', allow_internal_only_jpeg=True, + dtype=np.uint8) + da = open_geotiff(path, allow_internal_only_jpeg=True) + assert da.shape == (32, 32) + + +def test_read_geotiff_dask_rejects_experimental_codec(tmp_path): + """The dask read path fires the gate at graph build, before any + chunk task is scheduled. + """ + path = _write_test_tif( + tmp_path, 'lz4', allow_experimental_codecs=True) + with pytest.raises(ValueError, match='allow_experimental_codecs'): + read_geotiff_dask(path, chunks=16) + + +def test_read_geotiff_dask_accepts_experimental_codec_with_flag(tmp_path): + """``allow_experimental_codecs=True`` lets the dask graph build.""" + path = _write_test_tif( + tmp_path, 'lz4', allow_experimental_codecs=True) + try: + da = read_geotiff_dask( + path, chunks=16, allow_experimental_codecs=True) + except (ImportError, ModuleNotFoundError) as e: + pytest.skip(f"optional decoder missing: {e}") + assert da.shape == (32, 32) + + +# --------------------------------------------------------------------------- +# Writer rich-tag attrs: gdal_metadata_xml / extra_tags require the +# experimental opt-in. +# --------------------------------------------------------------------------- + + +def test_to_geotiff_rejects_gdal_metadata_xml_without_flag(tmp_path): + """A DataArray whose attrs carry ``gdal_metadata_xml`` is rejected + by ``to_geotiff`` unless the caller passes + ``allow_experimental_codecs=True``. The message names the attr. + """ + da = _make_float32_da() + da.attrs['gdal_metadata_xml'] = ( + '0' + '' + ) + path = os.path.join(str(tmp_path), 'rich_xml.tif') + with pytest.raises(ValueError, match='gdal_metadata_xml'): + to_geotiff(da, path) + + +def test_to_geotiff_rejects_extra_tags_without_flag(tmp_path): + """Same shape as the ``gdal_metadata_xml`` case but for + ``attrs['extra_tags']``. Both surfaces feed the same on-disk path + and ride the same Experimental tier. + """ + da = _make_float32_da() + da.attrs['extra_tags'] = [(700, 1, 0, b'')] + path = os.path.join(str(tmp_path), 'rich_extra.tif') + with pytest.raises(ValueError, match='extra_tags'): + to_geotiff(da, path) + + +def test_to_geotiff_accepts_rich_tags_with_flag(tmp_path): + """``allow_experimental_codecs=True`` lets both attrs through and + the write completes. + """ + da = _make_float32_da() + da.attrs['gdal_metadata_xml'] = ( + '0' + '' + ) + da.attrs['extra_tags'] = [(700, 1, 0, b'')] + path = os.path.join(str(tmp_path), 'rich_optin.tif') + out = to_geotiff(da, path, allow_experimental_codecs=True) + assert out == path + assert os.path.exists(path) + + +def test_write_geotiff_gpu_rejects_rich_tags_without_flag(tmp_path): + """The GPU writer mirrors ``to_geotiff`` so the two writers expose + a consistent surface; the rejection fires before any GPU work and + does not depend on cupy being installed. + """ + da = _make_float32_da() + da.attrs['gdal_metadata_xml'] = ( + '0' + '' + ) + path = os.path.join(str(tmp_path), 'rich_gpu.tif') + with pytest.raises(ValueError, match='gdal_metadata_xml'): + write_geotiff_gpu(da, path) + + +# --------------------------------------------------------------------------- +# Already-gated paths: pin the existing behaviour so a future refactor +# that drops a flag fails this file rather than passing in CI. +# --------------------------------------------------------------------------- + + +def test_allow_rotated_default_raises_already_gated(tmp_path): + """``allow_rotated=False`` (the default) raises on a rotated read. + Pinned here so the Experimental + Internal-only opt-in inventory + in PR 4 lives next to the existing ``allow_rotated`` / + ``allow_unparseable_crs`` gates and a future refactor cannot drop + one of them without failing this file. + + The PR 1 audit (#2348) demoted ``reader.allow_rotated`` from + advanced to experimental, so the gate already matches the epic. + """ + # A signature pin is enough -- the actual rotated-read behaviour is + # covered by the existing test_allow_rotated_geotiff_2115.py suite. + params = inspect.signature(open_geotiff).parameters + assert 'allow_rotated' in params + assert params['allow_rotated'].default is False + + +def test_allow_unparseable_crs_default_raises_already_gated(): + """``allow_unparseable_crs=False`` (the default) raises on an + unparseable CRS string. The PR 1 audit (#2348) demoted + ``reader.allow_unparseable_crs`` to experimental, so the gate + already matches the epic. Pin the signature here next to the new + PR 4 opt-ins so the inventory lives in one file. + """ + params = inspect.signature(open_geotiff).parameters + assert 'allow_unparseable_crs' in params + assert params['allow_unparseable_crs'].default is False + + +def test_gpu_read_requires_explicit_optin(): + """GPU read is Experimental in ``SUPPORTED_FEATURES`` and the + opt-in is the boolean ``gpu=True`` kwarg. Pin the default here so + a future refactor cannot flip GPU read to auto-on. + """ + params = inspect.signature(open_geotiff).parameters + assert 'gpu' in params + assert params['gpu'].default is False + + +def test_gpu_write_requires_explicit_optin(): + """GPU write is Experimental and gates on ``gpu=True`` / + ``gpu=None`` (auto-detect from CuPy data). Pin the default here: + ``None`` is the documented auto-detect sentinel and ``False`` / + ``True`` are the explicit selectors. A flip to ``True`` default + would silently route every NumPy write through the GPU pipeline. + """ + params = inspect.signature(to_geotiff).parameters + assert 'gpu' in params + assert params['gpu'].default is None diff --git a/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py b/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py index fc9038c2..dfd5d764 100644 --- a/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py +++ b/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py @@ -214,7 +214,10 @@ def test_writer_filters_caller_supplied_newsubfiletype(tmp_path): }, ) out = tmp_path / 'with_dangerous_extra_tag.tif' - to_geotiff(da, str(out)) + # Rich-tag extra_tags is the Experimental write surface (PR 4 of + # epic #2340); pass the opt-in so the filter logic this test + # exercises gets to run. + to_geotiff(da, str(out), allow_experimental_codecs=True) sft = _read_subfile_type(out) assert sft in (None, 0), ( f"Writer accepted dangerous extra_tags[254]={sft}, expected None/0." @@ -237,7 +240,10 @@ def test_writer_filters_caller_supplied_subifds(tmp_path): }, ) out = tmp_path / 'with_subifds.tif' - to_geotiff(da, str(out)) + # Rich-tag extra_tags is the Experimental write surface (PR 4 of + # epic #2340); pass the opt-in so the filter logic this test + # exercises gets to run. + to_geotiff(da, str(out), allow_experimental_codecs=True) with tifffile.TiffFile(str(out)) as tf: sub = tf.pages[0].tags.get('SubIFDs') assert sub is None, ( @@ -264,7 +270,10 @@ def test_writer_keeps_benign_extra_tags(tmp_path): }, ) out = tmp_path / 'mixed_extra_tags.tif' - to_geotiff(da, str(out)) + # Rich-tag extra_tags is the Experimental write surface (PR 4 of + # epic #2340); pass the opt-in so the filter logic this test + # exercises gets to run. + to_geotiff(da, str(out), allow_experimental_codecs=True) with tifffile.TiffFile(str(out)) as tf: page = tf.pages[0] assert page.tags.get('NewSubfileType') is None diff --git a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py index 9598b1a9..c8f84dc6 100644 --- a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py +++ b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py @@ -152,7 +152,10 @@ def test_round_trip_property(tmp_path_factory, inputs): allow_experimental_codecs=True, ) - got = open_geotiff(path, dtype=str(da.dtype)) + # Mirror the write-side opt-in (PR 4 of epic #2340) so the read + # accepts the experimental codec. + got = open_geotiff(path, dtype=str(da.dtype), + allow_experimental_codecs=True) # Reader may add a leading band axis; squeeze for the 2D comparison. got_arr = got.values diff --git a/xrspatial/geotiff/tests/test_gds_chunked_gpu_parity_1896.py b/xrspatial/geotiff/tests/test_gds_chunked_gpu_parity_1896.py index 249564e6..68073701 100644 --- a/xrspatial/geotiff/tests/test_gds_chunked_gpu_parity_1896.py +++ b/xrspatial/geotiff/tests/test_gds_chunked_gpu_parity_1896.py @@ -232,7 +232,10 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=float("nan")) - eager = read_geotiff_gpu(path, on_gpu_failure='strict').data.get() + eager = read_geotiff_gpu( + path, on_gpu_failure='strict', + allow_experimental_codecs=True, + ).data.get() ifd, geo_info, header = _parse_for_gds(path) chunked_da = _read_geotiff_gpu_chunked_gds( @@ -275,7 +278,10 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=-9999.0) - eager = read_geotiff_gpu(path, on_gpu_failure='strict').data.get() + eager = read_geotiff_gpu( + path, on_gpu_failure='strict', + allow_experimental_codecs=True, + ).data.get() ifd, geo_info, header = _parse_for_gds(path) chunked_da = _read_geotiff_gpu_chunked_gds( diff --git a/xrspatial/geotiff/tests/test_golden_corpus_dask_gpu_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_dask_gpu_1930.py index 175bf30d..fc16b055 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_dask_gpu_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_dask_gpu_1930.py @@ -48,6 +48,12 @@ from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 +# Golden-corpus fixtures span every codec/tier, including the +# experimental and internal-only ones gated by epic #2340 PR 4. Opting +# in here lets the parity check exercise the full corpus; the per-codec +# release-contract tests pin the rejection shape separately. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} + FIXTURES_DIR = ( pathlib.Path(generate.__file__).resolve().parent / "fixtures" ) @@ -156,7 +162,8 @@ def test_dask_gpu_parity(manifest_entry: dict) -> None: f"to materialise the full corpus" ) candidate = open_geotiff( - str(path), gpu=True, chunks=CHUNK_SIZE, on_gpu_failure="strict" + str(path), gpu=True, chunks=CHUNK_SIZE, on_gpu_failure="strict", + **_OPTIN, ) # Wire the oracle's overview-IFD check when the fixture carries # overviews. The factory inherits the dask+GPU read options so each @@ -223,6 +230,7 @@ def test_dask_gpu_candidate_is_chunked_and_on_device() -> None: gpu=True, chunks=CHUNK_SIZE, on_gpu_failure="strict", + **_OPTIN, ) assert hasattr(da.data, "dask"), ( f"expected a dask-backed DataArray for {entry['id']!r}, " diff --git a/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py index c02a1bc6..4d86a538 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py @@ -40,7 +40,13 @@ pytest.importorskip("rasterio") pytest.importorskip("dask") -from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff import open_geotiff + +# PR 4 of epic #2340: the golden corpus has experimental-codec +# and JPEG-in-TIFF entries; the parity check is orthogonal to the +# read-side opt-in so pass both flags through every open. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} + # noqa: E402 from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -148,7 +154,7 @@ def test_dask_numpy_parity(manifest_entry: dict) -> None: f"`python -m xrspatial.geotiff.tests.golden_corpus.generate` " f"to materialise the full corpus" ) - candidate = open_geotiff(str(path), chunks=CHUNK_SIZE) + candidate = open_geotiff(str(path), chunks=CHUNK_SIZE, **_OPTIN) # Wire the oracle's overview-IFD check when the fixture carries # overviews. The factory threads the dask backend's options through # so each overview level reads via the same windowed-decode path @@ -211,7 +217,7 @@ def test_dask_candidate_is_actually_chunked() -> None: f"no eligible fixture is at least {2 * CHUNK_SIZE}x{2 * CHUNK_SIZE}" ) entry = eligible[0] - da = open_geotiff(str(_fixture_path(entry)), chunks=CHUNK_SIZE) + da = open_geotiff(str(_fixture_path(entry)), chunks=CHUNK_SIZE, **_OPTIN) assert hasattr(da.data, "dask"), ( f"expected a dask-backed DataArray for {entry['id']!r}, " f"got data of type {type(da.data).__name__}" diff --git a/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py index c4b7986b..46495ce1 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py @@ -68,7 +68,13 @@ pytest.importorskip("yaml") pytest.importorskip("rasterio") -from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff import open_geotiff + +# PR 4 of epic #2340: the golden corpus has experimental-codec +# and JPEG-in-TIFF entries; the parity check is orthogonal to the +# read-side opt-in so pass both flags through every open. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} + # noqa: E402 from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._marks import fast_slow_marks_for # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -156,7 +162,7 @@ def test_eager_numpy_parity(manifest_entry: dict) -> None: f"`python -m xrspatial.geotiff.tests.golden_corpus.generate` " f"to materialise the full corpus" ) - candidate = open_geotiff(str(path)) + candidate = open_geotiff(str(path), **_OPTIN) # When the fixture carries pyramid overviews, hand the oracle a # factory it can use to fetch each overview level via the same # backend (eager numpy). The oracle introspects the rasterio source @@ -168,7 +174,7 @@ def test_eager_numpy_parity(manifest_entry: dict) -> None: # base-IFD comparison still runs. overviews = manifest_entry.get("overviews") or [] factory = ( - (lambda lvl, p=path: open_geotiff(str(p), overview_level=lvl)) + (lambda lvl, p=path: open_geotiff(str(p), overview_level=lvl, **_OPTIN)) if overviews and fixture_id not in _OVERVIEW_READER_GAPS else None ) diff --git a/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py index f98db2ad..dcef967b 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py @@ -59,7 +59,11 @@ import numpy as np # noqa: E402 -from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff import open_geotiff + +# PR 4 of epic #2340: corpus has experimental + jpeg entries. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} + # noqa: E402 from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._marks import fast_slow_marks_for # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -200,14 +204,14 @@ def test_fsspec_parity(manifest_entry: dict, memory_fs_clean) -> None: payload = f.read() url = _serve_via_memory(payload, fixture_id) - candidate = open_geotiff(url) + candidate = open_geotiff(url, **_OPTIN) # When the fixture carries pyramid overviews, hand the oracle a # factory it can use to fetch each overview level via the same # cloud-eager path. The overview-IFD scan also goes through # ``_CloudSource``, so any divergence there shows up here. overviews = manifest_entry.get("overviews") or [] factory = ( - (lambda level, u=url: open_geotiff(u, overview_level=level)) + (lambda level, u=url: open_geotiff(u, overview_level=level, **_OPTIN)) if overviews and fixture_id not in _OVERVIEW_READER_GAPS else None ) @@ -265,7 +269,7 @@ def test_fsspec_candidate_is_actually_numpy(memory_fs_clean) -> None: with open(path, "rb") as f: payload = f.read() url = _serve_via_memory(payload, entry["id"]) - da = open_geotiff(url) + da = open_geotiff(url, **_OPTIN) assert isinstance(da.data, np.ndarray), ( f"expected a numpy.ndarray for the fsspec eager path on " f"{entry['id']!r}, got {type(da.data).__name__}" diff --git a/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py index f51dba38..5fd0c642 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_gpu_1930.py @@ -54,6 +54,12 @@ ) from xrspatial.geotiff import open_geotiff # noqa: E402 + +# Golden-corpus fixtures span every codec/tier, including the +# experimental and internal-only ones gated by epic #2340 PR 4. Opting +# in here lets the parity check exercise the full corpus; the per-codec +# release-contract tests pin the rejection shape separately. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -176,7 +182,7 @@ def test_gpu_parity(manifest_entry: dict) -> None: "auto" if fixture_id in _GPU_CPU_FALLBACK else "strict" ) candidate = open_geotiff( - str(path), gpu=True, on_gpu_failure=on_gpu_failure + str(path), gpu=True, on_gpu_failure=on_gpu_failure, **_OPTIN, ) # Wire the oracle's overview-IFD check when the fixture carries # overviews. The factory inherits the GPU read options the base @@ -186,6 +192,7 @@ def test_gpu_parity(manifest_entry: dict) -> None: factory = ( (lambda lvl, p=path, on_fail=on_gpu_failure: open_geotiff( str(p), gpu=True, on_gpu_failure=on_fail, overview_level=lvl, + **_OPTIN, )) if overviews and fixture_id not in _OVERVIEW_READER_GAPS else None @@ -235,7 +242,8 @@ def test_gpu_candidate_is_actually_on_device() -> None: pytest.skip("no eligible fixtures on disk") entry = plain_fixtures[0] da = open_geotiff( - str(_fixture_path(entry)), gpu=True, on_gpu_failure="strict" + str(_fixture_path(entry)), gpu=True, on_gpu_failure="strict", + **_OPTIN, ) assert isinstance(da.data, cupy.ndarray), ( f"expected a cupy.ndarray for {entry['id']!r}, " diff --git a/xrspatial/geotiff/tests/test_golden_corpus_http_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_http_1930.py index 6eb7782c..6a7b1c8d 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_http_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_http_1930.py @@ -38,6 +38,12 @@ pytest.importorskip("rasterio") from xrspatial.geotiff import open_geotiff # noqa: E402 + +# Golden-corpus fixtures span every codec/tier, including the +# experimental and internal-only ones gated by epic #2340 PR 4. Opting +# in here lets the parity check exercise the full corpus; the per-codec +# release-contract tests pin the rejection shape separately. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -164,7 +170,7 @@ def test_http_cog_parity(fixture_id: str, allow_private_http) -> None: try: host, port = httpd.server_address url = f"http://{host}:{port}/{fixture_id}.tif" - candidate = open_geotiff(url) + candidate = open_geotiff(url, **_OPTIN) # COG fixtures with internal overviews exercise the # overview-IFD code path through the HTTP range-request reader. # The factory points at the same served URL, so each overview @@ -176,7 +182,7 @@ def test_http_cog_parity(fixture_id: str, allow_private_http) -> None: ) overviews = (entry or {}).get("overviews") or [] factory = ( - (lambda lvl, u=url: open_geotiff(u, overview_level=lvl)) + (lambda lvl, u=url: open_geotiff(u, overview_level=lvl, **_OPTIN)) if overviews else None ) diff --git a/xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py index 3bdc05bf..ec665f1f 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_vrt_1930.py @@ -34,6 +34,12 @@ rasterio = pytest.importorskip("rasterio") from xrspatial.geotiff import open_geotiff, write_vrt # noqa: E402 + +# Golden-corpus fixtures span every codec/tier, including the +# experimental and internal-only ones gated by epic #2340 PR 4. Opting +# in here lets the parity check exercise the full corpus; the per-codec +# release-contract tests pin the rejection shape separately. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -128,7 +134,7 @@ def test_vrt_two_source_horizontal_mosaic( # Parity check via the shared oracle. Compares pixels, dtype, # transform, CRS, and nodata against the rasterio reference read of # the same VRT, the same as every other phase 3 backend module. - candidate = open_geotiff(str(vrt_path)) + candidate = open_geotiff(str(vrt_path), **_OPTIN) compare_to_oracle(vrt_path, candidate) diff --git a/xrspatial/geotiff/tests/test_gpu_writer_attrs_1563.py b/xrspatial/geotiff/tests/test_gpu_writer_attrs_1563.py index 94bfaac6..0173b80d 100644 --- a/xrspatial/geotiff/tests/test_gpu_writer_attrs_1563.py +++ b/xrspatial/geotiff/tests/test_gpu_writer_attrs_1563.py @@ -185,7 +185,11 @@ def test_extra_tags_custom_tag_round_trips_via_gpu_writer(tmp_path): }, ) out = str(tmp_path / 'extra_tags_1563.tif') - write_geotiff_gpu(da_gpu, out, compression='none') + # Rich-tag extra_tags is the Experimental write surface (PR 4 of + # epic #2340). Opt in on both write and read sides for the + # round-trip. + write_geotiff_gpu(da_gpu, out, compression='none', + allow_experimental_codecs=True) rd = open_geotiff(out) et = rd.attrs.get('extra_tags') or [] diff --git a/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py b/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py index 1e699b47..dec7326d 100644 --- a/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py @@ -232,7 +232,7 @@ def test_write_geotiff_gpu_jpeg_rgb_roundtrip(tmp_path): allow_internal_only_jpeg=True, ) - out = open_geotiff(path) + out = open_geotiff(path, allow_internal_only_jpeg=True) assert out.shape == arr.shape assert out.dtype == arr.dtype diff = np.abs(out.values.astype(np.int32) - arr.astype(np.int32)) @@ -260,7 +260,7 @@ def test_write_geotiff_gpu_jpeg_uint8_single_band_roundtrip(tmp_path): allow_internal_only_jpeg=True, ) - out = open_geotiff(path) + out = open_geotiff(path, allow_internal_only_jpeg=True) assert out.shape == arr.shape assert out.dtype == arr.dtype diff = np.abs(out.values.astype(np.int32) - arr.astype(np.int32)) diff --git a/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py b/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py index b479b0f7..7f6eab99 100644 --- a/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py +++ b/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py @@ -233,7 +233,7 @@ def test_write_geotiff_gpu_lz4_roundtrip(tmp_path): write_geotiff_gpu(da, path, compression='lz4', allow_experimental_codecs=True) - out = open_geotiff(path) + out = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(out.values, arr) assert out.dtype == arr.dtype @@ -272,7 +272,7 @@ def test_write_geotiff_gpu_lerc_float_lossless_roundtrip(tmp_path): write_geotiff_gpu(da, path, compression='lerc', allow_experimental_codecs=True) - out = open_geotiff(path) + out = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(out.values, arr) assert out.dtype == arr.dtype @@ -292,7 +292,7 @@ def test_write_geotiff_gpu_lerc_int_roundtrip(tmp_path): write_geotiff_gpu(da, path, compression='lerc', allow_experimental_codecs=True) - out = open_geotiff(path) + out = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(out.values, arr) assert out.dtype == arr.dtype @@ -334,7 +334,7 @@ def test_write_geotiff_gpu_jpeg2000_uint8_lossless_roundtrip(tmp_path): write_geotiff_gpu(da, path, compression='jpeg2000', allow_experimental_codecs=True) - out = open_geotiff(path) + out = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(out.values, arr) assert out.dtype == arr.dtype @@ -355,7 +355,7 @@ def test_write_geotiff_gpu_jpeg2000_rgb_roundtrip(tmp_path): write_geotiff_gpu(da, path, compression='jpeg2000', allow_experimental_codecs=True) - out = open_geotiff(path) + out = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(out.values, arr) assert out.shape == arr.shape @@ -379,8 +379,12 @@ def test_write_geotiff_gpu_j2k_alias_matches_jpeg2000(tmp_path): assert _read_compression_tag(j2k_path) == _COMPRESSION_TAGS['j2k'] assert _read_compression_tag(jpeg2k_path) == _COMPRESSION_TAGS['jpeg2000'] - np.testing.assert_array_equal(open_geotiff(j2k_path).values, arr) - np.testing.assert_array_equal(open_geotiff(jpeg2k_path).values, arr) + np.testing.assert_array_equal( + open_geotiff(j2k_path, allow_experimental_codecs=True).values, arr, + ) + np.testing.assert_array_equal( + open_geotiff(jpeg2k_path, allow_experimental_codecs=True).values, arr, + ) @_gpu_only diff --git a/xrspatial/geotiff/tests/test_jpeg.py b/xrspatial/geotiff/tests/test_jpeg.py index 67a79434..34229c6f 100644 --- a/xrspatial/geotiff/tests/test_jpeg.py +++ b/xrspatial/geotiff/tests/test_jpeg.py @@ -76,7 +76,7 @@ def test_grayscale_tiled(self, tmp_path): write(expected, path, compression='jpeg', tiled=True, tile_size=16, allow_internal_only_jpeg=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == expected.shape assert arr.dtype == np.uint8 # JPEG is lossy, check approximate @@ -89,7 +89,7 @@ def test_grayscale_stripped(self, tmp_path): write(expected, path, compression='jpeg', tiled=False, allow_internal_only_jpeg=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == expected.shape assert np.abs(arr.astype(int) - expected.astype(int)).mean() < 10 @@ -105,7 +105,7 @@ def test_rgb_tiled(self, tmp_path): write(expected, path, compression='jpeg', tiled=True, tile_size=16, allow_internal_only_jpeg=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == expected.shape assert np.abs(arr.astype(int) - expected.astype(int)).mean() < 10 @@ -262,7 +262,7 @@ def test_tiled_ycbcr_jpeg(self, tmp_path): assert ifds[0].jpeg_tables is not None assert ifds[0].jpeg_tables[:2] == b'\xff\xd8' - arr, _ = read_to_array(path) + arr, _ = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == (size, size, 3) assert arr.dtype == np.uint8 @@ -289,7 +289,7 @@ def test_tiled_grayscale_jpeg(self, tmp_path): ) as dst: dst.write(gray, 1) - arr, _ = read_to_array(path) + arr, _ = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == (size, size) with rio.open(path) as src: diff --git a/xrspatial/geotiff/tests/test_jpeg2000.py b/xrspatial/geotiff/tests/test_jpeg2000.py index bc664d8c..d27b4698 100644 --- a/xrspatial/geotiff/tests/test_jpeg2000.py +++ b/xrspatial/geotiff/tests/test_jpeg2000.py @@ -91,7 +91,7 @@ def test_tiled_uint8(self, tmp_path): path = str(tmp_path / 'j2k_1048_tiled_uint8.tif') write(expected, path, compression='jpeg2000', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_tiled_uint16(self, tmp_path): @@ -102,7 +102,7 @@ def test_tiled_uint16(self, tmp_path): path = str(tmp_path / 'j2k_1048_tiled_uint16.tif') write(expected, path, compression='jpeg2000', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_stripped_uint8(self, tmp_path): @@ -113,7 +113,7 @@ def test_stripped_uint8(self, tmp_path): path = str(tmp_path / 'j2k_1048_stripped.tif') write(expected, path, compression='jpeg2000', tiled=False) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_with_geo_info(self, tmp_path): @@ -127,7 +127,7 @@ def test_with_geo_info(self, tmp_path): write(expected, path, compression='jpeg2000', tiled=True, tile_size=8, geo_transform=gt, crs_epsg=4326, nodata=0) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) assert geo.crs_epsg == 4326 @@ -147,7 +147,7 @@ def test_public_api_roundtrip(self, tmp_path): to_geotiff(da, path, compression='jpeg2000', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_jpeg_gpu_1549.py b/xrspatial/geotiff/tests/test_jpeg_gpu_1549.py index 6e103660..1b33fe33 100644 --- a/xrspatial/geotiff/tests/test_jpeg_gpu_1549.py +++ b/xrspatial/geotiff/tests/test_jpeg_gpu_1549.py @@ -156,7 +156,7 @@ def wrapped(*args, **kwargs): path = str(tmp_path / "rgb_jpeg_1549.tif") _write_jpeg_rgb_tiff(path) - arr = read_geotiff_gpu(path, gpu='strict') + arr = read_geotiff_gpu(path, gpu='strict', allow_internal_only_jpeg=True) # Materialise the GPU buffer so any deferred kernel actually runs # and surface any sticky error from the decode pipeline. assert isinstance(arr.data, cupy.ndarray) @@ -190,8 +190,8 @@ def test_rgb_jpeg_gpu_matches_cpu(tmp_path): path = str(tmp_path / "rgb_jpeg_match_1549.tif") _write_jpeg_rgb_tiff(path, noise=False) - cpu = open_geotiff(path) - gpu = open_geotiff(path, gpu=True) + cpu = open_geotiff(path, allow_internal_only_jpeg=True) + gpu = open_geotiff(path, gpu=True, allow_internal_only_jpeg=True) assert cpu.shape == gpu.shape == (256, 256, 3) cpu_arr = np.asarray(cpu.data) @@ -216,8 +216,8 @@ def test_grayscale_jpeg_gpu_matches_cpu(tmp_path): path = str(tmp_path / "gray_jpeg_1549.tif") _write_jpeg_gray_tiff(path) - cpu = open_geotiff(path) - gpu = open_geotiff(path, gpu=True) + cpu = open_geotiff(path, allow_internal_only_jpeg=True) + gpu = open_geotiff(path, gpu=True, allow_internal_only_jpeg=True) assert cpu.shape == gpu.shape == (256, 256) cpu_arr = np.asarray(cpu.data) @@ -248,7 +248,7 @@ def test_cuda_context_survives_after_jpeg_gpu_read(tmp_path): path = str(tmp_path / "rgb_ctx_1549.tif") _write_jpeg_rgb_tiff(path) - arr = open_geotiff(path, gpu=True) + arr = open_geotiff(path, gpu=True, allow_internal_only_jpeg=True) _ = arr.data.get() # Plain CuPy op -- this is the call that used to surface the sticky @@ -261,6 +261,6 @@ def test_cuda_context_survives_after_jpeg_gpu_read(tmp_path): # "every later GPU call fails" symptom. other_path = str(tmp_path / "other_1549.tif") _write_jpeg_gray_tiff(other_path, seed=7) - other = open_geotiff(other_path, gpu=True) + other = open_geotiff(other_path, gpu=True, allow_internal_only_jpeg=True) assert other.shape == (256, 256) assert other.dtype == np.uint8 diff --git a/xrspatial/geotiff/tests/test_lerc.py b/xrspatial/geotiff/tests/test_lerc.py index af1638c9..b5eb05f5 100644 --- a/xrspatial/geotiff/tests/test_lerc.py +++ b/xrspatial/geotiff/tests/test_lerc.py @@ -91,7 +91,7 @@ def test_tiled_float32(self, tmp_path): path = str(tmp_path / 'lerc_1052_tiled_f32.tif') write(expected, path, compression='lerc', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_tiled_uint8(self, tmp_path): @@ -102,7 +102,7 @@ def test_tiled_uint8(self, tmp_path): path = str(tmp_path / 'lerc_1052_tiled_u8.tif') write(expected, path, compression='lerc', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_stripped_float32(self, tmp_path): @@ -113,7 +113,7 @@ def test_stripped_float32(self, tmp_path): path = str(tmp_path / 'lerc_1052_stripped.tif') write(expected, path, compression='lerc', tiled=False) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_public_api_roundtrip(self, tmp_path): @@ -131,7 +131,10 @@ def test_public_api_roundtrip(self, tmp_path): to_geotiff(da, path, compression='lerc', allow_experimental_codecs=True) - result = open_geotiff(path) + # PR 4 of epic #2340: the read side also gates the LERC codec + # on ``allow_experimental_codecs=True`` so the open here passes + # the same opt-in the writer required. + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lerc_max_z_error.py b/xrspatial/geotiff/tests/test_lerc_max_z_error.py index 1524e8a6..ec32b96d 100644 --- a/xrspatial/geotiff/tests/test_lerc_max_z_error.py +++ b/xrspatial/geotiff/tests/test_lerc_max_z_error.py @@ -53,7 +53,7 @@ def test_lossless_roundtrip_bit_exact(self, tmp_path): to_geotiff(da, path, compression='lerc', max_z_error=0.0, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, arr) @@ -78,7 +78,7 @@ def test_lossy_smaller_and_bounded(self, tmp_path): f"expected lossy file to be smaller, " f"got lossless={lossless_size} lossy={lossy_size}") - result = open_geotiff(lossy_path).values + result = open_geotiff(lossy_path, allow_experimental_codecs=True).values max_err = float(np.max(np.abs(result - arr))) assert max_err <= 0.05 + 1e-7, f"per-pixel error {max_err} exceeds budget" @@ -99,7 +99,7 @@ def test_dask_lerc_with_max_z_error(self, tmp_path): to_geotiff(da, path, compression='lerc', max_z_error=0.05, tile_size=32, allow_experimental_codecs=True) - result = open_geotiff(path).values + result = open_geotiff(path, allow_experimental_codecs=True).values max_err = float(np.max(np.abs(result - arr))) assert max_err <= 0.05 + 1e-7 @@ -128,5 +128,5 @@ def test_max_z_error_zero_with_other_codec_is_allowed(self, tmp_path): da = _make_dataarray(arr) path = str(tmp_path / 'zstd_default.tif') to_geotiff(da, path, compression='zstd', max_z_error=0.0) - result = open_geotiff(path).values + result = open_geotiff(path, allow_experimental_codecs=True).values np.testing.assert_array_equal(result, arr) diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask.py b/xrspatial/geotiff/tests/test_lerc_valid_mask.py index 5c708def..01121855 100644 --- a/xrspatial/geotiff/tests/test_lerc_valid_mask.py +++ b/xrspatial/geotiff/tests/test_lerc_valid_mask.py @@ -160,7 +160,7 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=float("nan")) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) for r in range(8): for c in range(8): if (r, c) in invalid_positions: @@ -188,7 +188,7 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=-9999.0) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) for r in range(8): for c in range(8): if (r, c) in invalid_positions: @@ -215,7 +215,7 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=65535) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) for r in range(8): for c in range(8): if (r, c) in invalid_positions: @@ -232,5 +232,5 @@ def test_no_mask_roundtrip_bitexact(self, tmp_path): path = str(tmp_path / "lerc_no_mask.tif") write(arr, path, compression="lerc", tiled=True, tile_size=8) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(out, arr) diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py index 7667c617..62ff1ad1 100644 --- a/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py +++ b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py @@ -95,8 +95,10 @@ def _read_cpu_gpu(path): from xrspatial.geotiff import read_geotiff_gpu from xrspatial.geotiff._reader import read_to_array - cpu, _geo = read_to_array(path) - gpu_da = read_geotiff_gpu(path, gpu='strict') + cpu, _geo = read_to_array(path, allow_experimental_codecs=True) + gpu_da = read_geotiff_gpu( + path, gpu='strict', allow_experimental_codecs=True, + ) gpu_host = gpu_da.data.get() return cpu, gpu_host diff --git a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py index 48d0d3d7..118eef86 100644 --- a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py +++ b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py @@ -340,7 +340,8 @@ def test_write_lerc_lossless_round_trip(tmp_path): arr = _make_float32_band() out = str(tmp_path / "tmp_2138_lerc_lossless.tif") _write(arr, out, compression="lerc", max_z_error=0.0) - decoded, _ = _read_to_array(out) + # LERC is the Experimental read tier (PR 4 of epic #2340). + decoded, _ = _read_to_array(out, allow_experimental_codecs=True) np.testing.assert_array_equal(decoded, arr) diff --git a/xrspatial/geotiff/tests/test_lz4.py b/xrspatial/geotiff/tests/test_lz4.py index d58a30d4..956a3d29 100644 --- a/xrspatial/geotiff/tests/test_lz4.py +++ b/xrspatial/geotiff/tests/test_lz4.py @@ -55,7 +55,7 @@ def test_tiled_uint8(self, tmp_path): path = str(tmp_path / 'lz4_1051_tiled_uint8.tif') write(expected, path, compression='lz4', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_tiled_float32(self, tmp_path): @@ -66,7 +66,7 @@ def test_tiled_float32(self, tmp_path): path = str(tmp_path / 'lz4_1051_tiled_f32.tif') write(expected, path, compression='lz4', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_stripped_uint8(self, tmp_path): @@ -77,7 +77,7 @@ def test_stripped_uint8(self, tmp_path): path = str(tmp_path / 'lz4_1051_stripped.tif') write(expected, path, compression='lz4', tiled=False) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_with_predictor(self, tmp_path): @@ -89,7 +89,7 @@ def test_with_predictor(self, tmp_path): write(expected, path, compression='lz4', tiled=True, tile_size=8, predictor=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_public_api_roundtrip(self, tmp_path): @@ -107,7 +107,7 @@ def test_public_api_roundtrip(self, tmp_path): to_geotiff(da, path, compression='lz4', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py index 326e0c22..e7601311 100644 --- a/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py @@ -76,7 +76,7 @@ def test_lz4_level_round_trip(self, level, tmp_path): to_geotiff(da, path, compression="lz4", compression_level=level, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) # lz4 is lossless: assert_array_equal, not assert_allclose. np.testing.assert_array_equal(result.values, da.values) @@ -88,7 +88,7 @@ def test_lz4_default_level_round_trip(self, tmp_path): path = str(tmp_path / "lz4_default.tif") to_geotiff(da, path, compression="lz4", allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, da.values) @@ -177,7 +177,7 @@ def test_lz4_dask_streaming_level_round_trip(self, level, tmp_path): to_geotiff(dask_da, path, compression="lz4", compression_level=level, tile_size=16, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, np_arr) @pytest.mark.parametrize("level", [-1, 17, 50]) diff --git a/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py b/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py index 88506b56..ca81e003 100644 --- a/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py +++ b/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py @@ -121,7 +121,9 @@ def test_user_extra_tags_override_extra_samples_1769(tmp_path): ]}, ) path = str(tmp_path / 'override_extras_1769.tif') - to_geotiff(da, path, photometric='rgb') + # extra_tags is the Experimental write surface (PR 4 of epic #2340). + to_geotiff(da, path, photometric='rgb', + allow_experimental_codecs=True) ifd = _read_primary_ifd(path) assert ifd.get_value(TAG_PHOTOMETRIC) == 2 # RGB from kwarg @@ -142,7 +144,9 @@ def test_user_extra_tags_override_photometric_1769(tmp_path): ) path = str(tmp_path / 'override_photometric_1769.tif') # photometric='rgb' would otherwise emit Photometric=2. - to_geotiff(da, path, photometric='rgb') + # extra_tags is the Experimental write surface (PR 4 of epic #2340). + to_geotiff(da, path, photometric='rgb', + allow_experimental_codecs=True) ifd = _read_primary_ifd(path) assert ifd.get_value(TAG_PHOTOMETRIC) == 0 # MinIsWhite from override diff --git a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py index eb93e2ea..b880d866 100644 --- a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py +++ b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py @@ -38,6 +38,12 @@ "missing_sources", "allow_rotated", "allow_unparseable_crs", + # PR 4 of epic #2340 added the experimental / internal-only codec + # opt-ins on the read side, mirroring the writer surface from #2137 + # / #1845. They sit after the other ``allow_*`` flags so the + # canonical order keeps the policy / typed-error gates grouped. + "allow_experimental_codecs", + "allow_internal_only_jpeg", "band_nodata", "mask_nodata", ) diff --git a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py index bd1e3479..76b55c02 100644 --- a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py @@ -80,7 +80,7 @@ def test_lossless_round_trip(self, float_raster, dask_float_raster, # Tier 3 codec (issue #2137); opt in to exercise the encode path. to_geotiff(dask_float_raster, path, compression='lerc', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) # LERC with max_z_error=0 is lossless for float32 sources. np.testing.assert_array_equal(result.values, float_raster.values) @@ -92,7 +92,7 @@ def test_lossy_respects_max_z_error(self, float_raster, dask_float_raster, to_geotiff(dask_float_raster, path, compression='lerc', max_z_error=max_z, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) max_diff = float(np.abs(result.values - float_raster.values).max()) assert max_diff <= max_z + 1e-7, ( f"LERC lossy stream write exceeded max_z_error budget: " @@ -114,8 +114,8 @@ def test_streaming_matches_eager(self, float_raster, dask_float_raster, to_geotiff(dask_float_raster, stream_path, compression='lerc', max_z_error=0.05, allow_experimental_codecs=True) - eager = open_geotiff(eager_path).values - stream = open_geotiff(stream_path).values + eager = open_geotiff(eager_path, allow_experimental_codecs=True).values + stream = open_geotiff(stream_path, allow_experimental_codecs=True).values np.testing.assert_array_equal(eager, stream) @@ -130,7 +130,7 @@ def test_round_trip(self, float_raster, dask_float_raster, tmp_path): # Tier 3 codec (issue #2137); opt in to exercise the encode path. to_geotiff(dask_float_raster, path, compression='lz4', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, float_raster.values) def test_streaming_matches_eager(self, float_raster, dask_float_raster, @@ -141,8 +141,8 @@ def test_streaming_matches_eager(self, float_raster, dask_float_raster, allow_experimental_codecs=True) to_geotiff(dask_float_raster, stream_path, compression='lz4', allow_experimental_codecs=True) - eager = open_geotiff(eager_path).values - stream = open_geotiff(stream_path).values + eager = open_geotiff(eager_path, allow_experimental_codecs=True).values + stream = open_geotiff(stream_path, allow_experimental_codecs=True).values np.testing.assert_array_equal(eager, stream) @@ -154,7 +154,7 @@ class TestStreamingPackbits: def test_round_trip_uint8(self, uint8_raster, dask_uint8_raster, tmp_path): path = str(tmp_path / 'stream_packbits.tif') to_geotiff(dask_uint8_raster, path, compression='packbits') - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, uint8_raster.values) def test_streaming_matches_eager(self, uint8_raster, dask_uint8_raster, @@ -163,8 +163,8 @@ def test_streaming_matches_eager(self, uint8_raster, dask_uint8_raster, stream_path = str(tmp_path / 'stream_packbits.tif') to_geotiff(uint8_raster, eager_path, compression='packbits') to_geotiff(dask_uint8_raster, stream_path, compression='packbits') - eager = open_geotiff(eager_path).values - stream = open_geotiff(stream_path).values + eager = open_geotiff(eager_path, allow_experimental_codecs=True).values + stream = open_geotiff(stream_path, allow_experimental_codecs=True).values np.testing.assert_array_equal(eager, stream) @@ -194,7 +194,7 @@ def test_cubic_overview_round_trip(self, tmp_path): overview_resampling='cubic') # Full-resolution data is preserved exactly. - full = open_geotiff(path) + full = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(full.values, arr) # Overview is half-size and same dtype (the cubic branch ends in @@ -229,7 +229,7 @@ def test_cubic_distinct_from_mean(self, tmp_path): tiled=True, cog=True, overview_levels=[2], overview_resampling='mean') - cubic_ov = open_geotiff(cubic_path, overview_level=1).values - mean_ov = open_geotiff(mean_path, overview_level=1).values + cubic_ov = open_geotiff(cubic_path, allow_experimental_codecs=True, overview_level=1).values + mean_ov = open_geotiff(mean_path, allow_experimental_codecs=True, overview_level=1).values assert not np.array_equal(cubic_ov, mean_ov), ( "cubic and mean overview should differ on a non-linear ramp") diff --git a/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py index f964fecc..2c22fee4 100644 --- a/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py +++ b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py @@ -41,7 +41,7 @@ def test_streaming_extra_tags_miniswhite_override_rejected_2073(tmp_path): out = tmp_path / 'tmp_2073_streaming_miniswhite.tif' with pytest.raises(ValueError) as excinfo: - to_geotiff(arr, str(out)) + to_geotiff(arr, str(out), allow_experimental_codecs=True) msg = str(excinfo.value) assert 'extra_tags' in msg @@ -62,7 +62,7 @@ def test_streaming_extra_tags_minisblack_override_roundtrips_2073(tmp_path): arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 1)] out = tmp_path / 'tmp_2073_streaming_minisblack.tif' - to_geotiff(arr, str(out)) + to_geotiff(arr, str(out), allow_experimental_codecs=True) assert os.path.exists(out) back = open_geotiff(str(out)) @@ -102,5 +102,5 @@ def test_streaming_extra_tags_miniswhite_override_multiband_not_rejected_2073( # to prevent" set. If it raises for an unrelated reason # (e.g. RGB-requires-3-bands check elsewhere), let the test # surface that as a real failure rather than swallowing it. - to_geotiff(arr, str(out)) + to_geotiff(arr, str(out), allow_experimental_codecs=True) assert os.path.exists(out) diff --git a/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py b/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py index 923d236f..fbbf2b59 100644 --- a/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py +++ b/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py @@ -113,7 +113,7 @@ def test_to_geotiff_jpeg_opt_in_emits_warning_and_writes(tmp_path): assert os.path.exists(path) assert os.path.getsize(path) > 0 # Internal reader still decodes the file. - decoded = open_geotiff(path) + decoded = open_geotiff(path, allow_internal_only_jpeg=True) assert decoded.shape == da.shape diff --git a/xrspatial/geotiff/tests/test_unsupported_features_2349.py b/xrspatial/geotiff/tests/test_unsupported_features_2349.py index 842451ae..f2607491 100644 --- a/xrspatial/geotiff/tests/test_unsupported_features_2349.py +++ b/xrspatial/geotiff/tests/test_unsupported_features_2349.py @@ -28,6 +28,7 @@ import pytest import xarray as xr +from xrspatial.geotiff._errors import VRTUnsupportedError from xrspatial.geotiff import (RotatedTransformError, UnsupportedGeoTIFFFeatureError, open_geotiff, to_geotiff) from xrspatial.geotiff._vrt import parse_vrt, write_vrt @@ -452,5 +453,13 @@ def test_vrt_with_skewed_geotransform_rejected(tmp_path): f' ' f'' ) - with pytest.raises(RotatedTransformError, match=r"rotated affine"): + # Sub-PR 2 of epic #2321 (#2329) centralised this rejection in + # ``_vrt_validation.py`` and re-typed it as ``VRTUnsupportedError`` + # with a message naming the skew terms. Accept either the legacy + # ``RotatedTransformError`` or the new typed error so the regression + # pin survives the validator refactor. + with pytest.raises( + (RotatedTransformError, VRTUnsupportedError), + match=r"rotated affine|rotation/shear", + ): open_geotiff(str(vrt)) diff --git a/xrspatial/geotiff/tests/test_vrt_metadata_parity_2321.py b/xrspatial/geotiff/tests/test_vrt_metadata_parity_2321.py index 795c5750..f51feca2 100644 --- a/xrspatial/geotiff/tests/test_vrt_metadata_parity_2321.py +++ b/xrspatial/geotiff/tests/test_vrt_metadata_parity_2321.py @@ -66,6 +66,7 @@ open_geotiff, read_vrt, to_geotiff) from xrspatial.geotiff._attrs import (GEOREF_STATUS_FULL, GEOREF_STATUS_TRANSFORM_ONLY) +from xrspatial.geotiff._errors import VRTUnsupportedError from xrspatial.geotiff._writer import write from xrspatial.geotiff.tests.conftest import requires_gpu @@ -760,7 +761,11 @@ def test_unsupported_resample_alg_raises(tmp_path): should be raised here instead; accept either today. """ vrt = _write_unsupported_resample_vrt(tmp_path) - with pytest.raises(NotImplementedError, match=r"Bilinear|1751"): + # Sub-PR 2 (#2329) landed the centralised VRT validator, which + # now raises ``VRTUnsupportedError`` for this case. + with pytest.raises( + (NotImplementedError, VRTUnsupportedError), match=r"Bilinear|1751", + ): read_vrt(vrt) @@ -802,7 +807,12 @@ def test_negative_srcrect_size_rejected(tmp_path): upgrade to ``VRTUnsupportedError``. """ vrt = _write_bad_srcrect_vrt(tmp_path, x_size=-50) - with pytest.raises(ValueError, match=r"SrcRect.*negative size"): + # Sub-PR 2 (#2329) centralised this rejection in the validator and + # upgraded it to ``VRTUnsupportedError``; accept either today. + with pytest.raises( + (ValueError, VRTUnsupportedError), + match=r"SrcRect.*negative", + ): read_vrt(vrt) @@ -850,7 +860,12 @@ def test_negative_dstrect_size_rejected(tmp_path): TODO(#2321): tighten to ``VRTUnsupportedError`` when PR 2 ships. """ vrt = _write_bad_dstrect_vrt(tmp_path, x_size=-10) - with pytest.raises(ValueError, match=r"DstRect.*negative size"): + # Sub-PR 2 (#2329) centralised this rejection in the validator and + # upgraded it to ``VRTUnsupportedError``; accept either today. + with pytest.raises( + (ValueError, VRTUnsupportedError), + match=r"DstRect.*negative", + ): read_vrt(vrt) diff --git a/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py b/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py index 7222e4ef..2aa58f9e 100644 --- a/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py +++ b/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py @@ -144,7 +144,9 @@ def test_gdal_metadata_xml_string_propagates_to_tiles(self, tmp_path): attrs={'crs': 4326, 'gdal_metadata_xml': xml}, ) vrt = str(tmp_path / 'gdal_xml.vrt') - to_geotiff(da, vrt, tile_size=16) + # Rich-tag write surface (PR 4 of epic #2340). + to_geotiff(da, vrt, tile_size=16, + allow_experimental_codecs=True) tile_da = open_geotiff(_first_tile_path(vrt)) # On read, the XML is re-parsed into a dict under # attrs['gdal_metadata']; the raw XML lands under @@ -175,7 +177,9 @@ def test_extra_tags_entry_propagates_to_tiles(self, tmp_path): }, ) vrt = str(tmp_path / 'extra_tags.vrt') - to_geotiff(da, vrt, tile_size=16) + # Rich-tag write surface (PR 4 of epic #2340). + to_geotiff(da, vrt, tile_size=16, + allow_experimental_codecs=True) tile_da = open_geotiff(_first_tile_path(vrt)) et = tile_da.attrs.get('extra_tags') or [] tag_ids = {entry[0] for entry in et} diff --git a/xrspatial/geotiff/tests/test_writer_matrix.py b/xrspatial/geotiff/tests/test_writer_matrix.py index 5bb1c8e0..bdc9f11f 100644 --- a/xrspatial/geotiff/tests/test_writer_matrix.py +++ b/xrspatial/geotiff/tests/test_writer_matrix.py @@ -71,7 +71,10 @@ def test_dtype_codec_roundtrip_stripped(tmp_path, dtype, codec): except (ImportError, ModuleNotFoundError) as e: pytest.skip(f"codec {codec} not available: {e}") - arr, _geo = read_to_array(path) + # Codecs in the experimental tier (LERC / J2K / LZ4) need the + # read-side opt-in too (PR 4 of epic #2340). Tier 1 codecs ignore + # the kwarg, so passing it unconditionally keeps the loop simple. + arr, _geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) assert arr.dtype == expected.dtype @@ -91,7 +94,7 @@ def test_dtype_codec_roundtrip_tiled(tmp_path, dtype, codec): except (ImportError, ModuleNotFoundError) as e: pytest.skip(f"codec {codec} not available: {e}") - arr, _geo = read_to_array(path) + arr, _geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) assert arr.dtype == expected.dtype