From 8cd6b06924a78cf8eb185bcd513553c1dac863fd Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 05:03:13 -0700 Subject: [PATCH 1/2] Require explicit opt-in on experimental and internal-only GeoTIFF paths (#2352) PR 4 of 6 under epic #2340 (GeoTIFF release contract). Adds the matching read-side gates for the Tier 3 / Tier 4 codecs the writer already gated in #2137 and #1845, and extends the ``allow_experimental_codecs`` opt-in onto the writer rich-tag attrs (``gdal_metadata_xml`` / ``extra_tags``). Read side: - ``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` / ``read_vrt`` now reject sources compressed with LERC, JPEG2000 / J2K, or LZ4 unless ``allow_experimental_codecs=True``. - The same readers reject JPEG-in-TIFF unless ``allow_internal_only_jpeg=True``. The flags do not collapse. - Gates fire at the public entry point (eager) or at graph build (dask), before any decode work, so the caller learns the opt-in name from the rejection rather than a deeper decode failure. Write side: - ``to_geotiff`` and ``write_geotiff_gpu`` reject writes that include ``attrs['gdal_metadata_xml']`` or ``attrs['extra_tags']`` unless ``allow_experimental_codecs=True``. Round-tripped attrs (carrying ``_xrspatial_geotiff_contract``) are exempt so the canonical attrs contract from #1984 still round-trips without a flag. Tests: - New ``test_experimental_internal_optin_2352.py`` pins the rejections, the exemption shape, and the public signatures. - Existing codec / rich-tag tests pass the new opt-ins through; the ``test_reader_kwarg_order_1935.py`` canonical kwarg order is updated to slot the new flags next to the other ``allow_*`` policy gates. Out of scope (sibling PRs under epic #2340): - Docstring rewrites for public read/write entry points (#2346). - Docs release-contract page (#2347). - ``SUPPORTED_FEATURES`` tier audit (#2348). - Unsupported-combination errors (#2349). Closes #2352. --- xrspatial/geotiff/__init__.py | 37 ++ xrspatial/geotiff/_attrs.py | 145 ++++++ xrspatial/geotiff/_backends/dask.py | 50 +- xrspatial/geotiff/_backends/gpu.py | 52 +- xrspatial/geotiff/_backends/vrt.py | 26 +- xrspatial/geotiff/_cog_http.py | 18 + xrspatial/geotiff/_reader.py | 24 +- xrspatial/geotiff/_vrt.py | 4 + xrspatial/geotiff/_writers/eager.py | 16 + xrspatial/geotiff/_writers/gpu.py | 12 + .../test_attrs_contract_canonical_1984.py | 16 +- .../tests/test_backend_full_parity_2211.py | 25 +- .../geotiff/tests/test_decompression_caps.py | 5 +- .../test_eager_bigtiff_overhead_exact_1905.py | 8 +- .../test_experimental_internal_optin_2352.py | 479 ++++++++++++++++++ .../tests/test_extra_tags_safe_filter_1657.py | 15 +- .../tests/test_fuzz_hypothesis_1661.py | 5 +- .../test_golden_corpus_dask_numpy_1930.py | 12 +- .../test_golden_corpus_eager_numpy_1930.py | 12 +- .../tests/test_golden_corpus_fsspec_1930.py | 12 +- xrspatial/geotiff/tests/test_jpeg.py | 10 +- xrspatial/geotiff/tests/test_jpeg2000.py | 10 +- xrspatial/geotiff/tests/test_lerc.py | 11 +- .../geotiff/tests/test_lerc_max_z_error.py | 8 +- .../geotiff/tests/test_lerc_valid_mask.py | 8 +- .../test_lowlevel_write_pushdown_2138.py | 3 +- xrspatial/geotiff/tests/test_lz4.py | 10 +- .../test_lz4_compression_level_2026_05_11.py | 6 +- .../tests/test_photometric_kwarg_1769.py | 8 +- .../tests/test_reader_kwarg_order_1935.py | 6 + .../tests/test_streaming_codecs_2026_05_11.py | 26 +- ...est_streaming_photometric_override_2073.py | 6 +- ...geotiff_allow_internal_only_jpeg_parity.py | 2 +- .../tests/test_vrt_tiled_metadata_1606.py | 8 +- xrspatial/geotiff/tests/test_writer_matrix.py | 7 +- 35 files changed, 1004 insertions(+), 98 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_experimental_internal_optin_2352.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 73c3145ed..72413b969 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -239,6 +239,7 @@ def _read_geo_info(source, *, overview_level: int | None = None, # binding it into the chunk closure (#1809). geo_info._ifd_photometric = _ifd.photometric geo_info._ifd_samples_per_pixel = _ifd.samples_per_pixel + geo_info._ifd_compression = _ifd.compression return geo_info, _ifd.height, _ifd.width, file_dtype, n_bands if _is_file_like(source): # File-like: read its full bytes; we don't try to mmap arbitrary @@ -314,6 +315,11 @@ def _read_geo_info(source, *, overview_level: int | None = None, # binding it into the chunk closure (#1809). geo_info._ifd_photometric = ifd.photometric geo_info._ifd_samples_per_pixel = ifd.samples_per_pixel + # Stash compression so the dask graph builder can fire the + # experimental / internal-only codec opt-in gate at graph build + # rather than waiting for the per-chunk task to fail (PR 4 of + # epic #2340). + geo_info._ifd_compression = ifd.compression return geo_info, ifd.height, ifd.width, file_dtype, n_bands finally: if close_data: @@ -337,6 +343,8 @@ def open_geotiff(source: str | BinaryIO, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True, ) -> xr.DataArray: @@ -500,6 +508,23 @@ def open_geotiff(source: str | BinaryIO, *, behaviour where the citation field passes through unchanged. Matches the same kwarg on ``to_geotiff`` / ``write_geotiff_gpu`` so a value the reader accepted can survive a round-trip. + allow_experimental_codecs : bool, default False + Read-side opt-in for sources compressed with the Tier 3 + experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). + Default ``False`` rejects the read with ``ValueError`` naming + the flag; cross-backend numerical parity is not claimed and + reader support across GDAL versions is uneven. Matches the + same kwarg on the writers so a round-trip through a Tier 3 + codec stays opt-in on both sides. See SUPPORTED_FEATURES tier + ``'experimental'`` (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + Read-side opt-in for JPEG-in-TIFF sources. The encoder writes + self-contained JFIF tiles without the TIFF JPEGTables tag + (347), so the read path is not interoperable with libtiff / + GDAL / rasterio. ``allow_experimental_codecs=True`` does NOT + cover this codec; the dedicated flag is its only gate. See + SUPPORTED_FEATURES tier ``'internal_only'`` for ``codec.jpeg`` + (epic #2340 PR 4, original writer gate #1845). Returns ------- @@ -630,6 +655,8 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, mask_nodata=mask_nodata, **vrt_kwargs) @@ -650,6 +677,10 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=( + allow_experimental_codecs), + allow_internal_only_jpeg=( + allow_internal_only_jpeg), mask_nodata=mask_nodata, **gpu_kwargs) @@ -661,6 +692,10 @@ def open_geotiff(source: str | BinaryIO, *, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=( + allow_experimental_codecs), + allow_internal_only_jpeg=( + allow_internal_only_jpeg), mask_nodata=mask_nodata) kwargs = {} @@ -679,6 +714,8 @@ def open_geotiff(source: str | BinaryIO, *, source, window=window, overview_level=overview_level, band=band, allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, **kwargs, ) diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 618254562..4f1c7b452 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -313,6 +313,151 @@ ) +# Map TIFF compression tag values to codec names so the read-side opt-in +# gate (PR 4 of epic #2340) can name the codec in the rejection message +# without each call site repeating the integer-to-name table. The keys +# are the TIFF 6 Compression tag values (tag 259) used inside +# ``_compression.py``; the values match the codec names that appear on +# the ``SUPPORTED_FEATURES`` keys (``codec.``). +_COMPRESSION_TAG_TO_NAME = { + 1: 'none', + 5: 'lzw', + 7: 'jpeg', + 8: 'deflate', + 32773: 'packbits', + 32946: 'deflate', # adobe deflate, same codec + 34712: 'jpeg2000', + 34887: 'lerc', + 50000: 'zstd', + 50004: 'lz4', +} + + +def _validate_read_codec_optin( + compression: int, + *, + allow_experimental_codecs: bool, + allow_internal_only_jpeg: bool, + entry_point: str = "open_geotiff", +) -> None: + """Reject experimental / internal-only codecs on the read side. + + Mirrors the writer-side gate in ``_writers/eager.py`` / + ``_writers/gpu.py`` so a caller cannot decode a file produced with + an experimental or internal-only codec without naming the matching + opt-in flag at the call site. The flag and feature both appear in + the rejection message so the caller learns the name from the error + rather than the docs. + + Part of PR 4 of epic #2340 (the GeoTIFF release contract). The + writer side has carried these gates since #2137 / #1845; this + helper extends the same shape to the read entry points. + + Parameters + ---------- + compression : int + TIFF Compression tag value (tag 259) from the parsed IFD. + allow_experimental_codecs : bool + Opt-in for Tier 3 read paths (LERC, JPEG2000 / J2K, LZ4). + allow_internal_only_jpeg : bool + Opt-in for Tier 4 read path (JPEG-in-TIFF). Does not collapse + into ``allow_experimental_codecs`` for the same reason as on + the writer: internal-only is a stricter tier and keeps its own + dedicated flag. + entry_point : str + Name of the public read function for the rejection message. + """ + codec_name = _COMPRESSION_TAG_TO_NAME.get(int(compression)) + if codec_name is None: + # Unknown compression tags are validated separately by the + # decoder; the opt-in gate only fires for codecs the reader + # otherwise accepts. + return + if codec_name == 'jpeg' and not allow_internal_only_jpeg: + raise ValueError( + f"{entry_point}: source uses compression='jpeg' (TIFF " + "tag 259 = 7), which is internal-only: the encoder writes " + "self-contained JFIF tiles without the TIFF JPEGTables tag " + "(347), so the read path is not interoperable with libtiff " + "/ GDAL / rasterio. Pass allow_internal_only_jpeg=True to " + "opt in to the internal-reader-only decode path. See " + "SUPPORTED_FEATURES tier 'internal_only' for codec.jpeg " + "(epic #2340, original gate #1845).") + if codec_name in _EXPERIMENTAL_CODECS and not allow_experimental_codecs: + raise ValueError( + f"{entry_point}: source uses compression={codec_name!r} " + "which is experimental on the read side: cross-backend " + "numerical parity is not claimed and reader support across " + "GDAL versions is uneven. Pass allow_experimental_codecs=" + "True to opt in, or re-encode the source with a stable " + "lossless codec ('deflate', 'zstd', or 'lzw'). See " + f"SUPPORTED_FEATURES tier 'experimental' for codec.{codec_name} " + "(epic #2340, original writer gate #2137).") + + +# Writer rich-tag attrs that ride the Experimental tier (PR 4 of epic +# #2340). ``writer.gdal_metadata_xml`` and ``writer.extra_tags`` carry +# free-form payloads through to the on-disk TIFF; their interop with +# other readers (rasterio, libtiff, GDAL) depends on the payload and is +# not part of the release promise. The opt-in keeps the surface narrow +# without removing the capability. +# +# Round-trip exemption: when the attrs carry the +# ``_xrspatial_geotiff_contract`` marker, they came from a previous +# xrspatial read. The reader populated ``gdal_metadata_xml`` / +# ``extra_tags`` from the source file; gating the write would force +# every read-then-write caller to opt in. Skip the gate on +# round-tripped attrs so the canonical contract from #1984 stays a +# no-flag operation. The gate still fires when a caller adds those +# attrs to a fresh DataArray that did not come from a read. +def _validate_write_rich_tag_optin( + attrs: dict, + *, + gdal_metadata_xml_kwarg: object = None, + extra_tags_kwarg: object = None, + allow_experimental_codecs: bool, + entry_point: str = "to_geotiff", +) -> None: + """Reject writes that include ``gdal_metadata_xml`` or ``extra_tags`` + unless the caller opted in via ``allow_experimental_codecs=True``. + + Part of PR 4 of epic #2340. Mirrors the existing codec-flag shape + so the rejection names the same opt-in the caller already learned + from the LERC / J2K / LZ4 paths. Round-tripped attrs (carrying + the ``_xrspatial_geotiff_contract`` marker) are exempt so the + canonical attrs round-trip (#1984) stays a no-flag operation; the + gate fires only when a caller constructs a fresh DataArray with + one of the rich-tag attrs set. + """ + if allow_experimental_codecs: + return + # Round-trip exemption: a DataArray that came from + # ``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` + # carries the contract marker. Writing it back is the canonical + # round-trip and should not require a new flag (issue #1984). + if '_xrspatial_geotiff_contract' in attrs: + return + triggered: list[str] = [] + if attrs.get('gdal_metadata_xml') is not None: + triggered.append("attrs['gdal_metadata_xml']") + if attrs.get('extra_tags') is not None: + triggered.append("attrs['extra_tags']") + if gdal_metadata_xml_kwarg is not None: + triggered.append('gdal_metadata_xml kwarg') + if extra_tags_kwarg is not None: + triggered.append('extra_tags kwarg') + if not triggered: + return + raise ValueError( + f"{entry_point}: {', '.join(triggered)} pass-through is " + "experimental: the on-disk bytes are written verbatim and " + "interop with other readers (rasterio, libtiff, GDAL) depends " + "on the payload. Pass allow_experimental_codecs=True to opt " + "in to the rich-tag write path, or drop the attr before the " + "write. See SUPPORTED_FEATURES tier 'experimental' for " + "writer.gdal_metadata_xml / writer.extra_tags (epic #2340).") + + # TIFF type ids needed when synthesizing extra_tags entries from attrs. _TIFF_BYTE = 1 _TIFF_ASCII = 2 diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 6da17cb31..3fc9ba758 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -42,6 +42,8 @@ def read_geotiff_dask(source: str, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True) -> xr.DataArray: """Read a GeoTIFF as a dask-backed DataArray for out-of-core processing. @@ -109,6 +111,15 @@ def read_geotiff_dask(source: str, *, the chunk task raises ``UnparseableCRSError`` instead of carrying the unrecognised payload through ``attrs['crs_wkt']``. See ``open_geotiff`` for the full description. + allow_experimental_codecs : bool, default False + Read-side opt-in for Tier 3 experimental codecs (``lerc``, + ``jpeg2000`` / ``j2k``, ``lz4``). Fires at graph build, before + any chunk task is scheduled. See ``open_geotiff`` for the full + description (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + Read-side opt-in for JPEG-in-TIFF sources. Not covered by + ``allow_experimental_codecs``. See ``open_geotiff`` for the + full description (epic #2340 PR 4, original writer gate #1845). on_gpu_failure : str, optional Accepted for cross-backend signature symmetry only. The dask path runs CPU decoders, so passing this kwarg raises @@ -295,6 +306,7 @@ def read_geotiff_dask(source: str, *, # Stash IFD photometric for the MinIsWhite nodata-inversion check below. geo_info._ifd_photometric = http_ifd.photometric geo_info._ifd_samples_per_pixel = http_ifd.samples_per_pixel + geo_info._ifd_compression = http_ifd.compression else: # Metadata-only read: O(1) memory via mmap, no pixel decompression. # Lazy import for the same circular-import reason as ``read_vrt`` @@ -303,6 +315,21 @@ def read_geotiff_dask(source: str, *, geo_info, full_h, full_w, file_dtype, n_bands = _read_geo_info( source, overview_level=overview_level, allow_rotated=allow_rotated) + + # Reject experimental / internal-only codecs at graph build, before + # any chunk task is scheduled. The compression tag is stashed on + # ``geo_info`` by ``_read_geo_info`` (local / fsspec) and by the + # HTTP / fsspec branch above. PR 4 of epic #2340. + _compression_tag = getattr(geo_info, '_ifd_compression', None) + if _compression_tag is not None: + from .._attrs import _validate_read_codec_optin + _validate_read_codec_optin( + _compression_tag, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="read_geotiff_dask", + ) + # PR-C #2226: centralize the nodata lifecycle in one value object. # ``raw_sentinel`` carries the pre-inversion sentinel that # ``attrs['nodata']`` must preserve; ``effective_sentinel`` is what @@ -533,7 +560,11 @@ def read_geotiff_dask(source: str, *, target_dtype=target_dtype, http_meta_key=http_meta_key, max_pixels=max_pixels, - allow_rotated=allow_rotated), + allow_rotated=allow_rotated, + allow_experimental_codecs=( + allow_experimental_codecs), + allow_internal_only_jpeg=( + allow_internal_only_jpeg)), shape=block_shape, dtype=target_dtype, ) @@ -555,7 +586,9 @@ def read_geotiff_dask(source: str, *, def _delayed_read_window(source, r0, c0, r1, c1, overview_level, nodata, band, *, target_dtype=None, http_meta_key=None, - max_pixels=None, allow_rotated=False): + max_pixels=None, allow_rotated=False, + allow_experimental_codecs=False, + allow_internal_only_jpeg=False): """Dask-delayed function to read a single window. *http_meta_key* is an optional ``Delayed[(TIFFHeader, IFD)]`` parsed @@ -611,11 +644,14 @@ def _read(http_meta): _r2a_kwargs = {} if max_pixels is not None: _r2a_kwargs['max_pixels'] = max_pixels - arr, _ = _read_to_array(source, window=(r0, c0, r1, c1), - overview_level=overview_level, - band=band, - allow_rotated=allow_rotated, - **_r2a_kwargs) + arr, _ = _read_to_array( + source, window=(r0, c0, r1, c1), + overview_level=overview_level, + band=band, + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + **_r2a_kwargs) if nodata is not None: # ``arr`` was just decoded by ``_fetch_decode_cog_http_tiles`` # or ``read_to_array``; both return freshly-allocated buffers diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index cdab49463..a8362e9e9 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -75,6 +75,8 @@ def read_geotiff_gpu(source: str, *, missing_sources: str = _MISSING_SOURCES_SENTINEL, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True, gpu: str = _GPU_DEPRECATED_SENTINEL, @@ -191,6 +193,16 @@ def read_geotiff_gpu(source: str, *, do not parse as WKT. ``False`` (the default since #1929) raises ``UnparseableCRSError``; ``True`` keeps the pre-#1929 permissive behaviour. See ``open_geotiff`` for the full description. + allow_experimental_codecs : bool, default False + Read-side opt-in for Tier 3 experimental codecs (``lerc``, + ``jpeg2000`` / ``j2k``, ``lz4``). The GPU read path mirrors the + CPU eager and dask paths so all three readers agree on the + opt-in contract. See ``open_geotiff`` for the full description + (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + Read-side opt-in for JPEG-in-TIFF sources. Not covered by + ``allow_experimental_codecs``. See ``open_geotiff`` for the + full description (epic #2340 PR 4, original writer gate #1845). band_nodata : {'first', None}, optional VRT-only. Accepted at the signature level for parity with ``open_geotiff``; passing it to ``read_geotiff_gpu`` raises @@ -306,6 +318,8 @@ def read_geotiff_gpu(source: str, *, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, ) @@ -355,6 +369,8 @@ def read_geotiff_gpu(source: str, *, overview_level=overview_level, band=band, name=name, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, ) @@ -390,6 +406,18 @@ def read_geotiff_gpu(source: str, *, # Skip mask IFDs (NewSubfileType bit 2) ifd = select_overview_ifd(ifds, overview_level) + # Reject experimental / internal-only codecs on the GPU read + # path unless the caller opted in. Mirrors the CPU eager and + # dask paths so all three read backends agree on the opt-in + # contract. PR 4 of epic #2340. + from .._attrs import _validate_read_codec_optin + _validate_read_codec_optin( + ifd.compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="read_geotiff_gpu", + ) + # Keep ``data`` / ``header`` bound to the base file's buffers so # the georef extractor below resolves base-IFD tag offsets # against the right bytes. Introduce ``ifd_data`` / ``ifd_header`` @@ -513,7 +541,9 @@ def read_geotiff_gpu(source: str, *, arr_cpu, _stripped_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) if orientation != 1: geo_info = _apply_orientation_geo_info( @@ -715,7 +745,9 @@ def _read_once(): arr_cpu, _cpu_fallback_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) arr_was_cpu_decoded = True else: @@ -731,7 +763,9 @@ def _read_once(): arr_cpu, _cpu_fallback_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) arr_was_cpu_decoded = True else: @@ -809,7 +843,9 @@ def _read_once(): arr_cpu, _cpu_fallback_geo = _read_to_array( source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) arr_was_cpu_decoded = True @@ -970,6 +1006,8 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, band, name, max_pixels, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): """Eager CPU decode + GPU upload for HTTP / fsspec sources (issue #2161). @@ -1010,6 +1048,8 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, arr_cpu, geo_info = _read_to_array( source, window=window, overview_level=overview_level, band=band, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) arr_gpu = cupy.asarray(arr_cpu) @@ -1198,6 +1238,8 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, window, band, name, max_pixels, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): """Lazy Dask+CuPy backend for ``read_geotiff_gpu(chunks=...)``. @@ -1325,6 +1367,8 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, max_pixels=max_pixels, name=name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, ) diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index c828df04c..69697bf95 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -128,6 +128,8 @@ def read_vrt(source: str, *, missing_sources: str = 'raise', allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True) -> xr.DataArray: """Read a GDAL Virtual Raster Table (.vrt) into an xarray.DataArray. @@ -239,6 +241,16 @@ def read_vrt(source: str, *, do not parse as WKT. ``False`` (the default since #1929) raises ``UnparseableCRSError`` rather than carrying the unrecognised payload through. See ``open_geotiff`` for the full description. + allow_experimental_codecs : bool, default False + Read-side opt-in for Tier 3 experimental codecs in any source + file referenced by the VRT. Forwarded to the per-source reader + for each ````. See ``open_geotiff`` for the + full description (epic #2340 PR 4). + allow_internal_only_jpeg : bool, default False + Read-side opt-in for JPEG-in-TIFF sources referenced by the + VRT. Forwarded to the per-source reader. See ``open_geotiff`` + for the full description (epic #2340 PR 4, original writer + gate #1845). overview_level : int or None Not supported for VRT sources. The VRT XML references its own source files, so overview selection would need to apply to each @@ -410,6 +422,8 @@ def read_vrt(source: str, *, missing_sources=missing_sources, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, mask_nodata=mask_nodata, ) @@ -464,6 +478,8 @@ def read_vrt(source: str, *, source, window=window, band=band, max_pixels=max_pixels, missing_sources=missing_sources, parsed=_parsed_vrt, mask_nodata=mask_nodata, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) if name is None: @@ -673,7 +689,9 @@ def read_vrt(source: str, *, def _vrt_chunk_read(source, r0, c0, r1, c1, *, band, max_pixels, missing_sources, declared_dtype, gpu, parsed_vrt, - mask_nodata: bool = True): + mask_nodata: bool = True, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False): """Decode a single chunk window from a VRT. Called by ``dask.delayed`` from :func:`_read_vrt_chunked`. The @@ -705,6 +723,8 @@ def _vrt_chunk_read(source, r0, c0, r1, c1, *, source, window=(r0, c0, r1, c1), band=band, max_pixels=max_pixels, missing_sources=missing_sources, parsed=parsed_vrt, mask_nodata=mask_nodata, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) # Mirror the eager post-decode integer-sentinel masking via the @@ -733,6 +753,8 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, max_pixels, missing_sources, allow_rotated: bool = False, allow_unparseable_crs: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, mask_nodata: bool = True): """Lazy ``read_vrt`` dispatch when ``chunks=`` is set (issue #1814). @@ -960,6 +982,8 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, gpu=gpu, parsed_vrt=parsed_vrt_key, mask_nodata=mask_nodata, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) block = da.from_delayed(d, shape=block_shape, dtype=declared_dtype, meta=meta) diff --git a/xrspatial/geotiff/_cog_http.py b/xrspatial/geotiff/_cog_http.py index 6cd7cfb3f..801da956f 100644 --- a/xrspatial/geotiff/_cog_http.py +++ b/xrspatial/geotiff/_cog_http.py @@ -296,6 +296,8 @@ def _read_cog_http(url: str, overview_level: int | None = None, window: tuple[int, int, int, int] | None = None, *, allow_rotated: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, GeoInfo]: """Read a COG via HTTP range requests. @@ -365,6 +367,22 @@ def _read_cog_http(url: str, overview_level: int | None = None, source.close() source = _reader._HTTPSource(route_path) + # Reject experimental and internal-only codecs on the HTTP read + # path unless the caller opted in. Mirrors the gate in + # ``_read_to_array`` so HTTP and local reads agree on the + # opt-in contract. See PR 4 of epic #2340. + from ._attrs import _validate_read_codec_optin + try: + _validate_read_codec_optin( + ifd.compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="open_geotiff", + ) + except ValueError: + source.close() + raise + # Mirror the local-path orientation guard in ``read_to_array``: a # windowed read against a non-default Orientation tag (274) has # ambiguous semantics (does the window refer to file pixels or to diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index f13518682..3883431ef 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -109,6 +109,8 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, max_pixels: int = MAX_PIXELS_DEFAULT, max_cloud_bytes=_MAX_CLOUD_BYTES_SENTINEL, allow_rotated: bool = False, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, GeoInfo]: """Read a GeoTIFF/COG to a numpy array (module-private). @@ -143,9 +145,12 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, """ source = _coerce_path(source) if _is_http_url(source): - return _read_cog_http(source, overview_level=overview_level, band=band, - max_pixels=max_pixels, window=window, - allow_rotated=allow_rotated) + return _read_cog_http( + source, overview_level=overview_level, band=band, + max_pixels=max_pixels, window=window, + allow_rotated=allow_rotated, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg) # Local file, cloud storage, or file-like buffer: read all bytes then parse # Resolve the cloud byte budget once so both the base-file ``_CloudSource`` @@ -225,6 +230,19 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, # Select IFD, skipping any mask IFDs ifd = select_overview_ifd(ifds, overview_level) + # Reject experimental and internal-only codecs on the read side + # unless the caller opted in. Mirrors the writer-side gate so the + # two surfaces stay consistent. Fires before any tile/strip work + # so the caller learns the missing flag from the rejection, not + # from a deeper decode-time failure. See PR 4 of epic #2340. + from ._attrs import _validate_read_codec_optin + _validate_read_codec_optin( + ifd.compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + entry_point="open_geotiff", + ) + # If the selected IFD came from the sidecar, swap the data / # header used for strip / tile reads below so byte offsets # resolve against the right buffer. diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 3bd370ce3..1f29fcaa3 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -960,6 +960,8 @@ def read_vrt(vrt_path: str, *, window=None, missing_sources: str = 'raise', parsed: VRTDataset | None = None, mask_nodata: bool = True, + allow_experimental_codecs: bool = False, + allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, VRTDataset]: """Read a VRT file by assembling pixel data from its source files. @@ -1281,6 +1283,8 @@ def read_vrt(vrt_path: str, *, window=None, window=(read_r0, read_c0, read_r1, read_c1), band=src.band - 1, # convert 1-based to 0-based max_pixels=max_pixels, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, ) except ( OSError, ValueError, struct.error, diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 8824b6409..9dae4c016 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -570,6 +570,22 @@ def to_geotiff(data: xr.DataArray | np.ndarray, stacklevel=2, ) + # Reject ``gdal_metadata_xml`` / ``extra_tags`` pass-through writes + # unless the caller opted in via ``allow_experimental_codecs=True``. + # Both surfaces ride the Experimental tier in ``SUPPORTED_FEATURES`` + # because the on-disk bytes are written verbatim and downstream + # interop with rasterio / libtiff / GDAL depends on the payload. + # PR 4 of epic #2340. + _data_attrs_for_optin = ( + data.attrs if isinstance(data, xr.DataArray) else {} + ) + from .._attrs import _validate_write_rich_tag_optin + _validate_write_rich_tag_optin( + _data_attrs_for_optin, + allow_experimental_codecs=allow_experimental_codecs, + entry_point="to_geotiff", + ) + # Issue #2312: ``cog=True`` requires a tiled internal layout per the # COG spec. The writer used to accept ``cog=True, tiled=False``, warn # that ``tile_size`` was ignored, and then write strips via diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 60a4ccc50..594df16a8 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -371,6 +371,18 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, entry_point="write_geotiff_gpu", ) + # Reject ``gdal_metadata_xml`` / ``extra_tags`` pass-through writes + # unless the caller opted in via ``allow_experimental_codecs=True``. + # Mirrors ``to_geotiff`` so the two writers expose the same surface. + # PR 4 of epic #2340. + from .._attrs import _validate_write_rich_tag_optin + _attrs_for_optin = getattr(data, 'attrs', None) or {} + _validate_write_rich_tag_optin( + _attrs_for_optin, + allow_experimental_codecs=allow_experimental_codecs, + entry_point="write_geotiff_gpu", + ) + # Issue #1987 ambiguous-metadata checks; mirrors ``to_geotiff`` so the # GPU writer enforces the same crs/crs_wkt consistency rule. Alias # resolution (issue #2215) keeps the validator consistent across diff --git a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py index cfc01b28a..59f314f62 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py @@ -172,7 +172,13 @@ def canonical_roundtrip(tmp_path): """ da, expected_transform = _make_canonical_da() path = str(tmp_path / 'attrs_contract_canonical.tif') - to_geotiff(da, path) + # The canonical fixture carries ``extra_tags``; the PR 4 rich-tag + # gate (#2340) exempts attrs carrying ``_xrspatial_geotiff_contract`` + # (set by the readers), but this fixture builds a fresh DataArray + # without the marker, so the initial write must opt in. The + # round-trip's subsequent open->write would NOT need the flag -- + # this fixture exercises the initial write only. + to_geotiff(da, path, allow_experimental_codecs=True) rd = open_geotiff(path) return rd, expected_transform @@ -355,7 +361,9 @@ def test_canonical_keys_present_per_backend(tmp_path, opener): """ da, _ = _make_canonical_da() path = str(tmp_path / f'canonical_{opener.__name__}.tif') - to_geotiff(da, path) + # Fresh DataArray with ``extra_tags`` -> rich-tag opt-in required + # (PR 4 of epic #2340; see ``canonical_roundtrip`` for details). + to_geotiff(da, path, allow_experimental_codecs=True) rd = opener(path) missing = sorted(k for k in _CANONICAL_KEYS if k not in rd.attrs) @@ -380,7 +388,7 @@ def test_raster_type_area_omitted_on_roundtrip(tmp_path): da, _ = _make_canonical_da() assert 'raster_type' not in da.attrs path = str(tmp_path / 'raster_type_area.tif') - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) rd = open_geotiff(path) assert 'raster_type' not in rd.attrs, ( f"area is the implicit default but the reader emitted " @@ -394,6 +402,6 @@ def test_raster_type_point_roundtrip(tmp_path): da, _ = _make_canonical_da() da.attrs['raster_type'] = 'point' path = str(tmp_path / 'raster_type_point.tif') - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) rd = open_geotiff(path) assert rd.attrs.get('raster_type') == 'point' diff --git a/xrspatial/geotiff/tests/test_backend_full_parity_2211.py b/xrspatial/geotiff/tests/test_backend_full_parity_2211.py index d1e525d57..cd42e91e8 100644 --- a/xrspatial/geotiff/tests/test_backend_full_parity_2211.py +++ b/xrspatial/geotiff/tests/test_backend_full_parity_2211.py @@ -240,21 +240,34 @@ class _Backend: skips: dict[str, str] = field(default_factory=dict) +# PR 4 of epic #2340: experimental and internal-only codecs require an +# explicit opt-in on the read side. The full parity matrix tests every +# fixture including ``compression_lerc_float32``; the gate's purpose is +# orthogonal to the parity check, so pass both flags through every +# opener. The matrix continues to test what it was meant to test. +_OPTIN = { + "allow_experimental_codecs": True, + "allow_internal_only_jpeg": True, +} + + def _read_eager_numpy(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: - return open_geotiff(str(path)) + return open_geotiff(str(path), **_OPTIN) def _read_dask_numpy(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: - return open_geotiff(str(path), chunks=_CHUNK_SIZE) + return open_geotiff(str(path), chunks=_CHUNK_SIZE, **_OPTIN) def _read_gpu(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: - return open_geotiff(str(path), gpu=True, on_gpu_failure="strict") + return open_geotiff( + str(path), gpu=True, on_gpu_failure="strict", **_OPTIN) def _read_dask_gpu(path: pathlib.Path, _fixture_id: str) -> xr.DataArray: return open_geotiff( str(path), gpu=True, chunks=_CHUNK_SIZE, on_gpu_failure="strict", + **_OPTIN, ) @@ -282,7 +295,7 @@ def _read_vrt_eager(path: pathlib.Path, fixture_id: str) -> xr.DataArray: vrt_path = cache_dir / f"{fixture_id}.vrt" if not vrt_path.exists(): write_vrt(str(vrt_path), [str(local_src)]) - return open_geotiff(str(vrt_path)) + return open_geotiff(str(vrt_path), **_OPTIN) def _read_http_fsspec(path: pathlib.Path, fixture_id: str) -> xr.DataArray: @@ -307,7 +320,7 @@ def _read_http_fsspec(path: pathlib.Path, fixture_id: str) -> xr.DataArray: with open(path, "rb") as f: fs.pipe(key, f.read()) try: - da = open_geotiff(f"memory://{key}") + da = open_geotiff(f"memory://{key}", **_OPTIN) finally: # Best-effort cleanup; fsspec memory store deletions are # idempotent. The cloud-eager path has already pulled the @@ -718,7 +731,7 @@ def _reference_for( ) -> xr.DataArray: fid = entry["id"] if fid not in cache: - cache[fid] = open_geotiff(str(_fixture_path(entry))) + cache[fid] = open_geotiff(str(_fixture_path(entry)), **_OPTIN) return cache[fid] diff --git a/xrspatial/geotiff/tests/test_decompression_caps.py b/xrspatial/geotiff/tests/test_decompression_caps.py index fc4e2c521..5954c8e43 100644 --- a/xrspatial/geotiff/tests/test_decompression_caps.py +++ b/xrspatial/geotiff/tests/test_decompression_caps.py @@ -278,8 +278,11 @@ def test_lz4_bomb_rejected(tmp_path): width=_DECLARED_W, height=_DECLARED_H) path = tmp_path / "lz4_bomb.tif" path.write_bytes(tiff) + # LZ4 is the Experimental read tier (PR 4 of epic #2340); pass the + # opt-in so the test exercises the bomb cap rather than the codec + # gate. with pytest.raises(ValueError, match="exceed"): - read_to_array(str(path)) + read_to_array(str(path), allow_experimental_codecs=True) def test_packbits_bomb_rejected(tmp_path): diff --git a/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py b/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py index 45ab772a3..e8c4259e0 100644 --- a/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py +++ b/xrspatial/geotiff/tests/test_eager_bigtiff_overhead_exact_1905.py @@ -112,7 +112,7 @@ def test_eager_writer_round_trip_with_large_gdal_metadata(tmp_path): ) da = _make_4x4_float32(gdal_metadata_xml=metadata_xml) path = str(tmp_path / "large_metadata_1905.tif") - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) rt = open_geotiff(path) np.testing.assert_array_equal(rt.values, da.values) @@ -153,7 +153,7 @@ def _huge_overhead(tags): writer_mod, "_compute_classic_ifd_overhead", _huge_overhead, ) - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) with open(path, "rb") as f: head = f.read(8) assert head[:2] == b"II" @@ -165,7 +165,7 @@ def test_eager_writer_keeps_classic_when_overhead_fits(tmp_path): """Sanity check: the default 4x4 file fits classic comfortably.""" da = _make_4x4_float32() path = str(tmp_path / "classic_1905.tif") - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) with open(path, "rb") as f: head = f.read(8) magic = struct.unpack_from("" da = _make_4x4_float32(gdal_metadata_xml=metadata_xml) path = str(tmp_path / "match_actual_1905.tif") - to_geotiff(da, path) + to_geotiff(da, path, allow_experimental_codecs=True) # Parse the file to find IFD offset, then measure the bytes between # IFD start and the first pixel-data offset to confirm the writer diff --git a/xrspatial/geotiff/tests/test_experimental_internal_optin_2352.py b/xrspatial/geotiff/tests/test_experimental_internal_optin_2352.py new file mode 100644 index 000000000..84375cca4 --- /dev/null +++ b/xrspatial/geotiff/tests/test_experimental_internal_optin_2352.py @@ -0,0 +1,479 @@ +"""Opt-in gates for experimental and internal-only GeoTIFF paths (#2352). + +Background +---------- +Issue #2340 tiers the GeoTIFF release contract into Stable / Advanced / +Experimental / Internal-only. PR 1 of the epic (#2348) lined up the +``SUPPORTED_FEATURES`` constant with that tier shape. PR 4 (this issue, +#2352) extends the writer-side opt-in shape onto every Experimental / +Internal-only path that did not yet have one. + +What this file pins +------------------- +* Read-side codec gate (LERC / JPEG2000 / J2K / LZ4 / JPEG-in-TIFF): + ``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` + reject a source whose Compression tag selects an experimental or + internal-only codec unless the caller passes the matching flag + (``allow_experimental_codecs=True`` or ``allow_internal_only_jpeg= + True``). The writer already enforces these flags; the read side + matches the same shape. +* Writer rich-tag gate: ``to_geotiff`` / ``write_geotiff_gpu`` reject + a DataArray whose attrs carry ``gdal_metadata_xml`` or ``extra_tags`` + unless the caller passes ``allow_experimental_codecs=True``. Both + attrs ride the Experimental tier in ``SUPPORTED_FEATURES`` because + the bytes are written verbatim and downstream interop depends on the + payload. +* Each rejection message names the missing flag, the feature, and the + tier so the call site can be fixed in one line. +* Signature checks pin the new kwargs on the public entry points. +""" +from __future__ import annotations + +import inspect +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import (open_geotiff, read_geotiff_dask, read_geotiff_gpu, + to_geotiff, write_geotiff_gpu) +from xrspatial.geotiff._attrs import (_COMPRESSION_TAG_TO_NAME, _validate_read_codec_optin, + _validate_write_rich_tag_optin) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_float32_da(h: int = 32, w: int = 32) -> xr.DataArray: + """Small float32 raster used for the write-side gate.""" + rng = np.random.RandomState(0) + arr = rng.standard_normal((h, w)).astype(np.float32) + return xr.DataArray( + arr, + dims=("y", "x"), + coords={ + "y": np.arange(h, dtype=np.float64), + "x": np.arange(w, dtype=np.float64), + }, + attrs={'crs': 4326}, + ) + + +def _write_test_tif(tmp_path, compression: str, + *, allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + dtype=np.float32): + """Write a small file with the requested codec so the read side has + a real target. Returns the file path. Skips when the optional + encoder dependency is missing.""" + h = w = 32 + rng = np.random.RandomState(0) + if dtype == np.uint8: + arr = rng.randint(0, 256, size=(h, w), dtype=np.uint8) + else: + arr = rng.standard_normal((h, w)).astype(dtype) + da = xr.DataArray( + arr, + dims=("y", "x"), + coords={ + "y": np.arange(h, dtype=np.float64), + "x": np.arange(w, dtype=np.float64), + }, + attrs={'crs': 4326}, + ) + path = os.path.join(str(tmp_path), f'src_{compression}.tif') + try: + to_geotiff( + da, path, compression=compression, + allow_experimental_codecs=allow_experimental_codecs, + allow_internal_only_jpeg=allow_internal_only_jpeg, + ) + except (ImportError, ModuleNotFoundError) as e: + pytest.skip(f"optional encoder missing for {compression}: {e}") + return path + + +# --------------------------------------------------------------------------- +# Signature tests: every public read entry point exposes the new flags. +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "fn", [open_geotiff, read_geotiff_dask, read_geotiff_gpu]) +def test_read_signature_has_codec_optin(fn): + """``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` + expose ``allow_experimental_codecs=False`` and + ``allow_internal_only_jpeg=False``. The default is ``False`` so + accidental removal of the gate would surface here. + """ + params = inspect.signature(fn).parameters + assert 'allow_experimental_codecs' in params, fn.__name__ + assert params['allow_experimental_codecs'].default is False + assert 'allow_internal_only_jpeg' in params, fn.__name__ + assert params['allow_internal_only_jpeg'].default is False + + +# --------------------------------------------------------------------------- +# Helper unit tests: the validators raise on the codec / attrs surface +# without an opt-in and accept the call with one. These do not require +# disk IO. +# --------------------------------------------------------------------------- + + +def test_validate_read_codec_optin_accepts_stable_codecs(): + """A stable codec (deflate / none / lzw / zstd / packbits) does not + require any opt-in regardless of the flag values. + """ + for tag in (1, 5, 8, 32773, 50000): # none, lzw, deflate, packbits, zstd + _validate_read_codec_optin( + tag, + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + + +@pytest.mark.parametrize("codec_name", ['lerc', 'jpeg2000', 'lz4']) +def test_validate_read_codec_optin_rejects_experimental(codec_name): + """LERC / JPEG2000 / LZ4 raise ``ValueError`` whose message names + ``allow_experimental_codecs`` so the caller can find the flag from + the error itself. + """ + tag = { + v: k for k, v in _COMPRESSION_TAG_TO_NAME.items() + }[codec_name] + with pytest.raises(ValueError, match='allow_experimental_codecs'): + _validate_read_codec_optin( + tag, + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + + +def test_validate_read_codec_optin_rejects_jpeg(): + """JPEG-in-TIFF raises ``ValueError`` whose message names + ``allow_internal_only_jpeg`` -- the dedicated flag, NOT + ``allow_experimental_codecs``. The two flags do not collapse. + """ + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + _validate_read_codec_optin( + 7, # COMPRESSION_JPEG + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + # ``allow_experimental_codecs=True`` does NOT cover JPEG. + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + _validate_read_codec_optin( + 7, + allow_experimental_codecs=True, + allow_internal_only_jpeg=False, + ) + + +def test_validate_read_codec_optin_accepts_jpeg_with_flag(): + """With ``allow_internal_only_jpeg=True`` the read-side gate lets + JPEG-in-TIFF through. + """ + _validate_read_codec_optin( + 7, + allow_experimental_codecs=False, + allow_internal_only_jpeg=True, + ) + + +@pytest.mark.parametrize("codec_name", ['lerc', 'jpeg2000', 'lz4']) +def test_validate_read_codec_optin_accepts_experimental_with_flag(codec_name): + """With ``allow_experimental_codecs=True`` the read-side gate lets + LERC / JPEG2000 / LZ4 through. + """ + tag = { + v: k for k, v in _COMPRESSION_TAG_TO_NAME.items() + }[codec_name] + _validate_read_codec_optin( + tag, + allow_experimental_codecs=True, + allow_internal_only_jpeg=False, + ) + + +def test_validate_read_codec_optin_message_names_feature_and_tier(): + """The rejection message names the codec, the missing flag, the + SUPPORTED_FEATURES tier, and the parent epic so a reader can fix + the call site without grepping the source. + """ + with pytest.raises(ValueError) as exc: + _validate_read_codec_optin( + 34887, # LERC + allow_experimental_codecs=False, + allow_internal_only_jpeg=False, + ) + msg = str(exc.value) + assert 'lerc' in msg + assert 'allow_experimental_codecs' in msg + assert 'experimental' in msg + assert '#2340' in msg + + +def test_validate_write_rich_tag_optin_accepts_empty_attrs(): + """No rich-tag attrs and no opt-in: the writer gate is a no-op.""" + _validate_write_rich_tag_optin( + {}, allow_experimental_codecs=False) + + +def test_validate_write_rich_tag_optin_rejects_gdal_metadata_xml(): + """``attrs['gdal_metadata_xml']`` triggers the gate; rejection + message names the attr and the opt-in flag. + """ + with pytest.raises(ValueError, match='gdal_metadata_xml'): + _validate_write_rich_tag_optin( + {'gdal_metadata_xml': ''}, + allow_experimental_codecs=False, + ) + + +def test_validate_write_rich_tag_optin_rejects_extra_tags(): + """``attrs['extra_tags']`` triggers the gate; rejection message + names the attr and the opt-in flag. + """ + with pytest.raises(ValueError, match='extra_tags'): + _validate_write_rich_tag_optin( + {'extra_tags': [(700, 1, 0, b'')]}, + allow_experimental_codecs=False, + ) + + +def test_validate_write_rich_tag_optin_accepts_with_flag(): + """``allow_experimental_codecs=True`` accepts both rich-tag attrs.""" + _validate_write_rich_tag_optin( + {'gdal_metadata_xml': '', + 'extra_tags': [(700, 1, 0, b'')]}, + allow_experimental_codecs=True, + ) + + +def test_validate_write_rich_tag_optin_exempts_round_trip(): + """An attrs dict carrying the ``_xrspatial_geotiff_contract`` marker + came from an xrspatial read; round-tripping it back through + ``to_geotiff`` is the canonical contract from #1984 and must not + require a new flag. The marker is the gate's exemption signal. + """ + _validate_write_rich_tag_optin( + {'gdal_metadata_xml': '', + 'extra_tags': [(700, 1, 0, b'')], + '_xrspatial_geotiff_contract': 2}, + allow_experimental_codecs=False, + ) + + +# --------------------------------------------------------------------------- +# Read end-to-end: write an experimental-codec file via the existing +# writer opt-in, then assert open_geotiff refuses to read it without the +# matching opt-in and succeeds with it. +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("codec", ['lerc', 'lz4']) +def test_open_geotiff_rejects_experimental_codec(tmp_path, codec): + """A file written with LERC or LZ4 raises ``ValueError`` on read + by default; the message names ``allow_experimental_codecs``. + """ + path = _write_test_tif( + tmp_path, codec, allow_experimental_codecs=True) + with pytest.raises(ValueError, match='allow_experimental_codecs'): + open_geotiff(path) + + +@pytest.mark.parametrize("codec", ['lerc', 'lz4']) +def test_open_geotiff_accepts_experimental_codec_with_flag(tmp_path, codec): + """``allow_experimental_codecs=True`` lets the read through and + returns a DataArray with the expected shape. + """ + path = _write_test_tif( + tmp_path, codec, allow_experimental_codecs=True) + try: + da = open_geotiff(path, allow_experimental_codecs=True) + except (ImportError, ModuleNotFoundError) as e: + pytest.skip(f"optional decoder missing for {codec}: {e}") + assert da.shape == (32, 32) + + +def test_open_geotiff_rejects_jpeg2000(tmp_path): + """JPEG2000 is experimental and requires the same opt-in as LERC / + LZ4. ``j2k`` is an alias the writer maps to the same codec, so + only one source file is needed. + """ + path = _write_test_tif( + tmp_path, 'jpeg2000', allow_experimental_codecs=True, + dtype=np.uint8) + with pytest.raises(ValueError, match='allow_experimental_codecs'): + open_geotiff(path) + + +def test_open_geotiff_rejects_jpeg_internal_only(tmp_path): + """JPEG-in-TIFF is internal-only; the dedicated flag + ``allow_internal_only_jpeg`` is the gate. Mirrors the writer side + where ``allow_experimental_codecs`` does NOT cover JPEG. + """ + path = _write_test_tif( + tmp_path, 'jpeg', allow_internal_only_jpeg=True, + dtype=np.uint8) + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + open_geotiff(path) + # ``allow_experimental_codecs=True`` does NOT unlock JPEG-in-TIFF + # on the read side either. + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + open_geotiff(path, allow_experimental_codecs=True) + + +def test_open_geotiff_accepts_jpeg_internal_only_with_flag(tmp_path): + """``allow_internal_only_jpeg=True`` lets the read through.""" + path = _write_test_tif( + tmp_path, 'jpeg', allow_internal_only_jpeg=True, + dtype=np.uint8) + da = open_geotiff(path, allow_internal_only_jpeg=True) + assert da.shape == (32, 32) + + +def test_read_geotiff_dask_rejects_experimental_codec(tmp_path): + """The dask read path fires the gate at graph build, before any + chunk task is scheduled. + """ + path = _write_test_tif( + tmp_path, 'lz4', allow_experimental_codecs=True) + with pytest.raises(ValueError, match='allow_experimental_codecs'): + read_geotiff_dask(path, chunks=16) + + +def test_read_geotiff_dask_accepts_experimental_codec_with_flag(tmp_path): + """``allow_experimental_codecs=True`` lets the dask graph build.""" + path = _write_test_tif( + tmp_path, 'lz4', allow_experimental_codecs=True) + try: + da = read_geotiff_dask( + path, chunks=16, allow_experimental_codecs=True) + except (ImportError, ModuleNotFoundError) as e: + pytest.skip(f"optional decoder missing: {e}") + assert da.shape == (32, 32) + + +# --------------------------------------------------------------------------- +# Writer rich-tag attrs: gdal_metadata_xml / extra_tags require the +# experimental opt-in. +# --------------------------------------------------------------------------- + + +def test_to_geotiff_rejects_gdal_metadata_xml_without_flag(tmp_path): + """A DataArray whose attrs carry ``gdal_metadata_xml`` is rejected + by ``to_geotiff`` unless the caller passes + ``allow_experimental_codecs=True``. The message names the attr. + """ + da = _make_float32_da() + da.attrs['gdal_metadata_xml'] = ( + '0' + '' + ) + path = os.path.join(str(tmp_path), 'rich_xml.tif') + with pytest.raises(ValueError, match='gdal_metadata_xml'): + to_geotiff(da, path) + + +def test_to_geotiff_rejects_extra_tags_without_flag(tmp_path): + """Same shape as the ``gdal_metadata_xml`` case but for + ``attrs['extra_tags']``. Both surfaces feed the same on-disk path + and ride the same Experimental tier. + """ + da = _make_float32_da() + da.attrs['extra_tags'] = [(700, 1, 0, b'')] + path = os.path.join(str(tmp_path), 'rich_extra.tif') + with pytest.raises(ValueError, match='extra_tags'): + to_geotiff(da, path) + + +def test_to_geotiff_accepts_rich_tags_with_flag(tmp_path): + """``allow_experimental_codecs=True`` lets both attrs through and + the write completes. + """ + da = _make_float32_da() + da.attrs['gdal_metadata_xml'] = ( + '0' + '' + ) + da.attrs['extra_tags'] = [(700, 1, 0, b'')] + path = os.path.join(str(tmp_path), 'rich_optin.tif') + out = to_geotiff(da, path, allow_experimental_codecs=True) + assert out == path + assert os.path.exists(path) + + +def test_write_geotiff_gpu_rejects_rich_tags_without_flag(tmp_path): + """The GPU writer mirrors ``to_geotiff`` so the two writers expose + a consistent surface; the rejection fires before any GPU work and + does not depend on cupy being installed. + """ + da = _make_float32_da() + da.attrs['gdal_metadata_xml'] = ( + '0' + '' + ) + path = os.path.join(str(tmp_path), 'rich_gpu.tif') + with pytest.raises(ValueError, match='gdal_metadata_xml'): + write_geotiff_gpu(da, path) + + +# --------------------------------------------------------------------------- +# Already-gated paths: pin the existing behaviour so a future refactor +# that drops a flag fails this file rather than passing in CI. +# --------------------------------------------------------------------------- + + +def test_allow_rotated_default_raises_already_gated(tmp_path): + """``allow_rotated=False`` (the default) raises on a rotated read. + Pinned here so the Experimental + Internal-only opt-in inventory + in PR 4 lives next to the existing ``allow_rotated`` / + ``allow_unparseable_crs`` gates and a future refactor cannot drop + one of them without failing this file. + + The PR 1 audit (#2348) demoted ``reader.allow_rotated`` from + advanced to experimental, so the gate already matches the epic. + """ + # A signature pin is enough -- the actual rotated-read behaviour is + # covered by the existing test_allow_rotated_geotiff_2115.py suite. + params = inspect.signature(open_geotiff).parameters + assert 'allow_rotated' in params + assert params['allow_rotated'].default is False + + +def test_allow_unparseable_crs_default_raises_already_gated(): + """``allow_unparseable_crs=False`` (the default) raises on an + unparseable CRS string. The PR 1 audit (#2348) demoted + ``reader.allow_unparseable_crs`` to experimental, so the gate + already matches the epic. Pin the signature here next to the new + PR 4 opt-ins so the inventory lives in one file. + """ + params = inspect.signature(open_geotiff).parameters + assert 'allow_unparseable_crs' in params + assert params['allow_unparseable_crs'].default is False + + +def test_gpu_read_requires_explicit_optin(): + """GPU read is Experimental in ``SUPPORTED_FEATURES`` and the + opt-in is the boolean ``gpu=True`` kwarg. Pin the default here so + a future refactor cannot flip GPU read to auto-on. + """ + params = inspect.signature(open_geotiff).parameters + assert 'gpu' in params + assert params['gpu'].default is False + + +def test_gpu_write_requires_explicit_optin(): + """GPU write is Experimental and gates on ``gpu=True`` / + ``gpu=None`` (auto-detect from CuPy data). Pin the default here: + ``None`` is the documented auto-detect sentinel and ``False`` / + ``True`` are the explicit selectors. A flip to ``True`` default + would silently route every NumPy write through the GPU pipeline. + """ + params = inspect.signature(to_geotiff).parameters + assert 'gpu' in params + assert params['gpu'].default is None diff --git a/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py b/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py index fc9038c22..dfd5d764b 100644 --- a/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py +++ b/xrspatial/geotiff/tests/test_extra_tags_safe_filter_1657.py @@ -214,7 +214,10 @@ def test_writer_filters_caller_supplied_newsubfiletype(tmp_path): }, ) out = tmp_path / 'with_dangerous_extra_tag.tif' - to_geotiff(da, str(out)) + # Rich-tag extra_tags is the Experimental write surface (PR 4 of + # epic #2340); pass the opt-in so the filter logic this test + # exercises gets to run. + to_geotiff(da, str(out), allow_experimental_codecs=True) sft = _read_subfile_type(out) assert sft in (None, 0), ( f"Writer accepted dangerous extra_tags[254]={sft}, expected None/0." @@ -237,7 +240,10 @@ def test_writer_filters_caller_supplied_subifds(tmp_path): }, ) out = tmp_path / 'with_subifds.tif' - to_geotiff(da, str(out)) + # Rich-tag extra_tags is the Experimental write surface (PR 4 of + # epic #2340); pass the opt-in so the filter logic this test + # exercises gets to run. + to_geotiff(da, str(out), allow_experimental_codecs=True) with tifffile.TiffFile(str(out)) as tf: sub = tf.pages[0].tags.get('SubIFDs') assert sub is None, ( @@ -264,7 +270,10 @@ def test_writer_keeps_benign_extra_tags(tmp_path): }, ) out = tmp_path / 'mixed_extra_tags.tif' - to_geotiff(da, str(out)) + # Rich-tag extra_tags is the Experimental write surface (PR 4 of + # epic #2340); pass the opt-in so the filter logic this test + # exercises gets to run. + to_geotiff(da, str(out), allow_experimental_codecs=True) with tifffile.TiffFile(str(out)) as tf: page = tf.pages[0] assert page.tags.get('NewSubfileType') is None diff --git a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py index 9598b1a96..c8f84dc6e 100644 --- a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py +++ b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py @@ -152,7 +152,10 @@ def test_round_trip_property(tmp_path_factory, inputs): allow_experimental_codecs=True, ) - got = open_geotiff(path, dtype=str(da.dtype)) + # Mirror the write-side opt-in (PR 4 of epic #2340) so the read + # accepts the experimental codec. + got = open_geotiff(path, dtype=str(da.dtype), + allow_experimental_codecs=True) # Reader may add a leading band axis; squeeze for the 2D comparison. got_arr = got.values diff --git a/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py index c02a1bc60..4d86a5386 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_dask_numpy_1930.py @@ -40,7 +40,13 @@ pytest.importorskip("rasterio") pytest.importorskip("dask") -from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff import open_geotiff + +# PR 4 of epic #2340: the golden corpus has experimental-codec +# and JPEG-in-TIFF entries; the parity check is orthogonal to the +# read-side opt-in so pass both flags through every open. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} + # noqa: E402 from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -148,7 +154,7 @@ def test_dask_numpy_parity(manifest_entry: dict) -> None: f"`python -m xrspatial.geotiff.tests.golden_corpus.generate` " f"to materialise the full corpus" ) - candidate = open_geotiff(str(path), chunks=CHUNK_SIZE) + candidate = open_geotiff(str(path), chunks=CHUNK_SIZE, **_OPTIN) # Wire the oracle's overview-IFD check when the fixture carries # overviews. The factory threads the dask backend's options through # so each overview level reads via the same windowed-decode path @@ -211,7 +217,7 @@ def test_dask_candidate_is_actually_chunked() -> None: f"no eligible fixture is at least {2 * CHUNK_SIZE}x{2 * CHUNK_SIZE}" ) entry = eligible[0] - da = open_geotiff(str(_fixture_path(entry)), chunks=CHUNK_SIZE) + da = open_geotiff(str(_fixture_path(entry)), chunks=CHUNK_SIZE, **_OPTIN) assert hasattr(da.data, "dask"), ( f"expected a dask-backed DataArray for {entry['id']!r}, " f"got data of type {type(da.data).__name__}" diff --git a/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py index c4b7986bd..46495ce1f 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_eager_numpy_1930.py @@ -68,7 +68,13 @@ pytest.importorskip("yaml") pytest.importorskip("rasterio") -from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff import open_geotiff + +# PR 4 of epic #2340: the golden corpus has experimental-codec +# and JPEG-in-TIFF entries; the parity check is orthogonal to the +# read-side opt-in so pass both flags through every open. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} + # noqa: E402 from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._marks import fast_slow_marks_for # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -156,7 +162,7 @@ def test_eager_numpy_parity(manifest_entry: dict) -> None: f"`python -m xrspatial.geotiff.tests.golden_corpus.generate` " f"to materialise the full corpus" ) - candidate = open_geotiff(str(path)) + candidate = open_geotiff(str(path), **_OPTIN) # When the fixture carries pyramid overviews, hand the oracle a # factory it can use to fetch each overview level via the same # backend (eager numpy). The oracle introspects the rasterio source @@ -168,7 +174,7 @@ def test_eager_numpy_parity(manifest_entry: dict) -> None: # base-IFD comparison still runs. overviews = manifest_entry.get("overviews") or [] factory = ( - (lambda lvl, p=path: open_geotiff(str(p), overview_level=lvl)) + (lambda lvl, p=path: open_geotiff(str(p), overview_level=lvl, **_OPTIN)) if overviews and fixture_id not in _OVERVIEW_READER_GAPS else None ) diff --git a/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py b/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py index f98db2ad1..dcef967bb 100644 --- a/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py +++ b/xrspatial/geotiff/tests/test_golden_corpus_fsspec_1930.py @@ -59,7 +59,11 @@ import numpy as np # noqa: E402 -from xrspatial.geotiff import open_geotiff # noqa: E402 +from xrspatial.geotiff import open_geotiff + +# PR 4 of epic #2340: corpus has experimental + jpeg entries. +_OPTIN = {"allow_experimental_codecs": True, "allow_internal_only_jpeg": True} + # noqa: E402 from xrspatial.geotiff.tests.golden_corpus import generate # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._marks import fast_slow_marks_for # noqa: E402 from xrspatial.geotiff.tests.golden_corpus._oracle import compare_to_oracle # noqa: E402 @@ -200,14 +204,14 @@ def test_fsspec_parity(manifest_entry: dict, memory_fs_clean) -> None: payload = f.read() url = _serve_via_memory(payload, fixture_id) - candidate = open_geotiff(url) + candidate = open_geotiff(url, **_OPTIN) # When the fixture carries pyramid overviews, hand the oracle a # factory it can use to fetch each overview level via the same # cloud-eager path. The overview-IFD scan also goes through # ``_CloudSource``, so any divergence there shows up here. overviews = manifest_entry.get("overviews") or [] factory = ( - (lambda level, u=url: open_geotiff(u, overview_level=level)) + (lambda level, u=url: open_geotiff(u, overview_level=level, **_OPTIN)) if overviews and fixture_id not in _OVERVIEW_READER_GAPS else None ) @@ -265,7 +269,7 @@ def test_fsspec_candidate_is_actually_numpy(memory_fs_clean) -> None: with open(path, "rb") as f: payload = f.read() url = _serve_via_memory(payload, entry["id"]) - da = open_geotiff(url) + da = open_geotiff(url, **_OPTIN) assert isinstance(da.data, np.ndarray), ( f"expected a numpy.ndarray for the fsspec eager path on " f"{entry['id']!r}, got {type(da.data).__name__}" diff --git a/xrspatial/geotiff/tests/test_jpeg.py b/xrspatial/geotiff/tests/test_jpeg.py index 67a79434a..34229c6f9 100644 --- a/xrspatial/geotiff/tests/test_jpeg.py +++ b/xrspatial/geotiff/tests/test_jpeg.py @@ -76,7 +76,7 @@ def test_grayscale_tiled(self, tmp_path): write(expected, path, compression='jpeg', tiled=True, tile_size=16, allow_internal_only_jpeg=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == expected.shape assert arr.dtype == np.uint8 # JPEG is lossy, check approximate @@ -89,7 +89,7 @@ def test_grayscale_stripped(self, tmp_path): write(expected, path, compression='jpeg', tiled=False, allow_internal_only_jpeg=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == expected.shape assert np.abs(arr.astype(int) - expected.astype(int)).mean() < 10 @@ -105,7 +105,7 @@ def test_rgb_tiled(self, tmp_path): write(expected, path, compression='jpeg', tiled=True, tile_size=16, allow_internal_only_jpeg=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == expected.shape assert np.abs(arr.astype(int) - expected.astype(int)).mean() < 10 @@ -262,7 +262,7 @@ def test_tiled_ycbcr_jpeg(self, tmp_path): assert ifds[0].jpeg_tables is not None assert ifds[0].jpeg_tables[:2] == b'\xff\xd8' - arr, _ = read_to_array(path) + arr, _ = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == (size, size, 3) assert arr.dtype == np.uint8 @@ -289,7 +289,7 @@ def test_tiled_grayscale_jpeg(self, tmp_path): ) as dst: dst.write(gray, 1) - arr, _ = read_to_array(path) + arr, _ = read_to_array(path, allow_internal_only_jpeg=True) assert arr.shape == (size, size) with rio.open(path) as src: diff --git a/xrspatial/geotiff/tests/test_jpeg2000.py b/xrspatial/geotiff/tests/test_jpeg2000.py index bc664d8ce..d27b46982 100644 --- a/xrspatial/geotiff/tests/test_jpeg2000.py +++ b/xrspatial/geotiff/tests/test_jpeg2000.py @@ -91,7 +91,7 @@ def test_tiled_uint8(self, tmp_path): path = str(tmp_path / 'j2k_1048_tiled_uint8.tif') write(expected, path, compression='jpeg2000', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_tiled_uint16(self, tmp_path): @@ -102,7 +102,7 @@ def test_tiled_uint16(self, tmp_path): path = str(tmp_path / 'j2k_1048_tiled_uint16.tif') write(expected, path, compression='jpeg2000', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_stripped_uint8(self, tmp_path): @@ -113,7 +113,7 @@ def test_stripped_uint8(self, tmp_path): path = str(tmp_path / 'j2k_1048_stripped.tif') write(expected, path, compression='jpeg2000', tiled=False) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_with_geo_info(self, tmp_path): @@ -127,7 +127,7 @@ def test_with_geo_info(self, tmp_path): write(expected, path, compression='jpeg2000', tiled=True, tile_size=8, geo_transform=gt, crs_epsg=4326, nodata=0) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) assert geo.crs_epsg == 4326 @@ -147,7 +147,7 @@ def test_public_api_roundtrip(self, tmp_path): to_geotiff(da, path, compression='jpeg2000', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lerc.py b/xrspatial/geotiff/tests/test_lerc.py index af1638c9d..b5eb05f51 100644 --- a/xrspatial/geotiff/tests/test_lerc.py +++ b/xrspatial/geotiff/tests/test_lerc.py @@ -91,7 +91,7 @@ def test_tiled_float32(self, tmp_path): path = str(tmp_path / 'lerc_1052_tiled_f32.tif') write(expected, path, compression='lerc', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_tiled_uint8(self, tmp_path): @@ -102,7 +102,7 @@ def test_tiled_uint8(self, tmp_path): path = str(tmp_path / 'lerc_1052_tiled_u8.tif') write(expected, path, compression='lerc', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_stripped_float32(self, tmp_path): @@ -113,7 +113,7 @@ def test_stripped_float32(self, tmp_path): path = str(tmp_path / 'lerc_1052_stripped.tif') write(expected, path, compression='lerc', tiled=False) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_public_api_roundtrip(self, tmp_path): @@ -131,7 +131,10 @@ def test_public_api_roundtrip(self, tmp_path): to_geotiff(da, path, compression='lerc', allow_experimental_codecs=True) - result = open_geotiff(path) + # PR 4 of epic #2340: the read side also gates the LERC codec + # on ``allow_experimental_codecs=True`` so the open here passes + # the same opt-in the writer required. + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lerc_max_z_error.py b/xrspatial/geotiff/tests/test_lerc_max_z_error.py index 1524e8a63..ec32b96dc 100644 --- a/xrspatial/geotiff/tests/test_lerc_max_z_error.py +++ b/xrspatial/geotiff/tests/test_lerc_max_z_error.py @@ -53,7 +53,7 @@ def test_lossless_roundtrip_bit_exact(self, tmp_path): to_geotiff(da, path, compression='lerc', max_z_error=0.0, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, arr) @@ -78,7 +78,7 @@ def test_lossy_smaller_and_bounded(self, tmp_path): f"expected lossy file to be smaller, " f"got lossless={lossless_size} lossy={lossy_size}") - result = open_geotiff(lossy_path).values + result = open_geotiff(lossy_path, allow_experimental_codecs=True).values max_err = float(np.max(np.abs(result - arr))) assert max_err <= 0.05 + 1e-7, f"per-pixel error {max_err} exceeds budget" @@ -99,7 +99,7 @@ def test_dask_lerc_with_max_z_error(self, tmp_path): to_geotiff(da, path, compression='lerc', max_z_error=0.05, tile_size=32, allow_experimental_codecs=True) - result = open_geotiff(path).values + result = open_geotiff(path, allow_experimental_codecs=True).values max_err = float(np.max(np.abs(result - arr))) assert max_err <= 0.05 + 1e-7 @@ -128,5 +128,5 @@ def test_max_z_error_zero_with_other_codec_is_allowed(self, tmp_path): da = _make_dataarray(arr) path = str(tmp_path / 'zstd_default.tif') to_geotiff(da, path, compression='zstd', max_z_error=0.0) - result = open_geotiff(path).values + result = open_geotiff(path, allow_experimental_codecs=True).values np.testing.assert_array_equal(result, arr) diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask.py b/xrspatial/geotiff/tests/test_lerc_valid_mask.py index 5c708def4..011218558 100644 --- a/xrspatial/geotiff/tests/test_lerc_valid_mask.py +++ b/xrspatial/geotiff/tests/test_lerc_valid_mask.py @@ -160,7 +160,7 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=float("nan")) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) for r in range(8): for c in range(8): if (r, c) in invalid_positions: @@ -188,7 +188,7 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=-9999.0) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) for r in range(8): for c in range(8): if (r, c) in invalid_positions: @@ -215,7 +215,7 @@ def invalid_pred(a): write(arr, path, compression="lerc", tiled=True, tile_size=8, nodata=65535) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) for r in range(8): for c in range(8): if (r, c) in invalid_positions: @@ -232,5 +232,5 @@ def test_no_mask_roundtrip_bitexact(self, tmp_path): path = str(tmp_path / "lerc_no_mask.tif") write(arr, path, compression="lerc", tiled=True, tile_size=8) - out, _geo = read_to_array(path) + out, _geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(out, arr) diff --git a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py index 48d0d3d72..118eef86b 100644 --- a/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py +++ b/xrspatial/geotiff/tests/test_lowlevel_write_pushdown_2138.py @@ -340,7 +340,8 @@ def test_write_lerc_lossless_round_trip(tmp_path): arr = _make_float32_band() out = str(tmp_path / "tmp_2138_lerc_lossless.tif") _write(arr, out, compression="lerc", max_z_error=0.0) - decoded, _ = _read_to_array(out) + # LERC is the Experimental read tier (PR 4 of epic #2340). + decoded, _ = _read_to_array(out, allow_experimental_codecs=True) np.testing.assert_array_equal(decoded, arr) diff --git a/xrspatial/geotiff/tests/test_lz4.py b/xrspatial/geotiff/tests/test_lz4.py index d58a30d4a..956a3d29d 100644 --- a/xrspatial/geotiff/tests/test_lz4.py +++ b/xrspatial/geotiff/tests/test_lz4.py @@ -55,7 +55,7 @@ def test_tiled_uint8(self, tmp_path): path = str(tmp_path / 'lz4_1051_tiled_uint8.tif') write(expected, path, compression='lz4', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_tiled_float32(self, tmp_path): @@ -66,7 +66,7 @@ def test_tiled_float32(self, tmp_path): path = str(tmp_path / 'lz4_1051_tiled_f32.tif') write(expected, path, compression='lz4', tiled=True, tile_size=8) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_stripped_uint8(self, tmp_path): @@ -77,7 +77,7 @@ def test_stripped_uint8(self, tmp_path): path = str(tmp_path / 'lz4_1051_stripped.tif') write(expected, path, compression='lz4', tiled=False) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_with_predictor(self, tmp_path): @@ -89,7 +89,7 @@ def test_with_predictor(self, tmp_path): write(expected, path, compression='lz4', tiled=True, tile_size=8, predictor=True) - arr, geo = read_to_array(path) + arr, geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) def test_public_api_roundtrip(self, tmp_path): @@ -107,7 +107,7 @@ def test_public_api_roundtrip(self, tmp_path): to_geotiff(da, path, compression='lz4', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py index 326e0c224..e76013119 100644 --- a/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py @@ -76,7 +76,7 @@ def test_lz4_level_round_trip(self, level, tmp_path): to_geotiff(da, path, compression="lz4", compression_level=level, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) # lz4 is lossless: assert_array_equal, not assert_allclose. np.testing.assert_array_equal(result.values, da.values) @@ -88,7 +88,7 @@ def test_lz4_default_level_round_trip(self, tmp_path): path = str(tmp_path / "lz4_default.tif") to_geotiff(da, path, compression="lz4", allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, da.values) @@ -177,7 +177,7 @@ def test_lz4_dask_streaming_level_round_trip(self, level, tmp_path): to_geotiff(dask_da, path, compression="lz4", compression_level=level, tile_size=16, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, np_arr) @pytest.mark.parametrize("level", [-1, 17, 50]) diff --git a/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py b/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py index 88506b56a..ca81e003c 100644 --- a/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py +++ b/xrspatial/geotiff/tests/test_photometric_kwarg_1769.py @@ -121,7 +121,9 @@ def test_user_extra_tags_override_extra_samples_1769(tmp_path): ]}, ) path = str(tmp_path / 'override_extras_1769.tif') - to_geotiff(da, path, photometric='rgb') + # extra_tags is the Experimental write surface (PR 4 of epic #2340). + to_geotiff(da, path, photometric='rgb', + allow_experimental_codecs=True) ifd = _read_primary_ifd(path) assert ifd.get_value(TAG_PHOTOMETRIC) == 2 # RGB from kwarg @@ -142,7 +144,9 @@ def test_user_extra_tags_override_photometric_1769(tmp_path): ) path = str(tmp_path / 'override_photometric_1769.tif') # photometric='rgb' would otherwise emit Photometric=2. - to_geotiff(da, path, photometric='rgb') + # extra_tags is the Experimental write surface (PR 4 of epic #2340). + to_geotiff(da, path, photometric='rgb', + allow_experimental_codecs=True) ifd = _read_primary_ifd(path) assert ifd.get_value(TAG_PHOTOMETRIC) == 0 # MinIsWhite from override diff --git a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py index eb93e2ead..b880d8660 100644 --- a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py +++ b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py @@ -38,6 +38,12 @@ "missing_sources", "allow_rotated", "allow_unparseable_crs", + # PR 4 of epic #2340 added the experimental / internal-only codec + # opt-ins on the read side, mirroring the writer surface from #2137 + # / #1845. They sit after the other ``allow_*`` flags so the + # canonical order keeps the policy / typed-error gates grouped. + "allow_experimental_codecs", + "allow_internal_only_jpeg", "band_nodata", "mask_nodata", ) diff --git a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py index bd1e34795..76b55c02b 100644 --- a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py @@ -80,7 +80,7 @@ def test_lossless_round_trip(self, float_raster, dask_float_raster, # Tier 3 codec (issue #2137); opt in to exercise the encode path. to_geotiff(dask_float_raster, path, compression='lerc', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) # LERC with max_z_error=0 is lossless for float32 sources. np.testing.assert_array_equal(result.values, float_raster.values) @@ -92,7 +92,7 @@ def test_lossy_respects_max_z_error(self, float_raster, dask_float_raster, to_geotiff(dask_float_raster, path, compression='lerc', max_z_error=max_z, allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) max_diff = float(np.abs(result.values - float_raster.values).max()) assert max_diff <= max_z + 1e-7, ( f"LERC lossy stream write exceeded max_z_error budget: " @@ -114,8 +114,8 @@ def test_streaming_matches_eager(self, float_raster, dask_float_raster, to_geotiff(dask_float_raster, stream_path, compression='lerc', max_z_error=0.05, allow_experimental_codecs=True) - eager = open_geotiff(eager_path).values - stream = open_geotiff(stream_path).values + eager = open_geotiff(eager_path, allow_experimental_codecs=True).values + stream = open_geotiff(stream_path, allow_experimental_codecs=True).values np.testing.assert_array_equal(eager, stream) @@ -130,7 +130,7 @@ def test_round_trip(self, float_raster, dask_float_raster, tmp_path): # Tier 3 codec (issue #2137); opt in to exercise the encode path. to_geotiff(dask_float_raster, path, compression='lz4', allow_experimental_codecs=True) - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, float_raster.values) def test_streaming_matches_eager(self, float_raster, dask_float_raster, @@ -141,8 +141,8 @@ def test_streaming_matches_eager(self, float_raster, dask_float_raster, allow_experimental_codecs=True) to_geotiff(dask_float_raster, stream_path, compression='lz4', allow_experimental_codecs=True) - eager = open_geotiff(eager_path).values - stream = open_geotiff(stream_path).values + eager = open_geotiff(eager_path, allow_experimental_codecs=True).values + stream = open_geotiff(stream_path, allow_experimental_codecs=True).values np.testing.assert_array_equal(eager, stream) @@ -154,7 +154,7 @@ class TestStreamingPackbits: def test_round_trip_uint8(self, uint8_raster, dask_uint8_raster, tmp_path): path = str(tmp_path / 'stream_packbits.tif') to_geotiff(dask_uint8_raster, path, compression='packbits') - result = open_geotiff(path) + result = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(result.values, uint8_raster.values) def test_streaming_matches_eager(self, uint8_raster, dask_uint8_raster, @@ -163,8 +163,8 @@ def test_streaming_matches_eager(self, uint8_raster, dask_uint8_raster, stream_path = str(tmp_path / 'stream_packbits.tif') to_geotiff(uint8_raster, eager_path, compression='packbits') to_geotiff(dask_uint8_raster, stream_path, compression='packbits') - eager = open_geotiff(eager_path).values - stream = open_geotiff(stream_path).values + eager = open_geotiff(eager_path, allow_experimental_codecs=True).values + stream = open_geotiff(stream_path, allow_experimental_codecs=True).values np.testing.assert_array_equal(eager, stream) @@ -194,7 +194,7 @@ def test_cubic_overview_round_trip(self, tmp_path): overview_resampling='cubic') # Full-resolution data is preserved exactly. - full = open_geotiff(path) + full = open_geotiff(path, allow_experimental_codecs=True) np.testing.assert_array_equal(full.values, arr) # Overview is half-size and same dtype (the cubic branch ends in @@ -229,7 +229,7 @@ def test_cubic_distinct_from_mean(self, tmp_path): tiled=True, cog=True, overview_levels=[2], overview_resampling='mean') - cubic_ov = open_geotiff(cubic_path, overview_level=1).values - mean_ov = open_geotiff(mean_path, overview_level=1).values + cubic_ov = open_geotiff(cubic_path, allow_experimental_codecs=True, overview_level=1).values + mean_ov = open_geotiff(mean_path, allow_experimental_codecs=True, overview_level=1).values assert not np.array_equal(cubic_ov, mean_ov), ( "cubic and mean overview should differ on a non-linear ramp") diff --git a/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py index f964feccb..2c22fee4e 100644 --- a/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py +++ b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py @@ -41,7 +41,7 @@ def test_streaming_extra_tags_miniswhite_override_rejected_2073(tmp_path): out = tmp_path / 'tmp_2073_streaming_miniswhite.tif' with pytest.raises(ValueError) as excinfo: - to_geotiff(arr, str(out)) + to_geotiff(arr, str(out), allow_experimental_codecs=True) msg = str(excinfo.value) assert 'extra_tags' in msg @@ -62,7 +62,7 @@ def test_streaming_extra_tags_minisblack_override_roundtrips_2073(tmp_path): arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 1)] out = tmp_path / 'tmp_2073_streaming_minisblack.tif' - to_geotiff(arr, str(out)) + to_geotiff(arr, str(out), allow_experimental_codecs=True) assert os.path.exists(out) back = open_geotiff(str(out)) @@ -102,5 +102,5 @@ def test_streaming_extra_tags_miniswhite_override_multiband_not_rejected_2073( # to prevent" set. If it raises for an unrelated reason # (e.g. RGB-requires-3-bands check elsewhere), let the test # surface that as a real failure rather than swallowing it. - to_geotiff(arr, str(out)) + to_geotiff(arr, str(out), allow_experimental_codecs=True) assert os.path.exists(out) diff --git a/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py b/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py index 923d236fa..fbbf2b59d 100644 --- a/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py +++ b/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py @@ -113,7 +113,7 @@ def test_to_geotiff_jpeg_opt_in_emits_warning_and_writes(tmp_path): assert os.path.exists(path) assert os.path.getsize(path) > 0 # Internal reader still decodes the file. - decoded = open_geotiff(path) + decoded = open_geotiff(path, allow_internal_only_jpeg=True) assert decoded.shape == da.shape diff --git a/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py b/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py index 7222e4efd..2aa58f9e6 100644 --- a/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py +++ b/xrspatial/geotiff/tests/test_vrt_tiled_metadata_1606.py @@ -144,7 +144,9 @@ def test_gdal_metadata_xml_string_propagates_to_tiles(self, tmp_path): attrs={'crs': 4326, 'gdal_metadata_xml': xml}, ) vrt = str(tmp_path / 'gdal_xml.vrt') - to_geotiff(da, vrt, tile_size=16) + # Rich-tag write surface (PR 4 of epic #2340). + to_geotiff(da, vrt, tile_size=16, + allow_experimental_codecs=True) tile_da = open_geotiff(_first_tile_path(vrt)) # On read, the XML is re-parsed into a dict under # attrs['gdal_metadata']; the raw XML lands under @@ -175,7 +177,9 @@ def test_extra_tags_entry_propagates_to_tiles(self, tmp_path): }, ) vrt = str(tmp_path / 'extra_tags.vrt') - to_geotiff(da, vrt, tile_size=16) + # Rich-tag write surface (PR 4 of epic #2340). + to_geotiff(da, vrt, tile_size=16, + allow_experimental_codecs=True) tile_da = open_geotiff(_first_tile_path(vrt)) et = tile_da.attrs.get('extra_tags') or [] tag_ids = {entry[0] for entry in et} diff --git a/xrspatial/geotiff/tests/test_writer_matrix.py b/xrspatial/geotiff/tests/test_writer_matrix.py index 5bb1c8e0f..bdc9f11fc 100644 --- a/xrspatial/geotiff/tests/test_writer_matrix.py +++ b/xrspatial/geotiff/tests/test_writer_matrix.py @@ -71,7 +71,10 @@ def test_dtype_codec_roundtrip_stripped(tmp_path, dtype, codec): except (ImportError, ModuleNotFoundError) as e: pytest.skip(f"codec {codec} not available: {e}") - arr, _geo = read_to_array(path) + # Codecs in the experimental tier (LERC / J2K / LZ4) need the + # read-side opt-in too (PR 4 of epic #2340). Tier 1 codecs ignore + # the kwarg, so passing it unconditionally keeps the loop simple. + arr, _geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) assert arr.dtype == expected.dtype @@ -91,7 +94,7 @@ def test_dtype_codec_roundtrip_tiled(tmp_path, dtype, codec): except (ImportError, ModuleNotFoundError) as e: pytest.skip(f"codec {codec} not available: {e}") - arr, _geo = read_to_array(path) + arr, _geo = read_to_array(path, allow_experimental_codecs=True) np.testing.assert_array_equal(arr, expected) assert arr.dtype == expected.dtype From f928ae957205b68100b0b209573bd6d2d0d310fd Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 24 May 2026 05:16:41 -0700 Subject: [PATCH 2/2] Address self-review: explain Adobe Deflate collapse, contract-marker soft gate, and dask graph-build skip (#2352) Documents three design choices flagged during the self-review pass on #2356: * _COMPRESSION_TAG_TO_NAME collapses TIFF tag 32946 (Adobe Deflate) onto the stable codec.deflate entry on purpose. The comment makes the deliberate collapse vs accidental passthrough obvious to a future reviewer. * The rich-tag gate's _xrspatial_geotiff_contract exemption is a soft gate by design: forging the marker by hand would bypass it, but the alternative (gating every read-then-write call) would break the canonical attrs round-trip from #1984. * The dask read-side gate uses getattr(..., None) so a synthesised geo_info (non-TIFF source) skips the check rather than rejects. The comment documents the lockstep invariant (_ifd_compression stashed alongside _ifd_photometric / _ifd_samples_per_pixel on every TIFF source path) so the skip never silently bypasses a real TIFF read. --- xrspatial/geotiff/_attrs.py | 17 ++++++++++++++++- xrspatial/geotiff/_backends/dask.py | 7 +++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 4f1c7b452..3cc33d4dd 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -325,7 +325,14 @@ 7: 'jpeg', 8: 'deflate', 32773: 'packbits', - 32946: 'deflate', # adobe deflate, same codec + # Adobe Deflate (32946) decodes through the same zlib path as + # plain Deflate (8) and is collapsed onto the same codec name on + # purpose: both tags share the stable-tier classification in + # ``SUPPORTED_FEATURES`` (``codec.deflate``). A future Adobe- + # Deflate-specific tier would need its own ``codec.`` entry + # AND its own mapping line here; the collapse is deliberate, not + # a passthrough. + 32946: 'deflate', 34712: 'jpeg2000', 34887: 'lerc', 50000: 'zstd', @@ -435,6 +442,14 @@ def _validate_write_rich_tag_optin( # ``open_geotiff`` / ``read_geotiff_dask`` / ``read_geotiff_gpu`` # carries the contract marker. Writing it back is the canonical # round-trip and should not require a new flag (issue #1984). + # + # This is a soft gate by design: a caller who hand-builds an + # attrs dict with the contract marker could bypass it. Forging + # the marker is a deliberate act, and the alternative (gating + # every read-then-write call) would break the canonical attrs + # round-trip that downstream code already depends on. The hard + # guarantee is "fresh DataArrays carrying these attrs need the + # opt-in"; the soft exemption keeps round-trips frictionless. if '_xrspatial_geotiff_contract' in attrs: return triggered: list[str] = [] diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 3fc9ba758..94feee9f9 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -320,6 +320,13 @@ def read_geotiff_dask(source: str, *, # any chunk task is scheduled. The compression tag is stashed on # ``geo_info`` by ``_read_geo_info`` (local / fsspec) and by the # HTTP / fsspec branch above. PR 4 of epic #2340. + # + # ``getattr(..., None)`` is intentional: a synthesised geo_info + # (non-TIFF source) carries no compression tag, so the gate must + # skip rather than reject. Every TIFF source path stashes + # ``_ifd_compression`` in lockstep with ``_ifd_photometric`` and + # ``_ifd_samples_per_pixel`` so the skip never silently bypasses + # a real TIFF read. _compression_tag = getattr(geo_info, '_ifd_compression', None) if _compression_tag is not None: from .._attrs import _validate_read_codec_optin