diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 3a8f6c2f..e6d50e3c 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -72,7 +72,8 @@ transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry # noqa: F401 from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg # noqa: F401 from ._errors import (ConflictingCRSError, ConflictingNodataError, GeoTIFFAmbiguousMetadataError, - InconsistentGeoKeysError, InvalidCRSCodeError, MixedBandMetadataError, + InconsistentGeoKeysError, InvalidCRSCodeError, InvalidIntegerNodataError, + MixedBandMetadataError, NonRepresentableEPSGCRSError, NonUniformCoordsError, RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError, UnsupportedGeoTIFFFeatureError) @@ -111,6 +112,7 @@ 'GEOREF_STATUS_VALUES', 'InconsistentGeoKeysError', 'InvalidCRSCodeError', + 'InvalidIntegerNodataError', 'MixedBandMetadataError', 'NonRepresentableEPSGCRSError', 'NonUniformCoordsError', @@ -169,7 +171,8 @@ def _read_geo_info(source, *, overview_level: int | None = None, - allow_rotated: bool = False): + allow_rotated: bool = False, + allow_invalid_nodata: bool = False): """Read only the geographic metadata and image dimensions from a GeoTIFF. Returns (geo_info, height, width, dtype, n_bands) without reading pixel @@ -188,6 +191,10 @@ def _read_geo_info(source, *, overview_level: int | None = None, ``ModelTransformationTag`` reads as an ungeoreferenced pixel grid instead of raising ``RotatedTransformError`` (issues #2115, #2267). + allow_invalid_nodata : bool, optional + Forwarded to the geotag parser. When True, restores the legacy + no-op handling of non-finite / fractional ``GDAL_NODATA`` on + integer sources (#1774 follow-up, #2441). """ # ``_parse_cog_http_meta`` is imported from ``_cog_http`` directly # rather than re-routed through ``_reader`` because the @@ -230,6 +237,7 @@ def _read_geo_info(source, *, overview_level: int | None = None, _header, _ifd, geo_info, _ = _parse_cog_http_meta( _src, overview_level=overview_level, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, source_path=source) finally: _src.close() @@ -340,6 +348,7 @@ def _read_geo_info(source, *, overview_level: int | None = None, geo_info = extract_geo_info_with_overview_inheritance( ifd, ifds, data, header.byte_order, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, sidecar_origin=georef_origin) bps = resolve_bits_per_sample(ifd.bits_per_sample) file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format) @@ -379,6 +388,7 @@ def open_geotiff(source: str | BinaryIO, *, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -589,6 +599,18 @@ def open_geotiff(source: str | BinaryIO, *, raises ``InconsistentGeoKeysError``. Set to ``True`` to keep the legacy permissive behaviour for files known to carry quirky-but-trusted GeoKey layouts. + allow_invalid_nodata : bool, default False + [advanced] Read-side opt-in for integer-dtype sources whose + ``GDAL_NODATA`` tag is non-finite (``"NaN"``, ``"Inf"``, + ``"-Inf"``) or fractional (e.g. ``"3.5"`` on a ``uint16`` + file). The legacy reader (#1774) parsed the value into + ``attrs['nodata']`` and silently skipped the masking step, so + callers had no way to tell a silently-ignored sentinel from a + missing one. When ``False`` (the default), the read raises + ``InvalidIntegerNodataError``. Set to ``True`` to keep the + pre-rejection no-op behaviour for files known to carry such + sentinels (e.g. external tooling that writes ``"nan"`` on + integer outputs). See issue #2441 (#1774 follow-up). allow_experimental_codecs : bool, default False Read-side opt-in for sources compressed with the Tier 3 experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). @@ -738,6 +760,7 @@ def open_geotiff(source: str | BinaryIO, *, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=( allow_inconsistent_geokeys), + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, @@ -762,6 +785,7 @@ def open_geotiff(source: str | BinaryIO, *, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=( allow_inconsistent_geokeys), + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=( allow_experimental_codecs), allow_internal_only_jpeg=( @@ -779,6 +803,7 @@ def open_geotiff(source: str | BinaryIO, *, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=( allow_inconsistent_geokeys), + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=( allow_experimental_codecs), allow_internal_only_jpeg=( @@ -801,6 +826,7 @@ def open_geotiff(source: str | BinaryIO, *, source, window=window, overview_level=overview_level, band=band, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, **kwargs, diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index b1bd18a9..fd7f240c 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -43,6 +43,7 @@ def read_geotiff_dask(source: str, *, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -135,6 +136,12 @@ def read_geotiff_dask(source: str, *, to different EPSG codes). The default raises ``InconsistentGeoKeysError``. See ``open_geotiff`` for the full description (issue #2417). + allow_invalid_nodata : bool, default False + [advanced] Read-side opt-in for integer-dtype sources whose + ``GDAL_NODATA`` tag is non-finite or fractional. Default raises + ``InvalidIntegerNodataError`` at graph-build time. See + ``open_geotiff`` for the full description (#1774 follow-up, + #2441). allow_experimental_codecs : bool, default False [advanced] Read-side opt-in for Tier 3 experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). Fires at graph @@ -219,6 +226,7 @@ def read_geotiff_dask(source: str, *, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=allow_inconsistent_geokeys, + allow_invalid_nodata=allow_invalid_nodata, band_nodata=band_nodata, mask_nodata=mask_nodata, **vrt_kwargs, @@ -284,6 +292,7 @@ def read_geotiff_dask(source: str, *, ) = _parse_cog_http_meta( _src, overview_level=overview_level, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, source_path=source, return_sidecar=True, ) @@ -345,7 +354,8 @@ def read_geotiff_dask(source: str, *, from .. import _read_geo_info geo_info, full_h, full_w, file_dtype, n_bands = _read_geo_info( source, overview_level=overview_level, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata) # Reject experimental / internal-only codecs at graph build, before # any chunk task is scheduled. The compression tag is stashed on @@ -600,6 +610,8 @@ def read_geotiff_dask(source: str, *, http_meta_key=http_meta_key, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_invalid_nodata=( + allow_invalid_nodata), allow_experimental_codecs=( allow_experimental_codecs), allow_internal_only_jpeg=( @@ -626,6 +638,7 @@ def read_geotiff_dask(source: str, *, def _delayed_read_window(source, r0, c0, r1, c1, overview_level, nodata, band, *, target_dtype=None, http_meta_key=None, max_pixels=None, allow_rotated=False, + allow_invalid_nodata=False, allow_experimental_codecs=False, allow_internal_only_jpeg=False): """Dask-delayed function to read a single window. @@ -688,6 +701,7 @@ def _read(http_meta): overview_level=overview_level, band=band, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, **_r2a_kwargs) diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index 27c7d962..bd9238a4 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -76,6 +76,7 @@ def read_geotiff_gpu(source: str, *, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -210,6 +211,12 @@ def read_geotiff_gpu(source: str, *, raises ``InconsistentGeoKeysError``; ``True`` restores the legacy silent acceptance. See ``open_geotiff`` for the full description (issue #2417). + allow_invalid_nodata : bool, default False + [experimental] Read-side opt-in for integer-dtype sources whose + ``GDAL_NODATA`` tag is non-finite or fractional. Mirrors the CPU + eager and dask paths; default raises + ``InvalidIntegerNodataError``. See ``open_geotiff`` for the full + description (#1774 follow-up, #2441). allow_experimental_codecs : bool, default False [experimental] Read-side opt-in for Tier 3 experimental codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``). The GPU read path @@ -338,6 +345,7 @@ def read_geotiff_gpu(source: str, *, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=allow_inconsistent_geokeys, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, @@ -390,6 +398,7 @@ def read_geotiff_gpu(source: str, *, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=allow_inconsistent_geokeys, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, @@ -494,6 +503,7 @@ def read_geotiff_gpu(source: str, *, geo_info = extract_geo_info_with_overview_inheritance( ifd, ifds, data, header.byte_order, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, sidecar_origin=georef_origin) # Capture the Orientation tag (274) once so the post-decode flip # below picks it up for both the stripped fallback and the tiled @@ -589,6 +599,7 @@ def read_geotiff_gpu(source: str, *, source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) @@ -794,6 +805,7 @@ def _read_once(): source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) @@ -812,6 +824,7 @@ def _read_once(): source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) @@ -892,6 +905,7 @@ def _read_once(): source, overview_level=overview_level, window=window, band=band, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg) arr_gpu = cupy.asarray(arr_cpu) @@ -1056,6 +1070,7 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): @@ -1098,6 +1113,7 @@ def _read_geotiff_gpu_eager_via_cpu(source, *, dtype, window, overview_level, arr_cpu, geo_info = _read_to_array( source, window=window, overview_level=overview_level, band=band, max_pixels=max_pixels, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, ) @@ -1290,6 +1306,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, mask_nodata: bool = True): @@ -1390,6 +1407,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, geo_info = extract_geo_info_with_overview_inheritance( ifd, ifds, raw, header.byte_order, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, sidecar_origin=None, ) orientation = ifd.orientation @@ -1407,6 +1425,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=( allow_inconsistent_geokeys), + allow_invalid_nodata=allow_invalid_nodata, mask_nodata=mask_nodata, ) except Exception: @@ -1422,6 +1441,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=allow_inconsistent_geokeys, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, mask_nodata=mask_nodata, @@ -1450,6 +1470,7 @@ def _read_geotiff_gpu_chunked_gds(source, ifd, geo_info, header, *, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, mask_nodata: bool = True): """Build a Dask+CuPy graph that decodes each chunk disk->GPU. diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index d1ac359f..c6161cd9 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -129,6 +129,7 @@ def read_vrt(source: str, *, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, @@ -269,6 +270,12 @@ def read_vrt(source: str, *, thread per-GeoTIFF-source kwargs, so this kwarg is currently a no-op on the VRT path. See ``open_geotiff`` for the full description (issue #2417). + allow_invalid_nodata : bool, default False + [advanced] Read-side opt-in for integer-dtype source files whose + ``GDAL_NODATA`` tag is non-finite or fractional. Forwarded to + the per-source GeoTIFF reads built by the VRT planner. See + ``open_geotiff`` for the full description (#1774 follow-up, + #2441). allow_experimental_codecs : bool, default False [advanced] Read-side opt-in for Tier 3 experimental codecs in any source file referenced by the VRT. Forwarded to the @@ -451,6 +458,7 @@ def read_vrt(source: str, *, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, allow_inconsistent_geokeys=allow_inconsistent_geokeys, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, @@ -802,6 +810,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, allow_rotated: bool = False, allow_unparseable_crs: bool = False, allow_inconsistent_geokeys: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, diff --git a/xrspatial/geotiff/_cog_http.py b/xrspatial/geotiff/_cog_http.py index 801da956..6e7cd984 100644 --- a/xrspatial/geotiff/_cog_http.py +++ b/xrspatial/geotiff/_cog_http.py @@ -99,6 +99,7 @@ def _parse_cog_http_meta( overview_level: int | None = None, *, allow_rotated: bool = False, + allow_invalid_nodata: bool = False, source_path: str | None = None, max_cloud_bytes: int | None = None, return_sidecar: bool = False, @@ -260,7 +261,8 @@ def _parse_cog_http_meta( # its own geokeys is parsed against the sidecar bytes too. geo_info = extract_geo_info_with_overview_inheritance( ifd, ifds, header_bytes, header.byte_order, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata) # When the chosen IFD lives in the sidecar, return the sidecar's own # ``TIFFHeader`` so the per-chunk / eager decode step sees the byte # order of the file the bytes actually came from. A big-endian @@ -296,6 +298,7 @@ def _read_cog_http(url: str, overview_level: int | None = None, window: tuple[int, int, int, int] | None = None, *, allow_rotated: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, GeoInfo]: @@ -355,6 +358,7 @@ def _read_cog_http(url: str, overview_level: int | None = None, ) = _reader._parse_cog_http_meta( source, overview_level=overview_level, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, source_path=url, return_sidecar=True, ) diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py index 9d07e9fe..47a4aa78 100644 --- a/xrspatial/geotiff/_errors.py +++ b/xrspatial/geotiff/_errors.py @@ -147,6 +147,25 @@ class InconsistentGeoKeysError(GeoTIFFAmbiguousMetadataError): """ +class InvalidIntegerNodataError(GeoTIFFAmbiguousMetadataError): + """Integer-dtype source carries a non-finite or fractional GDAL_NODATA (#1774 follow-up). + + Raised when a GeoTIFF whose pixel buffer is an integer dtype declares + a ``GDAL_NODATA`` value the integer buffer cannot represent: NaN, +Inf, + -Inf, or a fractional float such as ``"3.5"`` on a ``uint16`` file. + The original #1774 fix parsed the sentinel into ``attrs['nodata']`` + and silently skipped the masking step, so callers had no way to tell + a silently-ignored sentinel from a missing one. The release contract + (see ``test_release_gate_negative_integer_nodata_float_promoted``) + upgrades that no-op to a typed rejection so the silent-coercion risk + surfaces at the read boundary. + + Pass ``allow_invalid_nodata=True`` on the public read entry points to + restore the pre-rejection no-op behaviour for files known to carry + such sentinels. + """ + + class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): """Can't classify an EPSG as geographic or projected on write (#2277). @@ -210,6 +229,7 @@ class UnsupportedGeoTIFFFeatureError(ValueError): "MixedBandMetadataError", "ConflictingCRSError", "ConflictingNodataError", + "InvalidIntegerNodataError", "VRTUnsupportedError", "UnknownCRSModelTypeError", "NonRepresentableEPSGCRSError", diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index 312a2e0b..13b7bda1 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -3,7 +3,7 @@ from dataclasses import dataclass, field -from ._dtypes import resolve_bits_per_sample +from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy from ._errors import NonRepresentableEPSGCRSError, RotatedTransformError, UnknownCRSModelTypeError from ._header import (IFD, TAG_BITS_PER_SAMPLE, TAG_COMPRESSION, TAG_EXTRA_SAMPLES, TAG_GDAL_METADATA, TAG_GDAL_NODATA, TAG_GEO_ASCII_PARAMS, @@ -826,7 +826,8 @@ def _parse_nodata_str(text: str | None) -> int | float | None: def extract_geo_info(ifd: IFD, data: bytes | memoryview, byte_order: str, *, - allow_rotated: bool = False) -> GeoInfo: + allow_rotated: bool = False, + allow_invalid_nodata: bool = False) -> GeoInfo: """Extract full geographic metadata from a parsed IFD. Parameters @@ -842,6 +843,13 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview, ``ModelTransformationTag`` is read as an ungeoreferenced pixel grid instead of raising ``RotatedTransformError`` (issue #2115, #2267). + allow_invalid_nodata : bool, optional + When False (default), reject integer-dtype sources whose + ``GDAL_NODATA`` tag is non-finite (NaN / Inf) or fractional + with :class:`InvalidIntegerNodataError`. When True, restore the + pre-rejection silent no-op behaviour. See issue #2441 (the + #1774 follow-up) and the release contract document for the full + rationale. Returns ------- @@ -967,6 +975,31 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview, nodata_str = ifd.nodata_str if nodata_str is not None: nodata = _parse_nodata_str(nodata_str) + # Reject non-finite / fractional GDAL_NODATA on integer sources + # (#1774 follow-up, #2441). Default behaviour raises + # :class:`InvalidIntegerNodataError`; ``allow_invalid_nodata=True`` + # restores the legacy silent no-op for callers whose files + # legitimately carry such sentinels. Derive the source dtype + # from the IFD's BitsPerSample + SampleFormat; the validator + # is a no-op on float dtypes and on ``None`` sentinels. + try: + src_dtype = tiff_dtype_to_numpy( + resolve_bits_per_sample(ifd.bits_per_sample), + ifd.sample_format, + ) + except (KeyError, ValueError): + # Unsupported / sub-byte BPS combos slip through to the + # downstream decode-time error so the user sees the right + # diagnostic; skip the nodata gate here. + src_dtype = None + if src_dtype is not None: + # Lazy import: ``_validation`` imports ``_coords`` which + # imports ``_geotags``; a top-level import would loop. + from ._validation import _validate_int_nodata_for_dtype + _validate_int_nodata_for_dtype( + nodata, src_dtype, + allow_invalid_nodata=allow_invalid_nodata, + ) # Parse GDALMetadata XML (tag 42112) gdal_metadata = None @@ -1125,6 +1158,7 @@ def extract_geo_info_with_overview_inheritance( byte_order: str, *, allow_rotated: bool = False, + allow_invalid_nodata: bool = False, sidecar_origin: dict | None = None, ) -> GeoInfo: """Extract geo metadata, inheriting from level 0 when the IFD lacks it. @@ -1205,7 +1239,8 @@ def extract_geo_info_with_overview_inheritance( sel_data, sel_byte_order = origin info = extract_geo_info(ifd, sel_data, sel_byte_order, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata) # Overview IFDs have NewSubfileType bit 0 set; mask IFDs (bit 2) and # page IFDs (bit 1) are filtered out by ``select_overview_ifd`` @@ -1242,7 +1277,8 @@ def extract_geo_info_with_overview_inheritance( base_data, base_byte_order = base_origin base_info = extract_geo_info(base_ifd, base_data, base_byte_order, - allow_rotated=allow_rotated) + allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata) # Inherit the per-IFD metadata that the COG writer emits only on the # level-0 IFD: GDAL_NODATA, GDAL_METADATA, x/y resolution, colormap, diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 7be3d02a..bc5331be 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -112,6 +112,7 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, max_pixels: int = MAX_PIXELS_DEFAULT, max_cloud_bytes=_MAX_CLOUD_BYTES_SENTINEL, allow_rotated: bool = False, + allow_invalid_nodata: bool = False, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, ) -> tuple[np.ndarray, GeoInfo]: @@ -152,6 +153,7 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, source, overview_level=overview_level, band=band, max_pixels=max_pixels, window=window, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg) @@ -301,6 +303,7 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None, geo_info = extract_geo_info_with_overview_inheritance( ifd, ifds, data, header.byte_order, allow_rotated=allow_rotated, + allow_invalid_nodata=allow_invalid_nodata, sidecar_origin=georef_origin) # Orientation tag (274): values 2-8 mean the stored pixel order diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index 0fc35b9f..944b5989 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -27,8 +27,8 @@ from ._coords import _BAND_DIM_NAMES from ._errors import (ConflictingCRSError, ConflictingNodataError, InconsistentGeoKeysError, - MixedBandMetadataError, NonUniformCoordsError, RotatedTransformError, - UnparseableCRSError) + InvalidIntegerNodataError, MixedBandMetadataError, NonUniformCoordsError, + RotatedTransformError, UnparseableCRSError) from ._runtime import (_MISSING_SOURCES_SENTINEL, _ON_GPU_FAILURE_SENTINEL, _TIME_DIM_NAMES, _X_DIM_NAMES, _Y_DIM_NAMES) @@ -596,6 +596,78 @@ def _validate_nodata_arg(nodata) -> None: ) from e +def _validate_int_nodata_for_dtype( + nodata, + dtype, + *, + allow_invalid_nodata: bool = False, +) -> None: + """Reject a non-finite or fractional ``GDAL_NODATA`` on an integer source. + + Mirrors the masking-time gate in ``_nodata._sentinel_fits_dtype``: a + NaN / Inf / fractional ``GDAL_NODATA`` value cannot match any pixel + in an integer buffer, so the reader cannot honour the sentinel. The + legacy behaviour (#1774) parsed the value into ``attrs['nodata']`` + and silently skipped the masking step. The release contract upgrades + that silent no-op to a typed rejection so downstream code does not + quietly see the raw sentinel value in the buffer instead of NaN. + + Parameters + ---------- + nodata : scalar or None + The parsed ``GDAL_NODATA`` value (``int`` for plain integer + literals, ``float`` otherwise; ``None`` when the tag is absent). + ``None`` is a no-op. + dtype : numpy.dtype or None + The file's source pixel dtype. Float dtypes and ``None`` are + no-ops; only signed/unsigned integer kinds enter the validation + branch. + allow_invalid_nodata : bool, default False + Opt-in for the pre-rejection behaviour. ``True`` restores the + legacy silent no-op without raising; callers that need to read + files known to carry such sentinels (e.g. external tooling that + emits ``GDAL_NODATA="nan"`` on integer outputs) pass this flag. + + Raises + ------ + InvalidIntegerNodataError + When ``dtype`` is integer and ``nodata`` is non-finite (NaN / + Inf) or fractional. The message names ``nodata``, the integer + dtype, the sentinel value, the opt-in flag, and cites the + ``release_gate_geotiff`` contract document. + """ + if allow_invalid_nodata or nodata is None or dtype is None: + return + np_dtype = np.dtype(dtype) + if np_dtype.kind not in ('u', 'i'): + return + try: + as_float = float(nodata) + except (TypeError, ValueError): + # Non-numeric sentinel slipped past _parse_nodata_str (defensive; + # the parser returns int / float / None). Fall back to the + # validator's "no opinion" branch; downstream masking gates will + # treat it as a no-op the same way the legacy path did. + return + if np.isfinite(as_float) and as_float.is_integer(): + return + if not np.isfinite(as_float): + kind = "non-finite" + else: + kind = "fractional" + raise InvalidIntegerNodataError( + f"GeoTIFF declares GDAL_NODATA={nodata!r} ({kind}) on a " + f"{np_dtype.name} source. The masking step cannot match a " + f"{kind} sentinel against an integer pixel buffer, so the " + f"reader would silently skip masking and leave the raw " + f"sentinel value in the data. Pass allow_invalid_nodata=True " + f"to keep the legacy no-op behaviour, or re-encode the file " + f"with a finite in-range integer sentinel. See " + f"docs/source/reference/release_gate_geotiff.rst for the " + f"release contract on nodata handling (#1774 / #2341)." + ) + + def _validate_no_rotated_affine(attrs, *, drop_rotation: bool, entry_point: str = "to_geotiff") -> None: """Refuse writes that would silently drop ``attrs['rotated_affine']``. diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index e48470e0..0bf6ab8d 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -1961,7 +1961,14 @@ def write_vrt(vrt_path: str, source_files: list[str], *, header = parse_header(data) ifds = parse_all_ifds(data, header) ifd = ifds[0] - geo = extract_geo_info(ifd, data, header.byte_order) + # The writer is reading source metadata to populate the VRT XML; + # it does not decode pixels or run the masking step that the + # new #2441 default-rejection guards against. Pass the opt-in + # so a source TIFF with a non-finite / fractional ``GDAL_NODATA`` + # value can still be referenced by a VRT (the read-side default + # still rejects it when the resulting VRT is later opened). + geo = extract_geo_info(ifd, data, header.byte_order, + allow_invalid_nodata=True) src.close() bps = resolve_bits_per_sample(ifd.bits_per_sample) diff --git a/xrspatial/geotiff/tests/release_gates/test_stable_features.py b/xrspatial/geotiff/tests/release_gates/test_stable_features.py index 67688ddc..9584e37d 100644 --- a/xrspatial/geotiff/tests/release_gates/test_stable_features.py +++ b/xrspatial/geotiff/tests/release_gates/test_stable_features.py @@ -2319,16 +2319,6 @@ def test_release_gate_negative_conflicting_aux_xml_crs(tmp_path) -> None: @pytest.mark.release_gate -@pytest.mark.xfail( - reason=( - "Issue #1774 currently treats a non-finite or fractional integer " - "nodata sentinel as a silent no-op rather than a hard error. The " - "release promise is to upgrade the no-op to a typed rejection so " - "the caller sees the silent-coercion risk; this xfail flips to a " - "pass when the upgrade lands." - ), - strict=False, -) def test_release_gate_negative_integer_nodata_float_promoted( tmp_path, ) -> None: diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index c24f0c06..d30acb4b 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2784,6 +2784,10 @@ def test_all_lists_supported_functions(self): # combinations. 'InconsistentGeoKeysError', 'InvalidCRSCodeError', + # Issue #2441 (the #1774 follow-up): read-side fail-closed + # on non-finite / fractional GDAL_NODATA against an integer + # source dtype, replacing the legacy silent no-op. + 'InvalidIntegerNodataError', 'MixedBandMetadataError', # Issue #2418: writer rejects compound EPSG codes that cannot # be represented in a single GeographicType / ProjectedCSType diff --git a/xrspatial/geotiff/tests/test_invalid_int_nodata_rejection_2441.py b/xrspatial/geotiff/tests/test_invalid_int_nodata_rejection_2441.py new file mode 100644 index 00000000..fe361221 --- /dev/null +++ b/xrspatial/geotiff/tests/test_invalid_int_nodata_rejection_2441.py @@ -0,0 +1,212 @@ +"""Default-rejection tests for non-finite / fractional integer nodata (#2441). + +Companion to ``test_nodata_nan_int_1774.py`` (which covers the opt-in +no-op path). These tests pin the release-contract upgrade: integer +sources whose ``GDAL_NODATA`` tag is non-finite or fractional must raise +``InvalidIntegerNodataError`` at the read boundary unless the caller +explicitly opts back into the legacy silent no-op via +``allow_invalid_nodata=True``. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest + +from xrspatial.geotiff import (GeoTIFFAmbiguousMetadataError, InvalidIntegerNodataError, + open_geotiff, read_geotiff_dask) + +from .test_nodata_nan_int_1774 import _build_uint16_tiff + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif( + not _HAS_GPU, + reason="cupy + CUDA required", +) + + +# ---------------------------------------------------------------------- +# Default behaviour: reject non-finite int sentinels at the read boundary +# ---------------------------------------------------------------------- + + +@pytest.mark.parametrize('nodata_str', ['nan', 'NaN', 'NAN', + 'inf', '-inf', 'Inf', '-Inf']) +def test_open_geotiff_eager_int_nodata_nonfinite_rejected_by_default( + tmp_path, nodata_str, +): + """Eager numpy path raises ``InvalidIntegerNodataError`` for non-finite + ``GDAL_NODATA`` on integer sources. + """ + path = _build_uint16_tiff(nodata_str, tmp_path) + with pytest.raises(InvalidIntegerNodataError) as excinfo: + open_geotiff(path) + msg = str(excinfo.value) + assert 'nodata' in msg.lower() + # Message names the offending sentinel kind and dtype so the user + # can locate the bad source. + assert 'non-finite' in msg + assert 'uint16' in msg + # The opt-in flag name appears in the message so the caller can + # discover the escape hatch from the rejection itself. + assert 'allow_invalid_nodata' in msg + + +@pytest.mark.parametrize('nodata_str', ['3.5', '29.5', '30.5', '0.25']) +def test_open_geotiff_eager_int_nodata_fractional_rejected_by_default( + tmp_path, nodata_str, +): + """Eager numpy path raises ``InvalidIntegerNodataError`` for fractional + ``GDAL_NODATA`` on integer sources. + """ + path = _build_uint16_tiff(nodata_str, tmp_path) + with pytest.raises(InvalidIntegerNodataError) as excinfo: + open_geotiff(path) + msg = str(excinfo.value) + assert 'nodata' in msg.lower() + assert 'fractional' in msg + assert 'uint16' in msg + assert 'allow_invalid_nodata' in msg + + +def test_invalid_int_nodata_error_is_geotiff_ambiguous_metadata_error(): + """The new error subclasses ``GeoTIFFAmbiguousMetadataError`` so + existing ``except GeoTIFFAmbiguousMetadataError`` callers catch it. + """ + assert issubclass(InvalidIntegerNodataError, + GeoTIFFAmbiguousMetadataError) + + +def test_read_geotiff_dask_int_nodata_nan_rejected_by_default(tmp_path): + """Dask path raises at graph-build time, before any chunk task fires.""" + path = _build_uint16_tiff('nan', tmp_path) + with pytest.raises(InvalidIntegerNodataError): + read_geotiff_dask(path, chunks=2) + + +def test_read_geotiff_dask_int_nodata_fractional_rejected_by_default( + tmp_path, +): + """Dask path raises at graph-build time for fractional int sentinels.""" + path = _build_uint16_tiff('30.5', tmp_path) + with pytest.raises(InvalidIntegerNodataError): + read_geotiff_dask(path, chunks=2) + + +# ---------------------------------------------------------------------- +# Float sources are unaffected +# ---------------------------------------------------------------------- + + +def test_open_geotiff_float_dtype_nan_nodata_still_allowed(tmp_path): + """Float-dtype sources with NaN ``GDAL_NODATA`` are the normal case + and must not raise. NaN matches NaN, masking proceeds. + """ + from xrspatial.geotiff import to_geotiff + import xarray as xr + + arr = np.array([[1.0, 2.0], [np.nan, 4.0]], dtype=np.float32) + da = xr.DataArray( + arr, dims=('y', 'x'), + coords={'y': [0.5, -0.5], 'x': [0.5, 1.5]}, + attrs={'crs': 4326}, + ) + path = str(tmp_path / 'float_nan_nodata_2441.tif') + to_geotiff(da, path, nodata=float('nan'), compression='none', tiled=False) + out = open_geotiff(path) + assert out.dtype.kind == 'f' + assert np.isnan(out.attrs['nodata']) + + +# ---------------------------------------------------------------------- +# Finite, in-range integer sentinels are unaffected +# ---------------------------------------------------------------------- + + +def test_open_geotiff_int_finite_nodata_unaffected(tmp_path): + """Finite integer-valued sentinels still mask as before; the new + validator must only reject non-finite / fractional sentinels. + """ + path = _build_uint16_tiff('30', tmp_path) + da = open_geotiff(path) + # 30 matches a real pixel; the sentinel-to-NaN promotion fires. + assert da.dtype == np.float64 + assert np.isnan(da.values[1, 0]) + assert da.attrs['nodata'] == 30 + + +# ---------------------------------------------------------------------- +# Opt-in restores the legacy no-op +# ---------------------------------------------------------------------- + + +@pytest.mark.parametrize('nodata_str', ['nan', 'inf', '3.5']) +def test_open_geotiff_opt_in_restores_noop_eager(tmp_path, nodata_str): + """``allow_invalid_nodata=True`` keeps the pre-2441 no-op behaviour.""" + path = _build_uint16_tiff(nodata_str, tmp_path) + da = open_geotiff(path, allow_invalid_nodata=True) + assert da.dtype == np.uint16 + np.testing.assert_array_equal(da.values, [[10, 20], [30, 40]]) + + +@pytest.mark.parametrize('nodata_str', ['nan', '30.5']) +def test_read_geotiff_dask_opt_in_restores_noop(tmp_path, nodata_str): + """``allow_invalid_nodata=True`` keeps the pre-2441 no-op for dask.""" + path = _build_uint16_tiff(nodata_str, tmp_path) + da = read_geotiff_dask(path, chunks=2, allow_invalid_nodata=True) + assert da.dtype == np.uint16 + np.testing.assert_array_equal(da.compute().values, [[10, 20], [30, 40]]) + + +# ---------------------------------------------------------------------- +# GPU path mirrors the CPU contract +# ---------------------------------------------------------------------- + + +@_gpu_only +def test_read_geotiff_gpu_int_nodata_nan_rejected_by_default(tmp_path): + """GPU read entry point raises before kicking off the device decode.""" + from xrspatial.geotiff import read_geotiff_gpu + + path = _build_uint16_tiff('nan', tmp_path) + with pytest.raises(InvalidIntegerNodataError): + read_geotiff_gpu(path) + + +@_gpu_only +def test_read_geotiff_gpu_int_nodata_opt_in_restores_noop(tmp_path): + """GPU opt-in keeps the no-op (sentinel cannot match any uint16 pixel).""" + import cupy + + from xrspatial.geotiff import read_geotiff_gpu + + path = _build_uint16_tiff('nan', tmp_path) + da = read_geotiff_gpu(path, allow_invalid_nodata=True) + # Buffer stays uint16 on the device. + assert da.dtype == cupy.uint16 + arr = da.data.get() + np.testing.assert_array_equal(arr, [[10, 20], [30, 40]]) + + +@_gpu_only +def test_read_geotiff_gpu_chunked_int_nodata_rejected_by_default(tmp_path): + """dask+cupy backend rejects at metadata parse, before any chunk task + is scheduled. Closes the four-backend matrix explicitly. + """ + from xrspatial.geotiff import read_geotiff_gpu + + path = _build_uint16_tiff('nan', tmp_path) + with pytest.raises(InvalidIntegerNodataError): + read_geotiff_gpu(path, chunks=2) diff --git a/xrspatial/geotiff/tests/test_nodata_nan_int_1774.py b/xrspatial/geotiff/tests/test_nodata_nan_int_1774.py index 7a18a6e8..66e7f276 100644 --- a/xrspatial/geotiff/tests/test_nodata_nan_int_1774.py +++ b/xrspatial/geotiff/tests/test_nodata_nan_int_1774.py @@ -1,26 +1,33 @@ -"""Regression tests for issue #1774. +"""Regression tests for issue #1774 (under the #2441 opt-in). -Reading an integer GeoTIFF whose ``GDAL_NODATA`` tag holds a non-finite -string (``"NaN"`` / ``"nan"`` / ``"Inf"`` / ``"-Inf"``) used to crash with -``ValueError: cannot convert float NaN to integer`` at three call sites in -``xrspatial/geotiff/__init__.py``: +History: reading an integer GeoTIFF whose ``GDAL_NODATA`` tag held a +non-finite string (``"NaN"`` / ``"nan"`` / ``"Inf"`` / ``"-Inf"``) used +to crash with ``ValueError: cannot convert float NaN to integer`` at +three call sites in ``xrspatial/geotiff/__init__.py``: * ``open_geotiff`` eager numpy path * ``_apply_nodata_mask_gpu`` (GPU) * ``_delayed_read_window`` (dask) -The fix gates each ``int(nodata)`` cast on ``np.isfinite(nodata)``, mirroring -the ``_resolve_masked_fill`` / ``_sparse_fill_value`` helpers in -``_reader.py``. A non-finite sentinel on an integer file cannot match any -pixel value, so the mask is a no-op and the file dtype is preserved. -``attrs['nodata']`` still carries the raw sentinel so a write round-trip -keeps the original GDAL_NODATA tag. +The first fix gated each ``int(nodata)`` cast on ``np.isfinite(nodata)``, +mirroring the ``_resolve_masked_fill`` / ``_sparse_fill_value`` helpers +in ``_reader.py``. A non-finite sentinel on an integer file cannot match +any pixel value, so the mask was a no-op and the file dtype was +preserved. ``attrs['nodata']`` still carried the raw sentinel so a write +round-trip kept the original GDAL_NODATA tag. -The same gate is paired with ``float(nodata).is_integer()`` so that a +The same gate was paired with ``float(nodata).is_integer()`` so that a fractional ``GDAL_NODATA`` string (e.g. ``"3.5"`` on a ``uint16`` file) -also stays a no-op rather than truncating to ``int(3.5) == 3`` and -silently masking real pixel value 3. This mirrors the +also stayed a no-op rather than truncating to ``int(3.5) == 3`` and +silently masking real pixel value 3. This mirrored the ``_writer.py`` / ``_vrt.py`` pattern used for #1564 / #1616. + +Follow-up (#2441): the release contract upgrades the silent no-op to a +typed ``InvalidIntegerNodataError`` at the read boundary so callers +cannot quietly mismask such files. The legacy no-op behaviour is still +available via ``allow_invalid_nodata=True``; the tests below cover that +opt-in path. The default-rejection path lives in +``test_invalid_int_nodata_rejection_2441.py``. """ from __future__ import annotations @@ -132,9 +139,11 @@ def add_ascii(tag: int, data: bytes) -> None: @pytest.mark.parametrize('nodata_str', ['nan', 'NaN', 'NAN']) def test_open_geotiff_eager_int_nodata_nan(tmp_path, nodata_str): - """Eager numpy path: NaN nodata on uint16 file is a no-op (#1774).""" + """Eager numpy path: NaN nodata on uint16 file is a no-op under the + #2441 opt-in. + """ path = _build_uint16_tiff(nodata_str, tmp_path) - da = open_geotiff(path) + da = open_geotiff(path, allow_invalid_nodata=True) # No pixel can match NaN, so the dtype stays uint16 assert da.dtype == np.uint16 np.testing.assert_array_equal(da.values, [[10, 20], [30, 40]]) @@ -146,9 +155,11 @@ def test_open_geotiff_eager_int_nodata_nan(tmp_path, nodata_str): @pytest.mark.parametrize('nodata_str', ['inf', 'Inf', 'INF', '-inf', '-Inf', '-INF']) def test_open_geotiff_eager_int_nodata_inf(tmp_path, nodata_str): - """Eager numpy path: +/-Inf nodata on uint16 file is a no-op (#1774).""" + """Eager numpy path: +/-Inf nodata on uint16 file is a no-op under + the #2441 opt-in. + """ path = _build_uint16_tiff(nodata_str, tmp_path) - da = open_geotiff(path) + da = open_geotiff(path, allow_invalid_nodata=True) assert da.dtype == np.uint16 np.testing.assert_array_equal(da.values, [[10, 20], [30, 40]]) assert 'nodata' in da.attrs @@ -168,9 +179,11 @@ def test_open_geotiff_eager_int_nodata_finite_still_masks(tmp_path): def test_read_geotiff_dask_int_nodata_nan(tmp_path): - """Dask path: NaN nodata on uint16 file is a no-op (#1774).""" + """Dask path: NaN nodata on uint16 file is a no-op under the #2441 + opt-in. + """ path = _build_uint16_tiff('nan', tmp_path) - da = read_geotiff_dask(path, chunks=2) + da = read_geotiff_dask(path, chunks=2, allow_invalid_nodata=True) # effective_dtype stays uint16 because the sentinel is non-finite assert da.dtype == np.uint16 np.testing.assert_array_equal(da.compute().values, [[10, 20], [30, 40]]) @@ -179,9 +192,11 @@ def test_read_geotiff_dask_int_nodata_nan(tmp_path): def test_read_geotiff_dask_int_nodata_inf(tmp_path): - """Dask path: Inf nodata on uint16 file is a no-op (#1774).""" + """Dask path: Inf nodata on uint16 file is a no-op under the #2441 + opt-in. + """ path = _build_uint16_tiff('inf', tmp_path) - da = read_geotiff_dask(path, chunks=2) + da = read_geotiff_dask(path, chunks=2, allow_invalid_nodata=True) assert da.dtype == np.uint16 np.testing.assert_array_equal(da.compute().values, [[10, 20], [30, 40]]) assert np.isinf(da.attrs['nodata']) @@ -241,9 +256,11 @@ def test_apply_nodata_mask_gpu_int_finite_still_masks(): @pytest.mark.parametrize('nodata_str', ['3.5', '29.5', '0.5']) def test_open_geotiff_eager_int_nodata_fractional_noop(tmp_path, nodata_str): - """Eager numpy path: fractional nodata on uint16 is a no-op.""" + """Eager numpy path: fractional nodata on uint16 is a no-op under the + #2441 opt-in. + """ path = _build_uint16_tiff(nodata_str, tmp_path) - da = open_geotiff(path) + da = open_geotiff(path, allow_invalid_nodata=True) assert da.dtype == np.uint16 np.testing.assert_array_equal(da.values, [[10, 20], [30, 40]]) assert da.attrs['nodata'] == float(nodata_str) @@ -254,10 +271,10 @@ def test_open_geotiff_eager_int_nodata_fractional_does_not_alias_truncation( ): """A ``"30.5"`` sentinel must not mask the real pixel value 30 (which is in the test image). ``int(30.5)`` would truncate to 30 - without the integerness gate. + without the integerness gate. Runs under the #2441 opt-in. """ path = _build_uint16_tiff('30.5', tmp_path) - da = open_geotiff(path) + da = open_geotiff(path, allow_invalid_nodata=True) assert da.dtype == np.uint16 # pixel @[1,0] is 30; the fractional sentinel must NOT have masked it assert da.values[1, 0] == 30 @@ -265,9 +282,11 @@ def test_open_geotiff_eager_int_nodata_fractional_does_not_alias_truncation( def test_read_geotiff_dask_int_nodata_fractional_noop(tmp_path): - """Dask path: fractional nodata on uint16 is a no-op.""" + """Dask path: fractional nodata on uint16 is a no-op under the #2441 + opt-in. + """ path = _build_uint16_tiff('30.5', tmp_path) - da = read_geotiff_dask(path, chunks=2) + da = read_geotiff_dask(path, chunks=2, allow_invalid_nodata=True) # effective_dtype stays uint16 because the sentinel is fractional assert da.dtype == np.uint16 computed = da.compute().values diff --git a/xrspatial/geotiff/tests/test_overview_nodata_inheritance_1739.py b/xrspatial/geotiff/tests/test_overview_nodata_inheritance_1739.py index 64b90784..7dfa474f 100644 --- a/xrspatial/geotiff/tests/test_overview_nodata_inheritance_1739.py +++ b/xrspatial/geotiff/tests/test_overview_nodata_inheritance_1739.py @@ -296,7 +296,8 @@ def __init__(self, subfile_type, width, height): # Overview already has its own nodata (-5555); base has -9999. # Test: inheritance leaves the overview's -5555 untouched. - def fake_extract(ifd, data, byte_order, *, allow_rotated=False): + def fake_extract(ifd, data, byte_order, *, allow_rotated=False, + allow_invalid_nodata=False): if ifd is ov_ifd: gi = GeoInfo() gi.nodata = -5555.0 diff --git a/xrspatial/geotiff/tests/test_overview_pixel_is_point_1642.py b/xrspatial/geotiff/tests/test_overview_pixel_is_point_1642.py index 26a6dba0..76f3e53e 100644 --- a/xrspatial/geotiff/tests/test_overview_pixel_is_point_1642.py +++ b/xrspatial/geotiff/tests/test_overview_pixel_is_point_1642.py @@ -205,7 +205,7 @@ def test_helper_pixel_is_point_origin_shift_unit(monkeypatch): calls = {'count': 0} - def fake_extract(ifd, data, byte_order, *, allow_rotated=False): + def fake_extract(ifd, data, byte_order, *, allow_rotated=False, allow_invalid_nodata=False): calls['count'] += 1 if ifd is base_ifd: return base_info @@ -241,7 +241,7 @@ def test_helper_pixel_is_area_no_origin_shift_unit(monkeypatch): ) ov_info = GeoInfo(has_georef=False, raster_type=RASTER_PIXEL_IS_AREA) - def fake_extract(ifd, data, byte_order, *, allow_rotated=False): + def fake_extract(ifd, data, byte_order, *, allow_rotated=False, allow_invalid_nodata=False): return base_info if ifd is base_ifd else ov_info monkeypatch.setattr(_gt, 'extract_geo_info', fake_extract) @@ -274,7 +274,7 @@ def test_helper_point_overview_with_own_geokeys_not_shifted(monkeypatch): has_georef=True, raster_type=RASTER_PIXEL_IS_POINT, crs_epsg=32610, ) - def fake_extract(ifd, data, byte_order, *, allow_rotated=False): + def fake_extract(ifd, data, byte_order, *, allow_rotated=False, allow_invalid_nodata=False): return base_info if ifd is base_ifd else own_ov_info monkeypatch.setattr(_gt, 'extract_geo_info', fake_extract) diff --git a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py index 58c8aa24..b54fbffa 100644 --- a/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py +++ b/xrspatial/geotiff/tests/test_reader_kwarg_order_1935.py @@ -42,6 +42,11 @@ # alongside the other ambiguous-metadata opt-outs so the canonical # order keeps the typed-error gates grouped. "allow_inconsistent_geokeys", + # Issue #2441 (the #1774 follow-up) added the integer-nodata fail- + # closed opt-out. Sits alongside the other ambiguous-metadata + # opt-outs so the canonical order keeps the typed-error gates + # grouped. + "allow_invalid_nodata", # PR 4 of epic #2340 added the experimental / internal-only codec # opt-ins on the read side, mirroring the writer surface from #2137 # / #1845. They sit after the other ``allow_*`` flags so the