diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index d1e60c549..6b5d4f921 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-13,1810,MEDIUM,5,"Sweep v5 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1810 open_geotiff dispatcher dropped missing_sources kwarg when routing to read_vrt (Cat 5, same class as #1561/#1605/#1685/#1795). Fix mirrors the on_gpu_failure pattern: sentinel default, forward to read_vrt for .vrt sources, reject for non-VRT sources. Regression test in test_open_geotiff_missing_sources_1810.py. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). cuda-validated." +geotiff,2026-05-15,1845-followup,HIGH,5,"Sweep 2026-05-15 (deep-sweep-api-consistency-geotiff-2026-05-15). 1 HIGH Cat 5 finding fixed in this branch: to_geotiff was missing allow_internal_only_jpeg, the opt-in flag added to write_geotiff_gpu in #1845. to_geotiff(compression='jpeg', gpu=True, allow_internal_only_jpeg=True) could not reach the GPU writer's opt-in because to_geotiff rejected jpeg up front. Fix mirrors the GPU writer: accept the kwarg with default False, gate the up-front jpeg rejection on it, emit GeoTIFFFallbackWarning on opt-in, forward to write_geotiff_gpu. Regression test in test_to_geotiff_allow_internal_only_jpeg_parity.py (6 tests). Prior findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775 #1810) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). cuda-validated." reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)." diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 7dfce7c03..8d3b766eb 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -63,7 +63,8 @@ def to_geotiff(data: xr.DataArray | np.ndarray, gpu: bool | None = None, streaming_buffer_bytes: int = 256 * 1024 * 1024, max_z_error: float = 0.0, - photometric: str | int = 'auto') -> None: + photometric: str | int = 'auto', + allow_internal_only_jpeg: bool = False) -> None: """Write data as a GeoTIFF or Cloud Optimized GeoTIFF. Dask-backed DataArrays are written in streaming mode: one tile-row @@ -106,11 +107,14 @@ def to_geotiff(data: xr.DataArray | np.ndarray, Codec name. One of ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``, ``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias ``'j2k'``), or ``'lerc'``. - ``'jpeg'`` is currently rejected on write because the encoder + ``'jpeg'`` is rejected on write by default because the encoder omits the JPEGTables tag and produced files do not round-trip - through libtiff / GDAL / rasterio. Use ``'deflate'``, ``'zstd'``, - or ``'lzw'`` instead. ``'lerc'`` accepts ``max_z_error`` for - lossy compression with a bounded per-pixel error. + through libtiff / GDAL / rasterio. Pass + ``allow_internal_only_jpeg=True`` to opt in to the experimental + internal-reader-only path (see that parameter for details), or + use ``'deflate'``, ``'zstd'``, or ``'lzw'`` instead. ``'lerc'`` + accepts ``max_z_error`` for lossy compression with a bounded + per-pixel error. compression_level : int or None Compression effort level. None uses each codec's default (6 for deflate/zstd). Valid ranges: deflate 1-9, zstd 1-22, lz4 0-16. @@ -188,6 +192,17 @@ def to_geotiff(data: xr.DataArray | np.ndarray, chosen value; only these two tag ids are overridable so other auto-emitted tags such as ``ImageWidth`` or ``StripOffsets`` remain protected. + allow_internal_only_jpeg : bool + Opt in to the experimental ``compression='jpeg'`` encode path + (default ``False``). The encoder writes self-contained JFIF + tiles without the TIFF JPEGTables tag (347); the file decodes + through this library's reader but not through libtiff, GDAL, + or rasterio. With the flag set, the write proceeds and a + ``GeoTIFFFallbackWarning`` is emitted at call time. Without + the flag, ``compression='jpeg'`` raises ``ValueError``. The + kwarg is forwarded unchanged to ``write_geotiff_gpu`` on the + GPU dispatch path so callers can reach the same experimental + encode via ``to_geotiff(..., gpu=True)``. See issue #1845. Raises ------ @@ -231,16 +246,26 @@ def to_geotiff(data: xr.DataArray | np.ndarray, # files unreadable by libtiff / GDAL / rasterio: they reject the # tile data with "TIFFReadEncodedStrip() failed". The internal # reader round-trips because Pillow re-decodes the JFIF stream - # directly, masking the interop break. Refuse the write rather - # than emit files no other tool can decode. See issue tracking - # the proper JPEGTables fix for re-enabling this codec. - if compression.lower() == 'jpeg': + # directly, masking the interop break. Refuse the write by + # default and surface the same ``allow_internal_only_jpeg=True`` + # opt-in that ``write_geotiff_gpu`` already accepts, so the + # auto-dispatch entry point can reach the experimental + # internal-reader-only path the explicit GPU entry point + # exposes (issue #1845). + if compression.lower() == 'jpeg' and not allow_internal_only_jpeg: raise ValueError( "compression='jpeg' is not supported: the encoder writes " "self-contained JFIF streams without the required " "JPEGTables tag (347), so other readers (libtiff, GDAL, " "rasterio) reject the file. Use 'deflate', 'zstd', or " - "'lzw' instead.") + "'lzw' instead. Pass allow_internal_only_jpeg=True to " + "opt in to the experimental internal-reader-only path " + "(issue #1845).") + # The JPEG opt-in warning is emitted below once we know the + # dispatch decision: ``write_geotiff_gpu`` emits its own warning + # on the GPU path, so emitting here would double-warn callers + # of ``to_geotiff(gpu=True, compression='jpeg', + # allow_internal_only_jpeg=True)``. # max_z_error only applies to LERC; reject negative values and reject # non-zero values paired with any other codec so the caller learns the @@ -272,6 +297,29 @@ def to_geotiff(data: xr.DataArray | np.ndarray, _is_vrt_path = ( isinstance(path, str) and path.lower().endswith('.vrt')) + # Resolve GPU dispatch up front so the JPEG opt-in warning fires + # exactly once. ``write_geotiff_gpu`` emits its own warning on the + # GPU path; emitting here as well would double-warn callers of + # ``to_geotiff(gpu=True, compression='jpeg', + # allow_internal_only_jpeg=True)``. VRT and CPU paths receive the + # warning here. On GPU-to-CPU fallback the GPU writer has already + # warned before raising, so the CPU fallback does not warn twice. + auto_detected_gpu = gpu is None + use_gpu = gpu if gpu is not None else _is_gpu_data(data) + if (isinstance(compression, str) + and compression.lower() == 'jpeg' + and allow_internal_only_jpeg + and not use_gpu): + warnings.warn( + "to_geotiff(compression='jpeg', " + "allow_internal_only_jpeg=True) writes JFIF tiles " + "without the TIFF JPEGTables tag (347); the file decodes " + "through xrspatial but may fail in libtiff, GDAL, or " + "rasterio. See issue #1845.", + GeoTIFFFallbackWarning, + stacklevel=2, + ) + # tile_size only applies to tiled output; warn if the caller passed a # non-default size alongside strip mode (it would otherwise be silently # ignored). The VRT path always tiles, so the warning would be @@ -318,12 +366,10 @@ def to_geotiff(data: xr.DataArray | np.ndarray, photometric=photometric) return - # Auto-detect GPU data and dispatch to write_geotiff_gpu. ``gpu is - # None`` is the implicit "use whatever fits the data" path; preserve - # that distinction in the fallback warning below so users who never - # set ``gpu=True`` are not told their explicit request was dropped. - auto_detected_gpu = gpu is None - use_gpu = gpu if gpu is not None else _is_gpu_data(data) + # Dispatch to write_geotiff_gpu when GPU was selected (explicit + # ``gpu=True`` or auto-detected CuPy data). ``auto_detected_gpu`` + # and ``use_gpu`` were computed above to gate the JPEG opt-in + # warning; reuse them so the call sites stay in sync. if use_gpu and _path_is_file_like: # write_geotiff_gpu's nvCOMP path materialises tile parts and then # calls _write_bytes(path), which would write at the buffer's @@ -348,18 +394,21 @@ def to_geotiff(data: xr.DataArray | np.ndarray, "tiled=False is not supported on the GPU writer. " "Pass gpu=False or omit tiled=False.") try: - write_geotiff_gpu(data, path, crs=crs, nodata=nodata, - compression=compression, - compression_level=compression_level, - tiled=tiled, - tile_size=tile_size, - predictor=predictor, - cog=cog, - overview_levels=overview_levels, - overview_resampling=overview_resampling, - bigtiff=bigtiff, - streaming_buffer_bytes=streaming_buffer_bytes, - photometric=photometric) + write_geotiff_gpu( + data, path, crs=crs, nodata=nodata, + compression=compression, + compression_level=compression_level, + tiled=tiled, + tile_size=tile_size, + predictor=predictor, + cog=cog, + overview_levels=overview_levels, + overview_resampling=overview_resampling, + bigtiff=bigtiff, + streaming_buffer_bytes=streaming_buffer_bytes, + photometric=photometric, + allow_internal_only_jpeg=allow_internal_only_jpeg, + ) return except ImportError as e: # ``write_geotiff_gpu`` raises ImportError when cupy itself diff --git a/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py b/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py new file mode 100644 index 000000000..13afe52e9 --- /dev/null +++ b/xrspatial/geotiff/tests/test_to_geotiff_allow_internal_only_jpeg_parity.py @@ -0,0 +1,245 @@ +"""API parity: ``to_geotiff`` must accept ``allow_internal_only_jpeg``. + +Background +---------- +Issue #1845 added the ``allow_internal_only_jpeg`` opt-in to +``write_geotiff_gpu`` so callers who explicitly want the experimental +internal-reader-only JPEG-in-TIFF encode path could reach it. The CPU +dispatcher ``to_geotiff`` kept rejecting ``compression='jpeg'`` +unconditionally at the top of the function, which meant the +``to_geotiff(..., gpu=True)`` auto-dispatch path could never reach the +GPU writer's opt-in: the rejection fired before the GPU dispatch. + +The two public writers therefore disagreed about the supported codec +set. Callers reading the GPU writer's docstring saw an opt-in flag they +could not actually invoke through the public dispatcher. The fix adds +the same kwarg to ``to_geotiff``, gates the up-front jpeg rejection on +it, and forwards it through the GPU dispatch path so the two entry +points expose a consistent surface. + +These tests pin the parity: signature, rejection, opt-in warning, and +end-to-end write round-trip via the internal reader. +""" +from __future__ import annotations + +import inspect +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + GeoTIFFFallbackWarning, + open_geotiff, + to_geotiff, + write_geotiff_gpu, +) + + +def _make_rgb_uint8_da() -> xr.DataArray: + """64x64x3 uint8 RGB raster suitable for the JPEG encode path.""" + rng = np.random.RandomState(0) + arr = rng.randint(0, 256, size=(64, 64, 3), dtype=np.uint8) + return xr.DataArray( + arr, + dims=("y", "x", "band"), + coords={ + "y": np.arange(64, dtype=np.float64), + "x": np.arange(64, dtype=np.float64), + "band": np.array([1, 2, 3], dtype=np.int32), + }, + ) + + +def test_to_geotiff_signature_has_allow_internal_only_jpeg(): + """``to_geotiff`` must expose ``allow_internal_only_jpeg`` so callers + on the auto-dispatch path can reach the same opt-in + ``write_geotiff_gpu`` exposes (issue #1845). + + Pinning the signature catches accidental removal during future + refactors: if the kwarg disappears, the dispatcher silently drops + back to the unconditional jpeg rejection and the parity regresses. + """ + params = inspect.signature(to_geotiff).parameters + assert "allow_internal_only_jpeg" in params, ( + "to_geotiff must accept allow_internal_only_jpeg for parity " + "with write_geotiff_gpu (issue #1845)." + ) + # Default matches write_geotiff_gpu so callers passing the kwarg to + # either entry point see identical behaviour. + assert params["allow_internal_only_jpeg"].default is False + + +def test_to_geotiff_rejects_jpeg_without_opt_in(tmp_path): + """``to_geotiff(compression='jpeg')`` without the opt-in still + raises ``ValueError``. The default rejection is the entire reason + the flag exists; flipping the default would re-introduce the + interop break #1845 fixed. + """ + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected.tif") + with pytest.raises(ValueError, match="JPEGTables"): + to_geotiff(da, path, compression='jpeg') + + +def test_to_geotiff_jpeg_rejection_message_mentions_opt_in(tmp_path): + """The rejection message must point users at the opt-in flag so + they can find the escape hatch. The GPU writer's matching error + references the same flag; the two messages need to agree. + """ + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected_msg.tif") + with pytest.raises(ValueError) as exc: + to_geotiff(da, path, compression='jpeg') + msg = str(exc.value) + assert "allow_internal_only_jpeg" in msg + assert "1845" in msg + + +def test_to_geotiff_jpeg_opt_in_emits_warning_and_writes(tmp_path): + """Setting ``allow_internal_only_jpeg=True`` lets the write + proceed and emits ``GeoTIFFFallbackWarning`` so the caller knows + the file may not round-trip through external readers. + + Round-trips through this library's reader (it decodes the JFIF + streams directly via Pillow); the file is otherwise unreadable by + libtiff / GDAL / rasterio, which is exactly what the warning warns + about. + """ + da = _make_rgb_uint8_da() + path = str(tmp_path / "opt_in.tif") + with pytest.warns(GeoTIFFFallbackWarning, match="JPEGTables"): + to_geotiff( + da, path, + compression='jpeg', + allow_internal_only_jpeg=True, + ) + assert os.path.exists(path) + assert os.path.getsize(path) > 0 + # Internal reader still decodes the file. + decoded = open_geotiff(path) + assert decoded.shape == da.shape + + +def test_to_geotiff_non_jpeg_unaffected_by_flag(tmp_path): + """Passing ``allow_internal_only_jpeg=True`` on a non-JPEG codec is + a no-op: no warning, no error. The flag must stay JPEG-specific so + callers passing the kwarg defensively to either writer do not pay a + cost when they pick a different codec. + """ + import warnings as _warnings + + da = _make_rgb_uint8_da() + path = str(tmp_path / "non_jpeg_flag.tif") + with _warnings.catch_warnings(): + _warnings.simplefilter("error", GeoTIFFFallbackWarning) + to_geotiff( + da, path, + compression='zstd', + allow_internal_only_jpeg=True, + ) + assert os.path.exists(path) + + +def test_to_geotiff_and_gpu_writer_share_kwarg_default(): + """Both writers must use the same default for the flag so a caller + forwarding kwargs from one to the other sees identical behaviour. + """ + eager_default = inspect.signature( + to_geotiff).parameters["allow_internal_only_jpeg"].default + gpu_default = inspect.signature( + write_geotiff_gpu).parameters["allow_internal_only_jpeg"].default + assert eager_default == gpu_default == False # noqa: E712 + + +def test_to_geotiff_gpu_dispatch_forwards_allow_internal_only_jpeg( + tmp_path, monkeypatch): + """``to_geotiff(gpu=True, allow_internal_only_jpeg=True)`` must + forward the kwarg into ``write_geotiff_gpu``. Without this, the + auto-dispatch path silently swallows the opt-in and the GPU writer + would refuse the encode after the CPU dispatcher accepted the + request -- the exact divergence #1845 fixed. + + Stubbed because most CI hosts lack a CUDA stack; the test still + pins the dispatcher contract regardless of whether the data is on + a real GPU. + """ + captured: dict = {} + + def _fake_write_geotiff_gpu(data, path, **kwargs): + captured['data'] = data + captured['path'] = path + captured['kwargs'] = kwargs + # Touch the file so any caller-side existence asserts pass. + with open(path, 'wb') as f: + f.write(b'') + + monkeypatch.setattr( + 'xrspatial.geotiff._writers.eager.write_geotiff_gpu', + _fake_write_geotiff_gpu, + ) + + da = _make_rgb_uint8_da() + path = str(tmp_path / 'gpu_forward.tif') + to_geotiff( + da, path, + gpu=True, + compression='jpeg', + allow_internal_only_jpeg=True, + ) + + assert 'kwargs' in captured, "to_geotiff must dispatch to write_geotiff_gpu when gpu=True" + assert captured['kwargs'].get('allow_internal_only_jpeg') is True, ( + "to_geotiff must forward allow_internal_only_jpeg unchanged " + "into write_geotiff_gpu (issue #1845)." + ) + assert captured['kwargs'].get('compression') == 'jpeg' + + +def test_to_geotiff_gpu_dispatch_emits_single_jpeg_opt_in_warning( + tmp_path, monkeypatch, recwarn): + """The CPU dispatcher must not emit its own JPEG opt-in warning + when the call is delegated to ``write_geotiff_gpu``. The GPU writer + emits its own ``GeoTIFFFallbackWarning``; emitting from both would + surface the opt-in disclosure twice for the same encode. + + Stubs the GPU writer to emit the warning the real implementation + emits, then asserts exactly one ``GeoTIFFFallbackWarning`` reached + the caller. + """ + import warnings as _warnings + + def _fake_write_geotiff_gpu(data, path, **kwargs): + if kwargs.get('compression') == 'jpeg' and kwargs.get( + 'allow_internal_only_jpeg'): + _warnings.warn( + "write_geotiff_gpu jpeg opt-in (stub).", + GeoTIFFFallbackWarning, + stacklevel=2, + ) + with open(path, 'wb') as f: + f.write(b'') + + monkeypatch.setattr( + 'xrspatial.geotiff._writers.eager.write_geotiff_gpu', + _fake_write_geotiff_gpu, + ) + + da = _make_rgb_uint8_da() + path = str(tmp_path / 'gpu_single_warn.tif') + to_geotiff( + da, path, + gpu=True, + compression='jpeg', + allow_internal_only_jpeg=True, + ) + + jpeg_warns = [w for w in recwarn.list + if issubclass(w.category, GeoTIFFFallbackWarning)] + assert len(jpeg_warns) == 1, ( + "to_geotiff(gpu=True, compression='jpeg', " + "allow_internal_only_jpeg=True) must emit exactly one " + "GeoTIFFFallbackWarning; got " + f"{[str(w.message) for w in jpeg_warns]}" + )