Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-api-consistency-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-13,1810,MEDIUM,5,"Sweep v5 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1810 open_geotiff dispatcher dropped missing_sources kwarg when routing to read_vrt (Cat 5, same class as #1561/#1605/#1685/#1795). Fix mirrors the on_gpu_failure pattern: sentinel default, forward to read_vrt for .vrt sources, reject for non-VRT sources. Regression test in test_open_geotiff_missing_sources_1810.py. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). cuda-validated."
geotiff,2026-05-15,1845-followup,HIGH,5,"Sweep 2026-05-15 (deep-sweep-api-consistency-geotiff-2026-05-15). 1 HIGH Cat 5 finding fixed in this branch: to_geotiff was missing allow_internal_only_jpeg, the opt-in flag added to write_geotiff_gpu in #1845. to_geotiff(compression='jpeg', gpu=True, allow_internal_only_jpeg=True) could not reach the GPU writer's opt-in because to_geotiff rejected jpeg up front. Fix mirrors the GPU writer: accept the kwarg with default False, gate the up-front jpeg rejection on it, emit GeoTIFFFallbackWarning on opt-in, forward to write_geotiff_gpu. Regression test in test_to_geotiff_allow_internal_only_jpeg_parity.py (6 tests). Prior findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775 #1810) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). cuda-validated."
reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)."
105 changes: 77 additions & 28 deletions xrspatial/geotiff/_writers/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
gpu: bool | None = None,
streaming_buffer_bytes: int = 256 * 1024 * 1024,
max_z_error: float = 0.0,
photometric: str | int = 'auto') -> None:
photometric: str | int = 'auto',
allow_internal_only_jpeg: bool = False) -> None:
"""Write data as a GeoTIFF or Cloud Optimized GeoTIFF.

Dask-backed DataArrays are written in streaming mode: one tile-row
Expand Down Expand Up @@ -106,11 +107,14 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
Codec name. One of ``'none'``, ``'deflate'``, ``'lzw'``,
``'jpeg'``, ``'packbits'``, ``'zstd'``, ``'lz4'``,
``'jpeg2000'`` (alias ``'j2k'``), or ``'lerc'``.
``'jpeg'`` is currently rejected on write because the encoder
``'jpeg'`` is rejected on write by default because the encoder
omits the JPEGTables tag and produced files do not round-trip
through libtiff / GDAL / rasterio. Use ``'deflate'``, ``'zstd'``,
or ``'lzw'`` instead. ``'lerc'`` accepts ``max_z_error`` for
lossy compression with a bounded per-pixel error.
through libtiff / GDAL / rasterio. Pass
``allow_internal_only_jpeg=True`` to opt in to the experimental
internal-reader-only path (see that parameter for details), or
use ``'deflate'``, ``'zstd'``, or ``'lzw'`` instead. ``'lerc'``
accepts ``max_z_error`` for lossy compression with a bounded
per-pixel error.
compression_level : int or None
Compression effort level. None uses each codec's default (6 for
deflate/zstd). Valid ranges: deflate 1-9, zstd 1-22, lz4 0-16.
Expand Down Expand Up @@ -188,6 +192,17 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
chosen value; only these two tag ids are overridable so other
auto-emitted tags such as ``ImageWidth`` or ``StripOffsets``
remain protected.
allow_internal_only_jpeg : bool
Opt in to the experimental ``compression='jpeg'`` encode path
(default ``False``). The encoder writes self-contained JFIF
tiles without the TIFF JPEGTables tag (347); the file decodes
through this library's reader but not through libtiff, GDAL,
or rasterio. With the flag set, the write proceeds and a
``GeoTIFFFallbackWarning`` is emitted at call time. Without
the flag, ``compression='jpeg'`` raises ``ValueError``. The
kwarg is forwarded unchanged to ``write_geotiff_gpu`` on the
GPU dispatch path so callers can reach the same experimental
encode via ``to_geotiff(..., gpu=True)``. See issue #1845.

Raises
------
Expand Down Expand Up @@ -231,16 +246,26 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
# files unreadable by libtiff / GDAL / rasterio: they reject the
# tile data with "TIFFReadEncodedStrip() failed". The internal
# reader round-trips because Pillow re-decodes the JFIF stream
# directly, masking the interop break. Refuse the write rather
# than emit files no other tool can decode. See issue tracking
# the proper JPEGTables fix for re-enabling this codec.
if compression.lower() == 'jpeg':
# directly, masking the interop break. Refuse the write by
# default and surface the same ``allow_internal_only_jpeg=True``
# opt-in that ``write_geotiff_gpu`` already accepts, so the
# auto-dispatch entry point can reach the experimental
# internal-reader-only path the explicit GPU entry point
# exposes (issue #1845).
if compression.lower() == 'jpeg' and not allow_internal_only_jpeg:
raise ValueError(
"compression='jpeg' is not supported: the encoder writes "
"self-contained JFIF streams without the required "
"JPEGTables tag (347), so other readers (libtiff, GDAL, "
"rasterio) reject the file. Use 'deflate', 'zstd', or "
"'lzw' instead.")
"'lzw' instead. Pass allow_internal_only_jpeg=True to "
"opt in to the experimental internal-reader-only path "
"(issue #1845).")
# The JPEG opt-in warning is emitted below once we know the
# dispatch decision: ``write_geotiff_gpu`` emits its own warning
# on the GPU path, so emitting here would double-warn callers
# of ``to_geotiff(gpu=True, compression='jpeg',
# allow_internal_only_jpeg=True)``.

# max_z_error only applies to LERC; reject negative values and reject
# non-zero values paired with any other codec so the caller learns the
Expand Down Expand Up @@ -272,6 +297,29 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
_is_vrt_path = (
isinstance(path, str) and path.lower().endswith('.vrt'))

# Resolve GPU dispatch up front so the JPEG opt-in warning fires
# exactly once. ``write_geotiff_gpu`` emits its own warning on the
# GPU path; emitting here as well would double-warn callers of
# ``to_geotiff(gpu=True, compression='jpeg',
# allow_internal_only_jpeg=True)``. VRT and CPU paths receive the
# warning here. On GPU-to-CPU fallback the GPU writer has already
# warned before raising, so the CPU fallback does not warn twice.
auto_detected_gpu = gpu is None
use_gpu = gpu if gpu is not None else _is_gpu_data(data)
if (isinstance(compression, str)
and compression.lower() == 'jpeg'
and allow_internal_only_jpeg
and not use_gpu):
warnings.warn(
"to_geotiff(compression='jpeg', "
"allow_internal_only_jpeg=True) writes JFIF tiles "
"without the TIFF JPEGTables tag (347); the file decodes "
"through xrspatial but may fail in libtiff, GDAL, or "
"rasterio. See issue #1845.",
GeoTIFFFallbackWarning,
stacklevel=2,
)

# tile_size only applies to tiled output; warn if the caller passed a
# non-default size alongside strip mode (it would otherwise be silently
# ignored). The VRT path always tiles, so the warning would be
Expand Down Expand Up @@ -318,12 +366,10 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
photometric=photometric)
return

# Auto-detect GPU data and dispatch to write_geotiff_gpu. ``gpu is
# None`` is the implicit "use whatever fits the data" path; preserve
# that distinction in the fallback warning below so users who never
# set ``gpu=True`` are not told their explicit request was dropped.
auto_detected_gpu = gpu is None
use_gpu = gpu if gpu is not None else _is_gpu_data(data)
# Dispatch to write_geotiff_gpu when GPU was selected (explicit
# ``gpu=True`` or auto-detected CuPy data). ``auto_detected_gpu``
# and ``use_gpu`` were computed above to gate the JPEG opt-in
# warning; reuse them so the call sites stay in sync.
if use_gpu and _path_is_file_like:
# write_geotiff_gpu's nvCOMP path materialises tile parts and then
# calls _write_bytes(path), which would write at the buffer's
Expand All @@ -348,18 +394,21 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
"tiled=False is not supported on the GPU writer. "
"Pass gpu=False or omit tiled=False.")
try:
write_geotiff_gpu(data, path, crs=crs, nodata=nodata,
compression=compression,
compression_level=compression_level,
tiled=tiled,
tile_size=tile_size,
predictor=predictor,
cog=cog,
overview_levels=overview_levels,
overview_resampling=overview_resampling,
bigtiff=bigtiff,
streaming_buffer_bytes=streaming_buffer_bytes,
photometric=photometric)
write_geotiff_gpu(
data, path, crs=crs, nodata=nodata,
compression=compression,
compression_level=compression_level,
tiled=tiled,
tile_size=tile_size,
predictor=predictor,
cog=cog,
overview_levels=overview_levels,
overview_resampling=overview_resampling,
bigtiff=bigtiff,
streaming_buffer_bytes=streaming_buffer_bytes,
photometric=photometric,
allow_internal_only_jpeg=allow_internal_only_jpeg,
)
return
except ImportError as e:
# ``write_geotiff_gpu`` raises ImportError when cupy itself
Expand Down
Loading
Loading