Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions xrspatial/geotiff/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,28 @@
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Shared error messages
# ---------------------------------------------------------------------------

# Issue #2312: a single source of truth for the ``cog=True, tiled=False``
# rejection message used by both the public ``to_geotiff`` boundary and
# the array-level ``_write`` defense-in-depth gate. Keeping the message
# string in one place stops the two raise sites from drifting if one
# ever gets reworded. The substring assertions in
# ``test_cog_requires_tiled_2312.py`` pin the actionable tokens
# (``tiled=True``, ``cog=False``, ``COG``) so a future rewrite still
# has to satisfy the same contract.
_COG_REQUIRES_TILED_MSG = (
"cog=True requires tiled=True: the COG specification "
"mandates a tiled internal layout, so a strip-layout file "
"cannot be a valid Cloud Optimized GeoTIFF. Pass tiled=True "
"(or omit tiled, which defaults to True) to write a COG, or "
"set cog=False to write a non-COG strip TIFF. See issue "
"#2312."
)


# ---------------------------------------------------------------------------
# Array-level write entry points (module-private; see module docstring)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -470,6 +492,18 @@ def _write(data: np.ndarray, path: str, *,
if nodata is not None:
nodata = _invert_nodata_for_miniswhite(nodata, data.dtype)

# Issue #2312: defense-in-depth gate for ``cog=True, tiled=False``.
# The public ``to_geotiff`` wrapper rejects this combination at its
# own boundary, so this branch is unreachable when the wrapper is
# the caller; the gate matters for direct callers of ``_write`` and
# for any future caller (test harness, internal tool) that bypasses
# the wrapper. Without it, ``_write_stripped`` would run below and
# the overview-pyramid block at line ~490 would attach overviews to
# a strip-layout body, producing a malformed file that claims to be
# a COG.
if cog and not tiled:
raise ValueError(_COG_REQUIRES_TILED_MSG)

# Build pixel data parts
parts = []

Expand Down
29 changes: 25 additions & 4 deletions xrspatial/geotiff/_writers/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from .._validation import (_validate_3d_writer_dims, _validate_no_rotated_affine,
_validate_nodata_arg, _validate_tile_size_arg,
_validate_writer_spatial_shape, validate_write_metadata)
from .._writer import write
from .._writer import _COG_REQUIRES_TILED_MSG, write
from .gpu import write_geotiff_gpu


Expand Down Expand Up @@ -141,7 +141,9 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
Codecs without a level concept (lzw, packbits, jpeg) accept any
value and ignore it.
tiled : bool
Use tiled layout (default True).
Use tiled layout (default True). Incompatible with ``cog=True``
because the COG specification requires a tiled internal layout;
passing ``cog=True, tiled=False`` raises ``ValueError`` (#2312).
tile_size : int
Tile size in pixels (default 256). Must be a positive multiple
of 16 when ``tiled=True``; this is a TIFF 6 spec requirement
Expand All @@ -160,7 +162,10 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
Advanced: COG output materialises the full array because
overview pyramids need it, and the all-IFDs-at-file-start layout
only round-trips through readers that honour the COG layout
contract. Write as Cloud Optimized GeoTIFF.
contract. Write as Cloud Optimized GeoTIFF. Requires
``tiled=True`` (the default): the COG specification mandates a
tiled internal layout, so ``cog=True, tiled=False`` raises
``ValueError`` (#2312).
overview_levels : list[int] or None
Advanced: overview pyramids are an optional COG feature; the
decimation factors and resampling choice affect downstream
Expand Down Expand Up @@ -559,11 +564,27 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
stacklevel=2,
)

# Issue #2312: ``cog=True`` requires a tiled internal layout per the
# COG spec. The writer used to accept ``cog=True, tiled=False``, warn
# that ``tile_size`` was ignored, and then write strips via
# ``_write`` -- silently producing a file that violates the stable
# COG contract promoted in #2300. Reject the combination at the
# public boundary with the same actionable-error shape as the other
# COG input gates pinned in #2301 (commit f5fbad54): the message
# names the violated constraint and lists both fixes the caller can
# apply in one line. The defense-in-depth gate in ``_writer._write``
# catches direct callers that bypass this wrapper.
if cog and not tiled:
raise ValueError(_COG_REQUIRES_TILED_MSG)

# tile_size only applies to tiled output; warn if the caller passed a
# non-default size alongside strip mode (it would otherwise be silently
# ignored). The VRT path always tiles, so the warning would be
# misleading there -- the VRT branch below rejects tiled=False up front
# instead.
# instead. The ``cog=True, tiled=False`` arm of this warning is dead
# under the #2312 gate above (that combination raises before reaching
# this line), so the condition below only fires for ``cog=False,
# tiled=False, tile_size != 256``.
if not tiled and tile_size != 256 and not _is_vrt_path:
warnings.warn(
f"tile_size={tile_size} is ignored when tiled=False "
Expand Down
187 changes: 187 additions & 0 deletions xrspatial/geotiff/tests/test_cog_requires_tiled_2312.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""``cog=True`` requires ``tiled=True`` (#2312).

The COG specification mandates a tiled internal layout. Before this
issue's fix, ``to_geotiff(..., cog=True, tiled=False)`` returned
successfully and wrote a strip-layout TIFF: ``cog=True`` was silently
ignored for the layout decision while the overview-pyramid and IFD-order
parts of the COG path still ran, producing a malformed hybrid that
violated the stable COG contract promoted in #2300.

These tests pin three things:

* The public ``to_geotiff`` wrapper rejects ``cog=True, tiled=False``
with a typed, actionable error that names both fixes the caller can
apply (``tiled=True`` or ``cog=False``). This is the user-visible
rejection.
* The defense-in-depth gate inside ``_writer._write`` also rejects the
combination. Direct callers of the array-level entry point (the GPU
CPU-fallback path, tests, internal tools) cannot bypass the public
wrapper to produce the malformed file.
* The tiled COG path (``cog=True``, default ``tiled=True``) still works
end-to-end. A regression in the new gate that broke valid COG writes
would be a worse outcome than the original bug.

Message-substring assertions mirror the style of
``test_cog_invalid_input_errors_2286.py`` (PR #2301): every gate pins
both the exception type and the actionable tokens (``tiled=True``,
``cog=False``, ``COG``) so a future rewrite cannot silently turn the
error into a vague one.
"""
from __future__ import annotations

import warnings

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import to_geotiff
from xrspatial.geotiff._writer import write as _array_write


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _float_da(shape=(64, 64)):
"""A small float32 DataArray suitable for COG writes."""
return xr.DataArray(
np.zeros(shape, dtype=np.float32), dims=('y', 'x')
)


# ---------------------------------------------------------------------------
# Public boundary: ``to_geotiff(cog=True, tiled=False)`` is refused.
# ---------------------------------------------------------------------------

def test_public_writer_rejects_cog_true_tiled_false(tmp_path):
"""The public entry point raises ``ValueError`` with a message that
names the COG-spec constraint and both caller-side fixes."""
da = _float_da()
p = tmp_path / 'cog_tiled_false_2312.tif'

with pytest.raises(ValueError) as exc:
to_geotiff(da, str(p), cog=True, tiled=False)

msg = str(exc.value)
# The message must name the violated constraint.
assert 'COG' in msg, msg
assert 'tiled' in msg.lower(), msg
# Both caller-side fixes must appear so the error is actionable.
assert 'tiled=True' in msg, msg
assert 'cog=False' in msg, msg


def test_public_writer_rejects_cog_true_tiled_false_with_tile_size(tmp_path):
"""Pinning the rejection survives a ``tile_size`` kwarg too.

Before #2312, ``to_geotiff(..., cog=True, tiled=False,
tile_size=128)`` emitted the "tile_size is ignored when tiled=False"
warning and then wrote strips. The new gate has to fire before that
warning so the caller never sees the misleading "tile_size is
ignored" message under ``cog=True``.
"""
da = _float_da()
p = tmp_path / 'cog_tiled_false_with_tile_size_2312.tif'

# ``pytest.warns(None)`` was removed; use the stdlib catch_warnings
# recorder to assert the dead "tile_size is ignored" warning never
# fires on the ``cog=True`` arm.
with warnings.catch_warnings(record=True) as record:
warnings.simplefilter('always')
with pytest.raises(ValueError) as exc:
to_geotiff(da, str(p), cog=True, tiled=False, tile_size=128)

msg = str(exc.value)
assert 'COG' in msg, msg
assert 'tiled=True' in msg, msg

tile_size_warnings = [
w for w in record
if 'tile_size' in str(w.message)
and 'is ignored when tiled=False' in str(w.message)
]
assert not tile_size_warnings, [str(w.message) for w in tile_size_warnings]


# ---------------------------------------------------------------------------
# Defense in depth: ``_writer._write(cog=True, tiled=False)`` also raises.
# ---------------------------------------------------------------------------

def test_lowlevel_write_rejects_cog_true_tiled_false(tmp_path):
"""The array-level entry point ``_writer._write`` (re-exported as
``write``) carries its own gate so a caller that bypasses the public
wrapper still gets the typed rejection.

Without this, a direct caller could quietly produce the malformed
strip-plus-overviews file the public boundary refuses.
"""
arr = np.zeros((64, 64), dtype=np.float32)
p = tmp_path / 'cog_tiled_false_lowlevel_2312.tif'

with pytest.raises(ValueError) as exc:
_array_write(
arr,
str(p),
compression='deflate',
tiled=False,
cog=True,
)

msg = str(exc.value)
assert 'COG' in msg, msg
assert 'tiled=True' in msg, msg
assert 'cog=False' in msg, msg


# ---------------------------------------------------------------------------
# Smoke test: the valid tiled COG path still works.
# ---------------------------------------------------------------------------

def test_tiled_cog_smoke_still_works(tmp_path):
"""A regression in the new gate that broke valid COG writes would
be a worse outcome than the original bug. Pin the happy path
end-to-end so the gate has to stay narrowly targeted at the
``cog=True, tiled=False`` combination it is meant to catch.
"""
da = _float_da(shape=(128, 128))
p = tmp_path / 'cog_tiled_smoke_2312.tif'

rv = to_geotiff(da, str(p), cog=True, tiled=True, tile_size=64)
assert rv == str(p)
assert p.exists()
assert p.stat().st_size > 0


def test_tiled_cog_smoke_default_tiled(tmp_path):
"""``tiled`` defaults to ``True`` on ``to_geotiff``, so ``cog=True``
on its own should also produce a valid COG. Pinned so a future
change that flipped the default would not silently start hitting
the new rejection gate.
"""
da = _float_da(shape=(128, 128))
p = tmp_path / 'cog_tiled_default_smoke_2312.tif'

rv = to_geotiff(da, str(p), cog=True)
assert rv == str(p)
assert p.exists()
assert p.stat().st_size > 0


# ---------------------------------------------------------------------------
# Negative control: ``cog=False, tiled=False`` is still a valid strip TIFF.
# ---------------------------------------------------------------------------

def test_strip_layout_without_cog_still_works(tmp_path):
"""``tiled=False`` without ``cog=True`` is the supported strip-TIFF
path; the new gate must not regress it. Pinned so a stricter
interpretation of ``cog=True implies tiled=True`` could not creep
into the general ``tiled=False`` path.
"""
da = _float_da(shape=(64, 64))
p = tmp_path / 'strip_no_cog_2312.tif'

rv = to_geotiff(da, str(p), cog=False, tiled=False)
assert rv == str(p)
assert p.exists()
assert p.stat().st_size > 0
Loading