Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 56 additions & 24 deletions xrspatial/geotiff/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,51 @@ def _resolve_photometric(photometric, samples_per_pixel: int):
return photo_int, []


def _reject_disagreeing_photometric_override(
extra_tags, resolved_photo: int, samples: int, photometric
) -> None:
"""Reject an ``extra_tags`` entry that overrides ``TAG_PHOTOMETRIC``
across the MinIsWhite boundary for a single-band raster.

The single-band MinIsWhite path requires the writer to pre-invert
pixels (and the nodata sentinel) so the round-trip matches what the
reader unconditionally inverts. An ``extra_tags`` entry that flips
``TAG_PHOTOMETRIC`` between MinIsWhite (0) and anything else makes
the on-disk tag advertise one model while the bytes were
pre-processed for the other -- the round-trip silently corrupts.

The eager and streaming writers both call this guard before any
pre-inversion runs. Only the MinIsWhite-crossing single-band case
is rejected; multi-band rasters and non-crossing overrides (e.g.
photometric='minisblack' with extra_tags=[(262, SHORT, 1, 1)])
pass through unchanged. Issues #2073 / #1769 / #1836.
"""
if extra_tags is None:
return
override = None
for _et in extra_tags:
if _et[0] == TAG_PHOTOMETRIC:
override = int(_et[3])
break
if override is None:
return
if override == resolved_photo:
return
if not (override == 0 or resolved_photo == 0):
return
if samples != 1:
return
raise ValueError(
f"extra_tags TAG_PHOTOMETRIC override ({override}) "
f"disagrees with photometric={photometric!r} for a "
f"single-band raster where MinIsWhite (photometric=0) "
f"requires writer-side pixel inversion. The override would "
f"either pre-invert pixels for a non-MinIsWhite tag or skip "
f"inversion for a MinIsWhite tag. Pass photometric= directly "
f"instead, or drop the override."
)


# Byte order: always write little-endian
BO = '<'

Expand Down Expand Up @@ -1591,30 +1636,9 @@ def write(data: np.ndarray, path: str, *,
# them to NaN. Issue #1836.
_samples = data.shape[2] if data.ndim == 3 else 1
_resolved_photo, _ = _resolve_photometric(photometric, _samples)
# An ``extra_tags`` entry with TAG_PHOTOMETRIC silently overrides the
# kwarg-resolved value when the IFD is written (issue #1769). The
# pre-inversion decision below would otherwise transform pixels for
# one photometric value while the on-disk tag advertises a different
# one. Refuse the combination so callers do not end up with corrupt
# round-trips through the override path.
_extra_tags_photo = None
if extra_tags is not None:
for _et in extra_tags:
if _et[0] == TAG_PHOTOMETRIC:
_extra_tags_photo = int(_et[3])
break
if (_extra_tags_photo is not None
and _extra_tags_photo != _resolved_photo
and (_extra_tags_photo == 0 or _resolved_photo == 0)
and _samples == 1):
raise ValueError(
f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo}) "
f"disagrees with photometric={photometric!r} for a "
f"single-band raster where MinIsWhite (photometric=0) "
f"requires writer-side pixel inversion. The override would "
f"either pre-invert pixels for a non-MinIsWhite tag or skip "
f"inversion for a MinIsWhite tag. Pass photometric= directly "
f"instead, or drop the override.")
_reject_disagreeing_photometric_override(
extra_tags, _resolved_photo, _samples, photometric
)
if _resolved_photo == 0 and _samples == 1:
if cog or overview_levels is not None:
raise NotImplementedError(
Expand Down Expand Up @@ -1915,6 +1939,14 @@ def write_streaming(dask_data, path: str, *,
"match the reader's unconditional MinIsWhite inversion "
"(issue #1836). Call ``.compute()`` first to use the eager "
"writer, or write with photometric='minisblack' / 'auto'.")
# The kwarg guard above only catches photometric='miniswhite'. An
# ``extra_tags`` entry of ``(TAG_PHOTOMETRIC, ...)`` silently
# overrides the IFD tag further down, so the writer must reject the
# MinIsWhite-crossing single-band case the same way the eager
# writer does. Issue #2073.
_reject_disagreeing_photometric_override(
extra_tags, _resolved_photo_ds, samples, photometric
)

# Match the eager path's dtype promotion
out_dtype = dtype
Expand Down
107 changes: 107 additions & 0 deletions xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""Regression tests for issue #2073.

The eager writer rejects an ``extra_tags`` entry that overrides
``TAG_PHOTOMETRIC`` across the MinIsWhite boundary for a single-band
raster because the reader unconditionally inverts MinIsWhite single-band
data and the writer must pre-invert pixels to keep the round-trip honest.
The streaming dask path now shares the same guard via
``_reject_disagreeing_photometric_override`` in ``_writer.py``.

Three pins:

* the MinIsWhite-crossing single-band override is rejected;
* the non-MinIsWhite-crossing override still round-trips;
* multi-band rasters do not trigger the guard (the writer never
pre-inverts there).
"""
from __future__ import annotations

import os

import dask.array as da
import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff


TAG_PHOTOMETRIC = 262
TYPE_SHORT = 3


def test_streaming_extra_tags_miniswhite_override_rejected_2073(tmp_path):
"""Dask write with extra_tags forcing photometric=0 must raise."""
arr = xr.DataArray(
da.from_array(
np.array([[10, 20], [30, 40]], dtype=np.uint8),
chunks=(1, 2),
),
)
arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)]

out = tmp_path / 'tmp_2073_streaming_miniswhite.tif'
with pytest.raises(ValueError) as excinfo:
to_geotiff(arr, str(out))

msg = str(excinfo.value)
assert 'extra_tags' in msg
assert 'photometric' in msg.lower() or 'MinIsWhite' in msg


def test_streaming_extra_tags_minisblack_override_roundtrips_2073(tmp_path):
"""The valid (non-MinIsWhite-crossing) override should still work."""
src = np.array([[10, 20], [30, 40]], dtype=np.uint8)
arr = xr.DataArray(
da.from_array(src, chunks=(1, 2)),
dims=('y', 'x'),
coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]},
)
# photometric=1 (MinIsBlack) matches what the writer picks for a
# single-band raster anyway: no pre-inversion needed, so the guard
# must not fire.
arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 1)]

out = tmp_path / 'tmp_2073_streaming_minisblack.tif'
to_geotiff(arr, str(out))
assert os.path.exists(out)

back = open_geotiff(str(out))
np.testing.assert_array_equal(np.asarray(back.values), src)


def test_streaming_extra_tags_miniswhite_override_multiband_not_rejected_2073(
tmp_path,
):
"""The guard fires only on single-band rasters.

Multi-band rasters do not pre-invert MinIsWhite, so a
``TAG_PHOTOMETRIC`` override that crosses the MinIsWhite boundary
is not the kind of corruption the guard exists to prevent. Pins
the ``samples == 1`` gate inside
``_reject_disagreeing_photometric_override``: a regression that
dropped or flipped the gate would surface as a spurious
``ValueError`` here.

Whether a 3-band raster tagged MinIsWhite is semantically useful
is a separate concern; this test only locks in the guard's scope.
"""
src = np.zeros((2, 2, 3), dtype=np.uint8)
src[..., 0] = 10
src[..., 1] = 20
src[..., 2] = 30
arr = xr.DataArray(
da.from_array(src, chunks=(2, 2, 3)),
dims=('y', 'x', 'band'),
coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]},
)
arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)]

out = tmp_path / 'tmp_2073_streaming_miniswhite_multiband.tif'
# Must not raise: the writer does not pre-invert multi-band data,
# so the override is not in the "corruption that the guard exists
# to prevent" set. If it raises for an unrelated reason
# (e.g. RGB-requires-3-bands check elsewhere), let the test
# surface that as a real failure rather than swallowing it.
to_geotiff(arr, str(out))
assert os.path.exists(out)
Loading