diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index a2468bd29..e649a050d 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -209,6 +209,51 @@ def _resolve_photometric(photometric, samples_per_pixel: int): return photo_int, [] +def _reject_disagreeing_photometric_override( + extra_tags, resolved_photo: int, samples: int, photometric +) -> None: + """Reject an ``extra_tags`` entry that overrides ``TAG_PHOTOMETRIC`` + across the MinIsWhite boundary for a single-band raster. + + The single-band MinIsWhite path requires the writer to pre-invert + pixels (and the nodata sentinel) so the round-trip matches what the + reader unconditionally inverts. An ``extra_tags`` entry that flips + ``TAG_PHOTOMETRIC`` between MinIsWhite (0) and anything else makes + the on-disk tag advertise one model while the bytes were + pre-processed for the other -- the round-trip silently corrupts. + + The eager and streaming writers both call this guard before any + pre-inversion runs. Only the MinIsWhite-crossing single-band case + is rejected; multi-band rasters and non-crossing overrides (e.g. + photometric='minisblack' with extra_tags=[(262, SHORT, 1, 1)]) + pass through unchanged. Issues #2073 / #1769 / #1836. + """ + if extra_tags is None: + return + override = None + for _et in extra_tags: + if _et[0] == TAG_PHOTOMETRIC: + override = int(_et[3]) + break + if override is None: + return + if override == resolved_photo: + return + if not (override == 0 or resolved_photo == 0): + return + if samples != 1: + return + raise ValueError( + f"extra_tags TAG_PHOTOMETRIC override ({override}) " + f"disagrees with photometric={photometric!r} for a " + f"single-band raster where MinIsWhite (photometric=0) " + f"requires writer-side pixel inversion. The override would " + f"either pre-invert pixels for a non-MinIsWhite tag or skip " + f"inversion for a MinIsWhite tag. Pass photometric= directly " + f"instead, or drop the override." + ) + + # Byte order: always write little-endian BO = '<' @@ -1591,30 +1636,9 @@ def write(data: np.ndarray, path: str, *, # them to NaN. Issue #1836. _samples = data.shape[2] if data.ndim == 3 else 1 _resolved_photo, _ = _resolve_photometric(photometric, _samples) - # An ``extra_tags`` entry with TAG_PHOTOMETRIC silently overrides the - # kwarg-resolved value when the IFD is written (issue #1769). The - # pre-inversion decision below would otherwise transform pixels for - # one photometric value while the on-disk tag advertises a different - # one. Refuse the combination so callers do not end up with corrupt - # round-trips through the override path. - _extra_tags_photo = None - if extra_tags is not None: - for _et in extra_tags: - if _et[0] == TAG_PHOTOMETRIC: - _extra_tags_photo = int(_et[3]) - break - if (_extra_tags_photo is not None - and _extra_tags_photo != _resolved_photo - and (_extra_tags_photo == 0 or _resolved_photo == 0) - and _samples == 1): - raise ValueError( - f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo}) " - f"disagrees with photometric={photometric!r} for a " - f"single-band raster where MinIsWhite (photometric=0) " - f"requires writer-side pixel inversion. The override would " - f"either pre-invert pixels for a non-MinIsWhite tag or skip " - f"inversion for a MinIsWhite tag. Pass photometric= directly " - f"instead, or drop the override.") + _reject_disagreeing_photometric_override( + extra_tags, _resolved_photo, _samples, photometric + ) if _resolved_photo == 0 and _samples == 1: if cog or overview_levels is not None: raise NotImplementedError( @@ -1915,6 +1939,14 @@ def write_streaming(dask_data, path: str, *, "match the reader's unconditional MinIsWhite inversion " "(issue #1836). Call ``.compute()`` first to use the eager " "writer, or write with photometric='minisblack' / 'auto'.") + # The kwarg guard above only catches photometric='miniswhite'. An + # ``extra_tags`` entry of ``(TAG_PHOTOMETRIC, ...)`` silently + # overrides the IFD tag further down, so the writer must reject the + # MinIsWhite-crossing single-band case the same way the eager + # writer does. Issue #2073. + _reject_disagreeing_photometric_override( + extra_tags, _resolved_photo_ds, samples, photometric + ) # Match the eager path's dtype promotion out_dtype = dtype diff --git a/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py new file mode 100644 index 000000000..b21f2fb92 --- /dev/null +++ b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py @@ -0,0 +1,107 @@ +"""Regression tests for issue #2073. + +The eager writer rejects an ``extra_tags`` entry that overrides +``TAG_PHOTOMETRIC`` across the MinIsWhite boundary for a single-band +raster because the reader unconditionally inverts MinIsWhite single-band +data and the writer must pre-invert pixels to keep the round-trip honest. +The streaming dask path now shares the same guard via +``_reject_disagreeing_photometric_override`` in ``_writer.py``. + +Three pins: + +* the MinIsWhite-crossing single-band override is rejected; +* the non-MinIsWhite-crossing override still round-trips; +* multi-band rasters do not trigger the guard (the writer never + pre-inverts there). +""" +from __future__ import annotations + +import os + +import dask.array as da +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +TAG_PHOTOMETRIC = 262 +TYPE_SHORT = 3 + + +def test_streaming_extra_tags_miniswhite_override_rejected_2073(tmp_path): + """Dask write with extra_tags forcing photometric=0 must raise.""" + arr = xr.DataArray( + da.from_array( + np.array([[10, 20], [30, 40]], dtype=np.uint8), + chunks=(1, 2), + ), + ) + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)] + + out = tmp_path / 'tmp_2073_streaming_miniswhite.tif' + with pytest.raises(ValueError) as excinfo: + to_geotiff(arr, str(out)) + + msg = str(excinfo.value) + assert 'extra_tags' in msg + assert 'photometric' in msg.lower() or 'MinIsWhite' in msg + + +def test_streaming_extra_tags_minisblack_override_roundtrips_2073(tmp_path): + """The valid (non-MinIsWhite-crossing) override should still work.""" + src = np.array([[10, 20], [30, 40]], dtype=np.uint8) + arr = xr.DataArray( + da.from_array(src, chunks=(1, 2)), + dims=('y', 'x'), + coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]}, + ) + # photometric=1 (MinIsBlack) matches what the writer picks for a + # single-band raster anyway: no pre-inversion needed, so the guard + # must not fire. + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 1)] + + out = tmp_path / 'tmp_2073_streaming_minisblack.tif' + to_geotiff(arr, str(out)) + assert os.path.exists(out) + + back = open_geotiff(str(out)) + np.testing.assert_array_equal(np.asarray(back.values), src) + + +def test_streaming_extra_tags_miniswhite_override_multiband_not_rejected_2073( + tmp_path, +): + """The guard fires only on single-band rasters. + + Multi-band rasters do not pre-invert MinIsWhite, so a + ``TAG_PHOTOMETRIC`` override that crosses the MinIsWhite boundary + is not the kind of corruption the guard exists to prevent. Pins + the ``samples == 1`` gate inside + ``_reject_disagreeing_photometric_override``: a regression that + dropped or flipped the gate would surface as a spurious + ``ValueError`` here. + + Whether a 3-band raster tagged MinIsWhite is semantically useful + is a separate concern; this test only locks in the guard's scope. + """ + src = np.zeros((2, 2, 3), dtype=np.uint8) + src[..., 0] = 10 + src[..., 1] = 20 + src[..., 2] = 30 + arr = xr.DataArray( + da.from_array(src, chunks=(2, 2, 3)), + dims=('y', 'x', 'band'), + coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]}, + ) + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)] + + out = tmp_path / 'tmp_2073_streaming_miniswhite_multiband.tif' + # Must not raise: the writer does not pre-invert multi-band data, + # so the override is not in the "corruption that the guard exists + # to prevent" set. If it raises for an unrelated reason + # (e.g. RGB-requires-3-bands check elsewhere), let the test + # surface that as a real failure rather than swallowing it. + to_geotiff(arr, str(out)) + assert os.path.exists(out)