From a2d3d3bc4d6c1def461f90a139dd095dec9cd6a8 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:08:30 -0700 Subject: [PATCH 1/2] geotiff: guard streaming writer against TAG_PHOTOMETRIC extra_tags override (#2073) The eager writer rejects extra_tags overrides that would require writer-side pixel inversion for single-band MinIsWhite. The streaming dask path lacked the same guard, so an override silently bypassed the inversion and the file round-tripped with inverted pixel values. --- xrspatial/geotiff/_writer.py | 26 +++++++ ...iff_streaming_photometric_override_2073.py | 68 +++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index a2468bd29..d573170f7 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -1915,6 +1915,32 @@ def write_streaming(dask_data, path: str, *, "match the reader's unconditional MinIsWhite inversion " "(issue #1836). Call ``.compute()`` first to use the eager " "writer, or write with photometric='minisblack' / 'auto'.") + # The kwarg guard above only catches photometric='miniswhite'. An + # ``extra_tags`` entry of ``(TAG_PHOTOMETRIC, ...)`` silently + # overrides the IFD tag further down (see the user_photometric_ + # override branch). Without this check, a caller passing + # photometric='auto' but extra_tags=[(262, SHORT, 1, 0)] writes + # uninverted MinIsBlack pixels with a MinIsWhite tag on disk, and + # the reader's unconditional MinIsWhite inversion produces inverted + # values on read. Mirror the eager guard at ``write`` (issue #2073). + _extra_tags_photo_ds = None + if extra_tags is not None: + for _et in extra_tags: + if _et[0] == TAG_PHOTOMETRIC: + _extra_tags_photo_ds = int(_et[3]) + break + if (_extra_tags_photo_ds is not None + and _extra_tags_photo_ds != _resolved_photo_ds + and (_extra_tags_photo_ds == 0 or _resolved_photo_ds == 0) + and samples == 1): + raise ValueError( + f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo_ds}) " + f"disagrees with photometric={photometric!r} for a " + f"single-band raster where MinIsWhite (photometric=0) " + f"requires writer-side pixel inversion. The override would " + f"either pre-invert pixels for a non-MinIsWhite tag or skip " + f"inversion for a MinIsWhite tag. Pass photometric= directly " + f"instead, or drop the override.") # Match the eager path's dtype promotion out_dtype = dtype diff --git a/xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py b/xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py new file mode 100644 index 000000000..5b8e2bab2 --- /dev/null +++ b/xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py @@ -0,0 +1,68 @@ +"""Regression tests for issue #2073. + +The eager writer rejects an ``extra_tags`` entry that overrides +``TAG_PHOTOMETRIC`` across the MinIsWhite boundary for a single-band +raster (``xrspatial/geotiff/_writer.py:1600-1617``) because the reader +unconditionally inverts MinIsWhite single-band data and the writer must +pre-invert pixels to keep the round-trip honest. + +The streaming dask path checked the ``photometric`` kwarg but accepted +the ``extra_tags`` override without the same guard, so dask writers +silently produced inverted on-disk values. These tests pin the guard on +the streaming path and confirm the non-MinIsWhite override case still +round-trips. +""" +from __future__ import annotations + +import os + +import dask.array as da +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +TAG_PHOTOMETRIC = 262 +TYPE_SHORT = 3 + + +def test_streaming_extra_tags_miniswhite_override_rejected_2073(tmp_path): + """Dask write with extra_tags forcing photometric=0 must raise.""" + arr = xr.DataArray( + da.from_array( + np.array([[10, 20], [30, 40]], dtype=np.uint8), + chunks=(1, 2), + ), + ) + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)] + + out = tmp_path / 'tmp_2073_streaming_miniswhite.tif' + with pytest.raises(ValueError) as excinfo: + to_geotiff(arr, str(out)) + + msg = str(excinfo.value) + assert 'extra_tags' in msg + assert 'photometric' in msg.lower() or 'MinIsWhite' in msg + + +def test_streaming_extra_tags_minisblack_override_roundtrips_2073(tmp_path): + """The valid (non-MinIsWhite-crossing) override should still work.""" + src = np.array([[10, 20], [30, 40]], dtype=np.uint8) + arr = xr.DataArray( + da.from_array(src, chunks=(1, 2)), + dims=('y', 'x'), + coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]}, + ) + # photometric=1 (MinIsBlack) matches what the writer picks for a + # single-band raster anyway: no pre-inversion needed, so the guard + # must not fire. + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 1)] + + out = tmp_path / 'tmp_2073_streaming_minisblack.tif' + to_geotiff(arr, str(out)) + assert os.path.exists(out) + + back = open_geotiff(str(out)) + np.testing.assert_array_equal(np.asarray(back.values), src) From 0a2328e51015dc1e7a4de08ebb11125df9ede596 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:20:50 -0700 Subject: [PATCH 2/2] geotiff: consolidate photometric-override guard + add multi-band pin (#2073) Address PR #2080 review: * Factor the duplicated TAG_PHOTOMETRIC-override guard out of the eager and streaming writers into ``_reject_disagreeing_photometric_override``. Both writers now call the helper, so the message text, the gating condition, and the single-band scope live in one place. * Add ``test_streaming_extra_tags_miniswhite_override_multiband_not_rejected_2073`` to pin the ``samples == 1`` gate. A regression that dropped or flipped the gate (so the guard fired for multi-band rasters too) would otherwise surface only by accident. * Move the regression test from ``xrspatial/tests/`` to ``xrspatial/geotiff/tests/`` to match every other geotiff regression test's location. Filename also shortened: the ``geotiff_`` prefix was redundant inside the geotiff subtree. The third dismissed nit (``_extra_tags_photo_ds`` naming) goes away with the refactor because the helper takes its own local variable name. --- xrspatial/geotiff/_writer.py | 102 +++++++++-------- ...est_streaming_photometric_override_2073.py | 107 ++++++++++++++++++ ...iff_streaming_photometric_override_2073.py | 68 ----------- 3 files changed, 161 insertions(+), 116 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py delete mode 100644 xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index d573170f7..e649a050d 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -209,6 +209,51 @@ def _resolve_photometric(photometric, samples_per_pixel: int): return photo_int, [] +def _reject_disagreeing_photometric_override( + extra_tags, resolved_photo: int, samples: int, photometric +) -> None: + """Reject an ``extra_tags`` entry that overrides ``TAG_PHOTOMETRIC`` + across the MinIsWhite boundary for a single-band raster. + + The single-band MinIsWhite path requires the writer to pre-invert + pixels (and the nodata sentinel) so the round-trip matches what the + reader unconditionally inverts. An ``extra_tags`` entry that flips + ``TAG_PHOTOMETRIC`` between MinIsWhite (0) and anything else makes + the on-disk tag advertise one model while the bytes were + pre-processed for the other -- the round-trip silently corrupts. + + The eager and streaming writers both call this guard before any + pre-inversion runs. Only the MinIsWhite-crossing single-band case + is rejected; multi-band rasters and non-crossing overrides (e.g. + photometric='minisblack' with extra_tags=[(262, SHORT, 1, 1)]) + pass through unchanged. Issues #2073 / #1769 / #1836. + """ + if extra_tags is None: + return + override = None + for _et in extra_tags: + if _et[0] == TAG_PHOTOMETRIC: + override = int(_et[3]) + break + if override is None: + return + if override == resolved_photo: + return + if not (override == 0 or resolved_photo == 0): + return + if samples != 1: + return + raise ValueError( + f"extra_tags TAG_PHOTOMETRIC override ({override}) " + f"disagrees with photometric={photometric!r} for a " + f"single-band raster where MinIsWhite (photometric=0) " + f"requires writer-side pixel inversion. The override would " + f"either pre-invert pixels for a non-MinIsWhite tag or skip " + f"inversion for a MinIsWhite tag. Pass photometric= directly " + f"instead, or drop the override." + ) + + # Byte order: always write little-endian BO = '<' @@ -1591,30 +1636,9 @@ def write(data: np.ndarray, path: str, *, # them to NaN. Issue #1836. _samples = data.shape[2] if data.ndim == 3 else 1 _resolved_photo, _ = _resolve_photometric(photometric, _samples) - # An ``extra_tags`` entry with TAG_PHOTOMETRIC silently overrides the - # kwarg-resolved value when the IFD is written (issue #1769). The - # pre-inversion decision below would otherwise transform pixels for - # one photometric value while the on-disk tag advertises a different - # one. Refuse the combination so callers do not end up with corrupt - # round-trips through the override path. - _extra_tags_photo = None - if extra_tags is not None: - for _et in extra_tags: - if _et[0] == TAG_PHOTOMETRIC: - _extra_tags_photo = int(_et[3]) - break - if (_extra_tags_photo is not None - and _extra_tags_photo != _resolved_photo - and (_extra_tags_photo == 0 or _resolved_photo == 0) - and _samples == 1): - raise ValueError( - f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo}) " - f"disagrees with photometric={photometric!r} for a " - f"single-band raster where MinIsWhite (photometric=0) " - f"requires writer-side pixel inversion. The override would " - f"either pre-invert pixels for a non-MinIsWhite tag or skip " - f"inversion for a MinIsWhite tag. Pass photometric= directly " - f"instead, or drop the override.") + _reject_disagreeing_photometric_override( + extra_tags, _resolved_photo, _samples, photometric + ) if _resolved_photo == 0 and _samples == 1: if cog or overview_levels is not None: raise NotImplementedError( @@ -1917,30 +1941,12 @@ def write_streaming(dask_data, path: str, *, "writer, or write with photometric='minisblack' / 'auto'.") # The kwarg guard above only catches photometric='miniswhite'. An # ``extra_tags`` entry of ``(TAG_PHOTOMETRIC, ...)`` silently - # overrides the IFD tag further down (see the user_photometric_ - # override branch). Without this check, a caller passing - # photometric='auto' but extra_tags=[(262, SHORT, 1, 0)] writes - # uninverted MinIsBlack pixels with a MinIsWhite tag on disk, and - # the reader's unconditional MinIsWhite inversion produces inverted - # values on read. Mirror the eager guard at ``write`` (issue #2073). - _extra_tags_photo_ds = None - if extra_tags is not None: - for _et in extra_tags: - if _et[0] == TAG_PHOTOMETRIC: - _extra_tags_photo_ds = int(_et[3]) - break - if (_extra_tags_photo_ds is not None - and _extra_tags_photo_ds != _resolved_photo_ds - and (_extra_tags_photo_ds == 0 or _resolved_photo_ds == 0) - and samples == 1): - raise ValueError( - f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo_ds}) " - f"disagrees with photometric={photometric!r} for a " - f"single-band raster where MinIsWhite (photometric=0) " - f"requires writer-side pixel inversion. The override would " - f"either pre-invert pixels for a non-MinIsWhite tag or skip " - f"inversion for a MinIsWhite tag. Pass photometric= directly " - f"instead, or drop the override.") + # overrides the IFD tag further down, so the writer must reject the + # MinIsWhite-crossing single-band case the same way the eager + # writer does. Issue #2073. + _reject_disagreeing_photometric_override( + extra_tags, _resolved_photo_ds, samples, photometric + ) # Match the eager path's dtype promotion out_dtype = dtype diff --git a/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py new file mode 100644 index 000000000..b21f2fb92 --- /dev/null +++ b/xrspatial/geotiff/tests/test_streaming_photometric_override_2073.py @@ -0,0 +1,107 @@ +"""Regression tests for issue #2073. + +The eager writer rejects an ``extra_tags`` entry that overrides +``TAG_PHOTOMETRIC`` across the MinIsWhite boundary for a single-band +raster because the reader unconditionally inverts MinIsWhite single-band +data and the writer must pre-invert pixels to keep the round-trip honest. +The streaming dask path now shares the same guard via +``_reject_disagreeing_photometric_override`` in ``_writer.py``. + +Three pins: + +* the MinIsWhite-crossing single-band override is rejected; +* the non-MinIsWhite-crossing override still round-trips; +* multi-band rasters do not trigger the guard (the writer never + pre-inverts there). +""" +from __future__ import annotations + +import os + +import dask.array as da +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +TAG_PHOTOMETRIC = 262 +TYPE_SHORT = 3 + + +def test_streaming_extra_tags_miniswhite_override_rejected_2073(tmp_path): + """Dask write with extra_tags forcing photometric=0 must raise.""" + arr = xr.DataArray( + da.from_array( + np.array([[10, 20], [30, 40]], dtype=np.uint8), + chunks=(1, 2), + ), + ) + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)] + + out = tmp_path / 'tmp_2073_streaming_miniswhite.tif' + with pytest.raises(ValueError) as excinfo: + to_geotiff(arr, str(out)) + + msg = str(excinfo.value) + assert 'extra_tags' in msg + assert 'photometric' in msg.lower() or 'MinIsWhite' in msg + + +def test_streaming_extra_tags_minisblack_override_roundtrips_2073(tmp_path): + """The valid (non-MinIsWhite-crossing) override should still work.""" + src = np.array([[10, 20], [30, 40]], dtype=np.uint8) + arr = xr.DataArray( + da.from_array(src, chunks=(1, 2)), + dims=('y', 'x'), + coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]}, + ) + # photometric=1 (MinIsBlack) matches what the writer picks for a + # single-band raster anyway: no pre-inversion needed, so the guard + # must not fire. + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 1)] + + out = tmp_path / 'tmp_2073_streaming_minisblack.tif' + to_geotiff(arr, str(out)) + assert os.path.exists(out) + + back = open_geotiff(str(out)) + np.testing.assert_array_equal(np.asarray(back.values), src) + + +def test_streaming_extra_tags_miniswhite_override_multiband_not_rejected_2073( + tmp_path, +): + """The guard fires only on single-band rasters. + + Multi-band rasters do not pre-invert MinIsWhite, so a + ``TAG_PHOTOMETRIC`` override that crosses the MinIsWhite boundary + is not the kind of corruption the guard exists to prevent. Pins + the ``samples == 1`` gate inside + ``_reject_disagreeing_photometric_override``: a regression that + dropped or flipped the gate would surface as a spurious + ``ValueError`` here. + + Whether a 3-band raster tagged MinIsWhite is semantically useful + is a separate concern; this test only locks in the guard's scope. + """ + src = np.zeros((2, 2, 3), dtype=np.uint8) + src[..., 0] = 10 + src[..., 1] = 20 + src[..., 2] = 30 + arr = xr.DataArray( + da.from_array(src, chunks=(2, 2, 3)), + dims=('y', 'x', 'band'), + coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]}, + ) + arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)] + + out = tmp_path / 'tmp_2073_streaming_miniswhite_multiband.tif' + # Must not raise: the writer does not pre-invert multi-band data, + # so the override is not in the "corruption that the guard exists + # to prevent" set. If it raises for an unrelated reason + # (e.g. RGB-requires-3-bands check elsewhere), let the test + # surface that as a real failure rather than swallowing it. + to_geotiff(arr, str(out)) + assert os.path.exists(out) diff --git a/xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py b/xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py deleted file mode 100644 index 5b8e2bab2..000000000 --- a/xrspatial/tests/test_geotiff_streaming_photometric_override_2073.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Regression tests for issue #2073. - -The eager writer rejects an ``extra_tags`` entry that overrides -``TAG_PHOTOMETRIC`` across the MinIsWhite boundary for a single-band -raster (``xrspatial/geotiff/_writer.py:1600-1617``) because the reader -unconditionally inverts MinIsWhite single-band data and the writer must -pre-invert pixels to keep the round-trip honest. - -The streaming dask path checked the ``photometric`` kwarg but accepted -the ``extra_tags`` override without the same guard, so dask writers -silently produced inverted on-disk values. These tests pin the guard on -the streaming path and confirm the non-MinIsWhite override case still -round-trips. -""" -from __future__ import annotations - -import os - -import dask.array as da -import numpy as np -import pytest -import xarray as xr - -from xrspatial.geotiff import open_geotiff, to_geotiff - - -TAG_PHOTOMETRIC = 262 -TYPE_SHORT = 3 - - -def test_streaming_extra_tags_miniswhite_override_rejected_2073(tmp_path): - """Dask write with extra_tags forcing photometric=0 must raise.""" - arr = xr.DataArray( - da.from_array( - np.array([[10, 20], [30, 40]], dtype=np.uint8), - chunks=(1, 2), - ), - ) - arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 0)] - - out = tmp_path / 'tmp_2073_streaming_miniswhite.tif' - with pytest.raises(ValueError) as excinfo: - to_geotiff(arr, str(out)) - - msg = str(excinfo.value) - assert 'extra_tags' in msg - assert 'photometric' in msg.lower() or 'MinIsWhite' in msg - - -def test_streaming_extra_tags_minisblack_override_roundtrips_2073(tmp_path): - """The valid (non-MinIsWhite-crossing) override should still work.""" - src = np.array([[10, 20], [30, 40]], dtype=np.uint8) - arr = xr.DataArray( - da.from_array(src, chunks=(1, 2)), - dims=('y', 'x'), - coords={'y': [1.0, 0.0], 'x': [0.0, 1.0]}, - ) - # photometric=1 (MinIsBlack) matches what the writer picks for a - # single-band raster anyway: no pre-inversion needed, so the guard - # must not fire. - arr.attrs['extra_tags'] = [(TAG_PHOTOMETRIC, TYPE_SHORT, 1, 1)] - - out = tmp_path / 'tmp_2073_streaming_minisblack.tif' - to_geotiff(arr, str(out)) - assert os.path.exists(out) - - back = open_geotiff(str(out)) - np.testing.assert_array_equal(np.asarray(back.values), src)