From 7405aff48c782920660666efd636f5203026c4e4 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 15 May 2026 06:29:12 -0700 Subject: [PATCH] geotiff: parse_all_ifds raises on IFD chain cycle (#1913) Cyclic IFD chains used to be silently truncated. The loop condition `while offset != 0 and offset not in seen` exited cleanly when a repeat offset showed up and returned whatever had been parsed so far. The other two malformed-chain branches in the same function (past-EOF and MAX_IFDS) both raise ValueError, so the cycle case was the odd one out. The loop now raises ValueError on a repeat offset with the same "file is malformed" wording as the sibling errors. Callers get a clear error instead of a half-parsed file that looks plausible. test_header.py's existing TestIFDChainLoop asserted the old silent-truncation behaviour; updated to assert the new raise. --- xrspatial/geotiff/_header.py | 11 +- xrspatial/geotiff/tests/test_header.py | 45 +++-- .../geotiff/tests/test_ifd_cycle_1913.py | 176 ++++++++++++++++++ 3 files changed, 207 insertions(+), 25 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_ifd_cycle_1913.py diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index 9ec87fc59..7b19424ba 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -827,7 +827,16 @@ def parse_all_ifds(data: bytes | memoryview, offset = header.first_ifd_offset seen = set() - while offset != 0 and offset not in seen: + while offset != 0: + # Cycles in the IFD chain mean a file is malformed: a downstream + # caller silently receiving a truncated overview/mask chain is + # worse than a clear error. Raise to stay consistent with the + # past-EOF and MAX_IFDS branches below. + if offset in seen: + raise ValueError( + f"TIFF IFD chain has a cycle at offset {offset}; " + f"file is malformed" + ) seen.add(offset) if offset >= len(data): raise ValueError( diff --git a/xrspatial/geotiff/tests/test_header.py b/xrspatial/geotiff/tests/test_header.py index 84785d8f5..22008cbd3 100644 --- a/xrspatial/geotiff/tests/test_header.py +++ b/xrspatial/geotiff/tests/test_header.py @@ -186,12 +186,13 @@ def test_bool_value_rejected(self): class TestIFDChainLoop: - """Verify parse_all_ifds bails out on a malicious IFD chain cycle. + """Verify parse_all_ifds rejects a malicious IFD chain cycle. - Issue #1482 (T-2): the parser keeps a ``seen`` set of offsets and - breaks the loop when an IFD's next-pointer points back to an offset - already visited. Without that guard, a crafted file with IFD2 - pointing back at IFD1 would loop forever. + Issue #1482 (T-2) added a ``seen`` set so the parser would not loop + forever on a cyclic chain. Issue #1913 tightened that further: a + cycle is now treated as a malformed file (``ValueError``) for + consistency with the past-EOF and ``MAX_IFDS`` branches. Returning + a truncated chain silently let callers act on a half-parsed file. """ def _build_two_ifd_loop(self) -> bytes: @@ -221,29 +222,25 @@ def _build_two_ifd_loop(self) -> bytes: return bytes(out) - def test_cycle_does_not_infinite_loop(self): + def test_cycle_raises_value_error(self): + """IFD chain cycle now raises rather than truncating silently (#1913).""" data = self._build_two_ifd_loop() header = parse_header(data) - ifds = parse_all_ifds(data, header) - - # Each unique offset is parsed exactly once. Cycle detection - # must stop us at IFD #2 even though its next-pointer is non-zero. - assert len(ifds) == 2 - # Values should match what we wrote, in order. - assert ifds[0].width == 1 - assert ifds[1].width == 2 + with pytest.raises(ValueError, match="cycle"): + parse_all_ifds(data, header) def test_cycle_completes_quickly(self): """Parsing must terminate without exhausting the buffer.""" data = self._build_two_ifd_loop() header = parse_header(data) - # If the seen-set guard regresses this would hang; pytest-timeout - # would catch it but the bounded len() check below is enough. - ifds = parse_all_ifds(data, header) - assert len(ifds) <= 4 # generous upper bound -- we expect 2 - - def test_self_loop(self): - """An IFD whose next-pointer is its own offset terminates.""" + # If the seen-set guard regresses this would hang. The current + # contract raises; pre-fix it returned a truncated list. Either + # way, we should not hang. + with pytest.raises(ValueError): + parse_all_ifds(data, header) + + def test_self_loop_raises(self): + """An IFD whose next-pointer is its own offset is also a cycle.""" bo = '<' out = bytearray() out.extend(b'II') @@ -254,9 +251,9 @@ def test_self_loop(self): out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 3, 1)) out.extend(struct.pack(f'{bo}I', 7)) out.extend(struct.pack(f'{bo}I', ifd_offset)) # points to self - ifds = parse_all_ifds(bytes(out), parse_header(bytes(out))) - assert len(ifds) == 1 - assert ifds[0].width == 7 + data = bytes(out) + with pytest.raises(ValueError, match="cycle"): + parse_all_ifds(data, parse_header(data)) class TestReadValueRationals: diff --git a/xrspatial/geotiff/tests/test_ifd_cycle_1913.py b/xrspatial/geotiff/tests/test_ifd_cycle_1913.py new file mode 100644 index 000000000..8a0a5cb0d --- /dev/null +++ b/xrspatial/geotiff/tests/test_ifd_cycle_1913.py @@ -0,0 +1,176 @@ +"""Tests for IFD-chain cycle rejection in parse_all_ifds (#1913). + +``parse_all_ifds`` already raises ``ValueError`` for two malformed-chain +conditions: an offset past EOF and a chain longer than ``MAX_IFDS``. +A cyclic chain (offset A -> offset B -> offset A) is just as malformed, +but the original loop condition (``offset not in seen``) exited +silently and returned a truncated list. The fix raises ``ValueError`` +with matching ``file is malformed`` wording so the contract is +consistent across all three malformed-chain branches. + +The regression-guard tests in this file overlap with +``test_ifd_chain_cap.py``; they live here so a future refactor of the +loop is forced to keep both behaviours intact. +""" +from __future__ import annotations + +import struct + +import pytest + +from xrspatial.geotiff._header import ( + MAX_IFDS, + TAG_IMAGE_WIDTH, + parse_all_ifds, + parse_header, +) + + +def _build_cyclic_ifd_bytes(big_endian: bool = False) -> bytes: + """Build a classic TIFF whose IFD chain forms a cycle: A -> B -> A. + + Both IFDs carry a single ImageWidth tag so ``parse_ifd`` accepts + them. The second IFD's next-pointer points back to the first IFD's + offset, closing the loop. + """ + bo = '>' if big_endian else '<' + bom = b'MM' if big_endian else b'II' + + out = bytearray() + out.extend(bom) + out.extend(struct.pack(f'{bo}H', 42)) + first = 8 + out.extend(struct.pack(f'{bo}I', first)) + + # IFD A at offset 8 (length 18 bytes) -> IFD B at offset 26. + ifd_a_off = 8 + ifd_b_off = 26 + out.extend(struct.pack(f'{bo}H', 1)) + out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 4, 1)) + out.extend(struct.pack(f'{bo}I', 1)) + out.extend(struct.pack(f'{bo}I', ifd_b_off)) + + # IFD B -> back to IFD A (cycle). + out.extend(struct.pack(f'{bo}H', 1)) + out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 4, 1)) + out.extend(struct.pack(f'{bo}I', 2)) + out.extend(struct.pack(f'{bo}I', ifd_a_off)) + + return bytes(out) + + +def _build_self_cycle_ifd_bytes() -> bytes: + """Build a TIFF whose first IFD points at itself (degenerate cycle).""" + out = bytearray() + out.extend(b'II') + out.extend(struct.pack(' bytes: + """Build a non-cyclic chain of ``n_ifds`` IFDs (regression-guard helper).""" + out = bytearray() + out.extend(b'II') + out.extend(struct.pack(' B -> A must raise, not return [A, B] silently.""" + data = _build_cyclic_ifd_bytes() + header = parse_header(data) + with pytest.raises(ValueError, match="cycle"): + parse_all_ifds(data, header) + + def test_self_cycle_rejected(self): + """A single IFD pointing at itself is a cycle of length one.""" + data = _build_self_cycle_ifd_bytes() + header = parse_header(data) + with pytest.raises(ValueError, match="cycle"): + parse_all_ifds(data, header) + + def test_cycle_error_message_mentions_offset_and_malformed(self): + """Error message should name the repeat offset and call the file malformed.""" + data = _build_cyclic_ifd_bytes() + header = parse_header(data) + with pytest.raises(ValueError) as excinfo: + parse_all_ifds(data, header) + msg = str(excinfo.value) + # The repeat offset (8) appears in the message. + assert "8" in msg + assert "malformed" in msg + assert "cycle" in msg + + def test_big_endian_cycle_rejected(self): + """Cycle detection works on big-endian files too.""" + data = _build_cyclic_ifd_bytes(big_endian=True) + header = parse_header(data) + assert header.byte_order == '>' + with pytest.raises(ValueError, match="cycle"): + parse_all_ifds(data, header) + + +class TestMalformedChainSiblingsStillRaise: + """Regression guards for the two other malformed-chain branches. + + Reorganising the loop to handle cycles must not break the past-EOF + and MAX_IFDS branches. + """ + + def test_offset_past_eof_still_raises(self): + """A first-IFD offset past EOF must still raise ValueError.""" + out = bytearray() + out.extend(b'II') + out.extend(struct.pack('