Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion xrspatial/geotiff/_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,16 @@ def parse_all_ifds(data: bytes | memoryview,
offset = header.first_ifd_offset
seen = set()

while offset != 0 and offset not in seen:
while offset != 0:
# Cycles in the IFD chain mean a file is malformed: a downstream
# caller silently receiving a truncated overview/mask chain is
# worse than a clear error. Raise to stay consistent with the
# past-EOF and MAX_IFDS branches below.
if offset in seen:
raise ValueError(
f"TIFF IFD chain has a cycle at offset {offset}; "
f"file is malformed"
)
seen.add(offset)
if offset >= len(data):
raise ValueError(
Expand Down
45 changes: 21 additions & 24 deletions xrspatial/geotiff/tests/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,13 @@ def test_bool_value_rejected(self):


class TestIFDChainLoop:
"""Verify parse_all_ifds bails out on a malicious IFD chain cycle.
"""Verify parse_all_ifds rejects a malicious IFD chain cycle.

Issue #1482 (T-2): the parser keeps a ``seen`` set of offsets and
breaks the loop when an IFD's next-pointer points back to an offset
already visited. Without that guard, a crafted file with IFD2
pointing back at IFD1 would loop forever.
Issue #1482 (T-2) added a ``seen`` set so the parser would not loop
forever on a cyclic chain. Issue #1913 tightened that further: a
cycle is now treated as a malformed file (``ValueError``) for
consistency with the past-EOF and ``MAX_IFDS`` branches. Returning
a truncated chain silently let callers act on a half-parsed file.
"""

def _build_two_ifd_loop(self) -> bytes:
Expand Down Expand Up @@ -221,29 +222,25 @@ def _build_two_ifd_loop(self) -> bytes:

return bytes(out)

def test_cycle_does_not_infinite_loop(self):
def test_cycle_raises_value_error(self):
"""IFD chain cycle now raises rather than truncating silently (#1913)."""
data = self._build_two_ifd_loop()
header = parse_header(data)
ifds = parse_all_ifds(data, header)

# Each unique offset is parsed exactly once. Cycle detection
# must stop us at IFD #2 even though its next-pointer is non-zero.
assert len(ifds) == 2
# Values should match what we wrote, in order.
assert ifds[0].width == 1
assert ifds[1].width == 2
with pytest.raises(ValueError, match="cycle"):
parse_all_ifds(data, header)

def test_cycle_completes_quickly(self):
"""Parsing must terminate without exhausting the buffer."""
data = self._build_two_ifd_loop()
header = parse_header(data)
# If the seen-set guard regresses this would hang; pytest-timeout
# would catch it but the bounded len() check below is enough.
ifds = parse_all_ifds(data, header)
assert len(ifds) <= 4 # generous upper bound -- we expect 2

def test_self_loop(self):
"""An IFD whose next-pointer is its own offset terminates."""
# If the seen-set guard regresses this would hang. The current
# contract raises; pre-fix it returned a truncated list. Either
# way, we should not hang.
with pytest.raises(ValueError):
parse_all_ifds(data, header)

def test_self_loop_raises(self):
"""An IFD whose next-pointer is its own offset is also a cycle."""
bo = '<'
out = bytearray()
out.extend(b'II')
Expand All @@ -254,9 +251,9 @@ def test_self_loop(self):
out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 3, 1))
out.extend(struct.pack(f'{bo}I', 7))
out.extend(struct.pack(f'{bo}I', ifd_offset)) # points to self
ifds = parse_all_ifds(bytes(out), parse_header(bytes(out)))
assert len(ifds) == 1
assert ifds[0].width == 7
data = bytes(out)
with pytest.raises(ValueError, match="cycle"):
parse_all_ifds(data, parse_header(data))


class TestReadValueRationals:
Expand Down
176 changes: 176 additions & 0 deletions xrspatial/geotiff/tests/test_ifd_cycle_1913.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""Tests for IFD-chain cycle rejection in parse_all_ifds (#1913).

``parse_all_ifds`` already raises ``ValueError`` for two malformed-chain
conditions: an offset past EOF and a chain longer than ``MAX_IFDS``.
A cyclic chain (offset A -> offset B -> offset A) is just as malformed,
but the original loop condition (``offset not in seen``) exited
silently and returned a truncated list. The fix raises ``ValueError``
with matching ``file is malformed`` wording so the contract is
consistent across all three malformed-chain branches.

The regression-guard tests in this file overlap with
``test_ifd_chain_cap.py``; they live here so a future refactor of the
loop is forced to keep both behaviours intact.
"""
from __future__ import annotations

import struct

import pytest

from xrspatial.geotiff._header import (
MAX_IFDS,
TAG_IMAGE_WIDTH,
parse_all_ifds,
parse_header,
)


def _build_cyclic_ifd_bytes(big_endian: bool = False) -> bytes:
"""Build a classic TIFF whose IFD chain forms a cycle: A -> B -> A.

Both IFDs carry a single ImageWidth tag so ``parse_ifd`` accepts
them. The second IFD's next-pointer points back to the first IFD's
offset, closing the loop.
"""
bo = '>' if big_endian else '<'
bom = b'MM' if big_endian else b'II'

out = bytearray()
out.extend(bom)
out.extend(struct.pack(f'{bo}H', 42))
first = 8
out.extend(struct.pack(f'{bo}I', first))

# IFD A at offset 8 (length 18 bytes) -> IFD B at offset 26.
ifd_a_off = 8
ifd_b_off = 26
out.extend(struct.pack(f'{bo}H', 1))
out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 4, 1))
out.extend(struct.pack(f'{bo}I', 1))
out.extend(struct.pack(f'{bo}I', ifd_b_off))

# IFD B -> back to IFD A (cycle).
out.extend(struct.pack(f'{bo}H', 1))
out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 4, 1))
out.extend(struct.pack(f'{bo}I', 2))
out.extend(struct.pack(f'{bo}I', ifd_a_off))

return bytes(out)


def _build_self_cycle_ifd_bytes() -> bytes:
"""Build a TIFF whose first IFD points at itself (degenerate cycle)."""
out = bytearray()
out.extend(b'II')
out.extend(struct.pack('<H', 42))
first = 8
out.extend(struct.pack('<I', first))
# IFD at offset 8 with next-pointer back to itself.
out.extend(struct.pack('<H', 1))
out.extend(struct.pack('<HHI', TAG_IMAGE_WIDTH, 4, 1))
out.extend(struct.pack('<I', 1))
out.extend(struct.pack('<I', first))
return bytes(out)


def _build_chained_ifd_bytes(n_ifds: int) -> bytes:
"""Build a non-cyclic chain of ``n_ifds`` IFDs (regression-guard helper)."""
out = bytearray()
out.extend(b'II')
out.extend(struct.pack('<H', 42))
first = 8
out.extend(struct.pack('<I', first))
ifd_size = 18
for i in range(n_ifds):
next_off = first + (i + 1) * ifd_size
if i == n_ifds - 1:
next_off = 0
out.extend(struct.pack('<H', 1))
out.extend(struct.pack('<HHI', TAG_IMAGE_WIDTH, 4, 1))
out.extend(struct.pack('<I', i + 1))
out.extend(struct.pack('<I', next_off))
return bytes(out)


class TestIFDChainCycle:

def test_two_ifd_cycle_rejected(self):
"""A -> B -> A must raise, not return [A, B] silently."""
data = _build_cyclic_ifd_bytes()
header = parse_header(data)
with pytest.raises(ValueError, match="cycle"):
parse_all_ifds(data, header)

def test_self_cycle_rejected(self):
"""A single IFD pointing at itself is a cycle of length one."""
data = _build_self_cycle_ifd_bytes()
header = parse_header(data)
with pytest.raises(ValueError, match="cycle"):
parse_all_ifds(data, header)

def test_cycle_error_message_mentions_offset_and_malformed(self):
"""Error message should name the repeat offset and call the file malformed."""
data = _build_cyclic_ifd_bytes()
header = parse_header(data)
with pytest.raises(ValueError) as excinfo:
parse_all_ifds(data, header)
msg = str(excinfo.value)
# The repeat offset (8) appears in the message.
assert "8" in msg
assert "malformed" in msg
assert "cycle" in msg

def test_big_endian_cycle_rejected(self):
"""Cycle detection works on big-endian files too."""
data = _build_cyclic_ifd_bytes(big_endian=True)
header = parse_header(data)
assert header.byte_order == '>'
with pytest.raises(ValueError, match="cycle"):
parse_all_ifds(data, header)


class TestMalformedChainSiblingsStillRaise:
"""Regression guards for the two other malformed-chain branches.

Reorganising the loop to handle cycles must not break the past-EOF
and MAX_IFDS branches.
"""

def test_offset_past_eof_still_raises(self):
"""A first-IFD offset past EOF must still raise ValueError."""
out = bytearray()
out.extend(b'II')
out.extend(struct.pack('<H', 42))
# Point at offset 9999, well past the 8-byte header.
out.extend(struct.pack('<I', 9999))
data = bytes(out)
header = parse_header(data)
with pytest.raises(ValueError, match="past end of file"):
parse_all_ifds(data, header)

def test_max_ifds_still_raises(self):
"""A chain longer than MAX_IFDS must still raise ValueError."""
data = _build_chained_ifd_bytes(MAX_IFDS + 1)
header = parse_header(data)
with pytest.raises(ValueError, match=str(MAX_IFDS)):
parse_all_ifds(data, header)


class TestNormalChainStillParses:

def test_short_acyclic_chain_parses(self):
"""A normal multi-IFD chain still works post-fix."""
data = _build_chained_ifd_bytes(5)
header = parse_header(data)
ifds = parse_all_ifds(data, header)
assert len(ifds) == 5
for i, ifd in enumerate(ifds):
assert ifd.width == i + 1

def test_single_ifd_chain_parses(self):
"""A one-IFD file (no next pointer) still parses."""
data = _build_chained_ifd_bytes(1)
header = parse_header(data)
ifds = parse_all_ifds(data, header)
assert len(ifds) == 1
Loading