From d872c63f2b8adbba5c65b442cf0273db512a5eb9 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 22 May 2026 10:57:19 -0700 Subject: [PATCH 1/2] Reject zero-denominator RATIONAL/SRATIONAL tags (#2313) `_read_value` used to coerce a malformed (denominator=0) RATIONAL or SRATIONAL to 0.0, which let corrupted `XResolution` / `YResolution` metadata round-trip through the reader as if the file were valid. Raise `ValueError` instead, name the offending tag and the denominator, and update the two existing tests that pinned the old silent behaviour. --- xrspatial/geotiff/_header.py | 53 ++++- xrspatial/geotiff/tests/test_header.py | 35 ++- .../test_rational_zero_denominator_2313.py | 215 ++++++++++++++++++ 3 files changed, 289 insertions(+), 14 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_rational_zero_denominator_2313.py diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index 8ef15a30..4dabd51d 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -84,6 +84,26 @@ TAG_GEO_ASCII_PARAMS = 34737 +# Human-readable names for tags that turn up in error messages. Only +# tags that legitimately use RATIONAL / SRATIONAL are interesting here; +# anything else gets formatted as the bare numeric id. +_TAG_NAMES = { + TAG_X_RESOLUTION: "XResolution", + TAG_Y_RESOLUTION: "YResolution", + TAG_RESOLUTION_UNIT: "ResolutionUnit", +} + + +def _tag_label(tag: int | None) -> str: + """Format a tag id for error messages.""" + if tag is None: + return "" + name = _TAG_NAMES.get(tag) + if name is not None: + return f"{name} (tag={tag})" + return f"tag={tag}" + + @dataclass class TIFFHeader: """Parsed TIFF file header.""" @@ -427,8 +447,15 @@ def parse_header(data: bytes | memoryview) -> TIFFHeader: def _read_value(data: bytes | memoryview, offset: int, type_id: int, - count: int, bo: str) -> Any: - """Read a typed value array from data at the given offset.""" + count: int, bo: str, tag: int | None = None) -> Any: + """Read a typed value array from data at the given offset. + + A zero-denominator RATIONAL or SRATIONAL is rejected with a + `ValueError` rather than silently coerced to 0.0. The TIFF spec + treats denominator-zero rationals as malformed, and quietly mapping + them to 0.0 lets corrupted `XResolution` / `YResolution` metadata + round-trip through the reader as if the file were valid. + """ type_size = TIFF_TYPE_SIZES.get(type_id, 1) if type_id == ASCII: @@ -445,7 +472,13 @@ def _read_value(data: bytes | memoryview, offset: int, type_id: int, off = offset + i * 8 num = struct.unpack_from(f'{bo}I', data, off)[0] den = struct.unpack_from(f'{bo}I', data, off + 4)[0] - values.append(num / den if den != 0 else 0.0) + if den == 0: + raise ValueError( + f"Malformed RATIONAL on {_tag_label(tag)}: " + f"numerator={num} denominator={den} at element {i}; " + f"refusing to parse possibly malformed TIFF" + ) + values.append(num / den) return tuple(values) if count > 1 else values[0] if type_id == SRATIONAL: @@ -454,7 +487,13 @@ def _read_value(data: bytes | memoryview, offset: int, type_id: int, off = offset + i * 8 num = struct.unpack_from(f'{bo}i', data, off)[0] den = struct.unpack_from(f'{bo}i', data, off + 4)[0] - values.append(num / den if den != 0 else 0.0) + if den == 0: + raise ValueError( + f"Malformed SRATIONAL on {_tag_label(tag)}: " + f"numerator={num} denominator={den} at element {i}; " + f"refusing to parse possibly malformed TIFF" + ) + values.append(num / den) return tuple(values) if count > 1 else values[0] fmt_char = TIFF_TYPE_STRUCT_CODES.get(type_id) @@ -633,7 +672,7 @@ def parse_ifd(data: bytes | memoryview, offset: int, # cap still fires. continue try: - dims[tag] = _read_value(data, value_area_offset, type_id, count, bo) + dims[tag] = _read_value(data, value_area_offset, type_id, count, bo, tag=tag) except (struct.error, ValueError): continue @@ -686,7 +725,7 @@ def parse_ifd(data: bytes | memoryview, offset: int, ) if total_size <= inline_max: - value = _read_value(data, value_area_offset, type_id, count, bo) + value = _read_value(data, value_area_offset, type_id, count, bo, tag=tag) else: if is_big: ptr = struct.unpack_from(f'{bo}Q', data, value_area_offset)[0] @@ -702,7 +741,7 @@ def parse_ifd(data: bytes | memoryview, offset: int, f"[{ptr}, {ptr + total_size}) exceeds file length " f"{data_len}" ) - value = _read_value(data, ptr, type_id, count, bo) + value = _read_value(data, ptr, type_id, count, bo, tag=tag) entries[tag] = IFDEntry(tag=tag, type_id=type_id, count=count, value=value) diff --git a/xrspatial/geotiff/tests/test_header.py b/xrspatial/geotiff/tests/test_header.py index fab75b38..d6988ff6 100644 --- a/xrspatial/geotiff/tests/test_header.py +++ b/xrspatial/geotiff/tests/test_header.py @@ -250,16 +250,37 @@ def test_self_loop_raises(self): class TestReadValueRationals: """T-8 coverage for RATIONAL / SRATIONAL edge cases in _read_value.""" - def test_rational_denominator_zero_returns_zero(self): - # numerator=5, denominator=0 -- by convention return 0.0 + def test_rational_denominator_zero_raises(self): + # numerator=5, denominator=0 -- malformed, reject instead of + # silently mapping to 0.0 (issue #2313). buf = struct.pack(' bytes: + """Build a minimal little-endian TIFF whose XResolution is a single + RATIONAL (or SRATIONAL) pointing at (numerator, denominator). + + The file lays out: + - 8-byte TIFF header + - IFD with the minimum tags a parser will accept plus + XResolution; XResolution is 8 bytes so it lives in an overflow + block after the IFD entry table + - 1 byte of strip data so StripOffsets / StripByteCounts are + consistent + """ + bo = '<' + out = bytearray() + # Header: little-endian, classic TIFF, first IFD at offset 8. + out.extend(b'II') + out.extend(struct.pack(f'{bo}H', 42)) + out.extend(struct.pack(f'{bo}I', 8)) + + tags = [ + # (tag, type_id, count, raw_bytes) + (256, 3, 1, struct.pack(f'{bo}H', 4)), # ImageWidth + (257, 3, 1, struct.pack(f'{bo}H', 4)), # ImageLength + (258, 3, 1, struct.pack(f'{bo}H', 8)), # BitsPerSample + (259, 3, 1, struct.pack(f'{bo}H', 1)), # Compression + (262, 3, 1, struct.pack(f'{bo}H', 1)), # PhotometricInterpretation + (273, 4, 1, b'\x00\x00\x00\x00'), # StripOffsets (patched) + (277, 3, 1, struct.pack(f'{bo}H', 1)), # SamplesPerPixel + (278, 3, 1, struct.pack(f'{bo}H', 4)), # RowsPerStrip + (279, 4, 1, struct.pack(f'{bo}I', 16)), # StripByteCounts + # XResolution / YResolution as RATIONAL or SRATIONAL. + (282, SRATIONAL if srational else RATIONAL, 1, + struct.pack(f'{bo}ii' if srational else f'{bo}II', + numerator, denominator)), + (283, SRATIONAL if srational else RATIONAL, 1, + struct.pack(f'{bo}ii' if srational else f'{bo}II', 72, 1)), + ] + tags.sort(key=lambda t: t[0]) + + num_entries = len(tags) + ifd_start = 8 + ifd_size = 2 + 12 * num_entries + 4 + overflow_start = ifd_start + ifd_size + + # Lay out overflow values for any tag whose raw bytes exceed 4. + overflow_buf = bytearray() + tag_offsets: dict[int, int | None] = {} + for tag, typ, count, raw in tags: + if len(raw) > 4: + tag_offsets[tag] = len(overflow_buf) + overflow_buf.extend(raw) + if len(overflow_buf) % 2: + overflow_buf.append(0) + else: + tag_offsets[tag] = None + + pixel_data_start = overflow_start + len(overflow_buf) + # Patch StripOffsets to point at the actual pixel block. + patched = [] + for tag, typ, count, raw in tags: + if tag == 273: + patched.append((tag, typ, count, + struct.pack(f'{bo}I', pixel_data_start))) + else: + patched.append((tag, typ, count, raw)) + tags = patched + + # IFD entries. + out.extend(struct.pack(f'{bo}H', num_entries)) + for tag, typ, count, raw in tags: + out.extend(struct.pack(f'{bo}HHI', tag, typ, count)) + if len(raw) <= 4: + out.extend(raw.ljust(4, b'\x00')) + else: + ptr = overflow_start + tag_offsets[tag] + out.extend(struct.pack(f'{bo}I', ptr)) + # Next IFD pointer (none). + out.extend(struct.pack(f'{bo}I', 0)) + # Overflow block. + out.extend(overflow_buf) + # Pad to pixel_data_start. + while len(out) < pixel_data_start: + out.append(0) + # 16 bytes of pixel payload (matches StripByteCounts above). + out.extend(b'\x00' * 16) + + return bytes(out) + + +class TestRationalZeroDenominator: + """Issue #2313: zero-denominator rationals must fail loudly.""" + + def test_rational_zero_denominator_surfaces_from_parse_all_ifds(self): + data = _build_tiff_with_rational_xres(72, 0) + header = parse_header(data) + with pytest.raises(ValueError, match="XResolution"): + parse_all_ifds(data, header) + + def test_rational_zero_denominator_message_includes_denominator(self): + data = _build_tiff_with_rational_xres(150, 0) + header = parse_header(data) + with pytest.raises(ValueError) as exc: + parse_all_ifds(data, header) + message = str(exc.value) + assert "Malformed RATIONAL" in message + assert "XResolution" in message + assert "denominator=0" in message + assert "numerator=150" in message + + def test_srational_zero_denominator_surfaces_from_parse_all_ifds(self): + data = _build_tiff_with_rational_xres(-5, 0, srational=True) + header = parse_header(data) + with pytest.raises(ValueError, match="Malformed SRATIONAL"): + parse_all_ifds(data, header) + + def test_rational_zero_denominator_fails_open_geotiff(self): + # The public read entry point should fail loudly too, not just + # the low-level header parser. + data = _build_tiff_with_rational_xres(72, 0) + buf = io.BytesIO(data) + with pytest.raises(ValueError, match="XResolution"): + open_geotiff(buf) + + def test_yresolution_zero_denominator_named_in_error(self): + # Same path, different tag. Build a TIFF where only YResolution + # is malformed by swapping the values via direct construction. + bo = '<' + out = bytearray() + out.extend(b'II') + out.extend(struct.pack(f'{bo}H', 42)) + out.extend(struct.pack(f'{bo}I', 8)) + + tags = [ + (256, 3, 1, struct.pack(f'{bo}H', 4)), + (257, 3, 1, struct.pack(f'{bo}H', 4)), + (258, 3, 1, struct.pack(f'{bo}H', 8)), + (259, 3, 1, struct.pack(f'{bo}H', 1)), + (262, 3, 1, struct.pack(f'{bo}H', 1)), + (273, 4, 1, b'\x00\x00\x00\x00'), + (277, 3, 1, struct.pack(f'{bo}H', 1)), + (278, 3, 1, struct.pack(f'{bo}H', 4)), + (279, 4, 1, struct.pack(f'{bo}I', 16)), + (282, RATIONAL, 1, struct.pack(f'{bo}II', 72, 1)), + # YResolution with zero denominator. + (283, RATIONAL, 1, struct.pack(f'{bo}II', 72, 0)), + ] + tags.sort(key=lambda t: t[0]) + num_entries = len(tags) + ifd_start = 8 + ifd_size = 2 + 12 * num_entries + 4 + overflow_start = ifd_start + ifd_size + overflow_buf = bytearray() + tag_offsets: dict[int, int | None] = {} + for tag, typ, count, raw in tags: + if len(raw) > 4: + tag_offsets[tag] = len(overflow_buf) + overflow_buf.extend(raw) + if len(overflow_buf) % 2: + overflow_buf.append(0) + else: + tag_offsets[tag] = None + pixel_data_start = overflow_start + len(overflow_buf) + patched = [] + for tag, typ, count, raw in tags: + if tag == 273: + patched.append((tag, typ, count, + struct.pack(f'{bo}I', pixel_data_start))) + else: + patched.append((tag, typ, count, raw)) + tags = patched + out.extend(struct.pack(f'{bo}H', num_entries)) + for tag, typ, count, raw in tags: + out.extend(struct.pack(f'{bo}HHI', tag, typ, count)) + if len(raw) <= 4: + out.extend(raw.ljust(4, b'\x00')) + else: + out.extend(struct.pack(f'{bo}I', + overflow_start + tag_offsets[tag])) + out.extend(struct.pack(f'{bo}I', 0)) + out.extend(overflow_buf) + while len(out) < pixel_data_start: + out.append(0) + out.extend(b'\x00' * 16) + + data = bytes(out) + header = parse_header(data) + with pytest.raises(ValueError, match="YResolution"): + parse_all_ifds(data, header) + + # Sanity check: the helpers we use to assert tag ids actually exist. + def test_tag_constants_present(self): + assert TAG_X_RESOLUTION == 282 + assert TAG_Y_RESOLUTION == 283 From ca86979c6f0c27ec7a6f1f389ecf9f15b16a1f0e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 22 May 2026 11:00:08 -0700 Subject: [PATCH 2/2] Address review nits: hoist imports, share TIFF helper (#2313) - Pull `TAG_X_RESOLUTION` / `TAG_Y_RESOLUTION` up into the top-level import in `test_header.py` instead of importing inside the test bodies. - Add a `which` parameter to `_build_tiff_with_malformed_resolution` in `test_rational_zero_denominator_2313.py` so the YResolution test reuses the helper instead of duplicating ~60 lines. --- xrspatial/geotiff/tests/test_header.py | 6 +- .../test_rational_zero_denominator_2313.py | 119 ++++++------------ 2 files changed, 42 insertions(+), 83 deletions(-) diff --git a/xrspatial/geotiff/tests/test_header.py b/xrspatial/geotiff/tests/test_header.py index d6988ff6..50be42db 100644 --- a/xrspatial/geotiff/tests/test_header.py +++ b/xrspatial/geotiff/tests/test_header.py @@ -7,8 +7,8 @@ import pytest from xrspatial.geotiff._dtypes import RATIONAL, SRATIONAL -from xrspatial.geotiff._header import (IFD, TAG_IMAGE_WIDTH, _read_value, parse_all_ifds, - parse_header, parse_ifd) +from xrspatial.geotiff._header import (IFD, TAG_IMAGE_WIDTH, TAG_X_RESOLUTION, TAG_Y_RESOLUTION, + _read_value, parse_all_ifds, parse_header, parse_ifd) from .conftest import make_minimal_tiff @@ -259,7 +259,6 @@ def test_rational_denominator_zero_raises(self): def test_rational_denominator_zero_names_tag(self): # When the tag is known, the error message names it. - from xrspatial.geotiff._header import TAG_X_RESOLUTION buf = struct.pack(' bytes: - """Build a minimal little-endian TIFF whose XResolution is a single - RATIONAL (or SRATIONAL) pointing at (numerator, denominator). +def _build_tiff_with_malformed_resolution(numerator: int, denominator: int, + *, which: int = TAG_X_RESOLUTION, + srational: bool = False) -> bytes: + """Build a minimal little-endian TIFF whose `which` tag is a single + RATIONAL (or SRATIONAL) pointing at `(numerator, denominator)`. + + `which` should be `TAG_X_RESOLUTION` or `TAG_Y_RESOLUTION`. The + other resolution tag is filled with a valid 72/1 so the IFD is + only malformed in the one place the test cares about. The file lays out: - 8-byte TIFF header - IFD with the minimum tags a parser will accept plus - XResolution; XResolution is 8 bytes so it lives in an overflow - block after the IFD entry table - - 1 byte of strip data so StripOffsets / StripByteCounts are - consistent + X/YResolution; rationals are 8 bytes so they live in an + overflow block after the IFD entry table + - 16 bytes of strip data so StripOffsets / StripByteCounts + are consistent """ + if which not in (TAG_X_RESOLUTION, TAG_Y_RESOLUTION): + raise ValueError( + f"which must be TAG_X_RESOLUTION or TAG_Y_RESOLUTION, got {which}" + ) + bo = '<' + rat_type = SRATIONAL if srational else RATIONAL + rat_fmt = f'{bo}ii' if srational else f'{bo}II' + + # The malformed rational goes on `which`; the other resolution tag + # gets a healthy 72/1 so it doesn't trip the parser first. + if which == TAG_X_RESOLUTION: + xres = struct.pack(rat_fmt, numerator, denominator) + yres = struct.pack(rat_fmt, 72, 1) + else: + xres = struct.pack(rat_fmt, 72, 1) + yres = struct.pack(rat_fmt, numerator, denominator) + out = bytearray() # Header: little-endian, classic TIFF, first IFD at offset 8. out.extend(b'II') @@ -50,12 +72,8 @@ def _build_tiff_with_rational_xres(numerator: int, denominator: int, (277, 3, 1, struct.pack(f'{bo}H', 1)), # SamplesPerPixel (278, 3, 1, struct.pack(f'{bo}H', 4)), # RowsPerStrip (279, 4, 1, struct.pack(f'{bo}I', 16)), # StripByteCounts - # XResolution / YResolution as RATIONAL or SRATIONAL. - (282, SRATIONAL if srational else RATIONAL, 1, - struct.pack(f'{bo}ii' if srational else f'{bo}II', - numerator, denominator)), - (283, SRATIONAL if srational else RATIONAL, 1, - struct.pack(f'{bo}ii' if srational else f'{bo}II', 72, 1)), + (TAG_X_RESOLUTION, rat_type, 1, xres), + (TAG_Y_RESOLUTION, rat_type, 1, yres), ] tags.sort(key=lambda t: t[0]) @@ -113,13 +131,13 @@ class TestRationalZeroDenominator: """Issue #2313: zero-denominator rationals must fail loudly.""" def test_rational_zero_denominator_surfaces_from_parse_all_ifds(self): - data = _build_tiff_with_rational_xres(72, 0) + data = _build_tiff_with_malformed_resolution(72, 0) header = parse_header(data) with pytest.raises(ValueError, match="XResolution"): parse_all_ifds(data, header) def test_rational_zero_denominator_message_includes_denominator(self): - data = _build_tiff_with_rational_xres(150, 0) + data = _build_tiff_with_malformed_resolution(150, 0) header = parse_header(data) with pytest.raises(ValueError) as exc: parse_all_ifds(data, header) @@ -130,7 +148,7 @@ def test_rational_zero_denominator_message_includes_denominator(self): assert "numerator=150" in message def test_srational_zero_denominator_surfaces_from_parse_all_ifds(self): - data = _build_tiff_with_rational_xres(-5, 0, srational=True) + data = _build_tiff_with_malformed_resolution(-5, 0, srational=True) header = parse_header(data) with pytest.raises(ValueError, match="Malformed SRATIONAL"): parse_all_ifds(data, header) @@ -138,73 +156,16 @@ def test_srational_zero_denominator_surfaces_from_parse_all_ifds(self): def test_rational_zero_denominator_fails_open_geotiff(self): # The public read entry point should fail loudly too, not just # the low-level header parser. - data = _build_tiff_with_rational_xres(72, 0) + data = _build_tiff_with_malformed_resolution(72, 0) buf = io.BytesIO(data) with pytest.raises(ValueError, match="XResolution"): open_geotiff(buf) def test_yresolution_zero_denominator_named_in_error(self): - # Same path, different tag. Build a TIFF where only YResolution - # is malformed by swapping the values via direct construction. - bo = '<' - out = bytearray() - out.extend(b'II') - out.extend(struct.pack(f'{bo}H', 42)) - out.extend(struct.pack(f'{bo}I', 8)) - - tags = [ - (256, 3, 1, struct.pack(f'{bo}H', 4)), - (257, 3, 1, struct.pack(f'{bo}H', 4)), - (258, 3, 1, struct.pack(f'{bo}H', 8)), - (259, 3, 1, struct.pack(f'{bo}H', 1)), - (262, 3, 1, struct.pack(f'{bo}H', 1)), - (273, 4, 1, b'\x00\x00\x00\x00'), - (277, 3, 1, struct.pack(f'{bo}H', 1)), - (278, 3, 1, struct.pack(f'{bo}H', 4)), - (279, 4, 1, struct.pack(f'{bo}I', 16)), - (282, RATIONAL, 1, struct.pack(f'{bo}II', 72, 1)), - # YResolution with zero denominator. - (283, RATIONAL, 1, struct.pack(f'{bo}II', 72, 0)), - ] - tags.sort(key=lambda t: t[0]) - num_entries = len(tags) - ifd_start = 8 - ifd_size = 2 + 12 * num_entries + 4 - overflow_start = ifd_start + ifd_size - overflow_buf = bytearray() - tag_offsets: dict[int, int | None] = {} - for tag, typ, count, raw in tags: - if len(raw) > 4: - tag_offsets[tag] = len(overflow_buf) - overflow_buf.extend(raw) - if len(overflow_buf) % 2: - overflow_buf.append(0) - else: - tag_offsets[tag] = None - pixel_data_start = overflow_start + len(overflow_buf) - patched = [] - for tag, typ, count, raw in tags: - if tag == 273: - patched.append((tag, typ, count, - struct.pack(f'{bo}I', pixel_data_start))) - else: - patched.append((tag, typ, count, raw)) - tags = patched - out.extend(struct.pack(f'{bo}H', num_entries)) - for tag, typ, count, raw in tags: - out.extend(struct.pack(f'{bo}HHI', tag, typ, count)) - if len(raw) <= 4: - out.extend(raw.ljust(4, b'\x00')) - else: - out.extend(struct.pack(f'{bo}I', - overflow_start + tag_offsets[tag])) - out.extend(struct.pack(f'{bo}I', 0)) - out.extend(overflow_buf) - while len(out) < pixel_data_start: - out.append(0) - out.extend(b'\x00' * 16) - - data = bytes(out) + # Same path, different tag. + data = _build_tiff_with_malformed_resolution( + 72, 0, which=TAG_Y_RESOLUTION + ) header = parse_header(data) with pytest.raises(ValueError, match="YResolution"): parse_all_ifds(data, header)