diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index 8ef15a30..4dabd51d 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -84,6 +84,26 @@ TAG_GEO_ASCII_PARAMS = 34737 +# Human-readable names for tags that turn up in error messages. Only +# tags that legitimately use RATIONAL / SRATIONAL are interesting here; +# anything else gets formatted as the bare numeric id. +_TAG_NAMES = { + TAG_X_RESOLUTION: "XResolution", + TAG_Y_RESOLUTION: "YResolution", + TAG_RESOLUTION_UNIT: "ResolutionUnit", +} + + +def _tag_label(tag: int | None) -> str: + """Format a tag id for error messages.""" + if tag is None: + return "" + name = _TAG_NAMES.get(tag) + if name is not None: + return f"{name} (tag={tag})" + return f"tag={tag}" + + @dataclass class TIFFHeader: """Parsed TIFF file header.""" @@ -427,8 +447,15 @@ def parse_header(data: bytes | memoryview) -> TIFFHeader: def _read_value(data: bytes | memoryview, offset: int, type_id: int, - count: int, bo: str) -> Any: - """Read a typed value array from data at the given offset.""" + count: int, bo: str, tag: int | None = None) -> Any: + """Read a typed value array from data at the given offset. + + A zero-denominator RATIONAL or SRATIONAL is rejected with a + `ValueError` rather than silently coerced to 0.0. The TIFF spec + treats denominator-zero rationals as malformed, and quietly mapping + them to 0.0 lets corrupted `XResolution` / `YResolution` metadata + round-trip through the reader as if the file were valid. + """ type_size = TIFF_TYPE_SIZES.get(type_id, 1) if type_id == ASCII: @@ -445,7 +472,13 @@ def _read_value(data: bytes | memoryview, offset: int, type_id: int, off = offset + i * 8 num = struct.unpack_from(f'{bo}I', data, off)[0] den = struct.unpack_from(f'{bo}I', data, off + 4)[0] - values.append(num / den if den != 0 else 0.0) + if den == 0: + raise ValueError( + f"Malformed RATIONAL on {_tag_label(tag)}: " + f"numerator={num} denominator={den} at element {i}; " + f"refusing to parse possibly malformed TIFF" + ) + values.append(num / den) return tuple(values) if count > 1 else values[0] if type_id == SRATIONAL: @@ -454,7 +487,13 @@ def _read_value(data: bytes | memoryview, offset: int, type_id: int, off = offset + i * 8 num = struct.unpack_from(f'{bo}i', data, off)[0] den = struct.unpack_from(f'{bo}i', data, off + 4)[0] - values.append(num / den if den != 0 else 0.0) + if den == 0: + raise ValueError( + f"Malformed SRATIONAL on {_tag_label(tag)}: " + f"numerator={num} denominator={den} at element {i}; " + f"refusing to parse possibly malformed TIFF" + ) + values.append(num / den) return tuple(values) if count > 1 else values[0] fmt_char = TIFF_TYPE_STRUCT_CODES.get(type_id) @@ -633,7 +672,7 @@ def parse_ifd(data: bytes | memoryview, offset: int, # cap still fires. continue try: - dims[tag] = _read_value(data, value_area_offset, type_id, count, bo) + dims[tag] = _read_value(data, value_area_offset, type_id, count, bo, tag=tag) except (struct.error, ValueError): continue @@ -686,7 +725,7 @@ def parse_ifd(data: bytes | memoryview, offset: int, ) if total_size <= inline_max: - value = _read_value(data, value_area_offset, type_id, count, bo) + value = _read_value(data, value_area_offset, type_id, count, bo, tag=tag) else: if is_big: ptr = struct.unpack_from(f'{bo}Q', data, value_area_offset)[0] @@ -702,7 +741,7 @@ def parse_ifd(data: bytes | memoryview, offset: int, f"[{ptr}, {ptr + total_size}) exceeds file length " f"{data_len}" ) - value = _read_value(data, ptr, type_id, count, bo) + value = _read_value(data, ptr, type_id, count, bo, tag=tag) entries[tag] = IFDEntry(tag=tag, type_id=type_id, count=count, value=value) diff --git a/xrspatial/geotiff/tests/test_header.py b/xrspatial/geotiff/tests/test_header.py index fab75b38..50be42db 100644 --- a/xrspatial/geotiff/tests/test_header.py +++ b/xrspatial/geotiff/tests/test_header.py @@ -7,8 +7,8 @@ import pytest from xrspatial.geotiff._dtypes import RATIONAL, SRATIONAL -from xrspatial.geotiff._header import (IFD, TAG_IMAGE_WIDTH, _read_value, parse_all_ifds, - parse_header, parse_ifd) +from xrspatial.geotiff._header import (IFD, TAG_IMAGE_WIDTH, TAG_X_RESOLUTION, TAG_Y_RESOLUTION, + _read_value, parse_all_ifds, parse_header, parse_ifd) from .conftest import make_minimal_tiff @@ -250,16 +250,35 @@ def test_self_loop_raises(self): class TestReadValueRationals: """T-8 coverage for RATIONAL / SRATIONAL edge cases in _read_value.""" - def test_rational_denominator_zero_returns_zero(self): - # numerator=5, denominator=0 -- by convention return 0.0 + def test_rational_denominator_zero_raises(self): + # numerator=5, denominator=0 -- malformed, reject instead of + # silently mapping to 0.0 (issue #2313). buf = struct.pack(' bytes: + """Build a minimal little-endian TIFF whose `which` tag is a single + RATIONAL (or SRATIONAL) pointing at `(numerator, denominator)`. + + `which` should be `TAG_X_RESOLUTION` or `TAG_Y_RESOLUTION`. The + other resolution tag is filled with a valid 72/1 so the IFD is + only malformed in the one place the test cares about. + + The file lays out: + - 8-byte TIFF header + - IFD with the minimum tags a parser will accept plus + X/YResolution; rationals are 8 bytes so they live in an + overflow block after the IFD entry table + - 16 bytes of strip data so StripOffsets / StripByteCounts + are consistent + """ + if which not in (TAG_X_RESOLUTION, TAG_Y_RESOLUTION): + raise ValueError( + f"which must be TAG_X_RESOLUTION or TAG_Y_RESOLUTION, got {which}" + ) + + bo = '<' + rat_type = SRATIONAL if srational else RATIONAL + rat_fmt = f'{bo}ii' if srational else f'{bo}II' + + # The malformed rational goes on `which`; the other resolution tag + # gets a healthy 72/1 so it doesn't trip the parser first. + if which == TAG_X_RESOLUTION: + xres = struct.pack(rat_fmt, numerator, denominator) + yres = struct.pack(rat_fmt, 72, 1) + else: + xres = struct.pack(rat_fmt, 72, 1) + yres = struct.pack(rat_fmt, numerator, denominator) + + out = bytearray() + # Header: little-endian, classic TIFF, first IFD at offset 8. + out.extend(b'II') + out.extend(struct.pack(f'{bo}H', 42)) + out.extend(struct.pack(f'{bo}I', 8)) + + tags = [ + # (tag, type_id, count, raw_bytes) + (256, 3, 1, struct.pack(f'{bo}H', 4)), # ImageWidth + (257, 3, 1, struct.pack(f'{bo}H', 4)), # ImageLength + (258, 3, 1, struct.pack(f'{bo}H', 8)), # BitsPerSample + (259, 3, 1, struct.pack(f'{bo}H', 1)), # Compression + (262, 3, 1, struct.pack(f'{bo}H', 1)), # PhotometricInterpretation + (273, 4, 1, b'\x00\x00\x00\x00'), # StripOffsets (patched) + (277, 3, 1, struct.pack(f'{bo}H', 1)), # SamplesPerPixel + (278, 3, 1, struct.pack(f'{bo}H', 4)), # RowsPerStrip + (279, 4, 1, struct.pack(f'{bo}I', 16)), # StripByteCounts + (TAG_X_RESOLUTION, rat_type, 1, xres), + (TAG_Y_RESOLUTION, rat_type, 1, yres), + ] + tags.sort(key=lambda t: t[0]) + + num_entries = len(tags) + ifd_start = 8 + ifd_size = 2 + 12 * num_entries + 4 + overflow_start = ifd_start + ifd_size + + # Lay out overflow values for any tag whose raw bytes exceed 4. + overflow_buf = bytearray() + tag_offsets: dict[int, int | None] = {} + for tag, typ, count, raw in tags: + if len(raw) > 4: + tag_offsets[tag] = len(overflow_buf) + overflow_buf.extend(raw) + if len(overflow_buf) % 2: + overflow_buf.append(0) + else: + tag_offsets[tag] = None + + pixel_data_start = overflow_start + len(overflow_buf) + # Patch StripOffsets to point at the actual pixel block. + patched = [] + for tag, typ, count, raw in tags: + if tag == 273: + patched.append((tag, typ, count, + struct.pack(f'{bo}I', pixel_data_start))) + else: + patched.append((tag, typ, count, raw)) + tags = patched + + # IFD entries. + out.extend(struct.pack(f'{bo}H', num_entries)) + for tag, typ, count, raw in tags: + out.extend(struct.pack(f'{bo}HHI', tag, typ, count)) + if len(raw) <= 4: + out.extend(raw.ljust(4, b'\x00')) + else: + ptr = overflow_start + tag_offsets[tag] + out.extend(struct.pack(f'{bo}I', ptr)) + # Next IFD pointer (none). + out.extend(struct.pack(f'{bo}I', 0)) + # Overflow block. + out.extend(overflow_buf) + # Pad to pixel_data_start. + while len(out) < pixel_data_start: + out.append(0) + # 16 bytes of pixel payload (matches StripByteCounts above). + out.extend(b'\x00' * 16) + + return bytes(out) + + +class TestRationalZeroDenominator: + """Issue #2313: zero-denominator rationals must fail loudly.""" + + def test_rational_zero_denominator_surfaces_from_parse_all_ifds(self): + data = _build_tiff_with_malformed_resolution(72, 0) + header = parse_header(data) + with pytest.raises(ValueError, match="XResolution"): + parse_all_ifds(data, header) + + def test_rational_zero_denominator_message_includes_denominator(self): + data = _build_tiff_with_malformed_resolution(150, 0) + header = parse_header(data) + with pytest.raises(ValueError) as exc: + parse_all_ifds(data, header) + message = str(exc.value) + assert "Malformed RATIONAL" in message + assert "XResolution" in message + assert "denominator=0" in message + assert "numerator=150" in message + + def test_srational_zero_denominator_surfaces_from_parse_all_ifds(self): + data = _build_tiff_with_malformed_resolution(-5, 0, srational=True) + header = parse_header(data) + with pytest.raises(ValueError, match="Malformed SRATIONAL"): + parse_all_ifds(data, header) + + def test_rational_zero_denominator_fails_open_geotiff(self): + # The public read entry point should fail loudly too, not just + # the low-level header parser. + data = _build_tiff_with_malformed_resolution(72, 0) + buf = io.BytesIO(data) + with pytest.raises(ValueError, match="XResolution"): + open_geotiff(buf) + + def test_yresolution_zero_denominator_named_in_error(self): + # Same path, different tag. + data = _build_tiff_with_malformed_resolution( + 72, 0, which=TAG_Y_RESOLUTION + ) + header = parse_header(data) + with pytest.raises(ValueError, match="YResolution"): + parse_all_ifds(data, header) + + # Sanity check: the helpers we use to assert tag ids actually exist. + def test_tag_constants_present(self): + assert TAG_X_RESOLUTION == 282 + assert TAG_Y_RESOLUTION == 283