From 17d8f4003d8d39430758692710f8bf97dcd47c5f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:08:22 -0700 Subject: [PATCH 1/2] geotiff: validate overview_level type, reject bool and non-int (#2074) overview_level was compared numerically with no type check, so True was coerced to 1 and silently returned the first overview level, and non-int types leaked raw TypeErrors from internal comparison or list indexing. Validate at the open_geotiff entry point and at the internal select_overview_ifd selector (defense in depth for the dask/GPU/accessor readers that forward the kwarg) and raise a TypeError that names the offending type and value. --- xrspatial/geotiff/__init__.py | 19 +++- xrspatial/geotiff/_header.py | 15 +++ ...est_overview_level_type_validation_2074.py | 105 ++++++++++++++++++ 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 89c825d4d..b0d148346 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -280,7 +280,9 @@ def open_geotiff(source: str | BinaryIO, *, window : tuple or None (row_start, col_start, row_stop, col_stop) for windowed reading. overview_level : int or None - Overview level (0 = full resolution). + Overview level (0 = full resolution). Must be a non-negative int + or ``None``; passing ``bool`` or any other type raises + ``TypeError``. band : int or None Band index (0-based). None returns all bands. name : str or None @@ -371,6 +373,21 @@ def open_geotiff(source: str | BinaryIO, *, source = _coerce_path(source) + # ``overview_level`` must be an int or ``None``. ``bool`` is rejected + # explicitly because Python treats it as a subclass of ``int``: without + # this guard, ``overview_level=True`` is coerced to ``1`` and silently + # returns the first overview level instead of failing, and other non-int + # types leak raw ``TypeError`` messages from the internal numeric + # comparison or list indexing. See issue #2074. + if overview_level is not None and ( + not isinstance(overview_level, int) + or isinstance(overview_level, bool) + ): + raise TypeError( + f"overview_level must be an int or None, got " + f"{type(overview_level).__name__}: {overview_level!r}" + ) + # ``on_gpu_failure`` is GPU-only. Reject it up front for CPU/dask paths # rather than silently dropping it once dispatch is decided -- callers # otherwise have no way to learn that the policy is being ignored. diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index 7b19424ba..5f8ea6a79 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -785,6 +785,21 @@ def select_overview_ifd(ifds: list[IFD], overview_level: int | None) -> IFD: if not ifds: raise ValueError("No IFDs found in TIFF file") + # Defense in depth: callers via ``open_geotiff`` are already type-checked, + # but ``select_overview_ifd`` is also reachable from other readers + # (dask, GPU, accessor) that forward ``overview_level`` directly. Reject + # bool and non-int values up front so the failure mode is uniform across + # entry points instead of leaking raw numeric-comparison or indexing + # TypeErrors. See issue #2074. + if overview_level is not None and ( + not isinstance(overview_level, int) + or isinstance(overview_level, bool) + ): + raise TypeError( + f"overview_level must be an int or None, got " + f"{type(overview_level).__name__}: {overview_level!r}" + ) + filtered = [ifd for ifd in ifds if _is_overview_or_full_res(ifd)] if not filtered: raise ValueError( diff --git a/xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py b/xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py new file mode 100644 index 000000000..507ca441d --- /dev/null +++ b/xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py @@ -0,0 +1,105 @@ +"""Type validation for ``open_geotiff(overview_level=...)``. + +The selector in ``_header.select_overview_ifd`` compares ``overview_level`` +numerically and indexes a list with it. Without an upfront type check, +``True`` is silently coerced to ``1`` (because ``bool`` is a subclass of +``int`` in Python), so a caller passing a bool by mistake gets back the +first overview level instead of an error. Non-int types like ``str`` and +``float`` leak raw ``TypeError`` messages from the internal comparison +or indexing. See issue #2074. + +This module asserts that: + +* ``overview_level=True`` / ``False`` raise ``TypeError`` naming ``bool``. +* ``overview_level="0"`` raises ``TypeError`` naming ``str``. +* ``overview_level=1.0`` raises ``TypeError`` naming ``float``. +* ``overview_level=0``, ``1``, and ``None`` continue to work. +""" +from __future__ import annotations + +import numpy as np +import pytest + + +tifffile = pytest.importorskip("tifffile") + + +def _write_cog_with_one_overview(path: str) -> np.ndarray: + """Write a 64x64 single-band TIFF with one half-resolution overview.""" + rng = np.random.RandomState(0x2074) + arr = rng.randint(0, 256, size=(64, 64), dtype=np.uint8) + half = arr[::2, ::2] + with tifffile.TiffWriter(path) as tw: + tw.write(arr, tile=(32, 32), photometric="minisblack") + tw.write(half, tile=(32, 32), photometric="minisblack", + subfiletype=1) + return arr + + +@pytest.fixture +def cog_with_overview(tmp_path): + path = str(tmp_path / "cog_overview_level_type_2074.tif") + arr = _write_cog_with_one_overview(path) + return path, arr + + +@pytest.mark.parametrize("value", [True, False]) +def test_overview_level_bool_raises_typeerror(cog_with_overview, value): + from xrspatial.geotiff import open_geotiff + + path, _ = cog_with_overview + with pytest.raises(TypeError, match="bool"): + open_geotiff(path, overview_level=value) + + +def test_overview_level_str_raises_typeerror(cog_with_overview): + from xrspatial.geotiff import open_geotiff + + path, _ = cog_with_overview + with pytest.raises(TypeError, match="str"): + open_geotiff(path, overview_level="0") + + +def test_overview_level_float_raises_typeerror(cog_with_overview): + from xrspatial.geotiff import open_geotiff + + path, _ = cog_with_overview + with pytest.raises(TypeError, match="float"): + open_geotiff(path, overview_level=1.0) + + +def test_overview_level_zero_succeeds(cog_with_overview): + from xrspatial.geotiff import open_geotiff + + path, arr = cog_with_overview + result = open_geotiff(path, overview_level=0) + assert result.shape == arr.shape + + +def test_overview_level_one_succeeds(cog_with_overview): + from xrspatial.geotiff import open_geotiff + + path, arr = cog_with_overview + result = open_geotiff(path, overview_level=1) + # Half-resolution overview of a 64x64 source. + assert result.shape == (arr.shape[0] // 2, arr.shape[1] // 2) + + +def test_overview_level_none_succeeds(cog_with_overview): + from xrspatial.geotiff import open_geotiff + + path, arr = cog_with_overview + result = open_geotiff(path, overview_level=None) + assert result.shape == arr.shape + + +def test_overview_level_typeerror_names_value(cog_with_overview): + """Error message should name the offending value, not just the type.""" + from xrspatial.geotiff import open_geotiff + + path, _ = cog_with_overview + with pytest.raises(TypeError) as exc_info: + open_geotiff(path, overview_level="not-an-int") + msg = str(exc_info.value) + assert "str" in msg + assert "not-an-int" in msg From 02cfaba68e085c0e8873f1fa420cec55de1cdfa6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:12:40 -0700 Subject: [PATCH 2/2] geotiff: accept numpy.integer for overview_level, share validator (#2074) Review feedback: the previous isinstance(int) check rejected numpy.integer values like np.int64(1), which is inconsistent with _validate_tile_size and friends in _validation.py. Move the check into a shared _validate_overview_level_arg helper that accepts (int, np.integer) and rejects bool, then call it from both open_geotiff and select_overview_ifd so the two copies cannot drift. Add tests for np.int64 / np.int32 at levels 0 and 1. --- xrspatial/geotiff/__init__.py | 21 +++++---------- xrspatial/geotiff/_header.py | 20 +++++--------- xrspatial/geotiff/_validation.py | 26 +++++++++++++++++++ ...est_overview_level_type_validation_2074.py | 20 ++++++++++++++ 4 files changed, 59 insertions(+), 28 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index b0d148346..442d7ec3f 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -373,20 +373,13 @@ def open_geotiff(source: str | BinaryIO, *, source = _coerce_path(source) - # ``overview_level`` must be an int or ``None``. ``bool`` is rejected - # explicitly because Python treats it as a subclass of ``int``: without - # this guard, ``overview_level=True`` is coerced to ``1`` and silently - # returns the first overview level instead of failing, and other non-int - # types leak raw ``TypeError`` messages from the internal numeric - # comparison or list indexing. See issue #2074. - if overview_level is not None and ( - not isinstance(overview_level, int) - or isinstance(overview_level, bool) - ): - raise TypeError( - f"overview_level must be an int or None, got " - f"{type(overview_level).__name__}: {overview_level!r}" - ) + # Reject bool and non-int ``overview_level`` up front (issue #2074). + # Without this guard, ``overview_level=True`` is coerced to ``1`` and + # silently returns the first overview level, and non-int types leak + # raw ``TypeError`` messages from the internal numeric comparison or + # list indexing. + from ._validation import _validate_overview_level_arg + _validate_overview_level_arg(overview_level) # ``on_gpu_failure`` is GPU-only. Reject it up front for CPU/dask paths # rather than silently dropping it once dispatch is decided -- callers diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index 5f8ea6a79..ec31339da 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -785,20 +785,12 @@ def select_overview_ifd(ifds: list[IFD], overview_level: int | None) -> IFD: if not ifds: raise ValueError("No IFDs found in TIFF file") - # Defense in depth: callers via ``open_geotiff`` are already type-checked, - # but ``select_overview_ifd`` is also reachable from other readers - # (dask, GPU, accessor) that forward ``overview_level`` directly. Reject - # bool and non-int values up front so the failure mode is uniform across - # entry points instead of leaking raw numeric-comparison or indexing - # TypeErrors. See issue #2074. - if overview_level is not None and ( - not isinstance(overview_level, int) - or isinstance(overview_level, bool) - ): - raise TypeError( - f"overview_level must be an int or None, got " - f"{type(overview_level).__name__}: {overview_level!r}" - ) + # Defense in depth (issue #2074). ``open_geotiff`` already type-checks + # ``overview_level``, but this selector is also reachable from the dask, + # GPU, and accessor readers that forward the kwarg without going through + # the public entry point. + from ._validation import _validate_overview_level_arg + _validate_overview_level_arg(overview_level) filtered = [ifd for ifd in ifds if _is_overview_or_full_res(ifd)] if not filtered: diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index 24b184c20..1c2f86728 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -240,6 +240,32 @@ def _validate_tile_size_arg(tile_size): _validate_tile_size(tile_size) +def _validate_overview_level_arg(overview_level) -> None: + """Validate the ``overview_level`` kwarg type (issue #2074). + + Accepts ``None`` (default, treated as full resolution) and any + ``int`` / ``numpy.integer`` instance. ``bool`` is rejected + explicitly because Python treats it as a subclass of ``int``; + without the check, ``overview_level=True`` is coerced to ``1`` and + silently returns the first overview level. Non-int types like + ``str`` and ``float`` would otherwise leak raw ``TypeError`` + messages from the internal numeric comparison or list indexing. + + Called by ``open_geotiff`` (public entry point) and by + ``select_overview_ifd`` (defense in depth for the dask, GPU, and + accessor readers that reach the selector without going through + ``open_geotiff``). + """ + if overview_level is None: + return + if (not isinstance(overview_level, (int, np.integer)) + or isinstance(overview_level, bool)): + raise TypeError( + f"overview_level must be an int or None, got " + f"{type(overview_level).__name__}: {overview_level!r}" + ) + + def _validate_predictor_sample_format(predictor, sample_format) -> None: """Reject ``Predictor=3`` paired with a non-float ``SampleFormat`` (issue #1933). diff --git a/xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py b/xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py index 507ca441d..5cd7354f5 100644 --- a/xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py +++ b/xrspatial/geotiff/tests/test_overview_level_type_validation_2074.py @@ -93,6 +93,26 @@ def test_overview_level_none_succeeds(cog_with_overview): assert result.shape == arr.shape +@pytest.mark.parametrize("value", [np.int64(0), np.int32(0)]) +def test_overview_level_numpy_int_zero_succeeds(cog_with_overview, value): + """``np.int64`` / ``np.int32`` should be accepted like Python ints.""" + from xrspatial.geotiff import open_geotiff + + path, arr = cog_with_overview + result = open_geotiff(path, overview_level=value) + assert result.shape == arr.shape + + +@pytest.mark.parametrize("value", [np.int64(1), np.int32(1)]) +def test_overview_level_numpy_int_one_succeeds(cog_with_overview, value): + """``np.int64`` / ``np.int32`` reach the overview level just like int.""" + from xrspatial.geotiff import open_geotiff + + path, arr = cog_with_overview + result = open_geotiff(path, overview_level=value) + assert result.shape == (arr.shape[0] // 2, arr.shape[1] // 2) + + def test_overview_level_typeerror_names_value(cog_with_overview): """Error message should name the offending value, not just the type.""" from xrspatial.geotiff import open_geotiff