From 43e09692f88ddea36c418bdf0c8c19a8a1c5f485 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 25 May 2026 19:54:05 -0700 Subject: [PATCH 1/2] geotiff tests: consolidate VRT missing-sources cluster + _helpers/ foundation (#2393) PR 1 of the 11-PR epic #2390. Foundation: - New _helpers/ with tiff_builders.py (was conftest.py:make_minimal_tiff), tiff_surgery.py (relocated from _tiff_surgery.py), markers.py (was conftest.py: requires_gpu / requires_loopback / requires_integration plus the gpu_available / loopback_available probes). - conftest.py re-exports the helpers and markers so existing `from .conftest import ...` imports keep working. The pytest_collection_modifyitems socketserver hack stays in place until PR 11. VRT missing-sources consolidation: - New vrt/ directory with test_missing_sources.py. - Folds test_vrt_missing_sources_policy_1799.py (eager-only byte band smoke checks) and test_vrt_missing_sources_policy_2367.py (eager-plus-chunked float matrix) into one file. Parametrise over the eager and dask readers with id="eager" / id="dask"; bad-value matrix parametrised over readers and 6 invalid values. No issue numbers in test ids. - test_vrt_missing_sources_default_raise_1843.py stays in place: it covers the internal _vrt.read_vrt entry point and the XRSPATIAL_GEOTIFF_STRICT env-var override, which is out of the consolidated module's surface. Updated direct importers of _tiff_surgery: - test_local_tile_byte_cap_1664.py - test_gpu_tile_byte_cap_2026_05_18.py Updated release_gate_geotiff.rst to cite the new vrt/test_missing_sources.py path. CLUSTER_AUDIT.md maps every old file::test to its new file::test_id; deleted in a follow-up commit on this branch before merge. Verification: - pytest xrspatial/geotiff/tests/vrt/ -v: 22 passed - pytest xrspatial/geotiff/tests/ -x -q: 5706 passed, 68 skipped, 6 xfailed, 1 xpassed --- .../source/reference/release_gate_geotiff.rst | 2 +- xrspatial/geotiff/tests/CLUSTER_AUDIT.md | 75 +++++ xrspatial/geotiff/tests/_helpers/__init__.py | 8 + xrspatial/geotiff/tests/_helpers/markers.py | 67 ++++ .../geotiff/tests/_helpers/tiff_builders.py | 240 ++++++++++++++ .../tiff_surgery.py} | 0 xrspatial/geotiff/tests/conftest.py | 310 ++---------------- .../test_gpu_tile_byte_cap_2026_05_18.py | 2 +- .../tests/test_local_tile_byte_cap_1664.py | 2 +- .../test_vrt_missing_sources_policy_1799.py | 49 --- xrspatial/geotiff/tests/vrt/__init__.py | 1 + .../test_missing_sources.py} | 166 ++++++---- 12 files changed, 530 insertions(+), 392 deletions(-) create mode 100644 xrspatial/geotiff/tests/CLUSTER_AUDIT.md create mode 100644 xrspatial/geotiff/tests/_helpers/__init__.py create mode 100644 xrspatial/geotiff/tests/_helpers/markers.py create mode 100644 xrspatial/geotiff/tests/_helpers/tiff_builders.py rename xrspatial/geotiff/tests/{_tiff_surgery.py => _helpers/tiff_surgery.py} (100%) delete mode 100644 xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py create mode 100644 xrspatial/geotiff/tests/vrt/__init__.py rename xrspatial/geotiff/tests/{test_vrt_missing_sources_policy_2367.py => vrt/test_missing_sources.py} (58%) diff --git a/docs/source/reference/release_gate_geotiff.rst b/docs/source/reference/release_gate_geotiff.rst index 71bcb5cd2..b616af390 100644 --- a/docs/source/reference/release_gate_geotiff.rst +++ b/docs/source/reference/release_gate_geotiff.rst @@ -507,7 +507,7 @@ VRT supported subset - Holes surface as the band sentinel, ``attrs['vrt_holes']`` is set, and a :class:`GeoTIFFFallbackWarning` is emitted. - ``xrspatial/geotiff/tests/test_vrt_holes_attr_1734.py``, - ``xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py``, + ``xrspatial/geotiff/tests/vrt/test_missing_sources.py``, ``xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py`` - `#2342`_ * - VRT source / dest rectangle validation diff --git a/xrspatial/geotiff/tests/CLUSTER_AUDIT.md b/xrspatial/geotiff/tests/CLUSTER_AUDIT.md new file mode 100644 index 000000000..ef806cb19 --- /dev/null +++ b/xrspatial/geotiff/tests/CLUSTER_AUDIT.md @@ -0,0 +1,75 @@ +# CLUSTER_AUDIT.md — PR 1 (Foundation + VRT missing-sources) + +Temporary audit table tracking every old `file::test` and where it lands +in the consolidated layout. Deleted in a follow-up commit on the same +branch before merge per the epic #2390 contract. + +## Foundation moves + +| Old location | New location | Notes | +|---|---|---| +| `conftest.py::make_minimal_tiff` (function) | `_helpers/tiff_builders.py::make_minimal_tiff` | Re-exported from `conftest.py` so existing `from .conftest import make_minimal_tiff` keeps working. | +| `conftest.py::gpu_available` | `_helpers/markers.py::gpu_available` | Re-exported from `conftest.py`. | +| `conftest.py::loopback_available` | `_helpers/markers.py::loopback_available` | Re-exported from `conftest.py`. | +| `conftest.py::requires_gpu` | `_helpers/markers.py::requires_gpu` | Marker name unchanged; re-exported. | +| `conftest.py::requires_loopback` | `_helpers/markers.py::requires_loopback` | Marker name unchanged; re-exported. | +| `conftest.py::requires_integration` | `_helpers/markers.py::requires_integration` | Marker name unchanged; re-exported. | +| `conftest.py::pytest_collection_modifyitems` | `conftest.py::pytest_collection_modifyitems` | Left in place; PR 11 removes it per the epic. | +| `_tiff_surgery.py` (whole module) | `_helpers/tiff_surgery.py` | Verbatim relocation. Direct importers updated. | + +### Updated import sites + +| File | Change | +|---|---| +| `test_local_tile_byte_cap_1664.py` | `from ._tiff_surgery import ...` -> `from ._helpers.tiff_surgery import ...` | +| `test_gpu_tile_byte_cap_2026_05_18.py` | `from ._tiff_surgery import ...` -> `from ._helpers.tiff_surgery import ...` | + +All other test files import `make_minimal_tiff`, `gpu_available`, and +the `requires_*` markers from `conftest.py` (or +`xrspatial.geotiff.tests.conftest`), which now re-exports them. No +further changes needed. + +## VRT missing-sources cluster + +### `test_vrt_missing_sources_policy_1799.py` (deleted) + +| Old `file::test` | New `file::test_id` | Notes | +|---|---|---| +| `test_vrt_missing_sources_policy_1799.py::test_read_vrt_missing_sources_warns_and_records_hole` | `vrt/test_missing_sources.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_eager_byte_warn_records_hole` | Byte-band variant carried over verbatim. Asserts on the `"could not be read"` message and `vrt_holes`. | +| `test_vrt_missing_sources_policy_1799.py::test_read_vrt_missing_sources_raise_fails_fast` | `vrt/test_missing_sources.py::TestExplicitRaisePolicy::test_eager_byte_explicit_raise` | Byte-band variant. Renamed; assertion unchanged (raises `OSError` or `ValueError`). | +| `test_vrt_missing_sources_policy_1799.py::test_read_vrt_missing_sources_validates_policy` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_eager_byte_invalid_policy` | Byte-band invalid-policy smoke check. Parametrised matrix below covers more bad values across both readers; this stays as a literal port to keep byte-band coverage. | + +### `test_vrt_missing_sources_policy_2367.py` (deleted) + +| Old `file::test` | New `file::test_id` | Notes | +|---|---|---| +| `test_vrt_missing_sources_policy_2367.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[eager_read_vrt]` | `vrt/test_missing_sources.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[eager]` | Renamed reader id from `eager_read_vrt` to `eager`. Filename in the VRT helper renamed `missing_2367.tif` -> `missing_source.tif` (no issue numbers in fixtures). | +| `test_vrt_missing_sources_policy_2367.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[dask_open_geotiff_chunks]` | `vrt/test_missing_sources.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[dask]` | Renamed reader id; same coverage. | +| `test_vrt_missing_sources_policy_2367.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[eager_read_vrt]` | `vrt/test_missing_sources.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[eager]` | Renamed reader id. | +| `test_vrt_missing_sources_policy_2367.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[dask_open_geotiff_chunks]` | `vrt/test_missing_sources.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[dask]` | Renamed reader id. | +| `test_vrt_missing_sources_policy_2367.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_eager_warn_emits_and_fills` | `vrt/test_missing_sources.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_eager_warn_emits_and_fills` | Body unchanged except missing-source filename rename. | +| `test_vrt_missing_sources_policy_2367.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_dask_warn_emits_at_compute_and_fills` | `vrt/test_missing_sources.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_dask_warn_emits_at_compute_and_fills` | Body unchanged except missing-source filename rename. | +| `test_vrt_missing_sources_policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-ignore]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[ignore-eager]` | Reader id renamed; parametrize order unchanged. | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-RAISE]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[RAISE-eager]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-raises]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[raises-eager]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[-eager]` | Empty-string bad value. | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-warn ]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[warn -eager]` | Trailing-space bad value. | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-1]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[1-eager]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-ignore]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[ignore-dask]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-RAISE]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[RAISE-dask]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-raises]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[raises-dask]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[-dask]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-warn ]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[warn -dask]` | | +| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-1]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[1-dask]` | | + +### Files NOT folded in (justified) + +| File | Reason left in place | +|---|---| +| `test_vrt_missing_sources_default_raise_1843.py` | Different surface area: tests the *internal* `xrspatial.geotiff._vrt.read_vrt` entry point (not the public `xrspatial.geotiff.read_vrt`), plus the `XRSPATIAL_GEOTIFF_STRICT=1` env-var override. Neither is in the public-API matrix covered by `vrt/test_missing_sources.py`. A future PR that consolidates the strict-env-var coverage can fold this in then. | + +## Verification + +- Old eager byte coverage: 3 tests preserved (warn / raise / invalid). +- Old eager+dask float coverage: 16 parametrised cases preserved (default x2, explicit raise x2, warn x2, invalid 6 bad values x 2 readers). +- Net file delta: 3 files deleted (`_tiff_surgery.py`, `test_vrt_missing_sources_policy_1799.py`, `test_vrt_missing_sources_policy_2367.py`); 6 files added (`_helpers/{__init__,tiff_builders,tiff_surgery,markers}.py`, `vrt/{__init__,test_missing_sources}.py`). The two extra `__init__.py` files plus the audit file (deleted before merge) keep the net `test_*.py` count drop at exactly 2. diff --git a/xrspatial/geotiff/tests/_helpers/__init__.py b/xrspatial/geotiff/tests/_helpers/__init__.py new file mode 100644 index 000000000..b55d743e5 --- /dev/null +++ b/xrspatial/geotiff/tests/_helpers/__init__.py @@ -0,0 +1,8 @@ +"""Shared helpers for the GeoTIFF test suite. + +Centralises the TIFF-builder factory, byte-surgery helpers, and pytest +marker definitions that used to live in ``conftest.py`` and at the top +level of ``xrspatial/geotiff/tests/``. The ``conftest.py`` re-exports +``make_minimal_tiff`` and the marker names so existing +``from .conftest import ...`` test imports keep working. +""" diff --git a/xrspatial/geotiff/tests/_helpers/markers.py b/xrspatial/geotiff/tests/_helpers/markers.py new file mode 100644 index 000000000..0f993c974 --- /dev/null +++ b/xrspatial/geotiff/tests/_helpers/markers.py @@ -0,0 +1,67 @@ +"""Pytest markers and capability probes for the GeoTIFF test suite. + +Relocated from ``conftest.py``. The marker names are unchanged +(``requires_gpu``, ``requires_loopback``, ``requires_integration``) so +test files that already import them via ``from .conftest import ...`` +keep working through the conftest re-export. +""" +from __future__ import annotations + +import importlib.util +import os +import socket + +import pytest + + +def gpu_available() -> bool: + """True iff cupy imports AND a CUDA device is actually usable. + + Some sandboxes ship cupy without a working CUDA runtime. A bare + ``import cupy`` succeeds there but every device call fails, so test + files that gate on the import alone show false failures. + """ + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +def loopback_available() -> bool: + """True iff a loopback TCP bind succeeds on this host. + + Some sandboxed environments deny ``bind(('127.0.0.1', 0))``. HTTP + tests that stand up a loopback server should skip rather than error + in that case. + """ + try: + s = socket.socket() + try: + s.bind(('127.0.0.1', 0)) + finally: + s.close() + except OSError: + return False + return True + + +_HAS_GPU = gpu_available() +_HAS_LOOPBACK = loopback_available() + +requires_gpu = pytest.mark.skipif( + not _HAS_GPU, reason="cupy + CUDA required" +) +requires_loopback = pytest.mark.skipif( + not _HAS_LOOPBACK, reason="loopback bind unavailable in this environment" +) + +_RUN_INTEGRATION = os.environ.get("XRSPATIAL_RUN_INTEGRATION", "") not in ( + "", "0", "false", "False" +) +requires_integration = pytest.mark.skipif( + not _RUN_INTEGRATION, + reason="integration test; set XRSPATIAL_RUN_INTEGRATION=1 to run locally", +) diff --git a/xrspatial/geotiff/tests/_helpers/tiff_builders.py b/xrspatial/geotiff/tests/_helpers/tiff_builders.py new file mode 100644 index 000000000..144a768d5 --- /dev/null +++ b/xrspatial/geotiff/tests/_helpers/tiff_builders.py @@ -0,0 +1,240 @@ +"""In-memory TIFF builder used by the test suite. + +Relocated from ``conftest.py`` so test modules can import the builder +directly without dragging in pytest fixture state. The function +signature and behaviour are unchanged; existing call sites continue to +work through the re-export in ``conftest.py``. +""" +from __future__ import annotations + +import math +import struct + +import numpy as np + + +def make_minimal_tiff( + width: int = 4, + height: int = 4, + dtype: np.dtype = np.dtype('float32'), + pixel_data: np.ndarray | None = None, + compression: int = 1, + tiled: bool = False, + tile_size: int = 4, + big_endian: bool = False, + bigtiff: bool = False, + geo_transform: tuple | None = None, + epsg: int | None = None, +) -> bytes: + """Build a minimal valid TIFF file in memory for testing. + + Uses a three-pass approach: + 1. Collect all tags and their raw value data + 2. Compute file layout (IFD size, overflow positions, pixel data offset) + 3. Serialize everything with correct offsets + """ + bo = '>' if big_endian else '<' + bom = b'MM' if big_endian else b'II' + + if pixel_data is None: + pixel_data = np.arange(width * height, dtype=dtype).reshape(height, width) + else: + dtype = pixel_data.dtype + + bits_per_sample = dtype.itemsize * 8 + if dtype.kind == 'f': + sample_format = 3 + elif dtype.kind == 'i': + sample_format = 2 + else: + sample_format = 1 + + # --- Build pixel data (strips or tiles) --- + if tiled: + tiles_across = math.ceil(width / tile_size) + tiles_down = math.ceil(height / tile_size) + num_tiles = tiles_across * tiles_down + + tile_blobs = [] + for tr in range(tiles_down): + for tc in range(tiles_across): + tile = np.zeros((tile_size, tile_size), dtype=dtype) + r0, c0 = tr * tile_size, tc * tile_size + r1 = min(r0 + tile_size, height) + c1 = min(c0 + tile_size, width) + tile[:r1 - r0, :c1 - c0] = pixel_data[r0:r1, c0:c1] + tile_blobs.append(tile.tobytes()) + + pixel_bytes = b''.join(tile_blobs) + tile_byte_counts = [len(b) for b in tile_blobs] + else: + if big_endian and pixel_data.dtype.itemsize > 1: + pixel_bytes = pixel_data.astype(pixel_data.dtype.newbyteorder('>')).tobytes() + else: + pixel_bytes = pixel_data.tobytes() + + # --- Collect tags as (tag_id, type_id, value_bytes) --- + # value_bytes is the serialized value; if len <= 4 it's inline, else overflow. + tag_list: list[tuple[int, int, int, bytes]] = [] # (tag, type, count, raw_bytes) + + def add_short(tag, val): + tag_list.append((tag, 3, 1, struct.pack(f'{bo}H', val))) + + def add_long(tag, val): + tag_list.append((tag, 4, 1, struct.pack(f'{bo}I', val))) + + def add_shorts(tag, vals): + tag_list.append((tag, 3, len(vals), struct.pack(f'{bo}{len(vals)}H', *vals))) + + def add_longs(tag, vals): + tag_list.append((tag, 4, len(vals), struct.pack(f'{bo}{len(vals)}I', *vals))) + + def add_doubles(tag, vals): + tag_list.append((tag, 12, len(vals), struct.pack(f'{bo}{len(vals)}d', *vals))) + + add_short(256, width) # ImageWidth + add_short(257, height) # ImageLength + add_short(258, bits_per_sample) # BitsPerSample + add_short(259, compression) # Compression + add_short(262, 1) # PhotometricInterpretation + add_short(277, 1) # SamplesPerPixel + add_short(339, sample_format) # SampleFormat + + if tiled: + add_short(322, tile_size) # TileWidth + add_short(323, tile_size) # TileLength + # Placeholder offsets -- will be patched after layout is known + add_longs(324, [0] * num_tiles) # TileOffsets + add_longs(325, tile_byte_counts) # TileByteCounts + else: + add_short(278, height) # RowsPerStrip + add_long(273, 0) # StripOffsets (placeholder) + add_long(279, len(pixel_bytes)) # StripByteCounts + + if geo_transform is not None: + ox, oy, pw, ph = geo_transform + add_doubles(33550, [abs(pw), abs(ph), 0.0]) # ModelPixelScale + add_doubles(33922, [0.0, 0.0, 0.0, ox, oy, 0.0]) # ModelTiepoint + + if epsg is not None: + if epsg == 4326 or (4000 <= epsg < 5000): + model_type, key_id = 2, 2048 + else: + model_type, key_id = 1, 3072 + gkd = [1, 1, 0, 2, 1024, 0, 1, model_type, key_id, 0, 1, epsg] + add_shorts(34735, gkd) + + # Sort by tag ID (TIFF spec requirement) + tag_list.sort(key=lambda t: t[0]) + + # --- Compute layout --- + num_entries = len(tag_list) + ifd_start = 8 # right after header + ifd_size = 2 + 12 * num_entries + 4 # count + entries + next_ifd_offset + overflow_start = ifd_start + ifd_size + + # Figure out which tags need overflow (value > 4 bytes) + overflow_buf = bytearray() + for _tag, _type, _count, raw in tag_list: + if len(raw) > 4: + # This will go to overflow -- just accumulate size for now + overflow_buf.extend(raw) + # Word-align + if len(overflow_buf) % 2: + overflow_buf.append(0) + + pixel_data_start = overflow_start + len(overflow_buf) + + # --- Patch offset tags --- + # Now we know where pixel data starts, patch strip/tile offsets + patched = [] + for tag, typ, count, raw in tag_list: + if tag == 273: # StripOffsets + patched.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start))) + elif tag == 324: # TileOffsets + offsets = [] + pos = 0 + for blob in tile_blobs: + offsets.append(pixel_data_start + pos) + pos += len(blob) + patched.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets))) + else: + patched.append((tag, typ, count, raw)) + tag_list = patched + + # --- Rebuild overflow with final values --- + overflow_buf = bytearray() + tag_offsets = {} # tag -> offset within overflow_buf (or None if inline) + + for tag, typ, count, raw in tag_list: + if len(raw) > 4: + tag_offsets[tag] = len(overflow_buf) + overflow_buf.extend(raw) + if len(overflow_buf) % 2: + overflow_buf.append(0) + else: + tag_offsets[tag] = None + + # Recalculate in case overflow size changed from patching + actual_pixel_start = overflow_start + len(overflow_buf) + if actual_pixel_start != pixel_data_start: + # Need another pass to fix offsets + pixel_data_start = actual_pixel_start + patched2 = [] + for tag, typ, count, raw in tag_list: + if tag == 273: + patched2.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start))) + elif tag == 324: + offsets = [] + pos = 0 + for blob in tile_blobs: + offsets.append(pixel_data_start + pos) + pos += len(blob) + patched2.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets))) + else: + patched2.append((tag, typ, count, raw)) + tag_list = patched2 + + # Rebuild overflow again + overflow_buf = bytearray() + tag_offsets = {} + for tag, typ, count, raw in tag_list: + if len(raw) > 4: + tag_offsets[tag] = len(overflow_buf) + overflow_buf.extend(raw) + if len(overflow_buf) % 2: + overflow_buf.append(0) + else: + tag_offsets[tag] = None + + # --- Serialize --- + out = bytearray() + + # Header + out.extend(bom) + out.extend(struct.pack(f'{bo}H', 42)) + out.extend(struct.pack(f'{bo}I', ifd_start)) + + # IFD + out.extend(struct.pack(f'{bo}H', num_entries)) + + for tag, typ, count, raw in tag_list: + out.extend(struct.pack(f'{bo}HHI', tag, typ, count)) + if len(raw) <= 4: + # Inline value, padded to 4 bytes + out.extend(raw.ljust(4, b'\x00')) + else: + # Pointer to overflow + ptr = overflow_start + tag_offsets[tag] + out.extend(struct.pack(f'{bo}I', ptr)) + + # Next IFD offset + out.extend(struct.pack(f'{bo}I', 0)) + + # Overflow + out.extend(overflow_buf) + + # Pixel data + out.extend(pixel_bytes) + + return bytes(out) diff --git a/xrspatial/geotiff/tests/_tiff_surgery.py b/xrspatial/geotiff/tests/_helpers/tiff_surgery.py similarity index 100% rename from xrspatial/geotiff/tests/_tiff_surgery.py rename to xrspatial/geotiff/tests/_helpers/tiff_surgery.py diff --git a/xrspatial/geotiff/tests/conftest.py b/xrspatial/geotiff/tests/conftest.py index 86cf2a267..39813df40 100644 --- a/xrspatial/geotiff/tests/conftest.py +++ b/xrspatial/geotiff/tests/conftest.py @@ -1,67 +1,36 @@ -"""Shared fixtures for geotiff tests.""" +"""Shared fixtures for geotiff tests. + +The TIFF builder, markers, and capability probes now live under +``_helpers/``. This module re-exports them so legacy imports such as +``from .conftest import make_minimal_tiff`` or +``from xrspatial.geotiff.tests.conftest import requires_gpu`` keep +working. PR 11 of the consolidation epic (#2390) drops the +``pytest_collection_modifyitems`` socketserver hack below; until then +it stays in place to keep the existing loopback-skip behaviour. +""" from __future__ import annotations -import importlib.util -import math -import os -import socket -import struct - import numpy as np import pytest - -def gpu_available() -> bool: - """True iff cupy imports AND a CUDA device is actually usable. - - Some sandboxes ship cupy without a working CUDA runtime. A bare - ``import cupy`` succeeds there but every device call fails, so test - files that gate on the import alone show false failures. - """ - if importlib.util.find_spec("cupy") is None: - return False - try: - import cupy - return bool(cupy.cuda.is_available()) - except Exception: - return False - - -def loopback_available() -> bool: - """True iff a loopback TCP bind succeeds on this host. - - Some sandboxed environments deny ``bind(('127.0.0.1', 0))``. HTTP - tests that stand up a loopback server should skip rather than error - in that case. - """ - try: - s = socket.socket() - try: - s.bind(('127.0.0.1', 0)) - finally: - s.close() - except OSError: - return False - return True - - -_HAS_GPU = gpu_available() -_HAS_LOOPBACK = loopback_available() - -requires_gpu = pytest.mark.skipif( - not _HAS_GPU, reason="cupy + CUDA required" -) -requires_loopback = pytest.mark.skipif( - not _HAS_LOOPBACK, reason="loopback bind unavailable in this environment" +from ._helpers.markers import ( + _HAS_LOOPBACK, + gpu_available, + loopback_available, + requires_gpu, + requires_integration, + requires_loopback, ) +from ._helpers.tiff_builders import make_minimal_tiff -_RUN_INTEGRATION = os.environ.get("XRSPATIAL_RUN_INTEGRATION", "") not in ( - "", "0", "false", "False" -) -requires_integration = pytest.mark.skipif( - not _RUN_INTEGRATION, - reason="integration test; set XRSPATIAL_RUN_INTEGRATION=1 to run locally", -) +__all__ = [ + "gpu_available", + "loopback_available", + "make_minimal_tiff", + "requires_gpu", + "requires_integration", + "requires_loopback", +] def pytest_collection_modifyitems(config, items): @@ -119,233 +88,6 @@ def _references_loopback(src: str) -> bool: item.add_marker(skip_marker) -def make_minimal_tiff( - width: int = 4, - height: int = 4, - dtype: np.dtype = np.dtype('float32'), - pixel_data: np.ndarray | None = None, - compression: int = 1, - tiled: bool = False, - tile_size: int = 4, - big_endian: bool = False, - bigtiff: bool = False, - geo_transform: tuple | None = None, - epsg: int | None = None, -) -> bytes: - """Build a minimal valid TIFF file in memory for testing. - - Uses a three-pass approach: - 1. Collect all tags and their raw value data - 2. Compute file layout (IFD size, overflow positions, pixel data offset) - 3. Serialize everything with correct offsets - """ - bo = '>' if big_endian else '<' - bom = b'MM' if big_endian else b'II' - - if pixel_data is None: - pixel_data = np.arange(width * height, dtype=dtype).reshape(height, width) - else: - dtype = pixel_data.dtype - - bits_per_sample = dtype.itemsize * 8 - if dtype.kind == 'f': - sample_format = 3 - elif dtype.kind == 'i': - sample_format = 2 - else: - sample_format = 1 - - # --- Build pixel data (strips or tiles) --- - if tiled: - tiles_across = math.ceil(width / tile_size) - tiles_down = math.ceil(height / tile_size) - num_tiles = tiles_across * tiles_down - - tile_blobs = [] - for tr in range(tiles_down): - for tc in range(tiles_across): - tile = np.zeros((tile_size, tile_size), dtype=dtype) - r0, c0 = tr * tile_size, tc * tile_size - r1 = min(r0 + tile_size, height) - c1 = min(c0 + tile_size, width) - tile[:r1 - r0, :c1 - c0] = pixel_data[r0:r1, c0:c1] - tile_blobs.append(tile.tobytes()) - - pixel_bytes = b''.join(tile_blobs) - tile_byte_counts = [len(b) for b in tile_blobs] - else: - if big_endian and pixel_data.dtype.itemsize > 1: - pixel_bytes = pixel_data.astype(pixel_data.dtype.newbyteorder('>')).tobytes() - else: - pixel_bytes = pixel_data.tobytes() - - # --- Collect tags as (tag_id, type_id, value_bytes) --- - # value_bytes is the serialized value; if len <= 4 it's inline, else overflow. - tag_list: list[tuple[int, int, int, bytes]] = [] # (tag, type, count, raw_bytes) - - def add_short(tag, val): - tag_list.append((tag, 3, 1, struct.pack(f'{bo}H', val))) - - def add_long(tag, val): - tag_list.append((tag, 4, 1, struct.pack(f'{bo}I', val))) - - def add_shorts(tag, vals): - tag_list.append((tag, 3, len(vals), struct.pack(f'{bo}{len(vals)}H', *vals))) - - def add_longs(tag, vals): - tag_list.append((tag, 4, len(vals), struct.pack(f'{bo}{len(vals)}I', *vals))) - - def add_doubles(tag, vals): - tag_list.append((tag, 12, len(vals), struct.pack(f'{bo}{len(vals)}d', *vals))) - - add_short(256, width) # ImageWidth - add_short(257, height) # ImageLength - add_short(258, bits_per_sample) # BitsPerSample - add_short(259, compression) # Compression - add_short(262, 1) # PhotometricInterpretation - add_short(277, 1) # SamplesPerPixel - add_short(339, sample_format) # SampleFormat - - if tiled: - add_short(322, tile_size) # TileWidth - add_short(323, tile_size) # TileLength - # Placeholder offsets -- will be patched after layout is known - add_longs(324, [0] * num_tiles) # TileOffsets - add_longs(325, tile_byte_counts) # TileByteCounts - else: - add_short(278, height) # RowsPerStrip - add_long(273, 0) # StripOffsets (placeholder) - add_long(279, len(pixel_bytes)) # StripByteCounts - - if geo_transform is not None: - ox, oy, pw, ph = geo_transform - add_doubles(33550, [abs(pw), abs(ph), 0.0]) # ModelPixelScale - add_doubles(33922, [0.0, 0.0, 0.0, ox, oy, 0.0]) # ModelTiepoint - - if epsg is not None: - if epsg == 4326 or (4000 <= epsg < 5000): - model_type, key_id = 2, 2048 - else: - model_type, key_id = 1, 3072 - gkd = [1, 1, 0, 2, 1024, 0, 1, model_type, key_id, 0, 1, epsg] - add_shorts(34735, gkd) - - # Sort by tag ID (TIFF spec requirement) - tag_list.sort(key=lambda t: t[0]) - - # --- Compute layout --- - num_entries = len(tag_list) - ifd_start = 8 # right after header - ifd_size = 2 + 12 * num_entries + 4 # count + entries + next_ifd_offset - overflow_start = ifd_start + ifd_size - - # Figure out which tags need overflow (value > 4 bytes) - overflow_buf = bytearray() - for _tag, _type, _count, raw in tag_list: - if len(raw) > 4: - # This will go to overflow -- just accumulate size for now - overflow_buf.extend(raw) - # Word-align - if len(overflow_buf) % 2: - overflow_buf.append(0) - - pixel_data_start = overflow_start + len(overflow_buf) - - # --- Patch offset tags --- - # Now we know where pixel data starts, patch strip/tile offsets - patched = [] - for tag, typ, count, raw in tag_list: - if tag == 273: # StripOffsets - patched.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start))) - elif tag == 324: # TileOffsets - offsets = [] - pos = 0 - for blob in tile_blobs: - offsets.append(pixel_data_start + pos) - pos += len(blob) - patched.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets))) - else: - patched.append((tag, typ, count, raw)) - tag_list = patched - - # --- Rebuild overflow with final values --- - overflow_buf = bytearray() - tag_offsets = {} # tag -> offset within overflow_buf (or None if inline) - - for tag, typ, count, raw in tag_list: - if len(raw) > 4: - tag_offsets[tag] = len(overflow_buf) - overflow_buf.extend(raw) - if len(overflow_buf) % 2: - overflow_buf.append(0) - else: - tag_offsets[tag] = None - - # Recalculate in case overflow size changed from patching - actual_pixel_start = overflow_start + len(overflow_buf) - if actual_pixel_start != pixel_data_start: - # Need another pass to fix offsets - pixel_data_start = actual_pixel_start - patched2 = [] - for tag, typ, count, raw in tag_list: - if tag == 273: - patched2.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start))) - elif tag == 324: - offsets = [] - pos = 0 - for blob in tile_blobs: - offsets.append(pixel_data_start + pos) - pos += len(blob) - patched2.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets))) - else: - patched2.append((tag, typ, count, raw)) - tag_list = patched2 - - # Rebuild overflow again - overflow_buf = bytearray() - tag_offsets = {} - for tag, typ, count, raw in tag_list: - if len(raw) > 4: - tag_offsets[tag] = len(overflow_buf) - overflow_buf.extend(raw) - if len(overflow_buf) % 2: - overflow_buf.append(0) - else: - tag_offsets[tag] = None - - # --- Serialize --- - out = bytearray() - - # Header - out.extend(bom) - out.extend(struct.pack(f'{bo}H', 42)) - out.extend(struct.pack(f'{bo}I', ifd_start)) - - # IFD - out.extend(struct.pack(f'{bo}H', num_entries)) - - for tag, typ, count, raw in tag_list: - out.extend(struct.pack(f'{bo}HHI', tag, typ, count)) - if len(raw) <= 4: - # Inline value, padded to 4 bytes - out.extend(raw.ljust(4, b'\x00')) - else: - # Pointer to overflow - ptr = overflow_start + tag_offsets[tag] - out.extend(struct.pack(f'{bo}I', ptr)) - - # Next IFD offset - out.extend(struct.pack(f'{bo}I', 0)) - - # Overflow - out.extend(overflow_buf) - - # Pixel data - out.extend(pixel_bytes) - - return bytes(out) - - @pytest.fixture def simple_float32_tiff(): """4x4 float32 stripped TIFF with sequential values.""" diff --git a/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py b/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py index 744fac6f3..127aef86a 100644 --- a/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py +++ b/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py @@ -30,7 +30,7 @@ from xrspatial.geotiff import read_geotiff_gpu, to_geotiff -from ._tiff_surgery import patch_byte_counts as _patch_byte_counts +from ._helpers.tiff_surgery import patch_byte_counts as _patch_byte_counts def _cupy_available() -> bool: diff --git a/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py b/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py index 51dbe7e2c..25216f539 100644 --- a/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py +++ b/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py @@ -19,7 +19,7 @@ from xrspatial.geotiff import _reader as _reader_mod from xrspatial.geotiff import open_geotiff, to_geotiff -from ._tiff_surgery import patch_byte_counts as _patch_byte_counts # noqa: E402 +from ._helpers.tiff_surgery import patch_byte_counts as _patch_byte_counts # noqa: E402 # --------------------------------------------------------------------------- # Helpers -- patch in-place IFD entries for tile / strip byte counts diff --git a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py b/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py deleted file mode 100644 index 29de1ba5e..000000000 --- a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py +++ /dev/null @@ -1,49 +0,0 @@ -"""VRT missing-source handling has an explicit policy (#1799).""" -from __future__ import annotations - -import pytest - -from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt - - -def _write_missing_source_vrt(path): - path.write_text( - '\n' - ' \n' - ' \n' - ' missing.tif' - '\n' - ' 1\n' - ' \n' - ' \n' - ' \n' - ' \n' - '\n' - ) - - -def test_read_vrt_missing_sources_warns_and_records_hole(tmp_path): - vrt = tmp_path / "tmp_1799_missing_warn.vrt" - _write_missing_source_vrt(vrt) - - with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): - da = read_vrt(str(vrt), missing_sources='warn') - - assert 'vrt_holes' in da.attrs - assert da.attrs['vrt_holes'][0]['source'].endswith('missing.tif') - - -def test_read_vrt_missing_sources_raise_fails_fast(tmp_path): - vrt = tmp_path / "tmp_1799_missing_raise.vrt" - _write_missing_source_vrt(vrt) - - with pytest.raises((OSError, ValueError)): - read_vrt(str(vrt), missing_sources='raise') - - -def test_read_vrt_missing_sources_validates_policy(tmp_path): - vrt = tmp_path / "tmp_1799_missing_bad_policy.vrt" - _write_missing_source_vrt(vrt) - - with pytest.raises(ValueError, match="missing_sources"): - read_vrt(str(vrt), missing_sources='ignore') diff --git a/xrspatial/geotiff/tests/vrt/__init__.py b/xrspatial/geotiff/tests/vrt/__init__.py new file mode 100644 index 000000000..128c43a8a --- /dev/null +++ b/xrspatial/geotiff/tests/vrt/__init__.py @@ -0,0 +1 @@ +"""VRT-focused tests for the GeoTIFF module.""" diff --git a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_2367.py b/xrspatial/geotiff/tests/vrt/test_missing_sources.py similarity index 58% rename from xrspatial/geotiff/tests/test_vrt_missing_sources_policy_2367.py rename to xrspatial/geotiff/tests/vrt/test_missing_sources.py index 071e9e92c..da7009ec0 100644 --- a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_2367.py +++ b/xrspatial/geotiff/tests/vrt/test_missing_sources.py @@ -1,29 +1,26 @@ -"""Consolidated VRT ``missing_sources`` policy matrix (#2367, work item of #2342). +"""VRT ``missing_sources`` policy matrix. -This file complements ``test_vrt_missing_sources_policy_1799.py`` and -``test_vrt_chunked_missing_sources_1799.py`` by covering the full -release contract in one place: every policy value (default, -``'raise'``, ``'warn'``, invalid) is exercised against both read paths -(eager ``read_vrt`` and dask ``open_geotiff(..., chunks=...)``), with -assertions on the exception or warning type, the message text, and the -actual output array values where applicable. - -The existing 1799 / 1843 / 2265 tests pin individual cases. This file -keeps the four-by-two matrix together so a future kwarg refactor that -silently drops parity between the eager and chunked paths regresses a -single, focused test file. +Consolidates the eager-only smoke checks that previously lived in +``test_vrt_missing_sources_policy_1799.py`` with the eager-plus-chunked +matrix from ``test_vrt_missing_sources_policy_2367.py``. Release contract (see ``_backends/vrt.py:206`` docstring): -* ``'raise'`` is the default since #1860. -* ``'raise'`` fails fast with ``FileNotFoundError`` naming the missing - source path. The chunked path raises at build time (#2265) so a +* ``'raise'`` is the default. The eager and chunked paths both fail + fast with ``FileNotFoundError`` naming the missing source path so a partial mosaic never surfaces silently from a delayed compute. -* ``'warn'`` is the explicit opt-in. It emits - ``GeoTIFFFallbackWarning`` naming the missing source and returns the - mosaic with NaN (or the band's nodata sentinel) in the corresponding - region. ``attrs['vrt_holes']`` records the affected source(s). -* Any other value raises ``ValueError`` naming the bad kwarg. +* ``'warn'`` is the explicit opt-in. It emits ``GeoTIFFFallbackWarning`` + naming the missing source and returns the mosaic with NaN (or the + band's nodata sentinel) in the corresponding region. + ``attrs['vrt_holes']`` records the affected source(s). +* Any other value raises ``ValueError`` naming the bad kwarg and + echoing the bad value via ``repr()``. + +The companion file ``test_vrt_missing_sources_default_raise_1843.py`` +stays in place for now: it exercises the internal +``xrspatial.geotiff._vrt.read_vrt`` entry point and the +``XRSPATIAL_GEOTIFF_STRICT=1`` env-var override, neither of which is in +this module's surface. """ from __future__ import annotations @@ -45,15 +42,47 @@ PRESENT_FILL = 7.0 -def _build_partial_vrt(tmp_path) -> tuple[str, str, str]: - """Build a 2-source VRT: left half is real, right half points at a - non-existent file. +# --------------------------------------------------------------------------- +# VRT fixtures. +# +# Two shapes: +# +# * ``byte_missing_vrt`` -- a 2x2 ``Byte`` VRT whose only source does not +# exist on disk. The smallest case that exercises the missing-source +# guard. Inherited from the old _1799 smoke checks. +# * ``partial_float_vrt`` -- an 8x4 ``Float32`` VRT split across two +# sources. The left half points at a real GeoTIFF written through +# ``to_geotiff``; the right half points at a missing file. Exercises +# the NaN-fill / vrt_holes contract that the chunked path also has to +# honour at compute time. +# --------------------------------------------------------------------------- + +def _write_byte_missing_vrt(tmp_path) -> str: + """All-missing 2x2 Byte VRT. Returns the VRT path as ``str``.""" + vrt = tmp_path / "byte_missing.vrt" + vrt.write_text( + '\n' + ' \n' + ' \n' + ' missing.tif' + '\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + return str(vrt) + + +def _write_partial_float_vrt(tmp_path) -> tuple[str, str, str]: + """Two-source partial mosaic. - Returns ``(vrt_path, present_src_path, missing_path)``. Filenames - embed issue #2367 to keep parallel test runs from colliding on - shared tmp roots. + Returns ``(vrt_path, present_src_path, missing_path)`` as strings. """ - src = os.path.join(tmp_path, "src_2367_present.tif") + tmp_str = str(tmp_path) + src = os.path.join(tmp_str, "src_present.tif") arr = np.full((4, 4), PRESENT_FILL, dtype=np.float32) da = xr.DataArray( arr, dims=("y", "x"), @@ -61,8 +90,8 @@ def _build_partial_vrt(tmp_path) -> tuple[str, str, str]: ) to_geotiff(da, src) - missing = os.path.join(tmp_path, "missing_2367.tif") - vrt_path = os.path.join(tmp_path, "partial_2367.vrt") + missing = os.path.join(tmp_str, "missing_source.tif") + vrt_path = os.path.join(tmp_str, "partial.vrt") with open(vrt_path, "w") as f: f.write( '\n' @@ -87,7 +116,7 @@ def _build_partial_vrt(tmp_path) -> tuple[str, str, str]: # --------------------------------------------------------------------------- -# Reader-path fixtures. Each "reader" callable accepts ``(source, +# Reader-path parametrisation. Each ``reader`` callable takes ``(source, # **kwargs)`` and returns a DataArray. The eager reader returns a numpy- # backed array; the dask reader returns a chunked DataArray that still # needs ``.compute()`` to materialise values. @@ -106,8 +135,8 @@ def _dask_reader(source, **kwargs): READERS = [ - pytest.param(_eager_reader, id="eager_read_vrt"), - pytest.param(_dask_reader, id="dask_open_geotiff_chunks"), + pytest.param(_eager_reader, id="eager"), + pytest.param(_dask_reader, id="dask"), ] @@ -117,26 +146,31 @@ def _dask_reader(source, **kwargs): class TestDefaultPolicyRaises: """No ``missing_sources`` kwarg -> ``FileNotFoundError`` naming the - missing source. This is the public default since #1860 and the - release matrix in #2342 calls it out as a hard contract.""" + missing source. The public default since the lenient-by-default + behaviour was removed.""" @pytest.mark.parametrize("reader", READERS) def test_default_raises_filenotfound_naming_source( self, reader, tmp_path, ): - vrt_path, _, missing = _build_partial_vrt(str(tmp_path)) + vrt_path, _, missing = _write_partial_float_vrt(tmp_path) with pytest.raises(FileNotFoundError) as excinfo: reader(vrt_path) # The basename of the missing source must appear in the - # message. The chunked path quotes the full path; the eager - # path may quote just the source filename or the resolved - # absolute path depending on which guard fires first. Match on - # the basename to stay portable across both. - assert "missing_2367.tif" in str(excinfo.value), ( + # message. Different code paths quote the full path vs just the + # filename; matching on the basename keeps this portable. + assert "missing_source.tif" in str(excinfo.value), ( f"Default policy raise must name the missing source. " f"Got: {excinfo.value!r}" ) + def test_eager_byte_default_raises(self, tmp_path): + """Smoke check for the byte-band path with no real source on + disk. Inherited from the old _1799 file.""" + vrt = _write_byte_missing_vrt(tmp_path) + with pytest.raises((OSError, ValueError)): + read_vrt(vrt) + # --------------------------------------------------------------------------- # Explicit raise: same shape as default. @@ -149,14 +183,19 @@ class TestExplicitRaisePolicy: @pytest.mark.parametrize("reader", READERS) def test_explicit_raise_matches_default(self, reader, tmp_path): - vrt_path, _, _ = _build_partial_vrt(str(tmp_path)) + vrt_path, _, _ = _write_partial_float_vrt(tmp_path) with pytest.raises(FileNotFoundError) as excinfo: reader(vrt_path, missing_sources="raise") - assert "missing_2367.tif" in str(excinfo.value) + assert "missing_source.tif" in str(excinfo.value) + + def test_eager_byte_explicit_raise(self, tmp_path): + vrt = _write_byte_missing_vrt(tmp_path) + with pytest.raises((OSError, ValueError)): + read_vrt(vrt, missing_sources="raise") # --------------------------------------------------------------------------- -# Warn opt-in: warning class, message, and output values all pinned. +# Warn opt-in: warning class, message, vrt_holes, and array values pinned. # --------------------------------------------------------------------------- class TestWarnPolicyEmitsWarningAndFillsNodata: @@ -175,20 +214,16 @@ class TestWarnPolicyEmitsWarningAndFillsNodata: """ def test_eager_warn_emits_and_fills(self, tmp_path): - vrt_path, _, missing = _build_partial_vrt(str(tmp_path)) - # Use ``match=`` for the class + message check in one step, - # matching the sibling 1799 test's style. + vrt_path, _, missing = _write_partial_float_vrt(tmp_path) with pytest.warns( - GeoTIFFFallbackWarning, match="missing_2367.tif", + GeoTIFFFallbackWarning, match="missing_source.tif", ): da = read_vrt(vrt_path, missing_sources="warn") - # vrt_holes attr is populated and points at the missing file. assert "vrt_holes" in da.attrs sources = [h["source"] for h in da.attrs["vrt_holes"]] - assert any(s.endswith("missing_2367.tif") for s in sources) + assert any(s.endswith("missing_source.tif") for s in sources) - # Output values: present half == 7.0, missing half == NaN. out = np.asarray(da) np.testing.assert_array_equal( out[:, :4], np.full((4, 4), PRESENT_FILL, dtype=np.float32), @@ -199,9 +234,9 @@ def test_eager_warn_emits_and_fills(self, tmp_path): ) def test_dask_warn_emits_at_compute_and_fills(self, tmp_path): - vrt_path, _, missing = _build_partial_vrt(str(tmp_path)) - # Build the lazy DataArray. The parse-time sweep populates - # ``vrt_holes`` here without forcing a decode. + vrt_path, _, missing = _write_partial_float_vrt(tmp_path) + # The parse-time sweep populates ``vrt_holes`` at build so + # callers can branch on partial mosaics without computing. da = open_geotiff( vrt_path, chunks=4, missing_sources="warn", ) @@ -218,7 +253,7 @@ def test_dask_warn_emits_at_compute_and_fills(self, tmp_path): str(w.message) for w in caught if isinstance(w.message, GeoTIFFFallbackWarning) ] - assert any("missing_2367.tif" in m for m in msgs), ( + assert any("missing_source.tif" in m for m in msgs), ( f"Chunked warn path must emit GeoTIFFFallbackWarning at " f"compute naming the missing source; got: {msgs!r}" ) @@ -229,6 +264,16 @@ def test_dask_warn_emits_at_compute_and_fills(self, tmp_path): ) assert np.all(np.isnan(out[:, 4:])) + def test_eager_byte_warn_records_hole(self, tmp_path): + """Byte-band warn path: warning fires and ``vrt_holes`` is + populated even when there is no present half. Inherited from + the old _1799 file.""" + vrt = _write_byte_missing_vrt(tmp_path) + with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): + da = read_vrt(vrt, missing_sources="warn") + assert "vrt_holes" in da.attrs + assert da.attrs["vrt_holes"][0]["source"].endswith("missing.tif") + # --------------------------------------------------------------------------- # Invalid policy strings. @@ -250,7 +295,7 @@ class TestInvalidPolicyRejected: def test_invalid_policy_raises_value_error_naming_value( self, reader, bad_value, tmp_path, ): - vrt_path, _, _ = _build_partial_vrt(str(tmp_path)) + vrt_path, _, _ = _write_partial_float_vrt(tmp_path) with pytest.raises(ValueError) as excinfo: reader(vrt_path, missing_sources=bad_value) msg = str(excinfo.value) @@ -265,3 +310,12 @@ def test_invalid_policy_raises_value_error_naming_value( f"ValueError must echo the bad value back to the caller; " f"got {msg!r}" ) + + def test_eager_byte_invalid_policy(self, tmp_path): + """Smoke check carried over from the old _1799 file. The + parametrised matrix above covers more bad values across both + reader paths; this stays as a literal copy of the original + assertion so the byte-band code path stays exercised.""" + vrt = _write_byte_missing_vrt(tmp_path) + with pytest.raises(ValueError, match="missing_sources"): + read_vrt(vrt, missing_sources="ignore") From fbecbf8f405dad1c33cd95362020a549932dcf61 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 25 May 2026 19:59:42 -0700 Subject: [PATCH 2/2] Address review nits and suggestions (#2393) - CLUSTER_AUDIT.md verification block: correct the "net test_*.py drop of 2" claim to reflect the actual 357 -> 356 transition. Consolidation by definition adds one new file; net is -1. - vrt/test_missing_sources.py: align _write_partial_float_vrt to use Path.write_text with two-space XML indentation, matching _write_byte_missing_vrt. Drop now-unused `import os`. Replace `os.path.join(str(tmp_path), ...)` with `str(tmp_path / ...)`. - conftest.py: drop the private _HAS_LOOPBACK import from _helpers/markers. pytest_collection_modifyitems now calls loopback_available() directly at entry. - vrt/test_missing_sources.py: scrub stray "_1799" issue-number references from test docstrings and helper comments. The file-level docstring still names the two deleted source files for provenance, which is intentional. --- xrspatial/geotiff/tests/CLUSTER_AUDIT.md | 2 +- xrspatial/geotiff/tests/conftest.py | 3 +- .../geotiff/tests/vrt/test_missing_sources.py | 64 +++++++++---------- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/xrspatial/geotiff/tests/CLUSTER_AUDIT.md b/xrspatial/geotiff/tests/CLUSTER_AUDIT.md index ef806cb19..75b1b411d 100644 --- a/xrspatial/geotiff/tests/CLUSTER_AUDIT.md +++ b/xrspatial/geotiff/tests/CLUSTER_AUDIT.md @@ -72,4 +72,4 @@ further changes needed. - Old eager byte coverage: 3 tests preserved (warn / raise / invalid). - Old eager+dask float coverage: 16 parametrised cases preserved (default x2, explicit raise x2, warn x2, invalid 6 bad values x 2 readers). -- Net file delta: 3 files deleted (`_tiff_surgery.py`, `test_vrt_missing_sources_policy_1799.py`, `test_vrt_missing_sources_policy_2367.py`); 6 files added (`_helpers/{__init__,tiff_builders,tiff_surgery,markers}.py`, `vrt/{__init__,test_missing_sources}.py`). The two extra `__init__.py` files plus the audit file (deleted before merge) keep the net `test_*.py` count drop at exactly 2. +- Net file delta: 3 files deleted (`_tiff_surgery.py`, `test_vrt_missing_sources_policy_1799.py`, `test_vrt_missing_sources_policy_2367.py`); 6 files added (`_helpers/{__init__,tiff_builders,tiff_surgery,markers}.py`, `vrt/{__init__,test_missing_sources}.py`). Of those, 2 `test_*.py` files are removed and 1 `test_*.py` file is added under `vrt/`, so `find xrspatial/geotiff/tests -name 'test_*.py' | wc -l` goes from 357 to 356 (net -1). The spec called for a drop of 2; the new VRT module replaces both old files but is itself a `test_*.py` file, so consolidation by definition lands one net deletion per cluster. diff --git a/xrspatial/geotiff/tests/conftest.py b/xrspatial/geotiff/tests/conftest.py index 39813df40..d8786efa1 100644 --- a/xrspatial/geotiff/tests/conftest.py +++ b/xrspatial/geotiff/tests/conftest.py @@ -14,7 +14,6 @@ import pytest from ._helpers.markers import ( - _HAS_LOOPBACK, gpu_available, loopback_available, requires_gpu, @@ -45,7 +44,7 @@ def pytest_collection_modifyitems(config, items): a local-file GPU test) keep their non-HTTP coverage in restricted sandboxes. """ - if _HAS_LOOPBACK: + if loopback_available(): return import inspect diff --git a/xrspatial/geotiff/tests/vrt/test_missing_sources.py b/xrspatial/geotiff/tests/vrt/test_missing_sources.py index da7009ec0..0e41826ec 100644 --- a/xrspatial/geotiff/tests/vrt/test_missing_sources.py +++ b/xrspatial/geotiff/tests/vrt/test_missing_sources.py @@ -24,7 +24,6 @@ """ from __future__ import annotations -import os import warnings import numpy as np @@ -49,7 +48,7 @@ # # * ``byte_missing_vrt`` -- a 2x2 ``Byte`` VRT whose only source does not # exist on disk. The smallest case that exercises the missing-source -# guard. Inherited from the old _1799 smoke checks. +# guard. Inherited from the old eager-only smoke checks. # * ``partial_float_vrt`` -- an 8x4 ``Float32`` VRT split across two # sources. The left half points at a real GeoTIFF written through # ``to_geotiff``; the right half points at a missing file. Exercises @@ -81,8 +80,7 @@ def _write_partial_float_vrt(tmp_path) -> tuple[str, str, str]: Returns ``(vrt_path, present_src_path, missing_path)`` as strings. """ - tmp_str = str(tmp_path) - src = os.path.join(tmp_str, "src_present.tif") + src = str(tmp_path / "src_present.tif") arr = np.full((4, 4), PRESENT_FILL, dtype=np.float32) da = xr.DataArray( arr, dims=("y", "x"), @@ -90,29 +88,28 @@ def _write_partial_float_vrt(tmp_path) -> tuple[str, str, str]: ) to_geotiff(da, src) - missing = os.path.join(tmp_str, "missing_source.tif") - vrt_path = os.path.join(tmp_str, "partial.vrt") - with open(vrt_path, "w") as f: - f.write( - '\n' - '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' - '\n' - '\n' - f'{src}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - f'{missing}\n' - '1\n' - '\n' - '\n' - '\n' - '\n' - '\n' - ) - return vrt_path, src, missing + missing = str(tmp_path / "missing_source.tif") + vrt_path = tmp_path / "partial.vrt" + vrt_path.write_text( + '\n' + ' 0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + ' \n' + ' \n' + f' {src}\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + f' {missing}\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + return str(vrt_path), src, missing # --------------------------------------------------------------------------- @@ -166,7 +163,7 @@ def test_default_raises_filenotfound_naming_source( def test_eager_byte_default_raises(self, tmp_path): """Smoke check for the byte-band path with no real source on - disk. Inherited from the old _1799 file.""" + disk.""" vrt = _write_byte_missing_vrt(tmp_path) with pytest.raises((OSError, ValueError)): read_vrt(vrt) @@ -266,8 +263,7 @@ def test_dask_warn_emits_at_compute_and_fills(self, tmp_path): def test_eager_byte_warn_records_hole(self, tmp_path): """Byte-band warn path: warning fires and ``vrt_holes`` is - populated even when there is no present half. Inherited from - the old _1799 file.""" + populated even when there is no present half.""" vrt = _write_byte_missing_vrt(tmp_path) with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): da = read_vrt(vrt, missing_sources="warn") @@ -312,10 +308,10 @@ def test_invalid_policy_raises_value_error_naming_value( ) def test_eager_byte_invalid_policy(self, tmp_path): - """Smoke check carried over from the old _1799 file. The - parametrised matrix above covers more bad values across both - reader paths; this stays as a literal copy of the original - assertion so the byte-band code path stays exercised.""" + """Byte-band smoke check. The parametrised matrix above covers + more bad values across both reader paths; this stays as a + literal copy of the original assertion so the byte-band code + path stays exercised.""" vrt = _write_byte_missing_vrt(tmp_path) with pytest.raises(ValueError, match="missing_sources"): read_vrt(vrt, missing_sources="ignore")