diff --git a/docs/source/reference/release_gate_geotiff.rst b/docs/source/reference/release_gate_geotiff.rst
index 71bcb5cd..b616af39 100644
--- a/docs/source/reference/release_gate_geotiff.rst
+++ b/docs/source/reference/release_gate_geotiff.rst
@@ -507,7 +507,7 @@ VRT supported subset
- Holes surface as the band sentinel, ``attrs['vrt_holes']`` is set,
and a :class:`GeoTIFFFallbackWarning` is emitted.
- ``xrspatial/geotiff/tests/test_vrt_holes_attr_1734.py``,
- ``xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py``,
+ ``xrspatial/geotiff/tests/vrt/test_missing_sources.py``,
``xrspatial/geotiff/tests/test_vrt_chunked_missing_sources_1799.py``
- `#2342`_
* - VRT source / dest rectangle validation
diff --git a/xrspatial/geotiff/tests/CLUSTER_AUDIT.md b/xrspatial/geotiff/tests/CLUSTER_AUDIT.md
new file mode 100644
index 00000000..75b1b411
--- /dev/null
+++ b/xrspatial/geotiff/tests/CLUSTER_AUDIT.md
@@ -0,0 +1,75 @@
+# CLUSTER_AUDIT.md — PR 1 (Foundation + VRT missing-sources)
+
+Temporary audit table tracking every old `file::test` and where it lands
+in the consolidated layout. Deleted in a follow-up commit on the same
+branch before merge per the epic #2390 contract.
+
+## Foundation moves
+
+| Old location | New location | Notes |
+|---|---|---|
+| `conftest.py::make_minimal_tiff` (function) | `_helpers/tiff_builders.py::make_minimal_tiff` | Re-exported from `conftest.py` so existing `from .conftest import make_minimal_tiff` keeps working. |
+| `conftest.py::gpu_available` | `_helpers/markers.py::gpu_available` | Re-exported from `conftest.py`. |
+| `conftest.py::loopback_available` | `_helpers/markers.py::loopback_available` | Re-exported from `conftest.py`. |
+| `conftest.py::requires_gpu` | `_helpers/markers.py::requires_gpu` | Marker name unchanged; re-exported. |
+| `conftest.py::requires_loopback` | `_helpers/markers.py::requires_loopback` | Marker name unchanged; re-exported. |
+| `conftest.py::requires_integration` | `_helpers/markers.py::requires_integration` | Marker name unchanged; re-exported. |
+| `conftest.py::pytest_collection_modifyitems` | `conftest.py::pytest_collection_modifyitems` | Left in place; PR 11 removes it per the epic. |
+| `_tiff_surgery.py` (whole module) | `_helpers/tiff_surgery.py` | Verbatim relocation. Direct importers updated. |
+
+### Updated import sites
+
+| File | Change |
+|---|---|
+| `test_local_tile_byte_cap_1664.py` | `from ._tiff_surgery import ...` -> `from ._helpers.tiff_surgery import ...` |
+| `test_gpu_tile_byte_cap_2026_05_18.py` | `from ._tiff_surgery import ...` -> `from ._helpers.tiff_surgery import ...` |
+
+All other test files import `make_minimal_tiff`, `gpu_available`, and
+the `requires_*` markers from `conftest.py` (or
+`xrspatial.geotiff.tests.conftest`), which now re-exports them. No
+further changes needed.
+
+## VRT missing-sources cluster
+
+### `test_vrt_missing_sources_policy_1799.py` (deleted)
+
+| Old `file::test` | New `file::test_id` | Notes |
+|---|---|---|
+| `test_vrt_missing_sources_policy_1799.py::test_read_vrt_missing_sources_warns_and_records_hole` | `vrt/test_missing_sources.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_eager_byte_warn_records_hole` | Byte-band variant carried over verbatim. Asserts on the `"could not be read"` message and `vrt_holes`. |
+| `test_vrt_missing_sources_policy_1799.py::test_read_vrt_missing_sources_raise_fails_fast` | `vrt/test_missing_sources.py::TestExplicitRaisePolicy::test_eager_byte_explicit_raise` | Byte-band variant. Renamed; assertion unchanged (raises `OSError` or `ValueError`). |
+| `test_vrt_missing_sources_policy_1799.py::test_read_vrt_missing_sources_validates_policy` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_eager_byte_invalid_policy` | Byte-band invalid-policy smoke check. Parametrised matrix below covers more bad values across both readers; this stays as a literal port to keep byte-band coverage. |
+
+### `test_vrt_missing_sources_policy_2367.py` (deleted)
+
+| Old `file::test` | New `file::test_id` | Notes |
+|---|---|---|
+| `test_vrt_missing_sources_policy_2367.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[eager_read_vrt]` | `vrt/test_missing_sources.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[eager]` | Renamed reader id from `eager_read_vrt` to `eager`. Filename in the VRT helper renamed `missing_2367.tif` -> `missing_source.tif` (no issue numbers in fixtures). |
+| `test_vrt_missing_sources_policy_2367.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[dask_open_geotiff_chunks]` | `vrt/test_missing_sources.py::TestDefaultPolicyRaises::test_default_raises_filenotfound_naming_source[dask]` | Renamed reader id; same coverage. |
+| `test_vrt_missing_sources_policy_2367.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[eager_read_vrt]` | `vrt/test_missing_sources.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[eager]` | Renamed reader id. |
+| `test_vrt_missing_sources_policy_2367.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[dask_open_geotiff_chunks]` | `vrt/test_missing_sources.py::TestExplicitRaisePolicy::test_explicit_raise_matches_default[dask]` | Renamed reader id. |
+| `test_vrt_missing_sources_policy_2367.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_eager_warn_emits_and_fills` | `vrt/test_missing_sources.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_eager_warn_emits_and_fills` | Body unchanged except missing-source filename rename. |
+| `test_vrt_missing_sources_policy_2367.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_dask_warn_emits_at_compute_and_fills` | `vrt/test_missing_sources.py::TestWarnPolicyEmitsWarningAndFillsNodata::test_dask_warn_emits_at_compute_and_fills` | Body unchanged except missing-source filename rename. |
+| `test_vrt_missing_sources_policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-ignore]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[ignore-eager]` | Reader id renamed; parametrize order unchanged. |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-RAISE]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[RAISE-eager]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-raises]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[raises-eager]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[-eager]` | Empty-string bad value. |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-warn ]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[warn -eager]` | Trailing-space bad value. |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[eager_read_vrt-1]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[1-eager]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-ignore]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[ignore-dask]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-RAISE]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[RAISE-dask]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-raises]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[raises-dask]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[-dask]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-warn ]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[warn -dask]` | |
+| `..._policy_2367.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[dask_open_geotiff_chunks-1]` | `vrt/test_missing_sources.py::TestInvalidPolicyRejected::test_invalid_policy_raises_value_error_naming_value[1-dask]` | |
+
+### Files NOT folded in (justified)
+
+| File | Reason left in place |
+|---|---|
+| `test_vrt_missing_sources_default_raise_1843.py` | Different surface area: tests the *internal* `xrspatial.geotiff._vrt.read_vrt` entry point (not the public `xrspatial.geotiff.read_vrt`), plus the `XRSPATIAL_GEOTIFF_STRICT=1` env-var override. Neither is in the public-API matrix covered by `vrt/test_missing_sources.py`. A future PR that consolidates the strict-env-var coverage can fold this in then. |
+
+## Verification
+
+- Old eager byte coverage: 3 tests preserved (warn / raise / invalid).
+- Old eager+dask float coverage: 16 parametrised cases preserved (default x2, explicit raise x2, warn x2, invalid 6 bad values x 2 readers).
+- Net file delta: 3 files deleted (`_tiff_surgery.py`, `test_vrt_missing_sources_policy_1799.py`, `test_vrt_missing_sources_policy_2367.py`); 6 files added (`_helpers/{__init__,tiff_builders,tiff_surgery,markers}.py`, `vrt/{__init__,test_missing_sources}.py`). Of those, 2 `test_*.py` files are removed and 1 `test_*.py` file is added under `vrt/`, so `find xrspatial/geotiff/tests -name 'test_*.py' | wc -l` goes from 357 to 356 (net -1). The spec called for a drop of 2; the new VRT module replaces both old files but is itself a `test_*.py` file, so consolidation by definition lands one net deletion per cluster.
diff --git a/xrspatial/geotiff/tests/_helpers/__init__.py b/xrspatial/geotiff/tests/_helpers/__init__.py
new file mode 100644
index 00000000..b55d743e
--- /dev/null
+++ b/xrspatial/geotiff/tests/_helpers/__init__.py
@@ -0,0 +1,8 @@
+"""Shared helpers for the GeoTIFF test suite.
+
+Centralises the TIFF-builder factory, byte-surgery helpers, and pytest
+marker definitions that used to live in ``conftest.py`` and at the top
+level of ``xrspatial/geotiff/tests/``. The ``conftest.py`` re-exports
+``make_minimal_tiff`` and the marker names so existing
+``from .conftest import ...`` test imports keep working.
+"""
diff --git a/xrspatial/geotiff/tests/_helpers/markers.py b/xrspatial/geotiff/tests/_helpers/markers.py
new file mode 100644
index 00000000..0f993c97
--- /dev/null
+++ b/xrspatial/geotiff/tests/_helpers/markers.py
@@ -0,0 +1,67 @@
+"""Pytest markers and capability probes for the GeoTIFF test suite.
+
+Relocated from ``conftest.py``. The marker names are unchanged
+(``requires_gpu``, ``requires_loopback``, ``requires_integration``) so
+test files that already import them via ``from .conftest import ...``
+keep working through the conftest re-export.
+"""
+from __future__ import annotations
+
+import importlib.util
+import os
+import socket
+
+import pytest
+
+
+def gpu_available() -> bool:
+ """True iff cupy imports AND a CUDA device is actually usable.
+
+ Some sandboxes ship cupy without a working CUDA runtime. A bare
+ ``import cupy`` succeeds there but every device call fails, so test
+ files that gate on the import alone show false failures.
+ """
+ if importlib.util.find_spec("cupy") is None:
+ return False
+ try:
+ import cupy
+ return bool(cupy.cuda.is_available())
+ except Exception:
+ return False
+
+
+def loopback_available() -> bool:
+ """True iff a loopback TCP bind succeeds on this host.
+
+ Some sandboxed environments deny ``bind(('127.0.0.1', 0))``. HTTP
+ tests that stand up a loopback server should skip rather than error
+ in that case.
+ """
+ try:
+ s = socket.socket()
+ try:
+ s.bind(('127.0.0.1', 0))
+ finally:
+ s.close()
+ except OSError:
+ return False
+ return True
+
+
+_HAS_GPU = gpu_available()
+_HAS_LOOPBACK = loopback_available()
+
+requires_gpu = pytest.mark.skipif(
+ not _HAS_GPU, reason="cupy + CUDA required"
+)
+requires_loopback = pytest.mark.skipif(
+ not _HAS_LOOPBACK, reason="loopback bind unavailable in this environment"
+)
+
+_RUN_INTEGRATION = os.environ.get("XRSPATIAL_RUN_INTEGRATION", "") not in (
+ "", "0", "false", "False"
+)
+requires_integration = pytest.mark.skipif(
+ not _RUN_INTEGRATION,
+ reason="integration test; set XRSPATIAL_RUN_INTEGRATION=1 to run locally",
+)
diff --git a/xrspatial/geotiff/tests/_helpers/tiff_builders.py b/xrspatial/geotiff/tests/_helpers/tiff_builders.py
new file mode 100644
index 00000000..144a768d
--- /dev/null
+++ b/xrspatial/geotiff/tests/_helpers/tiff_builders.py
@@ -0,0 +1,240 @@
+"""In-memory TIFF builder used by the test suite.
+
+Relocated from ``conftest.py`` so test modules can import the builder
+directly without dragging in pytest fixture state. The function
+signature and behaviour are unchanged; existing call sites continue to
+work through the re-export in ``conftest.py``.
+"""
+from __future__ import annotations
+
+import math
+import struct
+
+import numpy as np
+
+
+def make_minimal_tiff(
+ width: int = 4,
+ height: int = 4,
+ dtype: np.dtype = np.dtype('float32'),
+ pixel_data: np.ndarray | None = None,
+ compression: int = 1,
+ tiled: bool = False,
+ tile_size: int = 4,
+ big_endian: bool = False,
+ bigtiff: bool = False,
+ geo_transform: tuple | None = None,
+ epsg: int | None = None,
+) -> bytes:
+ """Build a minimal valid TIFF file in memory for testing.
+
+ Uses a three-pass approach:
+ 1. Collect all tags and their raw value data
+ 2. Compute file layout (IFD size, overflow positions, pixel data offset)
+ 3. Serialize everything with correct offsets
+ """
+ bo = '>' if big_endian else '<'
+ bom = b'MM' if big_endian else b'II'
+
+ if pixel_data is None:
+ pixel_data = np.arange(width * height, dtype=dtype).reshape(height, width)
+ else:
+ dtype = pixel_data.dtype
+
+ bits_per_sample = dtype.itemsize * 8
+ if dtype.kind == 'f':
+ sample_format = 3
+ elif dtype.kind == 'i':
+ sample_format = 2
+ else:
+ sample_format = 1
+
+ # --- Build pixel data (strips or tiles) ---
+ if tiled:
+ tiles_across = math.ceil(width / tile_size)
+ tiles_down = math.ceil(height / tile_size)
+ num_tiles = tiles_across * tiles_down
+
+ tile_blobs = []
+ for tr in range(tiles_down):
+ for tc in range(tiles_across):
+ tile = np.zeros((tile_size, tile_size), dtype=dtype)
+ r0, c0 = tr * tile_size, tc * tile_size
+ r1 = min(r0 + tile_size, height)
+ c1 = min(c0 + tile_size, width)
+ tile[:r1 - r0, :c1 - c0] = pixel_data[r0:r1, c0:c1]
+ tile_blobs.append(tile.tobytes())
+
+ pixel_bytes = b''.join(tile_blobs)
+ tile_byte_counts = [len(b) for b in tile_blobs]
+ else:
+ if big_endian and pixel_data.dtype.itemsize > 1:
+ pixel_bytes = pixel_data.astype(pixel_data.dtype.newbyteorder('>')).tobytes()
+ else:
+ pixel_bytes = pixel_data.tobytes()
+
+ # --- Collect tags as (tag_id, type_id, value_bytes) ---
+ # value_bytes is the serialized value; if len <= 4 it's inline, else overflow.
+ tag_list: list[tuple[int, int, int, bytes]] = [] # (tag, type, count, raw_bytes)
+
+ def add_short(tag, val):
+ tag_list.append((tag, 3, 1, struct.pack(f'{bo}H', val)))
+
+ def add_long(tag, val):
+ tag_list.append((tag, 4, 1, struct.pack(f'{bo}I', val)))
+
+ def add_shorts(tag, vals):
+ tag_list.append((tag, 3, len(vals), struct.pack(f'{bo}{len(vals)}H', *vals)))
+
+ def add_longs(tag, vals):
+ tag_list.append((tag, 4, len(vals), struct.pack(f'{bo}{len(vals)}I', *vals)))
+
+ def add_doubles(tag, vals):
+ tag_list.append((tag, 12, len(vals), struct.pack(f'{bo}{len(vals)}d', *vals)))
+
+ add_short(256, width) # ImageWidth
+ add_short(257, height) # ImageLength
+ add_short(258, bits_per_sample) # BitsPerSample
+ add_short(259, compression) # Compression
+ add_short(262, 1) # PhotometricInterpretation
+ add_short(277, 1) # SamplesPerPixel
+ add_short(339, sample_format) # SampleFormat
+
+ if tiled:
+ add_short(322, tile_size) # TileWidth
+ add_short(323, tile_size) # TileLength
+ # Placeholder offsets -- will be patched after layout is known
+ add_longs(324, [0] * num_tiles) # TileOffsets
+ add_longs(325, tile_byte_counts) # TileByteCounts
+ else:
+ add_short(278, height) # RowsPerStrip
+ add_long(273, 0) # StripOffsets (placeholder)
+ add_long(279, len(pixel_bytes)) # StripByteCounts
+
+ if geo_transform is not None:
+ ox, oy, pw, ph = geo_transform
+ add_doubles(33550, [abs(pw), abs(ph), 0.0]) # ModelPixelScale
+ add_doubles(33922, [0.0, 0.0, 0.0, ox, oy, 0.0]) # ModelTiepoint
+
+ if epsg is not None:
+ if epsg == 4326 or (4000 <= epsg < 5000):
+ model_type, key_id = 2, 2048
+ else:
+ model_type, key_id = 1, 3072
+ gkd = [1, 1, 0, 2, 1024, 0, 1, model_type, key_id, 0, 1, epsg]
+ add_shorts(34735, gkd)
+
+ # Sort by tag ID (TIFF spec requirement)
+ tag_list.sort(key=lambda t: t[0])
+
+ # --- Compute layout ---
+ num_entries = len(tag_list)
+ ifd_start = 8 # right after header
+ ifd_size = 2 + 12 * num_entries + 4 # count + entries + next_ifd_offset
+ overflow_start = ifd_start + ifd_size
+
+ # Figure out which tags need overflow (value > 4 bytes)
+ overflow_buf = bytearray()
+ for _tag, _type, _count, raw in tag_list:
+ if len(raw) > 4:
+ # This will go to overflow -- just accumulate size for now
+ overflow_buf.extend(raw)
+ # Word-align
+ if len(overflow_buf) % 2:
+ overflow_buf.append(0)
+
+ pixel_data_start = overflow_start + len(overflow_buf)
+
+ # --- Patch offset tags ---
+ # Now we know where pixel data starts, patch strip/tile offsets
+ patched = []
+ for tag, typ, count, raw in tag_list:
+ if tag == 273: # StripOffsets
+ patched.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start)))
+ elif tag == 324: # TileOffsets
+ offsets = []
+ pos = 0
+ for blob in tile_blobs:
+ offsets.append(pixel_data_start + pos)
+ pos += len(blob)
+ patched.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets)))
+ else:
+ patched.append((tag, typ, count, raw))
+ tag_list = patched
+
+ # --- Rebuild overflow with final values ---
+ overflow_buf = bytearray()
+ tag_offsets = {} # tag -> offset within overflow_buf (or None if inline)
+
+ for tag, typ, count, raw in tag_list:
+ if len(raw) > 4:
+ tag_offsets[tag] = len(overflow_buf)
+ overflow_buf.extend(raw)
+ if len(overflow_buf) % 2:
+ overflow_buf.append(0)
+ else:
+ tag_offsets[tag] = None
+
+ # Recalculate in case overflow size changed from patching
+ actual_pixel_start = overflow_start + len(overflow_buf)
+ if actual_pixel_start != pixel_data_start:
+ # Need another pass to fix offsets
+ pixel_data_start = actual_pixel_start
+ patched2 = []
+ for tag, typ, count, raw in tag_list:
+ if tag == 273:
+ patched2.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start)))
+ elif tag == 324:
+ offsets = []
+ pos = 0
+ for blob in tile_blobs:
+ offsets.append(pixel_data_start + pos)
+ pos += len(blob)
+ patched2.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets)))
+ else:
+ patched2.append((tag, typ, count, raw))
+ tag_list = patched2
+
+ # Rebuild overflow again
+ overflow_buf = bytearray()
+ tag_offsets = {}
+ for tag, typ, count, raw in tag_list:
+ if len(raw) > 4:
+ tag_offsets[tag] = len(overflow_buf)
+ overflow_buf.extend(raw)
+ if len(overflow_buf) % 2:
+ overflow_buf.append(0)
+ else:
+ tag_offsets[tag] = None
+
+ # --- Serialize ---
+ out = bytearray()
+
+ # Header
+ out.extend(bom)
+ out.extend(struct.pack(f'{bo}H', 42))
+ out.extend(struct.pack(f'{bo}I', ifd_start))
+
+ # IFD
+ out.extend(struct.pack(f'{bo}H', num_entries))
+
+ for tag, typ, count, raw in tag_list:
+ out.extend(struct.pack(f'{bo}HHI', tag, typ, count))
+ if len(raw) <= 4:
+ # Inline value, padded to 4 bytes
+ out.extend(raw.ljust(4, b'\x00'))
+ else:
+ # Pointer to overflow
+ ptr = overflow_start + tag_offsets[tag]
+ out.extend(struct.pack(f'{bo}I', ptr))
+
+ # Next IFD offset
+ out.extend(struct.pack(f'{bo}I', 0))
+
+ # Overflow
+ out.extend(overflow_buf)
+
+ # Pixel data
+ out.extend(pixel_bytes)
+
+ return bytes(out)
diff --git a/xrspatial/geotiff/tests/_tiff_surgery.py b/xrspatial/geotiff/tests/_helpers/tiff_surgery.py
similarity index 100%
rename from xrspatial/geotiff/tests/_tiff_surgery.py
rename to xrspatial/geotiff/tests/_helpers/tiff_surgery.py
diff --git a/xrspatial/geotiff/tests/conftest.py b/xrspatial/geotiff/tests/conftest.py
index 86cf2a26..d8786efa 100644
--- a/xrspatial/geotiff/tests/conftest.py
+++ b/xrspatial/geotiff/tests/conftest.py
@@ -1,67 +1,35 @@
-"""Shared fixtures for geotiff tests."""
+"""Shared fixtures for geotiff tests.
+
+The TIFF builder, markers, and capability probes now live under
+``_helpers/``. This module re-exports them so legacy imports such as
+``from .conftest import make_minimal_tiff`` or
+``from xrspatial.geotiff.tests.conftest import requires_gpu`` keep
+working. PR 11 of the consolidation epic (#2390) drops the
+``pytest_collection_modifyitems`` socketserver hack below; until then
+it stays in place to keep the existing loopback-skip behaviour.
+"""
from __future__ import annotations
-import importlib.util
-import math
-import os
-import socket
-import struct
-
import numpy as np
import pytest
-
-def gpu_available() -> bool:
- """True iff cupy imports AND a CUDA device is actually usable.
-
- Some sandboxes ship cupy without a working CUDA runtime. A bare
- ``import cupy`` succeeds there but every device call fails, so test
- files that gate on the import alone show false failures.
- """
- if importlib.util.find_spec("cupy") is None:
- return False
- try:
- import cupy
- return bool(cupy.cuda.is_available())
- except Exception:
- return False
-
-
-def loopback_available() -> bool:
- """True iff a loopback TCP bind succeeds on this host.
-
- Some sandboxed environments deny ``bind(('127.0.0.1', 0))``. HTTP
- tests that stand up a loopback server should skip rather than error
- in that case.
- """
- try:
- s = socket.socket()
- try:
- s.bind(('127.0.0.1', 0))
- finally:
- s.close()
- except OSError:
- return False
- return True
-
-
-_HAS_GPU = gpu_available()
-_HAS_LOOPBACK = loopback_available()
-
-requires_gpu = pytest.mark.skipif(
- not _HAS_GPU, reason="cupy + CUDA required"
-)
-requires_loopback = pytest.mark.skipif(
- not _HAS_LOOPBACK, reason="loopback bind unavailable in this environment"
+from ._helpers.markers import (
+ gpu_available,
+ loopback_available,
+ requires_gpu,
+ requires_integration,
+ requires_loopback,
)
+from ._helpers.tiff_builders import make_minimal_tiff
-_RUN_INTEGRATION = os.environ.get("XRSPATIAL_RUN_INTEGRATION", "") not in (
- "", "0", "false", "False"
-)
-requires_integration = pytest.mark.skipif(
- not _RUN_INTEGRATION,
- reason="integration test; set XRSPATIAL_RUN_INTEGRATION=1 to run locally",
-)
+__all__ = [
+ "gpu_available",
+ "loopback_available",
+ "make_minimal_tiff",
+ "requires_gpu",
+ "requires_integration",
+ "requires_loopback",
+]
def pytest_collection_modifyitems(config, items):
@@ -76,7 +44,7 @@ def pytest_collection_modifyitems(config, items):
a local-file GPU test) keep their non-HTTP coverage in restricted
sandboxes.
"""
- if _HAS_LOOPBACK:
+ if loopback_available():
return
import inspect
@@ -119,233 +87,6 @@ def _references_loopback(src: str) -> bool:
item.add_marker(skip_marker)
-def make_minimal_tiff(
- width: int = 4,
- height: int = 4,
- dtype: np.dtype = np.dtype('float32'),
- pixel_data: np.ndarray | None = None,
- compression: int = 1,
- tiled: bool = False,
- tile_size: int = 4,
- big_endian: bool = False,
- bigtiff: bool = False,
- geo_transform: tuple | None = None,
- epsg: int | None = None,
-) -> bytes:
- """Build a minimal valid TIFF file in memory for testing.
-
- Uses a three-pass approach:
- 1. Collect all tags and their raw value data
- 2. Compute file layout (IFD size, overflow positions, pixel data offset)
- 3. Serialize everything with correct offsets
- """
- bo = '>' if big_endian else '<'
- bom = b'MM' if big_endian else b'II'
-
- if pixel_data is None:
- pixel_data = np.arange(width * height, dtype=dtype).reshape(height, width)
- else:
- dtype = pixel_data.dtype
-
- bits_per_sample = dtype.itemsize * 8
- if dtype.kind == 'f':
- sample_format = 3
- elif dtype.kind == 'i':
- sample_format = 2
- else:
- sample_format = 1
-
- # --- Build pixel data (strips or tiles) ---
- if tiled:
- tiles_across = math.ceil(width / tile_size)
- tiles_down = math.ceil(height / tile_size)
- num_tiles = tiles_across * tiles_down
-
- tile_blobs = []
- for tr in range(tiles_down):
- for tc in range(tiles_across):
- tile = np.zeros((tile_size, tile_size), dtype=dtype)
- r0, c0 = tr * tile_size, tc * tile_size
- r1 = min(r0 + tile_size, height)
- c1 = min(c0 + tile_size, width)
- tile[:r1 - r0, :c1 - c0] = pixel_data[r0:r1, c0:c1]
- tile_blobs.append(tile.tobytes())
-
- pixel_bytes = b''.join(tile_blobs)
- tile_byte_counts = [len(b) for b in tile_blobs]
- else:
- if big_endian and pixel_data.dtype.itemsize > 1:
- pixel_bytes = pixel_data.astype(pixel_data.dtype.newbyteorder('>')).tobytes()
- else:
- pixel_bytes = pixel_data.tobytes()
-
- # --- Collect tags as (tag_id, type_id, value_bytes) ---
- # value_bytes is the serialized value; if len <= 4 it's inline, else overflow.
- tag_list: list[tuple[int, int, int, bytes]] = [] # (tag, type, count, raw_bytes)
-
- def add_short(tag, val):
- tag_list.append((tag, 3, 1, struct.pack(f'{bo}H', val)))
-
- def add_long(tag, val):
- tag_list.append((tag, 4, 1, struct.pack(f'{bo}I', val)))
-
- def add_shorts(tag, vals):
- tag_list.append((tag, 3, len(vals), struct.pack(f'{bo}{len(vals)}H', *vals)))
-
- def add_longs(tag, vals):
- tag_list.append((tag, 4, len(vals), struct.pack(f'{bo}{len(vals)}I', *vals)))
-
- def add_doubles(tag, vals):
- tag_list.append((tag, 12, len(vals), struct.pack(f'{bo}{len(vals)}d', *vals)))
-
- add_short(256, width) # ImageWidth
- add_short(257, height) # ImageLength
- add_short(258, bits_per_sample) # BitsPerSample
- add_short(259, compression) # Compression
- add_short(262, 1) # PhotometricInterpretation
- add_short(277, 1) # SamplesPerPixel
- add_short(339, sample_format) # SampleFormat
-
- if tiled:
- add_short(322, tile_size) # TileWidth
- add_short(323, tile_size) # TileLength
- # Placeholder offsets -- will be patched after layout is known
- add_longs(324, [0] * num_tiles) # TileOffsets
- add_longs(325, tile_byte_counts) # TileByteCounts
- else:
- add_short(278, height) # RowsPerStrip
- add_long(273, 0) # StripOffsets (placeholder)
- add_long(279, len(pixel_bytes)) # StripByteCounts
-
- if geo_transform is not None:
- ox, oy, pw, ph = geo_transform
- add_doubles(33550, [abs(pw), abs(ph), 0.0]) # ModelPixelScale
- add_doubles(33922, [0.0, 0.0, 0.0, ox, oy, 0.0]) # ModelTiepoint
-
- if epsg is not None:
- if epsg == 4326 or (4000 <= epsg < 5000):
- model_type, key_id = 2, 2048
- else:
- model_type, key_id = 1, 3072
- gkd = [1, 1, 0, 2, 1024, 0, 1, model_type, key_id, 0, 1, epsg]
- add_shorts(34735, gkd)
-
- # Sort by tag ID (TIFF spec requirement)
- tag_list.sort(key=lambda t: t[0])
-
- # --- Compute layout ---
- num_entries = len(tag_list)
- ifd_start = 8 # right after header
- ifd_size = 2 + 12 * num_entries + 4 # count + entries + next_ifd_offset
- overflow_start = ifd_start + ifd_size
-
- # Figure out which tags need overflow (value > 4 bytes)
- overflow_buf = bytearray()
- for _tag, _type, _count, raw in tag_list:
- if len(raw) > 4:
- # This will go to overflow -- just accumulate size for now
- overflow_buf.extend(raw)
- # Word-align
- if len(overflow_buf) % 2:
- overflow_buf.append(0)
-
- pixel_data_start = overflow_start + len(overflow_buf)
-
- # --- Patch offset tags ---
- # Now we know where pixel data starts, patch strip/tile offsets
- patched = []
- for tag, typ, count, raw in tag_list:
- if tag == 273: # StripOffsets
- patched.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start)))
- elif tag == 324: # TileOffsets
- offsets = []
- pos = 0
- for blob in tile_blobs:
- offsets.append(pixel_data_start + pos)
- pos += len(blob)
- patched.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets)))
- else:
- patched.append((tag, typ, count, raw))
- tag_list = patched
-
- # --- Rebuild overflow with final values ---
- overflow_buf = bytearray()
- tag_offsets = {} # tag -> offset within overflow_buf (or None if inline)
-
- for tag, typ, count, raw in tag_list:
- if len(raw) > 4:
- tag_offsets[tag] = len(overflow_buf)
- overflow_buf.extend(raw)
- if len(overflow_buf) % 2:
- overflow_buf.append(0)
- else:
- tag_offsets[tag] = None
-
- # Recalculate in case overflow size changed from patching
- actual_pixel_start = overflow_start + len(overflow_buf)
- if actual_pixel_start != pixel_data_start:
- # Need another pass to fix offsets
- pixel_data_start = actual_pixel_start
- patched2 = []
- for tag, typ, count, raw in tag_list:
- if tag == 273:
- patched2.append((tag, typ, count, struct.pack(f'{bo}I', pixel_data_start)))
- elif tag == 324:
- offsets = []
- pos = 0
- for blob in tile_blobs:
- offsets.append(pixel_data_start + pos)
- pos += len(blob)
- patched2.append((tag, typ, count, struct.pack(f'{bo}{num_tiles}I', *offsets)))
- else:
- patched2.append((tag, typ, count, raw))
- tag_list = patched2
-
- # Rebuild overflow again
- overflow_buf = bytearray()
- tag_offsets = {}
- for tag, typ, count, raw in tag_list:
- if len(raw) > 4:
- tag_offsets[tag] = len(overflow_buf)
- overflow_buf.extend(raw)
- if len(overflow_buf) % 2:
- overflow_buf.append(0)
- else:
- tag_offsets[tag] = None
-
- # --- Serialize ---
- out = bytearray()
-
- # Header
- out.extend(bom)
- out.extend(struct.pack(f'{bo}H', 42))
- out.extend(struct.pack(f'{bo}I', ifd_start))
-
- # IFD
- out.extend(struct.pack(f'{bo}H', num_entries))
-
- for tag, typ, count, raw in tag_list:
- out.extend(struct.pack(f'{bo}HHI', tag, typ, count))
- if len(raw) <= 4:
- # Inline value, padded to 4 bytes
- out.extend(raw.ljust(4, b'\x00'))
- else:
- # Pointer to overflow
- ptr = overflow_start + tag_offsets[tag]
- out.extend(struct.pack(f'{bo}I', ptr))
-
- # Next IFD offset
- out.extend(struct.pack(f'{bo}I', 0))
-
- # Overflow
- out.extend(overflow_buf)
-
- # Pixel data
- out.extend(pixel_bytes)
-
- return bytes(out)
-
-
@pytest.fixture
def simple_float32_tiff():
"""4x4 float32 stripped TIFF with sequential values."""
diff --git a/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py b/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py
index 744fac6f..127aef86 100644
--- a/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py
+++ b/xrspatial/geotiff/tests/test_gpu_tile_byte_cap_2026_05_18.py
@@ -30,7 +30,7 @@
from xrspatial.geotiff import read_geotiff_gpu, to_geotiff
-from ._tiff_surgery import patch_byte_counts as _patch_byte_counts
+from ._helpers.tiff_surgery import patch_byte_counts as _patch_byte_counts
def _cupy_available() -> bool:
diff --git a/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py b/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py
index 51dbe7e2..25216f53 100644
--- a/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py
+++ b/xrspatial/geotiff/tests/test_local_tile_byte_cap_1664.py
@@ -19,7 +19,7 @@
from xrspatial.geotiff import _reader as _reader_mod
from xrspatial.geotiff import open_geotiff, to_geotiff
-from ._tiff_surgery import patch_byte_counts as _patch_byte_counts # noqa: E402
+from ._helpers.tiff_surgery import patch_byte_counts as _patch_byte_counts # noqa: E402
# ---------------------------------------------------------------------------
# Helpers -- patch in-place IFD entries for tile / strip byte counts
diff --git a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py b/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py
deleted file mode 100644
index 29de1ba5..00000000
--- a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_1799.py
+++ /dev/null
@@ -1,49 +0,0 @@
-"""VRT missing-source handling has an explicit policy (#1799)."""
-from __future__ import annotations
-
-import pytest
-
-from xrspatial.geotiff import GeoTIFFFallbackWarning, read_vrt
-
-
-def _write_missing_source_vrt(path):
- path.write_text(
- '\n'
- ' \n'
- ' \n'
- ' missing.tif'
- '\n'
- ' 1\n'
- ' \n'
- ' \n'
- ' \n'
- ' \n'
- '\n'
- )
-
-
-def test_read_vrt_missing_sources_warns_and_records_hole(tmp_path):
- vrt = tmp_path / "tmp_1799_missing_warn.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
- da = read_vrt(str(vrt), missing_sources='warn')
-
- assert 'vrt_holes' in da.attrs
- assert da.attrs['vrt_holes'][0]['source'].endswith('missing.tif')
-
-
-def test_read_vrt_missing_sources_raise_fails_fast(tmp_path):
- vrt = tmp_path / "tmp_1799_missing_raise.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.raises((OSError, ValueError)):
- read_vrt(str(vrt), missing_sources='raise')
-
-
-def test_read_vrt_missing_sources_validates_policy(tmp_path):
- vrt = tmp_path / "tmp_1799_missing_bad_policy.vrt"
- _write_missing_source_vrt(vrt)
-
- with pytest.raises(ValueError, match="missing_sources"):
- read_vrt(str(vrt), missing_sources='ignore')
diff --git a/xrspatial/geotiff/tests/vrt/__init__.py b/xrspatial/geotiff/tests/vrt/__init__.py
new file mode 100644
index 00000000..128c43a8
--- /dev/null
+++ b/xrspatial/geotiff/tests/vrt/__init__.py
@@ -0,0 +1 @@
+"""VRT-focused tests for the GeoTIFF module."""
diff --git a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_2367.py b/xrspatial/geotiff/tests/vrt/test_missing_sources.py
similarity index 51%
rename from xrspatial/geotiff/tests/test_vrt_missing_sources_policy_2367.py
rename to xrspatial/geotiff/tests/vrt/test_missing_sources.py
index 071e9e92..0e41826e 100644
--- a/xrspatial/geotiff/tests/test_vrt_missing_sources_policy_2367.py
+++ b/xrspatial/geotiff/tests/vrt/test_missing_sources.py
@@ -1,33 +1,29 @@
-"""Consolidated VRT ``missing_sources`` policy matrix (#2367, work item of #2342).
+"""VRT ``missing_sources`` policy matrix.
-This file complements ``test_vrt_missing_sources_policy_1799.py`` and
-``test_vrt_chunked_missing_sources_1799.py`` by covering the full
-release contract in one place: every policy value (default,
-``'raise'``, ``'warn'``, invalid) is exercised against both read paths
-(eager ``read_vrt`` and dask ``open_geotiff(..., chunks=...)``), with
-assertions on the exception or warning type, the message text, and the
-actual output array values where applicable.
-
-The existing 1799 / 1843 / 2265 tests pin individual cases. This file
-keeps the four-by-two matrix together so a future kwarg refactor that
-silently drops parity between the eager and chunked paths regresses a
-single, focused test file.
+Consolidates the eager-only smoke checks that previously lived in
+``test_vrt_missing_sources_policy_1799.py`` with the eager-plus-chunked
+matrix from ``test_vrt_missing_sources_policy_2367.py``.
Release contract (see ``_backends/vrt.py:206`` docstring):
-* ``'raise'`` is the default since #1860.
-* ``'raise'`` fails fast with ``FileNotFoundError`` naming the missing
- source path. The chunked path raises at build time (#2265) so a
+* ``'raise'`` is the default. The eager and chunked paths both fail
+ fast with ``FileNotFoundError`` naming the missing source path so a
partial mosaic never surfaces silently from a delayed compute.
-* ``'warn'`` is the explicit opt-in. It emits
- ``GeoTIFFFallbackWarning`` naming the missing source and returns the
- mosaic with NaN (or the band's nodata sentinel) in the corresponding
- region. ``attrs['vrt_holes']`` records the affected source(s).
-* Any other value raises ``ValueError`` naming the bad kwarg.
+* ``'warn'`` is the explicit opt-in. It emits ``GeoTIFFFallbackWarning``
+ naming the missing source and returns the mosaic with NaN (or the
+ band's nodata sentinel) in the corresponding region.
+ ``attrs['vrt_holes']`` records the affected source(s).
+* Any other value raises ``ValueError`` naming the bad kwarg and
+ echoing the bad value via ``repr()``.
+
+The companion file ``test_vrt_missing_sources_default_raise_1843.py``
+stays in place for now: it exercises the internal
+``xrspatial.geotiff._vrt.read_vrt`` entry point and the
+``XRSPATIAL_GEOTIFF_STRICT=1`` env-var override, neither of which is in
+this module's surface.
"""
from __future__ import annotations
-import os
import warnings
import numpy as np
@@ -45,15 +41,46 @@
PRESENT_FILL = 7.0
-def _build_partial_vrt(tmp_path) -> tuple[str, str, str]:
- """Build a 2-source VRT: left half is real, right half points at a
- non-existent file.
+# ---------------------------------------------------------------------------
+# VRT fixtures.
+#
+# Two shapes:
+#
+# * ``byte_missing_vrt`` -- a 2x2 ``Byte`` VRT whose only source does not
+# exist on disk. The smallest case that exercises the missing-source
+# guard. Inherited from the old eager-only smoke checks.
+# * ``partial_float_vrt`` -- an 8x4 ``Float32`` VRT split across two
+# sources. The left half points at a real GeoTIFF written through
+# ``to_geotiff``; the right half points at a missing file. Exercises
+# the NaN-fill / vrt_holes contract that the chunked path also has to
+# honour at compute time.
+# ---------------------------------------------------------------------------
+
+def _write_byte_missing_vrt(tmp_path) -> str:
+ """All-missing 2x2 Byte VRT. Returns the VRT path as ``str``."""
+ vrt = tmp_path / "byte_missing.vrt"
+ vrt.write_text(
+ '\n'
+ ' \n'
+ ' \n'
+ ' missing.tif'
+ '\n'
+ ' 1\n'
+ ' \n'
+ ' \n'
+ ' \n'
+ ' \n'
+ '\n'
+ )
+ return str(vrt)
+
+
+def _write_partial_float_vrt(tmp_path) -> tuple[str, str, str]:
+ """Two-source partial mosaic.
- Returns ``(vrt_path, present_src_path, missing_path)``. Filenames
- embed issue #2367 to keep parallel test runs from colliding on
- shared tmp roots.
+ Returns ``(vrt_path, present_src_path, missing_path)`` as strings.
"""
- src = os.path.join(tmp_path, "src_2367_present.tif")
+ src = str(tmp_path / "src_present.tif")
arr = np.full((4, 4), PRESENT_FILL, dtype=np.float32)
da = xr.DataArray(
arr, dims=("y", "x"),
@@ -61,33 +88,32 @@ def _build_partial_vrt(tmp_path) -> tuple[str, str, str]:
)
to_geotiff(da, src)
- missing = os.path.join(tmp_path, "missing_2367.tif")
- vrt_path = os.path.join(tmp_path, "partial_2367.vrt")
- with open(vrt_path, "w") as f:
- f.write(
- '\n'
- '0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
- '\n'
- '\n'
- f'{src}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- f'{missing}\n'
- '1\n'
- '\n'
- '\n'
- '\n'
- '\n'
- '\n'
- )
- return vrt_path, src, missing
+ missing = str(tmp_path / "missing_source.tif")
+ vrt_path = tmp_path / "partial.vrt"
+ vrt_path.write_text(
+ '\n'
+ ' 0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n'
+ ' \n'
+ ' \n'
+ f' {src}\n'
+ ' 1\n'
+ ' \n'
+ ' \n'
+ ' \n'
+ ' \n'
+ f' {missing}\n'
+ ' 1\n'
+ ' \n'
+ ' \n'
+ ' \n'
+ ' \n'
+ '\n'
+ )
+ return str(vrt_path), src, missing
# ---------------------------------------------------------------------------
-# Reader-path fixtures. Each "reader" callable accepts ``(source,
+# Reader-path parametrisation. Each ``reader`` callable takes ``(source,
# **kwargs)`` and returns a DataArray. The eager reader returns a numpy-
# backed array; the dask reader returns a chunked DataArray that still
# needs ``.compute()`` to materialise values.
@@ -106,8 +132,8 @@ def _dask_reader(source, **kwargs):
READERS = [
- pytest.param(_eager_reader, id="eager_read_vrt"),
- pytest.param(_dask_reader, id="dask_open_geotiff_chunks"),
+ pytest.param(_eager_reader, id="eager"),
+ pytest.param(_dask_reader, id="dask"),
]
@@ -117,26 +143,31 @@ def _dask_reader(source, **kwargs):
class TestDefaultPolicyRaises:
"""No ``missing_sources`` kwarg -> ``FileNotFoundError`` naming the
- missing source. This is the public default since #1860 and the
- release matrix in #2342 calls it out as a hard contract."""
+ missing source. The public default since the lenient-by-default
+ behaviour was removed."""
@pytest.mark.parametrize("reader", READERS)
def test_default_raises_filenotfound_naming_source(
self, reader, tmp_path,
):
- vrt_path, _, missing = _build_partial_vrt(str(tmp_path))
+ vrt_path, _, missing = _write_partial_float_vrt(tmp_path)
with pytest.raises(FileNotFoundError) as excinfo:
reader(vrt_path)
# The basename of the missing source must appear in the
- # message. The chunked path quotes the full path; the eager
- # path may quote just the source filename or the resolved
- # absolute path depending on which guard fires first. Match on
- # the basename to stay portable across both.
- assert "missing_2367.tif" in str(excinfo.value), (
+ # message. Different code paths quote the full path vs just the
+ # filename; matching on the basename keeps this portable.
+ assert "missing_source.tif" in str(excinfo.value), (
f"Default policy raise must name the missing source. "
f"Got: {excinfo.value!r}"
)
+ def test_eager_byte_default_raises(self, tmp_path):
+ """Smoke check for the byte-band path with no real source on
+ disk."""
+ vrt = _write_byte_missing_vrt(tmp_path)
+ with pytest.raises((OSError, ValueError)):
+ read_vrt(vrt)
+
# ---------------------------------------------------------------------------
# Explicit raise: same shape as default.
@@ -149,14 +180,19 @@ class TestExplicitRaisePolicy:
@pytest.mark.parametrize("reader", READERS)
def test_explicit_raise_matches_default(self, reader, tmp_path):
- vrt_path, _, _ = _build_partial_vrt(str(tmp_path))
+ vrt_path, _, _ = _write_partial_float_vrt(tmp_path)
with pytest.raises(FileNotFoundError) as excinfo:
reader(vrt_path, missing_sources="raise")
- assert "missing_2367.tif" in str(excinfo.value)
+ assert "missing_source.tif" in str(excinfo.value)
+
+ def test_eager_byte_explicit_raise(self, tmp_path):
+ vrt = _write_byte_missing_vrt(tmp_path)
+ with pytest.raises((OSError, ValueError)):
+ read_vrt(vrt, missing_sources="raise")
# ---------------------------------------------------------------------------
-# Warn opt-in: warning class, message, and output values all pinned.
+# Warn opt-in: warning class, message, vrt_holes, and array values pinned.
# ---------------------------------------------------------------------------
class TestWarnPolicyEmitsWarningAndFillsNodata:
@@ -175,20 +211,16 @@ class TestWarnPolicyEmitsWarningAndFillsNodata:
"""
def test_eager_warn_emits_and_fills(self, tmp_path):
- vrt_path, _, missing = _build_partial_vrt(str(tmp_path))
- # Use ``match=`` for the class + message check in one step,
- # matching the sibling 1799 test's style.
+ vrt_path, _, missing = _write_partial_float_vrt(tmp_path)
with pytest.warns(
- GeoTIFFFallbackWarning, match="missing_2367.tif",
+ GeoTIFFFallbackWarning, match="missing_source.tif",
):
da = read_vrt(vrt_path, missing_sources="warn")
- # vrt_holes attr is populated and points at the missing file.
assert "vrt_holes" in da.attrs
sources = [h["source"] for h in da.attrs["vrt_holes"]]
- assert any(s.endswith("missing_2367.tif") for s in sources)
+ assert any(s.endswith("missing_source.tif") for s in sources)
- # Output values: present half == 7.0, missing half == NaN.
out = np.asarray(da)
np.testing.assert_array_equal(
out[:, :4], np.full((4, 4), PRESENT_FILL, dtype=np.float32),
@@ -199,9 +231,9 @@ def test_eager_warn_emits_and_fills(self, tmp_path):
)
def test_dask_warn_emits_at_compute_and_fills(self, tmp_path):
- vrt_path, _, missing = _build_partial_vrt(str(tmp_path))
- # Build the lazy DataArray. The parse-time sweep populates
- # ``vrt_holes`` here without forcing a decode.
+ vrt_path, _, missing = _write_partial_float_vrt(tmp_path)
+ # The parse-time sweep populates ``vrt_holes`` at build so
+ # callers can branch on partial mosaics without computing.
da = open_geotiff(
vrt_path, chunks=4, missing_sources="warn",
)
@@ -218,7 +250,7 @@ def test_dask_warn_emits_at_compute_and_fills(self, tmp_path):
str(w.message) for w in caught
if isinstance(w.message, GeoTIFFFallbackWarning)
]
- assert any("missing_2367.tif" in m for m in msgs), (
+ assert any("missing_source.tif" in m for m in msgs), (
f"Chunked warn path must emit GeoTIFFFallbackWarning at "
f"compute naming the missing source; got: {msgs!r}"
)
@@ -229,6 +261,15 @@ def test_dask_warn_emits_at_compute_and_fills(self, tmp_path):
)
assert np.all(np.isnan(out[:, 4:]))
+ def test_eager_byte_warn_records_hole(self, tmp_path):
+ """Byte-band warn path: warning fires and ``vrt_holes`` is
+ populated even when there is no present half."""
+ vrt = _write_byte_missing_vrt(tmp_path)
+ with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
+ da = read_vrt(vrt, missing_sources="warn")
+ assert "vrt_holes" in da.attrs
+ assert da.attrs["vrt_holes"][0]["source"].endswith("missing.tif")
+
# ---------------------------------------------------------------------------
# Invalid policy strings.
@@ -250,7 +291,7 @@ class TestInvalidPolicyRejected:
def test_invalid_policy_raises_value_error_naming_value(
self, reader, bad_value, tmp_path,
):
- vrt_path, _, _ = _build_partial_vrt(str(tmp_path))
+ vrt_path, _, _ = _write_partial_float_vrt(tmp_path)
with pytest.raises(ValueError) as excinfo:
reader(vrt_path, missing_sources=bad_value)
msg = str(excinfo.value)
@@ -265,3 +306,12 @@ def test_invalid_policy_raises_value_error_naming_value(
f"ValueError must echo the bad value back to the caller; "
f"got {msg!r}"
)
+
+ def test_eager_byte_invalid_policy(self, tmp_path):
+ """Byte-band smoke check. The parametrised matrix above covers
+ more bad values across both reader paths; this stays as a
+ literal copy of the original assertion so the byte-band code
+ path stays exercised."""
+ vrt = _write_byte_missing_vrt(tmp_path)
+ with pytest.raises(ValueError, match="missing_sources"):
+ read_vrt(vrt, missing_sources="ignore")