Skip to content

Commit ae4dca4

Browse files
committed
geotiff: GPU + dask+GPU coverage for predictor=3 + integer reject (#1933)
#1933 added _validate_predictor_sample_format and wired it into every IFD-read site, including two GPU sites that had no targeted tests: - _backends/gpu.py:443 -- tiled eager GPU validator - _backends/gpu.py:999 -- GDS chunked GPU validator The eager and dask paths are covered by test_predictor3_int_dtype_1933. A regression dropping either GPU validator call would let malformed predictor=3 + integer tiled files decode silently to garbage bytes on GPU and ship under existing CI. Adds 9 tests, all passing on a CUDA host: - read_geotiff_gpu on stripped + tiled malformed files - open_geotiff(gpu=True) and open_geotiff(chunks=, gpu=True) dispatchers - read_geotiff_gpu(chunks=) on stripped + tiled - legitimate predictor=3 + float32 tiled file still round-trips on GPU (eager + dask+GPU) - error-message parity between GPU and eager paths Mutation against the tiled GPU validator at gpu.py:443 flipped the tiled-raises test red; mutation against the GDS chunked validator at gpu.py:999 flipped the chunked-tiled + chunked-dispatcher tests red.
1 parent 31c8f77 commit ae4dca4

1 file changed

Lines changed: 299 additions & 0 deletions

File tree

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
"""GPU + dask+GPU backend coverage for issue #1933.
2+
3+
#1933 added ``_validate_predictor_sample_format`` and wired it into
4+
every IFD-read site (eager numpy, dask, GPU tiled, GPU stripped). The
5+
eager and dask paths are covered by ``test_predictor3_int_dtype_1933``;
6+
this module closes the GPU coverage gap.
7+
8+
The validator is invoked at two GPU sites:
9+
10+
* ``_backends/gpu.py:443`` -- the tiled eager GPU read path. Reached when
11+
the file is tiled and ``bps == file_dtype.itemsize * 8`` (so the
12+
bps_mismatch fallback at line 358 does not take over).
13+
* ``_backends/gpu.py:999`` -- the GDS chunked GPU path
14+
(``_read_geotiff_gpu_chunked_gds``). Reached when the file qualifies
15+
for direct disk->GPU decode.
16+
17+
The stripped GPU path falls back to CPU via ``_read_to_array`` and the
18+
CPU-side validator there fires; the dask+GPU non-GDS path delegates to
19+
``read_geotiff_dask`` which has its own validator (covered by the
20+
existing dask test). The two NEW call sites have no targeted tests.
21+
22+
A regression dropping either of those two validator calls would let
23+
malformed predictor=3 + integer tiled files decode silently to
24+
garbage bytes on GPU. The eager-test asserts the error path is wired
25+
on CPU; this module asserts the GPU dispatcher path is wired too.
26+
"""
27+
from __future__ import annotations
28+
29+
import importlib.util
30+
31+
import numpy as np
32+
import pytest
33+
34+
from xrspatial.geotiff._compression import COMPRESSION_NONE
35+
from xrspatial.geotiff._dtypes import LONG, SHORT, numpy_to_tiff_dtype
36+
from xrspatial.geotiff._header import (
37+
TAG_BITS_PER_SAMPLE,
38+
TAG_COMPRESSION,
39+
TAG_IMAGE_LENGTH,
40+
TAG_IMAGE_WIDTH,
41+
TAG_PHOTOMETRIC,
42+
TAG_PREDICTOR,
43+
TAG_SAMPLE_FORMAT,
44+
TAG_SAMPLES_PER_PIXEL,
45+
TAG_STRIP_BYTE_COUNTS,
46+
TAG_STRIP_OFFSETS,
47+
TAG_ROWS_PER_STRIP,
48+
TAG_TILE_BYTE_COUNTS,
49+
TAG_TILE_LENGTH,
50+
TAG_TILE_OFFSETS,
51+
TAG_TILE_WIDTH,
52+
)
53+
from xrspatial.geotiff._writer import (
54+
_assemble_standard_layout,
55+
_write_stripped,
56+
)
57+
58+
59+
def _gpu_available() -> bool:
60+
if importlib.util.find_spec("cupy") is None:
61+
return False
62+
try:
63+
import cupy
64+
65+
return bool(cupy.cuda.is_available())
66+
except Exception:
67+
return False
68+
69+
70+
# Probe the GPU once at import time; the module-level ``pytestmark`` then
# applies the skip condition to every test defined in this file.
_HAS_GPU = _gpu_available()
pytestmark = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
74+
75+
76+
def _build_predictor3_uint32_stripped_tiff(arr: np.ndarray) -> bytes:
    """Assemble a single-strip TIFF tagged Predictor=3 with SampleFormat=1.

    The combination is intentionally malformed: the file declares
    predictor 3 while the sample format tag is the integer value ``1``.
    The pixel data is written with ``COMPRESSION_NONE`` and all rows go
    into one strip (``RowsPerStrip`` equals the image height).

    Args:
        arr: 2-D array providing the pixel data. BitsPerSample is derived
            from ``arr.dtype`` via ``numpy_to_tiff_dtype``; the tests in
            this module pass ``uint32`` arrays.

    Returns:
        The complete TIFF file contents as bytes (classic TIFF, not
        BigTIFF).
    """
    # NOTE(review): the trailing ``False`` is an opaque ``_write_stripped``
    # flag from this file's point of view -- presumably "do not apply a
    # predictor to the bytes"; confirm against the writer.
    strip_offsets, strip_byte_counts, strip_chunks = _write_stripped(
        arr, COMPRESSION_NONE, False)
    bits, _ = numpy_to_tiff_dtype(arr.dtype)
    n_cols = arr.shape[1]
    n_rows = arr.shape[0]

    # (tag, field type, value) for every entry that carries one value.
    single_value_entries = (
        (TAG_IMAGE_WIDTH, LONG, n_cols),
        (TAG_IMAGE_LENGTH, LONG, n_rows),
        (TAG_BITS_PER_SAMPLE, SHORT, bits),
        (TAG_COMPRESSION, SHORT, COMPRESSION_NONE),
        (TAG_PHOTOMETRIC, SHORT, 1),
        (TAG_SAMPLES_PER_PIXEL, SHORT, 1),
        (TAG_SAMPLE_FORMAT, SHORT, 1),   # integer format: the malformed half
        (TAG_PREDICTOR, SHORT, 3),       # predictor 3: the other half
        (TAG_ROWS_PER_STRIP, SHORT, n_rows),
    )
    ifd_tags = [
        (tag, field_type, 1, value)
        for tag, field_type, value in single_value_entries
    ]
    ifd_tags.append(
        (TAG_STRIP_OFFSETS, LONG, len(strip_offsets), strip_offsets))
    ifd_tags.append(
        (TAG_STRIP_BYTE_COUNTS, LONG, len(strip_byte_counts),
         strip_byte_counts))

    image_part = (
        arr, n_cols, n_rows, strip_offsets, strip_byte_counts, strip_chunks)
    # NOTE(review): the leading 8 is presumably the classic-TIFF header
    # size / first-IFD offset -- confirm against _assemble_standard_layout.
    return _assemble_standard_layout(
        8, [ifd_tags], [image_part], bigtiff=False)
101+
102+
103+
def _build_predictor3_uint32_tiled_tiff(
    arr: np.ndarray, tile_w: int = 16, tile_h: int = 16,
) -> bytes:
    """Assemble a tiled TIFF tagged Predictor=3 with SampleFormat=1.

    Same malformed tag combination as the stripped builder, but laid out
    in tiles so that readers take their tiled code path. Tiles are
    emitted row-major and uncompressed; tiles that overhang the right or
    bottom image edge are zero-padded to the full tile size.

    Args:
        arr: 2-D array providing the pixel data. BitsPerSample is derived
            from ``arr.dtype`` via ``numpy_to_tiff_dtype``.
        tile_w: Tile width in pixels (default 16).
        tile_h: Tile height in pixels (default 16).

    Returns:
        The complete TIFF file contents as bytes (classic TIFF, not
        BigTIFF).
    """
    bits, _ = numpy_to_tiff_dtype(arr.dtype)
    h, w = arr.shape

    # Ceiling division: a partial tile at an edge still counts as a tile.
    tiles_across = (w + tile_w - 1) // tile_w
    tiles_down = (h + tile_h - 1) // tile_h
    full_tile_shape = (tile_h, tile_w)

    tile_payloads: list[bytes] = []
    for tile_row in range(tiles_down):
        top = tile_row * tile_h
        bottom = min(top + tile_h, h)
        for tile_col in range(tiles_across):
            left = tile_col * tile_w
            right = min(left + tile_w, w)
            window = arr[top:bottom, left:right]
            if window.shape == full_tile_shape:
                tile = np.ascontiguousarray(window)
            else:
                # Edge tile: embed the partial window in a zero-filled tile.
                tile = np.zeros(full_tile_shape, dtype=arr.dtype)
                tile[: window.shape[0], : window.shape[1]] = window
            tile_payloads.append(tile.tobytes())

    # Offsets are relative to the start of the tile data: each tile begins
    # where the previous one ended.
    byte_counts: list[int] = [len(payload) for payload in tile_payloads]
    relative_offsets: list[int] = []
    running_total = 0
    for size in byte_counts:
        relative_offsets.append(running_total)
        running_total += size

    # (tag, field type, value) for every entry that carries one value.
    single_value_entries = (
        (TAG_IMAGE_WIDTH, LONG, w),
        (TAG_IMAGE_LENGTH, LONG, h),
        (TAG_BITS_PER_SAMPLE, SHORT, bits),
        (TAG_COMPRESSION, SHORT, COMPRESSION_NONE),
        (TAG_PHOTOMETRIC, SHORT, 1),
        (TAG_SAMPLES_PER_PIXEL, SHORT, 1),
        (TAG_SAMPLE_FORMAT, SHORT, 1),   # integer format: the malformed half
        (TAG_PREDICTOR, SHORT, 3),       # predictor 3: the other half
        (TAG_TILE_WIDTH, LONG, tile_w),
        (TAG_TILE_LENGTH, LONG, tile_h),
    )
    ifd_tags = [
        (tag, field_type, 1, value)
        for tag, field_type, value in single_value_entries
    ]
    ifd_tags.append(
        (TAG_TILE_OFFSETS, LONG, len(relative_offsets), relative_offsets))
    ifd_tags.append(
        (TAG_TILE_BYTE_COUNTS, LONG, len(byte_counts), byte_counts))

    image_part = (arr, w, h, relative_offsets, byte_counts, tile_payloads)
    # NOTE(review): the leading 8 is presumably the classic-TIFF header
    # size / first-IFD offset -- confirm against _assemble_standard_layout.
    return _assemble_standard_layout(
        8, [ifd_tags], [image_part], bigtiff=False)
157+
158+
159+
class TestGPUEagerRejectsMalformedFile:
    """Eager GPU reads must refuse predictor=3 + integer SampleFormat."""

    def test_gpu_eager_stripped_raises(self, tmp_path):
        """``read_geotiff_gpu`` raises on the stripped malformed file."""
        from xrspatial.geotiff import read_geotiff_gpu

        pixels = np.arange(1, 9, dtype=np.uint32).reshape(2, 4)
        tif_path = tmp_path / "pred3_uint32_stripped.tif"
        payload = _build_predictor3_uint32_stripped_tiff(pixels)
        tif_path.write_bytes(payload)

        with pytest.raises(ValueError, match="Predictor=3"):
            read_geotiff_gpu(str(tif_path))

    def test_gpu_eager_tiled_raises(self, tmp_path):
        """``read_geotiff_gpu`` raises on the tiled malformed file.

        The tiled layout is the one that reaches the tiled GPU validator
        at gpu.py:443. It is distinct from the stripped case above: the
        stripped path's validator lives in ``_read_to_array`` and would
        still raise, so a regression that dropped the line 443 call would
        slip past the stripped test and be caught only here.
        """
        from xrspatial.geotiff import read_geotiff_gpu

        pixels = np.arange(256, dtype=np.uint32).reshape(16, 16)
        tif_path = tmp_path / "pred3_uint32_tiled.tif"
        payload = _build_predictor3_uint32_tiled_tiff(pixels)
        tif_path.write_bytes(payload)

        with pytest.raises(ValueError, match="Predictor=3"):
            read_geotiff_gpu(str(tif_path))

    def test_gpu_dispatcher_eager_raises(self, tmp_path):
        """The ``open_geotiff(gpu=True)`` dispatcher rejects the file."""
        from xrspatial.geotiff import open_geotiff

        pixels = np.arange(64, dtype=np.uint32).reshape(8, 8)
        tif_path = tmp_path / "pred3_uint32_dispatch.tif"
        payload = _build_predictor3_uint32_stripped_tiff(pixels)
        tif_path.write_bytes(payload)

        with pytest.raises(ValueError, match="Predictor=3"):
            open_geotiff(str(tif_path), gpu=True)
197+
198+
199+
class TestGPUChunkedRejectsMalformedFile:
    """Chunked (dask+GPU) reads must refuse predictor=3 + integer too."""

    def test_read_geotiff_gpu_chunked_stripped_raises(self, tmp_path):
        """``read_geotiff_gpu(chunks=4)`` raises on the stripped file."""
        from xrspatial.geotiff import read_geotiff_gpu

        pixels = np.arange(64, dtype=np.uint32).reshape(8, 8)
        tif_path = tmp_path / "pred3_uint32_chunked_str.tif"
        payload = _build_predictor3_uint32_stripped_tiff(pixels)
        tif_path.write_bytes(payload)

        with pytest.raises(ValueError, match="Predictor=3"):
            read_geotiff_gpu(str(tif_path), chunks=4)

    def test_read_geotiff_gpu_chunked_tiled_raises(self, tmp_path):
        """``read_geotiff_gpu(chunks=16)`` raises on the tiled file.

        This routes through ``_read_geotiff_gpu_chunked``. When KvikIO is
        usable, ``_read_geotiff_gpu_chunked_gds`` runs the validator at
        gpu.py:999 while the graph is being built; when it is not, the
        CPU dask fallback raises with the same message. The assertion
        therefore pins the caller-visible contract (the malformed file
        is rejected), not which branch produced the error.
        """
        from xrspatial.geotiff import read_geotiff_gpu

        pixels = np.arange(256, dtype=np.uint32).reshape(16, 16)
        tif_path = tmp_path / "pred3_uint32_chunked_tiled.tif"
        payload = _build_predictor3_uint32_tiled_tiff(pixels)
        tif_path.write_bytes(payload)

        with pytest.raises(ValueError, match="Predictor=3"):
            read_geotiff_gpu(str(tif_path), chunks=16)

    def test_open_geotiff_chunks_gpu_dispatcher_raises(self, tmp_path):
        """The ``open_geotiff(chunks=, gpu=True)`` dispatcher rejects it."""
        from xrspatial.geotiff import open_geotiff

        pixels = np.arange(256, dtype=np.uint32).reshape(16, 16)
        tif_path = tmp_path / "pred3_uint32_chunked_dispatch.tif"
        payload = _build_predictor3_uint32_tiled_tiff(pixels)
        tif_path.write_bytes(payload)

        with pytest.raises(ValueError, match="Predictor=3"):
            open_geotiff(str(tif_path), chunks=8, gpu=True)
238+
239+
240+
class TestValidPredictor3StillWorksOnGPU:
    """Well-formed predictor=3 + float32 tiled files keep decoding on GPU.

    Guards against the validator over-rejecting: only the integer
    combination is malformed, so the float32 case must round-trip.
    """

    @staticmethod
    def _write_float32_fixture(tifffile, target):
        """Write the 16x16 float32 fixture to ``target``; return its array.

        The file is produced by ``tifffile.imwrite`` with ``predictor=3``,
        deflate compression and a single 16x16 tile.
        """
        expected = np.linspace(
            -1.0, 1.0, 256, dtype=np.float32).reshape(16, 16)
        tifffile.imwrite(
            str(target), expected, predictor=3, compression="deflate",
            tile=(16, 16))
        return expected

    def test_predictor3_float32_gpu_round_trip(self, tmp_path):
        """Eager ``read_geotiff_gpu`` returns the exact float32 values."""
        tifffile = pytest.importorskip("tifffile")
        pytest.importorskip("imagecodecs")

        from xrspatial.geotiff import read_geotiff_gpu

        tif_path = tmp_path / "pred3_float32_tiled.tif"
        expected = self._write_float32_fixture(tifffile, tif_path)

        decoded = read_geotiff_gpu(str(tif_path))
        assert decoded.dtype == np.float32
        # ``.get()`` copies the device array back to host for comparison.
        np.testing.assert_array_equal(decoded.data.get(), expected)

    def test_predictor3_float32_dask_gpu_round_trip(self, tmp_path):
        """Chunked ``read_geotiff_gpu(chunks=8)`` returns the same values."""
        tifffile = pytest.importorskip("tifffile")
        pytest.importorskip("imagecodecs")

        from xrspatial.geotiff import read_geotiff_gpu

        tif_path = tmp_path / "pred3_float32_dask.tif"
        expected = self._write_float32_fixture(tifffile, tif_path)

        decoded = read_geotiff_gpu(str(tif_path), chunks=8)
        assert decoded.dtype == np.float32
        # Materialise the lazy result, then copy device -> host.
        np.testing.assert_array_equal(decoded.compute().data.get(), expected)
274+
275+
276+
class TestErrorMessageStable:
    """The GPU error wording matches the eager wording.

    Cross-backend error parity is a real concern -- a regression that
    fired the validator on GPU but with a different message would force
    callers to special-case the backend on ``except ValueError``.

    Both layouts are checked. Comparing the stripped file alone is not
    enough: a tiled file is needed for the read to reach the tiled GPU
    validator, so the tiled case is the one that compares the GPU-side
    message against the eager one.
    """

    @staticmethod
    def _assert_same_message(path):
        """Read ``path`` via the eager and GPU entry points; compare errors.

        Both reads must raise ``ValueError`` mentioning ``Predictor=3``
        (so an unrelated ``ValueError`` raised identically on both paths
        cannot make the parity check pass vacuously), and the two
        messages must be string-equal.
        """
        from xrspatial.geotiff import open_geotiff, read_geotiff_gpu

        with pytest.raises(ValueError, match="Predictor=3") as exc_eager:
            open_geotiff(str(path))
        with pytest.raises(ValueError, match="Predictor=3") as exc_gpu:
            read_geotiff_gpu(str(path))

        assert str(exc_eager.value) == str(exc_gpu.value), (
            "GPU and eager paths must surface the same Predictor=3 "
            "error message so callers can use a single except branch."
        )

    def test_gpu_error_message_matches_eager(self, tmp_path):
        """Stripped malformed file: GPU and eager messages are identical."""
        arr = np.arange(64, dtype=np.uint32).reshape(8, 8)
        path = tmp_path / "pred3_uint32_msg.tif"
        path.write_bytes(_build_predictor3_uint32_stripped_tiff(arr))

        self._assert_same_message(path)

    def test_gpu_tiled_error_message_matches_eager(self, tmp_path):
        """Tiled malformed file: GPU and eager messages are identical."""
        arr = np.arange(256, dtype=np.uint32).reshape(16, 16)
        path = tmp_path / "pred3_uint32_msg_tiled.tif"
        path.write_bytes(_build_predictor3_uint32_tiled_tiff(arr))

        self._assert_same_message(path)

0 commit comments

Comments
 (0)