Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 40 additions & 15 deletions xrspatial/geotiff/_backends/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,9 +373,10 @@ def read_geotiff_gpu(source: str, *,
# Append sibling `.tif.ovr` sidecar IFDs onto the pyramid list so
# ``overview_level`` indexes both internal and external overviews
# (issue #2112). When the selected IFD comes from the sidecar,
# we swap ``data`` / ``header`` to the sidecar's buffers below
# and skip the GDS fast path -- GDS reads the source file path,
# which would point at the base file rather than the sidecar.
# tile / strip reads below use ``ifd_data`` / ``ifd_header`` for
# the sidecar's buffers, and we skip the GDS fast path -- GDS
# reads the source file path, which would point at the base
# file rather than the sidecar.
from .._sidecar import attach_sidecar_origin, close_sidecar, find_sidecar, load_sidecar
sidecar_origin: dict[int, tuple] = {}
sidecar_path = find_sidecar(source)
Expand All @@ -388,21 +389,36 @@ def read_geotiff_gpu(source: str, *,
# Skip mask IFDs (NewSubfileType bit 2)
ifd = select_overview_ifd(ifds, overview_level)

# Swap base data / header for the sidecar's buffers when the
# requested overview level lives in the sidecar. Subsequent
# tile / strip reads slice the right buffer.
# Keep ``data`` / ``header`` bound to the base file's buffers so
# the georef extractor below resolves base-IFD tag offsets
# against the right bytes. Introduce ``ifd_data`` / ``ifd_header``
# for tile / strip reads that need the sidecar buffer when the
# selected IFD lives in the sidecar (issue #2324).
ifd_data, ifd_header = data, header
origin = sidecar_origin.get(id(ifd))
if origin is not None:
data, header = origin
ifd_data, ifd_header = origin
sidecar_owned_ifd = True

bps = resolve_bits_per_sample(ifd.bits_per_sample)
file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
# Inherit georef from the level-0 IFD when the overview itself
# has no geokeys (issue #1640); pass-through for level 0.
# ``sidecar_origin`` routes tag-offset parsing to the sidecar
# bytes for any IFD that lives in the sidecar AND declares its
# own georef payload, mirroring the CPU eager path
# (``_reader.py``) and dask path (``__init__.py``). Without
# this, a sidecar-owned IFD that lacks geokeys would parse the
# base IFD against the sidecar bytes (issue #2324).
georef_origin = (
{iid: (od, oh.byte_order)
for iid, (od, oh) in sidecar_origin.items()}
if sidecar_origin else None
)
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, data, header.byte_order,
allow_rotated=allow_rotated)
allow_rotated=allow_rotated,
sidecar_origin=georef_origin)
# Capture the Orientation tag (274) once so the post-decode flip
# below picks it up for both the stripped fallback and the tiled
# GPU pipelines. CPU read_to_array applies the array remap +
Expand Down Expand Up @@ -670,7 +686,7 @@ def _read_once():
source, _read_once, band_offsets, band_byte_counts,
tw, th, width, height,
compression, predictor, file_dtype,
byte_order=header.byte_order,
byte_order=ifd_header.byte_order,
gpu=gpu,
)
if band_arr is None:
Expand Down Expand Up @@ -734,7 +750,7 @@ def _read_once():
source, offsets, byte_counts,
tw, th, width, height,
compression, predictor, file_dtype, samples,
byte_order=header.byte_order,
byte_order=ifd_header.byte_order,
masked_fill=masked_fill,
)
except Exception as e:
Expand All @@ -750,12 +766,13 @@ def _read_once():

if arr_gpu is None:
# Fallback: extract tiles via CPU mmap, then GPU decode. For
# sidecar IFDs the tile bytes already live in ``data`` (loaded
# from the .ovr above); re-opening ``source`` would point at the
# base file. Use the in-scope ``data`` directly in that case.
# sidecar IFDs the tile bytes already live in ``ifd_data``
# (loaded from the .ovr above); re-opening ``source`` would
# point at the base file. Use ``ifd_data`` directly in that
# case.
if sidecar_owned_ifd:
compressed_tiles = [
bytes(data[offsets[i]:offsets[i] + byte_counts[i]])
bytes(ifd_data[offsets[i]:offsets[i] + byte_counts[i]])
for i in range(len(offsets))
]
else:
Expand All @@ -775,7 +792,7 @@ def _read_once():
compressed_tiles,
tw, th, width, height,
compression, predictor, file_dtype, samples,
byte_order=header.byte_order,
byte_order=ifd_header.byte_order,
masked_fill=masked_fill,
)
except Exception as e:
Expand Down Expand Up @@ -1262,9 +1279,17 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level,
if not ifds:
raise ValueError("No IFDs found in TIFF file")
ifd = select_overview_ifd(ifds, overview_level)
# The GDS qualification probe parses base-file IFDs only --
# sidecar files do not qualify for the disk->GPU fast path
# and the chunked path falls through to ``read_geotiff_dask``
# which carries its own sidecar handling. Pass
# ``sidecar_origin=None`` explicitly so all four call sites
# of this helper share the same call shape (review nit
# on #2324).
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, raw, header.byte_order,
allow_rotated=allow_rotated,
sidecar_origin=None,
)
orientation = ifd.orientation
has_sparse_tile = (
Expand Down
165 changes: 165 additions & 0 deletions xrspatial/geotiff/tests/test_gpu_sidecar_georef_parity_2324.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""GPU + sidecar georef-attr parity across backends (issue #2324).

The GPU eager path used to overwrite the local ``data`` / ``header``
references with the sidecar's buffer before calling
``extract_geo_info_with_overview_inheritance``. For the common GDAL
sidecar case where overview IFDs lack their own geokeys, the helper
then parsed the inherited base IFD against the sidecar bytes -- the
returned georef came out silently wrong, while the CPU eager and dask
paths handled it correctly.

These tests load the same ``.tif`` + ``.tif.ovr`` pair via the three
backends and assert georef attrs match. The GPU portion skips cleanly
when cupy / CUDA is unavailable so the test runs on CI without a GPU
and still guards the metadata code on a real GPU box.

Test fixture helpers (sidecar writer, NewSubfileType insertion, georef
tag stripping) are shared with ``test_sidecar_own_geokeys_2315.py`` --
imported rather than duplicated so a fixture-format change there does
not silently desync this file.
"""
from __future__ import annotations

import importlib.util

import numpy as np
import pytest

from xrspatial.geotiff import open_geotiff, read_geotiff_dask

from .test_sidecar_own_geokeys_2315 import _write_pair


def _gpu_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return bool(cupy.cuda.is_available())
except Exception:
return False


_HAS_GPU = _gpu_available()
_gpu_only = pytest.mark.skipif(
not _HAS_GPU, reason="cupy + CUDA required",
)


def _attrs_subset(da):
"""Pull the georef attrs we want to compare across backends.

Coerce transform elements to plain ``float`` so an attr stored as a
numpy array (some backends round-trip through ``np.asarray``) does
not produce numpy scalars in the tuple. The equality check below
would still pass for matching values but the comparison semantics
are clearer with native Python floats.
"""
transform = da.attrs.get("transform", ())
return {
"transform": tuple(float(x) for x in transform),
"crs": da.attrs.get("crs"),
}


def test_sidecar_without_geokeys_attrs_match_cpu_vs_dask_2324(tmp_path):
"""Baseline: CPU eager and dask agree on inherited georef.

Pre-#2324 already passes; included so the parity matrix is complete
and a future regression on either path surfaces here too. The GPU
half lives in ``test_sidecar_without_geokeys_gpu_matches_cpu_2324``.
"""
base_geo = dict(origin_x=100.0, origin_y=200.0,
pixel_w=10.0, pixel_h=-10.0, epsg=4326)
side_geo = dict(origin_x=500.0, origin_y=800.0,
pixel_w=2.5, pixel_h=-2.5, epsg=3857)

base, _ = _write_pair(
tmp_path,
base_geo=base_geo, sidecar_geo=side_geo,
sidecar_has_geokeys=False,
)

cpu = open_geotiff(str(base), overview_level=1)
dsk = read_geotiff_dask(str(base), overview_level=1, chunks=2)

assert _attrs_subset(cpu) == _attrs_subset(dsk)
# And the inherited transform is what we expect from the base file
# (pixel size rescaled by 8/4 = 2 for the 4x4 overview).
t = cpu.attrs["transform"]
assert t[0] == pytest.approx(20.0)
assert t[2] == pytest.approx(100.0)
assert t[4] == pytest.approx(-20.0)
assert t[5] == pytest.approx(200.0)
assert cpu.attrs.get("crs") == 4326


@_gpu_only
def test_sidecar_without_geokeys_gpu_matches_cpu_2324(tmp_path):
"""Regression for #2324: GPU eager georef matches CPU / dask.

The sidecar is stripped of its georef tags, so the inheritance
helper has to walk back to the base IFD. Pre-fix, the GPU path
handed the helper the sidecar bytes (because ``data, header`` got
swapped before the call) and parsed the base IFD's georef offsets
against the sidecar buffer -- silently corrupt transform / crs.
"""
from xrspatial.geotiff import read_geotiff_gpu

base_geo = dict(origin_x=100.0, origin_y=200.0,
pixel_w=10.0, pixel_h=-10.0, epsg=4326)
side_geo = dict(origin_x=500.0, origin_y=800.0,
pixel_w=2.5, pixel_h=-2.5, epsg=3857)

base, _ = _write_pair(
tmp_path,
base_geo=base_geo, sidecar_geo=side_geo,
sidecar_has_geokeys=False,
)

cpu = open_geotiff(str(base), overview_level=1)
gpu = read_geotiff_gpu(str(base), overview_level=1)

# Same georef attrs as the CPU path.
assert _attrs_subset(gpu) == _attrs_subset(cpu)
# And pixel data also matches (sidecar bytes are still consumed for
# the actual decode; this confirms the fix did not break the tile
# read path while moving the variable rename).
np.testing.assert_array_equal(np.asarray(gpu.data.get()),
np.asarray(cpu.data))


@_gpu_only
def test_sidecar_with_own_geokeys_gpu_matches_cpu_2324(tmp_path):
"""GPU path routes a sidecar-owned georef payload to sidecar bytes.

When the sidecar IFD declares its own georef payload the helper's
``sidecar_origin`` argument routes the parse to the sidecar buffer.
The CPU eager / dask paths already pass the mapping; the GPU eager
path now passes it too. The sidecar's transform must win for both
backends.
"""
from xrspatial.geotiff import read_geotiff_gpu

base_geo = dict(origin_x=100.0, origin_y=200.0,
pixel_w=10.0, pixel_h=-10.0, epsg=4326)
side_geo = dict(origin_x=500.0, origin_y=800.0,
pixel_w=2.5, pixel_h=-2.5, epsg=3857)

base, _ = _write_pair(
tmp_path,
base_geo=base_geo, sidecar_geo=side_geo,
sidecar_has_geokeys=True,
)

cpu = open_geotiff(str(base), overview_level=1)
gpu = read_geotiff_gpu(str(base), overview_level=1)

assert _attrs_subset(gpu) == _attrs_subset(cpu)
# Sidecar's transform won on both sides.
t = gpu.attrs["transform"]
assert t[0] == pytest.approx(2.5)
assert t[2] == pytest.approx(500.0)
assert t[4] == pytest.approx(-2.5)
assert t[5] == pytest.approx(800.0)
assert gpu.attrs.get("crs") == 3857
Loading