Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/reference/geotiff_release_contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ category. The `Key` column matches the runtime key.
| `reader.http` | advanced | Plain HTTP/HTTPS reads; SSRF and private-host filters apply. |
| `reader.http_cog` | advanced | HTTP COG with range-request fetching. The transport surface (redirects, retries) is not yet contracted at the stable bar. |
| `reader.vrt` | advanced | Simple VRT mosaics. Full GDAL VRT parity is out of scope. |
| `reader.sidecar_ovr` | advanced | External `.tif.ovr` sidecar overviews. |
| `reader.sidecar_ovr` | advanced | External `.tif.ovr` sidecar overviews. A stale or malformed sidecar does not break a base read: when `overview_level` is `None` or `0`, the eager CPU, eager GPU, and dask metadata paths emit a warning and fall back to base-file-only behaviour. Requesting a specific external level surfaces the underlying parse error. A breach of a caller-set `max_cloud_bytes` budget still raises `CloudSizeLimitError` in either case. See issue #2416. |
| `reader.allow_rotated` | experimental | Opt-in `allow_rotated=True`; drops the axis-aligned `transform` attr in favour of `rotated_affine`. |
| `reader.allow_unparseable_crs` | experimental | Opt-in escape hatch for CRS strings pyproj cannot parse. |
| `reader.gpu` | experimental | GPU read path; no cross-backend numerical parity claim. |
Expand Down
56 changes: 43 additions & 13 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError,
UnsupportedGeoTIFFFeatureError)
from ._geotags import RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT, GeoTransform # noqa: F401
from ._reader import _MAX_CLOUD_BYTES_SENTINEL, UnsafeURLError
from ._reader import _MAX_CLOUD_BYTES_SENTINEL, CloudSizeLimitError, UnsafeURLError
from ._reader import read_to_array as _read_to_array
from ._runtime import (_CRS_WKT_DEPRECATED_SENTINEL, _GPU_DEPRECATED_SENTINEL, # noqa: F401
_MISSING_SOURCES_SENTINEL, _ON_GPU_FAILURE_SENTINEL, GeoTIFFFallbackWarning,
Expand Down Expand Up @@ -275,22 +275,52 @@ def _read_geo_info(source, *, overview_level: int | None = None,
# Append sibling `.tif.ovr` sidecar IFDs onto the pyramid list
# so ``overview_level`` indexes both internal and external
# overviews (issue #2112). Local file paths only.
#
# A broken sidecar must not break the base read. The release
# contract puts ``reader.local_file`` at the stable tier and
# ``reader.sidecar_ovr`` at advanced, so when an external tool
# leaves a stale or corrupt ``.ovr`` behind, the read falls back
# to base-only behaviour with a warning. This mirrors the eager
# CPU path in ``_reader._read_to_array`` and the dask metadata
# helper ``_sidecar.discover_remote_sidecar``. Issue #2416.
from ._sidecar import attach_sidecar_origin, find_sidecar, load_sidecar
sidecar_origin: dict[int, tuple] = {}
sidecar_path = find_sidecar(source)
if sidecar_path is not None:
sidecar = load_sidecar(sidecar_path)
# The origin mapping is consumed below for georef extraction
# only -- strip/tile bytes are sliced by ``read_to_array`` on
# the actual read. A sidecar IFD that carries its own
# GeoKeyDirectory / ModelPixelScale / ModelTiepoint /
# ModelTransformation needs the sidecar's byte order to
# parse cleanly; without the mapping the helper falls back
# to the base file's bytes (today's default, correct under
# the usual GDAL convention). See issue #2315.
sidecar_origin = attach_sidecar_origin(
sidecar.ifds, sidecar.data, sidecar.header)
ifds = ifds + sidecar.ifds
try:
sidecar = load_sidecar(sidecar_path)
except CloudSizeLimitError:
# Re-raised for symmetry with ``_reader._read_to_array``;
# the byte budget is a caller-set contract. In practice
# this branch is local-file-only (the cloud / HTTP cases
# are handled in the earlier ``_parse_cog_http_meta`` /
# ``_CloudSource`` branch), so the exception cannot fire
# from a local mmap today. The explicit re-raise keeps the
# two paths consistent if a future patch routes a
# cloud-source path through here.
raise
except Exception as exc:
warnings.warn(
f"Ignoring unreadable sidecar {sidecar_path!r}: "
f"{type(exc).__name__}: {exc}. Falling back to "
f"base-file-only read. Delete the .ovr file or pass "
f"overview_level>=1 to surface the parse error.",
RuntimeWarning,
stacklevel=3,
)
sidecar = None
if sidecar is not None:
# The origin mapping is consumed below for georef extraction
# only -- strip/tile bytes are sliced by ``read_to_array`` on
# the actual read. A sidecar IFD that carries its own
# GeoKeyDirectory / ModelPixelScale / ModelTiepoint /
# ModelTransformation needs the sidecar's byte order to
# parse cleanly; without the mapping the helper falls back
# to the base file's bytes (today's default, correct under
# the usual GDAL convention). See issue #2315.
sidecar_origin = attach_sidecar_origin(
sidecar.ifds, sidecar.data, sidecar.header)
ifds = ifds + sidecar.ifds
ifd = select_overview_ifd(ifds, overview_level)
# Inherit georef from the level-0 IFD when the overview itself
# has no geokeys (issue #1640). Pass-through for level 0. The
Expand Down
36 changes: 31 additions & 5 deletions xrspatial/geotiff/_backends/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from .._attrs import _finalize_eager_read, _finalize_lazy_read_attrs
from .._coords import coords_from_geo_info as _coords_from_geo_info
from .._nodata import NodataLifecycle as _NL
from .._reader import _MAX_CLOUD_BYTES_SENTINEL, _coerce_path
from .._reader import _MAX_CLOUD_BYTES_SENTINEL, CloudSizeLimitError, _coerce_path
from .._reader import read_to_array as _read_to_array
from .._runtime import (_GPU_DEPRECATED_SENTINEL, _MISSING_SOURCES_SENTINEL,
_ON_GPU_FAILURE_SENTINEL, _geotiff_strict_mode)
Expand Down Expand Up @@ -406,14 +406,40 @@ def read_geotiff_gpu(source: str, *,
# the sidecar's buffers, and we skip the GDS fast path -- GDS
# reads the source file path, which would point at the base
# file rather than the sidecar.
#
# A broken sidecar must not break a base read here either. The
# release contract puts ``reader.local_file`` at the stable tier
# and ``reader.sidecar_ovr`` at advanced. This matches the eager
# CPU path in ``_reader._read_to_array`` and the dask metadata
# helper ``_sidecar.discover_remote_sidecar``. Issue #2416.
from .._sidecar import attach_sidecar_origin, close_sidecar, find_sidecar, load_sidecar
sidecar_origin: dict[int, tuple] = {}
sidecar_path = find_sidecar(source)
if sidecar_path is not None:
sidecar = load_sidecar(sidecar_path)
sidecar_origin = attach_sidecar_origin(
sidecar.ifds, sidecar.data, sidecar.header)
ifds = ifds + sidecar.ifds
try:
sidecar = load_sidecar(sidecar_path)
except CloudSizeLimitError:
# Re-raised for symmetry with ``_reader._read_to_array``;
# the byte budget is a caller-set contract. The GPU eager
# path operates on a local mmap source today, so the
# exception cannot fire here. The explicit re-raise keeps
# the two paths consistent if a future patch routes a
# cloud-source path through here.
raise
except Exception as exc:
warnings.warn(
f"Ignoring unreadable sidecar {sidecar_path!r}: "
f"{type(exc).__name__}: {exc}. Falling back to "
f"base-file-only read. Delete the .ovr file or pass "
f"overview_level>=1 to surface the parse error.",
RuntimeWarning,
stacklevel=3,
)
sidecar = None
if sidecar is not None:
sidecar_origin = attach_sidecar_origin(
sidecar.ifds, sidecar.data, sidecar.header)
ifds = ifds + sidecar.ifds

# Skip mask IFDs (NewSubfileType bit 2)
ifd = select_overview_ifd(ifds, overview_level)
Expand Down
40 changes: 35 additions & 5 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
"""
from __future__ import annotations

import warnings

import numpy as np
# ``urllib3`` is kept as a top-level import here even though the HTTP
# source moved to ``_sources`` in #2228. ``test_http_no_stdlib_fallback_2050``
Expand Down Expand Up @@ -218,15 +220,43 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None,
# enforces (#2121). The sidecar must be loaded before IFD
# selection so ``overview_level`` indexes into a unified
# pyramid list.
#
# Sidecar load failures must not break a base read. The release
# contract classifies ``reader.local_file`` as stable and
# ``reader.sidecar_ovr`` as advanced (see
# ``docs/source/reference/geotiff_release_contract.md``); a
# stale, truncated, or malformed ``.ovr`` written by an external
# tool should not be able to take the stable surface down.
# ``CloudSizeLimitError`` is the one exception: that signals a
# caller-set byte budget breach which the caller asked to hear
# about. Everything else (bad TIFF header, I/O error, fsspec
# failure) falls back to base-only behaviour with a warning so
# the user can still investigate. Mirrors the contract that
# ``discover_remote_sidecar`` already uses on the dask metadata
# path. Issue #2416.
from ._sidecar import attach_sidecar_origin, find_sidecar, load_sidecar
sidecar_origin: dict[int, tuple] = {}
sidecar_path = find_sidecar(source)
if sidecar_path is not None:
sidecar = load_sidecar(sidecar_path,
max_cloud_bytes=cloud_budget)
sidecar_origin = attach_sidecar_origin(
sidecar.ifds, sidecar.data, sidecar.header)
ifds = ifds + sidecar.ifds
try:
sidecar = load_sidecar(sidecar_path,
max_cloud_bytes=cloud_budget)
except CloudSizeLimitError:
raise
except Exception as exc:
warnings.warn(
f"Ignoring unreadable sidecar {sidecar_path!r}: "
f"{type(exc).__name__}: {exc}. Falling back to "
f"base-file-only read. Delete the .ovr file or pass "
f"overview_level>=1 to surface the parse error.",
RuntimeWarning,
stacklevel=3,
)
sidecar = None
if sidecar is not None:
sidecar_origin = attach_sidecar_origin(
sidecar.ifds, sidecar.data, sidecar.header)
ifds = ifds + sidecar.ifds

# Select IFD, skipping any mask IFDs
ifd = select_overview_ifd(ifds, overview_level)
Expand Down
Loading
Loading