Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,24 +273,38 @@ def _read_geo_info(source, *, overview_level: int | None = None,
# so ``overview_level`` indexes both internal and external
# overviews (issue #2112). Local file paths only.
from ._sidecar import attach_sidecar_origin, find_sidecar, load_sidecar
sidecar_origin: dict[int, tuple] = {}
sidecar_path = find_sidecar(source)
if sidecar_path is not None:
sidecar = load_sidecar(sidecar_path)
# Metadata-only path: drop the origin mapping. The reader
# only needs the merged IFD list to resolve the requested
# ``overview_level`` against; strip/tile bytes are sliced by
# ``read_to_array`` on the actual read.
attach_sidecar_origin(
# The origin mapping is consumed below for georef extraction
# only -- strip/tile bytes are sliced by ``read_to_array`` on
# the actual read. A sidecar IFD that carries its own
# GeoKeyDirectory / ModelPixelScale / ModelTiepoint /
# ModelTransformation needs the sidecar's byte order to
# parse cleanly; without the mapping the helper falls back
# to the base file's bytes (today's default, correct under
# the usual GDAL convention). See issue #2315.
sidecar_origin = attach_sidecar_origin(
sidecar.ifds, sidecar.data, sidecar.header)
ifds = ifds + sidecar.ifds
ifd = select_overview_ifd(ifds, overview_level)
# Inherit georef from the level-0 IFD when the overview itself
# has no geokeys (issue #1640). Pass-through for level 0. The
# sidecar IFDs typically lack geokeys so the inheritance pulls
# from the base file's full-resolution IFD as GDAL does.
# from the base file's full-resolution IFD as GDAL does. When a
# sidecar IFD does declare its own georef payload, ``georef_origin``
# routes the parse to the sidecar's bytes / byte order so the
# sidecar's georef wins. See issue #2315.
georef_origin = (
{iid: (od, oh.byte_order)
for iid, (od, oh) in sidecar_origin.items()}
if sidecar_origin else None
)
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, data, header.byte_order,
allow_rotated=allow_rotated)
allow_rotated=allow_rotated,
sidecar_origin=georef_origin)
bps = resolve_bits_per_sample(ifd.bits_per_sample)
file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format)
_validate_predictor_sample_format(ifd.predictor, ifd.sample_format)
Expand Down
7 changes: 7 additions & 0 deletions xrspatial/geotiff/_cog_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,13 @@ def _parse_cog_http_meta(
# IFD lives in the sidecar; that mirrors the eager local reader,
# whose sidecar IFDs typically carry no out-of-line geokeys and
# inherit from level-0 (which sits in the base buffer). #2239.
#
# The ``sidecar_origin`` kwarg added in #2315 for the eager local /
# fsspec paths is intentionally not threaded here. A separate fix
# is tracked in the HTTP / dask sidecar-byte-order finding (see
# the linked issue / PR for the HTTP side). When that lands, this
# call should pick up the same mapping so an HTTP sidecar with
# its own geokeys is parsed against the sidecar bytes too.
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, header_bytes, header.byte_order,
allow_rotated=allow_rotated)
Expand Down
70 changes: 68 additions & 2 deletions xrspatial/geotiff/_geotags.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,13 +1085,43 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview,
)


# TIFF tag IDs whose presence means an IFD declares its own
# georeferencing. Their values are either stored inline in the IFD entry
# or referenced by an offset into the byte stream of the file the IFD
# was read from. When an IFD from an external sidecar declares any of
# these tags, the georef extractor must therefore resolve them against
# the sidecar's bytes / byte order, not the base file's. Consulted by
# ``_ifd_has_georef_payload``. See issue #2315.
#
# NOTE(review): GeoDoubleParams (34736) and GeoAsciiParams (34737) are
# not listed -- presumably because they only carry meaning alongside a
# GeoKeyDirectory (34735), which is listed. Confirm this is intentional.
_GEOREF_PAYLOAD_TAGS = frozenset({
    33550,  # TAG_MODEL_PIXEL_SCALE
    33922,  # TAG_MODEL_TIEPOINT
    34264,  # TAG_MODEL_TRANSFORMATION
    34735,  # TAG_GEO_KEY_DIRECTORY
})


def _ifd_has_georef_payload(ifd: IFD) -> bool:
    """Report whether ``ifd`` carries at least one georeferencing tag.

    An external ``.tif.ovr`` sidecar written the GDAL way does not
    repeat ModelPixelScale / ModelTiepoint / GeoKeyDirectory; its IFDs
    inherit georef from the base file. Hand-rolled sidecars and
    non-GDAL writers may declare those tags anyway, in which case they
    have to be parsed against the sidecar's own byte buffer instead of
    the base file's. This predicate is the conservative gate that
    :func:`extract_geo_info_with_overview_inheritance` uses to decide
    whether to swap in the sidecar's data / byte order. See issue
    #2315.

    Parameters
    ----------
    ifd : IFD
        The IFD to inspect. Only its ``entries`` container is read,
        via membership tests on tag IDs.

    Returns
    -------
    bool
        ``True`` if any ID in ``_GEOREF_PAYLOAD_TAGS`` is present in
        ``ifd.entries``, ``False`` otherwise.
    """
    declared = ifd.entries
    # Stop at the first georef tag found; one is enough to flip the gate.
    for tag_id in _GEOREF_PAYLOAD_TAGS:
        if tag_id in declared:
            return True
    return False


def extract_geo_info_with_overview_inheritance(
ifd: IFD,
ifds: list,
data: bytes | memoryview,
byte_order: str,
*,
allow_rotated: bool = False,
sidecar_origin: dict | None = None,
) -> GeoInfo:
"""Extract geo metadata, inheriting from level 0 when the IFD lacks it.

Expand Down Expand Up @@ -1141,12 +1171,36 @@ def extract_geo_info_with_overview_inheritance(
Full file bytes (forwarded to ``extract_geo_info``).
byte_order : str
``'<'`` or ``'>'`` (forwarded to ``extract_geo_info``).
sidecar_origin : dict, optional
Mapping from ``id(ifd)`` to ``(data, byte_order)`` for IFDs
that live in an external ``.tif.ovr`` sidecar. When the
selected IFD is from a sidecar that declares its own georef
payload (ModelPixelScale / ModelTiepoint /
ModelTransformation / GeoKeyDirectory), this mapping tells
the extractor which byte buffer the tag offsets resolve
against. IFDs absent from the mapping fall through to
``(data, byte_order)`` for the base file. The conservative
default is ``None``, which preserves the legacy behavior
(read sidecar IFDs against the base file's bytes -- correct
only when the sidecar has no geokeys, which is the GDAL
convention). See issue #2315.

Returns
-------
GeoInfo
"""
info = extract_geo_info(ifd, data, byte_order,
# When the selected IFD lives in a sidecar AND it carries its own
# georef payload, resolve tag offsets against the sidecar's bytes /
# byte order. The sidecar-without-geokeys convention (GDAL) keeps
# ``(data, byte_order)`` from the base file and inherits below.
# See issue #2315.
sel_data, sel_byte_order = data, byte_order
if sidecar_origin is not None:
origin = sidecar_origin.get(id(ifd))
if origin is not None and _ifd_has_georef_payload(ifd):
sel_data, sel_byte_order = origin

info = extract_geo_info(ifd, sel_data, sel_byte_order,
allow_rotated=allow_rotated)

# Overview IFDs have NewSubfileType bit 0 set; mask IFDs (bit 2) and
Expand All @@ -1171,7 +1225,19 @@ def extract_geo_info_with_overview_inheritance(
if base_ifd is None:
return info

base_info = extract_geo_info(base_ifd, data, byte_order,
# Mirror the sidecar-origin routing for ``base_ifd``. The base IFD
# normally lives in the base file (``data`` / ``byte_order`` are
# correct), but a file with no full-resolution IFD of its own could
# land here with ``base_ifd`` resolved out of a sidecar. The lookup
# is the same shape as the one applied to ``ifd`` above. See
# review nit on #2315.
base_data, base_byte_order = data, byte_order
if sidecar_origin is not None:
base_origin = sidecar_origin.get(id(base_ifd))
if base_origin is not None:
base_data, base_byte_order = base_origin

base_info = extract_geo_info(base_ifd, base_data, base_byte_order,
allow_rotated=allow_rotated)

# Inherit the per-IFD metadata that the COG writer emits only on the
Expand Down
21 changes: 16 additions & 5 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,24 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None,
# Inherit georef from level 0 when an overview IFD lacks its own
# geokeys (issue #1640). For overview_level=0 (or None) this is a
# no-op: the helper short-circuits when the IFD is not a
# NewSubfileType=overview entry. Sidecar IFDs always lack
# geokeys, so the inheritance pulls from the base file's
# level-0 IFD (kept first in the merged list) which is the
# GDAL convention.
# NewSubfileType=overview entry. Sidecar IFDs typically lack
# geokeys (the GDAL convention), so the inheritance pulls from
# the base file's level-0 IFD (kept first in the merged list).
# A sidecar that does declare its own georef payload is a corner
# case: ``georef_origin`` maps the sidecar IFDs to
# ``(data, byte_order)`` from the sidecar so the helper resolves
# those tags against the right buffer. Sidecar IFDs without
# geokeys still inherit from the base file via the existing
# overview-inheritance path. See issues #1640 and #2315.
georef_origin = (
{iid: (od, oh.byte_order)
for iid, (od, oh) in sidecar_origin.items()}
if sidecar_origin else None
)
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, data, header.byte_order,
allow_rotated=allow_rotated)
allow_rotated=allow_rotated,
sidecar_origin=georef_origin)

# Orientation tag (274): values 2-8 mean the stored pixel order
# differs from display order. We need to remap the array post
Expand Down
Loading
Loading