Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions xrspatial/geotiff/_backends/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ def read_geotiff_dask(source: str, *,
if sidecar is not None:
from .._sidecar import close_sidecar
close_sidecar(sidecar)
# ``http_header`` carries the sidecar's ``TIFFHeader`` when
# ``used_sidecar`` was True (``_parse_cog_http_meta`` swaps it
# so ``byte_order`` matches the file the per-chunk range GETs
# land on). Pass that through to the per-chunk decode step so a
# mixed-endian base / ``.ovr`` pair decodes against the right
# endianness. Issue #2314.
http_meta = (http_header, http_ifd)
if http_ifd.orientation != 1:
raise ValueError(
Expand Down
32 changes: 30 additions & 2 deletions xrspatial/geotiff/_cog_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,18 @@ def _parse_cog_http_meta(
``sidecar`` and must close it; ``route_path`` is the URL/URI
that per-chunk fetches should target; ``used_sidecar`` is
``True`` iff the selected IFD came from the sidecar.

``header`` is the :class:`TIFFHeader` of the file the chosen
IFD lives in: the sidecar's header when ``used_sidecar=True``,
otherwise the base file's header. Callers that decode pixel
bytes (the eager HTTP path, the dask chunk reader) MUST use
this returned header for ``byte_order`` so the decode step
interprets the right endianness; a big-endian ``.ovr`` paired
with a little-endian base file would otherwise scramble the
result. ``geo_info`` is still extracted from the base file's
``header_bytes`` (sidecar IFDs typically carry no geokeys and
inherit from the level-0 IFD that sits in the base buffer);
that parse keeps using the base header's ``byte_order`` and is
unaffected by the swap. Issue #2314.
"""
if return_sidecar and source_path is None:
# The 5-tuple contract guarantees ``route_path`` is a usable
Expand Down Expand Up @@ -242,17 +254,33 @@ def _parse_cog_http_meta(
geo_info = extract_geo_info_with_overview_inheritance(
ifd, ifds, header_bytes, header.byte_order,
allow_rotated=allow_rotated)
# When the chosen IFD lives in the sidecar, return the sidecar's own
# ``TIFFHeader`` so the per-chunk / eager decode step sees the byte
# order of the file the bytes actually came from. A big-endian
# ``.ovr`` paired with a little-endian base (or vice versa) would
# otherwise have its pixels reinterpreted with the wrong endianness
# at ``_decode_strip_or_tile``. This mirrors the local sidecar path in
# ``_reader.py:223``, which swaps to the sidecar header for the same
# reason. Issue #2314.
#
# ``used_sidecar`` can only be True when ``sidecar`` is not None:
# ``sidecar_ifd_ids`` is populated by ``discover_remote_sidecar``
# only on the same branch that assigns ``sidecar`` (and stays empty
# otherwise), so ``id(ifd) in sidecar_ifd_ids`` implies the sidecar
# was loaded successfully. The conditional expression below relies
# on that invariant when it reads ``sidecar.header``.
return_header = sidecar.header if used_sidecar else header
if return_sidecar:
route_path = sidecar.path if used_sidecar else source_path
return (header, ifd, geo_info, header_bytes,
return (return_header, ifd, geo_info, header_bytes,
(sidecar, route_path, used_sidecar))
# Caller did not opt into sidecar metadata. Close the sidecar (if
# any was loaded) before returning so the buffer does not leak --
# the legacy return tuple has no slot to hand it back through.
if sidecar is not None:
from ._sidecar import close_sidecar
close_sidecar(sidecar)
return header, ifd, geo_info, header_bytes
return return_header, ifd, geo_info, header_bytes


def _read_cog_http(url: str, overview_level: int | None = None,
Expand Down
Loading
Loading