diff --git a/docs/source/user_guide/geotiff_safe_io.rst b/docs/source/user_guide/geotiff_safe_io.rst index 4d842e85..0b2d9cbd 100644 --- a/docs/source/user_guide/geotiff_safe_io.rst +++ b/docs/source/user_guide/geotiff_safe_io.rst @@ -235,6 +235,13 @@ the ambiguous-metadata family at once. - The writer cannot classify an EPSG code as geographic or projected. - Pass a code pyproj can resolve, or install pyproj. + * - :class:`~xrspatial.geotiff.NonRepresentableEPSGCRSError` + - The integer EPSG code resolves to a compound (horizontal + + vertical) CRS, which the writer cannot represent in a single + ``GeographicTypeGeoKey`` or ``ProjectedCSTypeGeoKey`` slot. + - Pass the full compound CRS as WKT to take the user-defined CRS + fallback path, or pass the horizontal sub-CRS EPSG directly if + the vertical component is not needed. * - :class:`~xrspatial.geotiff.UnsupportedGeoTIFFFeatureError` - The input declares a feature the GeoTIFF module does not implement (warped / reprojection VRTs, pansharpened or derived diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 38574c89..644297ce 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -72,9 +72,9 @@ transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry # noqa: F401 from ._crs import _resolve_crs_to_wkt, _wkt_to_epsg # noqa: F401 from ._errors import (ConflictingCRSError, ConflictingNodataError, GeoTIFFAmbiguousMetadataError, - InvalidCRSCodeError, MixedBandMetadataError, NonUniformCoordsError, - RotatedTransformError, UnknownCRSModelTypeError, UnparseableCRSError, - UnsupportedGeoTIFFFeatureError) + InvalidCRSCodeError, MixedBandMetadataError, NonRepresentableEPSGCRSError, + NonUniformCoordsError, RotatedTransformError, UnknownCRSModelTypeError, + UnparseableCRSError, UnsupportedGeoTIFFFeatureError) from ._geotags import RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT, GeoTransform # noqa: F401 from ._reader import _MAX_CLOUD_BYTES_SENTINEL, UnsafeURLError from ._reader import read_to_array as _read_to_array @@ -110,6 +110,7 @@ 'GEOREF_STATUS_VALUES', 'InvalidCRSCodeError', 'MixedBandMetadataError', + 'NonRepresentableEPSGCRSError', 'NonUniformCoordsError', 'RotatedTransformError', 'SUPPORTED_FEATURES', diff --git a/xrspatial/geotiff/_crs.py b/xrspatial/geotiff/_crs.py index 0df5fef5..3a85b39b 100644 --- a/xrspatial/geotiff/_crs.py +++ b/xrspatial/geotiff/_crs.py @@ -15,7 +15,7 @@ import numbers import warnings -from ._errors import UnparseableCRSError +from ._errors import NonRepresentableEPSGCRSError, UnparseableCRSError from ._runtime import GeoTIFFFallbackWarning, _geotiff_strict_mode #: WKT root keywords. A string that starts with one of these (after @@ -45,6 +45,49 @@ def _looks_like_wkt(s: str) -> bool: return s.lstrip().upper().startswith(_WKT_ROOT_KEYWORDS) +def _reject_non_representable_epsg(crs_int: int, crs_obj) -> None: + """Refuse compound EPSG codes that the stable EPSG writer cannot + round-trip through rasterio / GDAL (#2418). + + The GeoTIFF writer's stable-path emits only two EPSG-carrying + GeoKeys: ``GeographicTypeGeoKey`` (2048) and + ``ProjectedCSTypeGeoKey`` (3072). Both are spec'd to hold a 2D + horizontal CRS code. When the caller passes a compound EPSG code + (horizontal + vertical, e.g. EPSG:6349 = "NAD83(2011) + NAVD88 + height"), pyproj reports the compound CRS as ``is_geographic`` or + ``is_projected`` based on its horizontal sub-CRS, and the writer + stores the compound code in the horizontal-CRS slot. The resulting + GeoTIFF either reads back through GDAL as the wrong CRS (the + horizontal sub-CRS, with the vertical component silently dropped) + or as no CRS at all, because the GeoKey value claims to be a 2D + horizontal CRS but the EPSG database registers it as compound. + + Reject the code at validation time so the failure surfaces at the + write call instead of after the file is on disk. Callers who need + to preserve the compound semantics can pass the full WKT (which + takes the user-defined / citation path -- see issue #1768) or + pass the horizontal sub-CRS code directly. + + pyproj exposes ``is_compound`` on every CRS object since 2.x; + callers without pyproj never reach this function (the caller + site short-circuits on ``ImportError``). + """ + if getattr(crs_obj, "is_compound", False): + raise NonRepresentableEPSGCRSError( + f"EPSG:{crs_int} is a compound CRS ({crs_obj.name!r}) and " + "cannot be written through the integer-EPSG path: the " + "GeoTIFF writer only emits GeographicTypeGeoKey / " + "ProjectedCSTypeGeoKey, which are specified to carry a 2D " + "horizontal CRS code. Storing a compound EPSG there " + "produces a file that rasterio / GDAL reads back as the " + "horizontal sub-CRS (vertical component lost) or as no " + "CRS at all. Pass the full compound WKT as crs= to take " + "the user-defined CRS path (see issue #1768), or pass the " + "horizontal sub-CRS EPSG directly if the vertical " + "component is not needed." + ) + + def _validate_crs_arg(crs) -> None: """Reject malformed ``crs=`` arguments before they reach the writer. @@ -90,7 +133,7 @@ def _validate_crs_arg(crs) -> None: except ImportError: return try: - CRS.from_epsg(crs_int) + crs_obj = CRS.from_epsg(crs_int) except Exception as e: if _geotiff_strict_mode(): raise @@ -99,6 +142,7 @@ def _validate_crs_arg(crs) -> None: f"(pyproj: {type(e).__name__}: {e}). Pass a valid " f"EPSG integer, a WKT string, or None." ) from e + _reject_non_representable_epsg(crs_int, crs_obj) return if isinstance(crs, str): return @@ -122,6 +166,15 @@ def _wkt_to_epsg(wkt_or_proj: str) -> int | None: from pyproj import CRS crs = CRS.from_user_input(wkt_or_proj) epsg = crs.to_epsg() + # Issue #2418: ``epsg`` is set when pyproj recognises the WKT, + # but if the resolved CRS is compound (horizontal + vertical), + # the integer-EPSG writer path cannot represent it -- the GeoKey + # slots only carry 2D horizontal codes. Return None so the + # caller routes through the WKT / citation fallback instead of + # writing a compound EPSG into a horizontal slot. The original + # WKT is preserved on the fallback path. + if epsg is not None and getattr(crs, "is_compound", False): + return None return epsg except Exception as e: if _geotiff_strict_mode(): diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py index 8e6ae11d..fedb949a 100644 --- a/xrspatial/geotiff/_errors.py +++ b/xrspatial/geotiff/_errors.py @@ -136,6 +136,28 @@ class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): """ +class NonRepresentableEPSGCRSError(GeoTIFFAmbiguousMetadataError): + """EPSG code resolves, but the GeoTIFF writer cannot represent it (#2418). + + Raised by the GeoTIFF writer when the caller supplies an integer + EPSG code that pyproj resolves to a compound CRS (horizontal + + vertical). The writer can only emit ``GeographicTypeGeoKey`` (2048) + or ``ProjectedCSTypeGeoKey`` (3072); storing a compound EPSG in + either slot produces a GeoTIFF that rasterio / GDAL reads back as + a different CRS (or no CRS at all), because the GeoKey value + declares "this is a 2D horizontal CRS" but the EPSG database + registers it as compound. + + Callers who need to preserve a compound CRS should either pass the + horizontal sub-CRS as the integer EPSG (the vertical component is + not representable in a single GeoTIFF without additional GeoKeys + that this release does not emit), or pass the full compound WKT as + ``crs=`` so the writer takes the ``ProjectedCSType=32767`` / + ``GTCitationGeoKey`` fallback path. The WKT fallback path round-trips + through libgeotiff and GDAL but not all readers; see issue #1768. + """ + + class UnsupportedGeoTIFFFeatureError(ValueError): """Caller asked for a feature this release does not implement (#2349). @@ -163,5 +185,6 @@ class UnsupportedGeoTIFFFeatureError(ValueError): "ConflictingNodataError", "VRTUnsupportedError", "UnknownCRSModelTypeError", + "NonRepresentableEPSGCRSError", "UnsupportedGeoTIFFFeatureError", ] diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index d164fe08..312a2e0b 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -4,7 +4,7 @@ from dataclasses import dataclass, field from ._dtypes import resolve_bits_per_sample -from ._errors import RotatedTransformError, UnknownCRSModelTypeError +from ._errors import NonRepresentableEPSGCRSError, RotatedTransformError, UnknownCRSModelTypeError from ._header import (IFD, TAG_BITS_PER_SAMPLE, TAG_COMPRESSION, TAG_EXTRA_SAMPLES, TAG_GDAL_METADATA, TAG_GDAL_NODATA, TAG_GEO_ASCII_PARAMS, TAG_GEO_DOUBLE_PARAMS, TAG_GEO_KEY_DIRECTORY, TAG_IMAGE_LENGTH, @@ -1452,6 +1452,24 @@ def _model_type_from_epsg(crs_epsg: int) -> int: "known EPSG or installing a current pyproj database will " "resolve this." ) from e + # Defense in depth (#2418): the stable-EPSG path can only emit + # GeographicTypeGeoKey or ProjectedCSTypeGeoKey, both of which + # are specified to hold a 2D horizontal CRS. A compound CRS + # (horizontal + vertical) reports ``is_geographic`` or + # ``is_projected`` for its horizontal sub-CRS but is not + # representable in either slot -- rasterio / GDAL reads the + # output back as the wrong CRS. ``_validate_crs_arg`` should + # have rejected this already; trip the same error here so a + # direct call into ``build_geo_tags`` cannot bypass the check. + if getattr(crs, "is_compound", False): + raise NonRepresentableEPSGCRSError( + f"EPSG:{crs_epsg} is a compound CRS ({crs.name!r}); " + "the GeoTIFF integer-EPSG writer path only emits 2D " + "horizontal GeoKeys. Pass the compound CRS as WKT " + "(``crs=``) for the user-defined CRS fallback, " + "or pass the horizontal sub-CRS EPSG directly. " + "See issue #2418." + ) if crs.is_geographic: return MODEL_TYPE_GEOGRAPHIC return MODEL_TYPE_PROJECTED diff --git a/xrspatial/geotiff/tests/test_compound_crs_reject_2418.py b/xrspatial/geotiff/tests/test_compound_crs_reject_2418.py new file mode 100644 index 00000000..3758ed1a --- /dev/null +++ b/xrspatial/geotiff/tests/test_compound_crs_reject_2418.py @@ -0,0 +1,252 @@ +"""Reject compound EPSG codes on the stable-EPSG writer path (#2418). + +The writer's stable-path emits only ``GeographicTypeGeoKey`` (2048) and +``ProjectedCSTypeGeoKey`` (3072). Both are spec'd to hold a 2D +horizontal CRS code. When a compound CRS (horizontal + vertical) is +passed as an integer EPSG, pyproj reports it as ``is_geographic`` or +``is_projected`` based on its horizontal sub-CRS, and the writer stores +the compound code in the horizontal slot. The resulting GeoTIFF reads +back through rasterio / GDAL as the horizontal sub-CRS (vertical +component silently dropped) or as no CRS at all. + +These tests pin three behaviours: + +1. The stable-EPSG path raises ``NonRepresentableEPSGCRSError`` for + compound codes (EPSG:6349, EPSG:5498, EPSG:7415, EPSG:8360), at the + kwarg entry point and via ``DataArray.attrs['crs']``. +2. The WKT fallback path still accepts compound CRSs via WKT -- + ``_wkt_to_epsg`` now returns None for compound CRSs so the writer + routes through the user-defined CRS / citation path instead of + writing a compound EPSG into a horizontal GeoKey. +3. Plain horizontal CRSs (EPSG:4326, EPSG:32633) still round-trip + through rasterio with the input code intact, so the fix is scoped + to the broken case. + +Tests use rasterio (not xrspatial's own reader) for the round-trip +assertions, since the xrspatial reader masks the corruption by +re-resolving the stored integer through pyproj. +""" +from __future__ import annotations + +import io +import os +import tempfile + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + NonRepresentableEPSGCRSError, + open_geotiff, + to_geotiff, +) +from xrspatial.geotiff._crs import _reject_non_representable_epsg, _validate_crs_arg + +pyproj = pytest.importorskip("pyproj") +rasterio = pytest.importorskip("rasterio") + + +# Compound EPSG codes that previously slipped through the validator. +# 6349: NAD83(2011) + NAVD88 height (compound, horizontal=geographic) +# 5498: NAD83 + NAVD88 height (compound, horizontal=geographic) +# 7415: Amersfoort/RD New + NAP height (compound, horizontal=projected) +# 8360: ETRS89 + EVRF2000 Austria height (compound, horizontal=geographic) +COMPOUND_EPSG_CODES = [6349, 5498, 7415, 8360] + + +def _square(dtype=np.float32): + return xr.DataArray( + np.zeros((4, 4), dtype=dtype), + coords={'y': np.arange(4.0), 'x': np.arange(4.0)}, + dims=('y', 'x'), + ) + + +@pytest.mark.parametrize("epsg", COMPOUND_EPSG_CODES) +def test_validate_crs_arg_rejects_compound_epsg(epsg): + """``_validate_crs_arg`` raises for compound EPSG codes.""" + with pytest.raises(NonRepresentableEPSGCRSError, match="compound"): + _validate_crs_arg(epsg) + + +@pytest.mark.parametrize("epsg", COMPOUND_EPSG_CODES) +def test_to_geotiff_rejects_compound_epsg_kwarg(epsg): + """End-to-end ``to_geotiff(crs=)`` raises.""" + buf = io.BytesIO() + with pytest.raises(NonRepresentableEPSGCRSError, match="compound"): + to_geotiff(_square(), buf, crs=epsg) + + +@pytest.mark.parametrize("epsg", COMPOUND_EPSG_CODES) +def test_to_geotiff_rejects_compound_epsg_attrs(epsg): + """End-to-end ``DataArray.attrs['crs'] = `` raises.""" + arr = _square() + arr.attrs['crs'] = epsg + buf = io.BytesIO() + with pytest.raises(NonRepresentableEPSGCRSError, match="compound"): + to_geotiff(arr, buf) + + +def test_reject_non_representable_epsg_helper(): + """The helper raises directly on a compound pyproj CRS object.""" + from pyproj import CRS + crs_obj = CRS.from_epsg(6349) + assert crs_obj.is_compound is True + with pytest.raises(NonRepresentableEPSGCRSError, match="6349"): + _reject_non_representable_epsg(6349, crs_obj) + + +def test_reject_non_representable_epsg_passes_horizontal(): + """The helper does not raise for plain horizontal CRSs.""" + from pyproj import CRS + _reject_non_representable_epsg(4326, CRS.from_epsg(4326)) # geographic 2D + _reject_non_representable_epsg(32633, CRS.from_epsg(32633)) # projected + _reject_non_representable_epsg(3857, CRS.from_epsg(3857)) # web mercator + + +@pytest.mark.parametrize("epsg", [4326, 32633, 3857, 4269]) +def test_horizontal_crs_still_round_trips_through_rasterio(epsg, tmp_path): + """Regression: scoped fix doesn't break the normal stable-EPSG path.""" + arr = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={'y': np.array([3.5, 2.5, 1.5, 0.5]), + 'x': np.array([0.5, 1.5, 2.5, 3.5])}, + dims=('y', 'x'), + ) + path = tmp_path / f"tmp_2418_{epsg}.tif" + to_geotiff(arr, str(path), crs=epsg) + with rasterio.open(str(path)) as src: + assert src.crs is not None + assert src.crs.to_epsg() == epsg + + +def test_compound_wkt_takes_user_defined_path(tmp_path): + """A compound CRS passed as WKT writes via the citation path (#1768). + + ``_wkt_to_epsg`` returns None for a compound CRS so the writer + treats it as a user-defined CRS rather than emitting a compound + EPSG into the horizontal-CRS GeoKey slot. + """ + from pyproj import CRS + wkt = CRS.from_epsg(6349).to_wkt() + arr = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={'y': np.array([3.5, 2.5, 1.5, 0.5]), + 'x': np.array([0.5, 1.5, 2.5, 3.5])}, + dims=('y', 'x'), + ) + path = tmp_path / "tmp_2418_compound_wkt.tif" + # The WKT-only path emits a UserWarning per #1768; we don't care + # about that here, just that the write succeeds and the CRS info + # survives in xrspatial's own attrs. + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + to_geotiff(arr, str(path), crs=wkt) + da = open_geotiff(str(path)) + # ``crs_wkt`` carries the full compound CRS string back through the + # xrspatial reader (the WKT-only path's contract). + assert da.attrs.get('crs_wkt') is not None + assert 'COMPOUND' in da.attrs['crs_wkt'].upper() + + +def test_compound_epsg_corruption_surfaces_when_validator_bypassed( + tmp_path, monkeypatch, +): + """If the validator is bypassed, the file IS corrupted (regression). + + Monkey-patches ``_validate_crs_arg`` and + ``_model_type_from_epsg``'s compound check to no-ops, then writes + EPSG:6349 through the integer path. Asserts the file rasterio + reads back is not EPSG:6349 -- the exact corruption the fix + prevents. If a future refactor drops either guard, this test + starts failing instead of the writer silently corrupting files. + """ + from xrspatial.geotiff import _crs as _crs_mod + from xrspatial.geotiff import _geotags as _geotags_mod + + # Bypass the validator at both gates so we exercise the broken + # write path the production code now refuses. + monkeypatch.setattr( + _crs_mod, "_reject_non_representable_epsg", lambda *a, **kw: None + ) + original_model_type = _geotags_mod._model_type_from_epsg + + def _model_type_without_compound_check(crs_epsg): + # Reproduce the pre-fix behaviour: only branch on is_geographic. + from pyproj import CRS + crs = CRS.from_epsg(crs_epsg) + if crs.is_geographic: + return _geotags_mod.MODEL_TYPE_GEOGRAPHIC + return _geotags_mod.MODEL_TYPE_PROJECTED + + monkeypatch.setattr( + _geotags_mod, "_model_type_from_epsg", _model_type_without_compound_check + ) + + arr = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={'y': np.array([3.5, 2.5, 1.5, 0.5]), + 'x': np.array([0.5, 1.5, 2.5, 3.5])}, + dims=('y', 'x'), + ) + path = tmp_path / "tmp_2418_bypass_demo.tif" + to_geotiff(arr, str(path), crs=6349) + + # Pin the corruption: rasterio either fails to recover the EPSG + # entirely (``to_epsg() is None``) or recovers the wrong one + # (the horizontal sub-CRS, with the vertical component dropped). + # Both outcomes are caught by ``rasterio_epsg != 6349``. + with rasterio.open(str(path)) as src: + rasterio_epsg = src.crs.to_epsg() if src.crs is not None else None + assert rasterio_epsg != 6349, ( + f"rasterio recovered EPSG:6349 from a write that bypassed the " + f"validator: got {rasterio_epsg}. Either the writer is now " + "correctly emitting compound GeoKeys (good, drop the test) or " + "the bypass no longer reaches the broken path (unlikely)." + ) + + # Sanity: when validation is back in place, the same write raises. + monkeypatch.undo() + with pytest.raises(NonRepresentableEPSGCRSError): + to_geotiff(arr, str(path), crs=6349) + + +def test_geographic_3d_crs_still_works(tmp_path): + """EPSG:4979 (3D WGS84) is geographic, not compound: still allowed. + + The fix is narrow -- it only rejects compound CRSs, which are the + demonstrably-broken case. 3D-geographic and other non-2D-horizontal + EPSGs that round-trip cleanly through rasterio remain allowed. + """ + from pyproj import CRS + crs_obj = CRS.from_epsg(4979) + assert crs_obj.is_compound is False + # Should not raise from the validator. + _validate_crs_arg(4979) + arr = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={'y': np.array([3.5, 2.5, 1.5, 0.5]), + 'x': np.array([0.5, 1.5, 2.5, 3.5])}, + dims=('y', 'x'), + ) + path = tmp_path / "tmp_2418_4979.tif" + to_geotiff(arr, str(path), crs=4979) + with rasterio.open(str(path)) as src: + # rasterio resolves EPSG:4979 correctly. + assert src.crs is not None + assert src.crs.to_epsg() == 4979 + + +def test_compound_epsg_error_message_points_to_workaround(): + """Error message names the compound CRS and offers a route forward.""" + with pytest.raises(NonRepresentableEPSGCRSError) as exc: + _validate_crs_arg(6349) + msg = str(exc.value) + assert "6349" in msg + assert "compound" in msg.lower() + # The message should point at both the WKT fallback and the + # horizontal-sub-CRS workaround so the caller can act on it. + assert "WKT" in msg or "wkt" in msg + assert "horizontal" in msg.lower() diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 272ee146..2a7150a8 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2781,6 +2781,11 @@ def test_all_lists_supported_functions(self): 'GeoTIFFAmbiguousMetadataError', 'InvalidCRSCodeError', 'MixedBandMetadataError', + # Issue #2418: writer rejects compound EPSG codes that cannot + # be represented in a single GeographicType / ProjectedCSType + # GeoKey, surfacing the corruption instead of silently writing + # a compound EPSG into a horizontal-CRS slot. + 'NonRepresentableEPSGCRSError', 'NonUniformCoordsError', 'RotatedTransformError', 'UnknownCRSModelTypeError',