From 5e1ee05ec5e6559c42e65626096ec83e896dc43a Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 27 Oct 2025 17:21:35 +0100 Subject: [PATCH 01/46] use path mapper registries and customizations module --- mapchete_eo/base.py | 2 +- mapchete_eo/known_catalogs.py | 7 ++ mapchete_eo/platforms/sentinel2/config.py | 112 +++++++++++++++++- .../platforms/sentinel2/customizations.py | 49 ++++++++ .../platforms/sentinel2/mapper_registry.py | 73 ++++++++++++ tests/platforms/sentinel2/test_config.py | 53 +++++++++ 6 files changed, 289 insertions(+), 7 deletions(-) create mode 100644 mapchete_eo/platforms/sentinel2/customizations.py create mode 100644 mapchete_eo/platforms/sentinel2/mapper_registry.py create mode 100644 tests/platforms/sentinel2/test_config.py diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index 90100ea3..0875ae38 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -467,7 +467,7 @@ def _init_area(self, input_params: dict) -> BaseGeometry: configured_area, configured_area_crs = guess_geometry( self.params.area, bounds=Bounds.from_inp( - input_params.get("delimiters", {}).get("bounds"), + input_params.get("delimiters", {}).get("effective_bounds"), crs=getattr(input_params.get("pyramid"), "crs"), ), raise_if_empty=False, diff --git a/mapchete_eo/known_catalogs.py b/mapchete_eo/known_catalogs.py index 801e5a7b..cb9acfed 100644 --- a/mapchete_eo/known_catalogs.py +++ b/mapchete_eo/known_catalogs.py @@ -9,11 +9,18 @@ class EarthSearchV1S2L2A(STACSearchCatalog): + # DEPRECATED """Earth-Search catalog for Sentinel-2 Level 2A COGs.""" endpoint: str = "https://earth-search.aws.element84.com/v1/" +class EarthSearchV1(STACSearchCatalog): + """Earth-Search catalog V1.""" + + endpoint: str = "https://earth-search.aws.element84.com/v1/" + + class CDSESearch(STACSearchCatalog): """Copernicus Data Space Ecosystem (CDSE) STAC API.""" diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index 3a2bf726..a209ed39 
100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -1,18 +1,20 @@ from __future__ import annotations -from typing import List, Optional, Union +from typing import List, Optional, Union, Dict, Any, Callable from mapchete.path import MPathLike -from pydantic import ( - BaseModel, - ValidationError, - field_validator, -) +from pydantic import BaseModel, ValidationError, field_validator, model_validator from mapchete_eo.base import BaseDriverConfig from mapchete_eo.io.path import ProductPathGenerationMethod from mapchete_eo.platforms.sentinel2.archives import ArchiveClsFromString, AWSL2ACOGv1 from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels +from mapchete_eo.platforms.sentinel2.customizations import ( + DataArchive, + MetadataArchive, + KNOWN_SOURCES, +) +from mapchete_eo.platforms.sentinel2.mapper_registry import MAPPER_REGISTRIES from mapchete_eo.platforms.sentinel2.types import ( CloudType, ProductQIMaskResolution, @@ -23,6 +25,84 @@ from mapchete_eo.types import TimeRange +def known_catalog_to_url(stac_catalog: str) -> str: + if stac_catalog in KNOWN_SOURCES: + return KNOWN_SOURCES[stac_catalog]["stac_catalog"] + return stac_catalog + + +class Source(BaseModel): + """All information required to consume Sentinel-2 products.""" + + stac_catalog: str + + # if known STAC catalog is given, fill in the defaults + collections: Optional[List[str]] = None + data_archive: Optional[DataArchive] = None + metadata_archive: MetadataArchive = "roda" + + @model_validator(mode="before") + def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Handles short names of sources.""" + if isinstance(values, str): + values = dict(stac_catalog=values) + stac_catalog = values.get("stac_catalog") + if stac_catalog in KNOWN_SOURCES: + values.update(KNOWN_SOURCES[stac_catalog]) + else: + # TODO: make sure catalog then is either a path or an URL + pass + return values + + 
@model_validator(mode="after") + def verify_mappers(self) -> Source: + # make sure all required mappers are registered + self.get_id_mapper() + self.get_asset_paths_mapper() + self.get_s2metadata_mapper() + return self + + def get_id_mapper(self) -> Callable: + for key in MAPPER_REGISTRIES["ID"]: + if self.stac_catalog == known_catalog_to_url(key): + return MAPPER_REGISTRIES["ID"][key] + else: + raise ValueError(f"no ID mapper for {self.stac_catalog} found") + + def get_asset_paths_mapper(self) -> Union[Callable, None]: + if self.data_archive is None: + return None + for key in MAPPER_REGISTRIES["asset paths"]: + stac_catalog, data_archive = key + if ( + self.stac_catalog == known_catalog_to_url(stac_catalog) + and data_archive == self.data_archive + ): + return MAPPER_REGISTRIES["asset paths"][key] + else: + raise ValueError( + f"no asset paths mapper from {self.stac_catalog} to {self.data_archive} found" + ) + + def get_s2metadata_mapper(self) -> Union[Callable, None]: + if self.metadata_archive is None: + return None + for key in MAPPER_REGISTRIES["S2Metadata"]: + stac_catalog, metadata_archive = key + if ( + self.stac_catalog == known_catalog_to_url(stac_catalog) + and metadata_archive == self.metadata_archive + ): + return MAPPER_REGISTRIES["S2Metadata"][key] + else: + raise ValueError( + f"no S2Metadata mapper from {self.stac_catalog} to {self.metadata_archive} found" + ) + + +default_source = Source.model_validate(KNOWN_SOURCES["EarthSearch"]) + + class BRDFModelConfig(BaseModel): model: BRDFModels = BRDFModels.HLS bands: List[str] = ["blue", "green", "red", "nir"] @@ -107,9 +187,20 @@ class CacheConfig(BaseModel): class Sentinel2DriverConfig(BaseDriverConfig): format: str = "Sentinel-2" time: Union[TimeRange, List[TimeRange]] + + # new + source: List[Source] = [default_source] + + # deprecated + # for backwards compatibility, archive should be converted to + # catalog & data_archive archive: ArchiveClsFromString = AWSL2ACOGv1 + + # don't know yet how to 
handle this cat_baseurl: Optional[MPathLike] = None search_index: Optional[MPathLike] = None + + # custom params max_cloud_cover: float = 100.0 stac_config: StacSearchConfig = StacSearchConfig() first_granule_only: bool = False @@ -118,6 +209,15 @@ class Sentinel2DriverConfig(BaseDriverConfig): brdf: Optional[BRDFConfig] = None cache: Optional[CacheConfig] = None + @model_validator(mode="before") + def to_list(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Expands source to list.""" + for field in ["source"]: + value = values.get(field) + if value is not None and not isinstance(value, list): + values[field] = [value] + return values + class MaskConfig(BaseModel): # mask by footprint geometry diff --git a/mapchete_eo/platforms/sentinel2/customizations.py b/mapchete_eo/platforms/sentinel2/customizations.py new file mode 100644 index 00000000..2fcdede5 --- /dev/null +++ b/mapchete_eo/platforms/sentinel2/customizations.py @@ -0,0 +1,49 @@ +from typing import Literal, Dict, Any + +from pystac import Item + +from mapchete_eo.platforms.sentinel2.mapper_registry import ( + maps_item_id, + maps_asset_paths, + creates_s2metadata, +) +from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata + + +DataArchive = Literal["AWSCOG", "AWSJP2"] +KNOWN_SOURCES: Dict[str, Any] = { + "EarthSearch": { + "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "collections": ["sentinel-2-l2a"], + "data_archive": "AWSCOG", + }, + "CDSE": { + "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", + "collections": ["sentinel-2-l2a"], + }, +} +MetadataArchive = Literal["roda"] + + +# mapper functions decorated with metadata to have driver decide which one to apply when # +########################################################################################## + + +@maps_item_id(from_catalogs=["EarthSearch"]) +def earthsearch_id_mapper(item: Item) -> Item: + return item + + +@maps_asset_paths(from_catalogs=["EarthSearch"], to_data_archives=["AWSCOG"]) +def 
earthsearch_path_mapper(item: Item) -> Item: + return item + + +@creates_s2metadata(from_catalogs=["EarthSearch"], to_metadata_archives=["roda"]) +def earthsearch_to_s2metadata(item: Item) -> S2Metadata: + return S2Metadata.from_stac_item(item) + + +@maps_item_id(from_catalogs=["CDSE"]) +def plain_id_mapper(item: Item) -> Item: + return item diff --git a/mapchete_eo/platforms/sentinel2/mapper_registry.py b/mapchete_eo/platforms/sentinel2/mapper_registry.py new file mode 100644 index 00000000..dff01c36 --- /dev/null +++ b/mapchete_eo/platforms/sentinel2/mapper_registry.py @@ -0,0 +1,73 @@ +from typing import List, Callable, Dict, Any + + +# decorators for mapper functions using the registry pattern # +############################################################## +ID_MAPPER_REGISTRY: Dict[Any, Callable] = {} +ASSET_PATHS_MAPPER_REGISTRY: Dict[Any, Callable] = {} +S2METADATA_MAPPER_REGISTRY: Dict[Any, Callable] = {} + +MAPPER_REGISTRIES = { + "ID": ID_MAPPER_REGISTRY, + "asset paths": ASSET_PATHS_MAPPER_REGISTRY, + "S2Metadata": S2METADATA_MAPPER_REGISTRY, +} + + +def _register_func(registry: Dict[str, Callable], key: Any, func: Callable): + if key in registry: + raise ValueError(f"{key} already registered in {registry}") + registry[key] = func + + +def maps_item_id(from_catalogs: List[str]): + """ + Decorator registering mapper to common ID. + """ + + def decorator(func): + # Use a tuple of the metadata as the key + # key = (path_type, version) + for catalog in from_catalogs: + _register_func(registry=ID_MAPPER_REGISTRY, key=catalog, func=func) + return func + + return decorator + + +def maps_asset_paths(from_catalogs: List[str], to_data_archives: List[str]): + """ + Decorator registering asset path mapper. 
+ """ + + def decorator(func): + # Use a tuple of the metadata as the key + for catalog in from_catalogs: + for data_archive in to_data_archives: + _register_func( + registry=ASSET_PATHS_MAPPER_REGISTRY, + key=(catalog, data_archive), + func=func, + ) + return func + + return decorator + + +def creates_s2metadata(from_catalogs: List[str], to_metadata_archives: List[str]): + """ + Decorator registering S2Metadata creator. + """ + + def decorator(func): + # Use a tuple of the metadata as the key + for catalog in from_catalogs: + for metadata_archive in to_metadata_archives: + _register_func( + registry=S2METADATA_MAPPER_REGISTRY, + key=(catalog, metadata_archive), + func=func, + ) + return func + + return decorator diff --git a/tests/platforms/sentinel2/test_config.py b/tests/platforms/sentinel2/test_config.py new file mode 100644 index 00000000..662f2975 --- /dev/null +++ b/tests/platforms/sentinel2/test_config.py @@ -0,0 +1,53 @@ +import pytest + +from mapchete_eo.platforms.sentinel2.config import Sentinel2DriverConfig + + +@pytest.mark.parametrize( + "config_dict", + [ + dict(), + dict( + source="EarthSearch", + ), + dict( + source=["EarthSearch"], + ), + dict( + source=[ + dict( + stac_catalog="EarthSearch", + metadata_archive="roda", + ) + ], + ), + dict( + source=[ + dict( + stac_catalog="EarthSearch", + ), + dict(stac_catalog="CDSE", data_archive="AWSJP2"), + ], + ), + dict( + source=[ + dict( + stac_catalog="https://earth-search.aws.element84.com/v1/", + collections=["sentinel-s2-l2a"], + ), + ], + ), + ], +) +def test_valid_configs(config_dict: dict): + config = Sentinel2DriverConfig.model_validate( + dict( + config_dict, + format="Sentinel-2", + time=dict(start="2025-10-01", end="2025-10-01"), + ) + ) + assert config.source + for source in config.source: + assert source.stac_catalog + assert source.collections From fa1540dff3a25290e39f8dd68965a6999222acab Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 28 Oct 2025 10:21:10 +0100 Subject: [PATCH 
02/46] dev commit --- mapchete_eo/platforms/sentinel2/archives.py | 2 + mapchete_eo/platforms/sentinel2/config.py | 21 +++- .../platforms/sentinel2/customizations.py | 112 +++++++++++++++++- .../sentinel2/path_mappers/__init__.py | 2 + 4 files changed, 134 insertions(+), 3 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/archives.py b/mapchete_eo/platforms/sentinel2/archives.py index 51ae48c2..9c16def4 100644 --- a/mapchete_eo/platforms/sentinel2/archives.py +++ b/mapchete_eo/platforms/sentinel2/archives.py @@ -1,3 +1,5 @@ +# TODO: deprecate this! + from __future__ import annotations from enum import Enum diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index a209ed39..7b386864 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -1,18 +1,19 @@ from __future__ import annotations from typing import List, Optional, Union, Dict, Any, Callable +import warnings from mapchete.path import MPathLike from pydantic import BaseModel, ValidationError, field_validator, model_validator from mapchete_eo.base import BaseDriverConfig from mapchete_eo.io.path import ProductPathGenerationMethod -from mapchete_eo.platforms.sentinel2.archives import ArchiveClsFromString, AWSL2ACOGv1 from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels from mapchete_eo.platforms.sentinel2.customizations import ( DataArchive, MetadataArchive, KNOWN_SOURCES, + DEPRECATED_ARCHIVES, ) from mapchete_eo.platforms.sentinel2.mapper_registry import MAPPER_REGISTRIES from mapchete_eo.platforms.sentinel2.types import ( @@ -194,7 +195,7 @@ class Sentinel2DriverConfig(BaseDriverConfig): # deprecated # for backwards compatibility, archive should be converted to # catalog & data_archive - archive: ArchiveClsFromString = AWSL2ACOGv1 + # archive: ArchiveClsFromString = AWSL2ACOGv1 # don't know yet how to handle this cat_baseurl: Optional[MPathLike] = None @@ -209,6 +210,22 @@ class 
Sentinel2DriverConfig(BaseDriverConfig): brdf: Optional[BRDFConfig] = None cache: Optional[CacheConfig] = None + @model_validator(mode="before") + def deprecate_archive(cls, values: Dict[str, Any]) -> Dict[str, Any]: + archive = values.get("archive") + if archive: + warnings.warn( + "'archive' will be deprecated soon. Please use 'source'.", + category=DeprecationWarning, + stacklevel=2, + ) + if values.get("source") is None: + try: + values["source"] = DEPRECATED_ARCHIVES[archive] + except KeyError: + raise + return values + @model_validator(mode="before") def to_list(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Expands source to list.""" diff --git a/mapchete_eo/platforms/sentinel2/customizations.py b/mapchete_eo/platforms/sentinel2/customizations.py index 2fcdede5..e053645a 100644 --- a/mapchete_eo/platforms/sentinel2/customizations.py +++ b/mapchete_eo/platforms/sentinel2/customizations.py @@ -1,5 +1,6 @@ from typing import Literal, Dict, Any +from mapchete.path import MPath from pystac import Item from mapchete_eo.platforms.sentinel2.mapper_registry import ( @@ -8,6 +9,7 @@ creates_s2metadata, ) from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.search.s2_mgrs import S2Tile DataArchive = Literal["AWSCOG", "AWSJP2"] @@ -22,6 +24,30 @@ "collections": ["sentinel-2-l2a"], }, } + +DEPRECATED_ARCHIVES = { + "S2AWS_COG": { + "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "collections": ["sentinel-2-l2a"], + "data_archive": "AWSCOG", + }, + "S2AWS_JP2": { + "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "collections": ["sentinel-2-l2a"], + "data_archive": "AWSJP2", + }, + "S2CDSE_AWSJP2": { + "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", + "collections": ["sentinel-2-l2a"], + "data_archive": "AWSJP2", + }, + "S2CDSE_JP2": { + "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", + "collections": ["sentinel-2-l2a"], + }, +} + + MetadataArchive = Literal["roda"] @@ -35,7 +61,8 
@@ def earthsearch_id_mapper(item: Item) -> Item: @maps_asset_paths(from_catalogs=["EarthSearch"], to_data_archives=["AWSCOG"]) -def earthsearch_path_mapper(item: Item) -> Item: +def earthsearch_assets_paths_mapper(item: Item) -> Item: + """Nothing to do here as paths match catalog.""" return item @@ -47,3 +74,86 @@ def earthsearch_to_s2metadata(item: Item) -> S2Metadata: @maps_item_id(from_catalogs=["CDSE"]) def plain_id_mapper(item: Item) -> Item: return item + + +@maps_asset_paths(from_catalogs=["CDSE"], to_data_archives=["AWSJP2"]) +def map_cdse_paths_to_jp2_archive(item: Item) -> Item: + """ + CSDE has the following assets: + AOT_10m, AOT_20m, AOT_60m, B01_20m, B01_60m, B02_10m, B02_20m, B02_60m, B03_10m, B03_20m, + B03_60m, B04_10m, B04_20m, B04_60m, B05_20m, B05_60m, B06_20m, B06_60m, B07_20m, B07_60m, + B08_10m, B09_60m, B11_20m, B11_60m, B12_20m, B12_60m, B8A_20m, B8A_60m, Product, SCL_20m, + SCL_60m, TCI_10m, TCI_20m, TCI_60m, WVP_10m, WVP_20m, WVP_60m, thumbnail, safe_manifest, + granule_metadata, inspire_metadata, product_metadata, datastrip_metadata + + sample path for AWS JP2: + s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/R10m/ + """ + if item.datetime is None: + raise ValueError(f"product {item.get_self_href()} does not have a timestamp") + band_name_mapping = { + "AOT_10m": "aot", + "B01_20m": "coastal", + "B02_10m": "blue", + "B03_10m": "green", + "B04_10m": "red", + "B05_20m": "rededge1", + "B06_20m": "rededge2", + "B07_20m": "rededge3", + "B08_10m": "nir", + "B09_60m": "nir09", + "B11_20m": "swir16", + "B12_20m": "swir22", + "B8A_20m": "nir08", + "SCL_20m": "scl", + "TCI_10m": "visual", + "WVP_10m": "wvp", + } + path_base_scheme = "s3://sentinel-s2-l2a/tiles/{utm_zone}/{latitude_band}/{grid_square}/{year}/{month}/{day}/{count}" + s2tile = S2Tile.from_grid_code(item.properties["grid:code"]) + product_basepath = MPath( + path_base_scheme.format( + utm_zone=s2tile.utm_zone, + latitude_band=s2tile.latitude_band, + grid_square=s2tile.grid_square, + 
year=item.datetime.year, + month=item.datetime.month, + day=item.datetime.day, + count=0, # TODO: get count dynamically from metadata + ) + ) + new_assets = {} + for asset_name, asset in item.assets.items(): + # ignore these assets + if asset_name in [ + "Product", + "safe_manifest", + "product_metadata", + "inspire_metadata", + "datastrip_metadata", + ]: + continue + # set thumbnnail + elif asset_name == "thumbnail": + asset.href = str(product_basepath / "R60m" / "TCI.jp2") + # point to proper metadata + elif asset_name == "granule_metadata": + asset.href = str(product_basepath / "metadata.xml") + # change band asset names and point to their new locations + elif asset_name in band_name_mapping: + name, resolution = asset_name.split("_") + asset.href = product_basepath / f"R{resolution}" / f"{name}.jp2" + asset_name = band_name_mapping[asset_name] + else: + continue + new_assets[asset_name] = asset + + item.assets = new_assets + + item.properties["s2:datastrip_id"] = item.properties.get("eopf:datastrip_id") + return item + + +@creates_s2metadata(from_catalogs=["CDSE"], to_metadata_archives=["roda"]) +def cdse_s2metadata(item: Item) -> S2Metadata: + return S2Metadata.from_stac_item(item) diff --git a/mapchete_eo/platforms/sentinel2/path_mappers/__init__.py b/mapchete_eo/platforms/sentinel2/path_mappers/__init__.py index 5079aade..e3115e68 100644 --- a/mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +++ b/mapchete_eo/platforms/sentinel2/path_mappers/__init__.py @@ -1,3 +1,5 @@ +# TODO: deprecate this whole module! 
+ from mapchete_eo.platforms.sentinel2.path_mappers.base import S2PathMapper from mapchete_eo.platforms.sentinel2.path_mappers.earthsearch import ( EarthSearchPathMapper, From 46dc14e53d0e8ed99d425610e4f5a1b02a6dfb70 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 28 Oct 2025 10:49:07 +0100 Subject: [PATCH 03/46] rename Source to Sentinel2Source --- mapchete_eo/platforms/sentinel2/config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index 7b386864..dcfe6a2d 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -32,7 +32,7 @@ def known_catalog_to_url(stac_catalog: str) -> str: return stac_catalog -class Source(BaseModel): +class Sentinel2Source(BaseModel): """All information required to consume Sentinel-2 products.""" stac_catalog: str @@ -56,7 +56,7 @@ def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: return values @model_validator(mode="after") - def verify_mappers(self) -> Source: + def verify_mappers(self) -> Sentinel2Source: # make sure all required mappers are registered self.get_id_mapper() self.get_asset_paths_mapper() @@ -101,7 +101,7 @@ def get_s2metadata_mapper(self) -> Union[Callable, None]: ) -default_source = Source.model_validate(KNOWN_SOURCES["EarthSearch"]) +default_source = Sentinel2Source.model_validate(KNOWN_SOURCES["EarthSearch"]) class BRDFModelConfig(BaseModel): @@ -190,7 +190,7 @@ class Sentinel2DriverConfig(BaseDriverConfig): time: Union[TimeRange, List[TimeRange]] # new - source: List[Source] = [default_source] + source: List[Sentinel2Source] = [default_source] # deprecated # for backwards compatibility, archive should be converted to From a49a97d148513175252135324e56fc8492e0ae47 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 28 Oct 2025 15:23:12 +0100 Subject: [PATCH 04/46] replace Archive with Source instances --- 
mapchete_eo/base.py | 70 ++++++++++++------- mapchete_eo/platforms/sentinel2/config.py | 19 ++--- .../platforms/sentinel2/customizations.py | 2 +- mapchete_eo/platforms/sentinel2/driver.py | 39 ----------- mapchete_eo/source.py | 66 +++++++++++++++++ 5 files changed, 116 insertions(+), 80 deletions(-) create mode 100644 mapchete_eo/source.py diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index 0875ae38..ab8590c3 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -2,7 +2,7 @@ import logging from functools import cached_property -from typing import Any, Callable, List, Optional, Type, Union +from typing import Any, Callable, List, Optional, Sequence, Type, Union, Dict, Generator import croniter from mapchete import Bounds @@ -17,13 +17,13 @@ from mapchete.path import MPath from mapchete.tile import BufferedTile from mapchete.types import MPathLike, NodataVal, NodataVals -from pydantic import BaseModel +from pydantic import BaseModel, model_validator +from pystac import Item from rasterio.enums import Resampling from rasterio.features import geometry_mask from shapely.geometry import mapping from shapely.geometry.base import BaseGeometry -from mapchete_eo.archives.base import Archive from mapchete_eo.exceptions import CorruptedProductMetadata, PreprocessingNotFinished from mapchete_eo.io import ( products_to_np_array, @@ -31,9 +31,9 @@ read_levelled_cube_to_np_array, read_levelled_cube_to_xarray, ) +from mapchete_eo.source import Source from mapchete_eo.product import EOProduct from mapchete_eo.protocols import EOProductProtocol -from mapchete_eo.search.stac_static import STACStaticCatalog from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.sort import SortMethodConfig, TargetDateSort from mapchete_eo.time import to_datetime @@ -44,13 +44,23 @@ class BaseDriverConfig(BaseModel): format: str + source: Sequence[Source] time: Union[TimeRange, List[TimeRange]] cat_baseurl: Optional[str] = None cache: Optional[Any] = None 
footprint_buffer: float = 0 area: Optional[Union[MPathLike, dict, type[BaseGeometry]]] = None preprocessing_tasks: bool = False - archive: Optional[Type[Archive]] = None + search_kwargs: Optional[Dict[str, Any]] = None + + @model_validator(mode="before") + def to_list(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Expands source to list.""" + for field in ["source"]: + value = values.get(field) + if value is not None and not isinstance(value, list): + values[field] = [value] + return values class EODataCube(base.InputTile): @@ -401,7 +411,6 @@ class InputData(base.InputData): default_preprocessing_task: Callable = staticmethod(EOProduct.from_stac_item) driver_config_model: Type[BaseDriverConfig] = BaseDriverConfig params: BaseDriverConfig - archive: Archive time: Union[TimeRange, List[TimeRange]] area: BaseGeometry _products: Optional[IndexedFeatures] = None @@ -421,6 +430,7 @@ def __init__( self.standalone = standalone self.params = self.driver_config_model(**input_params["abstract"]) + # we have to make sure, the cache path is absolute # not quite fond of this solution if self.params.cache: @@ -429,14 +439,17 @@ def __init__( ).absolute_path(base_dir=input_params.get("conf_dir")) self.area = self._init_area(input_params) self.time = self.params.time + + self.eo_bands = [ + eo_band for source in self.params.source for eo_band in source.eo_bands() + ] + if self.readonly: # pragma: no cover return - self.set_archive(base_dir=input_params["conf_dir"]) - # don't use preprocessing tasks for Sentinel-2 products: if self.params.preprocessing_tasks or self.params.cache is not None: - for item in self.archive.items(): + for item in self.source_items(): self.add_preprocessing_task( self.default_preprocessing_task, fargs=(item,), @@ -455,7 +468,7 @@ def __init__( self.default_preprocessing_task( item, cache_config=self.params.cache, cache_all=True ) - for item in self.archive.items() + for item in self.source_items() ] ) @@ -481,20 +494,27 @@ def _init_area(self, 
input_params: dict) -> BaseGeometry: ) return process_area - def set_archive(self, base_dir: MPath): - # this only works with some static archive: - if self.params.cat_baseurl: - self.archive = Archive( - catalog=STACStaticCatalog( - baseurl=MPath(self.params.cat_baseurl).absolute_path( - base_dir=base_dir - ), - ), - area=self.bbox(mapchete_eo_settings.default_catalog_crs), + def source_items(self) -> Generator[Item, None, None]: + already_returned = set() + + for source in self.params.source: + for item in source.search( time=self.time, - ) - else: - raise NotImplementedError() + area=reproject_geometry( + self.area, + src_crs=self.crs, + dst_crs=source.catalog_crs, + ), + search_kwargs=self.params.search_kwargs, + ): + # if item was already found in previous source, skip + if item.id in already_returned: + continue + + # if item is new, add to list and yield + already_returned.add(item.id) + yield item + logger.debug("returned set of %s items", len(already_returned)) def bbox(self, out_crs: Optional[str] = None) -> BaseGeometry: """Return data bounding box.""" @@ -525,7 +545,7 @@ def products(self) -> IndexedFeatures: return IndexedFeatures( [ self.get_preprocessing_task_result(item.id) - for item in self.archive.items() + for item in self.source_items() if not isinstance(item, CorruptedProductMetadata) ], crs=self.crs, @@ -557,7 +577,7 @@ def open(self, tile, **kwargs) -> EODataCube: return self.input_tile_cls( tile, products=tile_products, - eo_bands=self.archive.catalog.eo_bands, + eo_bands=self.eo_bands, time=self.time, # passing on the input key is essential so dependent preprocessing tasks can be found! 
input_key=self.input_key, diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index dcfe6a2d..bfca48c4 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -8,6 +8,7 @@ from mapchete_eo.base import BaseDriverConfig from mapchete_eo.io.path import ProductPathGenerationMethod +from mapchete_eo.source import Source from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels from mapchete_eo.platforms.sentinel2.customizations import ( DataArchive, @@ -32,13 +33,10 @@ def known_catalog_to_url(stac_catalog: str) -> str: return stac_catalog -class Sentinel2Source(BaseModel): +class Sentinel2Source(Source): """All information required to consume Sentinel-2 products.""" - stac_catalog: str - - # if known STAC catalog is given, fill in the defaults - collections: Optional[List[str]] = None + # extends base model with those properties data_archive: Optional[DataArchive] = None metadata_archive: MetadataArchive = "roda" @@ -47,7 +45,7 @@ def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Handles short names of sources.""" if isinstance(values, str): values = dict(stac_catalog=values) - stac_catalog = values.get("stac_catalog") + stac_catalog = values.get("stac_catalog", None) if stac_catalog in KNOWN_SOURCES: values.update(KNOWN_SOURCES[stac_catalog]) else: @@ -226,15 +224,6 @@ def deprecate_archive(cls, values: Dict[str, Any]) -> Dict[str, Any]: raise return values - @model_validator(mode="before") - def to_list(cls, values: Dict[str, Any]) -> Dict[str, Any]: - """Expands source to list.""" - for field in ["source"]: - value = values.get(field) - if value is not None and not isinstance(value, list): - values[field] = [value] - return values - class MaskConfig(BaseModel): # mask by footprint geometry diff --git a/mapchete_eo/platforms/sentinel2/customizations.py b/mapchete_eo/platforms/sentinel2/customizations.py index e053645a..1b2972ad 100644 
--- a/mapchete_eo/platforms/sentinel2/customizations.py +++ b/mapchete_eo/platforms/sentinel2/customizations.py @@ -32,7 +32,7 @@ "data_archive": "AWSCOG", }, "S2AWS_JP2": { - "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", "collections": ["sentinel-2-l2a"], "data_archive": "AWSJP2", }, diff --git a/mapchete_eo/platforms/sentinel2/driver.py b/mapchete_eo/platforms/sentinel2/driver.py index 646c3101..8d84c341 100644 --- a/mapchete_eo/platforms/sentinel2/driver.py +++ b/mapchete_eo/platforms/sentinel2/driver.py @@ -1,16 +1,11 @@ from typing import Optional, List, Tuple -from mapchete.geometry import reproject_geometry -from mapchete.path import MPath from mapchete.types import NodataVal from rasterio.enums import Resampling from mapchete_eo import base -from mapchete_eo.archives.base import Archive from mapchete_eo.platforms.sentinel2.config import Sentinel2DriverConfig from mapchete_eo.platforms.sentinel2.preprocessing_tasks import parse_s2_product -from mapchete_eo.search.stac_static import STACStaticCatalog -from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.types import MergeMethod METADATA: dict = { @@ -42,37 +37,3 @@ class InputData(base.InputData): driver_config_model = Sentinel2DriverConfig params: Sentinel2DriverConfig input_tile_cls = Sentinel2Cube - - def set_archive(self, base_dir: MPath): - if self.params.cat_baseurl: - self.archive = Archive( - catalog=STACStaticCatalog( - baseurl=MPath(self.params.cat_baseurl).absolute_path( - base_dir=base_dir - ), - ), - area=self.bbox(mapchete_eo_settings.default_catalog_crs), - time=self.time, - search_kwargs=dict(max_cloud_cover=self.params.max_cloud_cover), - ) - elif self.params.archive: - catalog_area = reproject_geometry( - self.area, - src_crs=self.crs, - dst_crs=mapchete_eo_settings.default_catalog_crs, - ) - self.archive = self.params.archive( - time=self.time, - bounds=catalog_area.bounds, - area=catalog_area, - 
search_kwargs=dict( - search_index=( - MPath(self.params.search_index).absolute_path(base_dir=base_dir) - if self.params.search_index - else None - ), - max_cloud_cover=self.params.max_cloud_cover, - ), - ) - else: - raise ValueError("either 'archive' or 'cat_baseurl' or both is required.") diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py new file mode 100644 index 00000000..c2fffb8d --- /dev/null +++ b/mapchete_eo/source.py @@ -0,0 +1,66 @@ +from typing import List, Literal, Optional, Generator, Union, Dict, Any, Callable + +from mapchete.path import MPath +from mapchete.types import BoundsLike, CRSLike, MPathLike +from pydantic import BaseModel, ConfigDict +from pystac import Item +from shapely.geometry.base import BaseGeometry +from shapely.errors import GEOSException + +from mapchete_eo.exceptions import ItemGeometryError +from mapchete_eo.search.base import CatalogSearcher +from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog +from mapchete_eo.settings import mapchete_eo_settings +from mapchete_eo.types import TimeRange + + +class Source(BaseModel): + """All information required to consume EO products.""" + + stac_catalog: str + collections: Optional[List[str]] = None + catalog_crs: CRSLike = mapchete_eo_settings.default_catalog_crs + catalog_type: Literal["search", "static"] = "search" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + @property + def item_modifier_funcs(self) -> List[Callable]: + return [] + + def search( + self, + time: Union[TimeRange, List[TimeRange]], + bounds: Optional[BoundsLike] = None, + area: Optional[BaseGeometry] = None, + search_kwargs: Optional[Dict[str, Any]] = None, + base_dir: Optional[MPathLike] = None, + ) -> Generator[Item, None, None]: + for item in self.get_catalog(base_dir=base_dir).search( + time=time, bounds=bounds, area=area, search_kwargs=search_kwargs + ): + yield self.apply_item_modifier_funcs(item) + + def apply_item_modifier_funcs(self, item: Item) -> Item: + try: + for 
modifier in self.item_modifier_funcs: + item = modifier(item) + except GEOSException as exc: + raise ItemGeometryError( + f"item {item.get_self_href()} geometry could not be resolved: {str(exc)}" + ) + return item + + def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CatalogSearcher: + match self.catalog_type: + case "search": + return STACSearchCatalog( + endpoint=self.stac_catalog, collections=self.collections + ) + case "static": + return STACStaticCatalog( + baseurl=MPath(self.stac_catalog).absolute_path(base_dir=base_dir) + ) + + def eo_bands(self) -> List[str]: + return self.get_catalog().eo_bands From fead4c611a73c7dadb04701503d724bbfa7c245b Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 29 Oct 2025 11:31:39 +0100 Subject: [PATCH 05/46] dev commit --- mapchete_eo/base.py | 1 - mapchete_eo/platforms/sentinel2/config.py | 122 ++++++------------ mapchete_eo/platforms/sentinel2/source.py | 93 +++++++++++++ .../{customizations.py => sources_mappers.py} | 0 mapchete_eo/search/config.py | 1 + mapchete_eo/search/stac_search.py | 2 +- mapchete_eo/source.py | 9 +- tests/platforms/sentinel2/test_config.py | 6 + 8 files changed, 150 insertions(+), 84 deletions(-) create mode 100644 mapchete_eo/platforms/sentinel2/source.py rename mapchete_eo/platforms/sentinel2/{customizations.py => sources_mappers.py} (100%) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index ab8590c3..8c66b4e6 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -505,7 +505,6 @@ def source_items(self) -> Generator[Item, None, None]: src_crs=self.crs, dst_crs=source.catalog_crs, ), - search_kwargs=self.params.search_kwargs, ): # if item was already found in previous source, skip if item.id in already_returned: diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index bfca48c4..dcbfdbcd 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -1,6 +1,6 @@ from 
__future__ import annotations -from typing import List, Optional, Union, Dict, Any, Callable +from typing import List, Optional, Union, Dict, Any import warnings from mapchete.path import MPathLike @@ -8,15 +8,12 @@ from mapchete_eo.base import BaseDriverConfig from mapchete_eo.io.path import ProductPathGenerationMethod -from mapchete_eo.source import Source from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels -from mapchete_eo.platforms.sentinel2.customizations import ( - DataArchive, - MetadataArchive, +from mapchete_eo.platforms.sentinel2.sources_mappers import ( KNOWN_SOURCES, DEPRECATED_ARCHIVES, ) -from mapchete_eo.platforms.sentinel2.mapper_registry import MAPPER_REGISTRIES +from mapchete_eo.platforms.sentinel2.source import Sentinel2Source from mapchete_eo.platforms.sentinel2.types import ( CloudType, ProductQIMaskResolution, @@ -27,78 +24,6 @@ from mapchete_eo.types import TimeRange -def known_catalog_to_url(stac_catalog: str) -> str: - if stac_catalog in KNOWN_SOURCES: - return KNOWN_SOURCES[stac_catalog]["stac_catalog"] - return stac_catalog - - -class Sentinel2Source(Source): - """All information required to consume Sentinel-2 products.""" - - # extends base model with those properties - data_archive: Optional[DataArchive] = None - metadata_archive: MetadataArchive = "roda" - - @model_validator(mode="before") - def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: - """Handles short names of sources.""" - if isinstance(values, str): - values = dict(stac_catalog=values) - stac_catalog = values.get("stac_catalog", None) - if stac_catalog in KNOWN_SOURCES: - values.update(KNOWN_SOURCES[stac_catalog]) - else: - # TODO: make sure catalog then is either a path or an URL - pass - return values - - @model_validator(mode="after") - def verify_mappers(self) -> Sentinel2Source: - # make sure all required mappers are registered - self.get_id_mapper() - self.get_asset_paths_mapper() - self.get_s2metadata_mapper() - return self - - 
def get_id_mapper(self) -> Callable: - for key in MAPPER_REGISTRIES["ID"]: - if self.stac_catalog == known_catalog_to_url(key): - return MAPPER_REGISTRIES["ID"][key] - else: - raise ValueError(f"no ID mapper for {self.stac_catalog} found") - - def get_asset_paths_mapper(self) -> Union[Callable, None]: - if self.data_archive is None: - return None - for key in MAPPER_REGISTRIES["asset paths"]: - stac_catalog, data_archive = key - if ( - self.stac_catalog == known_catalog_to_url(stac_catalog) - and data_archive == self.data_archive - ): - return MAPPER_REGISTRIES["asset paths"][key] - else: - raise ValueError( - f"no asset paths mapper from {self.stac_catalog} to {self.data_archive} found" - ) - - def get_s2metadata_mapper(self) -> Union[Callable, None]: - if self.metadata_archive is None: - return None - for key in MAPPER_REGISTRIES["S2Metadata"]: - stac_catalog, metadata_archive = key - if ( - self.stac_catalog == known_catalog_to_url(stac_catalog) - and metadata_archive == self.metadata_archive - ): - return MAPPER_REGISTRIES["S2Metadata"][key] - else: - raise ValueError( - f"no S2Metadata mapper from {self.stac_catalog} to {self.metadata_archive} found" - ) - - default_source = Sentinel2Source.model_validate(KNOWN_SOURCES["EarthSearch"]) @@ -200,7 +125,6 @@ class Sentinel2DriverConfig(BaseDriverConfig): search_index: Optional[MPathLike] = None # custom params - max_cloud_cover: float = 100.0 stac_config: StacSearchConfig = StacSearchConfig() first_granule_only: bool = False utm_zone: Optional[int] = None @@ -224,6 +148,46 @@ def deprecate_archive(cls, values: Dict[str, Any]) -> Dict[str, Any]: raise return values + @model_validator(mode="before") + def deprecate_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: + max_cloud_cover = values.get("max_cloud_cover") + if max_cloud_cover: + warnings.warn( + "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' 
in the source 'query' field.", + category=DeprecationWarning, + stacklevel=2, + ) + sources = values.get("source", []) + updated_sources = [] + for source in sources: + if source.get("query") is not None: + raise ValueError( + f"deprecated max_cloud_cover is set but also a query field is given in {source}" + ) + source["query"] = f"eo:cloud_cover<={max_cloud_cover}" + updated_sources.append(source) + values.pop("max_cloud_cover") + values["source"] = updated_sources + return values + + @model_validator(mode="before") + def deprecate_cat_baseurl(cls, values: Dict[str, Any]) -> Dict[str, Any]: + cat_baseurl = values.get("cat_baseurl") + if cat_baseurl: + warnings.warn( + "'cat_baseurl' will be deprecated soon. Please use 'catalog_type=static' in the source.", + category=DeprecationWarning, + stacklevel=2, + ) + sources = values.get("source", []) + updated_sources = [] + for source in sources: + source.update(stac_catalog=cat_baseurl, catalog_type="static") + updated_sources.append(source) + values.pop("cat_baseurl") + values["source"] = updated_sources + return values + class MaskConfig(BaseModel): # mask by footprint geometry diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py new file mode 100644 index 00000000..68e0c518 --- /dev/null +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from typing import Optional, List, Callable, Dict, Any, Union + +from pydantic import model_validator + +from mapchete_eo.source import Source +from mapchete_eo.platforms.sentinel2.sources_mappers import ( + DataArchive, + MetadataArchive, + KNOWN_SOURCES, +) +from mapchete_eo.platforms.sentinel2.mapper_registry import MAPPER_REGISTRIES + + +def known_catalog_to_url(stac_catalog: str) -> str: + if stac_catalog in KNOWN_SOURCES: + return KNOWN_SOURCES[stac_catalog]["stac_catalog"] + return stac_catalog + + +class Sentinel2Source(Source): + """All information required to consume 
Sentinel-2 products.""" + + # extends base model with those properties + data_archive: Optional[DataArchive] = None + metadata_archive: MetadataArchive = "roda" + + @property + def item_modifier_funcs(self) -> List[Callable]: + return [ + func + for func in (self.get_id_mapper(), self.get_asset_paths_mapper()) + if func is not None + ] + + @model_validator(mode="before") + def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Handles short names of sources.""" + if isinstance(values, str): + values = dict(stac_catalog=values) + stac_catalog = values.get("stac_catalog", None) + if stac_catalog in KNOWN_SOURCES: + values.update(KNOWN_SOURCES[stac_catalog]) + else: + # TODO: make sure catalog then is either a path or an URL + pass + return values + + @model_validator(mode="after") + def verify_mappers(self) -> Sentinel2Source: + # make sure all required mappers are registered + self.get_id_mapper() + self.get_asset_paths_mapper() + self.get_s2metadata_mapper() + return self + + def get_id_mapper(self) -> Callable: + for key in MAPPER_REGISTRIES["ID"]: + if self.stac_catalog == known_catalog_to_url(key): + return MAPPER_REGISTRIES["ID"][key] + else: + raise ValueError(f"no ID mapper for {self.stac_catalog} found") + + def get_asset_paths_mapper(self) -> Union[Callable, None]: + if self.data_archive is None: + return None + for key in MAPPER_REGISTRIES["asset paths"]: + stac_catalog, data_archive = key + if ( + self.stac_catalog == known_catalog_to_url(stac_catalog) + and data_archive == self.data_archive + ): + return MAPPER_REGISTRIES["asset paths"][key] + else: + raise ValueError( + f"no asset paths mapper from {self.stac_catalog} to {self.data_archive} found" + ) + + def get_s2metadata_mapper(self) -> Union[Callable, None]: + if self.metadata_archive is None: + return None + for key in MAPPER_REGISTRIES["S2Metadata"]: + stac_catalog, metadata_archive = key + if ( + self.stac_catalog == known_catalog_to_url(stac_catalog) + and 
metadata_archive == self.metadata_archive + ): + return MAPPER_REGISTRIES["S2Metadata"][key] + else: + raise ValueError( + f"no S2Metadata mapper from {self.stac_catalog} to {self.metadata_archive} found" + ) diff --git a/mapchete_eo/platforms/sentinel2/customizations.py b/mapchete_eo/platforms/sentinel2/sources_mappers.py similarity index 100% rename from mapchete_eo/platforms/sentinel2/customizations.py rename to mapchete_eo/platforms/sentinel2/sources_mappers.py diff --git a/mapchete_eo/search/config.py b/mapchete_eo/search/config.py index 39690600..dfac4443 100644 --- a/mapchete_eo/search/config.py +++ b/mapchete_eo/search/config.py @@ -6,6 +6,7 @@ class StacSearchConfig(BaseModel): max_cloud_cover: float = 100.0 + query: Optional[str] = None catalog_chunk_threshold: int = 10_000 catalog_chunk_zoom: int = 5 catalog_pagesize: int = 100 diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 8b782bf7..ad1d6755 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -183,7 +183,7 @@ def _search( search_params = dict( self.default_search_params, datetime=f"{start}/{end}", - query=[f"eo:cloud_cover<={config.max_cloud_cover}"], + query=config.query, **kwargs, ) if ( diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index c2fffb8d..6bcd97ce 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -1,4 +1,4 @@ -from typing import List, Literal, Optional, Generator, Union, Dict, Any, Callable +from typing import List, Literal, Optional, Generator, Union, Callable from mapchete.path import MPath from mapchete.types import BoundsLike, CRSLike, MPathLike @@ -21,6 +21,7 @@ class Source(BaseModel): collections: Optional[List[str]] = None catalog_crs: CRSLike = mapchete_eo_settings.default_catalog_crs catalog_type: Literal["search", "static"] = "search" + query: Optional[str] = None model_config = ConfigDict(arbitrary_types_allowed=True) @@ -33,11 +34,13 @@ def search( time: Union[TimeRange, 
List[TimeRange]], bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, - search_kwargs: Optional[Dict[str, Any]] = None, base_dir: Optional[MPathLike] = None, ) -> Generator[Item, None, None]: for item in self.get_catalog(base_dir=base_dir).search( - time=time, bounds=bounds, area=area, search_kwargs=search_kwargs + time=time, + bounds=bounds, + area=area, + search_kwargs=dict(query=self.query) if self.query else None, ): yield self.apply_item_modifier_funcs(item) diff --git a/tests/platforms/sentinel2/test_config.py b/tests/platforms/sentinel2/test_config.py index 662f2975..ce876b6f 100644 --- a/tests/platforms/sentinel2/test_config.py +++ b/tests/platforms/sentinel2/test_config.py @@ -13,6 +13,12 @@ dict( source=["EarthSearch"], ), + dict( + source=dict( + stac_catalog="EarthSearch", + metadata_archive="roda", + ) + ), dict( source=[ dict( From 892ef8260977d48a43ef07002ba4ab83b2acecfa Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 29 Oct 2025 11:44:39 +0100 Subject: [PATCH 06/46] fix eo_bands and static catalog --- mapchete_eo/base.py | 8 +++++--- mapchete_eo/platforms/sentinel2/config.py | 12 +++++------- mapchete_eo/platforms/sentinel2/source.py | 8 +++++--- mapchete_eo/source.py | 4 ++-- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index 8c66b4e6..a105cff3 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -430,6 +430,7 @@ def __init__( self.standalone = standalone self.params = self.driver_config_model(**input_params["abstract"]) + self.conf_dir = input_params.get("conf_dir") # we have to make sure, the cache path is absolute # not quite fond of this solution @@ -441,12 +442,13 @@ def __init__( self.time = self.params.time self.eo_bands = [ - eo_band for source in self.params.source for eo_band in source.eo_bands() + eo_band + for source in self.params.source + for eo_band in source.eo_bands(base_dir=self.conf_dir) ] if self.readonly: # pragma: no cover 
return - # don't use preprocessing tasks for Sentinel-2 products: if self.params.preprocessing_tasks or self.params.cache is not None: for item in self.source_items(): @@ -496,7 +498,6 @@ def _init_area(self, input_params: dict) -> BaseGeometry: def source_items(self) -> Generator[Item, None, None]: already_returned = set() - for source in self.params.source: for item in source.search( time=self.time, @@ -505,6 +506,7 @@ def source_items(self) -> Generator[Item, None, None]: src_crs=self.crs, dst_crs=source.catalog_crs, ), + base_dir=self.conf_dir, ): # if item was already found in previous source, skip if item.id in already_returned: diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index dcbfdbcd..cf63c444 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -179,13 +179,11 @@ def deprecate_cat_baseurl(cls, values: Dict[str, Any]) -> Dict[str, Any]: category=DeprecationWarning, stacklevel=2, ) - sources = values.get("source", []) - updated_sources = [] - for source in sources: - source.update(stac_catalog=cat_baseurl, catalog_type="static") - updated_sources.append(source) - values.pop("cat_baseurl") - values["source"] = updated_sources + if values.get("source", []): + raise ValueError( + "deprecated cat_baseurl field found alongside sources." 
+ ) + values["source"] = [dict(stac_catalog=cat_baseurl, catalog_type="static")] return values diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py index 68e0c518..848bcfc4 100644 --- a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -55,7 +55,9 @@ def verify_mappers(self) -> Sentinel2Source: self.get_s2metadata_mapper() return self - def get_id_mapper(self) -> Callable: + def get_id_mapper(self) -> Union[Callable, None]: + if self.catalog_type == "static": + return None for key in MAPPER_REGISTRIES["ID"]: if self.stac_catalog == known_catalog_to_url(key): return MAPPER_REGISTRIES["ID"][key] @@ -63,7 +65,7 @@ def get_id_mapper(self) -> Callable: raise ValueError(f"no ID mapper for {self.stac_catalog} found") def get_asset_paths_mapper(self) -> Union[Callable, None]: - if self.data_archive is None: + if self.catalog_type == "static" or self.data_archive is None: return None for key in MAPPER_REGISTRIES["asset paths"]: stac_catalog, data_archive = key @@ -78,7 +80,7 @@ def get_asset_paths_mapper(self) -> Union[Callable, None]: ) def get_s2metadata_mapper(self) -> Union[Callable, None]: - if self.metadata_archive is None: + if self.catalog_type == "static" or self.metadata_archive is None: return None for key in MAPPER_REGISTRIES["S2Metadata"]: stac_catalog, metadata_archive = key diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index 6bcd97ce..f6d35cb9 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -65,5 +65,5 @@ def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CatalogSearcher: baseurl=MPath(self.stac_catalog).absolute_path(base_dir=base_dir) ) - def eo_bands(self) -> List[str]: - return self.get_catalog().eo_bands + def eo_bands(self, base_dir: Optional[MPathLike] = None) -> List[str]: + return self.get_catalog(base_dir=base_dir).eo_bands From 27f86e17cd26c73503ca5e5136bd46bc4cfe0842 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: 
Wed, 29 Oct 2025 11:50:10 +0100 Subject: [PATCH 07/46] deactivate obsolete test; fix eostac cat_baseurl functionality --- mapchete_eo/base.py | 17 +++++++++++++++++ mapchete_eo/platforms/sentinel2/config.py | 16 ---------------- tests/test_s2_mgrs.py | 1 + 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index a105cff3..bec34299 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings import logging from functools import cached_property from typing import Any, Callable, List, Optional, Sequence, Type, Union, Dict, Generator @@ -62,6 +63,22 @@ def to_list(cls, values: Dict[str, Any]) -> Dict[str, Any]: values[field] = [value] return values + @model_validator(mode="before") + def deprecate_cat_baseurl(cls, values: Dict[str, Any]) -> Dict[str, Any]: + cat_baseurl = values.get("cat_baseurl") + if cat_baseurl: + warnings.warn( + "'cat_baseurl' will be deprecated soon. Please use 'catalog_type=static' in the source.", + category=DeprecationWarning, + stacklevel=2, + ) + if values.get("source", []): + raise ValueError( + "deprecated cat_baseurl field found alongside sources." + ) + values["source"] = [dict(stac_catalog=cat_baseurl, catalog_type="static")] + return values + class EODataCube(base.InputTile): """Target Tile representation of input data.""" diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index cf63c444..a3b2af07 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -170,22 +170,6 @@ def deprecate_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: values["source"] = updated_sources return values - @model_validator(mode="before") - def deprecate_cat_baseurl(cls, values: Dict[str, Any]) -> Dict[str, Any]: - cat_baseurl = values.get("cat_baseurl") - if cat_baseurl: - warnings.warn( - "'cat_baseurl' will be deprecated soon. 
Please use 'catalog_type=static' in the source.", - category=DeprecationWarning, - stacklevel=2, - ) - if values.get("source", []): - raise ValueError( - "deprecated cat_baseurl field found alongside sources." - ) - values["source"] = [dict(stac_catalog=cat_baseurl, catalog_type="static")] - return values - class MaskConfig(BaseModel): # mask by footprint geometry diff --git a/tests/test_s2_mgrs.py b/tests/test_s2_mgrs.py index 996cf137..3c1aaead 100644 --- a/tests/test_s2_mgrs.py +++ b/tests/test_s2_mgrs.py @@ -128,6 +128,7 @@ def test_s2tile_antimeridian_footprint(tile_id, control_geom_type): assert Bounds.from_inp(s2tile.latlon_geometry).width < 10 +@pytest.mark.skip(reason="deprecated functionality") def test_sentinel2_jp2_static_catalog(sentinel2_jp2_static_catalog_mapchete): inp = sentinel2_jp2_static_catalog_mapchete.mp().config.at_zoom(13)["input"]["inp"] assert inp.archive.get_catalog_config().search_index From a54df3d997a3767ba4f6cdec49779f794a5efa8e Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 29 Oct 2025 12:24:51 +0100 Subject: [PATCH 08/46] trim the fat --- mapchete_eo/archives/__init__.py | 0 mapchete_eo/archives/base.py | 65 ------ mapchete_eo/cli/options_arguments.py | 10 +- mapchete_eo/cli/s2_cat_results.py | 3 +- mapchete_eo/cli/s2_find_broken_products.py | 3 +- mapchete_eo/cli/static_catalog.py | 7 +- mapchete_eo/known_catalogs.py | 49 ----- mapchete_eo/platforms/sentinel2/archives.py | 192 ------------------ tests/conftest.py | 29 --- tests/platforms/sentinel2/test_archives.py | 62 ------ .../platforms/sentinel2/test_cdse_archive.py | 62 ------ tests/test_catalog.py | 64 ------ tests/test_cli.py | 3 +- tests/test_known_catalogs.py | 75 ------- 14 files changed, 8 insertions(+), 616 deletions(-) delete mode 100644 mapchete_eo/archives/__init__.py delete mode 100644 mapchete_eo/archives/base.py delete mode 100644 mapchete_eo/known_catalogs.py delete mode 100644 mapchete_eo/platforms/sentinel2/archives.py delete mode 100644 
tests/platforms/sentinel2/test_archives.py delete mode 100644 tests/platforms/sentinel2/test_cdse_archive.py delete mode 100644 tests/test_known_catalogs.py diff --git a/mapchete_eo/archives/__init__.py b/mapchete_eo/archives/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mapchete_eo/archives/base.py b/mapchete_eo/archives/base.py deleted file mode 100644 index 5ccc589d..00000000 --- a/mapchete_eo/archives/base.py +++ /dev/null @@ -1,65 +0,0 @@ -from abc import ABC -import logging -from typing import Any, Callable, Dict, Generator, List, Optional, Union - -from mapchete.io.vector import IndexedFeatures -from mapchete.types import Bounds -from pystac import Item -from shapely.errors import GEOSException -from shapely.geometry.base import BaseGeometry - -from mapchete_eo.exceptions import ItemGeometryError -from mapchete_eo.search.base import CatalogSearcher -from mapchete_eo.types import TimeRange - -logger = logging.getLogger(__name__) - - -class Archive(ABC): - """ - An archive combines a Catalog and a Storage. 
- """ - - time: Union[TimeRange, List[TimeRange]] - area: BaseGeometry - catalog: CatalogSearcher - search_kwargs: Dict[str, Any] - _items: Optional[IndexedFeatures] = None - item_modifier_funcs: Optional[List[Callable[[Item], Item]]] = None - - def __init__( - self, - time: Union[TimeRange, List[TimeRange]], - bounds: Optional[Bounds] = None, - area: Optional[BaseGeometry] = None, - search_kwargs: Optional[Dict[str, Any]] = None, - catalog: Optional[CatalogSearcher] = None, - ): - if bounds is None and area is None: - raise ValueError("either bounds or area have to be provided") - elif area is None: - area = Bounds.from_inp(bounds).geometry - self.time = time - self.area = area - self.search_kwargs = search_kwargs or {} - if catalog: - self.catalog = catalog - - def get_catalog_config(self): - return self.catalog.config_cls(**self.search_kwargs) - - def apply_item_modifier_funcs(self, item: Item) -> Item: - try: - for modifier in self.item_modifier_funcs or []: - item = modifier(item) - except GEOSException as exc: - raise ItemGeometryError( - f"item {item.get_self_href()} geometry could not be resolved: {str(exc)}" - ) - return item - - def items(self) -> Generator[Item, None, None]: - for item in self.catalog.search( - time=self.time, area=self.area, search_kwargs=self.search_kwargs - ): - yield self.apply_item_modifier_funcs(item) diff --git a/mapchete_eo/cli/options_arguments.py b/mapchete_eo/cli/options_arguments.py index badeded9..e6b9a606 100644 --- a/mapchete_eo/cli/options_arguments.py +++ b/mapchete_eo/cli/options_arguments.py @@ -6,8 +6,8 @@ from mapchete_eo.platforms.sentinel2.brdf.models import BRDFModels from mapchete_eo.io.profiles import rio_profiles -from mapchete_eo.platforms.sentinel2.archives import KnownArchives from mapchete_eo.platforms.sentinel2.config import SceneClassification +from mapchete_eo.platforms.sentinel2.sources_mappers import DEPRECATED_ARCHIVES from mapchete_eo.platforms.sentinel2.types import L2ABand, Resolution from 
mapchete_eo.time import to_datetime @@ -58,11 +58,6 @@ def _str_to_l2a_bands(_, __, value): return [L2ABand[v] for v in value.split(",")] -def _archive_name_to_archive_cls(_, __, value): - if value: - return KnownArchives[value] - - def _str_to_datetime(_, param, value): if value: return to_datetime(value) @@ -169,10 +164,9 @@ def _str_to_datetime(_, param, value): ) opt_archive = click.option( "--archive", - type=click.Choice([archive.name for archive in KnownArchives]), + type=click.Choice(list(DEPRECATED_ARCHIVES.keys())), default="S2AWS_COG", help="Archive to read from.", - callback=_archive_name_to_archive_cls, ) opt_collection = click.option( "--collection", diff --git a/mapchete_eo/cli/s2_cat_results.py b/mapchete_eo/cli/s2_cat_results.py index 39beef8f..ab427f0b 100644 --- a/mapchete_eo/cli/s2_cat_results.py +++ b/mapchete_eo/cli/s2_cat_results.py @@ -14,7 +14,6 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.cli.static_catalog import get_catalog from mapchete_eo.io.products import Slice, products_to_slices -from mapchete_eo.platforms.sentinel2.archives import KnownArchives from mapchete_eo.platforms.sentinel2.product import S2Product from mapchete_eo.sort import TargetDateSort from mapchete_eo.types import TimeRange @@ -46,7 +45,7 @@ def s2_cat_results( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - archive: Optional[KnownArchives] = None, + archive: Optional[str] = None, collection: Optional[str] = None, endpoint: Optional[str] = None, catalog_json: Optional[MPath] = None, diff --git a/mapchete_eo/cli/s2_find_broken_products.py b/mapchete_eo/cli/s2_find_broken_products.py index b230385b..ff8a69ac 100644 --- a/mapchete_eo/cli/s2_find_broken_products.py +++ b/mapchete_eo/cli/s2_find_broken_products.py @@ -10,7 +10,6 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.cli.s2_verify import verify_item from mapchete_eo.cli.static_catalog import get_catalog -from 
mapchete_eo.platforms.sentinel2.archives import KnownArchives from mapchete_eo.product import add_to_blacklist, blacklist_products from mapchete_eo.types import TimeRange @@ -32,7 +31,7 @@ def s2_find_broken_products( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - archive: Optional[KnownArchives] = None, + archive: Optional[str] = None, collection: Optional[str] = None, endpoint: Optional[str] = None, catalog_json: Optional[MPath] = None, diff --git a/mapchete_eo/cli/static_catalog.py b/mapchete_eo/cli/static_catalog.py index 36d889d3..5a801d39 100644 --- a/mapchete_eo/cli/static_catalog.py +++ b/mapchete_eo/cli/static_catalog.py @@ -9,7 +9,6 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.platforms.sentinel2 import S2Metadata -from mapchete_eo.platforms.sentinel2.archives import KnownArchives from mapchete_eo.platforms.sentinel2.types import Resolution from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog from mapchete_eo.search.base import CatalogSearcher @@ -40,7 +39,7 @@ def static_catalog( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - archive: Optional[KnownArchives] = None, + archive: Optional[str] = None, collection: Optional[str] = None, endpoint: Optional[str] = None, catalog_json: Optional[MPath] = None, @@ -102,7 +101,7 @@ def static_catalog( def get_catalog( catalog_json: Optional[MPath], endpoint: Optional[MPath], - known_archive: Optional[KnownArchives] = None, + known_archive: Optional[str] = None, collection: Optional[str] = None, ) -> CatalogSearcher: if catalog_json: @@ -118,6 +117,6 @@ def get_catalog( else: raise ValueError("collection must be provided") elif known_archive: - return known_archive.value.catalog + raise NotImplementedError() else: raise TypeError("cannot determine catalog") diff --git a/mapchete_eo/known_catalogs.py b/mapchete_eo/known_catalogs.py deleted file mode 100644 index cb9acfed..00000000 --- 
a/mapchete_eo/known_catalogs.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Catalogs define access to a search interface which provide products -as pystac Items. -""" - -from typing import List - -from mapchete_eo.search import STACSearchCatalog, UTMSearchCatalog - - -class EarthSearchV1S2L2A(STACSearchCatalog): - # DEPRECATED - """Earth-Search catalog for Sentinel-2 Level 2A COGs.""" - - endpoint: str = "https://earth-search.aws.element84.com/v1/" - - -class EarthSearchV1(STACSearchCatalog): - """Earth-Search catalog V1.""" - - endpoint: str = "https://earth-search.aws.element84.com/v1/" - - -class CDSESearch(STACSearchCatalog): - """Copernicus Data Space Ecosystem (CDSE) STAC API.""" - - endpoint: str = "https://stac.dataspace.copernicus.eu/v1" - - -class PlanetaryComputerSearch(STACSearchCatalog): - """Planetary Computer Search.""" - - endpoint: str = "https://planetarycomputer.microsoft.com/api/stac/v1/" - - -class AWSSearchCatalogS2L2A(UTMSearchCatalog): - """ - Not a search endpoint, just hanging STAC collection with items separately. - Need custom parser/browser to find scenes based on date and UTM MGRS Granule - - https://sentinel-s2-l2a-stac.s3.amazonaws.com/sentinel-s2-l2a.json - """ - - id: str = "sentinel-s2-l2a" - endpoint: str = "s3://sentinel-s2-l2a-stac/" - day_subdir_schema: str = "{year}/{month:02d}/{day:02d}" - stac_json_endswith: str = "T{tile_id}.json" - description: str = "Sentinel-2 L2A JPEG2000 archive on AWS." - stac_extensions: List[str] = [] diff --git a/mapchete_eo/platforms/sentinel2/archives.py b/mapchete_eo/platforms/sentinel2/archives.py deleted file mode 100644 index 9c16def4..00000000 --- a/mapchete_eo/platforms/sentinel2/archives.py +++ /dev/null @@ -1,192 +0,0 @@ -# TODO: deprecate this! 
- -from __future__ import annotations - -from enum import Enum -from typing import Any, Type - -from mapchete.path import MPath -from pydantic import ValidationError -from pydantic.functional_validators import BeforeValidator -from pystac import Item -from typing_extensions import Annotated - -from mapchete_eo.archives.base import Archive -from mapchete_eo.io.items import item_fix_footprint -from mapchete_eo.known_catalogs import ( - AWSSearchCatalogS2L2A, - CDSESearch, - EarthSearchV1S2L2A, -) -from mapchete_eo.platforms.sentinel2.types import ProcessingLevel -from mapchete_eo.search.s2_mgrs import S2Tile - - -def known_archive(v: Any, **args) -> Type[Archive]: - if isinstance(v, str): - return KnownArchives[v].value - elif isinstance(v, type(Archive)): - return v - else: - raise ValidationError(f"cannot validate {v} to archive") - - -ArchiveClsFromString = Annotated[Type[Archive], BeforeValidator(known_archive)] - - -def add_datastrip_id(item: Item) -> Item: - """Make sure item metadata is following the standard.""" - # change 'sentinel2' prefix to 's2' - properties = {k.replace("sentinel2:", "s2:"): v for k, v in item.properties.items()} - - # add datastrip id as 's2:datastrip_id' - if not properties.get("s2:datastrip_id"): - from mapchete_eo.platforms.sentinel2 import S2Metadata - - s2_metadata = S2Metadata.from_stac_item(item) - properties["s2:datastrip_id"] = s2_metadata.datastrip_id - - item.properties = properties - return item - - -def map_cdse_paths_to_jp2_archive(item: Item) -> Item: - """ - CSDE has the following assets: - AOT_10m, AOT_20m, AOT_60m, B01_20m, B01_60m, B02_10m, B02_20m, B02_60m, B03_10m, B03_20m, - B03_60m, B04_10m, B04_20m, B04_60m, B05_20m, B05_60m, B06_20m, B06_60m, B07_20m, B07_60m, - B08_10m, B09_60m, B11_20m, B11_60m, B12_20m, B12_60m, B8A_20m, B8A_60m, Product, SCL_20m, - SCL_60m, TCI_10m, TCI_20m, TCI_60m, WVP_10m, WVP_20m, WVP_60m, thumbnail, safe_manifest, - granule_metadata, inspire_metadata, product_metadata, 
datastrip_metadata - - sample path for AWS JP2: - s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/R10m/ - """ - band_name_mapping = { - "AOT_10m": "aot", - "B01_20m": "coastal", - "B02_10m": "blue", - "B03_10m": "green", - "B04_10m": "red", - "B05_20m": "rededge1", - "B06_20m": "rededge2", - "B07_20m": "rededge3", - "B08_10m": "nir", - "B09_60m": "nir09", - "B11_20m": "swir16", - "B12_20m": "swir22", - "B8A_20m": "nir08", - "SCL_20m": "scl", - "TCI_10m": "visual", - "WVP_10m": "wvp", - } - path_base_scheme = "s3://sentinel-s2-l2a/tiles/{utm_zone}/{latitude_band}/{grid_square}/{year}/{month}/{day}/{count}" - s2tile = S2Tile.from_grid_code(item.properties["grid:code"]) - if item.datetime is None: - raise ValueError(f"product {item.get_self_href()} does not have a timestamp") - product_basepath = MPath( - path_base_scheme.format( - utm_zone=s2tile.utm_zone, - latitude_band=s2tile.latitude_band, - grid_square=s2tile.grid_square, - year=item.datetime.year, - month=item.datetime.month, - day=item.datetime.day, - count=0, # TODO: get count dynamically from metadata - ) - ) - new_assets = {} - for asset_name, asset in item.assets.items(): - # ignore these assets - if asset_name in [ - "Product", - "safe_manifest", - "product_metadata", - "inspire_metadata", - "datastrip_metadata", - ]: - continue - # set thumbnnail - elif asset_name == "thumbnail": - asset.href = str(product_basepath / "R60m" / "TCI.jp2") - # point to proper metadata - elif asset_name == "granule_metadata": - asset.href = str(product_basepath / "metadata.xml") - # change band asset names and point to their new locations - elif asset_name in band_name_mapping: - name, resolution = asset_name.split("_") - asset.href = product_basepath / f"R{resolution}" / f"{name}.jp2" - asset_name = band_name_mapping[asset_name] - else: - continue - new_assets[asset_name] = asset - - item.assets = new_assets - - item.properties["s2:datastrip_id"] = item.properties.get("eopf:datastrip_id") - return item - - -class 
AWSL2ACOGv1(Archive): - """COG archive on AWS using E84 STAC search endpoint.""" - - catalog = EarthSearchV1S2L2A( - collections=["sentinel-2-l2a"], - ) - item_modifier_funcs = [ - item_fix_footprint, - ] - processing_level = ProcessingLevel.level2a - - -class AWSL2AJP2(Archive): - """ - JP2000 archive on AWS using dumb S3 path guesser. - """ - - catalog = AWSSearchCatalogS2L2A( - collections=["sentinel-s2-l2a"], - ) - item_modifier_funcs = [ - item_fix_footprint, - add_datastrip_id, - ] - processing_level = ProcessingLevel.level2a - - -class AWSL2AJP2CSDE(Archive): - """ - JP2000 archive on AWS using CDSE STAC search endpoint. - """ - - catalog = CDSESearch( - collections=["sentinel-2-l2a"], - ) - item_modifier_funcs = [ - item_fix_footprint, - map_cdse_paths_to_jp2_archive, - add_datastrip_id, - ] - processing_level = ProcessingLevel.level2a - - -class CDSEL2AJP2CSDE(Archive): - """ - JP2000 archive on CDSE (EODATA s3) using CDSE STAC search endpoint. - """ - - catalog = CDSESearch( - collections=["sentinel-2-l2a"], - ) - item_modifier_funcs = [ - item_fix_footprint, - add_datastrip_id, - ] - processing_level = ProcessingLevel.level2a - - -class KnownArchives(Enum): - S2AWS_COG = AWSL2ACOGv1 - S2AWS_JP2 = AWSL2AJP2 - S2CDSE_AWSJP2 = AWSL2AJP2CSDE - S2CDSE_JP2 = CDSEL2AJP2CSDE diff --git a/tests/conftest.py b/tests/conftest.py index dc619b62..5c603115 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,6 @@ from shapely import wkt from shapely.geometry import base -from mapchete_eo.known_catalogs import AWSSearchCatalogS2L2A, EarthSearchV1S2L2A from mapchete_eo.platforms.sentinel2 import S2Metadata from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog from mapchete_eo.types import TimeRange @@ -342,34 +341,6 @@ def stac_search_catalog(): ) -@pytest.mark.remote -@pytest.fixture(scope="session") -def e84_cog_catalog(): - return EarthSearchV1S2L2A( - collections=["sentinel-2-l2a"], - ) - - -@pytest.mark.remote -@pytest.fixture -def 
utm_search_catalog(): - return AWSSearchCatalogS2L2A( - collections=["sentinel-s2-l2a"], - ) - - -@pytest.fixture(scope="session") -def e84_cog_catalog_short(): - return EarthSearchV1S2L2A( - time=TimeRange( - start="2022-06-01", - end="2022-06-03", - ), - bounds=[16, 46.4, 16.1, 46.5], - collections=["sentinel-2-l2a"], - ) - - @pytest.fixture(scope="session") def static_catalog_small(s2_stac_collection): return STACStaticCatalog( diff --git a/tests/platforms/sentinel2/test_archives.py b/tests/platforms/sentinel2/test_archives.py deleted file mode 100644 index fdc06e98..00000000 --- a/tests/platforms/sentinel2/test_archives.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest -from mapchete.tile import BufferedTilePyramid -from shapely.geometry import shape -from shapely.ops import unary_union - -from mapchete_eo.io.path import asset_mpath -from mapchete_eo.platforms.sentinel2.archives import ( - AWSL2AJP2, - AWSL2ACOGv1, - AWSL2AJP2CSDE, -) -from mapchete_eo.platforms.sentinel2.product import S2Product -from mapchete_eo.types import TimeRange - - -@pytest.mark.remote -@pytest.mark.parametrize( - "archive_cls", - [AWSL2ACOGv1, AWSL2AJP2, AWSL2AJP2CSDE], -) -def test_s2_archives(archive_cls): - time = TimeRange(start="2022-06-06", end="2022-06-06") - bounds = [16, 46, 17, 47] - archive = archive_cls(time=time, bounds=bounds) - assert len(list(archive.items())) - - -@pytest.mark.remote -@pytest.mark.parametrize( - "archive_cls", - [AWSL2ACOGv1, AWSL2AJP2, AWSL2AJP2CSDE], -) -def test_s2_archives_assets(archive_cls): - assets = ["red", "green", "blue", "coastal", "nir"] - time = TimeRange(start="2022-06-06", end="2022-06-06") - bounds = [16, 46, 17, 47] - archive = archive_cls(time=time, bounds=bounds) - for item in archive.items(): - product = S2Product.from_stac_item(item) - for band_location in product.eo_bands_to_band_location(assets): - assert asset_mpath(item, band_location.asset_name).exists() - - -@pytest.mark.remote -@pytest.mark.parametrize( - "archive_cls", - 
[AWSL2ACOGv1, AWSL2AJP2, AWSL2AJP2CSDE], -) -def test_s2_archives_multipolygon_search(archive_cls): - pyramid = BufferedTilePyramid("geodetic") - time = TimeRange(start="2022-06-06", end="2022-06-06") - area = unary_union( - [pyramid.tile_from_xy(16, 46, 13).bbox, pyramid.tile_from_xy(17, 47, 13).bbox] - ) - archive = archive_cls( - time=time, - area=area, - ) - items = list(archive.items()) - assert items - for item in items: - assert shape(item.geometry).intersects(area) diff --git a/tests/platforms/sentinel2/test_cdse_archive.py b/tests/platforms/sentinel2/test_cdse_archive.py deleted file mode 100644 index 7c39acf0..00000000 --- a/tests/platforms/sentinel2/test_cdse_archive.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest -from mapchete.tile import BufferedTilePyramid -from shapely.geometry import shape -from shapely.ops import unary_union - -from mapchete_eo.io.path import asset_mpath -from mapchete_eo.platforms.sentinel2.archives import CDSEL2AJP2CSDE - -from mapchete_eo.platforms.sentinel2.product import S2Product -from mapchete_eo.types import TimeRange - - -@pytest.mark.remote -@pytest.mark.use_cdse_test_env -@pytest.mark.parametrize( - "archive_cls", - [CDSEL2AJP2CSDE], -) -def test_s2_archives(archive_cls): - time = TimeRange(start="2022-06-06", end="2022-06-06") - bounds = [16, 46, 17, 47] - archive = archive_cls(time=time, bounds=bounds) - assert len(list(archive.items())) - - -@pytest.mark.remote -@pytest.mark.use_cdse_test_env -@pytest.mark.parametrize( - "archive_cls", - [CDSEL2AJP2CSDE], -) -def test_s2_archives_assets(archive_cls): - assets = ["red", "green", "blue", "coastal", "nir"] - time = TimeRange(start="2022-06-06", end="2022-06-06") - bounds = [16, 46, 17, 47] - archive = archive_cls(time=time, bounds=bounds) - for item in archive.items(): - product = S2Product.from_stac_item(item) - for band_location in product.eo_bands_to_band_location(assets): - assert asset_mpath(item, band_location.asset_name).exists() - - -@pytest.mark.remote 
-@pytest.mark.use_cdse_test_env -@pytest.mark.parametrize( - "archive_cls", - [CDSEL2AJP2CSDE], -) -def test_s2_archives_multipolygon_search(archive_cls): - pyramid = BufferedTilePyramid("geodetic") - time = TimeRange(start="2022-06-06", end="2022-06-06") - area = unary_union( - [pyramid.tile_from_xy(16, 46, 13).bbox, pyramid.tile_from_xy(17, 47, 13).bbox] - ) - archive = archive_cls( - time=time, - area=area, - ) - items = list(archive.items()) - assert items - for item in items: - assert shape(item.geometry).intersects(area) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 7ecb7708..ca1b3cfc 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -5,7 +5,6 @@ from mapchete.path import MPath from shapely import box -from mapchete_eo.known_catalogs import EarthSearchV1S2L2A, AWSSearchCatalogS2L2A from mapchete_eo.platforms.sentinel2 import S2Metadata from mapchete_eo.platforms.sentinel2.types import Resolution from mapchete_eo.search import STACStaticCatalog @@ -145,66 +144,3 @@ def test_static_catalog_cloud_percent(s2_stac_collection): ) ) assert len(all_products) > len(filtered_products) - - -def test_earthsearch_catalog_cloud_percent(): - all_products = list( - EarthSearchV1S2L2A( - collections=["sentinel-2-l2a"], - ).search( - time=TimeRange(start="2022-04-01", end="2022-04-03"), - bounds=[16.3916015625, 48.69140625, 16.41357421875, 48.71337890625], - ) - ) - filtered_products = list( - EarthSearchV1S2L2A( - collections=["sentinel-2-l2a"], - ).search( - time=TimeRange(start="2022-04-01", end="2022-04-03"), - bounds=[16.3916015625, 48.69140625, 16.41357421875, 48.71337890625], - search_kwargs=dict(max_cloud_cover=20), - ) - ) - assert len(all_products) > len(filtered_products) - - -def test_earthsearch_catalog_chunked_search(): - all_products = list( - EarthSearchV1S2L2A( - collections=["sentinel-2-l2a"], - ).search( - time=TimeRange(start="2022-04-01", end="2022-04-03"), - bounds=[16.3916015625, 48.69140625, 16.41357421875, 
48.71337890625], - ) - ) - chunked_products = list( - EarthSearchV1S2L2A( - collections=["sentinel-2-l2a"], - ).search( - time=TimeRange(start="2022-04-01", end="2022-04-03"), - bounds=[16.3916015625, 48.69140625, 16.41357421875, 48.71337890625], - search_kwargs=dict(catalog_chunk_threshold=2), - ) - ) - assert len(all_products) == len(chunked_products) - - -def test_awssearch_catalog_cloud_percent(): - all_products = list( - AWSSearchCatalogS2L2A( - collections=["sentinel-s2-l2a"], - ).search( - time=TimeRange(start="2022-04-01", end="2022-04-03"), - bounds=[16.3916015625, 48.69140625, 16.41357421875, 48.71337890625], - ) - ) - filtered_products = list( - AWSSearchCatalogS2L2A( - collections=["sentinel-s2-l2a"], - ).search( - time=TimeRange(start="2022-04-01", end="2022-04-03"), - bounds=[16.3916015625, 48.69140625, 16.41357421875, 48.71337890625], - search_kwargs=dict(max_cloud_cover=20), - ) - ) - assert len(all_products) > len(filtered_products) diff --git a/tests/test_cli.py b/tests/test_cli.py index 11ffb21d..76c205e4 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,7 +4,6 @@ from pytest_lazyfixture import lazy_fixture from mapchete_eo.cli import eo -from mapchete_eo.known_catalogs import EarthSearchV1S2L2A def test_s2_mask(s2_stac_json_half_footprint, tmp_mpath): @@ -77,7 +76,7 @@ def test_s2_brdf(s2_stac_json_half_footprint, tmp_mpath): [ ("--catalog-json", lazy_fixture("s2_stac_collection"), None), ("--archive", "S2AWS_COG", None), - ("--endpoint", EarthSearchV1S2L2A.endpoint, "sentinel-2-l2a"), + ("--endpoint", "https://earth-search.aws.element84.com/v1/", "sentinel-2-l2a"), ], ) def test_static_catalog(tmp_mpath, flag, value, collection): diff --git a/tests/test_known_catalogs.py b/tests/test_known_catalogs.py deleted file mode 100644 index 709df55d..00000000 --- a/tests/test_known_catalogs.py +++ /dev/null @@ -1,75 +0,0 @@ -import pytest - -from mapchete_eo.known_catalogs import ( - AWSSearchCatalogS2L2A, - CDSESearch, - EarthSearchV1S2L2A, 
- PlanetaryComputerSearch, -) -from mapchete_eo.types import TimeRange - - -@pytest.mark.remote -def test_e84_cog_catalog_search_items(e84_cog_catalog): - assert ( - len( - list( - e84_cog_catalog.search( - time=TimeRange( - start="2022-06-01", - end="2022-06-06", - ), - bounds=[16, 46, 17, 47], - ) - ) - ) - > 0 - ) - - -@pytest.mark.remote -def test_e84_cog_catalog_eo_bands(e84_cog_catalog): - assert len(e84_cog_catalog.eo_bands) > 0 - - -@pytest.mark.skip(reason="This test is flaky.") -@pytest.mark.remote -def test_utm_search_catalog_search_items(utm_search_catalog): - assert ( - len( - list( - utm_search_catalog.search( - time=TimeRange( - start="2022-06-05", - end="2022-06-05", - ), - bounds=[-180, 65, -179, 65.3], - ) - ) - ) - > 0 - ) - - -@pytest.mark.remote -@pytest.mark.parametrize( - "catalog_cls,collection_name", - [ - (EarthSearchV1S2L2A, "sentinel-2-l2a"), - (CDSESearch, "sentinel-2-l2a"), - (AWSSearchCatalogS2L2A, "sentinel-s2-l2a"), - (PlanetaryComputerSearch, "sentinel-2-l2a"), - ], -) -def test_known_catalogs(catalog_cls, collection_name): - catalog = catalog_cls( - collections=[collection_name], - ) - items = catalog.search( - time=TimeRange( - start="2022-06-05", - end="2022-06-05", - ), - bounds=[-180, 65, -179, 65.3], - ) - assert items From 2a0f0c6355ca7bf210d3fb44423fc8309afba8c4 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 29 Oct 2025 12:49:37 +0100 Subject: [PATCH 09/46] added first test for known sources --- .../platforms/sentinel2/sources_mappers.py | 5 ++++- tests/platforms/sentinel2/test_sources.py | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/platforms/sentinel2/test_sources.py diff --git a/mapchete_eo/platforms/sentinel2/sources_mappers.py b/mapchete_eo/platforms/sentinel2/sources_mappers.py index 1b2972ad..e3b9f990 100644 --- a/mapchete_eo/platforms/sentinel2/sources_mappers.py +++ b/mapchete_eo/platforms/sentinel2/sources_mappers.py @@ -15,9 +15,12 @@ DataArchive = 
Literal["AWSCOG", "AWSJP2"] KNOWN_SOURCES: Dict[str, Any] = { "EarthSearch": { + "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "collections": ["sentinel-2-c1-l2a"], + }, + "EarthSearch_legacy": { "stac_catalog": "https://earth-search.aws.element84.com/v1/", "collections": ["sentinel-2-l2a"], - "data_archive": "AWSCOG", }, "CDSE": { "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", diff --git a/tests/platforms/sentinel2/test_sources.py b/tests/platforms/sentinel2/test_sources.py new file mode 100644 index 00000000..97087182 --- /dev/null +++ b/tests/platforms/sentinel2/test_sources.py @@ -0,0 +1,18 @@ +import pytest + +from mapchete_eo.types import TimeRange +from mapchete_eo.platforms.sentinel2.sources_mappers import KNOWN_SOURCES +from mapchete_eo.platforms.sentinel2.source import Sentinel2Source + + +@pytest.mark.parametrize("source_id", list(KNOWN_SOURCES.keys())) +def test_known_source(source_id): + source = Sentinel2Source(stac_catalog=source_id) + assert source + for item in source.search( + time=TimeRange(start="2025-01-01", end="2025-01-10"), bounds=[16, 46, 17, 47] + ): + assert item + break + else: + raise ValueError("no products found!") From 908b774bbc390b218ee60299d86bba89f04b0daf Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 30 Oct 2025 10:09:52 +0100 Subject: [PATCH 10/46] extend source tests --- .../platforms/sentinel2/mapper_registry.py | 25 +++++--- mapchete_eo/platforms/sentinel2/source.py | 34 +++++++---- .../platforms/sentinel2/sources_mappers.py | 60 ++++++++++++------- tests/platforms/sentinel2/test_sources.py | 26 +++++++- 4 files changed, 99 insertions(+), 46 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/mapper_registry.py b/mapchete_eo/platforms/sentinel2/mapper_registry.py index dff01c36..f1569807 100644 --- a/mapchete_eo/platforms/sentinel2/mapper_registry.py +++ b/mapchete_eo/platforms/sentinel2/mapper_registry.py @@ -1,15 +1,15 @@ -from typing import List, Callable, Dict, Any +from 
typing import List, Callable, Dict, Any, Optional # decorators for mapper functions using the registry pattern # ############################################################## ID_MAPPER_REGISTRY: Dict[Any, Callable] = {} -ASSET_PATHS_MAPPER_REGISTRY: Dict[Any, Callable] = {} +STAC_METADATA_MAPPER_REGISTRY: Dict[Any, Callable] = {} S2METADATA_MAPPER_REGISTRY: Dict[Any, Callable] = {} MAPPER_REGISTRIES = { "ID": ID_MAPPER_REGISTRY, - "asset paths": ASSET_PATHS_MAPPER_REGISTRY, + "STAC metadata": STAC_METADATA_MAPPER_REGISTRY, "S2Metadata": S2METADATA_MAPPER_REGISTRY, } @@ -35,18 +35,27 @@ def decorator(func): return decorator -def maps_asset_paths(from_catalogs: List[str], to_data_archives: List[str]): +def maps_stac_metadata( + from_catalogs: List[str], to_data_archives: Optional[List[str]] = None +): """ - Decorator registering asset path mapper. + Decorator registering STAC metadata mapper. """ def decorator(func): # Use a tuple of the metadata as the key for catalog in from_catalogs: - for data_archive in to_data_archives: + if to_data_archives: + for data_archive in to_data_archives: + _register_func( + registry=STAC_METADATA_MAPPER_REGISTRY, + key=(catalog, data_archive), + func=func, + ) + else: _register_func( - registry=ASSET_PATHS_MAPPER_REGISTRY, - key=(catalog, data_archive), + registry=STAC_METADATA_MAPPER_REGISTRY, + key=catalog, func=func, ) return func diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py index 848bcfc4..4f09a0a8 100644 --- a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -30,7 +30,7 @@ class Sentinel2Source(Source): def item_modifier_funcs(self) -> List[Callable]: return [ func - for func in (self.get_id_mapper(), self.get_asset_paths_mapper()) + for func in (self.get_id_mapper(), self.get_stac_metadata_mapper()) if func is not None ] @@ -51,7 +51,7 @@ def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: def 
verify_mappers(self) -> Sentinel2Source: # make sure all required mappers are registered self.get_id_mapper() - self.get_asset_paths_mapper() + self.get_stac_metadata_mapper() self.get_s2metadata_mapper() return self @@ -64,19 +64,29 @@ def get_id_mapper(self) -> Union[Callable, None]: else: raise ValueError(f"no ID mapper for {self.stac_catalog} found") - def get_asset_paths_mapper(self) -> Union[Callable, None]: - if self.catalog_type == "static" or self.data_archive is None: + def get_stac_metadata_mapper(self) -> Union[Callable, None]: + """Find mapper function. + + A mapper function must be provided if a custom data_archive was configured. + """ + if self.catalog_type == "static": return None - for key in MAPPER_REGISTRIES["asset paths"]: - stac_catalog, data_archive = key - if ( - self.stac_catalog == known_catalog_to_url(stac_catalog) - and data_archive == self.data_archive - ): - return MAPPER_REGISTRIES["asset paths"][key] + for key in MAPPER_REGISTRIES["STAC metadata"]: + if isinstance(key, tuple): + stac_catalog, data_archive = key + if ( + self.stac_catalog == known_catalog_to_url(stac_catalog) + and data_archive == self.data_archive + ): + return MAPPER_REGISTRIES["STAC metadata"][key] + else: + if self.stac_catalog == known_catalog_to_url(key): + return MAPPER_REGISTRIES["STAC metadata"][key] else: + if self.data_archive is None: + return None raise ValueError( - f"no asset paths mapper from {self.stac_catalog} to {self.data_archive} found" + f"no STAC metadata mapper from {self.stac_catalog} to {self.data_archive} found" ) def get_s2metadata_mapper(self) -> Union[Callable, None]: diff --git a/mapchete_eo/platforms/sentinel2/sources_mappers.py b/mapchete_eo/platforms/sentinel2/sources_mappers.py index e3b9f990..a8d5de44 100644 --- a/mapchete_eo/platforms/sentinel2/sources_mappers.py +++ b/mapchete_eo/platforms/sentinel2/sources_mappers.py @@ -5,7 +5,7 @@ from mapchete_eo.platforms.sentinel2.mapper_registry import ( maps_item_id, - maps_asset_paths, + 
maps_stac_metadata, creates_s2metadata, ) from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata @@ -63,7 +63,7 @@ def earthsearch_id_mapper(item: Item) -> Item: return item -@maps_asset_paths(from_catalogs=["EarthSearch"], to_data_archives=["AWSCOG"]) +@maps_stac_metadata(from_catalogs=["EarthSearch"], to_data_archives=["AWSCOG"]) def earthsearch_assets_paths_mapper(item: Item) -> Item: """Nothing to do here as paths match catalog.""" return item @@ -79,7 +79,39 @@ def plain_id_mapper(item: Item) -> Item: return item -@maps_asset_paths(from_catalogs=["CDSE"], to_data_archives=["AWSJP2"]) +CDSE_ASSET_NAME_MAPPING = { + "AOT_10m": "aot", + "B01_20m": "coastal", + "B02_10m": "blue", + "B03_10m": "green", + "B04_10m": "red", + "B05_20m": "rededge1", + "B06_20m": "rededge2", + "B07_20m": "rededge3", + "B08_10m": "nir", + "B09_60m": "nir09", + "B11_20m": "swir16", + "B12_20m": "swir22", + "B8A_20m": "nir08", + "SCL_20m": "scl", + "TCI_10m": "visual", + "WVP_10m": "wvp", +} + + +@maps_stac_metadata(from_catalogs=["CDSE"]) +def cdse_asset_names(item: Item) -> Item: + new_assets = {} + for asset_name, asset in item.assets.items(): + if asset_name in CDSE_ASSET_NAME_MAPPING: + asset_name = CDSE_ASSET_NAME_MAPPING[asset_name] + new_assets[asset_name] = asset + + item.assets = new_assets + return item + + +@maps_stac_metadata(from_catalogs=["CDSE"], to_data_archives=["AWSJP2"]) def map_cdse_paths_to_jp2_archive(item: Item) -> Item: """ CSDE has the following assets: @@ -94,24 +126,6 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: """ if item.datetime is None: raise ValueError(f"product {item.get_self_href()} does not have a timestamp") - band_name_mapping = { - "AOT_10m": "aot", - "B01_20m": "coastal", - "B02_10m": "blue", - "B03_10m": "green", - "B04_10m": "red", - "B05_20m": "rededge1", - "B06_20m": "rededge2", - "B07_20m": "rededge3", - "B08_10m": "nir", - "B09_60m": "nir09", - "B11_20m": "swir16", - "B12_20m": "swir22", - "B8A_20m": "nir08", - 
"SCL_20m": "scl", - "TCI_10m": "visual", - "WVP_10m": "wvp", - } path_base_scheme = "s3://sentinel-s2-l2a/tiles/{utm_zone}/{latitude_band}/{grid_square}/{year}/{month}/{day}/{count}" s2tile = S2Tile.from_grid_code(item.properties["grid:code"]) product_basepath = MPath( @@ -143,10 +157,10 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: elif asset_name == "granule_metadata": asset.href = str(product_basepath / "metadata.xml") # change band asset names and point to their new locations - elif asset_name in band_name_mapping: + elif asset_name in CDSE_ASSET_NAME_MAPPING: name, resolution = asset_name.split("_") asset.href = product_basepath / f"R{resolution}" / f"{name}.jp2" - asset_name = band_name_mapping[asset_name] + asset_name = CDSE_ASSET_NAME_MAPPING[asset_name] else: continue new_assets[asset_name] = asset diff --git a/tests/platforms/sentinel2/test_sources.py b/tests/platforms/sentinel2/test_sources.py index 97087182..1183f097 100644 --- a/tests/platforms/sentinel2/test_sources.py +++ b/tests/platforms/sentinel2/test_sources.py @@ -1,18 +1,38 @@ import pytest +from mapchete_eo.io.path import asset_mpath from mapchete_eo.types import TimeRange -from mapchete_eo.platforms.sentinel2.sources_mappers import KNOWN_SOURCES from mapchete_eo.platforms.sentinel2.source import Sentinel2Source -@pytest.mark.parametrize("source_id", list(KNOWN_SOURCES.keys())) -def test_known_source(source_id): +@pytest.mark.remote +@pytest.mark.parametrize("source_id", ["EarthSearch", "EarthSearch_legacy"]) +def test_known_sources(source_id): source = Sentinel2Source(stac_catalog=source_id) assert source for item in source.search( time=TimeRange(start="2025-01-01", end="2025-01-10"), bounds=[16, 46, 17, 47] ): assert item + + # assert asset paths exist + for asset in ["red", "green", "blue", "nir"]: + assert asset_mpath(item, asset).exists() + + # assert S2Metadata object can be created and QI bands are there + s2metadata = source.get_s2metadata_mapper()(item) + assert 
s2metadata.datastrip_id + for asset in s2metadata.assets.values(): + assert asset.exists() + + # we only need the first item to be checked break else: raise ValueError("no products found!") + + +@pytest.mark.remote +@pytest.mark.use_cdse_test_env +@pytest.mark.parametrize("source_id", ["CSDE"]) +def test_known_sources_cdse(source_id): + test_known_sources(source_id) From f0a818c356b1ea26af3dd0d9f92fcadf0c91f49b Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 30 Oct 2025 15:32:12 +0100 Subject: [PATCH 11/46] restructured modules --- mapchete_eo/cli/options_arguments.py | 4 +- mapchete_eo/cli/s2_brdf.py | 2 +- mapchete_eo/platforms/sentinel2/__init__.py | 2 +- ...mapper_registry.py => _mapper_registry.py} | 0 .../platforms/sentinel2/brdf/correction.py | 2 +- mapchete_eo/platforms/sentinel2/brdf/hls.py | 2 +- .../platforms/sentinel2/brdf/models.py | 2 +- .../platforms/sentinel2/brdf/protocols.py | 2 +- .../platforms/sentinel2/brdf/ross_thick.py | 2 +- .../sentinel2/brdf/sun_angle_arrays.py | 2 +- mapchete_eo/platforms/sentinel2/config.py | 2 +- .../sentinel2/metadata_parser/__init__.py | 6 ++ .../{path_mappers => metadata_parser}/base.py | 2 +- .../default_path_mapper.py} | 4 +- .../sentinel2/metadata_parser/models.py | 78 ++++++++++++++++ .../s2metadata.py} | 88 +++---------------- .../sentinel2/path_mappers/__init__.py | 31 ------- .../sentinel2/path_mappers/earthsearch.py | 34 ------- .../preconfigured_sources/__init__.py | 50 +++++++++++ .../item_mappers.py} | 86 ++++++++---------- .../metadata_xml_mappers.py} | 34 ++++++- mapchete_eo/platforms/sentinel2/product.py | 2 +- mapchete_eo/platforms/sentinel2/source.py | 4 +- tests/platforms/sentinel2/test_mappers.py | 19 ++++ .../sentinel2/test_metadata_parser.py | 9 +- tests/test_io_assets.py | 2 +- 26 files changed, 263 insertions(+), 208 deletions(-) rename mapchete_eo/platforms/sentinel2/{mapper_registry.py => _mapper_registry.py} (100%) create mode 100644 
mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py rename mapchete_eo/platforms/sentinel2/{path_mappers => metadata_parser}/base.py (97%) rename mapchete_eo/platforms/sentinel2/{path_mappers/metadata_xml.py => metadata_parser/default_path_mapper.py} (97%) create mode 100644 mapchete_eo/platforms/sentinel2/metadata_parser/models.py rename mapchete_eo/platforms/sentinel2/{metadata_parser.py => metadata_parser/s2metadata.py} (90%) delete mode 100644 mapchete_eo/platforms/sentinel2/path_mappers/__init__.py delete mode 100644 mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py create mode 100644 mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py rename mapchete_eo/platforms/sentinel2/{sources_mappers.py => preconfigured_sources/item_mappers.py} (73%) rename mapchete_eo/platforms/sentinel2/{path_mappers/sinergise.py => preconfigured_sources/metadata_xml_mappers.py} (75%) create mode 100644 tests/platforms/sentinel2/test_mappers.py diff --git a/mapchete_eo/cli/options_arguments.py b/mapchete_eo/cli/options_arguments.py index e6b9a606..e9ede7c8 100644 --- a/mapchete_eo/cli/options_arguments.py +++ b/mapchete_eo/cli/options_arguments.py @@ -7,7 +7,9 @@ from mapchete_eo.platforms.sentinel2.brdf.models import BRDFModels from mapchete_eo.io.profiles import rio_profiles from mapchete_eo.platforms.sentinel2.config import SceneClassification -from mapchete_eo.platforms.sentinel2.sources_mappers import DEPRECATED_ARCHIVES +from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( + DEPRECATED_ARCHIVES, +) from mapchete_eo.platforms.sentinel2.types import L2ABand, Resolution from mapchete_eo.time import to_datetime diff --git a/mapchete_eo/cli/s2_brdf.py b/mapchete_eo/cli/s2_brdf.py index a10dc730..02f68d29 100644 --- a/mapchete_eo/cli/s2_brdf.py +++ b/mapchete_eo/cli/s2_brdf.py @@ -11,7 +11,7 @@ from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels from mapchete_eo.platforms.sentinel2.config import BRDFConfig from 
mapchete_eo.platforms.sentinel2.product import S2Product -from mapchete_eo.platforms.sentinel2.metadata_parser import Resolution +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import Resolution from mapchete_eo.platforms.sentinel2.types import L2ABand diff --git a/mapchete_eo/platforms/sentinel2/__init__.py b/mapchete_eo/platforms/sentinel2/__init__.py index a28ee761..b9dc3b80 100644 --- a/mapchete_eo/platforms/sentinel2/__init__.py +++ b/mapchete_eo/platforms/sentinel2/__init__.py @@ -4,7 +4,7 @@ Sentinel2Cube, Sentinel2CubeGroup, ) -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.platforms.sentinel2.product import S2Product __all__ = [ diff --git a/mapchete_eo/platforms/sentinel2/mapper_registry.py b/mapchete_eo/platforms/sentinel2/_mapper_registry.py similarity index 100% rename from mapchete_eo/platforms/sentinel2/mapper_registry.py rename to mapchete_eo/platforms/sentinel2/_mapper_registry.py diff --git a/mapchete_eo/platforms/sentinel2/brdf/correction.py b/mapchete_eo/platforms/sentinel2/brdf/correction.py index f5d3d569..834a18cb 100644 --- a/mapchete_eo/platforms/sentinel2/brdf/correction.py +++ b/mapchete_eo/platforms/sentinel2/brdf/correction.py @@ -13,7 +13,7 @@ from mapchete_eo.exceptions import BRDFError from mapchete_eo.platforms.sentinel2.brdf.models import BRDFModels, get_model -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.platforms.sentinel2.types import ( L2ABand, Resolution, diff --git a/mapchete_eo/platforms/sentinel2/brdf/hls.py b/mapchete_eo/platforms/sentinel2/brdf/hls.py index 3d8a9284..537d3d96 100644 --- a/mapchete_eo/platforms/sentinel2/brdf/hls.py +++ b/mapchete_eo/platforms/sentinel2/brdf/hls.py @@ -16,7 +16,7 @@ ) from mapchete_eo.platforms.sentinel2.brdf.config import 
L2ABandFParams, ModelParameters from mapchete_eo.platforms.sentinel2.brdf.sun_angle_arrays import get_sun_zenith_angles -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.platforms.sentinel2.types import L2ABand diff --git a/mapchete_eo/platforms/sentinel2/brdf/models.py b/mapchete_eo/platforms/sentinel2/brdf/models.py index 452c6559..0404dd4f 100644 --- a/mapchete_eo/platforms/sentinel2/brdf/models.py +++ b/mapchete_eo/platforms/sentinel2/brdf/models.py @@ -12,7 +12,7 @@ from mapchete_eo.platforms.sentinel2.brdf.ross_thick import RossThick # from mapchete_eo.platforms.sentinel2.brdf.hls2 import HLS2 -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.platforms.sentinel2.types import L2ABand logger = logging.getLogger(__name__) diff --git a/mapchete_eo/platforms/sentinel2/brdf/protocols.py b/mapchete_eo/platforms/sentinel2/brdf/protocols.py index 459b71fd..3061745c 100644 --- a/mapchete_eo/platforms/sentinel2/brdf/protocols.py +++ b/mapchete_eo/platforms/sentinel2/brdf/protocols.py @@ -6,7 +6,7 @@ import numpy as np from numpy.typing import DTypeLike -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.platforms.sentinel2.types import L2ABand diff --git a/mapchete_eo/platforms/sentinel2/brdf/ross_thick.py b/mapchete_eo/platforms/sentinel2/brdf/ross_thick.py index 20348f25..67b28131 100644 --- a/mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +++ b/mapchete_eo/platforms/sentinel2/brdf/ross_thick.py @@ -14,7 +14,7 @@ ) from mapchete_eo.platforms.sentinel2.brdf.config import L2ABandFParams, ModelParameters from mapchete_eo.platforms.sentinel2.brdf.hls import _get_viewing_angles -from 
mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.platforms.sentinel2.types import L2ABand diff --git a/mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py b/mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py index 792c5792..9581871d 100644 --- a/mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +++ b/mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py @@ -3,7 +3,7 @@ from fiona.transform import transform import numpy as np -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata def get_sun_zenith_angles(s2_metadata: S2Metadata) -> np.ndarray: diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index a3b2af07..288b5eb9 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -9,7 +9,7 @@ from mapchete_eo.base import BaseDriverConfig from mapchete_eo.io.path import ProductPathGenerationMethod from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels -from mapchete_eo.platforms.sentinel2.sources_mappers import ( +from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( KNOWN_SOURCES, DEPRECATED_ARCHIVES, ) diff --git a/mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py b/mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py new file mode 100644 index 00000000..e6c9c65b --- /dev/null +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py @@ -0,0 +1,6 @@ +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import ( + S2Metadata, + S2MetadataPathMapper, +) + +__all__ = ["S2Metadata", "S2MetadataPathMapper"] diff --git a/mapchete_eo/platforms/sentinel2/path_mappers/base.py b/mapchete_eo/platforms/sentinel2/metadata_parser/base.py similarity index 97% rename from 
mapchete_eo/platforms/sentinel2/path_mappers/base.py rename to mapchete_eo/platforms/sentinel2/metadata_parser/base.py index bf0cfdd2..a20ff37d 100644 --- a/mapchete_eo/platforms/sentinel2/path_mappers/base.py +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/base.py @@ -11,7 +11,7 @@ ) -class S2PathMapper(ABC): +class S2MetadataPathMapper(ABC): """ Abstract class to help mapping asset paths from metadata.xml to their locations of various data archives. diff --git a/mapchete_eo/platforms/sentinel2/path_mappers/metadata_xml.py b/mapchete_eo/platforms/sentinel2/metadata_parser/default_path_mapper.py similarity index 97% rename from mapchete_eo/platforms/sentinel2/path_mappers/metadata_xml.py rename to mapchete_eo/platforms/sentinel2/metadata_parser/default_path_mapper.py index 44ad072e..ca3bbe04 100644 --- a/mapchete_eo/platforms/sentinel2/path_mappers/metadata_xml.py +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/default_path_mapper.py @@ -11,7 +11,7 @@ from mapchete.path import MPath from mapchete_eo.io import open_xml -from mapchete_eo.platforms.sentinel2.path_mappers.base import S2PathMapper +from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper from mapchete_eo.platforms.sentinel2.processing_baseline import ProcessingBaseline from mapchete_eo.platforms.sentinel2.types import ( BandQI, @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -class XMLMapper(S2PathMapper): +class XMLMapper(S2MetadataPathMapper): def __init__( self, metadata_xml: MPath, xml_root: Optional[Element] = None, **kwargs ): diff --git a/mapchete_eo/platforms/sentinel2/metadata_parser/models.py b/mapchete_eo/platforms/sentinel2/metadata_parser/models.py new file mode 100644 index 00000000..ecfebedc --- /dev/null +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/models.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import logging +import warnings +from typing import Dict + +import numpy as np +import numpy.ma as ma +from pydantic 
import BaseModel +from mapchete.io.raster import ReferencedRaster +from rasterio.fill import fillnodata + +from mapchete_eo.exceptions import CorruptedProductMetadata +from mapchete_eo.platforms.sentinel2.types import ( + SunAngle, + ViewAngle, +) + +logger = logging.getLogger(__name__) + + +class SunAngleData(BaseModel): + model_config = dict(arbitrary_types_allowed=True) + raster: ReferencedRaster + mean: float + + +class SunAnglesData(BaseModel): + azimuth: SunAngleData + zenith: SunAngleData + + def get_angle(self, angle: SunAngle) -> SunAngleData: + if angle == SunAngle.azimuth: + return self.azimuth + elif angle == SunAngle.zenith: + return self.zenith + else: + raise KeyError(f"unknown angle: {angle}") + + +class ViewingIncidenceAngle(BaseModel): + model_config = dict(arbitrary_types_allowed=True) + detectors: Dict[int, ReferencedRaster] + mean: float + + def merge_detectors( + self, fill_edges: bool = True, smoothing_iterations: int = 3 + ) -> ReferencedRaster: + if not self.detectors: + raise CorruptedProductMetadata("no viewing incidence angles available") + sample = next(iter(self.detectors.values())) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=RuntimeWarning) + merged = np.nanmean( + np.stack([raster.data for raster in self.detectors.values()]), axis=0 + ) + if fill_edges: + merged = fillnodata( + ma.masked_invalid(merged), smoothing_iterations=smoothing_iterations + ) + return ReferencedRaster.from_array_like( + array_like=ma.masked_invalid(merged), + transform=sample.transform, + crs=sample.crs, + ) + + +class ViewingIncidenceAngles(BaseModel): + azimuth: ViewingIncidenceAngle + zenith: ViewingIncidenceAngle + + def get_angle(self, angle: ViewAngle) -> ViewingIncidenceAngle: + if angle == ViewAngle.azimuth: + return self.azimuth + elif angle == ViewAngle.zenith: + return self.zenith + else: + raise KeyError(f"unknown angle: {angle}") diff --git a/mapchete_eo/platforms/sentinel2/metadata_parser.py 
b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py similarity index 90% rename from mapchete_eo/platforms/sentinel2/metadata_parser.py rename to mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py index bf591fe6..81c14321 100644 --- a/mapchete_eo/platforms/sentinel2/metadata_parser.py +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py @@ -6,14 +6,12 @@ from __future__ import annotations import logging -import warnings from functools import cached_property from typing import Any, Callable, Dict, List, Optional, Union from xml.etree.ElementTree import Element, ParseError import numpy as np import numpy.ma as ma -from pydantic import BaseModel import pystac from affine import Affine from fiona.transform import transform_geom @@ -33,9 +31,15 @@ from mapchete_eo.exceptions import AssetEmpty, AssetMissing, CorruptedProductMetadata from mapchete_eo.io import open_xml, read_mask_as_raster -from mapchete_eo.platforms.sentinel2.path_mappers import default_path_mapper_guesser -from mapchete_eo.platforms.sentinel2.path_mappers.base import S2PathMapper -from mapchete_eo.platforms.sentinel2.path_mappers.metadata_xml import XMLMapper +from mapchete_eo.platforms.sentinel2.metadata_parser.models import ( + ViewingIncidenceAngles, + SunAngleData, + SunAnglesData, +) +from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper +from mapchete_eo.platforms.sentinel2.metadata_parser.default_path_mapper import ( + XMLMapper, +) from mapchete_eo.platforms.sentinel2.processing_baseline import ProcessingBaseline from mapchete_eo.platforms.sentinel2.types import ( BandQI, @@ -124,11 +128,10 @@ def _determine_offset(): class S2Metadata: metadata_xml: MPath - path_mapper: S2PathMapper + path_mapper: S2MetadataPathMapper processing_baseline: ProcessingBaseline boa_offset_applied: bool = False _cached_xml_root: Optional[Element] = None - path_mapper_guesser: Callable = default_path_mapper_guesser from_stac_item_constructor: 
Callable = s2metadata_from_stac_item crs: CRS bounds: Bounds @@ -138,7 +141,7 @@ class S2Metadata: def __init__( self, metadata_xml: MPath, - path_mapper: S2PathMapper, + path_mapper: S2MetadataPathMapper, xml_root: Optional[Element] = None, boa_offset_applied: bool = False, **kwargs, @@ -186,19 +189,15 @@ def footprint_latlon(self) -> BaseGeometry: def from_metadata_xml( cls, metadata_xml: Union[str, MPath], + path_mapper: Optional[S2MetadataPathMapper] = None, processing_baseline: Optional[str] = None, - path_mapper: Optional[S2PathMapper] = None, **kwargs, ) -> S2Metadata: metadata_xml = MPath.from_inp(metadata_xml, **kwargs) xml_root = open_granule_metadata_xml(metadata_xml) + if path_mapper is None: - # guess correct path mapper - path_mapper = cls.path_mapper_guesser( - metadata_xml, - xml_root=xml_root, - **kwargs, - ) + path_mapper = XMLMapper(metadata_xml=metadata_xml, xml_root=xml_root) # use processing baseline version from argument if available if processing_baseline: @@ -587,65 +586,6 @@ def _band_angles(band: L2ABand) -> ma.MaskedArray: return mean -class SunAngleData(BaseModel): - model_config = dict(arbitrary_types_allowed=True) - raster: ReferencedRaster - mean: float - - -class SunAnglesData(BaseModel): - azimuth: SunAngleData - zenith: SunAngleData - - def get_angle(self, angle: SunAngle) -> SunAngleData: - if angle == SunAngle.azimuth: - return self.azimuth - elif angle == SunAngle.zenith: - return self.zenith - else: - raise KeyError(f"unknown angle: {angle}") - - -class ViewingIncidenceAngle(BaseModel): - model_config = dict(arbitrary_types_allowed=True) - detectors: Dict[int, ReferencedRaster] - mean: float - - def merge_detectors( - self, fill_edges: bool = True, smoothing_iterations: int = 3 - ) -> ReferencedRaster: - if not self.detectors: - raise CorruptedProductMetadata("no viewing incidence angles available") - sample = next(iter(self.detectors.values())) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", 
category=RuntimeWarning) - merged = np.nanmean( - np.stack([raster.data for raster in self.detectors.values()]), axis=0 - ) - if fill_edges: - merged = fillnodata( - ma.masked_invalid(merged), smoothing_iterations=smoothing_iterations - ) - return ReferencedRaster.from_array_like( - array_like=ma.masked_invalid(merged), - transform=sample.transform, - crs=sample.crs, - ) - - -class ViewingIncidenceAngles(BaseModel): - azimuth: ViewingIncidenceAngle - zenith: ViewingIncidenceAngle - - def get_angle(self, angle: ViewAngle) -> ViewingIncidenceAngle: - if angle == ViewAngle.azimuth: - return self.azimuth - elif angle == ViewAngle.zenith: - return self.zenith - else: - raise KeyError(f"unknown angle: {angle}") - - def _get_grids(root: Element, crs: CRS) -> Dict[Resolution, Grid]: geoinfo = { Resolution["10m"]: dict(crs=crs), diff --git a/mapchete_eo/platforms/sentinel2/path_mappers/__init__.py b/mapchete_eo/platforms/sentinel2/path_mappers/__init__.py deleted file mode 100644 index e3115e68..00000000 --- a/mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# TODO: deprecate this whole module! - -from mapchete_eo.platforms.sentinel2.path_mappers.base import S2PathMapper -from mapchete_eo.platforms.sentinel2.path_mappers.earthsearch import ( - EarthSearchPathMapper, -) -from mapchete_eo.platforms.sentinel2.path_mappers.metadata_xml import XMLMapper -from mapchete_eo.platforms.sentinel2.path_mappers.sinergise import SinergisePathMapper - - -def default_path_mapper_guesser( - url: str, - **kwargs, -) -> S2PathMapper: - """Guess S2PathMapper based on URL. - - If a new path mapper is added in this module, it should also be added to this function - in order to be detected. 
- """ - if url.startswith( - ("https://roda.sentinel-hub.com/sentinel-s2-l2a/", "s3://sentinel-s2-l2a/") - ) or url.startswith( - ("https://roda.sentinel-hub.com/sentinel-s2-l1c/", "s3://sentinel-s2-l1c/") - ): - return SinergisePathMapper(url, **kwargs) - elif url.startswith( - "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/" - ): - return EarthSearchPathMapper(url, **kwargs) - else: - return XMLMapper(url, **kwargs) diff --git a/mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py b/mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py deleted file mode 100644 index cb532e09..00000000 --- a/mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +++ /dev/null @@ -1,34 +0,0 @@ -from mapchete.path import MPath - -from mapchete_eo.platforms.sentinel2.path_mappers.sinergise import SinergisePathMapper -from mapchete_eo.platforms.sentinel2.processing_baseline import ProcessingBaseline - - -class EarthSearchPathMapper(SinergisePathMapper): - """ - The COG archive maintained by E84 and covered by EarthSearch does not hold additional data - such as the GML files. This class maps the metadata masks to the current EarthSearch product. 
- - e.g.: - B01 detector footprints: s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/qi/MSK_DETFOO_B01.gml - Cloud masks: s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/qi/MSK_CLOUDS_B00.gml - - newer products however: - B01 detector footprints: s3://sentinel-s2-l2a/tiles/51/K/XR/2022/6/6/0/qi/DETFOO_B01.jp2 - no vector cloudmasks available anymore - """ - - def __init__( - self, - metadata_xml: MPath, - alternative_metadata_baseurl: str = "sentinel-s2-l2a", - protocol: str = "s3", - baseline_version: str = "04.00", - **kwargs, - ): - basedir = metadata_xml.parent - self._path = (basedir / "tileinfo_metadata.json").read_json()["path"] - self._utm_zone, self._latitude_band, self._grid_square = basedir.elements[-6:-3] - self._baseurl = alternative_metadata_baseurl - self._protocol = protocol - self.processing_baseline = ProcessingBaseline.from_version(baseline_version) diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py new file mode 100644 index 00000000..403cc6c7 --- /dev/null +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py @@ -0,0 +1,50 @@ +from typing import Literal, Dict, Any + +# importing this is crucial so the mapping functions get registered before registry is accessed +from mapchete_eo.platforms.sentinel2.preconfigured_sources.item_mappers import ( + earthsearch_assets_paths_mapper, # noqa: F401 + earthsearch_id_mapper, # noqa: F401 + earthsearch_to_s2metadata, # noqa: F401 + cdse_asset_names, # noqa: F401 + cdse_s2metadata, # noqa: F401 +) + + +DataArchive = Literal["AWSCOG", "AWSJP2"] +KNOWN_SOURCES: Dict[str, Any] = { + "EarthSearch": { + "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "collections": ["sentinel-2-c1-l2a"], + }, + "EarthSearch_legacy": { + "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "collections": ["sentinel-2-l2a"], + }, + "CDSE": { + "stac_catalog": 
"https://stac.dataspace.copernicus.eu/v1", + "collections": ["sentinel-2-l2a"], + }, +} + +DEPRECATED_ARCHIVES = { + "S2AWS_COG": { + "stac_catalog": "https://earth-search.aws.element84.com/v1/", + "collections": ["sentinel-2-l2a"], + "data_archive": "AWSCOG", + }, + "S2AWS_JP2": { + "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", + "collections": ["sentinel-2-l2a"], + "data_archive": "AWSJP2", + }, + "S2CDSE_AWSJP2": { + "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", + "collections": ["sentinel-2-l2a"], + "data_archive": "AWSJP2", + }, + "S2CDSE_JP2": { + "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", + "collections": ["sentinel-2-l2a"], + }, +} +MetadataArchive = Literal["roda"] diff --git a/mapchete_eo/platforms/sentinel2/sources_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py similarity index 73% rename from mapchete_eo/platforms/sentinel2/sources_mappers.py rename to mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index a8d5de44..8440a498 100644 --- a/mapchete_eo/platforms/sentinel2/sources_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -1,59 +1,18 @@ -from typing import Literal, Dict, Any - from mapchete.path import MPath from pystac import Item -from mapchete_eo.platforms.sentinel2.mapper_registry import ( +from mapchete_eo.platforms.sentinel2._mapper_registry import ( maps_item_id, maps_stac_metadata, creates_s2metadata, ) -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_mappers import ( + EarthSearchPathMapper, +) +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.search.s2_mgrs import S2Tile -DataArchive = Literal["AWSCOG", "AWSJP2"] -KNOWN_SOURCES: Dict[str, Any] = { - "EarthSearch": { - "stac_catalog": "https://earth-search.aws.element84.com/v1/", - "collections": 
["sentinel-2-c1-l2a"], - }, - "EarthSearch_legacy": { - "stac_catalog": "https://earth-search.aws.element84.com/v1/", - "collections": ["sentinel-2-l2a"], - }, - "CDSE": { - "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], - }, -} - -DEPRECATED_ARCHIVES = { - "S2AWS_COG": { - "stac_catalog": "https://earth-search.aws.element84.com/v1/", - "collections": ["sentinel-2-l2a"], - "data_archive": "AWSCOG", - }, - "S2AWS_JP2": { - "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], - "data_archive": "AWSJP2", - }, - "S2CDSE_AWSJP2": { - "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], - "data_archive": "AWSJP2", - }, - "S2CDSE_JP2": { - "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], - }, -} - - -MetadataArchive = Literal["roda"] - - # mapper functions decorated with metadata to have driver decide which one to apply when # ########################################################################################## @@ -71,7 +30,7 @@ def earthsearch_assets_paths_mapper(item: Item) -> Item: @creates_s2metadata(from_catalogs=["EarthSearch"], to_metadata_archives=["roda"]) def earthsearch_to_s2metadata(item: Item) -> S2Metadata: - return S2Metadata.from_stac_item(item) + return S2Metadata.from_stac_item(item, path_mapper=EarthSearchPathMapper) @maps_item_id(from_catalogs=["CDSE"]) @@ -174,3 +133,36 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: @creates_s2metadata(from_catalogs=["CDSE"], to_metadata_archives=["roda"]) def cdse_s2metadata(item: Item) -> S2Metadata: return S2Metadata.from_stac_item(item) + + +# from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper +# from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_earthsearch import ( +# EarthSearchPathMapper, +# ) +# from 
mapchete_eo.platforms.sentinel2.metadata_parser.default_path_mapper import ( +# XMLMapper, +# ) +# from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_sinergise import SinergisePathMapper + + +# def default_path_mapper_guesser( +# url: str, +# **kwargs, +# ) -> S2MetadataPathMapper: +# """Guess S2PathMapper based on URL. + +# If a new path mapper is added in this module, it should also be added to this function +# in order to be detected. +# """ +# if url.startswith( +# ("https://roda.sentinel-hub.com/sentinel-s2-l2a/", "s3://sentinel-s2-l2a/") +# ) or url.startswith( +# ("https://roda.sentinel-hub.com/sentinel-s2-l1c/", "s3://sentinel-s2-l1c/") +# ): +# return SinergisePathMapper(url, **kwargs) +# elif url.startswith( +# "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/" +# ): +# return EarthSearchPathMapper(url, **kwargs) +# else: +# return XMLMapper(url, **kwargs) diff --git a/mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py similarity index 75% rename from mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py rename to mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py index 6949c280..10af5203 100644 --- a/mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py @@ -1,6 +1,6 @@ from mapchete.path import MPath, MPathLike -from mapchete_eo.platforms.sentinel2.path_mappers.base import S2PathMapper +from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper from mapchete_eo.platforms.sentinel2.processing_baseline import ProcessingBaseline from mapchete_eo.platforms.sentinel2.types import ( BandQI, @@ -10,7 +10,7 @@ ) -class SinergisePathMapper(S2PathMapper): +class SinergisePathMapper(S2MetadataPathMapper): """ Return true paths of product quality assets from the Sinergise S2 bucket. 
@@ -103,3 +103,33 @@ def technical_quality_mask(self, band: L2ABand) -> MPath: def detector_footprints(self, band: L2ABand) -> MPath: return self.band_qi_mask(BandQI.detector_footprints, band) + + +class EarthSearchPathMapper(SinergisePathMapper): + """ + The COG archive maintained by E84 and covered by EarthSearch does not hold additional data + such as the GML files. This class maps the metadata masks to the current EarthSearch product. + + e.g.: + B01 detector footprints: s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/qi/MSK_DETFOO_B01.gml + Cloud masks: s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/qi/MSK_CLOUDS_B00.gml + + newer products however: + B01 detector footprints: s3://sentinel-s2-l2a/tiles/51/K/XR/2022/6/6/0/qi/DETFOO_B01.jp2 + no vector cloudmasks available anymore + """ + + def __init__( + self, + metadata_xml: MPath, + alternative_metadata_baseurl: str = "sentinel-s2-l2a", + protocol: str = "s3", + baseline_version: str = "04.00", + **kwargs, + ): + basedir = metadata_xml.parent + self._path = (basedir / "tileinfo_metadata.json").read_json()["path"] + self._utm_zone, self._latitude_band, self._grid_square = basedir.elements[-6:-3] + self._baseurl = alternative_metadata_baseurl + self._protocol = protocol + self.processing_baseline = ProcessingBaseline.from_version(baseline_version) diff --git a/mapchete_eo/platforms/sentinel2/product.py b/mapchete_eo/platforms/sentinel2/product.py index f9f88ba4..a1ec8fd0 100644 --- a/mapchete_eo/platforms/sentinel2/product.py +++ b/mapchete_eo/platforms/sentinel2/product.py @@ -41,7 +41,7 @@ CacheConfig, MaskConfig, ) -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.platforms.sentinel2.types import ( CloudType, L2ABand, diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py index 4f09a0a8..6b47519a 100644 --- 
a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -5,12 +5,12 @@ from pydantic import model_validator from mapchete_eo.source import Source -from mapchete_eo.platforms.sentinel2.sources_mappers import ( +from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( DataArchive, MetadataArchive, KNOWN_SOURCES, ) -from mapchete_eo.platforms.sentinel2.mapper_registry import MAPPER_REGISTRIES +from mapchete_eo.platforms.sentinel2._mapper_registry import MAPPER_REGISTRIES def known_catalog_to_url(stac_catalog: str) -> str: diff --git a/tests/platforms/sentinel2/test_mappers.py b/tests/platforms/sentinel2/test_mappers.py new file mode 100644 index 00000000..4c1cfd14 --- /dev/null +++ b/tests/platforms/sentinel2/test_mappers.py @@ -0,0 +1,19 @@ +import pytest + +from pystac import Item + +from mapchete_eo.platforms.sentinel2.preconfigured_sources.item_mappers import ( + earthsearch_to_s2metadata, +) + + +@pytest.mark.parametrize( + "item_url", + [ + "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2A_T33TWL_20250109T100401_L2A" + ], +) +def test_earthsearch_to_s2metadata(item_url): + s2metadata = earthsearch_to_s2metadata(Item.from_file(item_url)) + for asset in s2metadata.assets.values(): + assert asset.exists() diff --git a/tests/platforms/sentinel2/test_metadata_parser.py b/tests/platforms/sentinel2/test_metadata_parser.py index c136b45e..c12fcc62 100644 --- a/tests/platforms/sentinel2/test_metadata_parser.py +++ b/tests/platforms/sentinel2/test_metadata_parser.py @@ -6,6 +6,7 @@ from affine import Affine from mapchete.io.raster import ReferencedRaster + try: from mapchete import Bounds, Grid except ImportError: @@ -16,11 +17,13 @@ from shapely.geometry import shape from mapchete_eo.exceptions import AssetEmpty, AssetMissing, CorruptedProductMetadata -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata -from mapchete_eo.platforms.sentinel2.path_mappers import ( +from 
mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.default_path_mapper import ( + XMLMapper, +) +from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_mappers import ( EarthSearchPathMapper, SinergisePathMapper, - XMLMapper, ) from mapchete_eo.platforms.sentinel2.processing_baseline import BaselineVersion from mapchete_eo.platforms.sentinel2.types import ( diff --git a/tests/test_io_assets.py b/tests/test_io_assets.py index 29e583ee..490b0397 100644 --- a/tests/test_io_assets.py +++ b/tests/test_io_assets.py @@ -13,7 +13,7 @@ should_be_converted, ) from mapchete_eo.io.profiles import COGDeflateProfile, JP2LossyProfile -from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata def test_asset_mpath(s2_stac_item): From 71efdaca162c7982b128afc451038fe14a234574 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 30 Oct 2025 17:02:26 +0100 Subject: [PATCH 12/46] add guesser functions --- .../sentinel2/metadata_parser/s2metadata.py | 111 +++++++----------- .../preconfigured_sources/__init__.py | 11 ++ .../preconfigured_sources/guessers.py | 108 +++++++++++++++++ .../preconfigured_sources/item_mappers.py | 23 +++- tests/conftest.py | 17 +-- .../sentinel2/test_metadata_parser.py | 25 ++-- 6 files changed, 206 insertions(+), 89 deletions(-) create mode 100644 mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py diff --git a/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py index 81c14321..eab325c5 100644 --- a/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py @@ -7,7 +7,7 @@ import logging from functools import cached_property -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, 
Dict, List, Optional, Union from xml.etree.ElementTree import Element, ParseError import numpy as np @@ -63,76 +63,12 @@ def open_granule_metadata_xml(metadata_xml: MPath) -> Element: raise CorruptedProductMetadata(exc) -def s2metadata_from_stac_item( - item: pystac.Item, - metadata_assets: List[str] = ["metadata", "granule_metadata"], - boa_offset_fields: List[str] = [ - "sentinel:boa_offset_applied", - "sentinel2:boa_offset_applied", - "earthsearch:boa_offset_applied", - ], - processing_baseline_fields: List[str] = [ - "s2:processing_baseline", - "sentinel:processing_baseline", - "sentinel2:processing_baseline", - "processing:version", - ], - **kwargs, -) -> S2Metadata: - """Custom code to initialize S2Metadata from a STAC item. - - Depending on from which catalog the STAC item comes, this function should correctly - set all custom flags such as BOA offsets or pass on the correct path to the metadata XML - using the proper asset name. - """ - metadata_assets = metadata_assets - for metadata_asset in metadata_assets: - if metadata_asset in item.assets: - metadata_path = MPath(item.assets[metadata_asset].href) - break - else: # pragma: no cover - raise KeyError( - f"could not find path to metadata XML file in assets: {', '.join(item.assets.keys())}" - ) - - def _determine_offset(): - for field in boa_offset_fields: - if item.properties.get(field): - return True - - return False - - boa_offset_applied = _determine_offset() - - if metadata_path.is_remote() or metadata_path.is_absolute(): - metadata_xml = metadata_path - else: - metadata_xml = MPath(item.self_href).parent / metadata_path - for processing_baseline_field in processing_baseline_fields: - try: - processing_baseline = item.properties[processing_baseline_field] - break - except KeyError: - pass - else: # pragma: no cover - raise KeyError( - f"could not find processing baseline version in item properties: {item.properties}" - ) - return S2Metadata.from_metadata_xml( - metadata_xml=metadata_xml, - 
processing_baseline=processing_baseline, - boa_offset_applied=boa_offset_applied, - **kwargs, - ) - - class S2Metadata: metadata_xml: MPath path_mapper: S2MetadataPathMapper processing_baseline: ProcessingBaseline boa_offset_applied: bool = False _cached_xml_root: Optional[Element] = None - from_stac_item_constructor: Callable = s2metadata_from_stac_item crs: CRS bounds: Bounds footprint: Union[Polygon, MultiPolygon] @@ -216,9 +152,44 @@ def from_metadata_xml( metadata_xml, path_mapper=path_mapper, xml_root=xml_root, **kwargs ) - @classmethod - def from_stac_item(cls, item: pystac.Item, **kwargs) -> S2Metadata: - return cls.from_stac_item_constructor(item, **kwargs) + @staticmethod + def from_stac_item( + item: pystac.Item, + metadata_xml_asset_name: List[str] = ["metadata", "granule_metadata"], + boa_offset_field: Optional[str] = None, + processing_baseline_field: Optional[str] = None, + **kwargs, + ) -> S2Metadata: + metadata_xml_asset_name = metadata_xml_asset_name + if processing_baseline_field is None: + raise NotImplementedError() + for metadata_asset in metadata_xml_asset_name: + if metadata_asset in item.assets: + metadata_path = MPath(item.assets[metadata_asset].href) + break + else: # pragma: no cover + raise KeyError( + f"could not find path to metadata XML file in assets: {', '.join(item.assets.keys())}" + ) + + if metadata_path.is_remote() or metadata_path.is_absolute(): + metadata_xml = metadata_path + else: + metadata_xml = MPath(item.self_href).parent / metadata_path + try: + processing_baseline = item.properties[processing_baseline_field] + except KeyError: + raise KeyError( + f"could not find processing baseline version in item properties: {item.properties}" + ) + return S2Metadata.from_metadata_xml( + metadata_xml=metadata_xml, + processing_baseline=processing_baseline, + boa_offset_applied=item.properties[boa_offset_field] + if boa_offset_field + else False, + **kwargs, + ) @property def xml_root(self): @@ -269,13 +240,13 @@ def assets(self) -> 
Dict[str, MPath]: for product_qi_mask in ProductQI: if product_qi_mask == ProductQI.classification: out[product_qi_mask.name] = self.path_mapper.product_qi_mask( - product_qi_mask + qi_mask=product_qi_mask ) else: for resolution in ProductQIMaskResolution: out[f"{product_qi_mask.name}-{resolution.name}"] = ( self.path_mapper.product_qi_mask( - product_qi_mask, resolution=resolution + qi_mask=product_qi_mask, resolution=resolution ) ) diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py index 403cc6c7..f1639326 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py @@ -8,7 +8,18 @@ cdse_asset_names, # noqa: F401 cdse_s2metadata, # noqa: F401 ) +from mapchete_eo.platforms.sentinel2.preconfigured_sources.guessers import ( + guess_metadata_path_mapper, + guess_s2metadata_from_item, + guess_s2metadata_from_metadata_xml, +) + +__all__ = [ + "guess_metadata_path_mapper", + "guess_s2metadata_from_item", + "guess_s2metadata_from_metadata_xml", +] DataArchive = Literal["AWSCOG", "AWSJP2"] KNOWN_SOURCES: Dict[str, Any] = { diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py new file mode 100644 index 00000000..5cd61e43 --- /dev/null +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py @@ -0,0 +1,108 @@ +from typing import List + +from mapchete.path import MPathLike, MPath +from pystac import Item + +from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper +from mapchete_eo.platforms.sentinel2.metadata_parser.default_path_mapper import ( + XMLMapper, +) +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata +from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_mappers import ( + EarthSearchPathMapper, + 
SinergisePathMapper, +) + + +def guess_metadata_path_mapper( + metadata_xml: MPathLike, **kwargs +) -> S2MetadataPathMapper: + """Guess S2PathMapper based on URL. + + If a new path mapper is added in this module, it should also be added to this function + in order to be detected. + """ + metadata_xml = MPath.from_inp(metadata_xml) + if metadata_xml.startswith( + ("https://roda.sentinel-hub.com/sentinel-s2-l2a/", "s3://sentinel-s2-l2a/") + ) or metadata_xml.startswith( + ("https://roda.sentinel-hub.com/sentinel-s2-l1c/", "s3://sentinel-s2-l1c/") + ): + return SinergisePathMapper(metadata_xml, **kwargs) + elif metadata_xml.startswith( + "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/" + ): + return EarthSearchPathMapper(metadata_xml, **kwargs) + else: + return XMLMapper(metadata_xml, **kwargs) + + +def guess_s2metadata_from_metadata_xml(metadata_xml: MPathLike, **kwargs) -> S2Metadata: + return S2Metadata.from_metadata_xml( + metadata_xml=metadata_xml, + path_mapper=guess_metadata_path_mapper(metadata_xml, **kwargs), + **kwargs, + ) + + +def guess_s2metadata_from_item( + item: Item, + metadata_assets: List[str] = ["metadata", "granule_metadata"], + boa_offset_fields: List[str] = [ + "sentinel:boa_offset_applied", + "sentinel2:boa_offset_applied", + "earthsearch:boa_offset_applied", + ], + processing_baseline_fields: List[str] = [ + "s2:processing_baseline", + "sentinel:processing_baseline", + "sentinel2:processing_baseline", + "processing:version", + ], + **kwargs, +) -> S2Metadata: + """Custom code to initialize S2Metadata from a STAC item. + + Depending on from which catalog the STAC item comes, this function should correctly + set all custom flags such as BOA offsets or pass on the correct path to the metadata XML + using the proper asset name. 
+ """ + metadata_assets = metadata_assets + for metadata_asset in metadata_assets: + if metadata_asset in item.assets: + metadata_path = MPath(item.assets[metadata_asset].href) + break + else: # pragma: no cover + raise KeyError( + f"could not find path to metadata XML file in assets: {', '.join(item.assets.keys())}" + ) + + def _determine_offset(): + for field in boa_offset_fields: + if item.properties.get(field): + return True + + return False + + boa_offset_applied = _determine_offset() + + if metadata_path.is_remote() or metadata_path.is_absolute(): + metadata_xml = metadata_path + else: + metadata_xml = MPath(item.self_href).parent / metadata_path + for processing_baseline_field in processing_baseline_fields: + try: + processing_baseline = item.properties[processing_baseline_field] + break + except KeyError: + pass + else: # pragma: no cover + raise KeyError( + f"could not find processing baseline version in item properties: {item.properties}" + ) + return guess_s2metadata_from_metadata_xml( + metadata_xml, + processing_baseline=processing_baseline, + boa_offset_applied=boa_offset_applied, + **kwargs, + ) diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index 8440a498..5e9ca6d4 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -30,7 +30,23 @@ def earthsearch_assets_paths_mapper(item: Item) -> Item: @creates_s2metadata(from_catalogs=["EarthSearch"], to_metadata_archives=["roda"]) def earthsearch_to_s2metadata(item: Item) -> S2Metadata: - return S2Metadata.from_stac_item(item, path_mapper=EarthSearchPathMapper) + 1 / 0 + # TODO: write new path mapper! 
+ return S2Metadata.from_stac_item( + item, + path_mapper=EarthSearchPathMapper(MPath(item.assets["granule_metadata"].href)), + processing_baseline_field="s2:processing_baseline", + ) + + +@creates_s2metadata(from_catalogs=["EarthSearch_legacy"], to_metadata_archives=["roda"]) +def earthsearch_legacy_to_s2metadata(item: Item) -> S2Metadata: + return S2Metadata.from_stac_item( + item, + path_mapper=EarthSearchPathMapper(MPath(item.assets["granule_metadata"].href)), + boa_offset_field="earthsearch:boa_offset_applied", + processing_baseline_field="s2:processing_baseline", + ) @maps_item_id(from_catalogs=["CDSE"]) @@ -132,7 +148,10 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: @creates_s2metadata(from_catalogs=["CDSE"], to_metadata_archives=["roda"]) def cdse_s2metadata(item: Item) -> S2Metadata: - return S2Metadata.from_stac_item(item) + return S2Metadata.from_stac_item( + item, + processing_baseline_field="processing:version", + ) # from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper diff --git a/tests/conftest.py b/tests/conftest.py index 5c603115..2820e016 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,10 @@ from shapely import wkt from shapely.geometry import base -from mapchete_eo.platforms.sentinel2 import S2Metadata +from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( + guess_s2metadata_from_item, + guess_s2metadata_from_metadata_xml, +) from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog from mapchete_eo.types import TimeRange @@ -361,12 +364,12 @@ def s2_l2a_metadata_xml(s2_testdata_dir): @pytest.fixture(scope="session") def s2_l2a_metadata(s2_l2a_metadata_xml): - return S2Metadata.from_metadata_xml(s2_l2a_metadata_xml) + return guess_s2metadata_from_metadata_xml(s2_l2a_metadata_xml) @pytest.fixture(scope="session") def s2_l2a_safe_metadata(s2_testdata_dir): - return S2Metadata.from_metadata_xml( + return guess_s2metadata_from_metadata_xml( str( 
s2_testdata_dir.joinpath( "SAFE", @@ -381,7 +384,7 @@ def s2_l2a_safe_metadata(s2_testdata_dir): @pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_metadata_remote(): - return S2Metadata.from_metadata_xml( + return guess_s2metadata_from_metadata_xml( "s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/metadata.xml" ) @@ -390,7 +393,7 @@ def s2_l2a_metadata_remote(): @pytest.fixture(scope="session") def s2_l2a_roda_metadata_remote(): """Same content as s2_l2a_metadata_remote, but hosted on different server.""" - return S2Metadata.from_metadata_xml( + return guess_s2metadata_from_metadata_xml( "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/metadata.xml" ) @@ -399,7 +402,7 @@ def s2_l2a_roda_metadata_remote(): @pytest.fixture(scope="session") def s2_l2a_roda_metadata_jp2_masks_remote(): """From about 2022 on, ahte masks are now encoded as JP2 (rasters), not as GMLs (features).""" - return S2Metadata.from_metadata_xml( + return guess_s2metadata_from_metadata_xml( "https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/33/T/WL/2022/6/6/0/metadata.xml" ) @@ -426,7 +429,7 @@ def s2_l2a_earthsearch_xml_remote_broken(): @pytest.fixture(scope="session") def s2_l2a_earthsearch_remote(s2_l2a_earthsearch_remote_item): """Metadata used by Earth-Search V1 endpoint""" - return S2Metadata.from_stac_item(s2_l2a_earthsearch_remote_item) + return guess_s2metadata_from_item(s2_l2a_earthsearch_remote_item) @pytest.mark.remote diff --git a/tests/platforms/sentinel2/test_metadata_parser.py b/tests/platforms/sentinel2/test_metadata_parser.py index c12fcc62..116bbe06 100644 --- a/tests/platforms/sentinel2/test_metadata_parser.py +++ b/tests/platforms/sentinel2/test_metadata_parser.py @@ -6,6 +6,11 @@ from affine import Affine from mapchete.io.raster import ReferencedRaster +from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( + guess_s2metadata_from_item, + guess_s2metadata_from_metadata_xml, +) + try: from mapchete import Bounds, Grid @@ 
-475,7 +480,7 @@ def test_remote_metadata_viewing_incidence_angles(metadata: S2Metadata): def test_unavailable_metadata_xml(): with pytest.raises(FileNotFoundError): - S2Metadata.from_metadata_xml("unavailable_metadata.xml") + guess_s2metadata_from_metadata_xml("unavailable_metadata.xml") @pytest.mark.remote @@ -488,7 +493,7 @@ def test_unavailable_metadata_xml(): ) def test_from_stac_item(item_url): item = Item.from_file(item_url) - s2_metadata = S2Metadata.from_stac_item(item) + s2_metadata = guess_s2metadata_from_item(item) assert s2_metadata.processing_baseline.version == "04.00" if item.properties.get("sentinel:boa_offset_applied", False) or item.properties.get( "earthsearch:boa_offset_applied", False @@ -522,7 +527,7 @@ def test_from_stac_item(item_url): ], ) def test_from_stac_item_backwards(item): - s2_metadata = S2Metadata.from_stac_item(item) + s2_metadata = guess_s2metadata_from_item(item) assert s2_metadata.datastrip_id if item.properties.get("sentinel:boa_offset_applied", False) or item.properties.get( "earthsearch:boa_offset_applied", False @@ -551,7 +556,7 @@ def test_from_stac_item_backwards(item): @pytest.mark.remote def test_from_stac_item_invalid(stac_item_invalid_pb0001): - S2Metadata.from_stac_item(stac_item_invalid_pb0001) + guess_s2metadata_from_item(stac_item_invalid_pb0001) def test_baseline_version(): @@ -582,14 +587,14 @@ def test_future_baseline_version(): @pytest.mark.remote def test_product_empty_detector_footprints(product_empty_detector_footprints): - s2_product = S2Metadata.from_metadata_xml(product_empty_detector_footprints) + s2_product = guess_s2metadata_from_metadata_xml(product_empty_detector_footprints) with pytest.raises(AssetEmpty): s2_product.detector_footprints(L2ABand.B02) @pytest.mark.remote def test_product_missing_detector_footprints(product_missing_detector_footprints): - s2_product = S2Metadata.from_metadata_xml(product_missing_detector_footprints) + s2_product = 
guess_s2metadata_from_metadata_xml(product_missing_detector_footprints) with pytest.raises(AssetMissing): s2_product.detector_footprints(L2ABand.B02) @@ -602,8 +607,8 @@ def test_product_missing_detector_footprints(product_missing_detector_footprints ], ) def test_full_product_paths(item): - metadata = S2Metadata.from_stac_item(item) - for name, path in metadata.assets.items(): + metadata = guess_s2metadata_from_item(item) + for path in metadata.assets.values(): assert path.exists() @@ -622,7 +627,7 @@ def test_full_product_paths(item): ], ) def test_full_remote_product_paths(item): - metadata = S2Metadata.from_stac_item(item) + metadata = guess_s2metadata_from_item(item) for path in metadata.assets.values(): assert path.exists() @@ -630,4 +635,4 @@ def test_full_remote_product_paths(item): @pytest.mark.remote def test_broken_metadata_xml(s2_l2a_earthsearch_xml_remote_broken): with pytest.raises(CorruptedProductMetadata): - S2Metadata.from_metadata_xml(s2_l2a_earthsearch_xml_remote_broken) + guess_s2metadata_from_metadata_xml(s2_l2a_earthsearch_xml_remote_broken) From 4ecb087698464a04ec855ee51d2373e34c1e7bb8 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 3 Nov 2025 11:27:19 +0100 Subject: [PATCH 13/46] dev commit --- .../platforms/sentinel2/_mapper_registry.py | 20 ++++++++++ .../preconfigured_sources/item_mappers.py | 8 ++-- .../metadata_xml_mappers.py | 22 +++++++++++ .../sentinel2/processing_baseline.py | 3 ++ mapchete_eo/platforms/sentinel2/product.py | 38 +++++++++++++++++-- mapchete_eo/platforms/sentinel2/source.py | 9 ++++- tests/platforms/sentinel2/test_mappers.py | 8 ++++ tests/platforms/sentinel2/test_sources.py | 2 + 8 files changed, 103 insertions(+), 7 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/_mapper_registry.py b/mapchete_eo/platforms/sentinel2/_mapper_registry.py index f1569807..4e9ce6a4 100644 --- a/mapchete_eo/platforms/sentinel2/_mapper_registry.py +++ b/mapchete_eo/platforms/sentinel2/_mapper_registry.py @@ -14,6 +14,26 
@@ } +# @dataclass +# class Registries: +# id_mappers: Dict[Any, Callable] = field(default_factory=dict) +# stac_metadata_mappers: Dict[Any, Callable] = field(default_factory=dict) +# s2metadata_mappers: Dict[Any, Callable] = field(default_factory=dict) + +# def register( +# self, +# mapper: Literal["ID", "STAC metadata", "S2Metadata"], +# key: Any, +# func: Callable, +# ) -> None: +# if key in registry: +# raise ValueError(f"{key} already registered in {registry}") +# registry[key] = func + + +# MAPPER_REGISTRY = Registries() + + def _register_func(registry: Dict[str, Callable], key: Any, func: Callable): if key in registry: raise ValueError(f"{key} already registered in {registry}") diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index 5e9ca6d4..5dd0d1e0 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -8,6 +8,7 @@ ) from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_mappers import ( EarthSearchPathMapper, + EarthSearchC1PathMapper, ) from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.search.s2_mgrs import S2Tile @@ -30,17 +31,18 @@ def earthsearch_assets_paths_mapper(item: Item) -> Item: @creates_s2metadata(from_catalogs=["EarthSearch"], to_metadata_archives=["roda"]) def earthsearch_to_s2metadata(item: Item) -> S2Metadata: - 1 / 0 - # TODO: write new path mapper! 
return S2Metadata.from_stac_item( item, - path_mapper=EarthSearchPathMapper(MPath(item.assets["granule_metadata"].href)), + path_mapper=EarthSearchC1PathMapper( + MPath(item.assets["granule_metadata"].href) + ), processing_baseline_field="s2:processing_baseline", ) @creates_s2metadata(from_catalogs=["EarthSearch_legacy"], to_metadata_archives=["roda"]) def earthsearch_legacy_to_s2metadata(item: Item) -> S2Metadata: + breakpoint() return S2Metadata.from_stac_item( item, path_mapper=EarthSearchPathMapper(MPath(item.assets["granule_metadata"].href)), diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py index 10af5203..d96a89e6 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py @@ -133,3 +133,25 @@ def __init__( self._baseurl = alternative_metadata_baseurl self._protocol = protocol self.processing_baseline = ProcessingBaseline.from_version(baseline_version) + + +class EarthSearchC1PathMapper(SinergisePathMapper): + """ + The newer C1 collection has cloud and snow probability masks as assets, so we only need to + map to the rest. 
+ """ + + def __init__( + self, + metadata_xml: MPath, + alternative_metadata_baseurl: str = "sentinel-s2-l2a", + protocol: str = "s3", + baseline_version: str = "04.00", + **kwargs, + ): + basedir = metadata_xml.parent + self._path = (basedir / "tileInfo.json").read_json()["path"] + self._utm_zone, self._latitude_band, self._grid_square = basedir.elements[-6:-3] + self._baseurl = alternative_metadata_baseurl + self._protocol = protocol + self.processing_baseline = ProcessingBaseline.from_version(baseline_version) diff --git a/mapchete_eo/platforms/sentinel2/processing_baseline.py b/mapchete_eo/platforms/sentinel2/processing_baseline.py index 20f0cf67..4f8ad349 100644 --- a/mapchete_eo/platforms/sentinel2/processing_baseline.py +++ b/mapchete_eo/platforms/sentinel2/processing_baseline.py @@ -152,6 +152,9 @@ def __init__(self, version: BaselineVersion): self.band_mask_types = self.item_mapping.band_mask_types self.band_mask_extension = self.item_mapping.band_mask_extension + def __repr__(self) -> str: + return f"" + @staticmethod def from_version(version: Union[BaselineVersion, str]) -> "ProcessingBaseline": if isinstance(version, BaselineVersion): diff --git a/mapchete_eo/platforms/sentinel2/product.py b/mapchete_eo/platforms/sentinel2/product.py index a1ec8fd0..9ca01e1a 100644 --- a/mapchete_eo/platforms/sentinel2/product.py +++ b/mapchete_eo/platforms/sentinel2/product.py @@ -172,7 +172,7 @@ def from_stac_item( cache_all: bool = False, **kwargs, ) -> S2Product: - s2product = S2Product(item, cache_config=cache_config) + s2product = S2Product(item, cache_config=cache_config, **kwargs) if cache_all: # cache assets if configured @@ -362,7 +362,23 @@ def read_cloud_probability( cached_read: bool = False, ) -> ReferencedRaster: """Return cloud probability mask.""" - logger.debug("read cloud probability mask for %s", str(self)) + if "cloud" in self.item.assets: + logger.debug("read cloud probability mask for %s from asset", str(self)) + return read_mask_as_raster( + 
path=asset_mpath(item=self.item, asset="cloud"), + dst_grid=( + self.metadata.grid(grid) + if isinstance(grid, Resolution) + else Grid.from_obj(grid) + ), + resampling=resampling, + rasterize_value_func=lambda feature: True, + masked=False, + cached_read=cached_read, + ) + logger.debug( + "read cloud probability mask for %s from metadata archive", str(self) + ) return self.metadata.cloud_probability( dst_grid=grid, resampling=resampling, @@ -378,7 +394,23 @@ def read_snow_probability( cached_read: bool = False, ) -> ReferencedRaster: """Return classification snow and ice mask.""" - logger.debug("read snow probability mask for %s", str(self)) + if "snow" in self.item.assets: + logger.debug("read snow probability mask for %s from asset", str(self)) + return read_mask_as_raster( + path=asset_mpath(item=self.item, asset="cloud"), + dst_grid=( + self.metadata.grid(grid) + if isinstance(grid, Resolution) + else Grid.from_obj(grid) + ), + resampling=resampling, + rasterize_value_func=lambda feature: True, + masked=False, + cached_read=cached_read, + ) + logger.debug( + "read snow probability mask for %s from metadata archive", str(self) + ) return self.metadata.snow_probability( dst_grid=grid, resampling=resampling, diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py index 6b47519a..aa94b250 100644 --- a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Optional, List, Callable, Dict, Any, Union +from typing import Optional, List, Callable, Dict, Any, Tuple, Union from pydantic import model_validator @@ -34,6 +34,13 @@ def item_modifier_funcs(self) -> List[Callable]: if func is not None ] + @property + def _key(self) -> Tuple[Any, ...]: + return ( + self.stac_catalog, + tuple(self.collections) if self.collections else None, + ) + @model_validator(mode="before") def determine_data_source(cls, values: Dict[str, Any]) 
-> Dict[str, Any]: """Handles short names of sources.""" diff --git a/tests/platforms/sentinel2/test_mappers.py b/tests/platforms/sentinel2/test_mappers.py index 4c1cfd14..f019ae07 100644 --- a/tests/platforms/sentinel2/test_mappers.py +++ b/tests/platforms/sentinel2/test_mappers.py @@ -2,11 +2,13 @@ from pystac import Item +from mapchete_eo.platforms.sentinel2.product import S2Product from mapchete_eo.platforms.sentinel2.preconfigured_sources.item_mappers import ( earthsearch_to_s2metadata, ) +@pytest.mark.remote @pytest.mark.parametrize( "item_url", [ @@ -15,5 +17,11 @@ ) def test_earthsearch_to_s2metadata(item_url): s2metadata = earthsearch_to_s2metadata(Item.from_file(item_url)) + s2product = S2Product.from_stac_item(Item.from_file(item_url), metadata=s2metadata) + for asset in s2metadata.assets.values(): assert asset.exists() + + # probability masks + assert s2product.read_cloud_probability() + assert s2product.read_snow_probability() diff --git a/tests/platforms/sentinel2/test_sources.py b/tests/platforms/sentinel2/test_sources.py index 1183f097..1d6f9151 100644 --- a/tests/platforms/sentinel2/test_sources.py +++ b/tests/platforms/sentinel2/test_sources.py @@ -9,6 +9,8 @@ @pytest.mark.parametrize("source_id", ["EarthSearch", "EarthSearch_legacy"]) def test_known_sources(source_id): source = Sentinel2Source(stac_catalog=source_id) + breakpoint() + return assert source for item in source.search( time=TimeRange(start="2025-01-01", end="2025-01-10"), bounds=[16, 46, 17, 47] From 23f286ede90ea3acb6099cfaf917144d1989e4d8 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 3 Nov 2025 13:18:35 +0100 Subject: [PATCH 14/46] only provide URL to a single collection to make sure a source is unique --- .../platforms/sentinel2/_mapper_registry.py | 20 ++++----- .../preconfigured_sources/__init__.py | 21 +++------ .../preconfigured_sources/item_mappers.py | 19 ++++---- mapchete_eo/platforms/sentinel2/source.py | 43 ++++++++----------- mapchete_eo/source.py | 12 +++--- 
tests/platforms/sentinel2/test_config.py | 14 +++--- tests/platforms/sentinel2/test_sources.py | 14 +++--- 7 files changed, 63 insertions(+), 80 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/_mapper_registry.py b/mapchete_eo/platforms/sentinel2/_mapper_registry.py index 4e9ce6a4..3c2a4c9f 100644 --- a/mapchete_eo/platforms/sentinel2/_mapper_registry.py +++ b/mapchete_eo/platforms/sentinel2/_mapper_registry.py @@ -40,7 +40,7 @@ def _register_func(registry: Dict[str, Callable], key: Any, func: Callable): registry[key] = func -def maps_item_id(from_catalogs: List[str]): +def maps_item_id(from_collections: List[str]): """ Decorator registering mapper to common ID. """ @@ -48,15 +48,15 @@ def maps_item_id(from_catalogs: List[str]): def decorator(func): # Use a tuple of the metadata as the key # key = (path_type, version) - for catalog in from_catalogs: - _register_func(registry=ID_MAPPER_REGISTRY, key=catalog, func=func) + for collection in from_collections: + _register_func(registry=ID_MAPPER_REGISTRY, key=collection, func=func) return func return decorator def maps_stac_metadata( - from_catalogs: List[str], to_data_archives: Optional[List[str]] = None + from_collections: List[str], to_data_archives: Optional[List[str]] = None ): """ Decorator registering STAC metadata mapper. 
@@ -64,18 +64,18 @@ def maps_stac_metadata( def decorator(func): # Use a tuple of the metadata as the key - for catalog in from_catalogs: + for collection in from_collections: if to_data_archives: for data_archive in to_data_archives: _register_func( registry=STAC_METADATA_MAPPER_REGISTRY, - key=(catalog, data_archive), + key=(collection, data_archive), func=func, ) else: _register_func( registry=STAC_METADATA_MAPPER_REGISTRY, - key=catalog, + key=collection, func=func, ) return func @@ -83,18 +83,18 @@ def decorator(func): return decorator -def creates_s2metadata(from_catalogs: List[str], to_metadata_archives: List[str]): +def creates_s2metadata(from_collections: List[str], to_metadata_archives: List[str]): """ Decorator registering S2Metadata creator. """ def decorator(func): # Use a tuple of the metadata as the key - for catalog in from_catalogs: + for collection in from_collections: for metadata_archive in to_metadata_archives: _register_func( registry=S2METADATA_MAPPER_REGISTRY, - key=(catalog, metadata_archive), + key=(collection, metadata_archive), func=func, ) return func diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py index f1639326..b10f1971 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py @@ -24,38 +24,31 @@ DataArchive = Literal["AWSCOG", "AWSJP2"] KNOWN_SOURCES: Dict[str, Any] = { "EarthSearch": { - "stac_catalog": "https://earth-search.aws.element84.com/v1/", - "collections": ["sentinel-2-c1-l2a"], + "collection": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a", }, "EarthSearch_legacy": { - "stac_catalog": "https://earth-search.aws.element84.com/v1/", - "collections": ["sentinel-2-l2a"], + "collection": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", }, "CDSE": { - "stac_catalog": 
"https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], + "collection": "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a", }, } DEPRECATED_ARCHIVES = { "S2AWS_COG": { - "stac_catalog": "https://earth-search.aws.element84.com/v1/", - "collections": ["sentinel-2-l2a"], + "collection": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", "data_archive": "AWSCOG", }, "S2AWS_JP2": { - "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], + "collection": "https://stac.dataspace.copernicus.eu/v1collections/sentinel-2-l2a", "data_archive": "AWSJP2", }, "S2CDSE_AWSJP2": { - "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], + "collection": "https://stac.dataspace.copernicus.eu/v1collections/sentinel-2-l2a", "data_archive": "AWSJP2", }, "S2CDSE_JP2": { - "stac_catalog": "https://stac.dataspace.copernicus.eu/v1", - "collections": ["sentinel-2-l2a"], + "collection": "https://stac.dataspace.copernicus.eu/v1collections/sentinel-2-l2a", }, } MetadataArchive = Literal["roda"] diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index 5dd0d1e0..1129abe9 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -18,18 +18,18 @@ ########################################################################################## -@maps_item_id(from_catalogs=["EarthSearch"]) +@maps_item_id(from_collections=["EarthSearch", "EarthSearch_legacy"]) def earthsearch_id_mapper(item: Item) -> Item: return item -@maps_stac_metadata(from_catalogs=["EarthSearch"], to_data_archives=["AWSCOG"]) +@maps_stac_metadata(from_collections=["EarthSearch"], to_data_archives=["AWSCOG"]) def earthsearch_assets_paths_mapper(item: Item) -> Item: """Nothing to do here as 
paths match catalog.""" return item -@creates_s2metadata(from_catalogs=["EarthSearch"], to_metadata_archives=["roda"]) +@creates_s2metadata(from_collections=["EarthSearch"], to_metadata_archives=["roda"]) def earthsearch_to_s2metadata(item: Item) -> S2Metadata: return S2Metadata.from_stac_item( item, @@ -40,9 +40,10 @@ def earthsearch_to_s2metadata(item: Item) -> S2Metadata: ) -@creates_s2metadata(from_catalogs=["EarthSearch_legacy"], to_metadata_archives=["roda"]) +@creates_s2metadata( + from_collections=["EarthSearch_legacy"], to_metadata_archives=["roda"] +) def earthsearch_legacy_to_s2metadata(item: Item) -> S2Metadata: - breakpoint() return S2Metadata.from_stac_item( item, path_mapper=EarthSearchPathMapper(MPath(item.assets["granule_metadata"].href)), @@ -51,7 +52,7 @@ def earthsearch_legacy_to_s2metadata(item: Item) -> S2Metadata: ) -@maps_item_id(from_catalogs=["CDSE"]) +@maps_item_id(from_collections=["CDSE"]) def plain_id_mapper(item: Item) -> Item: return item @@ -76,7 +77,7 @@ def plain_id_mapper(item: Item) -> Item: } -@maps_stac_metadata(from_catalogs=["CDSE"]) +@maps_stac_metadata(from_collections=["CDSE"]) def cdse_asset_names(item: Item) -> Item: new_assets = {} for asset_name, asset in item.assets.items(): @@ -88,7 +89,7 @@ def cdse_asset_names(item: Item) -> Item: return item -@maps_stac_metadata(from_catalogs=["CDSE"], to_data_archives=["AWSJP2"]) +@maps_stac_metadata(from_collections=["CDSE"], to_data_archives=["AWSJP2"]) def map_cdse_paths_to_jp2_archive(item: Item) -> Item: """ CSDE has the following assets: @@ -148,7 +149,7 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: return item -@creates_s2metadata(from_catalogs=["CDSE"], to_metadata_archives=["roda"]) +@creates_s2metadata(from_collections=["CDSE"], to_metadata_archives=["roda"]) def cdse_s2metadata(item: Item) -> S2Metadata: return S2Metadata.from_stac_item( item, diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py index 
aa94b250..d4bebc16 100644 --- a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Optional, List, Callable, Dict, Any, Tuple, Union +from typing import Optional, List, Callable, Dict, Any, Union from pydantic import model_validator @@ -13,10 +13,10 @@ from mapchete_eo.platforms.sentinel2._mapper_registry import MAPPER_REGISTRIES -def known_catalog_to_url(stac_catalog: str) -> str: - if stac_catalog in KNOWN_SOURCES: - return KNOWN_SOURCES[stac_catalog]["stac_catalog"] - return stac_catalog +def known_collection_to_url(collection: str) -> str: + if collection in KNOWN_SOURCES: + return KNOWN_SOURCES[collection]["collection"] + return collection class Sentinel2Source(Source): @@ -34,21 +34,14 @@ def item_modifier_funcs(self) -> List[Callable]: if func is not None ] - @property - def _key(self) -> Tuple[Any, ...]: - return ( - self.stac_catalog, - tuple(self.collections) if self.collections else None, - ) - @model_validator(mode="before") def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Handles short names of sources.""" if isinstance(values, str): - values = dict(stac_catalog=values) - stac_catalog = values.get("stac_catalog", None) - if stac_catalog in KNOWN_SOURCES: - values.update(KNOWN_SOURCES[stac_catalog]) + values = dict(collection=values) + collection = values.get("collection", None) + if collection in KNOWN_SOURCES: + values.update(KNOWN_SOURCES[collection]) else: # TODO: make sure catalog then is either a path or an URL pass @@ -66,10 +59,10 @@ def get_id_mapper(self) -> Union[Callable, None]: if self.catalog_type == "static": return None for key in MAPPER_REGISTRIES["ID"]: - if self.stac_catalog == known_catalog_to_url(key): + if self.collection == known_collection_to_url(key): return MAPPER_REGISTRIES["ID"][key] else: - raise ValueError(f"no ID mapper for {self.stac_catalog} found") + raise ValueError(f"no ID mapper 
for {self.collection} found") def get_stac_metadata_mapper(self) -> Union[Callable, None]: """Find mapper function. @@ -80,33 +73,33 @@ def get_stac_metadata_mapper(self) -> Union[Callable, None]: return None for key in MAPPER_REGISTRIES["STAC metadata"]: if isinstance(key, tuple): - stac_catalog, data_archive = key + collection, data_archive = key if ( - self.stac_catalog == known_catalog_to_url(stac_catalog) + self.collection == known_collection_to_url(collection) and data_archive == self.data_archive ): return MAPPER_REGISTRIES["STAC metadata"][key] else: - if self.stac_catalog == known_catalog_to_url(key): + if self.collection == known_collection_to_url(key): return MAPPER_REGISTRIES["STAC metadata"][key] else: if self.data_archive is None: return None raise ValueError( - f"no STAC metadata mapper from {self.stac_catalog} to {self.data_archive} found" + f"no STAC metadata mapper from {self.collection} to {self.data_archive} found" ) def get_s2metadata_mapper(self) -> Union[Callable, None]: if self.catalog_type == "static" or self.metadata_archive is None: return None for key in MAPPER_REGISTRIES["S2Metadata"]: - stac_catalog, metadata_archive = key + collection, metadata_archive = key if ( - self.stac_catalog == known_catalog_to_url(stac_catalog) + self.collection == known_collection_to_url(collection) and metadata_archive == self.metadata_archive ): return MAPPER_REGISTRIES["S2Metadata"][key] else: raise ValueError( - f"no S2Metadata mapper from {self.stac_catalog} to {self.metadata_archive} found" + f"no S2Metadata mapper from {self.collection} to {self.metadata_archive} found" ) diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index f6d35cb9..a130944c 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -17,8 +17,7 @@ class Source(BaseModel): """All information required to consume EO products.""" - stac_catalog: str - collections: Optional[List[str]] = None + collection: str catalog_crs: CRSLike = 
mapchete_eo_settings.default_catalog_crs catalog_type: Literal["search", "static"] = "search" query: Optional[str] = None @@ -55,14 +54,15 @@ def apply_item_modifier_funcs(self, item: Item) -> Item: return item def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CatalogSearcher: + # TODO: adapt catalog classes + endpoint = "/".join(self.collection.rstrip("/").split("/")[:-2]) + collections = [self.collection.rstrip("/").split("/")[-1]] match self.catalog_type: case "search": - return STACSearchCatalog( - endpoint=self.stac_catalog, collections=self.collections - ) + return STACSearchCatalog(endpoint=endpoint, collections=collections) case "static": return STACStaticCatalog( - baseurl=MPath(self.stac_catalog).absolute_path(base_dir=base_dir) + baseurl=MPath(endpoint).absolute_path(base_dir=base_dir) ) def eo_bands(self, base_dir: Optional[MPathLike] = None) -> List[str]: diff --git a/tests/platforms/sentinel2/test_config.py b/tests/platforms/sentinel2/test_config.py index ce876b6f..6cb6d8e2 100644 --- a/tests/platforms/sentinel2/test_config.py +++ b/tests/platforms/sentinel2/test_config.py @@ -15,14 +15,14 @@ ), dict( source=dict( - stac_catalog="EarthSearch", + collection="EarthSearch", metadata_archive="roda", ) ), dict( source=[ dict( - stac_catalog="EarthSearch", + collection="EarthSearch", metadata_archive="roda", ) ], @@ -30,16 +30,15 @@ dict( source=[ dict( - stac_catalog="EarthSearch", + collection="EarthSearch", ), - dict(stac_catalog="CDSE", data_archive="AWSJP2"), + dict(collection="CDSE", data_archive="AWSJP2"), ], ), dict( source=[ dict( - stac_catalog="https://earth-search.aws.element84.com/v1/", - collections=["sentinel-s2-l2a"], + collection="https://earth-search.aws.element84.com/v1/collections=sentinel-s2-l2a", ), ], ), @@ -55,5 +54,4 @@ def test_valid_configs(config_dict: dict): ) assert config.source for source in config.source: - assert source.stac_catalog - assert source.collections + assert source.collection diff --git 
a/tests/platforms/sentinel2/test_sources.py b/tests/platforms/sentinel2/test_sources.py index 1d6f9151..c11be8bc 100644 --- a/tests/platforms/sentinel2/test_sources.py +++ b/tests/platforms/sentinel2/test_sources.py @@ -6,11 +6,9 @@ @pytest.mark.remote -@pytest.mark.parametrize("source_id", ["EarthSearch", "EarthSearch_legacy"]) -def test_known_sources(source_id): - source = Sentinel2Source(stac_catalog=source_id) - breakpoint() - return +@pytest.mark.parametrize("collection", ["EarthSearch", "EarthSearch_legacy"]) +def test_known_sources(collection): + source = Sentinel2Source(collection=collection) assert source for item in source.search( time=TimeRange(start="2025-01-01", end="2025-01-10"), bounds=[16, 46, 17, 47] @@ -35,6 +33,6 @@ def test_known_sources(source_id): @pytest.mark.remote @pytest.mark.use_cdse_test_env -@pytest.mark.parametrize("source_id", ["CSDE"]) -def test_known_sources_cdse(source_id): - test_known_sources(source_id) +@pytest.mark.parametrize("collection", ["CSDE"]) +def test_known_sources_cdse(collection): + test_known_sources(collection) From 3ff03e2cb350008421055b3749b6164715099ef7 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 3 Nov 2025 14:18:37 +0100 Subject: [PATCH 15/46] fix some tests --- mapchete_eo/cli/static_catalog.py | 7 ++++- .../sentinel2/metadata_parser/s2metadata.py | 28 ++++++++++++++----- .../preconfigured_sources/__init__.py | 6 ++-- mapchete_eo/search/stac_search.py | 9 ++++++ tests/test_cli.py | 15 +++++++--- 5 files changed, 50 insertions(+), 15 deletions(-) diff --git a/mapchete_eo/cli/static_catalog.py b/mapchete_eo/cli/static_catalog.py index 5a801d39..22f853d9 100644 --- a/mapchete_eo/cli/static_catalog.py +++ b/mapchete_eo/cli/static_catalog.py @@ -10,6 +10,9 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.platforms.sentinel2 import S2Metadata from mapchete_eo.platforms.sentinel2.types import Resolution +from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( + 
DEPRECATED_ARCHIVES, +) from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog from mapchete_eo.search.base import CatalogSearcher from mapchete_eo.types import TimeRange @@ -117,6 +120,8 @@ def get_catalog( else: raise ValueError("collection must be provided") elif known_archive: - raise NotImplementedError() + return STACSearchCatalog.from_collection_url( + DEPRECATED_ARCHIVES[known_archive]["collection"] + ) else: raise TypeError("cannot determine catalog") diff --git a/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py index eab325c5..4b4d4d20 100644 --- a/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py @@ -157,12 +157,14 @@ def from_stac_item( item: pystac.Item, metadata_xml_asset_name: List[str] = ["metadata", "granule_metadata"], boa_offset_field: Optional[str] = None, - processing_baseline_field: Optional[str] = None, + processing_baseline_field: Union[str, List[str]] = [ + "s2:processing_baseline", + "sentinel2:processing_baseline", + "processing:version", + ], **kwargs, ) -> S2Metadata: - metadata_xml_asset_name = metadata_xml_asset_name - if processing_baseline_field is None: - raise NotImplementedError() + # try to find path to metadata.xml for metadata_asset in metadata_xml_asset_name: if metadata_asset in item.assets: metadata_path = MPath(item.assets[metadata_asset].href) @@ -172,16 +174,28 @@ def from_stac_item( f"could not find path to metadata XML file in assets: {', '.join(item.assets.keys())}" ) + # maek path absolute if metadata_path.is_remote() or metadata_path.is_absolute(): metadata_xml = metadata_path else: metadata_xml = MPath(item.self_href).parent / metadata_path - try: - processing_baseline = item.properties[processing_baseline_field] - except KeyError: + + # try to find information on processing baseline version + for field in ( + processing_baseline_field + if 
isinstance(processing_baseline_field, list) + else [processing_baseline_field] + ): + try: + processing_baseline = item.properties[field] + break + except KeyError: + pass + else: # pragma: no cover raise KeyError( f"could not find processing baseline version in item properties: {item.properties}" ) + return S2Metadata.from_metadata_xml( metadata_xml=metadata_xml, processing_baseline=processing_baseline, diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py index b10f1971..ab0d82ab 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py @@ -40,15 +40,15 @@ "data_archive": "AWSCOG", }, "S2AWS_JP2": { - "collection": "https://stac.dataspace.copernicus.eu/v1collections/sentinel-2-l2a", + "collection": "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a", "data_archive": "AWSJP2", }, "S2CDSE_AWSJP2": { - "collection": "https://stac.dataspace.copernicus.eu/v1collections/sentinel-2-l2a", + "collection": "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a", "data_archive": "AWSJP2", }, "S2CDSE_JP2": { - "collection": "https://stac.dataspace.copernicus.eu/v1collections/sentinel-2-l2a", + "collection": "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a", }, } MetadataArchive = Literal["roda"] diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index ad1d6755..205d0ec1 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging from datetime import datetime from functools import cached_property @@ -202,6 +204,13 @@ def get_collections(self): for collection_name in self.collections: yield self.client.get_collection(collection_name) + @staticmethod + def from_collection_url(collection_url: str) -> STACSearchCatalog: + 
return STACSearchCatalog( + endpoint="/".join(collection_url.rstrip("/").split("/")[:-2]), + collections=[collection_url.rstrip("/").split("/")[-1]], + ) + class SpatialSearchChunks: bounds: Bounds diff --git a/tests/test_cli.py b/tests/test_cli.py index 76c205e4..d792787c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,7 +20,9 @@ def test_s2_mask(s2_stac_json_half_footprint, tmp_mpath): str(out_path), ], ) - assert result.exit_code == 0 + if result.exit_code != 0: + raise result.exception + assert out_path.exists() with rasterio_open(out_path) as src: assert src.read().any() @@ -40,7 +42,9 @@ def test_s2_rgb(s2_stac_json_half_footprint, tmp_mpath): str(out_path), ], ) - assert result.exit_code == 0 + if result.exit_code != 0: + raise result.exception + assert out_path.exists() with rasterio_open(out_path) as src: assert not src.read(masked=True).mask.all() @@ -63,7 +67,9 @@ def test_s2_brdf(s2_stac_json_half_footprint, tmp_mpath): str(out_path), ], ) - assert result.exit_code == 0 + if result.exit_code != 0: + raise result.exception + assert len(out_path.ls()) == 2 for path in out_path.ls(): with rasterio_open(path) as src: @@ -100,5 +106,6 @@ def test_static_catalog(tmp_mpath, flag, value, collection): if collection: params.extend(["--collection", collection]) result = runner.invoke(eo, params) - assert result.exit_code == 0 + if result.exit_code != 0: + raise result.exception assert out_path.ls() From e858f65fec06003e1e4458ff0133c0949fcfe43d Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 3 Nov 2025 14:43:22 +0100 Subject: [PATCH 16/46] fix some more tests --- mapchete_eo/base.py | 2 +- .../sentinel2/preconfigured_sources/item_mappers.py | 3 ++- mapchete_eo/source.py | 7 ++----- tests/testdata/sentinel2.mapchete | 4 ++-- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index bec34299..d3cd5354 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -76,7 +76,7 @@ def 
deprecate_cat_baseurl(cls, values: Dict[str, Any]) -> Dict[str, Any]: raise ValueError( "deprecated cat_baseurl field found alongside sources." ) - values["source"] = [dict(stac_catalog=cat_baseurl, catalog_type="static")] + values["source"] = [dict(collection=cat_baseurl, catalog_type="static")] return values diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index 1129abe9..3b3eeb20 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -86,6 +86,8 @@ def cdse_asset_names(item: Item) -> Item: new_assets[asset_name] = asset item.assets = new_assets + + item.properties["s2:datastrip_id"] = item.properties.get("eopf:datastrip_id") return item @@ -145,7 +147,6 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: item.assets = new_assets - item.properties["s2:datastrip_id"] = item.properties.get("eopf:datastrip_id") return item diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index a130944c..fe53c07e 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -54,15 +54,12 @@ def apply_item_modifier_funcs(self, item: Item) -> Item: return item def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CatalogSearcher: - # TODO: adapt catalog classes - endpoint = "/".join(self.collection.rstrip("/").split("/")[:-2]) - collections = [self.collection.rstrip("/").split("/")[-1]] match self.catalog_type: case "search": - return STACSearchCatalog(endpoint=endpoint, collections=collections) + return STACSearchCatalog.from_collection_url(self.collection) case "static": return STACStaticCatalog( - baseurl=MPath(endpoint).absolute_path(base_dir=base_dir) + baseurl=MPath(self.collection).absolute_path(base_dir=base_dir) ) def eo_bands(self, base_dir: Optional[MPathLike] = None) -> List[str]: diff --git a/tests/testdata/sentinel2.mapchete 
b/tests/testdata/sentinel2.mapchete index d660d948..4510d6b1 100644 --- a/tests/testdata/sentinel2.mapchete +++ b/tests/testdata/sentinel2.mapchete @@ -4,8 +4,8 @@ input: format: Sentinel-2 level: L2A time: - start: 2022-04-01 - end: 2022-04-03 + start: 2024-04-01 + end: 2024-04-03 output: format: GTiff bands: 3 From 68b92de55f17a35429644a8224b0ae5a146d26a3 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 3 Nov 2025 16:01:56 +0100 Subject: [PATCH 17/46] fix typo --- tests/platforms/sentinel2/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/platforms/sentinel2/test_config.py b/tests/platforms/sentinel2/test_config.py index 6cb6d8e2..4ec49c1b 100644 --- a/tests/platforms/sentinel2/test_config.py +++ b/tests/platforms/sentinel2/test_config.py @@ -38,7 +38,7 @@ dict( source=[ dict( - collection="https://earth-search.aws.element84.com/v1/collections=sentinel-s2-l2a", + collection="https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", ), ], ), From a18fb1c072f3e324a213cbde69bd0d7a46b999ff Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 3 Nov 2025 17:02:05 +0100 Subject: [PATCH 18/46] use cql2 to filter items on static catalog --- mapchete_eo/platforms/sentinel2/config.py | 23 +- mapchete_eo/search/base.py | 15 +- mapchete_eo/search/config.py | 28 +- mapchete_eo/search/stac_search.py | 13 +- mapchete_eo/search/stac_static.py | 7 +- mapchete_eo/search/utm_search.py | 3 +- mapchete_eo/source.py | 13 +- pyproject.toml | 1 + tests/platforms/sentinel2/test_base.py | 5 +- tests/testdata/sentinel2_mercator.mapchete | 4 +- tests/testdata/sentinel2_time_ranges.mapchete | 8 +- uv.lock | 334 ++++++++++-------- 12 files changed, 264 insertions(+), 190 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index 288b5eb9..21efbe46 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -133,7 +133,7 @@ class 
Sentinel2DriverConfig(BaseDriverConfig): cache: Optional[CacheConfig] = None @model_validator(mode="before") - def deprecate_archive(cls, values: Dict[str, Any]) -> Dict[str, Any]: + def deprecated_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: archive = values.get("archive") if archive: warnings.warn( @@ -146,11 +146,21 @@ def deprecate_archive(cls, values: Dict[str, Any]) -> Dict[str, Any]: values["source"] = DEPRECATED_ARCHIVES[archive] except KeyError: raise - return values - @model_validator(mode="before") - def deprecate_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: - max_cloud_cover = values.get("max_cloud_cover") + cat_baseurl = values.pop("cat_baseurl", None) + if cat_baseurl: + warnings.warn( + "'cat_baseurl' will be deprecated soon. Please use 'catalog_type=static' in the source.", + category=DeprecationWarning, + stacklevel=2, + ) + if values.get("source", []): + raise ValueError( + "deprecated cat_baseurl field found alongside sources." + ) + values["source"] = [dict(collection=cat_baseurl, catalog_type="static")] + + max_cloud_cover = values.pop("max_cloud_cover", None) if max_cloud_cover: warnings.warn( "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' 
in the source 'query' field.", @@ -158,6 +168,8 @@ def deprecate_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: stacklevel=2, ) sources = values.get("source", []) + if not sources: + raise ValueError("no sources defined") updated_sources = [] for source in sources: if source.get("query") is not None: @@ -166,7 +178,6 @@ def deprecate_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: ) source["query"] = f"eo:cloud_cover<={max_cloud_cover}" updated_sources.append(source) - values.pop("max_cloud_cover") values["source"] = updated_sources return values diff --git a/mapchete_eo/search/base.py b/mapchete_eo/search/base.py index 65a93b12..7ed76c08 100644 --- a/mapchete_eo/search/base.py +++ b/mapchete_eo/search/base.py @@ -4,6 +4,7 @@ from abc import ABC, abstractmethod from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union +from cql2 import Expr from pydantic import BaseModel from pystac import Item, Catalog, CatalogType, Extent from mapchete.path import MPath, MPathLike @@ -74,6 +75,7 @@ def search( time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, + query: Optional[str] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: ... @@ -222,14 +224,17 @@ def write_static_catalog( def filter_items( items: Generator[Item, None, None], - cloud_cover_field: str = "eo:cloud_cover", - max_cloud_cover: float = 100.0, + query: Optional[str] = None, ) -> Generator[Item, None, None]: """ Only for cloudcover now, this can and should be adapted for filter field and value the field and value for the item filter would be defined in search.config.py corresponding configs and passed down to the individual search approaches via said config and this Function. 
""" - for item in items: - if item.properties.get(cloud_cover_field, 0.0) <= max_cloud_cover: - yield item + if query: + expr = Expr(query) + for item in items: + if expr.matches(item.properties): + yield item + else: + yield from items diff --git a/mapchete_eo/search/config.py b/mapchete_eo/search/config.py index dfac4443..2f19b761 100644 --- a/mapchete_eo/search/config.py +++ b/mapchete_eo/search/config.py @@ -1,7 +1,7 @@ -from typing import Optional +from typing import Optional, Dict, Any from mapchete.path import MPath, MPathLike -from pydantic import BaseModel +from pydantic import BaseModel, model_validator class StacSearchConfig(BaseModel): @@ -12,13 +12,33 @@ class StacSearchConfig(BaseModel): catalog_pagesize: int = 100 footprint_buffer: float = 0 + @model_validator(mode="before") + def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if "max_cloud_cover" in values: + raise DeprecationWarning( + "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.", + ) + return values + class StacStaticConfig(BaseModel): - max_cloud_cover: float = 100.0 + @model_validator(mode="before") + def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if "max_cloud_cover" in values: + raise DeprecationWarning( + "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.", + ) + return values class UTMSearchConfig(BaseModel): - max_cloud_cover: float = 100.0 + @model_validator(mode="before") + def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if "max_cloud_cover" in values: + raise DeprecationWarning( + "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' 
in the source 'query' field.", + ) + return values sinergise_aws_collections: dict = dict( S2_L2A=dict( diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 205d0ec1..61122134 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -71,6 +71,7 @@ def search( time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, + query: Optional[str] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: config = self.config_cls(**search_kwargs or {}) @@ -87,7 +88,11 @@ def search( def _searches(): for time_range in time if isinstance(time, list) else [time]: search = self._search( - time_range=time_range, bounds=bounds, area=area, config=config + time_range=time_range, + bounds=bounds, + area=area, + query=query, + config=config, ) logger.debug("found %s products", search.matched()) matched = search.matched() or 0 @@ -107,6 +112,7 @@ def _searches(): with Timer() as duration: chunk_search = self._search( time_range=time_range, + query=query, config=config, **chunk_kwargs, ) @@ -124,11 +130,11 @@ def _searches(): for search in _searches(): for count, item in enumerate(search.items(), 1): item_path = item.get_self_href() - # logger.debug("item %s/%s ...", count, search.matched()) if item_path in self.blacklist: # pragma: no cover logger.debug("item %s found in blacklist and skipping", item_path) else: yield item + logger.debug("returned %s items in total", count) def _eo_bands(self) -> List[str]: for collection_name in self.collections: @@ -157,6 +163,7 @@ def _search( time_range: Optional[TimeRange] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, + query: Optional[str] = None, config: StacSearchConfig = StacSearchConfig(), **kwargs, ): @@ -185,7 +192,7 @@ def _search( search_params = dict( self.default_search_params, datetime=f"{start}/{end}", - query=config.query, + 
query=query, **kwargs, ) if ( diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index 3fbc6ac0..ae256ac3 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -62,16 +62,13 @@ def search( time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, + query: Optional[str] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: - config = self.config_cls(**search_kwargs or {}) if area is None and bounds: bounds = Bounds.from_inp(bounds) area = shape(bounds) - for item in filter_items( - self._raw_search(time=time, area=area), - max_cloud_cover=config.max_cloud_cover, - ): + for item in filter_items(self._raw_search(time=time, area=area), query=query): yield item def _raw_search( diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index 69796b6f..d8080472 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -63,15 +63,14 @@ def search( time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, + query: Optional[str] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: - config = self.config_cls(**search_kwargs or {}) if bounds: bounds = Bounds.from_inp(bounds) for item in filter_items( self._raw_search(time=time, bounds=bounds, area=area), - max_cloud_cover=config.max_cloud_cover, ): yield item diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index fe53c07e..07245004 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -1,8 +1,8 @@ -from typing import List, Literal, Optional, Generator, Union, Callable +from typing import Any, Dict, List, Literal, Optional, Generator, Union, Callable from mapchete.path import MPath from mapchete.types import BoundsLike, CRSLike, MPathLike -from pydantic import BaseModel, 
ConfigDict +from pydantic import BaseModel, ConfigDict, model_validator from pystac import Item from shapely.geometry.base import BaseGeometry from shapely.errors import GEOSException @@ -39,6 +39,7 @@ def search( time=time, bounds=bounds, area=area, + query=self.query, search_kwargs=dict(query=self.query) if self.query else None, ): yield self.apply_item_modifier_funcs(item) @@ -64,3 +65,11 @@ def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CatalogSearcher: def eo_bands(self, base_dir: Optional[MPathLike] = None) -> List[str]: return self.get_catalog(base_dir=base_dir).eo_bands + + @model_validator(mode="before") + def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: + if "max_cloud_cover" in values: + raise DeprecationWarning( + "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.", + ) + return values diff --git a/pyproject.toml b/pyproject.toml index 24a674fb..6cf913cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ ] dependencies = [ "click", + "cql2", "croniter", "lxml", "mapchete[complete]>=2025.10.0", diff --git a/tests/platforms/sentinel2/test_base.py b/tests/platforms/sentinel2/test_base.py index 83782a2e..ef5d99e6 100644 --- a/tests/platforms/sentinel2/test_base.py +++ b/tests/platforms/sentinel2/test_base.py @@ -102,10 +102,9 @@ def test_s2_time_ranges(sentinel2_time_ranges_mapchete): some_in_second = True for product in cube.products: first, second = cube.time - print((product.item.datetime.date(), first, second)) - if first.start < product.item.datetime.date() < first.end: + if first.start <= product.item.datetime.date() <= first.end: some_in_first = True - elif second.start < product.item.datetime.date() < second.end: + elif second.start <= product.item.datetime.date() <= second.end: some_in_second = True else: raise ValueError("product outside of given time ranges") diff --git a/tests/testdata/sentinel2_mercator.mapchete 
b/tests/testdata/sentinel2_mercator.mapchete index 630db279..ee145d4d 100644 --- a/tests/testdata/sentinel2_mercator.mapchete +++ b/tests/testdata/sentinel2_mercator.mapchete @@ -4,8 +4,8 @@ input: format: Sentinel-2 level: L2A time: - start: 2022-04-01 - end: 2022-04-03 + start: 2024-04-01 + end: 2024-04-03 output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_time_ranges.mapchete b/tests/testdata/sentinel2_time_ranges.mapchete index f68abc43..c0a0dad3 100644 --- a/tests/testdata/sentinel2_time_ranges.mapchete +++ b/tests/testdata/sentinel2_time_ranges.mapchete @@ -4,10 +4,10 @@ input: format: Sentinel-2 level: L2A time: - - start: 2022-04-01 - end: 2022-04-03 - - start: 2022-05-01 - end: 2022-05-03 + - start: 2024-04-01 + end: 2024-04-03 + - start: 2024-05-01 + end: 2024-05-03 output: format: GTiff bands: 3 diff --git a/uv.lock b/uv.lock index 1590d3f5..111e33da 100644 --- a/uv.lock +++ b/uv.lock @@ -5,7 +5,7 @@ requires-python = ">=3.13" [[package]] name = "affine" version = "2.4.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/69/98/d2f0bb06385069e799fc7d2870d9e078cfa0fa396dc8a2b81227d0da08b9/affine-2.4.0.tar.gz", hash = "sha256:a24d818d6a836c131976d22f8c27b8d3ca32d0af64c1d8d29deb7bafa4da1eea", size = 17132, upload-time = "2023-01-19T23:44:30.696Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/0b/f7/85273299ab57117850cc0a936c64151171fac4da49bc6fba0dad984a7c5f/affine-2.4.0-py3-none-any.whl", hash = "sha256:8a3df80e2b2378aef598a83c1392efd47967afec4242021a0b06b4c7cbc61a92", size = 15662, upload-time = "2023-01-19T23:44:28.833Z" }, @@ -14,7 +14,7 @@ wheels = [ [[package]] name = "aiobotocore" version = "2.24.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "aiohttp" 
}, { name = "aioitertools" }, @@ -32,7 +32,7 @@ wheels = [ [[package]] name = "aiohappyeyeballs" version = "2.6.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" }, @@ -41,7 +41,7 @@ wheels = [ [[package]] name = "aiohttp" version = "3.12.15" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, @@ -75,7 +75,7 @@ wheels = [ [[package]] name = "aioitertools" version = "0.12.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/06/de/38491a84ab323b47c7f86e94d2830e748780525f7a10c8600b67ead7e9ea/aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b", size = 19369, upload-time = "2024-09-02T03:33:40.349Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/85/13/58b70a580de00893223d61de8fea167877a3aed97d4a5e1405c9159ef925/aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796", size = 24345, upload-time = "2024-09-02T03:34:59.454Z" }, @@ -84,7 +84,7 @@ wheels = 
[ [[package]] name = "aiosignal" version = "1.4.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "frozenlist" }, ] @@ -96,7 +96,7 @@ wheels = [ [[package]] name = "alabaster" version = "1.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210, upload-time = "2024-07-26T18:15:03.762Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, @@ -105,7 +105,7 @@ wheels = [ [[package]] name = "annotated-types" version = "0.7.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, @@ -114,7 +114,7 @@ wheels = [ [[package]] name = "anyio" version = "4.10.0" -source = { registry = "https://pypi.org/simple" } 
+source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "idna" }, { name = "sniffio" }, @@ -127,7 +127,7 @@ wheels = [ [[package]] name = "attrs" version = "25.3.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032, upload-time = "2025-03-13T11:10:22.779Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, @@ -136,7 +136,7 @@ wheels = [ [[package]] name = "babel" version = "2.17.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" }, @@ -145,7 +145,7 @@ wheels = [ [[package]] name = "blinker" version = "1.9.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, @@ -154,7 +154,7 @@ wheels = [ [[package]] name = "boto3" version = "1.39.11" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "botocore" }, { name = "jmespath" }, @@ -168,7 +168,7 @@ wheels = [ [[package]] name = "botocore" version = "1.39.11" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, @@ -182,7 +182,7 @@ wheels = [ [[package]] name = "cachetools" version = "6.1.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/8a/89/817ad5d0411f136c484d535952aef74af9b25e0d99e90cdffbe121e6d628/cachetools-6.1.0.tar.gz", hash = "sha256:b4c4f404392848db3ce7aac34950d17be4d864da4b8b66911008e430bc544587", size = 30714, upload-time = "2025-06-16T18:51:03.07Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/00/f0/2ef431fe4141f5e334759d73e81120492b23b2824336883a91ac04ba710b/cachetools-6.1.0-py3-none-any.whl", hash = "sha256:1c7bb3cf9193deaf3508b7c5f2a79986c13ea38965c5adcff1f84519cf39163e", size = 11189, upload-time = "2025-06-16T18:51:01.514Z" }, @@ -191,7 +191,7 @@ wheels = [ [[package]] 
name = "certifi" version = "2025.8.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" }, @@ -200,7 +200,7 @@ wheels = [ [[package]] name = "charset-normalizer" version = "3.4.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" }, @@ -231,7 +231,7 @@ wheels = [ [[package]] name = "click" version = "8.2.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] @@ -243,7 +243,7 @@ wheels = [ [[package]] name = "click-plugins" 
version = "1.1.1.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "click" }, ] @@ -255,7 +255,7 @@ wheels = [ [[package]] name = "click-spinner" version = "0.1.10" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/af/3a/7dbc558fcf0ae9e2e8b7ccc52daeb4eaf32b21f851497f5b409e1638dcee/click-spinner-0.1.10.tar.gz", hash = "sha256:87eacf9d7298973a25d7615ef57d4782aebf913a532bba4b28a37e366e975daf", size = 18720, upload-time = "2020-04-24T07:14:51.955Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/93/2a/04893832bfeddc2d40a7de2e8153b3085f12d63507d91a9cf0157dc3a1c2/click_spinner-0.1.10-py2.py3-none-any.whl", hash = "sha256:d1ffcff1fdad9882396367f15fb957bcf7f5c64ab91927dee2127e0d2991ee84", size = 3986, upload-time = "2020-04-24T07:14:50.575Z" }, @@ -264,7 +264,7 @@ wheels = [ [[package]] name = "cligj" version = "0.7.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "click" }, ] @@ -276,7 +276,7 @@ wheels = [ [[package]] name = "cloudpickle" version = "3.1.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/52/39/069100b84d7418bc358d81669d5748efb14b9cceacd2f9c75f550424132f/cloudpickle-3.1.1.tar.gz", hash = "sha256:b216fa8ae4019d5482a8ac3c95d8f6346115d8835911fd4aefd1a445e4242c64", size = 22113, upload-time = "2025-01-14T17:02:05.085Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/7e/e8/64c37fadfc2816a7701fa8a6ed8d87327c7d54eacfbfb6edab14a2f2be75/cloudpickle-3.1.1-py3-none-any.whl", hash = 
"sha256:c8c5a44295039331ee9dad40ba100a9c7297b6f988e50e87ccdf3765a668350e", size = 20992, upload-time = "2025-01-14T17:02:02.417Z" }, @@ -285,7 +285,7 @@ wheels = [ [[package]] name = "color-operations" version = "0.2.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, ] @@ -302,7 +302,7 @@ wheels = [ [[package]] name = "colorama" version = "0.4.6" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, @@ -311,7 +311,7 @@ wheels = [ [[package]] name = "contourpy" version = "1.3.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, ] @@ -366,7 +366,7 @@ wheels = [ [[package]] name = "coverage" version = "7.10.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f4/2c/253cc41cd0f40b84c1c34c5363e0407d73d4a1cae005fed6db3b823175bd/coverage-7.10.3.tar.gz", hash = "sha256:812ba9250532e4a823b070b0420a36499859542335af3dca8f47fc6aa1a05619", size = 822936, upload-time = "2025-08-10T21:27:39.968Z" } wheels = [ { url 
= "https://files.pythonhosted.org/packages/0a/ff/239e4de9cc149c80e9cc359fab60592365b8c4cbfcad58b8a939d18c6898/coverage-7.10.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b99e87304ffe0eb97c5308447328a584258951853807afdc58b16143a530518a", size = 216298, upload-time = "2025-08-10T21:26:10.973Z" }, @@ -416,10 +416,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/19/e67f4ae24e232c7f713337f3f4f7c9c58afd0c02866fb07c7b9255a19ed7/coverage-7.10.3-py3-none-any.whl", hash = "sha256:416a8d74dc0adfd33944ba2f405897bab87b7e9e84a391e09d241956bd953ce1", size = 207921, upload-time = "2025-08-10T21:27:38.254Z" }, ] +[[package]] +name = "cql2" +version = "0.4.1" +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/0dea39ec62f63020ff9c226e6f0113b9c5f4b9cf9b5029648b835e319003/cql2-0.4.1.tar.gz", hash = "sha256:993bdfc4528bbb1082f3925d41823cd1c104ea348f14d529a1d79963ded336bb", size = 173055, upload-time = "2025-11-02T14:18:45.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/cb/b49a05cc6d929bcf69ee5341a7c4205d18e5597746d53cf510a2bd861357/cql2-0.4.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:497fe252be574be4e4996aa7d832afff30ed5e4fc7a6d32a2164d328d4051cfc", size = 3620729, upload-time = "2025-11-02T14:18:35.737Z" }, + { url = "https://files.pythonhosted.org/packages/47/00/66388418a4c40eacd80db51b4f6d9bd806bc92a0a30f7018091e648d2005/cql2-0.4.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:0c359b74d8649f84c06a5c78926c1434f7be7cee573fce970a1dae1b380e8359", size = 3404244, upload-time = "2025-11-02T14:18:33.815Z" }, + { url = "https://files.pythonhosted.org/packages/90/c7/6f614b054da6ddad3c668e1bb9074537b731330293b6a7fcbc4fa7a787a6/cql2-0.4.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2c8cfabf8a2c44422e518c804256686d2f933a6db34f786c6f457c96f1cfda5", size = 3691304, upload-time = 
"2025-11-02T14:18:19.727Z" }, + { url = "https://files.pythonhosted.org/packages/54/43/20edbaf203b01fd7bdcb0b9b977fe57fb09c1f1a7b662bc0f9d5825ed3c2/cql2-0.4.1-cp310-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e6e4fc5611c1366182171cfcd52fe35808951691e4bfc2ac5c4b12b63f42474", size = 3610656, upload-time = "2025-11-02T14:18:22.878Z" }, + { url = "https://files.pythonhosted.org/packages/94/c1/fa6e5f23ab3e2b030037af3831d5ae177fe86aad8a650386168b54ad08a1/cql2-0.4.1-cp310-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35eed733a9753624dbb52ee09f1cae8f6e99a4d2178f455a868116fd80623128", size = 4080725, upload-time = "2025-11-02T14:18:29.251Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f9/309c92d24df173cb3095aa7a5e9608870e4e077712c77658f3736305f2b9/cql2-0.4.1-cp310-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1c912804fbbca60f880dddd2c56e8d5e466215da5638a1a9701469a0f79374a", size = 5443050, upload-time = "2025-11-02T14:18:25.122Z" }, + { url = "https://files.pythonhosted.org/packages/09/d0/c43017725f49705fec98a5dee480357588126858be24bf6798dadd9042d9/cql2-0.4.1-cp310-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b914bb4d2662099cbb20b67e3d5b00b42c57ca0a39e50db2658f67589a74f78", size = 3862800, upload-time = "2025-11-02T14:18:27.146Z" }, + { url = "https://files.pythonhosted.org/packages/14/99/75b274ebcb7ea6a7fff10c44ddfc37c0af7743d54fbc34606e2ec7644d6c/cql2-0.4.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73cbc22d271dbb2ec9dcaca3f39a00caa1fbc00d258417b974201bd8cce5211c", size = 3892182, upload-time = "2025-11-02T14:18:31.538Z" }, + { url = "https://files.pythonhosted.org/packages/47/0d/f61cd0368cc2318fb320b811dbcde4bdc255105b500a0369c2b3362b2a4b/cql2-0.4.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:973be014ac95a7aacadc1b0735557573d6ceee2012548e25d9b5350e0380c36b", size = 3872941, upload-time = "2025-11-02T14:18:37.926Z" }, + { url 
= "https://files.pythonhosted.org/packages/28/a2/d5d68d358840638f60db3b4d5dde3e2b9724765822e3956624c0f4bf5b6b/cql2-0.4.1-cp310-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:ebbabef9998f458632e9718181f82d39fbbae128c96544b11100184c45565424", size = 3869868, upload-time = "2025-11-02T14:18:40.069Z" }, + { url = "https://files.pythonhosted.org/packages/94/03/0b559d2c8646c318eeff3ffe06d1bf3d194cfa92618d4e0e00620de64c26/cql2-0.4.1-cp310-abi3-musllinux_1_2_i686.whl", hash = "sha256:c676e9bfbd4cc9b8dbbb730cd2079eeac17fccc785a81abdb45b4455a958923b", size = 4046731, upload-time = "2025-11-02T14:18:41.902Z" }, + { url = "https://files.pythonhosted.org/packages/22/58/71cfccce51644df927b24514776262e1ee35e7c743e66927d7f10ee0c4ce/cql2-0.4.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:710ce8852e7e847bb4ed7e8769795b073398fc13a1b7731e00371ae8d5930613", size = 4027730, upload-time = "2025-11-02T14:18:43.851Z" }, + { url = "https://files.pythonhosted.org/packages/12/da/7e2226dd8b3070e58d5b09888d735871b3d0cd0c2c1f4840b6841b0ea48f/cql2-0.4.1-cp310-abi3-win32.whl", hash = "sha256:8ae9a391cb274c707f809aec3748dfb578883146bf8a660fc62cd1fa3d52fb66", size = 3013646, upload-time = "2025-11-02T14:18:48.74Z" }, + { url = "https://files.pythonhosted.org/packages/d8/a7/81186bb5cfa1a0935bb0b8dfcc7431aec1a5a8193f09d12c3d1d997b9802/cql2-0.4.1-cp310-abi3-win_amd64.whl", hash = "sha256:3e17cc468b253b8a56fecdfb6d206ae45fb53a2977f71981b9697f5ab1159d21", size = 3287870, upload-time = "2025-11-02T14:18:46.929Z" }, +] + [[package]] name = "croniter" version = "6.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "python-dateutil" }, { name = "pytz" }, @@ -432,7 +454,7 @@ wheels = [ [[package]] name = "cycler" version = "0.12.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { 
url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, @@ -441,7 +463,7 @@ wheels = [ [[package]] name = "dask" version = "2025.7.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "click" }, { name = "cloudpickle" }, @@ -459,7 +481,7 @@ wheels = [ [[package]] name = "decorator" version = "5.2.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, @@ -468,7 +490,7 @@ wheels = [ [[package]] name = "distributed" version = "2025.7.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "click" }, { name = "cloudpickle" }, @@ -494,7 +516,7 @@ wheels = [ [[package]] 
name = "docutils" version = "0.21.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444, upload-time = "2024-04-23T18:57:18.24Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408, upload-time = "2024-04-23T18:57:14.835Z" }, @@ -503,7 +525,7 @@ wheels = [ [[package]] name = "fastapi" version = "0.116.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pydantic" }, { name = "starlette" }, @@ -517,7 +539,7 @@ wheels = [ [[package]] name = "fiona" version = "1.10.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "attrs" }, { name = "certifi" }, @@ -536,7 +558,7 @@ wheels = [ [[package]] name = "flask" version = "2.3.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "blinker" }, { name = "click" }, @@ -552,7 +574,7 @@ wheels = [ [[package]] name = "flask-rangerequest" version = "0.0.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "flask" }, ] @@ -564,7 +586,7 @@ wheels = [ [[package]] name = "fonttools" version = "4.59.0" -source = { registry = 
"https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/8a/27/ec3c723bfdf86f34c5c82bf6305df3e0f0d8ea798d2d3a7cb0c0a866d286/fonttools-4.59.0.tar.gz", hash = "sha256:be392ec3529e2f57faa28709d60723a763904f71a2b63aabe14fee6648fe3b14", size = 3532521, upload-time = "2025-07-16T12:04:54.613Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/f3/bb/390990e7c457d377b00890d9f96a3ca13ae2517efafb6609c1756e213ba4/fonttools-4.59.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:78813b49d749e1bb4db1c57f2d4d7e6db22c253cb0a86ad819f5dc197710d4b2", size = 2758704, upload-time = "2025-07-16T12:04:22.217Z" }, @@ -581,7 +603,7 @@ wheels = [ [[package]] name = "frozenlist" version = "1.7.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/79/b1/b64018016eeb087db503b038296fd782586432b9c077fc5c7839e9cb6ef6/frozenlist-1.7.0.tar.gz", hash = "sha256:2e310d81923c2437ea8670467121cc3e9b0f76d3043cc1d2331d56c7fb7a3a8f", size = 45078, upload-time = "2025-06-09T23:02:35.538Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/24/90/6b2cebdabdbd50367273c20ff6b57a3dfa89bd0762de02c3a1eb42cb6462/frozenlist-1.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee80eeda5e2a4e660651370ebffd1286542b67e268aa1ac8d6dbe973120ef7ee", size = 79791, upload-time = "2025-06-09T23:01:09.368Z" }, @@ -624,7 +646,7 @@ wheels = [ [[package]] name = "fsspec" version = "2025.7.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/8b/02/0835e6ab9cfc03916fe3f78c0956cfcdb6ff2669ffa6651065d5ebf7fc98/fsspec-2025.7.0.tar.gz", hash = 
"sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58", size = 304432, upload-time = "2025-07-15T16:05:21.19Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597, upload-time = "2025-07-15T16:05:19.529Z" }, @@ -641,7 +663,7 @@ s3 = [ [[package]] name = "geojson" version = "3.2.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/85/5a/33e761df75c732fcea94aaf01f993d823138581d10c91133da58bc231e63/geojson-3.2.0.tar.gz", hash = "sha256:b860baba1e8c6f71f8f5f6e3949a694daccf40820fa8f138b3f712bd85804903", size = 24574, upload-time = "2024-12-21T19:35:29.835Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/a7fa2d650602731c90e0a86279841b4586e14228199e8c09165ba4863e29/geojson-3.2.0-py3-none-any.whl", hash = "sha256:69d14156469e13c79479672eafae7b37e2dcd19bdfd77b53f74fa8fe29910b52", size = 15040, upload-time = "2024-12-21T19:37:02.149Z" }, @@ -650,7 +672,7 @@ wheels = [ [[package]] name = "geojson-pydantic" version = "2.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pydantic" }, ] @@ -662,7 +684,7 @@ wheels = [ [[package]] name = "h11" version = "0.16.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = 
"2025-04-24T03:35:25.427Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, @@ -671,7 +693,7 @@ wheels = [ [[package]] name = "httpcore" version = "1.0.9" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "certifi" }, { name = "h11" }, @@ -684,7 +706,7 @@ wheels = [ [[package]] name = "httptools" version = "0.6.4" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a7/9a/ce5e1f7e131522e6d3426e8e7a490b3a01f39a6696602e1c4f33f9e94277/httptools-0.6.4.tar.gz", hash = "sha256:4e93eee4add6493b59a5c514da98c939b244fce4a0d8879cd3f466562f4b7d5c", size = 240639, upload-time = "2024-10-16T19:45:08.902Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/94/a3/9fe9ad23fd35f7de6b91eeb60848986058bd8b5a5c1e256f5860a160cc3e/httptools-0.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ade273d7e767d5fae13fa637f4d53b6e961fb7fd93c7797562663f0171c26660", size = 197214, upload-time = "2024-10-16T19:44:38.738Z" }, @@ -699,7 +721,7 @@ wheels = [ [[package]] name = "httpx" version = "0.28.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "anyio" }, { name = "certifi" }, @@ -714,7 +736,7 @@ wheels = [ [[package]] name = "idna" version = "3.10" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, @@ -723,7 +745,7 @@ wheels = [ [[package]] name = "imagesize" version = "1.4.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026, upload-time = "2022-07-01T12:21:05.687Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" }, @@ -732,7 +754,7 @@ wheels = [ [[package]] name = "importlib-metadata" version = "8.7.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "zipp" }, ] @@ -744,7 +766,7 @@ wheels = [ [[package]] name = "importlib-resources" version = "6.5.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/cf/8c/f834fbf984f691b4f7ff60f50b514cc3de5cc08abfc3295564dd89c5e2e7/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c", size = 44693, upload-time = "2025-01-03T18:51:56.698Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" }, @@ -753,7 +775,7 @@ wheels = [ [[package]] name = "iniconfig" version = "2.1.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, @@ -762,7 +784,7 @@ wheels = [ [[package]] name = "itsdangerous" version = "2.2.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" }, @@ -771,7 +793,7 @@ wheels = [ [[package]] name = "jinja2" version = "3.1.6" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "markupsafe" }, ] @@ -783,7 +805,7 @@ wheels = [ [[package]] name = "jmespath" version = "1.0.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, @@ -792,7 +814,7 @@ wheels = [ [[package]] name = "jsonschema" version = "4.25.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "attrs" }, { name = "jsonschema-specifications" }, @@ -807,7 +829,7 @@ wheels = [ [[package]] name = "jsonschema-specifications" version = "2025.4.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "referencing" }, ] @@ -819,7 +841,7 @@ wheels = [ [[package]] name = 
"kiwisolver" version = "1.4.9" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/31/c1/c2686cda909742ab66c7388e9a1a8521a59eb89f8bcfbee28fc980d07e24/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8", size = 123681, upload-time = "2025-08-10T21:26:26.725Z" }, @@ -878,7 +900,7 @@ wheels = [ [[package]] name = "linkify-it-py" version = "2.0.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "uc-micro-py" }, ] @@ -890,7 +912,7 @@ wheels = [ [[package]] name = "locket" version = "1.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/2f/83/97b29fe05cb6ae28d2dbd30b81e2e402a3eed5f460c26e9eaa5895ceacf5/locket-1.0.0.tar.gz", hash = "sha256:5c0d4c052a8bbbf750e056a8e65ccd309086f4f0f18a2eac306a8dfa4112a632", size = 4350, upload-time = "2022-04-20T22:04:44.312Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/db/bc/83e112abc66cd466c6b83f99118035867cecd41802f8d044638aa78a106e/locket-1.0.0-py2.py3-none-any.whl", hash = "sha256:b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3", size = 4398, upload-time = "2022-04-20T22:04:42.23Z" }, @@ -899,7 +921,7 @@ wheels = [ [[package]] name = "loguru" version = "0.7.3" -source = { registry = "https://pypi.org/simple" } +source = 
{ registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "win32-setctime", marker = "sys_platform == 'win32'" }, @@ -912,7 +934,7 @@ wheels = [ [[package]] name = "lxml" version = "6.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/c5/ed/60eb6fa2923602fba988d9ca7c5cdbd7cf25faa795162ed538b527a35411/lxml-6.0.0.tar.gz", hash = "sha256:032e65120339d44cdc3efc326c9f660f5f7205f3a535c1fdbf898b29ea01fb72", size = 4096938, upload-time = "2025-06-26T16:28:19.373Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/79/21/6e7c060822a3c954ff085e5e1b94b4a25757c06529eac91e550f3f5cd8b8/lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6da7cd4f405fd7db56e51e96bff0865b9853ae70df0e6720624049da76bde2da", size = 8414372, upload-time = "2025-06-26T16:26:39.079Z" }, @@ -935,8 +957,8 @@ wheels = [ [[package]] name = "mapchete" -version = "2025.6.0" -source = { registry = "https://pypi.org/simple" } +version = "2025.10.1" +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "aiohttp" }, { name = "cachetools" }, @@ -955,6 +977,8 @@ dependencies = [ { name = "pydantic" }, { name = "pydantic-settings" }, { name = "pyproj" }, + { name = "pystac", extra = ["urllib3"] }, + { name = "pystac-client" }, { name = "python-dateutil" }, { name = "rasterio" }, { name = "retry" }, @@ -962,9 +986,9 @@ dependencies = [ { name = "tilematrix" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/86/a4/16d31f5968c8df3ae06c9d9ab88c4dc00673bbb6e9f895f96bbb9c3bf367/mapchete-2025.6.0.tar.gz", hash = "sha256:66c817c16fc7953376fbe68b40b5b4b90c7c8fd44eb80a84e634dc12b9eed1c4", size = 157936, upload-time = "2025-06-05T09:58:19.025Z" } +sdist = 
{ url = "https://files.pythonhosted.org/packages/56/0a/77ec170c30157323360e576b6b4bcad44511c5e7ecf2d6c17d37fda0033b/mapchete-2025.10.1.tar.gz", hash = "sha256:e812c971016a864de0b36a2d74067420c3a4ea3bd78e128c7a2bf76f70d930ee", size = 160626, upload-time = "2025-10-10T10:20:45.017Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/36/e402219d65eced45ca5b4c97074b22b71d647b93a4675577656ce59ba9da/mapchete-2025.6.0-py2.py3-none-any.whl", hash = "sha256:85d0e74a73a23371e0f89950be999048cfaa3bcd08ba895e45583a639f20012c", size = 225284, upload-time = "2025-06-05T09:58:21.54Z" }, + { url = "https://files.pythonhosted.org/packages/24/3f/19534ed329e0acd4acb1ce81348cd2b270e775523848bfa60572071485ae/mapchete-2025.10.1-py2.py3-none-any.whl", hash = "sha256:b2d9bc22dec5aa794ffc3455abd13ce03218ab453cd3791489ffb49ae4d3c36d", size = 228324, upload-time = "2025-10-10T10:20:46.294Z" }, ] [package.optional-dependencies] @@ -993,6 +1017,7 @@ name = "mapchete-eo" source = { editable = "." } dependencies = [ { name = "click" }, + { name = "cql2" }, { name = "croniter" }, { name = "lxml" }, { name = "mapchete", extra = ["complete"] }, @@ -1022,9 +1047,10 @@ test = [ [package.metadata] requires-dist = [ { name = "click" }, + { name = "cql2" }, { name = "croniter" }, { name = "lxml" }, - { name = "mapchete", extras = ["complete"], specifier = ">=2025.6.0" }, + { name = "mapchete", extras = ["complete"], specifier = ">=2025.10.0" }, { name = "opencv-python-headless" }, { name = "pillow" }, { name = "pydantic" }, @@ -1046,7 +1072,7 @@ provides-extras = ["docs", "test"] [[package]] name = "markdown-it-py" version = "4.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "mdurl" }, ] @@ -1066,7 +1092,7 @@ plugins = [ [[package]] name = "markupsafe" version = "3.0.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = 
"https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload-time = "2024-10-18T15:21:24.577Z" }, @@ -1094,7 +1120,7 @@ wheels = [ [[package]] name = "matplotlib" version = "3.9.4" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "contourpy" }, { name = "cycler" }, @@ -1125,7 +1151,7 @@ wheels = [ [[package]] name = "mdit-py-plugins" version = "0.5.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "markdown-it-py" }, ] @@ -1137,7 +1163,7 @@ wheels = [ [[package]] name = "mdurl" version = "0.1.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, 
upload-time = "2022-08-14T12:40:09.779Z" }, @@ -1146,7 +1172,7 @@ wheels = [ [[package]] name = "memray" version = "1.18.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "jinja2" }, { name = "rich" }, @@ -1177,7 +1203,7 @@ wheels = [ [[package]] name = "morecantile" version = "6.2.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "attrs" }, { name = "pydantic" }, @@ -1191,7 +1217,7 @@ wheels = [ [[package]] name = "msgpack" version = "1.1.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555, upload-time = "2025-06-13T06:52:51.324Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a1/38/561f01cf3577430b59b340b51329803d3a5bf6a45864a55f4ef308ac11e3/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0", size = 81677, upload-time = "2025-06-13T06:52:16.64Z" }, @@ -1209,7 +1235,7 @@ wheels = [ [[package]] name = "multidict" version = "6.6.4" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/69/7f/0652e6ed47ab288e3756ea9c0df8b14950781184d4bd7883f4d87dd41245/multidict-6.6.4.tar.gz", hash = "sha256:d2d4e4787672911b48350df02ed3fa3fffdc2f2e8ca06dd6afdf34189b76a9dd", size = 101843, upload-time = "2025-08-11T12:08:48.217Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/3a/5d/e1db626f64f60008320aab00fbe4f23fc3300d75892a3381275b3d284580/multidict-6.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f46a6e8597f9bd71b31cc708195d42b634c8527fecbcf93febf1052cacc1f16e", size = 75848, upload-time = "2025-08-11T12:07:19.912Z" }, @@ -1254,7 +1280,7 @@ wheels = [ [[package]] name = "numexpr" version = "2.11.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, ] @@ -1277,7 +1303,7 @@ wheels = [ [[package]] name = "numpy" version = "2.2.6" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, @@ -1305,7 +1331,7 @@ wheels = [ [[package]] name = "opencv-python-headless" version = "4.12.0.88" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, ] @@ -1322,7 +1348,7 @@ wheels = [ [[package]] name = "oyaml" version = "1.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pyyaml" }, ] @@ -1334,7 +1360,7 @@ wheels = [ [[package]] name = "packaging" 
version = "25.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, @@ -1343,7 +1369,7 @@ wheels = [ [[package]] name = "pandas" version = "2.3.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, { name = "python-dateutil" }, @@ -1370,7 +1396,7 @@ wheels = [ [[package]] name = "partd" version = "1.4.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "locket" }, { name = "toolz" }, @@ -1383,7 +1409,7 @@ wheels = [ [[package]] name = "pillow" version = "11.3.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069, upload-time = "2025-07-01T09:16:30.666Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd", size = 2128328, upload-time = "2025-07-01T09:14:35.276Z" }, @@ -1438,7 +1464,7 @@ wheels = [ [[package]] name = "platformdirs" version = "4.3.8" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, @@ -1447,7 +1473,7 @@ wheels = [ [[package]] name = "pluggy" version = "1.6.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, @@ -1456,7 +1482,7 @@ wheels = [ [[package]] name = "propcache" 
version = "0.3.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a6/16/43264e4a779dd8588c21a70f0709665ee8f611211bdd2c87d952cfa7c776/propcache-0.3.2.tar.gz", hash = "sha256:20d7d62e4e7ef05f221e0db2856b979540686342e7dd9973b815599c7057e168", size = 44139, upload-time = "2025-06-09T22:56:06.081Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/dc/d1/8c747fafa558c603c4ca19d8e20b288aa0c7cda74e9402f50f31eb65267e/propcache-0.3.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ca592ed634a73ca002967458187109265e980422116c0a107cf93d81f95af945", size = 71286, upload-time = "2025-06-09T22:54:54.369Z" }, @@ -1497,7 +1523,7 @@ wheels = [ [[package]] name = "psutil" version = "7.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003, upload-time = "2025-02-13T21:54:07.946Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051, upload-time = "2025-02-13T21:54:12.36Z" }, @@ -1512,7 +1538,7 @@ wheels = [ [[package]] name = "py" version = "1.11.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/98/ff/fec109ceb715d2a6b4c4a85a61af3b40c723a961e8828319fbcb15b868dc/py-1.11.0.tar.gz", hash = 
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", size = 207796, upload-time = "2021-11-04T17:17:01.377Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378", size = 98708, upload-time = "2021-11-04T17:17:00.152Z" }, @@ -1521,7 +1547,7 @@ wheels = [ [[package]] name = "pydantic" version = "2.11.7" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "annotated-types" }, { name = "pydantic-core" }, @@ -1536,7 +1562,7 @@ wheels = [ [[package]] name = "pydantic-core" version = "2.33.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "typing-extensions" }, ] @@ -1564,7 +1590,7 @@ wheels = [ [[package]] name = "pydantic-settings" version = "2.10.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, @@ -1578,7 +1604,7 @@ wheels = [ [[package]] name = "pygments" version = "2.19.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = 
"sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, @@ -1587,7 +1613,7 @@ wheels = [ [[package]] name = "pyparsing" version = "3.2.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/bb/22/f1129e69d94ffff626bdb5c835506b3a5b4f3d070f17ea295e12c2c6f60f/pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be", size = 1088608, upload-time = "2025-03-25T05:01:28.114Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" }, @@ -1596,7 +1622,7 @@ wheels = [ [[package]] name = "pyproj" version = "3.7.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "certifi" }, ] @@ -1615,7 +1641,7 @@ wheels = [ [[package]] name = "pystac" version = "1.13.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "python-dateutil" }, ] @@ -1635,7 +1661,7 @@ validation = [ [[package]] name = "pystac-client" version = "0.9.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pystac", extra = ["validation"] }, { name = "python-dateutil" }, @@ -1649,7 +1675,7 @@ wheels = [ [[package]] name = "pytest" version = "7.4.4" -source = { registry = "https://pypi.org/simple" } +source = { registry = 
"https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "iniconfig" }, @@ -1664,7 +1690,7 @@ wheels = [ [[package]] name = "pytest-cov" version = "6.2.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "coverage" }, { name = "pluggy" }, @@ -1678,7 +1704,7 @@ wheels = [ [[package]] name = "pytest-cover" version = "3.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pytest-cov" }, ] @@ -1690,7 +1716,7 @@ wheels = [ [[package]] name = "pytest-coverage" version = "0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pytest-cover" }, ] @@ -1702,7 +1728,7 @@ wheels = [ [[package]] name = "pytest-lazy-fixture" version = "0.6.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "pytest" }, ] @@ -1714,7 +1740,7 @@ wheels = [ [[package]] name = "python-dateutil" version = "2.9.0.post0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "six" }, ] @@ -1726,7 +1752,7 @@ wheels = [ [[package]] name = "python-dotenv" version = "1.1.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f6/b0/4bc07ccd3572a2f9df7e6782f52b0c6c90dcbb803ac4a167702d7d0dfe1e/python_dotenv-1.1.1.tar.gz", hash = 
"sha256:a8a6399716257f45be6a007360200409fce5cda2661e3dec71d23dc15f6189ab", size = 41978, upload-time = "2025-06-24T04:21:07.341Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, @@ -1735,7 +1761,7 @@ wheels = [ [[package]] name = "pytz" version = "2025.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, @@ -1744,7 +1770,7 @@ wheels = [ [[package]] name = "pyyaml" version = "6.0.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" }, @@ -1761,7 +1787,7 @@ wheels = [ [[package]] name = "rasterio" version = "1.4.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "affine" }, { name = "attrs" }, @@ -1783,7 +1809,7 @@ wheels = [ [[package]] name = "referencing" version = "0.36.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, @@ -1796,7 +1822,7 @@ wheels = [ [[package]] name = "requests" version = "2.32.4" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "certifi" }, { name = "charset-normalizer" }, @@ -1811,7 +1837,7 @@ wheels = [ [[package]] name = "retry" version = "0.9.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "decorator" }, { name = "py" }, @@ -1824,7 +1850,7 @@ wheels = [ [[package]] name = "rich" version = "14.1.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, @@ -1837,7 +1863,7 @@ wheels = [ [[package]] name = "rio-tiler" version = "7.8.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "attrs" }, { name = "cachetools" }, @@ -1859,7 +1885,7 @@ wheels = [ [[package]] name = "roman-numerals-py" version = "3.1.0" -source = { registry = "https://pypi.org/simple" } +source = { 
registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/30/76/48fd56d17c5bdbdf65609abbc67288728a98ed4c02919428d4f52d23b24b/roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d", size = 9017, upload-time = "2025-02-22T07:34:54.333Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/53/97/d2cbbaa10c9b826af0e10fdf836e1bf344d9f0abb873ebc34d1f49642d3f/roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c", size = 7742, upload-time = "2025-02-22T07:34:52.422Z" }, @@ -1868,7 +1894,7 @@ wheels = [ [[package]] name = "rpds-py" version = "0.27.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/1e/d9/991a0dee12d9fc53ed027e26a26a64b151d77252ac477e22666b9688bc16/rpds_py-0.27.0.tar.gz", hash = "sha256:8b23cf252f180cda89220b378d917180f29d313cd6a07b2431c0d3b776aae86f", size = 27420, upload-time = "2025-08-07T08:26:39.624Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/81/d2/dfdfd42565a923b9e5a29f93501664f5b984a802967d48d49200ad71be36/rpds_py-0.27.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:443d239d02d9ae55b74015234f2cd8eb09e59fbba30bf60baeb3123ad4c6d5ff", size = 362133, upload-time = "2025-08-07T08:24:04.508Z" }, @@ -1934,7 +1960,7 @@ wheels = [ [[package]] name = "rtree" version = "1.4.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/95/09/7302695875a019514de9a5dd17b8320e7a19d6e7bc8f85dcfb79a4ce2da3/rtree-1.4.1.tar.gz", hash = "sha256:c6b1b3550881e57ebe530cc6cffefc87cd9bf49c30b37b894065a9f810875e46", size = 52425, upload-time = 
"2025-08-13T19:32:01.413Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/04/d9/108cd989a4c0954e60b3cdc86fd2826407702b5375f6dfdab2802e5fed98/rtree-1.4.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d672184298527522d4914d8ae53bf76982b86ca420b0acde9298a7a87d81d4a4", size = 468484, upload-time = "2025-08-13T19:31:50.593Z" }, @@ -1950,7 +1976,7 @@ wheels = [ [[package]] name = "s3fs" version = "2025.7.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "aiobotocore" }, { name = "aiohttp" }, @@ -1964,7 +1990,7 @@ wheels = [ [[package]] name = "s3transfer" version = "0.13.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "botocore" }, ] @@ -1976,7 +2002,7 @@ wheels = [ [[package]] name = "scipy" version = "1.16.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, ] @@ -2023,7 +2049,7 @@ wheels = [ [[package]] name = "shapely" version = "2.1.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, ] @@ -2050,7 +2076,7 @@ wheels = [ [[package]] name = "six" version = "1.17.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, @@ -2059,7 +2085,7 @@ wheels = [ [[package]] name = "sniffio" version = "1.3.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, @@ -2068,7 +2094,7 @@ wheels = [ [[package]] name = "snowballstemmer" version = "3.0.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/75/a7/9810d872919697c9d01295633f5d574fb416d47e535f258272ca1f01f447/snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895", size = 105575, upload-time = "2025-05-09T16:34:51.843Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274, upload-time = "2025-05-09T16:34:50.371Z" }, @@ -2077,7 +2103,7 @@ wheels = [ [[package]] name = "sortedcontainers" 
version = "2.4.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, @@ -2086,7 +2112,7 @@ wheels = [ [[package]] name = "sphinx" version = "8.2.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "alabaster" }, { name = "babel" }, @@ -2114,7 +2140,7 @@ wheels = [ [[package]] name = "sphinx-rtd-theme" version = "3.0.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "docutils" }, { name = "sphinx" }, @@ -2128,7 +2154,7 @@ wheels = [ [[package]] name = "sphinxcontrib-applehelp" version = "2.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053, upload-time = "2024-07-29T01:09:00.465Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300, upload-time = "2024-07-29T01:08:58.99Z" }, @@ -2137,7 +2163,7 @@ wheels = [ [[package]] name = "sphinxcontrib-devhelp" version = "2.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967, upload-time = "2024-07-29T01:09:23.417Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530, upload-time = "2024-07-29T01:09:21.945Z" }, @@ -2146,7 +2172,7 @@ wheels = [ [[package]] name = "sphinxcontrib-htmlhelp" version = "2.1.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617, upload-time = "2024-07-29T01:09:37.889Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705, upload-time = "2024-07-29T01:09:36.407Z" }, 
@@ -2155,7 +2181,7 @@ wheels = [ [[package]] name = "sphinxcontrib-jquery" version = "4.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "sphinx" }, ] @@ -2167,7 +2193,7 @@ wheels = [ [[package]] name = "sphinxcontrib-jsmath" version = "1.0.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787, upload-time = "2019-01-21T16:10:16.347Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, @@ -2176,7 +2202,7 @@ wheels = [ [[package]] name = "sphinxcontrib-qthelp" version = "2.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165, upload-time = "2024-07-29T01:09:56.435Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743, upload-time = "2024-07-29T01:09:54.885Z" }, @@ -2185,7 +2211,7 @@ wheels = [ 
[[package]] name = "sphinxcontrib-serializinghtml" version = "2.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080, upload-time = "2024-07-29T01:10:09.332Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, @@ -2194,7 +2220,7 @@ wheels = [ [[package]] name = "starlette" version = "0.47.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "anyio" }, ] @@ -2206,7 +2232,7 @@ wheels = [ [[package]] name = "tblib" version = "3.1.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/54/95/4b3044ec4bf248186769629bbfb495a458deb6e4c1f9eff7f298ae1e336e/tblib-3.1.0.tar.gz", hash = "sha256:06404c2c9f07f66fee2d7d6ad43accc46f9c3361714d9b8426e7f47e595cd652", size = 30766, upload-time = "2025-03-31T12:58:27.473Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/aa5c8b10b2cce7a053018e0d132bd58e27527a0243c4985383d5b6fd93e9/tblib-3.1.0-py3-none-any.whl", hash = "sha256:670bb4582578134b3d81a84afa1b016128b429f3d48e6cbbaecc9d15675e984e", size = 12552, upload-time = "2025-03-31T12:58:26.142Z" }, @@ -2215,7 +2241,7 @@ wheels = [ [[package]] name = "textual" version = "5.3.0" -source = { 
registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "markdown-it-py", extra = ["linkify", "plugins"] }, { name = "platformdirs" }, @@ -2231,7 +2257,7 @@ wheels = [ [[package]] name = "tilebench" version = "0.16.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "fastapi" }, { name = "jinja2" }, @@ -2248,7 +2274,7 @@ wheels = [ [[package]] name = "tilematrix" version = "2024.11.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "affine" }, { name = "click" }, @@ -2264,7 +2290,7 @@ wheels = [ [[package]] name = "toolz" version = "1.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/8a/0b/d80dfa675bf592f636d1ea0b835eab4ec8df6e9415d8cfd766df54456123/toolz-1.0.0.tar.gz", hash = "sha256:2c86e3d9a04798ac556793bced838816296a2f085017664e4995cb40a1047a02", size = 66790, upload-time = "2024-10-04T16:17:04.001Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/03/98/eb27cc78ad3af8e302c9d8ff4977f5026676e130d28dd7578132a457170c/toolz-1.0.0-py3-none-any.whl", hash = "sha256:292c8f1c4e7516bf9086f8850935c799a874039c8bcf959d47b600e4c44a6236", size = 56383, upload-time = "2024-10-04T16:17:01.533Z" }, @@ -2273,7 +2299,7 @@ wheels = [ [[package]] name = "tornado" version = "6.5.2" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/09/ce/1eb500eae19f4648281bb2186927bb062d2438c2e5093d1360391afd2f90/tornado-6.5.2.tar.gz", hash = 
"sha256:ab53c8f9a0fa351e2c0741284e06c7a45da86afb544133201c5cc8578eb076a0", size = 510821, upload-time = "2025-08-08T18:27:00.78Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/f6/48/6a7529df2c9cc12efd2e8f5dd219516184d703b34c06786809670df5b3bd/tornado-6.5.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2436822940d37cde62771cff8774f4f00b3c8024fe482e16ca8387b8a2724db6", size = 442563, upload-time = "2025-08-08T18:26:42.945Z" }, @@ -2292,7 +2318,7 @@ wheels = [ [[package]] name = "tqdm" version = "4.67.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] @@ -2304,7 +2330,7 @@ wheels = [ [[package]] name = "typing-extensions" version = "4.14.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/98/5a/da40306b885cc8c09109dc2e1abd358d5684b1425678151cdaed4731c822/typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36", size = 107673, upload-time = "2025-07-04T13:28:34.16Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" }, @@ -2313,7 +2339,7 @@ wheels = [ [[package]] name = "typing-inspection" version = "0.4.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "typing-extensions" }, ] @@ -2325,7 +2351,7 @@ wheels = [ [[package]] name = "tzdata" version = "2025.2" -source = { registry = 
"https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, @@ -2334,7 +2360,7 @@ wheels = [ [[package]] name = "uc-micro-py" version = "1.0.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/91/7a/146a99696aee0609e3712f2b44c6274566bc368dfe8375191278045186b8/uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a", size = 6043, upload-time = "2024-02-09T16:52:01.654Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/37/87/1f677586e8ac487e29672e4b17455758fce261de06a0d086167bb760361a/uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5", size = 6229, upload-time = "2024-02-09T16:52:00.371Z" }, @@ -2343,7 +2369,7 @@ wheels = [ [[package]] name = "urllib3" version = "2.5.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, 
upload-time = "2025-06-18T14:07:41.644Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, @@ -2352,7 +2378,7 @@ wheels = [ [[package]] name = "uvicorn" version = "0.35.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "click" }, { name = "h11" }, @@ -2376,7 +2402,7 @@ standard = [ [[package]] name = "uvloop" version = "0.21.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/af/c0/854216d09d33c543f12a44b393c402e89a920b1a0a7dc634c42de91b9cf6/uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3", size = 2492741, upload-time = "2024-10-14T23:38:35.489Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/3f/8d/2cbef610ca21539f0f36e2b34da49302029e7c9f09acef0b1c3b5839412b/uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281", size = 1468123, upload-time = "2024-10-14T23:38:00.688Z" }, @@ -2390,7 +2416,7 @@ wheels = [ [[package]] name = "watchfiles" version = "1.1.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "anyio" }, ] @@ -2444,7 +2470,7 @@ wheels = [ [[package]] name = "websockets" version = "15.0.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, @@ -2464,7 +2490,7 @@ wheels = [ [[package]] name = "werkzeug" version = "3.1.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "markupsafe" }, ] @@ -2476,7 +2502,7 @@ wheels = [ [[package]] name = "win32-setctime" version = "1.2.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b663af0e9bb3d4de6578d08f46b1b101c2442fd9aecaa2/win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0", size = 4867, upload-time = "2024-12-07T15:28:28.314Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, @@ -2485,7 +2511,7 @@ wheels = [ [[package]] name = "wrapt" version = "1.17.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload-time = "2025-08-12T05:51:48.627Z" }, @@ -2524,7 +2550,7 @@ wheels = [ [[package]] name = "xarray" version = "2025.7.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "numpy" }, { name = "packaging" }, @@ -2538,7 +2564,7 @@ wheels = [ [[package]] name = "yarl" version = "1.20.1" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } dependencies = [ { name = "idna" }, { name = "multidict" }, @@ -2586,7 +2612,7 @@ wheels = [ [[package]] name = "zict" version = "3.0.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/d1/ac/3c494dd7ec5122cff8252c1a209b282c0867af029f805ae9befd73ae37eb/zict-3.0.0.tar.gz", hash = "sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5", size = 33238, upload-time = "2023-04-17T21:41:16.041Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/80/ab/11a76c1e2126084fde2639514f24e6111b789b0bfa4fc6264a8975c7e1f1/zict-3.0.0-py2.py3-none-any.whl", hash = "sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae", size = 43332, upload-time = "2023-04-17T21:41:13.444Z" }, @@ -2595,7 +2621,7 @@ wheels = [ 
[[package]] name = "zipp" version = "3.23.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://gitlab.eox.at/api/v4/projects/255/packages/pypi/simple" } sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, From f0d8b5ac79f2ffb54f53008e13475012d1f818a6 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 3 Nov 2025 17:30:55 +0100 Subject: [PATCH 19/46] streamline code --- mapchete_eo/io/assets.py | 14 ++++++------- tests/platforms/sentinel2/test_product.py | 24 +++++++++++------------ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/mapchete_eo/io/assets.py b/mapchete_eo/io/assets.py index de3ceee9..69d7a59c 100644 --- a/mapchete_eo/io/assets.py +++ b/mapchete_eo/io/assets.py @@ -82,7 +82,7 @@ def asset_to_np_array( path = asset_mpath(item, asset) # find out asset details if raster:bands is available - stac_raster_bands = STACRasterBandProperties.from_asset( + band_properties = STACRasterBandProperties.from_asset( item.assets[asset], nodataval=nodataval ) @@ -92,23 +92,23 @@ def asset_to_np_array( indexes=indexes, grid=grid, resampling=resampling.name, - dst_nodata=stac_raster_bands.nodata, + dst_nodata=band_properties.nodata, ).data - if apply_offset and stac_raster_bands.offset: - data_type = stac_raster_bands.data_type or data.dtype + if apply_offset and band_properties.offset: + data_type = band_properties.data_type or data.dtype # determine value range for the target data_type clip_min, 
clip_max = dtype_ranges[str(data_type)] # increase minimum clip value to avoid collission with nodata value - if clip_min == stac_raster_bands.nodata: + if clip_min == band_properties.nodata: clip_min += 1 data[:] = ( ( - ((data * stac_raster_bands.scale) + stac_raster_bands.offset) - / stac_raster_bands.scale + ((data * band_properties.scale) + band_properties.offset) + / band_properties.scale ) .round() .clip(clip_min, clip_max) diff --git a/tests/platforms/sentinel2/test_product.py b/tests/platforms/sentinel2/test_product.py index 7eb8f541..92213170 100644 --- a/tests/platforms/sentinel2/test_product.py +++ b/tests/platforms/sentinel2/test_product.py @@ -378,7 +378,7 @@ def test_read_brdf_scl_classes(s2_stac_item_half_footprint): scl = product.read_scl(grid=tile).data available_scl_classes = [SceneClassification(i) for i in np.unique(scl)] # for each available class, activate/deactivate BRDF correction and compare with rest of image - uncorrected = product.read_np_array(assets=assets, grid=tile) + uncorrected = product.read_np_array(assets=assets, grid=tile)[0] for scl_class in available_scl_classes: corrected = product.read_np_array( assets=assets, @@ -393,19 +393,17 @@ def test_read_brdf_scl_classes(s2_stac_item_half_footprint): ) ], ), - ) + )[0] scl_class_mask = np.where(scl == scl_class.value, True, False) - for corrected_band, uncorrected_band in zip(corrected, uncorrected): - # there should be some pixels not affected by correction - assert np.where(corrected_band == uncorrected_band, True, False).any() - # make sure pixel were not corrected for SCL class - assert ( - uncorrected_band[scl_class_mask] == corrected_band[scl_class_mask] - ).all() - # make sure all other pixels were corrected - assert ( - uncorrected_band[~scl_class_mask] != corrected_band[~scl_class_mask] - ).all() + + # there should be some pixels not affected by correction + assert np.where(corrected == uncorrected, True, False).any() + + # make sure pixel were not corrected for SCL class 
+ assert (uncorrected[scl_class_mask] == corrected[scl_class_mask]).all() + + # make sure all other pixels were corrected + assert (uncorrected[~scl_class_mask] != corrected[~scl_class_mask]).all() def test_read_brdf_scl_classes_inversed(s2_stac_item_half_footprint): From 138dd47f225c180169793974d6e6d520bb267bf3 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 08:18:34 +0100 Subject: [PATCH 20/46] make asset_mpath() and get_item_property() handle multiple keys; add more default field values to S2Metadata.from_stac_item() --- mapchete_eo/io/items.py | 57 ++++++++++++------- mapchete_eo/io/path.py | 27 ++++++--- .../sentinel2/metadata_parser/s2metadata.py | 56 +++++++----------- tests/platforms/sentinel2/test_product.py | 5 +- 4 files changed, 76 insertions(+), 69 deletions(-) diff --git a/mapchete_eo/io/items.py b/mapchete_eo/io/items.py index aa71ae96..1ebc20e1 100644 --- a/mapchete_eo/io/items.py +++ b/mapchete_eo/io/items.py @@ -1,5 +1,5 @@ import logging -from typing import Any, List, Optional +from typing import Any, List, Optional, Tuple, Union import numpy.ma as ma import pystac @@ -69,7 +69,8 @@ def expand_params(param: Any, length: int) -> List[Any]: def get_item_property( item: pystac.Item, - property: str, + property: Union[str, Tuple[str, ...]], + default: Any = None, ) -> Any: """ Return item property. @@ -104,28 +105,40 @@ def get_item_property( | ``collection`` | The collection ID of an Item's collection. 
| +--------------------+--------------------------------------------------------+ """ - if property == "id": - return item.id - elif property in ["year", "month", "day", "date", "datetime"]: - if item.datetime is None: # pragma: no cover - raise ValueError( - f"STAC item has no datetime attached, thus cannot get property {property}" - ) - elif property == "date": - return item.datetime.date().isoformat() - elif property == "datetime": - return item.datetime + + def _get_item_property(item: pystac.Item, property: str) -> Any: + if property == "id": + return item.id + elif property in ["year", "month", "day", "date", "datetime"]: + if item.datetime is None: # pragma: no cover + raise ValueError( + f"STAC item has no datetime attached, thus cannot get property {property}" + ) + elif property == "date": + return item.datetime.date().isoformat() + elif property == "datetime": + return item.datetime + else: + return item.datetime.__getattribute__(property) + elif property == "collection": + return item.collection_id + elif property in item.properties: + return item.properties[property] + elif property in item.extra_fields: + return item.extra_fields[property] + elif property == "stac_extensions": + return item.stac_extensions else: - return item.datetime.__getattribute__(property) - elif property == "collection": - return item.collection_id - elif property in item.properties: - return item.properties[property] - elif property in item.extra_fields: - return item.extra_fields[property] - elif property == "stac_extensions": - return item.stac_extensions + raise KeyError + + for prop in property if isinstance(property, tuple) else (property,): + try: + return _get_item_property(item, prop) + except KeyError: + pass else: + if default is not None: + return default raise KeyError( f"item {item.id} does not have property {property} in its datetime, properties " f"({', '.join(item.properties.keys())}) or extra_fields " diff --git a/mapchete_eo/io/path.py b/mapchete_eo/io/path.py 
index 1d400fba..a404310f 100644 --- a/mapchete_eo/io/path.py +++ b/mapchete_eo/io/path.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from enum import Enum from tempfile import TemporaryDirectory -from typing import Generator +from typing import Generator, Tuple, Union from xml.etree.ElementTree import Element, fromstring import fsspec @@ -126,19 +126,30 @@ def cached_path(path: MPath, active: bool = True) -> Generator[MPath, None, None def asset_mpath( item: pystac.Item, - asset: str, + asset: Union[str, Tuple[str, ...]], fs: fsspec.AbstractFileSystem = None, absolute_path: bool = True, ) -> MPath: """Return MPath instance with asset href.""" - try: + def _asset_mpath( + item: pystac.Item, + asset: str, + fs: fsspec.AbstractFileSystem = None, + absolute_path: bool = True, + ) -> MPath: asset_path = MPath(item.assets[asset].href, fs=fs) - except KeyError: + if absolute_path and not asset_path.is_absolute(): + return MPath(item.get_self_href(), fs=fs).parent / asset_path + else: + return asset_path + + for single_asset in asset if isinstance(asset, tuple) else (asset,): + try: + return _asset_mpath(item, single_asset, fs=fs, absolute_path=absolute_path) + except KeyError: + pass + else: raise AssetKeyError( f"{item.id} no asset named '{asset}' found in assets: {', '.join(item.assets.keys())}" ) - if absolute_path and not asset_path.is_absolute(): - return MPath(item.get_self_href(), fs=fs).parent / asset_path - else: - return asset_path diff --git a/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py index 4b4d4d20..7e6719cf 100644 --- a/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py +++ b/mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py @@ -7,7 +7,7 @@ import logging from functools import cached_property -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Tuple, Union from xml.etree.ElementTree import 
Element, ParseError import numpy as np @@ -31,6 +31,8 @@ from mapchete_eo.exceptions import AssetEmpty, AssetMissing, CorruptedProductMetadata from mapchete_eo.io import open_xml, read_mask_as_raster +from mapchete_eo.io.items import get_item_property +from mapchete_eo.io.path import asset_mpath from mapchete_eo.platforms.sentinel2.metadata_parser.models import ( ViewingIncidenceAngles, SunAngleData, @@ -155,53 +157,33 @@ def from_metadata_xml( @staticmethod def from_stac_item( item: pystac.Item, - metadata_xml_asset_name: List[str] = ["metadata", "granule_metadata"], - boa_offset_field: Optional[str] = None, - processing_baseline_field: Union[str, List[str]] = [ + metadata_xml_asset_name: Tuple[str, ...] = ("metadata", "granule_metadata"), + boa_offset_field: Union[str, Tuple[str, ...]] = ( + "earthsearch:boa_offset_applied" + ), + processing_baseline_field: Union[str, Tuple[str, ...]] = ( "s2:processing_baseline", "sentinel2:processing_baseline", "processing:version", - ], + ), **kwargs, ) -> S2Metadata: # try to find path to metadata.xml - for metadata_asset in metadata_xml_asset_name: - if metadata_asset in item.assets: - metadata_path = MPath(item.assets[metadata_asset].href) - break - else: # pragma: no cover - raise KeyError( - f"could not find path to metadata XML file in assets: {', '.join(item.assets.keys())}" - ) - - # maek path absolute - if metadata_path.is_remote() or metadata_path.is_absolute(): - metadata_xml = metadata_path - else: - metadata_xml = MPath(item.self_href).parent / metadata_path + metadata_xml_path = asset_mpath(item, metadata_xml_asset_name) + # make path absolute + if not (metadata_xml_path.is_remote() or metadata_xml_path.is_absolute()): + metadata_xml_path = MPath(item.self_href).parent / metadata_xml_path # try to find information on processing baseline version - for field in ( - processing_baseline_field - if isinstance(processing_baseline_field, list) - else [processing_baseline_field] - ): - try: - processing_baseline = 
item.properties[field] - break - except KeyError: - pass - else: # pragma: no cover - raise KeyError( - f"could not find processing baseline version in item properties: {item.properties}" - ) + processing_baseline = get_item_property(item, processing_baseline_field) + + # see if boa_offset_applied flag is available + boa_offset_applied = get_item_property(item, boa_offset_field, default=False) return S2Metadata.from_metadata_xml( - metadata_xml=metadata_xml, + metadata_xml=metadata_xml_path, processing_baseline=processing_baseline, - boa_offset_applied=item.properties[boa_offset_field] - if boa_offset_field - else False, + boa_offset_applied=boa_offset_applied, **kwargs, ) diff --git a/tests/platforms/sentinel2/test_product.py b/tests/platforms/sentinel2/test_product.py index 92213170..cc804b9b 100644 --- a/tests/platforms/sentinel2/test_product.py +++ b/tests/platforms/sentinel2/test_product.py @@ -7,6 +7,7 @@ from mapchete.path import MPath from mapchete.tile import BufferedTilePyramid + try: from mapchete import Bounds except ImportError: @@ -696,8 +697,8 @@ def test_read_levelled_cube_broken_slice(stac_item_missing_detector_footprints): ) def test_read_apply_offset(asset, s2_stac_item, s2_stac_item_jp2): assets = [asset] - cog_product = S2Product(s2_stac_item) - jp2_product = S2Product(s2_stac_item_jp2) + cog_product = S2Product.from_stac_item(s2_stac_item) + jp2_product = S2Product.from_stac_item(s2_stac_item_jp2) tile = _get_product_tile(cog_product) # (1) read array from COG archive where offset was already applied by the provider From 1033c13148bf740e5738ecb006223c7825dda9d1 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 09:15:35 +0100 Subject: [PATCH 21/46] fix query param --- mapchete_eo/platforms/sentinel2/config.py | 21 +++++++++------------ mapchete_eo/search/stac_search.py | 6 +++--- tests/test_catalog.py | 4 +--- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/config.py 
b/mapchete_eo/platforms/sentinel2/config.py index 21efbe46..9f9ca635 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -119,9 +119,7 @@ class Sentinel2DriverConfig(BaseDriverConfig): # for backwards compatibility, archive should be converted to # catalog & data_archive # archive: ArchiveClsFromString = AWSL2ACOGv1 - - # don't know yet how to handle this - cat_baseurl: Optional[MPathLike] = None + # cat_baseurl: Optional[MPathLike] = None search_index: Optional[MPathLike] = None # custom params @@ -134,7 +132,7 @@ class Sentinel2DriverConfig(BaseDriverConfig): @model_validator(mode="before") def deprecated_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: - archive = values.get("archive") + archive = values.pop("archive", None) if archive: warnings.warn( "'archive' will be deprecated soon. Please use 'source'.", @@ -142,10 +140,7 @@ def deprecated_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: stacklevel=2, ) if values.get("source") is None: - try: - values["source"] = DEPRECATED_ARCHIVES[archive] - except KeyError: - raise + values["source"] = DEPRECATED_ARCHIVES[archive] cat_baseurl = values.pop("cat_baseurl", None) if cat_baseurl: @@ -160,6 +155,11 @@ def deprecated_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: ) values["source"] = [dict(collection=cat_baseurl, catalog_type="static")] + # add default source if necessary + sources = values.get("source", []) + if not sources: + values["source"] = [default_source.model_dump(exclude_none=True)] + max_cloud_cover = values.pop("max_cloud_cover", None) if max_cloud_cover: warnings.warn( @@ -167,11 +167,8 @@ def deprecated_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: category=DeprecationWarning, stacklevel=2, ) - sources = values.get("source", []) - if not sources: - raise ValueError("no sources defined") updated_sources = [] - for source in sources: + for source in values.get("source", []): if source.get("query") is not None: raise 
ValueError( f"deprecated max_cloud_cover is set but also a query field is given in {source}" diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 61122134..f9e83d6f 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -5,6 +5,7 @@ from functools import cached_property from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Set, Union +from cql2 import Expr from mapchete import Timer from mapchete.path import MPathLike from mapchete.tile import BufferedTilePyramid @@ -128,13 +129,12 @@ def _searches(): yield search for search in _searches(): - for count, item in enumerate(search.items(), 1): + for item in search.items(): item_path = item.get_self_href() if item_path in self.blacklist: # pragma: no cover logger.debug("item %s found in blacklist and skipping", item_path) else: yield item - logger.debug("returned %s items in total", count) def _eo_bands(self) -> List[str]: for collection_name in self.collections: @@ -192,7 +192,7 @@ def _search( search_params = dict( self.default_search_params, datetime=f"{start}/{end}", - query=query, + query=Expr(query).to_json() if query else None, **kwargs, ) if ( diff --git a/tests/test_catalog.py b/tests/test_catalog.py index ca1b3cfc..59e99bde 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -139,8 +139,6 @@ def test_write_static_catalog_metadata_assets(static_catalog_small, tmp_path): def test_static_catalog_cloud_percent(s2_stac_collection): all_products = list(STACStaticCatalog(s2_stac_collection).search()) filtered_products = list( - STACStaticCatalog(s2_stac_collection).search( - search_kwargs=dict(max_cloud_cover=20) - ) + STACStaticCatalog(s2_stac_collection).search(query="eo:cloud_cover<=20") ) assert len(all_products) > len(filtered_products) From 55a019161ef9b0fcd611fd5d9c9b21a026d93dfb Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 10:44:56 +0100 Subject: [PATCH 22/46] add metadata xml 
mapper for CDSE --- .../preconfigured_sources/item_mappers.py | 2 + .../metadata_xml_mappers.py | 60 +++++++++++++++++++ tests/platforms/sentinel2/test_product.py | 7 ++- tests/platforms/sentinel2/test_sources.py | 2 +- 4 files changed, 68 insertions(+), 3 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index 3b3eeb20..cd24e22a 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -7,6 +7,7 @@ creates_s2metadata, ) from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_mappers import ( + CDSEPathMapper, EarthSearchPathMapper, EarthSearchC1PathMapper, ) @@ -154,6 +155,7 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: def cdse_s2metadata(item: Item) -> S2Metadata: return S2Metadata.from_stac_item( item, + path_mapper=CDSEPathMapper(MPath(item.assets["granule_metadata"].href)), processing_baseline_field="processing:version", ) diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py index d96a89e6..928ea6dd 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py @@ -155,3 +155,63 @@ def __init__( self._baseurl = alternative_metadata_baseurl self._protocol = protocol self.processing_baseline = ProcessingBaseline.from_version(baseline_version) + + +class CDSEPathMapper(S2MetadataPathMapper): + _MASK_FILENAMES = { + ProductQI.classification: "MSK_CLASSI_B00.jp2", + ProductQI.cloud_probability: "MSK_CLDPRB_{resolution}.jp2", + ProductQI.snow_probability: "MSK_SNWPRB_{resolution}.jp2", + BandQI.detector_footprints: "MSK_DETFOO_{band_identifier}.jp2", + BandQI.technical_quality: 
"MSK_QUALIT_{band_identifier}.jp2", + } + + def __init__( + self, + url: MPathLike, + baseline_version: str = "04.00", + **kwargs, + ): + url = MPath.from_inp(url) + self._path = url.parent + self.processing_baseline = ProcessingBaseline.from_version(baseline_version) + + def product_qi_mask( + self, + qi_mask: ProductQI, + resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"], + ) -> MPath: + """Determine product QI mask according to Sinergise bucket schema.""" + mask_path = self._MASK_FILENAMES[qi_mask] + key = f"QI_DATA/{mask_path.format(resolution=resolution.name)}" + return self._path / key + + def classification_mask(self) -> MPath: + return self.product_qi_mask(ProductQI.classification) + + def cloud_probability_mask( + self, resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"] + ) -> MPath: + return self.product_qi_mask(ProductQI.cloud_probability, resolution=resolution) + + def snow_probability_mask( + self, resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"] + ) -> MPath: + return self.product_qi_mask(ProductQI.snow_probability, resolution=resolution) + + def band_qi_mask(self, qi_mask: BandQI, band: L2ABand) -> MPath: + """Determine product QI mask according to Sinergise bucket schema.""" + try: + mask_path = self._MASK_FILENAMES[qi_mask] + except KeyError: + raise DeprecationWarning( + f"'{qi_mask.name}' quality mask not found in this product" + ) + key = f"QI_DATA/{mask_path.format(band_identifier=band.name)}" + return self._path / key + + def technical_quality_mask(self, band: L2ABand) -> MPath: + return self.band_qi_mask(BandQI.technical_quality, band) + + def detector_footprints(self, band: L2ABand) -> MPath: + return self.band_qi_mask(BandQI.detector_footprints, band) diff --git a/tests/platforms/sentinel2/test_product.py b/tests/platforms/sentinel2/test_product.py index cc804b9b..5f426d91 100644 --- a/tests/platforms/sentinel2/test_product.py +++ b/tests/platforms/sentinel2/test_product.py @@ -719,13 
+719,16 @@ def test_read_apply_offset(asset, s2_stac_item, s2_stac_item_jp2): assert (jp2_unapplied - 1000 == cog).all() +@pytest.mark.skip( + reason="CDSE metadata file does not exist anymore: s3://eodata/Sentinel-2/MSI/L2A/2023/08/10/S2B_MSIL2A_20230810T094549_N0509_R079_T33TWM_20230810T130104.SAFE/GRANULE/L2A_T33TWM_A033567_20230810T095651/MTD_TL.xml" +) @pytest.mark.remote @pytest.mark.use_cdse_test_env def test_read_apply_offset_cdse(s2_stac_item, s2_stac_item_cdse_jp2): cog_assets = ["coastal"] jp2_cdse_assets = ["B01_60m"] - cog_product = S2Product(s2_stac_item) - jp2_product = S2Product(s2_stac_item_cdse_jp2) + cog_product = S2Product.from_stac_item(s2_stac_item) + jp2_product = S2Product.from_stac_item(s2_stac_item_cdse_jp2) tile = _get_product_tile(cog_product) # (1) read array from COG archive where offset was already applied by the provider diff --git a/tests/platforms/sentinel2/test_sources.py b/tests/platforms/sentinel2/test_sources.py index c11be8bc..ab0e3c16 100644 --- a/tests/platforms/sentinel2/test_sources.py +++ b/tests/platforms/sentinel2/test_sources.py @@ -33,6 +33,6 @@ def test_known_sources(collection): @pytest.mark.remote @pytest.mark.use_cdse_test_env -@pytest.mark.parametrize("collection", ["CSDE"]) +@pytest.mark.parametrize("collection", ["CDSE"]) def test_known_sources_cdse(collection): test_known_sources(collection) From 64ecf9cc0f038ff22aef12eb3b82e40fadbe0f0c Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 12:07:24 +0100 Subject: [PATCH 23/46] fix test --- mapchete_eo/io/products.py | 11 ++++++----- mapchete_eo/search/stac_search.py | 17 +++++++++-------- tests/platforms/sentinel2/test_base.py | 2 +- tests/testdata/sentinel2_cdse.mapchete | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/mapchete_eo/io/products.py b/mapchete_eo/io/products.py index 524b01e7..59acf6c2 100644 --- a/mapchete_eo/io/products.py +++ b/mapchete_eo/io/products.py @@ -268,7 +268,9 @@ def read_remaining_valid_products( 
try: yield product.read_np_array(**product_read_kwargs) except (AssetKeyError, CorruptedProduct) as exc: - logger.debug("skip product %s because of %s", product.item.id, exc) + logger.warning( + "skip product %s because of %s", product.item.id, exc + ) except StopIteration: return @@ -286,7 +288,7 @@ def read_remaining_valid_products( out = product.read_np_array(**product_read_kwargs) break except (AssetKeyError, CorruptedProduct) as exc: - logger.debug("skip product %s because of %s", product.item.id, exc) + logger.warning("skip product %s because of %s", product.item.id, exc) else: # we cannot do anything here, as all products are broken raise CorruptedSlice("all products are broken here") @@ -378,7 +380,6 @@ def generate_slice_dataarrays( slices = products_to_slices( products, group_by_property=merge_products_by, sort=sort ) - logger.debug( "reading %s products in %s groups...", len(products), @@ -418,8 +419,8 @@ def generate_slice_dataarrays( ) # if at least one slice can be yielded, the stack is not empty stack_empty = False - except (EmptySliceException, CorruptedSlice): - pass + except (EmptySliceException, CorruptedSlice) as exception: + logger.warning(exception) if stack_empty: raise EmptyStackException("all slices are empty") diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index f9e83d6f..68a7396b 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -11,7 +11,7 @@ from mapchete.tile import BufferedTilePyramid from mapchete.types import Bounds, BoundsLike from pystac import Item -from pystac_client import Client +from pystac_client import Client, ItemSearch from shapely.geometry import shape from shapely.geometry.base import BaseGeometry @@ -86,7 +86,7 @@ def search( if area is not None and area.is_empty: # pragma: no cover return - def _searches(): + def _searches() -> Generator[ItemSearch, None, None]: for time_range in time if isinstance(time, list) else [time]: search = 
self._search( time_range=time_range, @@ -130,11 +130,12 @@ def _searches(): for search in _searches(): for item in search.items(): - item_path = item.get_self_href() - if item_path in self.blacklist: # pragma: no cover - logger.debug("item %s found in blacklist and skipping", item_path) - else: - yield item + if item.get_self_href() in self.blacklist: # pragma: no cover + logger.debug( + "item %s found in blacklist and skipping", item.get_self_href() + ) + continue + yield item def _eo_bands(self) -> List[str]: for collection_name in self.collections: @@ -166,7 +167,7 @@ def _search( query: Optional[str] = None, config: StacSearchConfig = StacSearchConfig(), **kwargs, - ): + ) -> ItemSearch: if time_range is None: # pragma: no cover raise ValueError("time_range not provided") diff --git a/tests/platforms/sentinel2/test_base.py b/tests/platforms/sentinel2/test_base.py index ef5d99e6..1524e2a9 100644 --- a/tests/platforms/sentinel2/test_base.py +++ b/tests/platforms/sentinel2/test_base.py @@ -92,7 +92,7 @@ def test_remote_s2_read_xarray(mapchete_config): ) def test_remote_s2_read_xarray_cdse(mapchete_config): with mapchete_config.process_mp().open("inp") as cube: - assert isinstance(cube.read(assets=["B01_20m"]), xr.Dataset) + assert isinstance(cube.read(assets=["coastal"]), xr.Dataset) @pytest.mark.remote diff --git a/tests/testdata/sentinel2_cdse.mapchete b/tests/testdata/sentinel2_cdse.mapchete index 4ea6d975..7518cc6d 100644 --- a/tests/testdata/sentinel2_cdse.mapchete +++ b/tests/testdata/sentinel2_cdse.mapchete @@ -2,7 +2,7 @@ process: read_xarray.py input: inp: format: Sentinel-2 - archive: S2CDSE_JP2 + source: CDSE level: L2A time: start: 2024-04-01 From 02f8cc152f5dd8c52da72d38ea28f92a8b4d5f1c Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 12:09:17 +0100 Subject: [PATCH 24/46] remove duplicate cdse test --- tests/platforms/sentinel2/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/platforms/sentinel2/test_base.py b/tests/platforms/sentinel2/test_base.py index 1524e2a9..95c736db 100644 --- a/tests/platforms/sentinel2/test_base.py +++ b/tests/platforms/sentinel2/test_base.py @@ -77,7 +77,7 @@ def test_s2_jp2_band_paths(stac_item_sentinel2_jp2): @pytest.mark.remote @pytest.mark.parametrize( "mapchete_config", - [lazy_fixture("sentinel2_mapchete"), lazy_fixture("sentinel2_aws_cdse_mapchete")], + [lazy_fixture("sentinel2_mapchete")], ) def test_remote_s2_read_xarray(mapchete_config): with mapchete_config.process_mp().open("inp") as cube: From f9a52f395387b1b1d391e3de54b1d50a2759802a Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 14:13:29 +0100 Subject: [PATCH 25/46] removed deprecated mapchete_eo.geometry module --- mapchete_eo/cli/s2_jp2_static_catalog.py | 4 +- mapchete_eo/geometry.py | 271 --------------------- mapchete_eo/io/items.py | 2 +- mapchete_eo/platforms/sentinel2/product.py | 3 +- mapchete_eo/platforms/sentinel2/source.py | 3 - mapchete_eo/search/s2_mgrs.py | 13 +- tests/test_geometry.py | 48 ---- 7 files changed, 10 insertions(+), 334 deletions(-) delete mode 100644 mapchete_eo/geometry.py delete mode 100644 tests/test_geometry.py diff --git a/mapchete_eo/cli/s2_jp2_static_catalog.py b/mapchete_eo/cli/s2_jp2_static_catalog.py index 630b6baf..29c23bc1 100644 --- a/mapchete_eo/cli/s2_jp2_static_catalog.py +++ b/mapchete_eo/cli/s2_jp2_static_catalog.py @@ -19,7 +19,7 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.io.items import item_fix_footprint -from mapchete_eo.search.s2_mgrs import InvalidMGRSSquare, S2Tile, bounds_to_geom +from mapchete_eo.search.s2_mgrs import InvalidMGRSSquare, S2Tile from mapchete_eo.time import day_range logger = logging.getLogger(__name__) @@ -106,7 +106,7 @@ def s2_jp2_static_catalog( - each S2Tile file contains for each STAC item one entry with geometry and href """ bounds = bounds or Bounds(-180, -90, 180, 90) - aoi = bounds_to_geom(bounds) + aoi = 
bounds.latlon_geometry() prepare(aoi) items_per_tile = defaultdict(list) for day in day_range(start_date=start_time, end_date=end_time): diff --git a/mapchete_eo/geometry.py b/mapchete_eo/geometry.py deleted file mode 100644 index da48db1d..00000000 --- a/mapchete_eo/geometry.py +++ /dev/null @@ -1,271 +0,0 @@ -import logging -import math -from functools import partial -from typing import Callable, Iterable, Tuple - -from fiona.crs import CRS -from fiona.transform import transform as fiona_transform -from mapchete.geometry import reproject_geometry -from mapchete.types import Bounds, CRSLike -from shapely.geometry import ( - GeometryCollection, - LinearRing, - LineString, - MultiLineString, - MultiPoint, - MultiPolygon, - Point, - Polygon, - box, - shape, -) -from shapely.geometry.base import BaseGeometry -from shapely.ops import unary_union - -CoordArrays = Tuple[Iterable[float], Iterable[float]] - - -logger = logging.getLogger(__name__) - - -def transform_to_latlon( - geometry: BaseGeometry, src_crs: CRSLike, width_threshold: float = 180.0 -) -> BaseGeometry: - """Transforms a geometry to lat/lon coordinates. - - If resulting geometry crosses the Antimeridian it will be fixed by moving coordinates - from the Western Hemisphere to outside of the lat/lon bounds on the East, making sure - the correct geometry shape is preserved. - - As a next step, repair_antimeridian_geometry() can be applied, which then splits up - this geometry into a multipart geometry where all of its subgeometries are within the - lat/lon bounds again. - """ - latlon_crs = CRS.from_epsg(4326) - - def transform_shift_coords(coords: CoordArrays) -> CoordArrays: - out_x_coords, out_y_coords = fiona_transform(src_crs, latlon_crs, *coords) - if max(out_x_coords) - min(out_x_coords) > width_threshold: - # we probably have an antimeridian crossing here! 
- out_x_coords, out_y_coords = coords_longitudinal_shift( - coords_transform(coords, src_crs, latlon_crs), only_negative_coords=True - ) - return (out_x_coords, out_y_coords) - - return custom_transform(geometry, transform_shift_coords) - - -def repair_antimeridian_geometry( - geometry: BaseGeometry, width_threshold: float = 180.0 -) -> BaseGeometry: - """ - Repair geometry and apply fix if it crosses the Antimeridian. - - A geometry crosses the Antimeridian if it is at least partly outside of the - lat/lon bounding box or if its width exceeds a certain threshold. This can happen - after reprojection if the geometry coordinates are transformed separately and land - left and right of the Antimeridian, thus resulting in a polygon spanning almost the - whole lat/lon bounding box width. - """ - # repair geometry if it is broken - geometry = geometry.buffer(0) - latlon_bbox = box(-180, -90, 180, 90) - - # only attempt to fix if geometry is too wide or reaches over the lat/lon bounds - if ( - Bounds.from_inp(geometry).width >= width_threshold - or not geometry.difference(latlon_bbox).is_empty - ): - # (1) shift only coordinates on the western hemisphere by 360°, thus "fixing" - # the footprint, but letting it cross the antimeridian - shifted_geometry = longitudinal_shift(geometry, only_negative_coords=True) - - # (2) split up geometry in one outside of latlon bounds and one inside - inside = shifted_geometry.intersection(latlon_bbox) - outside = shifted_geometry.difference(latlon_bbox) - - # (3) shift back only the polygon outside of latlon bounds by -360, thus moving - # it back to the western hemisphere - outside_shifted = longitudinal_shift( - outside, offset=-360, only_negative_coords=False - ) - - # (4) create a MultiPolygon out from these two polygons - geometry = unary_union([inside, outside_shifted]) - - return geometry - - -def buffer_antimeridian_safe( - footprint: BaseGeometry, buffer_m: float = 0 -) -> BaseGeometry: - """Buffer geometry by meters and make it 
Antimeridian-safe. - - Safe means that if it crosses the Antimeridian and is a MultiPolygon, - the buffer will only be applied to the edges facing away from the Antimeridian - thus leaving the polygon intact if shifted back. - """ - if footprint.is_empty: - return footprint - - # repair geometry if it is broken - footprint = footprint.buffer(0) - - if not buffer_m: - return footprint - - if isinstance(footprint, MultiPolygon): - # we have a shifted footprint here! - # (1) unshift one part - subpolygons = [] - for polygon in footprint.geoms: - lon = polygon.centroid.x - if lon < 0: - polygon = longitudinal_shift(polygon) - subpolygons.append(polygon) - # (2) merge to single polygon - merged = unary_union(subpolygons) - - # (3) apply buffer - if isinstance(merged, MultiPolygon): - buffered = unary_union( - [ - buffer_antimeridian_safe(polygon, buffer_m=buffer_m) - for polygon in merged.geoms - ] - ) - else: - buffered = buffer_antimeridian_safe(merged, buffer_m=buffer_m) - - # (4) fix again - return repair_antimeridian_geometry(buffered) - - # UTM zone CRS - utm_crs = latlon_to_utm_crs(footprint.centroid.y, footprint.centroid.x) - latlon_crs = CRS.from_string("EPSG:4326") - - return transform_to_latlon( - reproject_geometry( - footprint, src_crs=latlon_crs, dst_crs=utm_crs, clip_to_crs_bounds=False - ).buffer(buffer_m), - src_crs=utm_crs, - ) - - -def longitudinal_shift( - geometry: BaseGeometry, offset: float = 360.0, only_negative_coords: bool = False -) -> BaseGeometry: - """Return geometry with either all or Western hemisphere coordinates shifted by some offset.""" - return custom_transform( - geometry, - partial( - coords_longitudinal_shift, - by=offset, - only_negative_coords=only_negative_coords, - ), - ) - - -def latlon_to_utm_crs(lat: float, lon: float) -> CRS: - min_zone = 1 - max_zone = 60 - utm_zone = ( - f"{max([min([(math.floor((lon + 180) / 6) + 1), max_zone]), min_zone]):02}" - ) - hemisphere_code = "7" if lat <= 0 else "6" - return 
CRS.from_string(f"EPSG:32{hemisphere_code}{utm_zone}") - - -def bounds_to_geom(bounds: Bounds) -> BaseGeometry: - # TODO: move into core package - if bounds.left < -180: - part1 = Bounds(-180, bounds.bottom, bounds.right, bounds.top) - part2 = Bounds(bounds.left + 360, bounds.bottom, 180, bounds.top) - return unary_union([shape(part1), shape(part2)]) - elif bounds.right > 180: - part1 = Bounds(-180, bounds.bottom, bounds.right - 360, bounds.top) - part2 = Bounds(bounds.left, bounds.bottom, 180, bounds.top) - return unary_union([shape(part1), shape(part2)]) - else: - return shape(bounds) - - -def custom_transform(geometry: BaseGeometry, func: Callable) -> BaseGeometry: - # todo: shapely.transform.transform maybe can make this code more simple - # https://shapely.readthedocs.io/en/stable/reference/shapely.transform.html#shapely.transform - def _point(point: Point) -> Point: - return Point(zip(*func(point.xy))) - - def _multipoint(multipoint: MultiPoint) -> MultiPoint: - return MultiPoint([_point(point) for point in multipoint]) - - def _linestring(linestring: LineString) -> LineString: - return LineString(zip(*func(linestring.xy))) - - def _multilinestring(multilinestring: MultiLineString) -> MultiLineString: - return MultiLineString( - [_linestring(linestring) for linestring in multilinestring.geoms] - ) - - def _linearring(linearring: LinearRing) -> LinearRing: - return LinearRing(((x, y) for x, y in zip(*func(linearring.xy)))) - - def _polygon(polygon: Polygon) -> Polygon: - return Polygon( - _linearring(polygon.exterior), - holes=list(map(_linearring, polygon.interiors)), - ) - - def _multipolygon(multipolygon: MultiPolygon) -> MultiPolygon: - return MultiPolygon([_polygon(polygon) for polygon in multipolygon.geoms]) - - def _geometrycollection( - geometrycollection: GeometryCollection, - ) -> GeometryCollection: - return GeometryCollection( - [_any_geometry(subgeometry) for subgeometry in geometrycollection.geoms] - ) - - def _any_geometry(geometry: 
BaseGeometry) -> BaseGeometry: - transform_funcs = { - Point: _point, - MultiPoint: _multipoint, - LineString: _linestring, - MultiLineString: _multilinestring, - Polygon: _polygon, - MultiPolygon: _multipolygon, - GeometryCollection: _geometrycollection, - } - try: - return transform_funcs[type(geometry)](geometry) - except KeyError: - raise TypeError(f"unknown geometry {geometry} of type {type(geometry)}") - - if geometry.is_empty: - return geometry - - # make valid by buffering - return _any_geometry(geometry).buffer(0) - - -def coords_transform( - coords: CoordArrays, src_crs: CRSLike, dst_crs: CRSLike -) -> CoordArrays: - return fiona_transform(src_crs, dst_crs, *coords) - - -def coords_longitudinal_shift( - coords: CoordArrays, - by: float = 360, - only_negative_coords: bool = False, -) -> CoordArrays: - x_coords, y_coords = coords - x_coords = ( - ( - x_coord + by - if (only_negative_coords and x_coord < 0) or not only_negative_coords - else x_coord - ) - for x_coord in x_coords - ) - return x_coords, y_coords diff --git a/mapchete_eo/io/items.py b/mapchete_eo/io/items.py index 1ebc20e1..27c28fbb 100644 --- a/mapchete_eo/io/items.py +++ b/mapchete_eo/io/items.py @@ -3,13 +3,13 @@ import numpy.ma as ma import pystac +from mapchete.geometry import repair_antimeridian_geometry from mapchete.protocols import GridProtocol from mapchete.types import Bounds, NodataVals from rasterio.enums import Resampling from shapely.geometry import mapping, shape from mapchete_eo.exceptions import EmptyProductException -from mapchete_eo.geometry import repair_antimeridian_geometry from mapchete_eo.io.assets import asset_to_np_array from mapchete_eo.types import BandLocation diff --git a/mapchete_eo/platforms/sentinel2/product.py b/mapchete_eo/platforms/sentinel2/product.py index 9ca01e1a..b44c7ddd 100644 --- a/mapchete_eo/platforms/sentinel2/product.py +++ b/mapchete_eo/platforms/sentinel2/product.py @@ -7,7 +7,7 @@ import numpy.ma as ma import pystac from mapchete.io.raster 
import ReferencedRaster, read_raster_window, resample_from_array -from mapchete.geometry import reproject_geometry +from mapchete.geometry import reproject_geometry, buffer_antimeridian_safe from mapchete.path import MPath from mapchete.protocols import GridProtocol from mapchete.types import Bounds, Grid, NodataVals @@ -27,7 +27,6 @@ EmptyFootprintException, EmptyProductException, ) -from mapchete_eo.geometry import buffer_antimeridian_safe from mapchete_eo.io.assets import get_assets, read_mask_as_raster from mapchete_eo.io.path import asset_mpath, get_product_cache_path from mapchete_eo.io.profiles import COGDeflateProfile diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py index d4bebc16..0d252c61 100644 --- a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -42,9 +42,6 @@ def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: collection = values.get("collection", None) if collection in KNOWN_SOURCES: values.update(KNOWN_SOURCES[collection]) - else: - # TODO: make sure catalog then is either a path or an URL - pass return values @model_validator(mode="after") diff --git a/mapchete_eo/search/s2_mgrs.py b/mapchete_eo/search/s2_mgrs.py index 149ffe07..a028c803 100644 --- a/mapchete_eo/search/s2_mgrs.py +++ b/mapchete_eo/search/s2_mgrs.py @@ -6,18 +6,17 @@ from itertools import product from typing import List, Literal, Optional, Tuple, Union -from mapchete.geometry import reproject_geometry +from mapchete.geometry import ( + reproject_geometry, + repair_antimeridian_geometry, + transform_to_latlon, +) from mapchete.types import Bounds from rasterio.crs import CRS from shapely import prepare from shapely.geometry import box, mapping, shape from shapely.geometry.base import BaseGeometry -from mapchete_eo.geometry import ( - bounds_to_geom, - repair_antimeridian_geometry, - transform_to_latlon, -) LATLON_LEFT = -180 LATLON_RIGHT = 180 @@ -291,7 +290,7 @@ def 
s2_tiles_from_bounds( min_latitude_band_idx -= 1 max_latitude_band_idx += 1 - aoi = bounds_to_geom(bounds) + aoi = bounds.latlon_geometry() prepare(aoi) def tiles_generator(): diff --git a/tests/test_geometry.py b/tests/test_geometry.py deleted file mode 100644 index ad789a59..00000000 --- a/tests/test_geometry.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest -from mapchete.types import Bounds -from pytest_lazyfixture import lazy_fixture -from shapely import wkt -from shapely.geometry import Polygon, shape - -from mapchete_eo.geometry import ( - buffer_antimeridian_safe, - repair_antimeridian_geometry, - transform_to_latlon, -) - - -def test_transform_to_latlon_empty(): - assert transform_to_latlon(Polygon(), "EPSG:3857").is_empty - - -@pytest.mark.parametrize( - "item", - [ - lazy_fixture("antimeridian_item1"), - lazy_fixture("antimeridian_item2"), - lazy_fixture("antimeridian_item4"), - lazy_fixture("antimeridian_item5"), - ], -) -def test_item_buffer_antimeridian_footprint(item): - fixed_footprint = repair_antimeridian_geometry(shape(item.geometry)) - buffered = buffer_antimeridian_safe(fixed_footprint, buffer_m=-500) - - # buffered should be smaller than original - assert buffered.area < fixed_footprint.area - - # however, it should still touch the antimeridian - bounds = Bounds.from_inp(buffered) - assert bounds.left == -180 - assert bounds.right == 180 - - -def test_broken_antimeridian_footprint(broken_footprint): - assert buffer_antimeridian_safe(broken_footprint, -500) - - -def test_buffer_antimeridian_safe(): - geometry = wkt.loads( - "MULTIPOLYGON (((-179.9007922830362 -20.96671450145087, -179.89560144107517 -20.967617414455813, -179.90806987842126 -20.96761869724748, -179.9007922830362 -20.96671450145087)), ((-180 -20.943177886491217, -180 -20.7734127657837, -179.78774173780687 -20.77706288786702, -179.79126327516263 -20.967606679820314, -180 -20.943177886491217)), ((179.86082360813083 -20.92720983649908, 179.85883568680532 -20.926860813217523, 
179.85888328436795 -20.924579253857743, 179.84773264469558 -20.924104957228145, 179.88569078371066 -20.771447035025357, 180 -20.7734127657837, 180 -20.943177886491217, 179.8925367497856 -20.930601290149554, 179.87522606375526 -20.927564560509428, 179.86082360813083 -20.92720983649908)))" - ) - assert buffer_antimeridian_safe(geometry, buffer_m=-500) From 0d40569e07f753447a39f13c4a9bddc257219b5c Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 14:35:37 +0100 Subject: [PATCH 26/46] set bounds CRS --- mapchete_eo/search/s2_mgrs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapchete_eo/search/s2_mgrs.py b/mapchete_eo/search/s2_mgrs.py index a028c803..f07d37a3 100644 --- a/mapchete_eo/search/s2_mgrs.py +++ b/mapchete_eo/search/s2_mgrs.py @@ -267,7 +267,7 @@ def from_grid_code(grid_code: str) -> S2Tile: def s2_tiles_from_bounds( left: float, bottom: float, right: float, top: float ) -> List[S2Tile]: - bounds = Bounds(left, bottom, right, top) + bounds = Bounds(left, bottom, right, top, crs="EPSG:4326") # determine zones in eastern-western direction min_zone_idx = math.floor((left + LATLON_WIDTH_OFFSET) / UTM_ZONE_WIDTH) From 9726756c324d22819e0869b360927102bf406cf2 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 2025 16:02:08 +0100 Subject: [PATCH 27/46] update test mapchete files --- tests/conftest.py | 2 +- tests/test_cli.py | 16 +++++++--------- tests/testdata/sentinel2.mapchete | 1 - .../sentinel2_antimeridian_east.mapchete | 10 +++++----- .../sentinel2_antimeridian_west.mapchete | 10 +++++----- tests/testdata/sentinel2_aws_cdse.mapchete | 10 +++++----- tests/testdata/sentinel2_cloud_cover.mapchete | 3 +-- tests/testdata/sentinel2_mercator.mapchete | 1 - tests/testdata/sentinel2_stac.mapchete | 5 ++--- tests/testdata/sentinel2_stac_area.mapchete | 4 ++-- .../testdata/sentinel2_stac_cloud_cover.mapchete | 6 +++--- .../sentinel2_stac_footprint_buffer.mapchete | 7 +++---- 
tests/testdata/sentinel2_time_ranges.mapchete | 1 - tests/testdata/stac.mapchete | 4 ++-- 14 files changed, 36 insertions(+), 44 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2820e016..2edeb4b8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,7 +42,7 @@ def eoxcloudless_testdata_dir(testdata_dir): @pytest.fixture(scope="session") def s2_stac_collection(s2_testdata_dir): - return s2_testdata_dir / "full_products" / "catalog.json" + return s2_testdata_dir / "full_products" / "sentinel-2-l2a" / "collection.json" @pytest.fixture(scope="session") diff --git a/tests/test_cli.py b/tests/test_cli.py index d792787c..f9056efb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -78,14 +78,14 @@ def test_s2_brdf(s2_stac_json_half_footprint, tmp_mpath): @pytest.mark.remote @pytest.mark.parametrize( - "flag,value,collection", + "collection", [ - ("--catalog-json", lazy_fixture("s2_stac_collection"), None), - ("--archive", "S2AWS_COG", None), - ("--endpoint", "https://earth-search.aws.element84.com/v1/", "sentinel-2-l2a"), + lazy_fixture("s2_stac_collection"), + "S2AWS_COG", + "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", ], ) -def test_static_catalog(tmp_mpath, flag, value, collection): +def test_static_catalog(tmp_mpath, collection): runner = CliRunner() out_path = tmp_mpath params = [ @@ -99,12 +99,10 @@ def test_static_catalog(tmp_mpath, flag, value, collection): "2023-08-10", "--end-time", "2023-08-10", - flag, - str(value), + "--collection", + str(collection), str(out_path), ] - if collection: - params.extend(["--collection", collection]) result = runner.invoke(eo, params) if result.exit_code != 0: raise result.exception diff --git a/tests/testdata/sentinel2.mapchete b/tests/testdata/sentinel2.mapchete index 4510d6b1..a4e57575 100644 --- a/tests/testdata/sentinel2.mapchete +++ b/tests/testdata/sentinel2.mapchete @@ -2,7 +2,6 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A time: 
start: 2024-04-01 end: 2024-04-03 diff --git a/tests/testdata/sentinel2_antimeridian_east.mapchete b/tests/testdata/sentinel2_antimeridian_east.mapchete index bf819eee..7bd6e1e8 100644 --- a/tests/testdata/sentinel2_antimeridian_east.mapchete +++ b/tests/testdata/sentinel2_antimeridian_east.mapchete @@ -2,11 +2,11 @@ process: read_xarray.py input: inp: format: Sentinel-2 - cat_baseurl: sentinel2/full_products_antimeridian/catalog.json - level: L2A - time: - start: 2023-06-01 - end: 2023-06-10 + source: + collection: sentinel2/full_products_antimeridian/sentinel-s2-l2a/collection.json + time: + start: 2023-06-01 + end: 2023-06-10 output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_antimeridian_west.mapchete b/tests/testdata/sentinel2_antimeridian_west.mapchete index d6ab77d0..dd31f508 100644 --- a/tests/testdata/sentinel2_antimeridian_west.mapchete +++ b/tests/testdata/sentinel2_antimeridian_west.mapchete @@ -2,11 +2,11 @@ process: read_xarray.py input: inp: format: Sentinel-2 - cat_baseurl: sentinel2/full_products_antimeridian/catalog.json - level: L2A - time: - start: 2023-06-01 - end: 2023-06-10 + source: + collection: sentinel2/full_products_antimeridian/sentinel-s2-l2a/collection.json + time: + start: 2023-06-01 + end: 2023-06-10 output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_aws_cdse.mapchete b/tests/testdata/sentinel2_aws_cdse.mapchete index 0dbde797..4c853780 100644 --- a/tests/testdata/sentinel2_aws_cdse.mapchete +++ b/tests/testdata/sentinel2_aws_cdse.mapchete @@ -2,11 +2,11 @@ process: read_xarray.py input: inp: format: Sentinel-2 - archive: S2CDSE_AWSJP2 - level: L2A - time: - start: 2024-04-01 - end: 2024-04-03 + source: + collection: CDSE_JP2 + time: + start: 2024-04-01 + end: 2024-04-03 output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_cloud_cover.mapchete b/tests/testdata/sentinel2_cloud_cover.mapchete index c85459ea..e2bbf463 100644 --- a/tests/testdata/sentinel2_cloud_cover.mapchete +++ 
b/tests/testdata/sentinel2_cloud_cover.mapchete @@ -2,11 +2,10 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A time: start: 2022-04-01 end: 2022-04-03 - max_cloud_cover: 20 + query: "eo:cloud_cover<=20" output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_mercator.mapchete b/tests/testdata/sentinel2_mercator.mapchete index ee145d4d..507d1a04 100644 --- a/tests/testdata/sentinel2_mercator.mapchete +++ b/tests/testdata/sentinel2_mercator.mapchete @@ -2,7 +2,6 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A time: start: 2024-04-01 end: 2024-04-03 diff --git a/tests/testdata/sentinel2_stac.mapchete b/tests/testdata/sentinel2_stac.mapchete index 29b5d11c..6e93427d 100644 --- a/tests/testdata/sentinel2_stac.mapchete +++ b/tests/testdata/sentinel2_stac.mapchete @@ -2,12 +2,11 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A + source: + collection: sentinel2/full_products/sentinel-2-l2a/collection.json time: start: 2023-08-10 end: 2023-08-13 - cat_baseurl: sentinel2/full_products/catalog.json - with_cloudmasks: true output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_stac_area.mapchete b/tests/testdata/sentinel2_stac_area.mapchete index 25f810ac..2b4aed59 100644 --- a/tests/testdata/sentinel2_stac_area.mapchete +++ b/tests/testdata/sentinel2_stac_area.mapchete @@ -2,13 +2,13 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A time: start: 2023-08-10 end: 2023-08-13 + source: + collection: sentinel2/full_products/sentinel-2-l2a/collection.json # tmx bbox 7 29 138 area: "POLYGON ((15.46875 47.8125, 15.46875 49.21875, 14.0625 49.21875, 14.0625 47.8125, 15.46875 47.8125))" - cat_baseurl: sentinel2/full_products/catalog.json with_cloudmasks: true output: format: GTiff diff --git a/tests/testdata/sentinel2_stac_cloud_cover.mapchete b/tests/testdata/sentinel2_stac_cloud_cover.mapchete index 99e7d200..c91d1d8f 100644 --- 
a/tests/testdata/sentinel2_stac_cloud_cover.mapchete +++ b/tests/testdata/sentinel2_stac_cloud_cover.mapchete @@ -2,12 +2,12 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A time: start: 2023-08-10 end: 2023-08-13 - max_cloud_cover: 20 - cat_baseurl: sentinel2/full_products/catalog.json + source: + query: "eo:cloud_cover<=20" + collection: sentinel2/full_products/sentinel-2-l2a/collection.json output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_stac_footprint_buffer.mapchete b/tests/testdata/sentinel2_stac_footprint_buffer.mapchete index 793fa985..5613a989 100644 --- a/tests/testdata/sentinel2_stac_footprint_buffer.mapchete +++ b/tests/testdata/sentinel2_stac_footprint_buffer.mapchete @@ -2,13 +2,12 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A time: start: 2023-08-10 end: 2023-08-13 - cat_baseurl: sentinel2/full_products/catalog.json - with_cloudmasks: true - footprint_buffer: -550 + source: + collection: sentinel2/full_products/sentinel-2-l2a/collection.json + footprint_buffer: -550 output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_time_ranges.mapchete b/tests/testdata/sentinel2_time_ranges.mapchete index c0a0dad3..5cec0e36 100644 --- a/tests/testdata/sentinel2_time_ranges.mapchete +++ b/tests/testdata/sentinel2_time_ranges.mapchete @@ -2,7 +2,6 @@ process: read_xarray.py input: inp: format: Sentinel-2 - level: L2A time: - start: 2024-04-01 end: 2024-04-03 diff --git a/tests/testdata/stac.mapchete b/tests/testdata/stac.mapchete index c3aa4e27..a82a3c3f 100644 --- a/tests/testdata/stac.mapchete +++ b/tests/testdata/stac.mapchete @@ -6,8 +6,8 @@ input: time: start: 2023-08-10 end: 2023-08-13 - cat_baseurl: sentinel2/full_products/catalog.json - with_cloudmasks: true + source: + collection: sentinel2/full_products/sentinel-2-l2a/collection.json output: format: GTiff bands: 3 From 58a8ee5b9e45ca4f390bcc2d70dd5da19a98712d Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 4 Nov 
2025 16:02:29 +0100 Subject: [PATCH 28/46] move from catalogs to collections --- mapchete_eo/cli/options_arguments.py | 20 +-- mapchete_eo/cli/s2_cat_results.py | 21 +-- mapchete_eo/cli/s2_find_broken_products.py | 21 +-- mapchete_eo/cli/static_catalog.py | 51 +------ mapchete_eo/search/base.py | 147 +++++++-------------- mapchete_eo/search/stac_search.py | 4 +- mapchete_eo/search/stac_static.py | 76 +++-------- mapchete_eo/search/utm_search.py | 6 +- mapchete_eo/source.py | 14 +- 9 files changed, 97 insertions(+), 263 deletions(-) diff --git a/mapchete_eo/cli/options_arguments.py b/mapchete_eo/cli/options_arguments.py index e9ede7c8..5bf0f2c5 100644 --- a/mapchete_eo/cli/options_arguments.py +++ b/mapchete_eo/cli/options_arguments.py @@ -7,9 +7,6 @@ from mapchete_eo.platforms.sentinel2.brdf.models import BRDFModels from mapchete_eo.io.profiles import rio_profiles from mapchete_eo.platforms.sentinel2.config import SceneClassification -from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( - DEPRECATED_ARCHIVES, -) from mapchete_eo.platforms.sentinel2.types import L2ABand, Resolution from mapchete_eo.time import to_datetime @@ -164,27 +161,12 @@ def _str_to_datetime(_, param, value): opt_end_time = click.option( "--end-time", type=click.STRING, callback=_str_to_datetime, help="End time" ) -opt_archive = click.option( - "--archive", - type=click.Choice(list(DEPRECATED_ARCHIVES.keys())), - default="S2AWS_COG", - help="Archive to read from.", -) opt_collection = click.option( "--collection", type=click.STRING, + default="EarthSearch", help="Data collection to be queried.", ) -opt_endpoint = click.option( - "--endpoint", - type=click.STRING, - help="Search endpoint.", -) -opt_catalog_json = click.option( - "--catalog-json", - type=click.Path(path_type=MPath), - help="JSON file for a static catalog.", -) opt_name = click.option("--name", type=click.STRING, help="Static catalog name.") opt_description = click.option( "--description", type=click.STRING, 
help="Static catalog description." diff --git a/mapchete_eo/cli/s2_cat_results.py b/mapchete_eo/cli/s2_cat_results.py index ab427f0b..a17435bd 100644 --- a/mapchete_eo/cli/s2_cat_results.py +++ b/mapchete_eo/cli/s2_cat_results.py @@ -12,10 +12,10 @@ from mapchete.types import Bounds from mapchete_eo.cli import options_arguments -from mapchete_eo.cli.static_catalog import get_catalog from mapchete_eo.io.products import Slice, products_to_slices from mapchete_eo.platforms.sentinel2.product import S2Product from mapchete_eo.sort import TargetDateSort +from mapchete_eo.source import Source from mapchete_eo.types import TimeRange @@ -25,10 +25,7 @@ @options_arguments.opt_end_time @opt_bounds @options_arguments.opt_mgrs_tile -@options_arguments.opt_archive @options_arguments.opt_collection -@options_arguments.opt_endpoint -@options_arguments.opt_catalog_json @click.option( "--format", type=click.Choice(["FlatGeobuf", "GeoJSON"]), @@ -45,32 +42,20 @@ def s2_cat_results( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - archive: Optional[str] = None, - collection: Optional[str] = None, - endpoint: Optional[str] = None, - catalog_json: Optional[MPath] = None, + collection: str = "EarthSearch", format: Literal["FlatGeobuf", "GeoJSON"] = "FlatGeobuf", by_slices: bool = False, add_index: bool = False, debug: bool = False, ): """Write a search result.""" - if catalog_json and endpoint: # pragma: no cover - raise click.ClickException( - "exactly one of --archive, --catalog-json or --endpoint has to be set." 
- ) if any([start_time is None, end_time is None]): # pragma: no cover raise click.ClickException("--start-time and --end-time are mandatory") if all([bounds is None, mgrs_tile is None]): # pragma: no cover raise click.ClickException("--bounds or --mgrs-tile are required") slice_property_key = "s2:datastrip_id" with click_spinner.Spinner(disable=debug): - catalog = get_catalog( - catalog_json=catalog_json, - endpoint=endpoint, - known_archive=archive, - collection=collection, - ) + catalog = Source(collection=collection).get_catalog() slices = products_to_slices( [ S2Product.from_stac_item(item) diff --git a/mapchete_eo/cli/s2_find_broken_products.py b/mapchete_eo/cli/s2_find_broken_products.py index ff8a69ac..2e6bbb24 100644 --- a/mapchete_eo/cli/s2_find_broken_products.py +++ b/mapchete_eo/cli/s2_find_broken_products.py @@ -9,8 +9,8 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.cli.s2_verify import verify_item -from mapchete_eo.cli.static_catalog import get_catalog from mapchete_eo.product import add_to_blacklist, blacklist_products +from mapchete_eo.source import Source from mapchete_eo.types import TimeRange @@ -18,10 +18,7 @@ @opt_bounds @options_arguments.opt_start_time @options_arguments.opt_end_time -@options_arguments.opt_archive @options_arguments.opt_collection -@options_arguments.opt_endpoint -@options_arguments.opt_catalog_json @options_arguments.opt_assets @options_arguments.opt_blacklist @options_arguments.opt_thumbnail_dir @@ -31,10 +28,7 @@ def s2_find_broken_products( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - archive: Optional[str] = None, - collection: Optional[str] = None, - endpoint: Optional[str] = None, - catalog_json: Optional[MPath] = None, + collection: str = "EarthSearch", assets: List[str] = [], asset_exists_check: bool = True, blacklist: MPath = MPath("s3://eox-mhub-cache/blacklist.txt"), @@ -42,20 +36,11 @@ def s2_find_broken_products( **__, ): """Find broken Sentinel-2 
products.""" - if catalog_json and endpoint: # pragma: no cover - raise click.ClickException( - "exactly one of --archive, --catalog-json or --endpoint has to be set." - ) if any([start_time is None, end_time is None]): # pragma: no cover raise click.ClickException("--start-time and --end-time are mandatory") if all([bounds is None, mgrs_tile is None]): # pragma: no cover raise click.ClickException("--bounds or --mgrs-tile are required") - catalog = get_catalog( - catalog_json=catalog_json, - endpoint=endpoint, - known_archive=archive, - collection=collection, - ) + catalog = Source(collection=collection).get_catalog() blacklisted_products = blacklist_products(blacklist) for item in tqdm( catalog.search( diff --git a/mapchete_eo/cli/static_catalog.py b/mapchete_eo/cli/static_catalog.py index 22f853d9..c686030b 100644 --- a/mapchete_eo/cli/static_catalog.py +++ b/mapchete_eo/cli/static_catalog.py @@ -10,11 +10,7 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.platforms.sentinel2 import S2Metadata from mapchete_eo.platforms.sentinel2.types import Resolution -from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( - DEPRECATED_ARCHIVES, -) -from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog -from mapchete_eo.search.base import CatalogSearcher +from mapchete_eo.source import Source from mapchete_eo.types import TimeRange @@ -24,10 +20,7 @@ @options_arguments.opt_mgrs_tile @options_arguments.opt_start_time @options_arguments.opt_end_time -@options_arguments.opt_archive @options_arguments.opt_collection -@options_arguments.opt_endpoint -@options_arguments.opt_catalog_json @options_arguments.opt_name @options_arguments.opt_description @options_arguments.opt_assets @@ -42,10 +35,7 @@ def static_catalog( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - archive: Optional[str] = None, - collection: Optional[str] = None, - endpoint: Optional[str] = None, - catalog_json: Optional[MPath] = 
None, + collection: str = "EarthSearch", name: Optional[str] = None, description: Optional[str] = None, assets: Optional[List[str]] = None, @@ -56,20 +46,11 @@ def static_catalog( **__, ): """Write a static STAC catalog for selected area.""" - if catalog_json and endpoint: # pragma: no cover - raise click.ClickException( - "exactly one of --archive, --catalog-json or --endpoint has to be set." - ) if any([start_time is None, end_time is None]): # pragma: no cover raise click.ClickException("--start-time and --end-time are mandatory") if all([bounds is None, mgrs_tile is None]): # pragma: no cover raise click.ClickException("--bounds or --mgrs-tile are required") - catalog = get_catalog( - catalog_json=catalog_json, - endpoint=endpoint, - known_archive=archive, - collection=collection, - ) + catalog = Source(collection=collection).get_catalog() if hasattr(catalog, "write_static_catalog"): with options_arguments.TqdmUpTo( unit="products", unit_scale=True, miniters=1, disable=opt_debug @@ -99,29 +80,3 @@ def static_catalog( raise AttributeError( f"catalog {catalog} does not support writing a static version" ) - - -def get_catalog( - catalog_json: Optional[MPath], - endpoint: Optional[MPath], - known_archive: Optional[str] = None, - collection: Optional[str] = None, -) -> CatalogSearcher: - if catalog_json: - return STACStaticCatalog( - baseurl=catalog_json, - ) - elif endpoint: - if collection: - return STACSearchCatalog( - endpoint=endpoint, - collections=[collection], - ) - else: - raise ValueError("collection must be provided") - elif known_archive: - return STACSearchCatalog.from_collection_url( - DEPRECATED_ARCHIVES[known_archive]["collection"] - ) - else: - raise TypeError("cannot determine catalog") diff --git a/mapchete_eo/search/base.py b/mapchete_eo/search/base.py index 7ed76c08..6cf4c3ec 100644 --- a/mapchete_eo/search/base.py +++ b/mapchete_eo/search/base.py @@ -6,12 +6,11 @@ from cql2 import Expr from pydantic import BaseModel -from pystac import Item, 
Catalog, CatalogType, Extent +from pystac import Item, CatalogType, Extent from mapchete.path import MPath, MPathLike from mapchete.types import Bounds from pystac.collection import Collection from pystac.stac_io import DefaultStacIO -from pystac_client import Client from pystac_client.stac_api_io import StacApiIO from rasterio.profiles import Profile from shapely.geometry.base import BaseGeometry @@ -45,12 +44,11 @@ def save_json(dest: MPathLike, json_dict: dict, *args, **kwargs) -> None: return dst.write(json.dumps(json_dict, indent=2)) -class CatalogSearcher(ABC): +class CollectionSearcher(ABC): """ This class serves as a bridge between an Archive and a catalog implementation. """ - collections: List[str] config_cls: Type[BaseModel] @abstractmethod @@ -80,16 +78,12 @@ def search( ) -> Generator[Item, None, None]: ... -class StaticCatalogWriterMixin(CatalogSearcher): +class StaticCollectionWriterMixin(CollectionSearcher): # client: Client # id: str # description: str # stac_extensions: List[str] - @abstractmethod - def get_collections(self) -> List[Collection]: # pragma: no cover - ... 
- def write_static_catalog( self, output_path: MPathLike, @@ -109,100 +103,61 @@ def write_static_catalog( progress_callback: Optional[Callable] = None, ) -> MPath: """Dump static version of current items.""" + collection_id = name or f"{self.id}" output_path = MPath.from_inp(output_path) assets = assets or [] # initialize catalog - catalog_json = output_path / "catalog.json" - if catalog_json.exists(): - logger.debug("open existing catalog %s", str(catalog_json)) - client = Client.from_file(catalog_json) - # catalog = pystac.Catalog.from_file(catalog_json) - existing_collections = list(client.get_collections()) - else: - existing_collections = [] - catalog = Catalog( - name or f"{self.id}", - description or f"Static subset of {self.description}", - stac_extensions=self.stac_extensions, - href=str(catalog_json), - catalog_type=CatalogType.SELF_CONTAINED, - ) + collection_json = output_path / f"{collection_id}.json" src_items = list( self.search( time=time, bounds=bounds, area=area, search_kwargs=search_kwargs ) ) - for collection in self.get_collections(): - # collect all items and download assets if required - items: List[Item] = [] - item_ids = set() - for n, item in enumerate(src_items, 1): - logger.debug("found item %s", item) - item = item.clone() - if assets: - logger.debug("get assets %s", assets) - item = get_assets( - item, - assets, - output_path / collection.id / item.id, - resolution=assets_dst_resolution, - convert_profile=assets_convert_profile, - overwrite=overwrite, - ignore_if_exists=True, - ) - if copy_metadata: - item = get_metadata_assets( - item, - output_path / collection.id / item.id, - metadata_parser_classes=metadata_parser_classes, - resolution=assets_dst_resolution, - convert_profile=assets_convert_profile, - overwrite=overwrite, - ) - # this has to be set to None, otherwise pystac will mess up the asset paths - # after normalizing - item.set_self_href(None) - - items.append(item) - item_ids.add(item.id) - - if progress_callback: - 
progress_callback(n=n, total=len(src_items)) - - for existing_collection in existing_collections: - if existing_collection.id == collection.id: - logger.debug("try to find unregistered items in collection") - collection_root_path = MPath.from_inp( - existing_collection.get_self_href() - ).parent - for subpath in collection_root_path.ls(): - if subpath.is_directory(): - try: - item = Item.from_file( - subpath / subpath.with_suffix(".json").name - ) - if item.id not in item_ids: - logger.debug( - "add existing item with id %s", item.id - ) - items.append(item) - item_ids.add(item.id) - except FileNotFoundError: - pass - break + # collect all items and download assets if required + items: List[Item] = [] + item_ids = set() + for n, item in enumerate(src_items, 1): + logger.debug("found item %s", item) + item = item.clone() + if assets: + logger.debug("get assets %s", assets) + item = get_assets( + item, + assets, + output_path / item.id, + resolution=assets_dst_resolution, + convert_profile=assets_convert_profile, + overwrite=overwrite, + ignore_if_exists=True, + ) + if copy_metadata: + item = get_metadata_assets( + item, + output_path / item.id, + metadata_parser_classes=metadata_parser_classes, + resolution=assets_dst_resolution, + convert_profile=assets_convert_profile, + overwrite=overwrite, + ) + # this has to be set to None, otherwise pystac will mess up the asset paths + # after normalizing + item.set_self_href(None) + + items.append(item) + item_ids.add(item.id) + + if progress_callback: + progress_callback(n=n, total=len(src_items)) + # create collection and copy metadata logger.debug("create new collection") + out_collection = Collection( - id=collection.id, + id=collection_id, extent=Extent.from_items(items), - description=collection.description, - title=collection.title, - stac_extensions=collection.stac_extensions, - license=collection.license, - keywords=collection.keywords, - providers=collection.providers, - summaries=collection.summaries, - 
extra_fields=collection.extra_fields, + description=description or f"Static subset of {self.description}", + stac_extensions=self.stac_extensions, + href=str(collection_json), catalog_type=CatalogType.SELF_CONTAINED, ) @@ -212,14 +167,12 @@ def write_static_catalog( out_collection.update_extent_from_items() - catalog.add_child(out_collection) - logger.debug("write catalog to %s", output_path) - catalog.normalize_hrefs(str(output_path)) - catalog.make_all_asset_hrefs_relative() - catalog.save(dest_href=str(output_path), stac_io=stac_io) + out_collection.normalize_hrefs(str(output_path)) + out_collection.make_all_asset_hrefs_relative() + out_collection.save(dest_href=str(output_path), stac_io=stac_io) - return catalog_json + return collection_json def filter_items( diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 68a7396b..6b0a1796 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -16,7 +16,7 @@ from shapely.geometry.base import BaseGeometry from mapchete_eo.product import blacklist_products -from mapchete_eo.search.base import CatalogSearcher, StaticCatalogWriterMixin +from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin from mapchete_eo.search.config import StacSearchConfig from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.types import TimeRange @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) -class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher): +class STACSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher): endpoint: str blacklist: Set[str] = ( blacklist_products(mapchete_eo_settings.blacklist) diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index ae256ac3..fb8f24c0 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -9,14 +9,14 @@ from mapchete.io.vector import bounds_intersect from mapchete.path import MPathLike from 
pystac.stac_io import StacIO -from pystac_client import Client +from pystac_client import CollectionClient from shapely.geometry import shape from shapely.geometry.base import BaseGeometry from mapchete_eo.search.base import ( - CatalogSearcher, + CollectionSearcher, FSSpecStacIO, - StaticCatalogWriterMixin, + StaticCollectionWriterMixin, filter_items, ) from mapchete_eo.search.config import StacStaticConfig @@ -29,7 +29,7 @@ StacIO.set_default(FSSpecStacIO) -class STACStaticCatalog(StaticCatalogWriterMixin, CatalogSearcher): +class STACStaticCatalog(StaticCollectionWriterMixin, CollectionSearcher): config_cls = StacStaticConfig def __init__( @@ -37,8 +37,8 @@ def __init__( baseurl: MPathLike, stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None, ): - self.client = Client.from_file(str(baseurl), stac_io=FSSpecStacIO()) - self.collections = [c.id for c in self.client.get_children()] + self.client = CollectionClient.from_file(str(baseurl), stac_io=FSSpecStacIO()) + # self.collections = [c.id for c in self.client.get_children()] self.stac_item_modifiers = stac_item_modifiers @cached_property @@ -79,29 +79,27 @@ def _raw_search( if area is not None and area.is_empty: return logger.debug("iterate through children") - for collection in self.client.get_collections(): - if time: - for time_range in time if isinstance(time, list) else [time]: - for item in _all_intersecting_items( - collection, - area=area, - time_range=time_range, - ): - item.make_asset_hrefs_absolute() - yield item - else: + if time: + for time_range in time if isinstance(time, list) else [time]: for item in _all_intersecting_items( - collection, + self.client, area=area, + time_range=time_range, ): item.make_asset_hrefs_absolute() yield item + else: + for item in _all_intersecting_items( + self.client, + area=area, + ): + item.make_asset_hrefs_absolute() + yield item def _eo_bands(self) -> List[str]: - for collection in self.client.get_children(): - eo_bands = 
collection.extra_fields.get("properties", {}).get("eo:bands") - if eo_bands: - return eo_bands + eo_bands = self.client.extra_fields.get("properties", {}).get("eo:bands") + if eo_bands: + return eo_bands else: warnings.warn( "Unable to read eo:bands definition from collections. " @@ -109,7 +107,7 @@ def _eo_bands(self) -> List[str]: ) # see if eo:bands can be found in properties - item = _get_first_item(self.client.get_children()) + item = next(self.client.get_items()) eo_bands = item.properties.get("eo:bands") if eo_bands: return eo_bands @@ -125,38 +123,6 @@ def _eo_bands(self) -> List[str]: logger.debug("cannot find eo:bands definition") return [] - def get_collections( - self, - time: Optional[Union[TimeRange, List[TimeRange]]] = None, - bounds: Optional[BoundsLike] = None, - area: Optional[BaseGeometry] = None, - ): - if area is None and bounds is not None: - area = Bounds.from_inp(bounds).geometry - for collection in self.client.get_children(): - if time: - for time_range in time if isinstance(time, list) else [time]: - if _collection_extent_intersects( - collection, - area=area, - time_range=time_range, - ): - yield collection - else: - if _collection_extent_intersects(collection, area=area): - yield collection - - -def _get_first_item(collections): - for collection in collections: - for item in collection.get_all_items(): - return item - else: - for child in collection.get_children(): - return _get_first_item(child) - else: - raise ValueError("collections contain no items") - def _all_intersecting_items( collection: Union[Catalog, Collection], diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index d8080472..68a4bb41 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -15,8 +15,8 @@ from mapchete_eo.exceptions import ItemGeometryError from mapchete_eo.product import blacklist_products from mapchete_eo.search.base import ( - CatalogSearcher, - StaticCatalogWriterMixin, + CollectionSearcher, + 
StaticCollectionWriterMixin, filter_items, ) from mapchete_eo.search.config import UTMSearchConfig @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) -class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher): +class UTMSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher): endpoint: str id: str day_subdir_schema: str diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index 07245004..7662fd8d 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -1,3 +1,4 @@ +from functools import cached_property from typing import Any, Dict, List, Literal, Optional, Generator, Union, Callable from mapchete.path import MPath @@ -8,7 +9,7 @@ from shapely.errors import GEOSException from mapchete_eo.exceptions import ItemGeometryError -from mapchete_eo.search.base import CatalogSearcher +from mapchete_eo.search.base import CollectionSearcher from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.types import TimeRange @@ -19,7 +20,6 @@ class Source(BaseModel): collection: str catalog_crs: CRSLike = mapchete_eo_settings.default_catalog_crs - catalog_type: Literal["search", "static"] = "search" query: Optional[str] = None model_config = ConfigDict(arbitrary_types_allowed=True) @@ -28,6 +28,14 @@ class Source(BaseModel): def item_modifier_funcs(self) -> List[Callable]: return [] + @cached_property + def catalog_type(self) -> Literal["search", "static"]: + try: + (MPath(self.collection) / "items?limit=1").read_json() + return "search" + except FileNotFoundError: + return "static" + def search( self, time: Union[TimeRange, List[TimeRange]], @@ -54,7 +62,7 @@ def apply_item_modifier_funcs(self, item: Item) -> Item: ) return item - def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CatalogSearcher: + def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CollectionSearcher: match self.catalog_type: case "search": return 
STACSearchCatalog.from_collection_url(self.collection) From 0f732f25e8522466da36031514bdb4f79c3b533e Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 5 Nov 2025 10:56:53 +0100 Subject: [PATCH 29/46] fix writing static catalog --- mapchete_eo/cli/options_arguments.py | 16 +++- mapchete_eo/cli/s2_cat_results.py | 8 +- mapchete_eo/cli/s2_find_broken_products.py | 8 +- mapchete_eo/cli/static_catalog.py | 8 +- .../preconfigured_sources/__init__.py | 3 +- mapchete_eo/platforms/sentinel2/source.py | 9 ++ mapchete_eo/search/__init__.py | 6 +- mapchete_eo/search/base.py | 72 +++++++++++--- mapchete_eo/search/stac_search.py | 58 ++++++------ mapchete_eo/search/stac_static.py | 42 +++++---- mapchete_eo/source.py | 17 ++-- tests/conftest.py | 93 ++++++++++--------- tests/test_catalog.py | 24 ++--- tests/test_cli.py | 8 +- .../sentinel2_antimeridian_east.mapchete | 6 +- .../sentinel2_antimeridian_west.mapchete | 6 +- 16 files changed, 220 insertions(+), 164 deletions(-) diff --git a/mapchete_eo/cli/options_arguments.py b/mapchete_eo/cli/options_arguments.py index 5bf0f2c5..4e06bc8d 100644 --- a/mapchete_eo/cli/options_arguments.py +++ b/mapchete_eo/cli/options_arguments.py @@ -7,6 +7,7 @@ from mapchete_eo.platforms.sentinel2.brdf.models import BRDFModels from mapchete_eo.io.profiles import rio_profiles from mapchete_eo.platforms.sentinel2.config import SceneClassification +from mapchete_eo.platforms.sentinel2.source import Sentinel2Source from mapchete_eo.platforms.sentinel2.types import L2ABand, Resolution from mapchete_eo.time import to_datetime @@ -60,8 +61,12 @@ def _str_to_l2a_bands(_, __, value): def _str_to_datetime(_, param, value): if value: return to_datetime(value) - else: - raise ValueError(f"--{param.name} is mandatory") + raise ValueError(f"--{param.name} is mandatory") + + +def _str_to_source(_, __, value): + if value: + return Sentinel2Source(collection=value) arg_stac_item = click.argument("stac-item", type=click.Path(path_type=MPath)) @@ -161,11 +166,12 
@@ def _str_to_datetime(_, param, value): opt_end_time = click.option( "--end-time", type=click.STRING, callback=_str_to_datetime, help="End time" ) -opt_collection = click.option( - "--collection", +opt_source = click.option( + "--source", type=click.STRING, default="EarthSearch", - help="Data collection to be queried.", + callback=_str_to_source, + help="Data source to be queried.", ) opt_name = click.option("--name", type=click.STRING, help="Static catalog name.") opt_description = click.option( diff --git a/mapchete_eo/cli/s2_cat_results.py b/mapchete_eo/cli/s2_cat_results.py index a17435bd..7c513e6c 100644 --- a/mapchete_eo/cli/s2_cat_results.py +++ b/mapchete_eo/cli/s2_cat_results.py @@ -14,8 +14,8 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.io.products import Slice, products_to_slices from mapchete_eo.platforms.sentinel2.product import S2Product +from mapchete_eo.platforms.sentinel2.source import Sentinel2Source from mapchete_eo.sort import TargetDateSort -from mapchete_eo.source import Source from mapchete_eo.types import TimeRange @@ -25,7 +25,7 @@ @options_arguments.opt_end_time @opt_bounds @options_arguments.opt_mgrs_tile -@options_arguments.opt_collection +@options_arguments.opt_source @click.option( "--format", type=click.Choice(["FlatGeobuf", "GeoJSON"]), @@ -42,7 +42,7 @@ def s2_cat_results( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - collection: str = "EarthSearch", + source: Sentinel2Source = Sentinel2Source(collection="EarthSearch"), format: Literal["FlatGeobuf", "GeoJSON"] = "FlatGeobuf", by_slices: bool = False, add_index: bool = False, @@ -55,7 +55,7 @@ def s2_cat_results( raise click.ClickException("--bounds or --mgrs-tile are required") slice_property_key = "s2:datastrip_id" with click_spinner.Spinner(disable=debug): - catalog = Source(collection=collection).get_catalog() + catalog = source.get_catalog() slices = products_to_slices( [ S2Product.from_stac_item(item) diff --git 
a/mapchete_eo/cli/s2_find_broken_products.py b/mapchete_eo/cli/s2_find_broken_products.py index 2e6bbb24..de4ecbba 100644 --- a/mapchete_eo/cli/s2_find_broken_products.py +++ b/mapchete_eo/cli/s2_find_broken_products.py @@ -9,8 +9,8 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.cli.s2_verify import verify_item +from mapchete_eo.platforms.sentinel2.source import Sentinel2Source from mapchete_eo.product import add_to_blacklist, blacklist_products -from mapchete_eo.source import Source from mapchete_eo.types import TimeRange @@ -18,7 +18,7 @@ @opt_bounds @options_arguments.opt_start_time @options_arguments.opt_end_time -@options_arguments.opt_collection +@options_arguments.opt_source @options_arguments.opt_assets @options_arguments.opt_blacklist @options_arguments.opt_thumbnail_dir @@ -28,7 +28,7 @@ def s2_find_broken_products( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - collection: str = "EarthSearch", + source: Sentinel2Source = Sentinel2Source(collection="EarthSearch"), assets: List[str] = [], asset_exists_check: bool = True, blacklist: MPath = MPath("s3://eox-mhub-cache/blacklist.txt"), @@ -40,7 +40,7 @@ def s2_find_broken_products( raise click.ClickException("--start-time and --end-time are mandatory") if all([bounds is None, mgrs_tile is None]): # pragma: no cover raise click.ClickException("--bounds or --mgrs-tile are required") - catalog = Source(collection=collection).get_catalog() + catalog = source.get_catalog() blacklisted_products = blacklist_products(blacklist) for item in tqdm( catalog.search( diff --git a/mapchete_eo/cli/static_catalog.py b/mapchete_eo/cli/static_catalog.py index c686030b..d847a532 100644 --- a/mapchete_eo/cli/static_catalog.py +++ b/mapchete_eo/cli/static_catalog.py @@ -9,8 +9,8 @@ from mapchete_eo.cli import options_arguments from mapchete_eo.platforms.sentinel2 import S2Metadata +from mapchete_eo.platforms.sentinel2.source import Sentinel2Source from 
mapchete_eo.platforms.sentinel2.types import Resolution -from mapchete_eo.source import Source from mapchete_eo.types import TimeRange @@ -20,7 +20,7 @@ @options_arguments.opt_mgrs_tile @options_arguments.opt_start_time @options_arguments.opt_end_time -@options_arguments.opt_collection +@options_arguments.opt_source @options_arguments.opt_name @options_arguments.opt_description @options_arguments.opt_assets @@ -35,7 +35,7 @@ def static_catalog( end_time: datetime, bounds: Optional[Bounds] = None, mgrs_tile: Optional[str] = None, - collection: str = "EarthSearch", + source: Sentinel2Source = Sentinel2Source(collection="EarthSearch"), name: Optional[str] = None, description: Optional[str] = None, assets: Optional[List[str]] = None, @@ -50,7 +50,7 @@ def static_catalog( raise click.ClickException("--start-time and --end-time are mandatory") if all([bounds is None, mgrs_tile is None]): # pragma: no cover raise click.ClickException("--bounds or --mgrs-tile are required") - catalog = Source(collection=collection).get_catalog() + catalog = source.get_catalog() if hasattr(catalog, "write_static_catalog"): with options_arguments.TqdmUpTo( unit="products", unit_scale=True, miniters=1, disable=opt_debug diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py index ab0d82ab..0cf49df6 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py @@ -36,8 +36,7 @@ DEPRECATED_ARCHIVES = { "S2AWS_COG": { - "collection": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", - "data_archive": "AWSCOG", + "collection": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a", }, "S2AWS_JP2": { "collection": "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a", diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py 
index 0d252c61..ebe4b503 100644 --- a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -1,11 +1,13 @@ from __future__ import annotations from typing import Optional, List, Callable, Dict, Any, Union +import warnings from pydantic import model_validator from mapchete_eo.source import Source from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( + DEPRECATED_ARCHIVES, DataArchive, MetadataArchive, KNOWN_SOURCES, @@ -42,6 +44,13 @@ def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: collection = values.get("collection", None) if collection in KNOWN_SOURCES: values.update(KNOWN_SOURCES[collection]) + elif collection in DEPRECATED_ARCHIVES: + warnings.warn( + f"deprecated archive '{collection}' found", + category=DeprecationWarning, + stacklevel=2, + ) + values.update(DEPRECATED_ARCHIVES[collection]) return values @model_validator(mode="after") diff --git a/mapchete_eo/search/__init__.py b/mapchete_eo/search/__init__.py index dd245509..7201c089 100644 --- a/mapchete_eo/search/__init__.py +++ b/mapchete_eo/search/__init__.py @@ -7,8 +7,8 @@ It helps the InputData class to find the input products and their metadata. 
""" -from mapchete_eo.search.stac_search import STACSearchCatalog -from mapchete_eo.search.stac_static import STACStaticCatalog +from mapchete_eo.search.stac_search import STACSearchCollection +from mapchete_eo.search.stac_static import STACStaticCollection from mapchete_eo.search.utm_search import UTMSearchCatalog -__all__ = ["STACSearchCatalog", "STACStaticCatalog", "UTMSearchCatalog"] +__all__ = ["STACSearchCollection", "STACStaticCollection", "UTMSearchCatalog"] diff --git a/mapchete_eo/search/base.py b/mapchete_eo/search/base.py index 6cf4c3ec..30e10427 100644 --- a/mapchete_eo/search/base.py +++ b/mapchete_eo/search/base.py @@ -6,11 +6,12 @@ from cql2 import Expr from pydantic import BaseModel -from pystac import Item, CatalogType, Extent from mapchete.path import MPath, MPathLike from mapchete.types import Bounds +from pystac import Catalog, Item, CatalogType, Extent from pystac.collection import Collection from pystac.stac_io import DefaultStacIO +from pystac_client import CollectionClient from pystac_client.stac_api_io import StacApiIO from rasterio.profiles import Profile from shapely.geometry.base import BaseGeometry @@ -50,6 +51,11 @@ class CollectionSearcher(ABC): """ config_cls: Type[BaseModel] + collection: str + + @abstractmethod + @cached_property + def client(self) -> CollectionClient: ... 
@abstractmethod @cached_property @@ -103,11 +109,24 @@ def write_static_catalog( progress_callback: Optional[Callable] = None, ) -> MPath: """Dump static version of current items.""" - collection_id = name or f"{self.id}" output_path = MPath.from_inp(output_path) assets = assets or [] # initialize catalog - collection_json = output_path / f"{collection_id}.json" + catalog_json = output_path / "catalog.json" + if catalog_json.exists(): + logger.debug("open existing catalog %s", str(catalog_json)) + catalog = Catalog.from_file(catalog_json) + # client = Client.from_file(catalog_json) + # existing_collection = client.get_collection(self.id) + else: + # existing_collections = [] + catalog = Catalog( + name or f"{self.id}", + description or f"Static subset of {self.description}", + stac_extensions=self.stac_extensions, + href=str(catalog_json), + catalog_type=CatalogType.SELF_CONTAINED, + ) src_items = list( self.search( time=time, bounds=bounds, area=area, search_kwargs=search_kwargs @@ -124,7 +143,7 @@ def write_static_catalog( item = get_assets( item, assets, - output_path / item.id, + output_path / self.id / item.id, resolution=assets_dst_resolution, convert_profile=assets_convert_profile, overwrite=overwrite, @@ -133,7 +152,7 @@ def write_static_catalog( if copy_metadata: item = get_metadata_assets( item, - output_path / item.id, + output_path / self.id / item.id, metadata_parser_classes=metadata_parser_classes, resolution=assets_dst_resolution, convert_profile=assets_convert_profile, @@ -149,15 +168,40 @@ def write_static_catalog( if progress_callback: progress_callback(n=n, total=len(src_items)) + # for existing_collection in existing_collections: + # if existing_collection.id == collection.id: + # logger.debug("try to find unregistered items in collection") + # collection_root_path = MPath.from_inp( + # existing_collection.get_self_href() + # ).parent + # for subpath in collection_root_path.ls(): + # if subpath.is_directory(): + # try: + # item = Item.from_file( 
+ # subpath / subpath.with_suffix(".json").name + # ) + # if item.id not in item_ids: + # logger.debug( + # "add existing item with id %s", item.id + # ) + # items.append(item) + # item_ids.add(item.id) + # except FileNotFoundError: + # pass + # break # create collection and copy metadata logger.debug("create new collection") - out_collection = Collection( - id=collection_id, + id=self.id, extent=Extent.from_items(items), - description=description or f"Static subset of {self.description}", + description=self.description, + title=self.client.title, stac_extensions=self.stac_extensions, - href=str(collection_json), + license=self.client.license, + keywords=self.client.keywords, + providers=self.client.providers, + summaries=self.client.summaries, + extra_fields=self.client.extra_fields, catalog_type=CatalogType.SELF_CONTAINED, ) @@ -167,12 +211,14 @@ def write_static_catalog( out_collection.update_extent_from_items() + catalog.add_child(out_collection) + logger.debug("write catalog to %s", output_path) - out_collection.normalize_hrefs(str(output_path)) - out_collection.make_all_asset_hrefs_relative() - out_collection.save(dest_href=str(output_path), stac_io=stac_io) + catalog.normalize_hrefs(str(output_path)) + catalog.make_all_asset_hrefs_relative() + catalog.save(dest_href=str(output_path), stac_io=stac_io) - return collection_json + return catalog_json def filter_items( diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 6b0a1796..d4ed49a4 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -7,11 +7,10 @@ from cql2 import Expr from mapchete import Timer -from mapchete.path import MPathLike from mapchete.tile import BufferedTilePyramid from mapchete.types import Bounds, BoundsLike from pystac import Item -from pystac_client import Client, ItemSearch +from pystac_client import Client, CollectionClient, ItemSearch from shapely.geometry import shape from shapely.geometry.base import 
BaseGeometry @@ -24,8 +23,8 @@ logger = logging.getLogger(__name__) -class STACSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher): - endpoint: str +class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher): + collection: str blacklist: Set[str] = ( blacklist_products(mapchete_eo_settings.blacklist) if mapchete_eo_settings.blacklist @@ -35,21 +34,15 @@ class STACSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher): def __init__( self, - collections: Optional[List[str]] = None, + collection: str, stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None, - endpoint: Optional[MPathLike] = None, ): - if endpoint is not None: - self.endpoint = endpoint - if collections: - self.collections = collections - else: # pragma: no cover - raise ValueError("collections must be given") + self.collection = collection self.stac_item_modifiers = stac_item_modifiers @cached_property - def client(self) -> Client: - return Client.open(self.endpoint) + def client(self) -> CollectionClient: + return CollectionClient.from_file(self.collection) @cached_property def eo_bands(self) -> List[str]: @@ -138,15 +131,10 @@ def _searches() -> Generator[ItemSearch, None, None]: yield item def _eo_bands(self) -> List[str]: - for collection_name in self.collections: - collection = self.client.get_collection(collection_name) - if collection: - item_assets = collection.extra_fields.get("item_assets", {}) - for v in item_assets.values(): - if "eo:bands" in v and "data" in v.get("roles", []): - return ["eo:bands"] - else: # pragma: no cover - raise ValueError(f"cannot find collection {collection}") + item_assets = self.client.extra_fields.get("item_assets", {}) + for v in item_assets.values(): + if "eo:bands" in v and "data" in v.get("roles", []): + return ["eo:bands"] else: # pragma: no cover logger.debug("cannot find eo:bands definition from collections") return [] @@ -154,11 +142,22 @@ def _eo_bands(self) -> List[str]: @cached_property def 
default_search_params(self): return { - "collections": self.collections, + "collections": [self.client], "bbox": None, "intersects": None, } + @cached_property + def search_client(self) -> Client: + # looks weird, right? + # + # one would assume that directly returning self.client.get_root() would + # do the same but if we do so, it seems to ignore the "collections" parameter + # and thus query all collection available on that search endpoint. + # + # the only way to fix this, is to instantiate Client from scratch. + return Client.from_file(self.client.get_root().self_href) + def _search( self, time_range: Optional[TimeRange] = None, @@ -204,7 +203,9 @@ def _search( raise ValueError("no bounds or area given") logger.debug("query catalog using params: %s", search_params) with Timer() as duration: - result = self.client.search(**search_params, limit=config.catalog_pagesize) + result = self.search_client.search( + **search_params, limit=config.catalog_pagesize + ) logger.debug("query took %s", str(duration)) return result @@ -212,13 +213,6 @@ def get_collections(self): for collection_name in self.collections: yield self.client.get_collection(collection_name) - @staticmethod - def from_collection_url(collection_url: str) -> STACSearchCatalog: - return STACSearchCatalog( - endpoint="/".join(collection_url.rstrip("/").split("/")[:-2]), - collections=[collection_url.rstrip("/").split("/")[-1]], - ) - class SpatialSearchChunks: bounds: Bounds diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index fb8f24c0..f1c9bbfb 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -7,7 +7,6 @@ from mapchete.types import BoundsLike from pystac import Item, Catalog, Collection from mapchete.io.vector import bounds_intersect -from mapchete.path import MPathLike from pystac.stac_io import StacIO from pystac_client import CollectionClient from shapely.geometry import shape @@ -29,18 +28,21 @@ 
StacIO.set_default(FSSpecStacIO) -class STACStaticCatalog(StaticCollectionWriterMixin, CollectionSearcher): +class STACStaticCollection(StaticCollectionWriterMixin, CollectionSearcher): config_cls = StacStaticConfig def __init__( self, - baseurl: MPathLike, + collection: str, stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None, ): - self.client = CollectionClient.from_file(str(baseurl), stac_io=FSSpecStacIO()) - # self.collections = [c.id for c in self.client.get_children()] + self.collection = collection self.stac_item_modifiers = stac_item_modifiers + @cached_property + def client(self) -> CollectionClient: + return CollectionClient.from_file(str(self.collection), stac_io=FSSpecStacIO()) + @cached_property def eo_bands(self) -> List[str]: return self._eo_bands() @@ -102,23 +104,25 @@ def _eo_bands(self) -> List[str]: return eo_bands else: warnings.warn( - "Unable to read eo:bands definition from collections. " + "Unable to read eo:bands definition from collection. " "Trying now to get information from assets ..." 
) - # see if eo:bands can be found in properties - item = next(self.client.get_items()) - eo_bands = item.properties.get("eo:bands") - if eo_bands: - return eo_bands - - # look through the assets and collect eo:bands - out = {} - for asset in item.assets.values(): - for eo_band in asset.extra_fields.get("eo:bands", []): - out[eo_band["name"]] = eo_band - if out: - return [v for v in out.values()] + try: + item = next(self.client.get_items(recursive=True)) + eo_bands = item.properties.get("eo:bands") + if eo_bands: + return eo_bands + + # look through the assets and collect eo:bands + out = {} + for asset in item.assets.values(): + for eo_band in asset.extra_fields.get("eo:bands", []): + out[eo_band["name"]] = eo_band + if out: + return [v for v in out.values()] + except StopIteration: + pass logger.debug("cannot find eo:bands definition") return [] diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index 7662fd8d..e9221b5e 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -10,7 +10,7 @@ from mapchete_eo.exceptions import ItemGeometryError from mapchete_eo.search.base import CollectionSearcher -from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog +from mapchete_eo.search import STACSearchCollection, STACStaticCollection from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.types import TimeRange @@ -19,7 +19,7 @@ class Source(BaseModel): """All information required to consume EO products.""" collection: str - catalog_crs: CRSLike = mapchete_eo_settings.default_catalog_crs + catalog_crs: Optional[CRSLike] = mapchete_eo_settings.default_catalog_crs query: Optional[str] = None model_config = ConfigDict(arbitrary_types_allowed=True) @@ -30,11 +30,8 @@ def item_modifier_funcs(self) -> List[Callable]: @cached_property def catalog_type(self) -> Literal["search", "static"]: - try: - (MPath(self.collection) / "items?limit=1").read_json() - return "search" - except FileNotFoundError: - return "static" + # TODO: stupid 
test but probably sufficient + return "static" if self.collection.endswith(".json") else "search" def search( self, @@ -65,10 +62,10 @@ def apply_item_modifier_funcs(self, item: Item) -> Item: def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CollectionSearcher: match self.catalog_type: case "search": - return STACSearchCatalog.from_collection_url(self.collection) + return STACSearchCollection(self.collection) case "static": - return STACStaticCatalog( - baseurl=MPath(self.collection).absolute_path(base_dir=base_dir) + return STACStaticCollection( + collection=MPath(self.collection).absolute_path(base_dir=base_dir) ) def eo_bands(self, base_dir: Optional[MPathLike] = None) -> List[str]: diff --git a/tests/conftest.py b/tests/conftest.py index 2edeb4b8..7ac033b7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,12 +2,12 @@ import numpy as np import numpy.ma as ma -import pystac +from pystac import Item import pytest from mapchete.path import MPath from mapchete.testing import ProcessFixture from mapchete.tile import BufferedTilePyramid -from pystac_client import Client +from pystac_client import CollectionClient from rasterio import Affine from shapely import wkt from shapely.geometry import base @@ -16,7 +16,7 @@ guess_s2metadata_from_item, guess_s2metadata_from_metadata_xml, ) -from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog +from mapchete_eo.search import STACSearchCollection, STACStaticCollection from mapchete_eo.types import TimeRange @@ -47,8 +47,7 @@ def s2_stac_collection(s2_testdata_dir): @pytest.fixture(scope="session") def s2_stac_items(s2_stac_collection): - client = Client.from_file(str(s2_stac_collection)) - collection = next(client.get_collections()) + collection = CollectionClient.from_file(str(s2_stac_collection)) items = [item for item in collection.get_items()] for item in items: item.make_asset_hrefs_absolute() @@ -57,18 +56,22 @@ def s2_stac_items(s2_stac_collection): @pytest.fixture def 
pf_sr_stac_collection(testdata_dir): - return testdata_dir / "pf_stac_collection" / "stac" / "SR" / "catalog.json" + return ( + testdata_dir / "pf_stac_collection" / "stac" / "SR" / "33N" / "collection.json" + ) @pytest.fixture def pf_sr_stac_item(pf_sr_stac_collection): - catalog = STACStaticCatalog(pf_sr_stac_collection) + catalog = STACStaticCollection(pf_sr_stac_collection) return next(iter(catalog.search())) @pytest.fixture def pf_qa_stac_collection(testdata_dir): - return testdata_dir / "pf_stac_collection" / "stac" / "QA" / "catalog.json" + return ( + testdata_dir / "pf_stac_collection" / "stac" / "QA" / "33N" / "collection.json" + ) @pytest.fixture @@ -106,10 +109,9 @@ def test_affine(): @pytest.fixture def s2_stac_item(s2_stac_collection): - item = pystac.pystac.Item.from_file( + item = Item.from_file( str( s2_stac_collection.parent - / "sentinel-2-l2a" / "S2B_33TWM_20230810_0_L2A" / "S2B_33TWM_20230810_0_L2A.json" ) @@ -120,7 +122,7 @@ def s2_stac_item(s2_stac_collection): @pytest.fixture def s2_stac_item_jp2(): - item = pystac.pystac.Item.from_file( + item = Item.from_file( "s3://sentinel-s2-l2a-stac/2023/08/10/S2B_OPER_MSI_L2A_TL_2BPS_20230810T130104_A033567_T33TWM.json" ) item.make_asset_hrefs_absolute() @@ -129,7 +131,7 @@ def s2_stac_item_jp2(): @pytest.fixture def s2_stac_item_cdse_jp2(): - item = pystac.pystac.Item.from_file( + item = Item.from_file( "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a/items/S2B_MSIL2A_20230810T094549_N0509_R079_T33TWM_20230810T130104" ) item.make_asset_hrefs_absolute() @@ -138,7 +140,7 @@ def s2_stac_item_cdse_jp2(): @pytest.fixture def s2_remote_stac_item(): - item = pystac.pystac.Item.from_file( + item = Item.from_file( "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/33/U/WP/2023/7/S2B_33UWP_20230704_0_L2A/S2B_33UWP_20230704_0_L2A.json" ) return item @@ -148,7 +150,6 @@ def s2_remote_stac_item(): def s2_stac_json_half_footprint(s2_stac_collection): return ( 
s2_stac_collection.parent - / "sentinel-2-l2a" / "S2B_33TWM_20230813_0_L2A" / "S2B_33TWM_20230813_0_L2A.json" ) @@ -156,7 +157,7 @@ def s2_stac_json_half_footprint(s2_stac_collection): @pytest.fixture def s2_stac_item_half_footprint(s2_stac_json_half_footprint): - item = pystac.pystac.Item.from_file(str(s2_stac_json_half_footprint)) + item = Item.from_file(str(s2_stac_json_half_footprint)) item.make_asset_hrefs_absolute() return item @@ -333,7 +334,7 @@ def test_edge_tile(): @pytest.fixture(scope="session") def stac_search_catalog(): - return STACSearchCatalog( + return STACSearchCollection( collection="sentinel-2-l2a", time=TimeRange( start="2022-06-01", @@ -346,7 +347,7 @@ def stac_search_catalog(): @pytest.fixture(scope="session") def static_catalog_small(s2_stac_collection): - return STACStaticCatalog( + return STACStaticCollection( s2_stac_collection, ) @@ -436,7 +437,7 @@ def s2_l2a_earthsearch_remote(s2_l2a_earthsearch_remote_item): @pytest.fixture(scope="session") def s2_l2a_earthsearch_remote_item(): """Metadata used by Earth-Search V1 endpoint""" - return pystac.Item.from_file( + return Item.from_file( "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/33/T/WL/2022/6/S2A_33TWL_20220601_0_L2A/S2A_33TWL_20220601_0_L2A.json" ) @@ -453,7 +454,7 @@ def tileinfo_jp2_schema(): @pytest.fixture(scope="session") def stac_item_brdf(s2_testdata_dir): - return pystac.Item.from_file( + return Item.from_file( str( s2_testdata_dir / "stac_items" @@ -466,7 +467,7 @@ def stac_item_brdf(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0509(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_32TMS_20221207_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_32TMS_20221207_0_L2A") ) @@ -474,7 +475,7 @@ def stac_item_pb0509(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0400(s2_testdata_dir): 
"""https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20220130_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20220130_0_L2A") ) @@ -482,7 +483,7 @@ def stac_item_pb0400(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0400_offset(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20220226_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20220226_0_L2A") ) @@ -490,7 +491,7 @@ def stac_item_pb0400_offset(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0301(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20220122_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20220122_0_L2A") ) @@ -498,7 +499,7 @@ def stac_item_pb0301(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0300(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20210629_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20210629_0_L2A") ) @@ -506,7 +507,7 @@ def stac_item_pb0300(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0214(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20210328_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20210328_0_L2A") ) @@ -514,7 +515,7 @@ def stac_item_pb0214(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0213(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20200202_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( 
str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20200202_0_L2A") ) @@ -522,7 +523,7 @@ def stac_item_pb0213(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0212(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20190707_1_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20190707_1_L2A") ) @@ -530,7 +531,7 @@ def stac_item_pb0212(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0211(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20190503_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20190503_0_L2A") ) @@ -538,7 +539,7 @@ def stac_item_pb0211(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0210(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20181119_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20181119_0_L2A") ) @@ -546,7 +547,7 @@ def stac_item_pb0210(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0209(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20181104_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20181104_0_L2A") ) @@ -554,7 +555,7 @@ def stac_item_pb0209(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb0208(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20181005_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20181005_0_L2A") ) @@ -562,7 +563,7 @@ def stac_item_pb0208(s2_testdata_dir): @pytest.fixture(scope="session") def 
stac_item_pb0207(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20180521_1_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20180521_1_L2A") ) @@ -570,7 +571,7 @@ def stac_item_pb0207(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb_l1c_0206(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20180806_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20180806_0_L2A") ) @@ -578,7 +579,7 @@ def stac_item_pb_l1c_0206(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb_l1c_0205(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20171005_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20171005_0_L2A") ) @@ -586,7 +587,7 @@ def stac_item_pb_l1c_0205(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_pb_l1c_0204(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_33TWN_20161202_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2A_33TWN_20161202_0_L2A") ) @@ -594,14 +595,14 @@ def stac_item_pb_l1c_0204(s2_testdata_dir): @pytest.fixture(scope="session") def stac_item_invalid_pb0001(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_33TWN_20180806_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str(s2_testdata_dir / "stac_items" / "S2B_33TWN_20180806_0_L2A") ) @pytest.fixture(scope="session") def full_stac_item_pb0509(s2_testdata_dir): - return pystac.Item.from_file( + return Item.from_file( s2_testdata_dir / "full_products" / "sentinel-2-l2a" @@ -612,7 +613,7 @@ def full_stac_item_pb0509(s2_testdata_dir): 
@pytest.fixture(scope="session") def antimeridian_item1(testdata_dir): - return pystac.Item.from_file( + return Item.from_file( testdata_dir / "antimeridian_items" / "S2A_OPER_MSI_L2A_TL_2APS_20230603T031757_A041497_T01WCQ.json" @@ -621,7 +622,7 @@ def antimeridian_item1(testdata_dir): @pytest.fixture(scope="session") def antimeridian_item2(testdata_dir): - return pystac.Item.from_file( + return Item.from_file( testdata_dir / "antimeridian_items" / "S2B_OPER_MSI_L2A_TL_2BPS_20230503T100334_A030615_T60VXH.json" @@ -630,7 +631,7 @@ def antimeridian_item2(testdata_dir): @pytest.fixture(scope="session") def antimeridian_item3(testdata_dir): - return pystac.Item.from_file( + return Item.from_file( testdata_dir / "antimeridian_items" / "S2B_OPER_MSI_L2A_TL_2BPS_20230512T234921_A032288_T01VCG.json" @@ -639,7 +640,7 @@ def antimeridian_item3(testdata_dir): @pytest.fixture(scope="session") def antimeridian_item4(testdata_dir): - return pystac.Item.from_file( + return Item.from_file( testdata_dir / "antimeridian_items" / "S2B_OPER_MSI_L2A_TL_2BPS_20230513T005426_A032288_T01VCG.json" @@ -648,7 +649,7 @@ def antimeridian_item4(testdata_dir): @pytest.fixture(scope="session") def antimeridian_item5(testdata_dir): - return pystac.Item.from_file( + return Item.from_file( testdata_dir / "antimeridian_items" / "S2A_OPER_MSI_L2A_TL_2APS_20230730T020155_A042312_T01VCC.json" @@ -658,7 +659,7 @@ def antimeridian_item5(testdata_dir): @pytest.fixture(scope="session") def antimeridian_broken_item(testdata_dir): # this footprint is unfuckingfixable - return pystac.Item.from_file( + return Item.from_file( testdata_dir / "antimeridian_items" / "S2A_OPER_MSI_L2A_TL_2APS_20230806T022123_A042412_T60VXH.json" @@ -677,7 +678,7 @@ def product_missing_detector_footprints(): @pytest.fixture(scope="session") def stac_item_missing_detector_footprints(): - return pystac.Item.from_file( + return Item.from_file( 
"https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2B_37WEP_20231017_0_L2A" ) @@ -691,13 +692,13 @@ def stac_item_path_sentinel2_jp2(): @pytest.fixture(scope="session") def stac_item_sentinel2_jp2(stac_item_path_sentinel2_jp2): - return pystac.Item.from_file(stac_item_path_sentinel2_jp2) + return Item.from_file(stac_item_path_sentinel2_jp2) @pytest.fixture(scope="session") def stac_item_sentinel2_jp2_local(s2_testdata_dir): """https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a/items/S2A_32TMS_20221207_0_L2A""" - return pystac.Item.from_file( + return Item.from_file( str( s2_testdata_dir / "stac_items" diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 59e99bde..8407d5e0 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1,4 +1,4 @@ -import pystac_client +from pystac_client import Client import rasterio from mapchete.io import fs_from_path, path_exists from mapchete.io.raster import rasterio_open @@ -7,27 +7,27 @@ from mapchete_eo.platforms.sentinel2 import S2Metadata from mapchete_eo.platforms.sentinel2.types import Resolution -from mapchete_eo.search import STACStaticCatalog +from mapchete_eo.search import STACStaticCollection from mapchete_eo.types import TimeRange def test_pf_sr_items(pf_sr_stac_collection): - catalog = STACStaticCatalog(pf_sr_stac_collection) + catalog = STACStaticCollection(pf_sr_stac_collection) assert len(list(catalog.search())) > 0 def test_pf_sr_eo_bands(pf_sr_stac_collection): - catalog = STACStaticCatalog(pf_sr_stac_collection) + catalog = STACStaticCollection(pf_sr_stac_collection) assert len(catalog.eo_bands) > 0 def test_pf_qa_items(pf_qa_stac_collection): - catalog = STACStaticCatalog(pf_qa_stac_collection) + catalog = STACStaticCollection(pf_qa_stac_collection) assert len(list(catalog.search())) > 0 def test_pf_qa_eo_bands(pf_qa_stac_collection): - catalog = STACStaticCatalog(pf_qa_stac_collection) + catalog = STACStaticCollection(pf_qa_stac_collection) assert 
len(catalog.eo_bands) > 0 @@ -37,7 +37,7 @@ def test_write_static_catalog(static_catalog_small, tmp_path): time=TimeRange(start="2023-08-10", end="2023-08-11"), area=box(15.71762, 46.22546, 15.78400, 46.27169), ) - cat = pystac_client.Client.from_file(str(output_path)) + cat = Client.from_file(str(output_path)) collections = list(cat.get_children()) assert len(collections) == 1 collection = collections[0] @@ -51,7 +51,7 @@ def test_write_static_catalog_copy_assets(static_catalog_small, tmp_path): time=TimeRange(start="2023-08-10", end="2023-08-11"), area=box(15.71762, 46.22546, 15.78400, 46.27169), ) - cat = pystac_client.Client.from_file(str(output_path)) + cat = Client.from_file(str(output_path)) collections = list(cat.get_children()) assert len(collections) == 1 collection = collections[0] @@ -72,7 +72,7 @@ def test_write_static_catalog_copy_assets_relative_output_path(static_catalog_sm time=TimeRange(start="2023-08-10", end="2023-08-11"), area=box(15.71762, 46.22546, 15.78400, 46.27169), ) - cat = pystac_client.Client.from_file(str(output_path)) + cat = Client.from_file(str(output_path)) collections = list(cat.get_children()) assert len(collections) == 1 collection = collections[0] @@ -99,7 +99,7 @@ def test_write_static_catalog_convert_assets(static_catalog_small, tmp_path): time=TimeRange(start="2023-08-10", end="2023-08-11"), area=box(15.71762, 46.22546, 15.78400, 46.27169), ) - cat = pystac_client.Client.from_file(str(output_path)) + cat = Client.from_file(str(output_path)) collections = list(cat.get_children()) assert len(collections) == 1 collection = collections[0] @@ -137,8 +137,8 @@ def test_write_static_catalog_metadata_assets(static_catalog_small, tmp_path): def test_static_catalog_cloud_percent(s2_stac_collection): - all_products = list(STACStaticCatalog(s2_stac_collection).search()) + all_products = list(STACStaticCollection(s2_stac_collection).search()) filtered_products = list( - 
STACStaticCatalog(s2_stac_collection).search(query="eo:cloud_cover<=20") + STACStaticCollection(s2_stac_collection).search(query="eo:cloud_cover<=20") ) assert len(all_products) > len(filtered_products) diff --git a/tests/test_cli.py b/tests/test_cli.py index f9056efb..5445d38e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -78,14 +78,14 @@ def test_s2_brdf(s2_stac_json_half_footprint, tmp_mpath): @pytest.mark.remote @pytest.mark.parametrize( - "collection", + "source", [ lazy_fixture("s2_stac_collection"), "S2AWS_COG", "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a", ], ) -def test_static_catalog(tmp_mpath, collection): +def test_static_catalog(tmp_mpath, source): runner = CliRunner() out_path = tmp_mpath params = [ @@ -99,8 +99,8 @@ def test_static_catalog(tmp_mpath, collection): "2023-08-10", "--end-time", "2023-08-10", - "--collection", - str(collection), + "--source", + str(source), str(out_path), ] result = runner.invoke(eo, params) diff --git a/tests/testdata/sentinel2_antimeridian_east.mapchete b/tests/testdata/sentinel2_antimeridian_east.mapchete index 7bd6e1e8..51d6829c 100644 --- a/tests/testdata/sentinel2_antimeridian_east.mapchete +++ b/tests/testdata/sentinel2_antimeridian_east.mapchete @@ -4,9 +4,9 @@ input: format: Sentinel-2 source: collection: sentinel2/full_products_antimeridian/sentinel-s2-l2a/collection.json - time: - start: 2023-06-01 - end: 2023-06-10 + time: + start: 2023-06-01 + end: 2023-06-10 output: format: GTiff bands: 3 diff --git a/tests/testdata/sentinel2_antimeridian_west.mapchete b/tests/testdata/sentinel2_antimeridian_west.mapchete index dd31f508..59967579 100644 --- a/tests/testdata/sentinel2_antimeridian_west.mapchete +++ b/tests/testdata/sentinel2_antimeridian_west.mapchete @@ -4,9 +4,9 @@ input: format: Sentinel-2 source: collection: sentinel2/full_products_antimeridian/sentinel-s2-l2a/collection.json - time: - start: 2023-06-01 - end: 2023-06-10 + time: + start: 2023-06-01 + end: 2023-06-10 
output: format: GTiff bands: 3 From 87b208fb465587cbe1db6be71a1fa2af83de7903 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 5 Nov 2025 11:29:39 +0100 Subject: [PATCH 30/46] fix utm_search --- mapchete_eo/search/base.py | 9 +++++ mapchete_eo/search/stac_search.py | 10 +----- mapchete_eo/search/stac_static.py | 10 +----- mapchete_eo/search/utm_search.py | 58 ++++++++++++------------------- 4 files changed, 33 insertions(+), 54 deletions(-) diff --git a/mapchete_eo/search/base.py b/mapchete_eo/search/base.py index 30e10427..9b630d8f 100644 --- a/mapchete_eo/search/base.py +++ b/mapchete_eo/search/base.py @@ -52,6 +52,15 @@ class CollectionSearcher(ABC): config_cls: Type[BaseModel] collection: str + stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None + + def __init__( + self, + collection: str, + stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None, + ): + self.collection = collection + self.stac_item_modifiers = stac_item_modifiers @abstractmethod @cached_property diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index d4ed49a4..dcdae3f3 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -3,7 +3,7 @@ import logging from datetime import datetime from functools import cached_property -from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Set, Union +from typing import Any, Dict, Generator, Iterator, List, Optional, Set, Union from cql2 import Expr from mapchete import Timer @@ -32,14 +32,6 @@ class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher): ) config_cls = StacSearchConfig - def __init__( - self, - collection: str, - stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None, - ): - self.collection = collection - self.stac_item_modifiers = stac_item_modifiers - @cached_property def client(self) -> CollectionClient: return CollectionClient.from_file(self.collection) diff --git 
a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index f1c9bbfb..df5ec72f 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -1,7 +1,7 @@ from functools import cached_property import logging import warnings -from typing import Any, Callable, Dict, Generator, List, Optional, Union +from typing import Any, Dict, Generator, List, Optional, Union from mapchete import Bounds from mapchete.types import BoundsLike @@ -31,14 +31,6 @@ class STACStaticCollection(StaticCollectionWriterMixin, CollectionSearcher): config_cls = StacStaticConfig - def __init__( - self, - collection: str, - stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None, - ): - self.collection = collection - self.stac_item_modifiers = stac_item_modifiers - @cached_property def client(self) -> CollectionClient: return CollectionClient.from_file(str(self.collection), stac_io=FSSpecStacIO()) diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index 68a4bb41..22e423e4 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -1,7 +1,7 @@ import datetime from functools import cached_property import logging -from typing import Any, Callable, Dict, Generator, List, Optional, Set, Union +from typing import Any, Dict, Generator, List, Optional, Set, Union from mapchete.io.vector import fiona_open from mapchete.path import MPath, MPathLike @@ -42,18 +42,6 @@ class UTMSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher): ) config_cls = UTMSearchConfig - def __init__( - self, - endpoint: Optional[MPathLike] = None, - collections: List[str] = [], - stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None, - ): - self.endpoint = endpoint or self.endpoint - if len(collections) == 0: # pragma: no cover - raise ValueError("no collections provided") - self.collections = collections - self.stac_item_modifiers = stac_item_modifiers - @cached_property def eo_bands(self) -> 
List[str]: # pragma: no cover return self._eo_bands() @@ -66,11 +54,11 @@ def search( query: Optional[str] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: - if bounds: - bounds = Bounds.from_inp(bounds) - for item in filter_items( - self._raw_search(time=time, bounds=bounds, area=area), + self._raw_search( + time=time, bounds=Bounds.from_inp(bounds) if bounds else None, area=area + ), + query=query, ): yield item @@ -151,24 +139,23 @@ def _raw_search( yield item def _eo_bands(self) -> list: - for collection_name in self.collections: - for ( - collection_properties - ) in UTMSearchConfig().sinergise_aws_collections.values(): - if collection_properties["id"] == collection_name: - collection = Collection.from_dict( - collection_properties["path"].read_json() - ) - if collection: - summary = collection.summaries.to_dict() - if "eo:bands" in summary: - return summary["eo:bands"] - else: - raise ValueError(f"cannot find collection {collection}") + for ( + collection_properties + ) in UTMSearchConfig().sinergise_aws_collections.values(): + if collection_properties["id"] == self.collection.split("/")[-1]: + collection = Collection.from_dict( + collection_properties["path"].read_json() + ) + if collection: + summary = collection.summaries.to_dict() + if "eo:bands" in summary: + return summary["eo:bands"] + else: + raise ValueError(f"cannot find collection {collection}") else: logger.debug( - "cannot find eo:bands definition from collections %s", - self.collections, + "cannot find eo:bands definition from collection %s", + self.collection, ) return [] @@ -181,9 +168,8 @@ def get_collections(self): """ for collection_properties in self.config.sinergise_aws_collections.values(): collection = Collection.from_dict(collection_properties["path"].read_json()) - for collection_name in self.collections: - if collection_name == collection.id: - yield collection + if self.collection.split("/")[-1] == collection.id: + yield collection def 
items_from_static_index( From 67fd285f95db1618ce68376dc978283892711a6e Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 5 Nov 2025 11:32:42 +0100 Subject: [PATCH 31/46] fix utm_search --- mapchete_eo/platforms/sentinel2/config.py | 10 +++++----- mapchete_eo/search/stac_search.py | 6 +----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index 9f9ca635..a98210ed 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -51,7 +51,7 @@ def to_scl_classes(cls, values: List[str]) -> List[SceneClassification]: out.append(value) elif isinstance(value, str): out.append(SceneClassification[value]) - else: + else: # pragma: no cover raise ValidationError("value must be mappable to SceneClassification") return out @@ -143,7 +143,7 @@ def deprecated_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: values["source"] = DEPRECATED_ARCHIVES[archive] cat_baseurl = values.pop("cat_baseurl", None) - if cat_baseurl: + if cat_baseurl: # pragma: no cover warnings.warn( "'cat_baseurl' will be deprecated soon. Please use 'catalog_type=static' in the source.", category=DeprecationWarning, @@ -161,7 +161,7 @@ def deprecated_values(cls, values: Dict[str, Any]) -> Dict[str, Any]: values["source"] = [default_source.model_dump(exclude_none=True)] max_cloud_cover = values.pop("max_cloud_cover", None) - if max_cloud_cover: + if max_cloud_cover: # pragma: no cover warnings.warn( "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' 
in the source 'query' field.", category=DeprecationWarning, @@ -220,7 +220,7 @@ def to_scl_classes(cls, values: List[str]) -> List[SceneClassification]: out.append(value) elif isinstance(value, str): out.append(SceneClassification[value]) - else: + else: # pragma: no cover raise ValidationError("value must be mappable to SceneClassification") return out @@ -235,7 +235,7 @@ def parse(config: Union[dict, MaskConfig]) -> MaskConfig: elif isinstance(config, dict): return MaskConfig(**config) - else: + else: # pragma: no cover raise TypeError( f"mask configuration should either be a dictionary or a MaskConfig object, not {config}" ) diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index dcdae3f3..19277cfe 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -82,7 +82,7 @@ def _searches() -> Generator[ItemSearch, None, None]: ) logger.debug("found %s products", search.matched()) matched = search.matched() or 0 - if matched > config.catalog_chunk_threshold: + if matched > config.catalog_chunk_threshold: # pragma: no cover spatial_search_chunks = SpatialSearchChunks( bounds=bounds, area=area, @@ -201,10 +201,6 @@ def _search( logger.debug("query took %s", str(duration)) return result - def get_collections(self): - for collection_name in self.collections: - yield self.client.get_collection(collection_name) - class SpatialSearchChunks: bounds: Bounds From d5e805ea62e008e90dc260dac75657aeb3d5a9aa Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 5 Nov 2025 11:45:01 +0100 Subject: [PATCH 32/46] increase test coverage --- mapchete_eo/base.py | 46 +++++++++++++++++------------------ mapchete_eo/search/config.py | 6 ++--- mapchete_eo/search/s2_mgrs.py | 2 +- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index d3cd5354..6a4a2bf8 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -66,7 +66,7 @@ def to_list(cls, values: Dict[str, Any]) 
-> Dict[str, Any]: @model_validator(mode="before") def deprecate_cat_baseurl(cls, values: Dict[str, Any]) -> Dict[str, Any]: cat_baseurl = values.get("cat_baseurl") - if cat_baseurl: + if cat_baseurl: # pragma: no cover warnings.warn( "'cat_baseurl' will be deprecated soon. Please use 'catalog_type=static' in the source.", category=DeprecationWarning, @@ -341,27 +341,25 @@ def filter_products( """ Return a filtered list of input products. """ - if any([start_time, end_time, timestamps]): + if any([start_time, end_time, timestamps]): # pragma: no cover raise NotImplementedError("time subsets are not yet implemented") if time_pattern: # filter products by time pattern - tz = tzutc() - coord_time = [ - t.replace(tzinfo=tz) - for t in croniter.croniter_range( - to_datetime(self.start_time), - to_datetime(self.end_time), - time_pattern, - ) - ] return [ product for product in self.products - if product.item.datetime in coord_time + if product.item.datetime + in [ + t.replace(tzinfo=tzutc()) + for t in croniter.croniter_range( + to_datetime(self.start_time), + to_datetime(self.end_time), + time_pattern, + ) + ] ] - else: - return self.products + return self.products def is_empty(self) -> bool: # pragma: no cover """ @@ -385,16 +383,16 @@ def default_read_values( nodatavals = self.default_read_nodataval merge_products_by = merge_products_by or self.default_read_merge_products_by merge_method = merge_method or self.default_read_merge_method - if resampling is None: - resampling = self.default_read_resampling - else: - resampling = ( - resampling - if isinstance(resampling, Resampling) - else Resampling[resampling] - ) return dict( - resampling=resampling, + resampling=( + self.default_read_resampling + if resampling is None + else ( + resampling + if isinstance(resampling, Resampling) + else Resampling[resampling] + ) + ), nodatavals=nodatavals, merge_products_by=merge_products_by, merge_method=merge_method, @@ -555,7 +553,7 @@ def products(self) -> IndexedFeatures: return 
self._products # TODO: copied it from mapchete_satellite, not yet sure which use case this is - elif self.standalone: + elif self.standalone: # pragma: no cover raise NotImplementedError() # if preprocessing tasks are ready, index them for further use diff --git a/mapchete_eo/search/config.py b/mapchete_eo/search/config.py index 2f19b761..0f3f7914 100644 --- a/mapchete_eo/search/config.py +++ b/mapchete_eo/search/config.py @@ -14,7 +14,7 @@ class StacSearchConfig(BaseModel): @model_validator(mode="before") def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if "max_cloud_cover" in values: + if "max_cloud_cover" in values: # pragma: no cover raise DeprecationWarning( "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.", ) @@ -24,7 +24,7 @@ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: class StacStaticConfig(BaseModel): @model_validator(mode="before") def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if "max_cloud_cover" in values: + if "max_cloud_cover" in values: # pragma: no cover raise DeprecationWarning( "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.", ) @@ -34,7 +34,7 @@ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: class UTMSearchConfig(BaseModel): @model_validator(mode="before") def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: - if "max_cloud_cover" in values: + if "max_cloud_cover" in values: # pragma: no cover raise DeprecationWarning( "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' 
in the source 'query' field.", ) diff --git a/mapchete_eo/search/s2_mgrs.py b/mapchete_eo/search/s2_mgrs.py index f07d37a3..dc16f173 100644 --- a/mapchete_eo/search/s2_mgrs.py +++ b/mapchete_eo/search/s2_mgrs.py @@ -254,7 +254,7 @@ def from_tile_id(tile_id: str) -> S2Tile: grid_square = tile_id[3:] try: int(utm_zone) - except Exception: + except Exception: # pragma: no cover raise ValueError(f"invalid UTM zone given: {utm_zone}") return MGRSCell(utm_zone, latitude_band).tile(grid_square) From 21f7275ed86fc32704867d3ecb318968abc93067 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 6 Nov 2025 08:57:00 +0100 Subject: [PATCH 33/46] enable multiple sources and add test --- .../preconfigured_sources/item_mappers.py | 34 +------------------ tests/conftest.py | 9 +++++ tests/platforms/sentinel2/test_base.py | 7 ++++ .../sentinel2_multiple_sources.mapchete | 21 ++++++++++++ 4 files changed, 38 insertions(+), 33 deletions(-) create mode 100644 tests/testdata/sentinel2_multiple_sources.mapchete diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index cd24e22a..0b6a09c6 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -21,6 +21,7 @@ @maps_item_id(from_collections=["EarthSearch", "EarthSearch_legacy"]) def earthsearch_id_mapper(item: Item) -> Item: + item.id = item.properties["s2:product_uri"].rstrip(".SAFE") return item @@ -158,36 +159,3 @@ def cdse_s2metadata(item: Item) -> S2Metadata: path_mapper=CDSEPathMapper(MPath(item.assets["granule_metadata"].href)), processing_baseline_field="processing:version", ) - - -# from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper -# from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_earthsearch import ( -# EarthSearchPathMapper, -# ) -# from 
mapchete_eo.platforms.sentinel2.metadata_parser.default_path_mapper import ( -# XMLMapper, -# ) -# from mapchete_eo.platforms.sentinel2.preconfigured_sources.metadata_xml_sinergise import SinergisePathMapper - - -# def default_path_mapper_guesser( -# url: str, -# **kwargs, -# ) -> S2MetadataPathMapper: -# """Guess S2PathMapper based on URL. - -# If a new path mapper is added in this module, it should also be added to this function -# in order to be detected. -# """ -# if url.startswith( -# ("https://roda.sentinel-hub.com/sentinel-s2-l2a/", "s3://sentinel-s2-l2a/") -# ) or url.startswith( -# ("https://roda.sentinel-hub.com/sentinel-s2-l1c/", "s3://sentinel-s2-l1c/") -# ): -# return SinergisePathMapper(url, **kwargs) -# elif url.startswith( -# "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/" -# ): -# return EarthSearchPathMapper(url, **kwargs) -# else: -# return XMLMapper(url, **kwargs) diff --git a/tests/conftest.py b/tests/conftest.py index 7ac033b7..ddd9c81d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -306,6 +306,15 @@ def sentinel2_stac_area_mapchete(tmp_path, testdata_dir): yield example +@pytest.fixture +def sentinel2_multiple_sources_mapchete(tmp_path, testdata_dir): + with ProcessFixture( + testdata_dir / "sentinel2_multiple_sources.mapchete", + output_tempdir=tmp_path, + ) as example: + yield example + + @pytest.fixture def merge_rasters_mapchete(tmp_path, testdata_dir): with ProcessFixture( diff --git a/tests/platforms/sentinel2/test_base.py b/tests/platforms/sentinel2/test_base.py index 95c736db..d0a24af9 100644 --- a/tests/platforms/sentinel2/test_base.py +++ b/tests/platforms/sentinel2/test_base.py @@ -484,3 +484,10 @@ def test_footprint_buffer(sentinel2_stac_mapchete, test_edge_tile): ) assert buffered.mask.sum() > unbuffered.mask.sum() + + +@pytest.mark.remote +def test_multiple_sources(sentinel2_multiple_sources_mapchete): + mp = sentinel2_multiple_sources_mapchete.mp() + input_data = 
list(mp.config.inputs.values())[0] + assert input_data.products diff --git a/tests/testdata/sentinel2_multiple_sources.mapchete b/tests/testdata/sentinel2_multiple_sources.mapchete new file mode 100644 index 00000000..d8ca2003 --- /dev/null +++ b/tests/testdata/sentinel2_multiple_sources.mapchete @@ -0,0 +1,21 @@ +process: read_xarray.py +input: + inp: + format: Sentinel-2 + source: + - collection: EarthSearch + - collection: CDSE + data_archive: AWSJP2 + time: + start: 2024-04-01 + end: 2024-04-03 +output: + format: GTiff + bands: 3 + path: sentinel2 + dtype: uint16 +pyramid: + grid: geodetic +zoom_levels: 13 +# tmx bounds 13 1879 8938 +bounds: [16.3916015625, 48.69140625, 16.41357421875, 48.71337890625] \ No newline at end of file From b6e7c9b68c2bb7ac6727c0291341bf6c096635f1 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 6 Nov 2025 14:22:56 +0100 Subject: [PATCH 34/46] fix query format --- mapchete_eo/search/stac_search.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 19277cfe..22bab141 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -5,7 +5,6 @@ from functools import cached_property from typing import Any, Dict, Generator, Iterator, List, Optional, Set, Union -from cql2 import Expr from mapchete import Timer from mapchete.tile import BufferedTilePyramid from mapchete.types import Bounds, BoundsLike @@ -184,7 +183,7 @@ def _search( search_params = dict( self.default_search_params, datetime=f"{start}/{end}", - query=Expr(query).to_json() if query else None, + query=[query] if query else None, **kwargs, ) if ( From 293971aa1150976f20ac2ef4454c72358f27dbaa Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 6 Nov 2025 14:23:13 +0100 Subject: [PATCH 35/46] allow for bounds parameter per source --- mapchete_eo/source.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git 
a/mapchete_eo/source.py b/mapchete_eo/source.py index e9221b5e..fdc3cd70 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -1,6 +1,7 @@ from functools import cached_property from typing import Any, Dict, List, Literal, Optional, Generator, Union, Callable +from mapchete.bounds import Bounds from mapchete.path import MPath from mapchete.types import BoundsLike, CRSLike, MPathLike from pydantic import BaseModel, ConfigDict, model_validator @@ -21,6 +22,8 @@ class Source(BaseModel): collection: str catalog_crs: Optional[CRSLike] = mapchete_eo_settings.default_catalog_crs query: Optional[str] = None + area: Optional[Union[MPathLike, dict, type[BaseGeometry]]] = None + bounds: Optional[BoundsLike] = None model_config = ConfigDict(arbitrary_types_allowed=True) @@ -33,6 +36,26 @@ def catalog_type(self) -> Literal["search", "static"]: # TODO: stupid test but probably sufficient return "static" if self.collection.endswith(".json") else "search" + def _spatial_subset( + self, + bounds: Optional[BoundsLike] = None, + area: Optional[BaseGeometry] = None, + ) -> Dict[str, Any]: + """Combine bounds and area with bounds defined in Source if any.""" + if self.bounds is None: + return {"bounds": bounds, "area": area} + self_bounds = Bounds.from_inp(self.bounds) + out = dict() + if bounds is not None: + bounds = Bounds.from_inp(bounds) + if bounds.intersects(self_bounds): + out["bounds"] = Bounds.from_inp( + bounds.geometry.intersection(self_bounds.geometry) + ) + if area is not None: + out["area"] = area.intersection(self_bounds.geometry) + return out + def search( self, time: Union[TimeRange, List[TimeRange]], @@ -42,10 +65,12 @@ def search( ) -> Generator[Item, None, None]: for item in self.get_catalog(base_dir=base_dir).search( time=time, - bounds=bounds, - area=area, query=self.query, search_kwargs=dict(query=self.query) if self.query else None, + **self._spatial_subset( + bounds=bounds, + area=area, + ), ): yield self.apply_item_modifier_funcs(item) @@ -77,4 
+102,6 @@ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: raise DeprecationWarning( "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.", ) + elif "area" in values: # pragma: no cover + raise NotImplementedError("please use 'bounds' as spatial subset for now") return values From 0f1cc4cced705cc82fba70c6e686e8e4f15408db Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Fri, 7 Nov 2025 10:28:08 +0100 Subject: [PATCH 36/46] clean up --- mapchete_eo/base.py | 1 + .../platforms/sentinel2/_mapper_registry.py | 39 +++++++------------ .../preconfigured_sources/__init__.py | 20 ++++++---- .../preconfigured_sources/item_mappers.py | 14 ++++++- .../sentinel2/preprocessing_tasks.py | 13 ++++++- mapchete_eo/platforms/sentinel2/source.py | 39 ++++++++++--------- mapchete_eo/platforms/sentinel2/types.py | 5 +++ 7 files changed, 76 insertions(+), 55 deletions(-) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index 6a4a2bf8..b6bb7a9b 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -529,6 +529,7 @@ def source_items(self) -> Generator[Item, None, None]: # if item is new, add to list and yield already_returned.add(item.id) + item.properties["mapchete_eo:source"] = source yield item logger.debug("returned set of %s items", len(already_returned)) diff --git a/mapchete_eo/platforms/sentinel2/_mapper_registry.py b/mapchete_eo/platforms/sentinel2/_mapper_registry.py index 3c2a4c9f..e0fa058e 100644 --- a/mapchete_eo/platforms/sentinel2/_mapper_registry.py +++ b/mapchete_eo/platforms/sentinel2/_mapper_registry.py @@ -1,39 +1,24 @@ from typing import List, Callable, Dict, Any, Optional +from pystac import Item + +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata +from mapchete_eo.platforms.sentinel2.types import DataArchive, MetadataArchive + # decorators for mapper functions using the registry pattern # 
############################################################## -ID_MAPPER_REGISTRY: Dict[Any, Callable] = {} -STAC_METADATA_MAPPER_REGISTRY: Dict[Any, Callable] = {} -S2METADATA_MAPPER_REGISTRY: Dict[Any, Callable] = {} +ID_MAPPER_REGISTRY: Dict[Any, Callable[[Item], Item]] = {} +STAC_METADATA_MAPPER_REGISTRY: Dict[Any, Callable[[Item], Item]] = {} +S2METADATA_MAPPER_REGISTRY: Dict[Any, Callable[[Item], S2Metadata]] = {} -MAPPER_REGISTRIES = { +MAPPER_REGISTRIES: Dict[str, Any] = { "ID": ID_MAPPER_REGISTRY, "STAC metadata": STAC_METADATA_MAPPER_REGISTRY, "S2Metadata": S2METADATA_MAPPER_REGISTRY, } -# @dataclass -# class Registries: -# id_mappers: Dict[Any, Callable] = field(default_factory=dict) -# stac_metadata_mappers: Dict[Any, Callable] = field(default_factory=dict) -# s2metadata_mappers: Dict[Any, Callable] = field(default_factory=dict) - -# def register( -# self, -# mapper: Literal["ID", "STAC metadata", "S2Metadata"], -# key: Any, -# func: Callable, -# ) -> None: -# if key in registry: -# raise ValueError(f"{key} already registered in {registry}") -# registry[key] = func - - -# MAPPER_REGISTRY = Registries() - - def _register_func(registry: Dict[str, Callable], key: Any, func: Callable): if key in registry: raise ValueError(f"{key} already registered in {registry}") @@ -56,7 +41,7 @@ def decorator(func): def maps_stac_metadata( - from_collections: List[str], to_data_archives: Optional[List[str]] = None + from_collections: List[str], to_data_archives: Optional[List[DataArchive]] = None ): """ Decorator registering STAC metadata mapper. @@ -83,7 +68,9 @@ def decorator(func): return decorator -def creates_s2metadata(from_collections: List[str], to_metadata_archives: List[str]): +def creates_s2metadata( + from_collections: List[str], to_metadata_archives: List[MetadataArchive] +): """ Decorator registering S2Metadata creator. 
""" diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py index 0cf49df6..65078e1d 100644 --- a/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py @@ -1,12 +1,12 @@ -from typing import Literal, Dict, Any +from typing import Dict, Any # importing this is crucial so the mapping functions get registered before registry is accessed from mapchete_eo.platforms.sentinel2.preconfigured_sources.item_mappers import ( - earthsearch_assets_paths_mapper, # noqa: F401 - earthsearch_id_mapper, # noqa: F401 - earthsearch_to_s2metadata, # noqa: F401 - cdse_asset_names, # noqa: F401 - cdse_s2metadata, # noqa: F401 + earthsearch_assets_paths_mapper, + earthsearch_id_mapper, + earthsearch_to_s2metadata, + cdse_asset_names, + cdse_s2metadata, ) from mapchete_eo.platforms.sentinel2.preconfigured_sources.guessers import ( guess_metadata_path_mapper, @@ -19,9 +19,13 @@ "guess_metadata_path_mapper", "guess_s2metadata_from_item", "guess_s2metadata_from_metadata_xml", + "earthsearch_assets_paths_mapper", + "earthsearch_id_mapper", + "earthsearch_to_s2metadata", + "cdse_asset_names", + "cdse_s2metadata", ] -DataArchive = Literal["AWSCOG", "AWSJP2"] KNOWN_SOURCES: Dict[str, Any] = { "EarthSearch": { "collection": "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a", @@ -31,6 +35,7 @@ }, "CDSE": { "collection": "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a", + "metadata_archive": "CDSE", }, } @@ -50,4 +55,3 @@ "collection": "https://stac.dataspace.copernicus.eu/v1/collections/sentinel-2-l2a", }, } -MetadataArchive = Literal["roda"] diff --git a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py index 0b6a09c6..0f0ac7b5 100644 --- 
a/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +++ b/mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py @@ -10,6 +10,7 @@ CDSEPathMapper, EarthSearchPathMapper, EarthSearchC1PathMapper, + SinergisePathMapper, ) from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.search.s2_mgrs import S2Tile @@ -112,7 +113,7 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: s2tile = S2Tile.from_grid_code(item.properties["grid:code"]) product_basepath = MPath( path_base_scheme.format( - utm_zone=s2tile.utm_zone, + utm_zone=int(s2tile.utm_zone), latitude_band=s2tile.latitude_band, grid_square=s2tile.grid_square, year=item.datetime.year, @@ -152,10 +153,19 @@ def map_cdse_paths_to_jp2_archive(item: Item) -> Item: return item -@creates_s2metadata(from_collections=["CDSE"], to_metadata_archives=["roda"]) +@creates_s2metadata(from_collections=["CDSE"], to_metadata_archives=["CDSE"]) def cdse_s2metadata(item: Item) -> S2Metadata: return S2Metadata.from_stac_item( item, path_mapper=CDSEPathMapper(MPath(item.assets["granule_metadata"].href)), processing_baseline_field="processing:version", ) + + +@creates_s2metadata(from_collections=["CDSE"], to_metadata_archives=["roda"]) +def cdse_to_roda_s2metadata(item: Item) -> S2Metadata: + return S2Metadata.from_stac_item( + item, + path_mapper=SinergisePathMapper(MPath(item.assets["granule_metadata"].href)), + processing_baseline_field="processing:version", + ) diff --git a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py index 60e7cf22..af06fdb0 100644 --- a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +++ b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py @@ -6,6 +6,7 @@ from mapchete_eo.exceptions import CorruptedProductMetadata from mapchete_eo.platforms.sentinel2.config import CacheConfig from mapchete_eo.platforms.sentinel2.product import S2Product +from 
mapchete_eo.platforms.sentinel2.source import Sentinel2Source from mapchete_eo.product import add_to_blacklist logger = logging.getLogger(__name__) @@ -16,9 +17,19 @@ def parse_s2_product( cache_config: Optional[CacheConfig] = None, cache_all: bool = False, ) -> Union[S2Product, CorruptedProductMetadata]: + # use mapper from source if applickable + source: Union[Sentinel2Source, None] = item.properties.pop( + "mapchete_eo:source", None + ) + metadata = None + if source is not None: + mapper = source.get_s2metadata_mapper() + if mapper: + metadata = mapper(item) + try: s2product = S2Product.from_stac_item( - item, cache_config=cache_config, cache_all=cache_all + item, cache_config=cache_config, cache_all=cache_all, metadata=metadata ) except CorruptedProductMetadata as exc: add_to_blacklist(item.get_self_href()) diff --git a/mapchete_eo/platforms/sentinel2/source.py b/mapchete_eo/platforms/sentinel2/source.py index ebe4b503..a9027506 100644 --- a/mapchete_eo/platforms/sentinel2/source.py +++ b/mapchete_eo/platforms/sentinel2/source.py @@ -4,13 +4,17 @@ import warnings from pydantic import model_validator +from pystac import Item +from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata from mapchete_eo.source import Source from mapchete_eo.platforms.sentinel2.preconfigured_sources import ( DEPRECATED_ARCHIVES, + KNOWN_SOURCES, +) +from mapchete_eo.platforms.sentinel2.types import ( DataArchive, MetadataArchive, - KNOWN_SOURCES, ) from mapchete_eo.platforms.sentinel2._mapper_registry import MAPPER_REGISTRIES @@ -32,7 +36,7 @@ class Sentinel2Source(Source): def item_modifier_funcs(self) -> List[Callable]: return [ func - for func in (self.get_id_mapper(), self.get_stac_metadata_mapper()) + for func in (self.get_id_mapper(), *self.get_stac_metadata_mappers()) if func is not None ] @@ -57,26 +61,27 @@ def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]: def verify_mappers(self) -> Sentinel2Source: # make sure all required 
mappers are registered self.get_id_mapper() - self.get_stac_metadata_mapper() + self.get_stac_metadata_mappers() self.get_s2metadata_mapper() return self - def get_id_mapper(self) -> Union[Callable, None]: + def get_id_mapper(self) -> Union[Callable[[Item], Item], None]: if self.catalog_type == "static": return None - for key in MAPPER_REGISTRIES["ID"]: + for key in MAPPER_REGISTRIES["ID"].keys(): if self.collection == known_collection_to_url(key): return MAPPER_REGISTRIES["ID"][key] else: raise ValueError(f"no ID mapper for {self.collection} found") - def get_stac_metadata_mapper(self) -> Union[Callable, None]: + def get_stac_metadata_mappers(self) -> List[Callable[[Item], Item]]: """Find mapper function. A mapper function must be provided if a custom data_archive was configured. """ + mappers: List[Callable] = [] if self.catalog_type == "static": - return None + return mappers for key in MAPPER_REGISTRIES["STAC metadata"]: if isinstance(key, tuple): collection, data_archive = key @@ -84,18 +89,16 @@ def get_stac_metadata_mapper(self) -> Union[Callable, None]: self.collection == known_collection_to_url(collection) and data_archive == self.data_archive ): - return MAPPER_REGISTRIES["STAC metadata"][key] - else: - if self.collection == known_collection_to_url(key): - return MAPPER_REGISTRIES["STAC metadata"][key] - else: - if self.data_archive is None: - return None - raise ValueError( - f"no STAC metadata mapper from {self.collection} to {self.data_archive} found" - ) + mappers.append(MAPPER_REGISTRIES["STAC metadata"][key]) + elif self.collection == known_collection_to_url(key): + mappers.append(MAPPER_REGISTRIES["STAC metadata"][key]) + if mappers or self.data_archive is None: + return mappers + raise ValueError( + f"no STAC metadata mapper from {self.collection} to {self.data_archive} found" + ) - def get_s2metadata_mapper(self) -> Union[Callable, None]: + def get_s2metadata_mapper(self) -> Union[Callable[[Item], S2Metadata], None]: if self.catalog_type == 
"static" or self.metadata_archive is None: return None for key in MAPPER_REGISTRIES["S2Metadata"]: diff --git a/mapchete_eo/platforms/sentinel2/types.py b/mapchete_eo/platforms/sentinel2/types.py index 91451f8e..5fe88c76 100644 --- a/mapchete_eo/platforms/sentinel2/types.py +++ b/mapchete_eo/platforms/sentinel2/types.py @@ -1,4 +1,5 @@ from enum import Enum +from typing import Literal Resolution = Enum( "Resolution", @@ -107,3 +108,7 @@ class SceneClassification(int, Enum): cloud_high_probability = 9 thin_cirrus = 10 snow = 11 + + +DataArchive = Literal["AWSCOG", "AWSJP2"] +MetadataArchive = Literal["roda", "CDSE"] From 301fdd603c1b5582f07a1f040e0c7cea9d5b44d0 Mon Sep 17 00:00:00 2001 From: Scartography Date: Mon, 10 Nov 2025 16:56:01 +0100 Subject: [PATCH 37/46] make EOSTAC driver work, some typing for searching and adding basic test --- mapchete_eo/base.py | 8 ++-- mapchete_eo/eostac.py | 2 +- mapchete_eo/platforms/sentinel2/config.py | 2 +- mapchete_eo/search/base.py | 2 +- mapchete_eo/search/stac_search.py | 46 ++++++++++--------- mapchete_eo/search/stac_static.py | 4 +- mapchete_eo/search/utm_search.py | 11 +++-- mapchete_eo/source.py | 2 +- pyproject.toml | 2 +- tests/conftest.py | 9 ++++ tests/test_eostac.py | 11 ++++- tests/testdata/read_xarray_dem.py | 4 ++ tests/testdata/stac.mapchete | 2 +- .../stac_cdse_copernicus_dem.mapchete | 15 ++++++ tests/testdata/stac_copernicus_dem.mapchete | 15 ++++++ 15 files changed, 97 insertions(+), 38 deletions(-) create mode 100644 tests/testdata/read_xarray_dem.py create mode 100644 tests/testdata/stac_cdse_copernicus_dem.mapchete create mode 100644 tests/testdata/stac_copernicus_dem.mapchete diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index b6bb7a9b..464a4776 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -46,7 +46,7 @@ class BaseDriverConfig(BaseModel): format: str source: Sequence[Source] - time: Union[TimeRange, List[TimeRange]] + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = 
None cat_baseurl: Optional[str] = None cache: Optional[Any] = None footprint_buffer: float = 0 @@ -90,7 +90,7 @@ class EODataCube(base.InputTile): tile: BufferedTile eo_bands: dict - time: List[TimeRange] + time: Optional[List[TimeRange]] area: BaseGeometry area_pixelbuffer: int = 0 @@ -99,7 +99,7 @@ def __init__( tile: BufferedTile, products: Optional[List[EOProductProtocol]], eo_bands: dict, - time: List[TimeRange], + time: Optional[List[TimeRange]] = None, input_key: Optional[str] = None, area: Optional[BaseGeometry] = None, **kwargs, @@ -426,7 +426,7 @@ class InputData(base.InputData): default_preprocessing_task: Callable = staticmethod(EOProduct.from_stac_item) driver_config_model: Type[BaseDriverConfig] = BaseDriverConfig params: BaseDriverConfig - time: Union[TimeRange, List[TimeRange]] + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] area: BaseGeometry _products: Optional[IndexedFeatures] = None diff --git a/mapchete_eo/eostac.py b/mapchete_eo/eostac.py index 8cb51d00..a1f8ef9f 100644 --- a/mapchete_eo/eostac.py +++ b/mapchete_eo/eostac.py @@ -5,7 +5,7 @@ from mapchete_eo import base METADATA: dict = { - "driver_name": "EOSTAC_DEV", + "driver_name": "EOSTAC", "data_type": None, "mode": "r", "file_extensions": [], diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index a98210ed..8cd14e5d 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -110,7 +110,7 @@ class CacheConfig(BaseModel): class Sentinel2DriverConfig(BaseDriverConfig): format: str = "Sentinel-2" - time: Union[TimeRange, List[TimeRange]] + time: Union[TimeRange, List[Optional[TimeRange]]] # new source: List[Sentinel2Source] = [default_source] diff --git a/mapchete_eo/search/base.py b/mapchete_eo/search/base.py index 9b630d8f..d3e6b97b 100644 --- a/mapchete_eo/search/base.py +++ b/mapchete_eo/search/base.py @@ -85,7 +85,7 @@ def stac_extensions(self) -> List[str]: ... 
@abstractmethod def search( self, - time: Optional[Union[TimeRange, List[TimeRange]]] = None, + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 22bab141..c64aa219 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -53,7 +53,7 @@ def stac_extensions(self) -> List[str]: def search( self, - time: Optional[Union[TimeRange, List[TimeRange]]] = None, + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, @@ -62,8 +62,6 @@ def search( config = self.config_cls(**search_kwargs or {}) if bounds: bounds = Bounds.from_inp(bounds) - if time is None: # pragma: no cover - raise ValueError("time must be set") if area is None and bounds is None: # pragma: no cover raise ValueError("either bounds or area have to be given") @@ -158,9 +156,6 @@ def _search( config: StacSearchConfig = StacSearchConfig(), **kwargs, ) -> ItemSearch: - if time_range is None: # pragma: no cover - raise ValueError("time_range not provided") - if bounds is not None: if shape(bounds).is_empty: # pragma: no cover raise ValueError("bounds empty") @@ -170,22 +165,29 @@ def _search( raise ValueError("area empty") kwargs.update(intersects=area) - start = ( - time_range.start.date() - if isinstance(time_range.start, datetime) - else time_range.start - ) - end = ( - time_range.end.date() - if isinstance(time_range.end, datetime) - else time_range.end - ) - search_params = dict( - self.default_search_params, - datetime=f"{start}/{end}", - query=[query] if query else None, - **kwargs, - ) + if time_range: + start = ( + time_range.start.date() + if isinstance(time_range.start, datetime) + else time_range.start + ) + end = ( + time_range.end.date() + if 
isinstance(time_range.end, datetime) + else time_range.end + ) + search_params = dict( + self.default_search_params, + datetime=f"{start}/{end}", + query=[query] if query else None, + **kwargs, + ) + else: + search_params = dict( + self.default_search_params, + query=[query] if query else None, + **kwargs, + ) if ( bounds is None and area is None diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index df5ec72f..fe76392d 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -53,7 +53,7 @@ def stac_extensions(self) -> List[str]: def search( self, - time: Optional[Union[TimeRange, List[TimeRange]]] = None, + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, @@ -67,7 +67,7 @@ def search( def _raw_search( self, - time: Optional[Union[TimeRange, List[TimeRange]]] = None, + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, area: Optional[BaseGeometry] = None, ) -> Generator[Item, None, None]: if area is not None and area.is_empty: diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index 22e423e4..974b2fd6 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -48,7 +48,7 @@ def eo_bands(self) -> List[str]: # pragma: no cover def search( self, - time: Optional[Union[TimeRange, List[TimeRange]]] = None, + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, @@ -64,7 +64,7 @@ def search( def _raw_search( self, - time: Optional[Union[TimeRange, List[TimeRange]]] = None, + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, config: UTMSearchConfig = UTMSearchConfig(), @@ -79,7 +79,12 @@ def _raw_search( elif 
bounds is not None: bounds = Bounds.from_inp(bounds) area = shape(bounds) - for time_range in time if isinstance(time, list) else [time]: + + # Cleaner time list in case None present as time (undefined) + time_list: list[TimeRange] = ( + [t for t in time if t is not None] if isinstance(time, list) else [time] + ) + for time_range in time_list: start_time = ( time_range.start if isinstance(time_range.start, datetime.date) diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index fdc3cd70..5076fd9b 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -58,7 +58,7 @@ def _spatial_subset( def search( self, - time: Union[TimeRange, List[TimeRange]], + time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, base_dir: Optional[MPathLike] = None, diff --git a/pyproject.toml b/pyproject.toml index 6cf913cb..550c1d25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ test = [ eo = "mapchete_eo.cli:eo" [project.entry-points."mapchete.formats.drivers"] -eostac_dev = "mapchete_eo.eostac" +eostac = "mapchete_eo.eostac" sentinel2 = "mapchete_eo.platforms.sentinel2" [project.entry-points."mapchete.processes"] diff --git a/tests/conftest.py b/tests/conftest.py index ddd9c81d..3dc82a47 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -171,6 +171,15 @@ def stac_mapchete(tmp_path, testdata_dir): yield example +@pytest.fixture +def stac_cdse_copernicus_dem_mapchete(tmp_path, testdata_dir): + with ProcessFixture( + testdata_dir / "stac_cdse_copernicus_dem.mapchete", + output_tempdir=tmp_path, + ) as example: + yield example + + @pytest.fixture def eoxcloudless_8bit_dtype_scale_mapchete(tmp_path, testdata_dir): with ProcessFixture( diff --git a/tests/test_eostac.py b/tests/test_eostac.py index 63d8d05e..99837dfb 100644 --- a/tests/test_eostac.py +++ b/tests/test_eostac.py @@ -19,7 +19,7 @@ def test_pf_eo_bands_to_band_locations(pf_sr_stac_item): def 
test_format_available(): - assert "EOSTAC_DEV" in available_input_formats() + assert "EOSTAC" in available_input_formats() def test_stac_read_xarray(stac_mapchete, test_tile): @@ -36,3 +36,12 @@ def test_preprocessing(stac_mapchete): tile_mp = stac_mapchete.process_mp() assert tile_mp.open("inp").products + + +def test_stac_read_xarray_dem(stac_cdse_copernicus_dem_mapchete, test_tile): + with stac_cdse_copernicus_dem_mapchete.process_mp(tile=test_tile).open( + "inp" + ) as src: + cube = src.read(assets=["data"]) + assert isinstance(cube, xr.Dataset) + assert cube.to_array().any() diff --git a/tests/testdata/read_xarray_dem.py b/tests/testdata/read_xarray_dem.py new file mode 100644 index 00000000..6619f353 --- /dev/null +++ b/tests/testdata/read_xarray_dem.py @@ -0,0 +1,4 @@ +def execute(mp): + with mp.open("inp") as src: + src.read(assets=["data"]) + return "empty" diff --git a/tests/testdata/stac.mapchete b/tests/testdata/stac.mapchete index a82a3c3f..06aac144 100644 --- a/tests/testdata/stac.mapchete +++ b/tests/testdata/stac.mapchete @@ -1,7 +1,7 @@ process: read_xarray.py input: inp: - format: EOSTAC_DEV + format: EOSTAC level: L2A time: start: 2023-08-10 diff --git a/tests/testdata/stac_cdse_copernicus_dem.mapchete b/tests/testdata/stac_cdse_copernicus_dem.mapchete new file mode 100644 index 00000000..340cd7da --- /dev/null +++ b/tests/testdata/stac_cdse_copernicus_dem.mapchete @@ -0,0 +1,15 @@ +process: read_xarray_dem.py +input: + inp: + format: EOSTAC + source: + collection: https://stac.dataspace.copernicus.eu/v1/collections/cop-dem-glo-30-dged-cog +output: + format: GTiff + bands: 3 + path: stac + dtype: uint16 +pyramid: + grid: geodetic +zoom_levels: 13 +bounds: [16, 46, 16.1, 46.1] \ No newline at end of file diff --git a/tests/testdata/stac_copernicus_dem.mapchete b/tests/testdata/stac_copernicus_dem.mapchete new file mode 100644 index 00000000..e68f7f81 --- /dev/null +++ b/tests/testdata/stac_copernicus_dem.mapchete @@ -0,0 +1,15 @@ +process: 
read_xarray_dem.py +input: + inp: + format: EOSTAC + source: + collection: https://earth-search.aws.element84.com/v1/collections/cop-dem-glo-30 +output: + format: GTiff + bands: 3 + path: stac + dtype: uint16 +pyramid: + grid: geodetic +zoom_levels: 13 +bounds: [16, 46, 16.1, 46.1] \ No newline at end of file From 15ffec7ccfec68e37856b41b4cc6afadcff0cd1d Mon Sep 17 00:00:00 2001 From: Scartography Date: Tue, 11 Nov 2025 19:28:58 +0100 Subject: [PATCH 38/46] area=box(*area.bounds), as search area instead of passing the whole polygon; duh --- mapchete_eo/search/stac_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index c64aa219..e7aeecd0 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -10,7 +10,7 @@ from mapchete.types import Bounds, BoundsLike from pystac import Item from pystac_client import Client, CollectionClient, ItemSearch -from shapely.geometry import shape +from shapely.geometry import shape, box from shapely.geometry.base import BaseGeometry from mapchete_eo.product import blacklist_products @@ -73,7 +73,7 @@ def _searches() -> Generator[ItemSearch, None, None]: search = self._search( time_range=time_range, bounds=bounds, - area=area, + area=box(*area.bounds), query=query, config=config, ) From 20a8d23bd93ffe1dcf3a82c81ab5b0ec4b3cb4b2 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 12 Nov 2025 08:48:11 +0100 Subject: [PATCH 39/46] fix optional time --- mapchete_eo/base.py | 4 ++-- mapchete_eo/platforms/sentinel2/config.py | 2 +- mapchete_eo/search/base.py | 2 +- mapchete_eo/search/stac_search.py | 28 ++++++++++++++++++++--- mapchete_eo/search/stac_static.py | 4 ++-- mapchete_eo/search/utm_search.py | 4 ++-- mapchete_eo/source.py | 2 +- tests/test_eostac.py | 5 +++- 8 files changed, 38 insertions(+), 13 deletions(-) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index 464a4776..af901b2a 100644 --- 
a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -46,7 +46,7 @@ class BaseDriverConfig(BaseModel): format: str source: Sequence[Source] - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None + time: Optional[Union[TimeRange, List[TimeRange]]] = None cat_baseurl: Optional[str] = None cache: Optional[Any] = None footprint_buffer: float = 0 @@ -426,7 +426,7 @@ class InputData(base.InputData): default_preprocessing_task: Callable = staticmethod(EOProduct.from_stac_item) driver_config_model: Type[BaseDriverConfig] = BaseDriverConfig params: BaseDriverConfig - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] + time: Optional[Union[TimeRange, List[TimeRange]]] area: BaseGeometry _products: Optional[IndexedFeatures] = None diff --git a/mapchete_eo/platforms/sentinel2/config.py b/mapchete_eo/platforms/sentinel2/config.py index 8cd14e5d..a98210ed 100644 --- a/mapchete_eo/platforms/sentinel2/config.py +++ b/mapchete_eo/platforms/sentinel2/config.py @@ -110,7 +110,7 @@ class CacheConfig(BaseModel): class Sentinel2DriverConfig(BaseDriverConfig): format: str = "Sentinel-2" - time: Union[TimeRange, List[Optional[TimeRange]]] + time: Union[TimeRange, List[TimeRange]] # new source: List[Sentinel2Source] = [default_source] diff --git a/mapchete_eo/search/base.py b/mapchete_eo/search/base.py index d3e6b97b..9b630d8f 100644 --- a/mapchete_eo/search/base.py +++ b/mapchete_eo/search/base.py @@ -85,7 +85,7 @@ def stac_extensions(self) -> List[str]: ... 
@abstractmethod def search( self, - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, + time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index e7aeecd0..91eece0b 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -53,7 +53,7 @@ def stac_extensions(self) -> List[str]: def search( self, - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, + time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, @@ -69,11 +69,16 @@ def search( return def _searches() -> Generator[ItemSearch, None, None]: - for time_range in time if isinstance(time, list) else [time]: + def _search_chunks( + time_range: Optional[TimeRange] = None, + bounds: Optional[BoundsLike] = None, + area: Optional[BaseGeometry] = None, + query: Optional[str] = None, + ): search = self._search( time_range=time_range, bounds=bounds, - area=box(*area.bounds), + area=box(*area.bounds) if area else None, query=query, config=config, ) @@ -110,6 +115,23 @@ def _searches() -> Generator[ItemSearch, None, None]: else: yield search + if time: + # search time range(s) + for time_range in time if isinstance(time, list) else [time]: + yield from _search_chunks( + time_range=time_range, + bounds=bounds, + area=area, + query=query, + ) + else: + # don't apply temporal filter + yield from _search_chunks( + bounds=bounds, + area=area, + query=query, + ) + for search in _searches(): for item in search.items(): if item.get_self_href() in self.blacklist: # pragma: no cover diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index fe76392d..df5ec72f 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ 
-53,7 +53,7 @@ def stac_extensions(self) -> List[str]: def search( self, - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, + time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, @@ -67,7 +67,7 @@ def search( def _raw_search( self, - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, + time: Optional[Union[TimeRange, List[TimeRange]]] = None, area: Optional[BaseGeometry] = None, ) -> Generator[Item, None, None]: if area is not None and area.is_empty: diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index 974b2fd6..bd99fa4b 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -48,7 +48,7 @@ def eo_bands(self) -> List[str]: # pragma: no cover def search( self, - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, + time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, query: Optional[str] = None, @@ -64,7 +64,7 @@ def search( def _raw_search( self, - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, + time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, config: UTMSearchConfig = UTMSearchConfig(), diff --git a/mapchete_eo/source.py b/mapchete_eo/source.py index 5076fd9b..9c3c8464 100644 --- a/mapchete_eo/source.py +++ b/mapchete_eo/source.py @@ -58,7 +58,7 @@ def _spatial_subset( def search( self, - time: Optional[Union[TimeRange, List[Optional[TimeRange]]]] = None, + time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[BoundsLike] = None, area: Optional[BaseGeometry] = None, base_dir: Optional[MPathLike] = None, diff --git a/tests/test_eostac.py b/tests/test_eostac.py index 99837dfb..5c5e1971 100644 --- a/tests/test_eostac.py +++ b/tests/test_eostac.py 
@@ -1,5 +1,6 @@ -import xarray as xr from mapchete.formats import available_input_formats +import pytest +import xarray as xr from mapchete_eo.product import eo_bands_to_band_locations @@ -38,6 +39,8 @@ def test_preprocessing(stac_mapchete): assert tile_mp.open("inp").products +@pytest.mark.remote +@pytest.mark.use_cdse_test_env def test_stac_read_xarray_dem(stac_cdse_copernicus_dem_mapchete, test_tile): with stac_cdse_copernicus_dem_mapchete.process_mp(tile=test_tile).open( "inp" From b392d77215c75749ec8f4f6cac0e5af9f1f07392 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 12 Nov 2025 09:01:22 +0100 Subject: [PATCH 40/46] no point in having private methods for this --- mapchete_eo/search/stac_search.py | 17 ++++----- mapchete_eo/search/stac_static.py | 57 +++++++++++++++---------------- mapchete_eo/search/utm_search.py | 41 +++++++++++----------- 3 files changed, 53 insertions(+), 62 deletions(-) diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 91eece0b..d81ca35c 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -37,7 +37,13 @@ def client(self) -> CollectionClient: @cached_property def eo_bands(self) -> List[str]: - return self._eo_bands() + item_assets = self.client.extra_fields.get("item_assets", {}) + for v in item_assets.values(): + if "eo:bands" in v and "data" in v.get("roles", []): + return ["eo:bands"] + else: # pragma: no cover + logger.debug("cannot find eo:bands definition from collections") + return [] @cached_property def id(self) -> str: @@ -141,15 +147,6 @@ def _search_chunks( continue yield item - def _eo_bands(self) -> List[str]: - item_assets = self.client.extra_fields.get("item_assets", {}) - for v in item_assets.values(): - if "eo:bands" in v and "data" in v.get("roles", []): - return ["eo:bands"] - else: # pragma: no cover - logger.debug("cannot find eo:bands definition from collections") - return [] - @cached_property def default_search_params(self): return 
{ diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index df5ec72f..b43a8c85 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -37,7 +37,33 @@ def client(self) -> CollectionClient: @cached_property def eo_bands(self) -> List[str]: - return self._eo_bands() + eo_bands = self.client.extra_fields.get("properties", {}).get("eo:bands") + if eo_bands: + return eo_bands + else: + warnings.warn( + "Unable to read eo:bands definition from collection. " + "Trying now to get information from assets ..." + ) + # see if eo:bands can be found in properties + try: + item = next(self.client.get_items(recursive=True)) + eo_bands = item.properties.get("eo:bands") + if eo_bands: + return eo_bands + + # look through the assets and collect eo:bands + out = {} + for asset in item.assets.values(): + for eo_band in asset.extra_fields.get("eo:bands", []): + out[eo_band["name"]] = eo_band + if out: + return [v for v in out.values()] + except StopIteration: + pass + + logger.debug("cannot find eo:bands definition") + return [] @cached_property def id(self) -> str: @@ -90,35 +116,6 @@ def _raw_search( item.make_asset_hrefs_absolute() yield item - def _eo_bands(self) -> List[str]: - eo_bands = self.client.extra_fields.get("properties", {}).get("eo:bands") - if eo_bands: - return eo_bands - else: - warnings.warn( - "Unable to read eo:bands definition from collection. " - "Trying now to get information from assets ..." 
- ) - # see if eo:bands can be found in properties - try: - item = next(self.client.get_items(recursive=True)) - eo_bands = item.properties.get("eo:bands") - if eo_bands: - return eo_bands - - # look through the assets and collect eo:bands - out = {} - for asset in item.assets.values(): - for eo_band in asset.extra_fields.get("eo:bands", []): - out[eo_band["name"]] = eo_band - if out: - return [v for v in out.values()] - except StopIteration: - pass - - logger.debug("cannot find eo:bands definition") - return [] - def _all_intersecting_items( collection: Union[Catalog, Collection], diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index bd99fa4b..99df3a54 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -44,7 +44,25 @@ class UTMSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher): @cached_property def eo_bands(self) -> List[str]: # pragma: no cover - return self._eo_bands() + for ( + collection_properties + ) in UTMSearchConfig().sinergise_aws_collections.values(): + if collection_properties["id"] == self.collection.split("/")[-1]: + collection = Collection.from_dict( + collection_properties["path"].read_json() + ) + if collection: + summary = collection.summaries.to_dict() + if "eo:bands" in summary: + return summary["eo:bands"] + else: + raise ValueError(f"cannot find collection {collection}") + else: + logger.debug( + "cannot find eo:bands definition from collection %s", + self.collection, + ) + return [] def search( self, @@ -143,27 +161,6 @@ def _raw_search( elif area.intersects(shape(item.geometry)): yield item - def _eo_bands(self) -> list: - for ( - collection_properties - ) in UTMSearchConfig().sinergise_aws_collections.values(): - if collection_properties["id"] == self.collection.split("/")[-1]: - collection = Collection.from_dict( - collection_properties["path"].read_json() - ) - if collection: - summary = collection.summaries.to_dict() - if "eo:bands" in summary: - return 
summary["eo:bands"] - else: - raise ValueError(f"cannot find collection {collection}") - else: - logger.debug( - "cannot find eo:bands definition from collection %s", - self.collection, - ) - return [] - def get_collections(self): """ yeild transformed collection from: From c9a740d41170c88e0c880ea5994960e0713fa915 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 12 Nov 2025 09:59:49 +0100 Subject: [PATCH 41/46] make metadata parsing lazy --- .../sentinel2/preprocessing_tasks.py | 11 +++----- mapchete_eo/platforms/sentinel2/product.py | 25 +++++++++++-------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py index af06fdb0..84d9c921 100644 --- a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +++ b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py @@ -21,15 +21,12 @@ def parse_s2_product( source: Union[Sentinel2Source, None] = item.properties.pop( "mapchete_eo:source", None ) - metadata = None - if source is not None: - mapper = source.get_s2metadata_mapper() - if mapper: - metadata = mapper(item) - try: s2product = S2Product.from_stac_item( - item, cache_config=cache_config, cache_all=cache_all, metadata=metadata + item, + cache_config=cache_config, + cache_all=cache_all, + metadata_mapper=None if source is None else source.get_s2metadata_mapper(), ) except CorruptedProductMetadata as exc: add_to_blacklist(item.get_self_href()) diff --git a/mapchete_eo/platforms/sentinel2/product.py b/mapchete_eo/platforms/sentinel2/product.py index b44c7ddd..abb92302 100644 --- a/mapchete_eo/platforms/sentinel2/product.py +++ b/mapchete_eo/platforms/sentinel2/product.py @@ -1,16 +1,16 @@ from __future__ import annotations import logging -from typing import Dict, List, Optional, Union +from typing import Callable, Dict, List, Optional, Union import numpy as np import numpy.ma as ma -import pystac from mapchete.io.raster import 
ReferencedRaster, read_raster_window, resample_from_array from mapchete.geometry import reproject_geometry, buffer_antimeridian_safe from mapchete.path import MPath from mapchete.protocols import GridProtocol from mapchete.types import Bounds, Grid, NodataVals +from pystac import Item from rasterio.enums import Resampling from rasterio.features import rasterize from shapely.geometry import shape @@ -55,11 +55,11 @@ class Cache: - item: pystac.Item + item: Item config: CacheConfig path: MPath - def __init__(self, item: pystac.Item, config: CacheConfig): + def __init__(self, item: Item, config: CacheConfig): self.item = item self.config = config # TODO: maybe move this function here @@ -148,14 +148,16 @@ class S2Product(EOProduct, EOProductProtocol): def __init__( self, - item: pystac.Item, + item: Item, metadata: Optional[S2Metadata] = None, cache_config: Optional[CacheConfig] = None, + metadata_mapper: Optional[Callable[[Item], S2Metadata]] = None, ): self.item_dict = item.to_dict() self.id = item.id self._metadata = metadata + self._metadata_mapper = metadata_mapper self._scl_cache = dict() self.cache = Cache(item, cache_config) if cache_config else None @@ -166,7 +168,7 @@ def __init__( @classmethod def from_stac_item( self, - item: pystac.Item, + item: Item, cache_config: Optional[CacheConfig] = None, cache_all: bool = False, **kwargs, @@ -185,9 +187,12 @@ def from_stac_item( @property def metadata(self) -> S2Metadata: if not self._metadata: - self._metadata = S2Metadata.from_stac_item( - pystac.Item.from_dict(self.item_dict) - ) + if self._metadata_mapper: + self._metadata = self._metadata_mapper(Item.from_dict(self.item_dict)) + else: + self._metadata = S2Metadata.from_stac_item( + Item.from_dict(self.item_dict) + ) return self._metadata def __repr__(self): @@ -693,7 +698,7 @@ def _apply_brdf( return out_arr -def asset_name_to_l2a_band(item: pystac.Item, asset_name: str) -> L2ABand: +def asset_name_to_l2a_band(item: Item, asset_name: str) -> L2ABand: asset = 
item.assets[asset_name] asset_path = MPath(asset.href) band_name = asset_path.name.split(".")[0] From e7a178409bd41fd982f4644e12b2e6249e519c89 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 12 Nov 2025 10:22:55 +0100 Subject: [PATCH 42/46] don't bother searching if area is empty anyways --- mapchete_eo/base.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mapchete_eo/base.py b/mapchete_eo/base.py index af901b2a..7f611880 100644 --- a/mapchete_eo/base.py +++ b/mapchete_eo/base.py @@ -514,13 +514,16 @@ def _init_area(self, input_params: dict) -> BaseGeometry: def source_items(self) -> Generator[Item, None, None]: already_returned = set() for source in self.params.source: + area = reproject_geometry( + self.area, + src_crs=self.crs, + dst_crs=source.catalog_crs, + ) + if area.is_empty: + continue for item in source.search( time=self.time, - area=reproject_geometry( - self.area, - src_crs=self.crs, - dst_crs=source.catalog_crs, - ), + area=area, base_dir=self.conf_dir, ): # if item was already found in previous source, skip From 886af713c9550e417b9e85f62258c29004588894 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Wed, 12 Nov 2025 11:33:55 +0100 Subject: [PATCH 43/46] enable lazy loading stac item --- .../sentinel2/preprocessing_tasks.py | 3 ++ mapchete_eo/platforms/sentinel2/product.py | 31 +++++++++++++++---- mapchete_eo/product.py | 20 +++++++----- mapchete_eo/settings.py | 1 + 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py index 84d9c921..6caf1f7f 100644 --- a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +++ b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py @@ -8,6 +8,7 @@ from mapchete_eo.platforms.sentinel2.product import S2Product from mapchete_eo.platforms.sentinel2.source import Sentinel2Source from mapchete_eo.product import add_to_blacklist +from mapchete_eo.settings 
import mapchete_eo_settings logger = logging.getLogger(__name__) @@ -27,6 +28,8 @@ def parse_s2_product( cache_config=cache_config, cache_all=cache_all, metadata_mapper=None if source is None else source.get_s2metadata_mapper(), + item_modifier_funcs=None if source is None else source.item_modifier_funcs, + lazy_load_item=mapchete_eo_settings.lazy_load_stac_items, ) except CorruptedProductMetadata as exc: add_to_blacklist(item.get_self_href()) diff --git a/mapchete_eo/platforms/sentinel2/product.py b/mapchete_eo/platforms/sentinel2/product.py index abb92302..75389c18 100644 --- a/mapchete_eo/platforms/sentinel2/product.py +++ b/mapchete_eo/platforms/sentinel2/product.py @@ -142,7 +142,7 @@ def get_brdf_grid(self, band: L2ABand): class S2Product(EOProduct, EOProductProtocol): - item_dict: dict + _item_dict: Optional[dict] = None cache: Optional[Cache] = None _scl_cache: Dict[GridProtocol, np.ndarray] @@ -152,12 +152,19 @@ def __init__( metadata: Optional[S2Metadata] = None, cache_config: Optional[CacheConfig] = None, metadata_mapper: Optional[Callable[[Item], S2Metadata]] = None, + item_modifier_funcs: Optional[List[Callable[[Item], Item]]] = None, + lazy_load_item: bool = False, ): - self.item_dict = item.to_dict() + if lazy_load_item: + self._item_dict = None + else: + self._item_dict = item.to_dict() + self.item_href = item.self_href self.id = item.id self._metadata = metadata self._metadata_mapper = metadata_mapper + self._item_modifier_funcs = item_modifier_funcs self._scl_cache = dict() self.cache = Cache(item, cache_config) if cache_config else None @@ -184,15 +191,25 @@ def from_stac_item( return s2product + @property + def item(self) -> Item: + if not self._item: + if self._item_dict: + self._item = Item.from_dict(self._item_dict) + else: + item = Item.from_file(self.item_href) + for modifier in self._item_modifier_funcs or []: + item = modifier(item) + self._item = item + return self._item + @property def metadata(self) -> S2Metadata: if not 
self._metadata: if self._metadata_mapper: - self._metadata = self._metadata_mapper(Item.from_dict(self.item_dict)) + self._metadata = self._metadata_mapper(self.item) else: - self._metadata = S2Metadata.from_stac_item( - Item.from_dict(self.item_dict) - ) + self._metadata = S2Metadata.from_stac_item(self.item) return self._metadata def __repr__(self): @@ -202,6 +219,8 @@ def clear_cached_data(self): if self._metadata is not None: self._metadata.clear_cached_data() self._metadata = None + if self._item is not None: + self._item = None self._scl_cache = dict() def read_np_array( diff --git a/mapchete_eo/product.py b/mapchete_eo/product.py index 5391cccd..1f5af7e7 100644 --- a/mapchete_eo/product.py +++ b/mapchete_eo/product.py @@ -5,7 +5,7 @@ import numpy as np import numpy.ma as ma -import pystac +from pystac import Item import xarray as xr from mapchete import Timer from mapchete.io.raster import ReferencedRaster @@ -26,15 +26,17 @@ class EOProduct(EOProductProtocol): - """Wrapper class around a pystac.Item which provides read functions.""" + """Wrapper class around a Item which provides read functions.""" default_dtype: DTypeLike = np.uint16 + _item: Optional[Item] = None - def __init__(self, item: pystac.Item): + def __init__(self, item: Item): self.item_dict = item.to_dict() self.__geo_interface__ = self.item.geometry self.bounds = Bounds.from_inp(shape(self)) self.crs = mapchete_eo_settings.default_catalog_crs + self._item = None def __repr__(self): return f"" @@ -43,11 +45,13 @@ def clear_cached_data(self): pass @property - def item(self) -> pystac.Item: - return pystac.Item.from_dict(self.item_dict) + def item(self) -> Item: + if not self._item: + self._item = Item.from_dict(self.item_dict) + return self._item @classmethod - def from_stac_item(self, item: pystac.Item, **kwargs) -> EOProduct: + def from_stac_item(self, item: Item, **kwargs) -> EOProduct: return EOProduct(item) def get_mask(self) -> ReferencedRaster: ... 
@@ -171,7 +175,7 @@ def assets_eo_bands_to_band_locations( def eo_bands_to_band_locations( - item: pystac.Item, + item: Item, eo_bands: List[str], role: Literal["data", "reflectance", "visual"] = "data", ) -> List[BandLocation]: @@ -182,7 +186,7 @@ def eo_bands_to_band_locations( def find_eo_band( - item: pystac.Item, + item: Item, eo_band_name: str, role: Literal["data", "reflectance", "visual"] = "data", ) -> BandLocation: diff --git a/mapchete_eo/settings.py b/mapchete_eo/settings.py index a44171a5..b5b96e53 100644 --- a/mapchete_eo/settings.py +++ b/mapchete_eo/settings.py @@ -16,6 +16,7 @@ class Settings(BaseSettings): default_cache_location: MPathLike = MPath("s3://eox-mhub-cache/") default_catalog_crs: CRS = CRS.from_epsg(4326) blacklist: Optional[MPathLike] = None + lazy_load_stac_items: bool = True # read from environment model_config = SettingsConfigDict(env_prefix="MAPCHETE_EO_") From 098701b7bb5e03c3176067c8c8d4eba6c3869ddd Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 13 Nov 2025 12:00:18 +0100 Subject: [PATCH 44/46] extend lazy item loads capabilities by adding an item properties cache --- mapchete_eo/io/products.py | 43 +++++++++---------- .../sentinel2/preprocessing_tasks.py | 10 +++++ mapchete_eo/platforms/sentinel2/product.py | 13 +++++- mapchete_eo/product.py | 2 + mapchete_eo/protocols.py | 5 +++ mapchete_eo/sort.py | 10 ++--- 6 files changed, 53 insertions(+), 30 deletions(-) diff --git a/mapchete_eo/io/products.py b/mapchete_eo/io/products.py index 59acf6c2..4c23fee0 100644 --- a/mapchete_eo/io/products.py +++ b/mapchete_eo/io/products.py @@ -29,7 +29,6 @@ EmptyStackException, NoSourceProducts, ) -from mapchete_eo.io.items import get_item_property from mapchete_eo.protocols import EOProductProtocol from mapchete_eo.sort import SortMethodConfig from mapchete_eo.types import MergeMethod @@ -118,7 +117,8 @@ def products_to_xarray( coords = { slice_axis_name: list( np.array( - [product.item.datetime for product in products], 
dtype=np.datetime64 + [product.get_property("datetime") for product in products], + dtype=np.datetime64, ) ) } @@ -132,7 +132,6 @@ class Slice: """Combine multiple products into one slice.""" name: Any - properties: dict products: Sequence[EOProductProtocol] datetime: datetime @@ -151,21 +150,13 @@ def __init__( # calculate mean datetime timestamps = [ - product.item.datetime.timestamp() + product.get_property("datetime").timestamp() for product in self.products - if product.item.datetime + if product.get_property("datetime") ] mean_timestamp = sum(timestamps) / len(timestamps) self.datetime = datetime.fromtimestamp(mean_timestamp) - # generate combined properties - self.properties = {} - for key in self.products[0].item.properties.keys(): - try: - self.properties[key] = self.get_property(key) - except ValueError: - self.properties[key] = None - def __repr__(self) -> str: return f"" @@ -178,6 +169,17 @@ def __geom_interface__(self) -> Dict: raise EmptySliceException + @property + def properties(self) -> Dict[str, Any]: + # generate combined properties + properties: Dict[str, Any] = {} + for key in self.products[0].item.properties.keys(): + try: + self.properties[key] = self.get_property(key) + except ValueError: + self.properties[key] = None + return properties + @contextmanager def cached(self) -> Generator[Slice, None, None]: """Clear caches and run garbage collector when context manager is closed.""" @@ -200,12 +202,9 @@ def get_property(self, property: str) -> Any: ValueError is raised. 
""" # if set of value hashes has a length of 1, all values are the same - values = [ - get_hash(get_item_property(product.item, property=property)) - for product in self.products - ] + values = [get_hash(product.get_property(property)) for product in self.products] if len(set(values)) == 1: - return get_item_property(self.products[0].item, property=property) + return self.products[0].get_property(property) raise ValueError( f"cannot get unique property {property} from products {self.products}" @@ -238,7 +237,7 @@ def products_to_slices( grouped[product.get_property(group_by_property)].append(product) slices = [Slice(key, products) for key, products in grouped.items()] else: - slices = [Slice(product.item.id, [product]) for product in products] + slices = [Slice(product.id, [product]) for product in products] # also check if slices is even a list, otherwise it will raise an error if sort and slices: @@ -268,9 +267,7 @@ def read_remaining_valid_products( try: yield product.read_np_array(**product_read_kwargs) except (AssetKeyError, CorruptedProduct) as exc: - logger.warning( - "skip product %s because of %s", product.item.id, exc - ) + logger.warning("skip product %s because of %s", product.id, exc) except StopIteration: return @@ -288,7 +285,7 @@ def read_remaining_valid_products( out = product.read_np_array(**product_read_kwargs) break except (AssetKeyError, CorruptedProduct) as exc: - logger.warning("skip product %s because of %s", product.item.id, exc) + logger.warning("skip product %s because of %s", product.id, exc) else: # we cannot do anything here, as all products are broken raise CorruptedSlice("all products are broken here") diff --git a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py index 6caf1f7f..6ade0028 100644 --- a/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +++ b/mapchete_eo/platforms/sentinel2/preprocessing_tasks.py @@ -4,6 +4,7 @@ import pystac from mapchete_eo.exceptions 
import CorruptedProductMetadata +from mapchete_eo.io.items import get_item_property from mapchete_eo.platforms.sentinel2.config import CacheConfig from mapchete_eo.platforms.sentinel2.product import S2Product from mapchete_eo.platforms.sentinel2.source import Sentinel2Source @@ -30,6 +31,15 @@ def parse_s2_product( metadata_mapper=None if source is None else source.get_s2metadata_mapper(), item_modifier_funcs=None if source is None else source.item_modifier_funcs, lazy_load_item=mapchete_eo_settings.lazy_load_stac_items, + item_property_cache={ + key: get_item_property(item, key) + for key in [ + "datetime", + "eo:cloud_cover", + "id", + "s2:datastrip_id", + ] + }, ) except CorruptedProductMetadata as exc: add_to_blacklist(item.get_self_href()) diff --git a/mapchete_eo/platforms/sentinel2/product.py b/mapchete_eo/platforms/sentinel2/product.py index 75389c18..b2e4d91c 100644 --- a/mapchete_eo/platforms/sentinel2/product.py +++ b/mapchete_eo/platforms/sentinel2/product.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union import numpy as np import numpy.ma as ma @@ -17,6 +17,7 @@ from mapchete_eo.array.buffer import buffer_array +from mapchete_eo.io.items import get_item_property from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels from mapchete_eo.platforms.sentinel2.brdf.correction import apply_correction from mapchete_eo.exceptions import ( @@ -145,6 +146,7 @@ class S2Product(EOProduct, EOProductProtocol): _item_dict: Optional[dict] = None cache: Optional[Cache] = None _scl_cache: Dict[GridProtocol, np.ndarray] + _item_property_cache: Dict[str, Any] def __init__( self, @@ -154,6 +156,7 @@ def __init__( metadata_mapper: Optional[Callable[[Item], S2Metadata]] = None, item_modifier_funcs: Optional[List[Callable[[Item], Item]]] = None, lazy_load_item: bool = False, + item_property_cache: Optional[Dict[str, Any]] = None, 
): if lazy_load_item: self._item_dict = None @@ -166,6 +169,7 @@ def __init__( self._metadata_mapper = metadata_mapper self._item_modifier_funcs = item_modifier_funcs self._scl_cache = dict() + self._item_property_cache = item_property_cache or dict() self.cache = Cache(item, cache_config) if cache_config else None self.__geo_interface__ = item.geometry @@ -221,6 +225,7 @@ def clear_cached_data(self): self._metadata = None if self._item is not None: self._item = None + self._item_property_cache = dict() self._scl_cache = dict() def read_np_array( @@ -624,6 +629,12 @@ def _check_full(arr): bounds=grid.bounds, ) + def get_property(self, name: str) -> Any: + if name not in self._item_property_cache: + raise KeyError(name) + self._item_property_cache[name] = get_item_property(self.item, name) + return self._item_property_cache[name] + def _apply_sentinel2_bandpass_adjustment( self, uncorrected: ma.MaskedArray, assets: List[str], computing_dtype=np.float32 ) -> ma.MaskedArray: diff --git a/mapchete_eo/product.py b/mapchete_eo/product.py index 1f5af7e7..32392c0c 100644 --- a/mapchete_eo/product.py +++ b/mapchete_eo/product.py @@ -28,6 +28,7 @@ class EOProduct(EOProductProtocol): """Wrapper class around a Item which provides read functions.""" + id: str default_dtype: DTypeLike = np.uint16 _item: Optional[Item] = None @@ -37,6 +38,7 @@ def __init__(self, item: Item): self.bounds = Bounds.from_inp(shape(self)) self.crs = mapchete_eo_settings.default_catalog_crs self._item = None + self.id = item.id def __repr__(self): return f"" diff --git a/mapchete_eo/protocols.py b/mapchete_eo/protocols.py index 611884c8..4e2f8832 100644 --- a/mapchete_eo/protocols.py +++ b/mapchete_eo/protocols.py @@ -15,6 +15,7 @@ class EOProductProtocol(Protocol): + id: str bounds: Bounds crs: CRS __geo_interface__: Optional[Dict[str, Any]] @@ -54,3 +55,7 @@ def item(self) -> pystac.Item: ... 
class DateTimeProtocol(Protocol): datetime: DateTimeLike + + +class GetPropertyProtocol(Protocol): + def get_property(self, property: str) -> Any: ... diff --git a/mapchete_eo/sort.py b/mapchete_eo/sort.py index f2a0e17c..8b5e30ad 100644 --- a/mapchete_eo/sort.py +++ b/mapchete_eo/sort.py @@ -5,10 +5,8 @@ from typing import Callable, List, Optional from pydantic import BaseModel -from pystac import Item -from mapchete_eo.io.items import get_item_property -from mapchete_eo.protocols import DateTimeProtocol +from mapchete_eo.protocols import DateTimeProtocol, GetPropertyProtocol from mapchete_eo.time import timedelta, to_datetime from mapchete_eo.types import DateTimeLike @@ -51,11 +49,11 @@ class TargetDateSort(SortMethodConfig): def sort_objects_by_cloud_cover( - objects: List[Item], reverse: bool = False -) -> List[Item]: + objects: List[GetPropertyProtocol], reverse: bool = False +) -> List[GetPropertyProtocol]: if len(objects) == 0: # pragma: no cover return objects - objects.sort(key=lambda x: get_item_property(x, "eo:cloud_cover"), reverse=reverse) + objects.sort(key=lambda x: x.get_property("eo:cloud_cover"), reverse=reverse) return objects From b25fde6c2db99c39e021bdc8b012f026c6c7f7d1 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Thu, 13 Nov 2025 12:08:30 +0100 Subject: [PATCH 45/46] remove dev KeyError --- mapchete_eo/platforms/sentinel2/product.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mapchete_eo/platforms/sentinel2/product.py b/mapchete_eo/platforms/sentinel2/product.py index b2e4d91c..d85a4224 100644 --- a/mapchete_eo/platforms/sentinel2/product.py +++ b/mapchete_eo/platforms/sentinel2/product.py @@ -631,7 +631,6 @@ def _check_full(arr): def get_property(self, name: str) -> Any: if name not in self._item_property_cache: - raise KeyError(name) self._item_property_cache[name] = get_item_property(self.item, name) return self._item_property_cache[name] From 457b90351e9528c3501c729b7c23a19dc1fbf3b5 Mon Sep 17 00:00:00 2001 From: Joachim Ungar 
Date: Thu, 13 Nov 2025 13:22:48 +0100 Subject: [PATCH 46/46] fix recursion --- mapchete_eo/io/products.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapchete_eo/io/products.py b/mapchete_eo/io/products.py index 4c23fee0..424a7333 100644 --- a/mapchete_eo/io/products.py +++ b/mapchete_eo/io/products.py @@ -175,9 +175,9 @@ def properties(self) -> Dict[str, Any]: properties: Dict[str, Any] = {} for key in self.products[0].item.properties.keys(): try: - self.properties[key] = self.get_property(key) + properties[key] = self.get_property(key) except ValueError: - self.properties[key] = None + properties[key] = None return properties @contextmanager