From 258a97b35cd7ddad836cfbf3ca1eda3333b46485 Mon Sep 17 00:00:00 2001 From: Scartography Date: Wed, 22 Oct 2025 16:29:20 +0200 Subject: [PATCH 1/7] revisit the stac search max_cloud_cover being somewhat hardcoded --- mapchete_eo/search/config.py | 7 ------ .../search/platforms/sentinel2/config.py | 16 ++++++++++++++ mapchete_eo/search/stac_search.py | 22 +++++++++++++++---- mapchete_eo/search/stac_static.py | 8 +------ mapchete_eo/search/utm_search.py | 14 +++++++----- 5 files changed, 43 insertions(+), 24 deletions(-) create mode 100644 mapchete_eo/search/platforms/sentinel2/config.py diff --git a/mapchete_eo/search/config.py b/mapchete_eo/search/config.py index 39690600..2dcd647d 100644 --- a/mapchete_eo/search/config.py +++ b/mapchete_eo/search/config.py @@ -5,20 +5,13 @@ class StacSearchConfig(BaseModel): - max_cloud_cover: float = 100.0 catalog_chunk_threshold: int = 10_000 catalog_chunk_zoom: int = 5 catalog_pagesize: int = 100 footprint_buffer: float = 0 -class StacStaticConfig(BaseModel): - max_cloud_cover: float = 100.0 - - class UTMSearchConfig(BaseModel): - max_cloud_cover: float = 100.0 - sinergise_aws_collections: dict = dict( S2_L2A=dict( id="sentinel-s2-l2a", diff --git a/mapchete_eo/search/platforms/sentinel2/config.py b/mapchete_eo/search/platforms/sentinel2/config.py new file mode 100644 index 00000000..acd68f98 --- /dev/null +++ b/mapchete_eo/search/platforms/sentinel2/config.py @@ -0,0 +1,16 @@ +from pydantic import Field + +from typing import List + +from mapchete_eo.search.config import StacSearchConfig, UTMSearchConfig + + +ALLOWED_SENTINEL2_QUERIES_LIST: List[str] = ["eo:cloud_cover"] + + +class Sentinel2STACSearchQueryablesConfig(StacSearchConfig): + max_cloud_cover: float = Field(100.0, serialization_alias="eo:cloud_cover") + + +class Sentinel2UTMSearchQueryablesConfig(UTMSearchConfig): + max_cloud_cover: float = Field(100.0, serialization_alias="eo:cloud_cover") diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index 8b782bf7..ec007fc2 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -13,11 +13,16 @@ from shapely.geometry.base import BaseGeometry from mapchete_eo.product import blacklist_products + from mapchete_eo.search.base import CatalogSearcher, StaticCatalogWriterMixin -from mapchete_eo.search.config import StacSearchConfig from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.types import TimeRange +from mapchete_eo.search.platforms.sentinel2.config import ( + ALLOWED_SENTINEL2_QUERIES_LIST, + Sentinel2STACSearchQueryablesConfig, +) + logger = logging.getLogger(__name__) @@ -28,7 +33,7 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher): if mapchete_eo_settings.blacklist else set() ) - config_cls = StacSearchConfig + config_cls = Sentinel2STACSearchQueryablesConfig def __init__( self, @@ -155,7 +160,8 @@ def _search( time_range: Optional[TimeRange] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, - config: StacSearchConfig = StacSearchConfig(), + config: Sentinel2STACSearchQueryablesConfig = Sentinel2STACSearchQueryablesConfig(), + stac_query: Union[Sentinel2STACSearchQueryablesConfig, List] = [], **kwargs, ): if time_range is None: # pragma: no cover @@ -180,10 +186,18 @@ def _search( if isinstance(time_range.end, datetime) else time_range.end ) + + for i in ALLOWED_SENTINEL2_QUERIES_LIST: + for k, v in config.model_dump(by_alias=True).items(): + if k == i: + if "cloud_cover" in k: + stac_query.append(f"{k}<={v}") + + # query = generate_stac_query_str(self) search_params = dict( self.default_search_params, datetime=f"{start}/{end}", - query=[f"eo:cloud_cover<={config.max_cloud_cover}"], + query=stac_query, **kwargs, ) if ( diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index 3fbc6ac0..59aafe0e 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -19,7 +19,6 @@ StaticCatalogWriterMixin, filter_items, ) -from mapchete_eo.search.config import StacStaticConfig from mapchete_eo.time import time_ranges_intersect from mapchete_eo.types import TimeRange @@ -30,8 +29,6 @@ class STACStaticCatalog(StaticCatalogWriterMixin, CatalogSearcher): - config_cls = StacStaticConfig - def __init__( self, baseurl: MPathLike, @@ -68,10 +65,7 @@ def search( if area is None and bounds: bounds = Bounds.from_inp(bounds) area = shape(bounds) - for item in filter_items( - self._raw_search(time=time, area=area), - max_cloud_cover=config.max_cloud_cover, - ): + for item in filter_items(self._raw_search(time=time, area=area), **config): yield item def _raw_search( diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index 69796b6f..2409ecec 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -19,7 +19,9 @@ StaticCatalogWriterMixin, filter_items, ) -from mapchete_eo.search.config import UTMSearchConfig +from mapchete_eo.search.platforms.sentinel2.config import ( + Sentinel2UTMSearchQueryablesConfig, +) from mapchete_eo.search.s2_mgrs import S2Tile, s2_tiles_from_bounds from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.time import day_range, to_datetime @@ -40,7 +42,7 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher): if mapchete_eo_settings.blacklist else set() ) - config_cls = UTMSearchConfig + config_cls = Sentinel2UTMSearchQueryablesConfig def __init__( self, @@ -80,7 +82,7 @@ def _raw_search( time: Optional[Union[TimeRange, List[TimeRange]]] = None, bounds: Optional[Bounds] = None, area: Optional[BaseGeometry] = None, - config: UTMSearchConfig = UTMSearchConfig(), + config: Sentinel2UTMSearchQueryablesConfig = Sentinel2UTMSearchQueryablesConfig(), ) -> Generator[Item, None, None]: if time is None: raise ValueError("time must be given") @@ -153,9 +155,9 @@ def _raw_search( def _eo_bands(self) -> list: for collection_name in self.collections: - for ( - collection_properties - ) in UTMSearchConfig().sinergise_aws_collections.values(): + for collection_properties in ( + Sentinel2UTMSearchQueryablesConfig().sinergise_aws_collections.values() + ): if collection_properties["id"] == collection_name: collection = Collection.from_dict( collection_properties["path"].read_json() From a8bb5b19e539692ae2fa9fdd97eb7d0c2eddacef Mon Sep 17 00:00:00 2001 From: Scartography Date: Wed, 22 Oct 2025 16:56:25 +0200 Subject: [PATCH 2/7] make search config adaptive to sentinel-2, maybe think about even more fluid dynamic --- mapchete_eo/search/stac_search.py | 11 ++++++++--- mapchete_eo/search/stac_static.py | 8 ++++++++ mapchete_eo/search/utm_search.py | 6 +++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index ec007fc2..36e69ee6 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -14,10 +14,11 @@ from mapchete_eo.product import blacklist_products -from mapchete_eo.search.base import CatalogSearcher, StaticCatalogWriterMixin from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.types import TimeRange +from mapchete_eo.search.base import CatalogSearcher, StaticCatalogWriterMixin +from mapchete_eo.search.config import StacSearchConfig from mapchete_eo.search.platforms.sentinel2.config import ( ALLOWED_SENTINEL2_QUERIES_LIST, Sentinel2STACSearchQueryablesConfig, @@ -33,7 +34,8 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher): if mapchete_eo_settings.blacklist else set() ) - config_cls = Sentinel2STACSearchQueryablesConfig + + config_cls = StacSearchConfig def __init__( self, @@ -76,7 +78,11 @@ def search( area: Optional[BaseGeometry] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: + if [s for s in self.collections if "sentinel-2" or "sentinel-s2" in s]: + self.config_cls = Sentinel2STACSearchQueryablesConfig + config = self.config_cls(**search_kwargs or {}) + if bounds: bounds = Bounds.from_inp(bounds) if time is None: # pragma: no cover @@ -193,7 +199,6 @@ def _search( if "cloud_cover" in k: stac_query.append(f"{k}<={v}") - # query = generate_stac_query_str(self) search_params = dict( self.default_search_params, datetime=f"{start}/{end}", diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index 59aafe0e..aa69eecb 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -22,6 +22,10 @@ from mapchete_eo.time import time_ranges_intersect from mapchete_eo.types import TimeRange +from mapchete_eo.search.platforms.sentinel2.config import ( + Sentinel2STACSearchQueryablesConfig, +) + logger = logging.getLogger(__name__) @@ -61,7 +65,11 @@ def search( area: Optional[BaseGeometry] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: + if [s for s in self.collections if "sentinel-2" or "sentinel-s2" in s]: + self.config_cls = Sentinel2STACSearchQueryablesConfig + config = self.config_cls(**search_kwargs or {}) + if area is None and bounds: bounds = Bounds.from_inp(bounds) area = shape(bounds) diff --git a/mapchete_eo/search/utm_search.py b/mapchete_eo/search/utm_search.py index 2409ecec..5f963ed2 100644 --- a/mapchete_eo/search/utm_search.py +++ b/mapchete_eo/search/utm_search.py @@ -19,6 +19,7 @@ StaticCatalogWriterMixin, filter_items, ) +from mapchete_eo.search.config import UTMSearchConfig from mapchete_eo.search.platforms.sentinel2.config import ( Sentinel2UTMSearchQueryablesConfig, ) @@ -42,7 +43,7 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher): if mapchete_eo_settings.blacklist else set() ) - config_cls = Sentinel2UTMSearchQueryablesConfig + config_cls = UTMSearchConfig def __init__( self, @@ -67,6 +68,9 @@ def search( area: Optional[BaseGeometry] = None, search_kwargs: Optional[Dict[str, Any]] = None, ) -> Generator[Item, None, None]: + if [s for s in self.collections if "sentinel-2" or "sentinel-s2" in s]: + self.config_cls = Sentinel2UTMSearchQueryablesConfig + config = self.config_cls(**search_kwargs or {}) if bounds: bounds = Bounds.from_inp(bounds) From 66f059a96f5f272dc75fd964e1bc906e910c8edc Mon Sep 17 00:00:00 2001 From: Scartography Date: Wed, 22 Oct 2025 17:04:46 +0200 Subject: [PATCH 3/7] add search_kwargs=dict(max_cloud_cover=80) to test_known_catalogs --- tests/test_known_catalogs.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_known_catalogs.py b/tests/test_known_catalogs.py index 709df55d..20490e5a 100644 --- a/tests/test_known_catalogs.py +++ b/tests/test_known_catalogs.py @@ -20,6 +20,7 @@ def test_e84_cog_catalog_search_items(e84_cog_catalog): end="2022-06-06", ), bounds=[16, 46, 17, 47], + search_kwargs=dict(max_cloud_cover=80), ) ) ) @@ -44,6 +45,7 @@ def test_utm_search_catalog_search_items(utm_search_catalog): end="2022-06-05", ), bounds=[-180, 65, -179, 65.3], + search_kwargs=dict(max_cloud_cover=80), ) ) ) @@ -71,5 +73,6 @@ def test_known_catalogs(catalog_cls, collection_name): end="2022-06-05", ), bounds=[-180, 65, -179, 65.3], + search_kwargs=dict(max_cloud_cover=80), ) assert items From 88765517007fd232b0fe47a41f0e2426303e133d Mon Sep 17 00:00:00 2001 From: Scartography Date: Wed, 22 Oct 2025 17:17:07 +0200 Subject: [PATCH 4/7] remove EOSTAC misc content --- mapchete_eo/eostac.py | 30 ------------------------------ tests/test_eostac.py | 38 -------------------------------------- 2 files changed, 68 deletions(-) delete mode 100644 mapchete_eo/eostac.py delete mode 100644 tests/test_eostac.py diff --git a/mapchete_eo/eostac.py b/mapchete_eo/eostac.py deleted file mode 100644 index 8cb51d00..00000000 --- a/mapchete_eo/eostac.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Driver class for EOSTAC static STAC catalogs. -""" - -from mapchete_eo import base - -METADATA: dict = { - "driver_name": "EOSTAC_DEV", - "data_type": None, - "mode": "r", - "file_extensions": [], -} - - -class InputTile(base.EODataCube): - """ - Target Tile representation of input data. - - Parameters - ---------- - tile : ``Tile`` - kwargs : keyword arguments - driver specific parameters - """ - - -class InputData(base.InputData): - """In case this driver is used when being a readonly input to another process.""" - - input_tile_cls = InputTile diff --git a/tests/test_eostac.py b/tests/test_eostac.py deleted file mode 100644 index 63d8d05e..00000000 --- a/tests/test_eostac.py +++ /dev/null @@ -1,38 +0,0 @@ -import xarray as xr -from mapchete.formats import available_input_formats - -from mapchete_eo.product import eo_bands_to_band_locations - - -# TODO: -# --> PF Elias: -# DataSet -# per band 1 DataArray -# each DataArray has 3 dimensions: time, x, y -def test_pf_eo_bands_to_band_locations(pf_sr_stac_item): - eo_bands = ["B3", "B2", "B4"] - band_locations = eo_bands_to_band_locations(pf_sr_stac_item, eo_bands) - assert len(eo_bands) == len(band_locations) - for band_index, band_location in zip([1, 2, 4], band_locations): - assert band_location.asset_name == "bands" - assert band_location.band_index == band_index - - -def test_format_available(): - assert "EOSTAC_DEV" in available_input_formats() - - -def test_stac_read_xarray(stac_mapchete, test_tile): - with stac_mapchete.process_mp(tile=test_tile).open("inp") as src: - cube = src.read(assets=["red", "green", "blue"]) - assert isinstance(cube, xr.Dataset) - assert cube.to_array().any() - - -def test_preprocessing(stac_mapchete): - mp = stac_mapchete.mp() - input_data = list(mp.config.inputs.values())[0] - assert input_data.products - - tile_mp = stac_mapchete.process_mp() - assert tile_mp.open("inp").products From 721d984d07f17e442e13e9c48385e6d233d6b023 Mon Sep 17 00:00:00 2001 From: Scartography Date: Wed, 22 Oct 2025 17:19:14 +0200 Subject: [PATCH 5/7] add kwargs for filter_items function in search --- mapchete_eo/search/base.py | 1 + mapchete_eo/search/stac_static.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mapchete_eo/search/base.py b/mapchete_eo/search/base.py index 65a93b12..b20fb4c7 100644 --- a/mapchete_eo/search/base.py +++ b/mapchete_eo/search/base.py @@ -224,6 +224,7 @@ def filter_items( items: Generator[Item, None, None], cloud_cover_field: str = "eo:cloud_cover", max_cloud_cover: float = 100.0, + **kwargs, ) -> Generator[Item, None, None]: """ Only for cloudcover now, this can and should be adapted for filter field and value diff --git a/mapchete_eo/search/stac_static.py b/mapchete_eo/search/stac_static.py index aa69eecb..41da5d4a 100644 --- a/mapchete_eo/search/stac_static.py +++ b/mapchete_eo/search/stac_static.py @@ -73,7 +73,9 @@ def search( if area is None and bounds: bounds = Bounds.from_inp(bounds) area = shape(bounds) - for item in filter_items(self._raw_search(time=time, area=area), **config): + for item in filter_items( + self._raw_search(time=time, area=area), **config.model_dump() + ): yield item def _raw_search( From 74a738c37e6d31d2d99e6e89710322e445c46abd Mon Sep 17 00:00:00 2001 From: Scartography Date: Wed, 22 Oct 2025 17:25:54 +0200 Subject: [PATCH 6/7] remove eostac relict of local dev --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 24a674fb..e6fecb6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,6 @@ test = [ eo = "mapchete_eo.cli:eo" [project.entry-points."mapchete.formats.drivers"] -eostac_dev = "mapchete_eo.eostac" sentinel2 = "mapchete_eo.platforms.sentinel2" [project.entry-points."mapchete.processes"] From 7f34bb13837447e23d498e9faf936cb8f1468a96 Mon Sep 17 00:00:00 2001 From: Scartography Date: Thu, 23 Oct 2025 10:38:48 +0200 Subject: [PATCH 7/7] reintroduce eostac; dunno why I removed it --- mapchete_eo/eostac.py | 30 ++++++++++++++++++++++++++++++ pyproject.toml | 1 + tests/test_eostac.py | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 mapchete_eo/eostac.py create mode 100644 tests/test_eostac.py diff --git a/mapchete_eo/eostac.py b/mapchete_eo/eostac.py new file mode 100644 index 00000000..8cb51d00 --- /dev/null +++ b/mapchete_eo/eostac.py @@ -0,0 +1,30 @@ +""" +Driver class for EOSTAC static STAC catalogs. +""" + +from mapchete_eo import base + +METADATA: dict = { + "driver_name": "EOSTAC_DEV", + "data_type": None, + "mode": "r", + "file_extensions": [], +} + + +class InputTile(base.EODataCube): + """ + Target Tile representation of input data. + + Parameters + ---------- + tile : ``Tile`` + kwargs : keyword arguments + driver specific parameters + """ + + +class InputData(base.InputData): + """In case this driver is used when being a readonly input to another process.""" + + input_tile_cls = InputTile diff --git a/pyproject.toml b/pyproject.toml index e6fecb6b..24a674fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ test = [ eo = "mapchete_eo.cli:eo" [project.entry-points."mapchete.formats.drivers"] +eostac_dev = "mapchete_eo.eostac" sentinel2 = "mapchete_eo.platforms.sentinel2" [project.entry-points."mapchete.processes"] diff --git a/tests/test_eostac.py b/tests/test_eostac.py new file mode 100644 index 00000000..63d8d05e --- /dev/null +++ b/tests/test_eostac.py @@ -0,0 +1,38 @@ +import xarray as xr +from mapchete.formats import available_input_formats + +from mapchete_eo.product import eo_bands_to_band_locations + + +# TODO: +# --> PF Elias: +# DataSet +# per band 1 DataArray +# each DataArray has 3 dimensions: time, x, y +def test_pf_eo_bands_to_band_locations(pf_sr_stac_item): + eo_bands = ["B3", "B2", "B4"] + band_locations = eo_bands_to_band_locations(pf_sr_stac_item, eo_bands) + assert len(eo_bands) == len(band_locations) + for band_index, band_location in zip([1, 2, 4], band_locations): + assert band_location.asset_name == "bands" + assert band_location.band_index == band_index + + +def test_format_available(): + assert "EOSTAC_DEV" in available_input_formats() + + +def test_stac_read_xarray(stac_mapchete, test_tile): + with stac_mapchete.process_mp(tile=test_tile).open("inp") as src: + cube = src.read(assets=["red", "green", "blue"]) + assert isinstance(cube, xr.Dataset) + assert cube.to_array().any() + + +def test_preprocessing(stac_mapchete): + mp = stac_mapchete.mp() + input_data = list(mp.config.inputs.values())[0] + assert input_data.products + + tile_mp = stac_mapchete.process_mp() + assert tile_mp.open("inp").products