Skip to content

Commit af7ff94

Browse files
Landsat storage client (#13)
* Refactor storage clients to use obstore * Support latest protobuf version * Add quicklook to copernicus client * keep python 3.10 and 3.11 support * Silence incorrect type error * Patch boto3 credential loading
1 parent 447323c commit af7ff94

16 files changed

Lines changed: 1190 additions & 820 deletions

File tree

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ repos:
66
- id: end-of-file-fixer
77
- repo: https://github.com/charliermarsh/ruff-pre-commit
88
# keep the version here in sync with the version in uv.lock
9-
rev: "v0.12.7"
9+
rev: "v0.12.9"
1010
hooks:
1111
- id: ruff-check
1212
args: [--fix, --exit-non-zero-on-fix]

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- `tilebox-storage`: Added `USGSLandsatStorageClient` to download Landsat data from the USGS Landsat S3 bucket.
13+
- `tilebox-storage`: Storage clients now support concurrent downloads of multiple objects, controlled by the
14+
`max_concurrent_downloads` parameter.
15+
- `tilebox-storage`: Added `quicklook` and `download_quicklook` methods to the `CopernicusStorageClient` to download and
16+
display preview images for Sentinel data.
17+
1018
## [0.41.0] - 2025-08-01
1119

1220
### Added

tilebox-datasets/tilebox/datasets/protobuf_conversion/protobuf_xarray.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -374,19 +374,16 @@ def _create_field_converter(field: FieldDescriptor) -> _FieldConverter:
374374
"""
375375
# special handling for enums:
376376
if field.type == FieldDescriptor.TYPE_ENUM:
377-
if field.label == FieldDescriptor.LABEL_REPEATED:
377+
if field.is_repeated: # type: ignore[attr-defined]
378378
raise NotImplementedError("Repeated enum fields are not supported")
379379

380380
return _EnumFieldConverter(field.name, enum_mapping_from_field_descriptor(field))
381381

382382
field_type = infer_field_type(field)
383-
if field.label == FieldDescriptor.LABEL_OPTIONAL: # simple fields (in proto3 every simple field is optional)
384-
return _SimpleFieldConverter(field.name, field_type)
385-
386-
if field.label == FieldDescriptor.LABEL_REPEATED:
383+
if field.is_repeated: # type: ignore[attr-defined]
387384
return _ArrayFieldConverter(field.name, field_type)
388385

389-
raise ValueError(f"Unsupported field type with label {field.label} and type {field.type}")
386+
return _SimpleFieldConverter(field.name, field_type)
390387

391388

392389
def _combine_dimension_names(array_dimensions: dict[str, int]) -> dict[str, tuple[str, int]]:

tilebox-datasets/tilebox/datasets/protobuf_conversion/to_protobuf.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import numpy as np
77
import pandas as pd
88
import xarray as xr
9-
from google.protobuf.descriptor import FieldDescriptor
109
from google.protobuf.message import Message
1110

1211
from tilebox.datasets.protobuf_conversion.field_types import (
@@ -80,7 +79,7 @@ def to_messages( # noqa: C901, PLR0912
8079
descriptor = field_descriptors_by_name[field_name]
8180
field_type = infer_field_type(descriptor)
8281

83-
if descriptor.label == FieldDescriptor.LABEL_REPEATED:
82+
if descriptor.is_repeated:
8483
values = convert_repeated_values_to_proto(values, field_type)
8584
else:
8685
values = convert_values_to_proto(values, field_type, filter_none=False)

tilebox-storage/pyproject.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ dependencies = [
2626
"aiofile>=3.8",
2727
"folium>=0.15",
2828
"shapely>=2",
29-
"boto3>=1.33",
30-
"boto3-stubs[essential]>=1.33",
29+
"obstore>=0.8.0",
3130
]
3231

3332
[dependency-groups]
@@ -37,7 +36,6 @@ dev = [
3736
"pytest-asyncio>=0.24.0",
3837
"pytest-cov>=5.0.0",
3938
"pytest>=8.3.2",
40-
"moto>=5",
4139
]
4240

4341
[project.urls]

tilebox-storage/tests/storage_data.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@
1111

1212
from hypothesis.strategies import DrawFn, booleans, composite, datetimes, integers, just, one_of, text, uuids
1313

14-
from tilebox.storage.granule import ASFStorageGranule, CopernicusStorageGranule, UmbraStorageGranule
14+
from tilebox.storage.granule import (
15+
ASFStorageGranule,
16+
CopernicusStorageGranule,
17+
UmbraStorageGranule,
18+
USGSLandsatStorageGranule,
19+
)
1520
from tilebox.storage.providers import _ASF_URL, StorageURLs
1621

1722

@@ -46,15 +51,14 @@ def alphanumerical_text(draw: DrawFn, min_size: int = 1, max_size: int = 100) ->
4651
@composite
4752
def umbra_granules(draw: DrawFn) -> UmbraStorageGranule:
4853
"""Generate a realistic-looking random Umbra granule."""
49-
level = "L0"
5054
time = draw(datetimes(min_value=datetime(1990, 1, 1), max_value=datetime(2025, 1, 1), timezones=just(None)))
5155
number = draw(integers(min_value=1, max_value=2))
5256
text_location = draw(alphanumerical_text(min_size=1, max_size=20))
5357
granule_id = str(draw(uuids(version=4)))
5458
granule_name = f"{time:%Y-%m-%d-%H-%M-%S}_UMBRA-{number:02d}"
5559
location = str(Path(text_location) / granule_id / granule_name)
5660

57-
return UmbraStorageGranule(time, granule_name, level, location)
61+
return UmbraStorageGranule(time, granule_name, location)
5862

5963

6064
@composite
@@ -80,5 +84,24 @@ def s5p_granules(draw: DrawFn) -> CopernicusStorageGranule:
8084
# /eodata/Sentinel-5P/TROPOMI/L2__AER_LH/2024/04/15/S5P_NRTI_L2__AER_LH_20240415T055540_20240415T060040_33707_03_020600_20240415T063447
8185
location = f"/eodata/Sentinel-5P/{instrument}/{product_type}/{start:%Y}/{start:%m}/{start:%d}/{granule_name.removesuffix('.nc')}"
8286

83-
file_size = draw(integers(min_value=10_000, max_value=999_999_999))
84-
return CopernicusStorageGranule(start, granule_name, location, file_size)
87+
return CopernicusStorageGranule(start, granule_name, location)
88+
89+
90+
@composite
91+
def landsat_granules(draw: DrawFn) -> USGSLandsatStorageGranule:
92+
"""Generate a realistic-looking random USGS Landsat granule."""
93+
time = draw(datetimes(min_value=datetime(1990, 1, 1), max_value=datetime(2025, 1, 1), timezones=just(None)))
94+
landsat_mission = draw(integers(min_value=1, max_value=9))
95+
96+
path = draw(integers(min_value=1, max_value=999))
97+
row = draw(integers(min_value=1, max_value=999))
98+
99+
granule_name = f"LC{landsat_mission:02d}_L1GT_{path:03d}{row:03d}_{time:%Y%m%d}_{time:%Y%m%d}_02_T1"
100+
location = f"s3://usgs-landsat/collection02/level-1/standard/oli-tirs/{time:%Y}/{path:03d}/{row:03d}/{granule_name}"
101+
thumbnail = draw(one_of(just(f"{granule_name}_thumb_small.jpeg"), just(None)))
102+
return USGSLandsatStorageGranule(
103+
time,
104+
granule_name,
105+
location,
106+
thumbnail,
107+
)

tilebox-storage/tests/test_granule.py

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,15 @@
44
from hypothesis import given
55
from hypothesis.strategies import lists
66

7-
from tests.storage_data import ers_granules, s5p_granules, umbra_granules
8-
from tilebox.storage.granule import ASFStorageGranule, CopernicusStorageGranule, UmbraStorageGranule, _asf_download_urls
7+
from tests.storage_data import ers_granules, landsat_granules, s5p_granules, umbra_granules
8+
from tilebox.storage.granule import (
9+
ASFStorageGranule,
10+
CopernicusStorageGranule,
11+
UmbraStorageGranule,
12+
USGSLandsatStorageGranule,
13+
_asf_download_urls,
14+
_thumbnail_relative_to_eodata_location,
15+
)
916

1017

1118
def _asf_granule_to_datapoint(granule: ASFStorageGranule) -> xr.Dataset:
@@ -53,7 +60,6 @@ def _umbra_granule_to_datapoint(granule: UmbraStorageGranule) -> xr.Dataset:
5360
datapoint = xr.Dataset()
5461
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
5562
datapoint["granule_name"] = granule.granule_name
56-
datapoint["processing_level"] = granule.processing_level
5763
datapoint["location"] = granule.location
5864
return datapoint
5965

@@ -76,12 +82,51 @@ def test_granule_from_umbra_datapoints(granules: list[UmbraStorageGranule]) -> N
7682
assert UmbraStorageGranule.from_data(dataset.isel(time=i)) == granules[i]
7783

7884

85+
@pytest.mark.parametrize(
86+
("thumbnail_url", "location", "expected"),
87+
[
88+
(
89+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE/preview/thumbnail.png",
90+
"/eodata/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE",
91+
"preview/thumbnail.png",
92+
),
93+
(
94+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-2/MSI/L1C/2025/08/07/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945.SAFE/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945-ql.jpg",
95+
"/eodata/Sentinel-2/MSI/L1C/2025/08/07/S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945.SAFE",
96+
"S2B_MSIL1C_20250807T004159_N0511_R045_T08XNR_20250807T004945-ql.jpg",
97+
),
98+
(
99+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/OLCI/OL_2_LFR___/2025/08/07/S3A_OL_2_LFR____20250807T011653_20250807T011953_20250807T033036_0179_129_074_1620_PS1_O_NR_003.SEN3/quicklook.jpg",
100+
"/eodata/Sentinel-3/OLCI/OL_2_LFR___/2025/08/07/S3A_OL_2_LFR____20250807T011653_20250807T011953_20250807T033036_0179_129_074_1620_PS1_O_NR_003.SEN3",
101+
"quicklook.jpg",
102+
),
103+
(
104+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/SLSTR/SL_1_RBT___/2025/08/07/S3B_SL_1_RBT____20250807T002314_20250807T002614_20250807T025411_0179_109_316_0720_ESA_O_NR_004.SEN3/quicklook.jpg",
105+
"/eodata/Sentinel-3/SLSTR/SL_1_RBT___/2025/08/07/S3B_SL_1_RBT____20250807T002314_20250807T002614_20250807T025411_0179_109_316_0720_ESA_O_NR_004.SEN3",
106+
"quicklook.jpg",
107+
),
108+
(
109+
"https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-3/SYNERGY/SY_2_VG1___/2025/08/04/S3A_SY_2_VG1____20250804T000000_20250804T235959_20250806T202029_AUSTRALASIA_______PS1_O_NT_002.SEN3/quicklook.jpg",
110+
"/eodata/Sentinel-3/SYNERGY/SY_2_VG1___/2025/08/04/S3A_SY_2_VG1____20250804T000000_20250804T235959_20250806T202029_AUSTRALASIA_______PS1_O_NT_002.SEN3",
111+
"quicklook.jpg",
112+
),
113+
],
114+
)
115+
def test_thumbnail_relative_to_eodata_location(thumbnail_url: str, location: str, expected: str) -> None:
116+
assert (
117+
_thumbnail_relative_to_eodata_location(
118+
thumbnail_url,
119+
location,
120+
)
121+
== expected
122+
)
123+
124+
79125
def _copernicus_granule_to_datapoint(granule: CopernicusStorageGranule) -> xr.Dataset:
80126
datapoint = xr.Dataset()
81127
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
82128
datapoint["granule_name"] = granule.granule_name
83129
datapoint["location"] = granule.location
84-
datapoint["file_size"] = granule.file_size
85130
return datapoint
86131

87132

@@ -101,3 +146,31 @@ def test_granule_from_copernicus_datapoints(granules: list[CopernicusStorageGran
101146

102147
for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
103148
assert CopernicusStorageGranule.from_data(dataset.isel(time=i)) == granules[i]
149+
150+
151+
def _landsat_granule_to_datapoint(granule: USGSLandsatStorageGranule) -> xr.Dataset:
152+
datapoint = xr.Dataset()
153+
datapoint.coords["time"] = np.array(granule.time).astype("datetime64[ns]")
154+
datapoint["granule_name"] = granule.granule_name
155+
datapoint["location"] = granule.location
156+
if granule.thumbnail is not None:
157+
datapoint["thumbnail"] = f"{granule.location}/{granule.thumbnail}"
158+
return datapoint
159+
160+
161+
@given(landsat_granules())
162+
def test_granule_from_landsat_datapoint(granule: USGSLandsatStorageGranule) -> None:
163+
datapoint = _landsat_granule_to_datapoint(granule)
164+
assert USGSLandsatStorageGranule.from_data(datapoint) == granule
165+
assert USGSLandsatStorageGranule.from_data(USGSLandsatStorageGranule.from_data(datapoint)) == granule
166+
167+
168+
@given(lists(landsat_granules(), min_size=2, max_size=5))
169+
def test_granule_from_landsat_datapoints(granules: list[USGSLandsatStorageGranule]) -> None:
170+
datapoints = [_landsat_granule_to_datapoint(granule) for granule in granules]
171+
dataset = xr.concat(datapoints, dim="time")
172+
with pytest.raises(ValueError, match=".*more than one granule.*"):
173+
USGSLandsatStorageGranule.from_data(dataset)
174+
175+
for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
176+
assert USGSLandsatStorageGranule.from_data(dataset.isel(time=i)) == granules[i]

tilebox-storage/tests/test_providers.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from tilebox.storage.providers import _asf_login
66

77

8-
@pytest.mark.anyio
8+
@pytest.mark.asyncio
99
async def test_asf_login(httpx_mock: HTTPXMock) -> None:
1010
httpx_mock.add_response(headers={"Set-Cookie": "logged_in=yes"})
1111

@@ -15,8 +15,10 @@ async def test_asf_login(httpx_mock: HTTPXMock) -> None:
1515
assert isinstance(client.auth, BasicAuth)
1616
assert client.cookies["logged_in"] == "yes"
1717

18+
await client.aclose()
1819

19-
@pytest.mark.anyio
20+
21+
@pytest.mark.asyncio
2022
async def test_asf_login_invalid_auth(httpx_mock: HTTPXMock) -> None:
2123
httpx_mock.add_response(401)
2224
with pytest.raises(ValueError, match="Invalid username or password."):

0 commit comments

Comments
 (0)