diff --git a/.vscode/settings.json b/.vscode/settings.json index 2f467b1..05c28f6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,8 @@ { "editor.formatOnSave": true, - "editor.rulers": [120], + "editor.rulers": [ + 120 + ], "[python]": { "editor.defaultFormatter": "charliermarsh.ruff", "editor.formatOnSave": true, diff --git a/CHANGELOG.md b/CHANGELOG.md index 07f4c09..e0f8c17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.37.1] - 2025-06-10 + ### Fixed +- `tilebox-datasets`: Fixed a bug in `TimeseriesDatasetChunk.from_message` relying on incorrect bool assumptions about + missing protobuf fields. - `tilebox-grpc`: More robust parsing of GRPC channel URLs. +- `tilebox-workflows`: Fixed a bug in the timeseries interceptor that resulted in an error when accessing a collection. ## [0.37.0] - 2025-06-06 @@ -185,7 +190,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc` -[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.37.0...HEAD +[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.37.1...HEAD +[0.37.1]: https://github.com/tilebox/tilebox-python/compare/v0.37.0...v0.37.1 [0.37.0]: https://github.com/tilebox/tilebox-python/compare/v0.36.1...v0.37.0 [0.36.1]: https://github.com/tilebox/tilebox-python/compare/v0.36.0...v0.36.1 [0.36.0]: https://github.com/tilebox/tilebox-python/compare/v0.35.0...v0.36.0 diff --git a/tilebox-datasets/tilebox/datasets/data/timeseries.py b/tilebox-datasets/tilebox/datasets/data/timeseries.py index beea3b0..3a1ace1 100644 --- a/tilebox-datasets/tilebox/datasets/data/timeseries.py +++ b/tilebox-datasets/tilebox/datasets/data/timeseries.py @@ -21,7 +21,13 @@ class TimeseriesDatasetChunk: @classmethod def from_message(cls, chunk: timeseries_pb2.TimeseriesDatasetChunk) -> "TimeseriesDatasetChunk": datapoint_interval = None - if chunk.datapoint_interval and chunk.datapoint_interval.start_id and chunk.datapoint_interval.end_id: + if ( + chunk.datapoint_interval + and chunk.datapoint_interval.start_id + and chunk.datapoint_interval.end_id + and chunk.datapoint_interval.start_id.uuid + and chunk.datapoint_interval.end_id.uuid + ): datapoint_interval = DatapointInterval.from_message(chunk.datapoint_interval) time_interval = None diff --git a/tilebox-workflows/tilebox/workflows/timeseries.py b/tilebox-workflows/tilebox/workflows/timeseries.py index f812334..7a304f0 100644 --- a/tilebox-workflows/tilebox/workflows/timeseries.py +++ b/tilebox-workflows/tilebox/workflows/timeseries.py @@ -22,19 +22,20 @@ @execution_interceptor -def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: ExecutionContext) -> None: +def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: ExecutionContext) -> None: # noqa: C901 if not isinstance(task, TimeseriesTask): raise TypeError("Task is not a timeseries task. Inherit from TimeseriesTask to mark it as such.") chunk: TimeseriesDatasetChunk = task.timeseries_data # type: ignore[attr-defined] - # let's get the collection object - dataset = context.runner_context.datasets_client._dataset_by_id(str(chunk.dataset_id)) # type: ignore[attr-defined] # noqa: SLF001 - collection = dataset.collection("unknown") # dummy collection, we will inject the right id below: + # let's get a collection client + datasets_client = context.runner_context.datasets_client + dataset = datasets_client._dataset_by_id(str(chunk.dataset_id)) # type: ignore[attr-defined] # noqa: SLF001 # we already know the collection id, so we can skip the lookup (we don't know the name, but don't need it) - collection._info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None) # noqa: SLF001 + collection_info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None) + collection = CollectionClient(dataset, collection_info) - # leaf case: we are already executing a specific batch of datapoints fitting in the chunk size, so let's load them and process them + # leaf case: we are already executing a specific batch of datapoints fitting in the chunk size, so let's load them if chunk.datapoint_interval: datapoint_interval = (chunk.datapoint_interval.start_id, chunk.datapoint_interval.end_id) # we already are a leaf task executing for a specific datapoint interval: @@ -44,6 +45,9 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: skip_data=False, show_progress=False, ) + if not datapoints: + return # no datapoints in the interval -> we are done + for i in range(datapoints.sizes["time"]): datapoint = datapoints.isel(time=i) call_next(context, datapoint) # type: ignore[call-arg] @@ -88,7 +92,7 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: subtasks = [replace(task, timeseries_data=sub_chunk) for sub_chunk in sub_chunks] # type: ignore[misc] if len(subtasks) > 0: - context.submit_batch(subtasks) + context.submit_subtasks(subtasks) return