Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{
"editor.formatOnSave": true,
"editor.rulers": [120],
"editor.rulers": [
120
],
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true,
Expand Down
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.37.1] - 2025-06-10

### Fixed

- `tilebox-datasets`: Fixed a bug in `TimeseriesDatasetChunk.from_message` relying on incorrect bool assumptions about
missing protobuf fields.
- `tilebox-grpc`: More robust parsing of gRPC channel URLs.
- `tilebox-workflows`: Fixed a bug in the timeseries interceptor that resulted in an error when accessing a collection.

## [0.37.0] - 2025-06-06

Expand Down Expand Up @@ -185,7 +190,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc`


[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.37.0...HEAD
[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.37.1...HEAD
[0.37.1]: https://github.com/tilebox/tilebox-python/compare/v0.37.0...v0.37.1
[0.37.0]: https://github.com/tilebox/tilebox-python/compare/v0.36.1...v0.37.0
[0.36.1]: https://github.com/tilebox/tilebox-python/compare/v0.36.0...v0.36.1
[0.36.0]: https://github.com/tilebox/tilebox-python/compare/v0.35.0...v0.36.0
Expand Down
8 changes: 7 additions & 1 deletion tilebox-datasets/tilebox/datasets/data/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ class TimeseriesDatasetChunk:
@classmethod
def from_message(cls, chunk: timeseries_pb2.TimeseriesDatasetChunk) -> "TimeseriesDatasetChunk":
datapoint_interval = None
if chunk.datapoint_interval and chunk.datapoint_interval.start_id and chunk.datapoint_interval.end_id:
if (
chunk.datapoint_interval
and chunk.datapoint_interval.start_id
and chunk.datapoint_interval.end_id
and chunk.datapoint_interval.start_id.uuid
and chunk.datapoint_interval.end_id.uuid
):
datapoint_interval = DatapointInterval.from_message(chunk.datapoint_interval)

time_interval = None
Expand Down
18 changes: 11 additions & 7 deletions tilebox-workflows/tilebox/workflows/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,20 @@


@execution_interceptor
def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: ExecutionContext) -> None:
def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: ExecutionContext) -> None: # noqa: C901
if not isinstance(task, TimeseriesTask):
raise TypeError("Task is not a timeseries task. Inherit from TimeseriesTask to mark it as such.")

chunk: TimeseriesDatasetChunk = task.timeseries_data # type: ignore[attr-defined]

# let's get the collection object
dataset = context.runner_context.datasets_client._dataset_by_id(str(chunk.dataset_id)) # type: ignore[attr-defined] # noqa: SLF001
collection = dataset.collection("unknown") # dummy collection, we will inject the right id below:
# let's get a collection client
datasets_client = context.runner_context.datasets_client
dataset = datasets_client._dataset_by_id(str(chunk.dataset_id)) # type: ignore[attr-defined] # noqa: SLF001
# we already know the collection id, so we can skip the lookup (we don't know the name, but don't need it)
collection._info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None) # noqa: SLF001
collection_info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None)
collection = CollectionClient(dataset, collection_info)

# leaf case: we are already executing a specific batch of datapoints fitting in the chunk size, so let's load them and process them
# leaf case: we are already executing a specific batch of datapoints fitting in the chunk size, so let's load them
if chunk.datapoint_interval:
datapoint_interval = (chunk.datapoint_interval.start_id, chunk.datapoint_interval.end_id)
# we already are a leaf task executing for a specific datapoint interval:
Expand All @@ -44,6 +45,9 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:
skip_data=False,
show_progress=False,
)
if not datapoints:
return # no datapoints in the interval -> we are done

for i in range(datapoints.sizes["time"]):
datapoint = datapoints.isel(time=i)
call_next(context, datapoint) # type: ignore[call-arg]
Expand Down Expand Up @@ -88,7 +92,7 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:

subtasks = [replace(task, timeseries_data=sub_chunk) for sub_chunk in sub_chunks] # type: ignore[misc]
if len(subtasks) > 0:
context.submit_batch(subtasks)
context.submit_subtasks(subtasks)

return

Expand Down
Loading