Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions matrix.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Matrix test configuration for testing pandas compatibility across Python versions
# Run with: pymatrix --config matrix.toml
#
# Split into scenarios per package due to pytest conftest collision when running
# multiple packages together (each has tests/conftest.py).

[[scenarios]]
name = "datasets-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
test-args = ["tilebox-datasets/tests/", "-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "datasets-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-datasets/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "storage-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
test-args = ["tilebox-storage/tests/", "-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "storage-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-storage/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "grpc-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
test-args = ["tilebox-grpc/tests/", "-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "grpc-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-grpc/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "workflows-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
# Ignore FutureWarning: google-cloud-storage raises deprecation warning on Python 3.10
test-args = ["tilebox-workflows/tests/", "-v", "-W", "ignore::FutureWarning"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "workflows-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-workflows/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ dev = [
"junitparser>=3.2.0",
"ty>=0.0.11",
"prek>=0.2.27",
# testing
"pytest>=8.3.2",
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest-httpx>=0.30.0",
"hypothesis>=6.112.1",
"moto>=5",
]

[project.scripts]
Expand Down
8 changes: 1 addition & 7 deletions tilebox-datasets/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,7 @@ dependencies = [
"promise>=2.3",
]

[dependency-groups]
dev = [
"hypothesis>=6.112.1",
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest>=8.3.2",
]



[project.urls]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from uuid import UUID

import pandas as pd
import pytest
from hypothesis import given, settings
from hypothesis.strategies import lists
Expand Down Expand Up @@ -152,21 +153,21 @@ def test_convert_datapoints(datapoints: list[ExampleDatapoint]) -> None: # noqa
for uuid in dataset.some_id.to_numpy():
assert isinstance(uuid, str)

# strings should be stored as object arrays, with None as the fill value if missing
# strings should be stored as object arrays, with missing values (None or NaN) as fill
if "some_string" in dataset:
for string in dataset.some_string.to_numpy():
assert string is None or isinstance(string, str)
assert pd.isna(string) or isinstance(string, str)
if "some_repeated_string" in dataset:
for string in dataset.some_repeated_string.to_numpy().ravel():
assert string is None or isinstance(string, str)
assert pd.isna(string) or isinstance(string, str)

# bytes should be stored as object arrays, with None as the fill value if missing
# bytes should be stored as object arrays, with missing values (None or NaN) as fill
if "some_bytes" in dataset:
for bytes_ in dataset.some_bytes.to_numpy():
assert bytes_ is None or isinstance(bytes_, bytes)
assert pd.isna(bytes_) or isinstance(bytes_, bytes)
if "some_repeated_bytes" in dataset:
for bytes_ in dataset.some_repeated_bytes.to_numpy().ravel():
assert bytes_ is None or isinstance(bytes_, bytes)
assert pd.isna(bytes_) or isinstance(bytes_, bytes)


@given(lists(example_datapoints(missing_fields=True), min_size=1, max_size=10))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from uuid import UUID

import numpy as np
import pandas as pd
from google.protobuf.descriptor import FieldDescriptor
from google.protobuf.duration_pb2 import Duration
from google.protobuf.message import Message
Expand All @@ -17,6 +18,21 @@
from tilebox.datasets.datasets.v1.well_known_types_pb2 import Geometry, LatLon, LatLonAlt, Quaternion, Vec3

ScalarProtoFieldValue = Message | float | str | bool | bytes


def _is_missing(value: Any) -> bool:
"""Check if a value represents a missing/null value.

Handles None, np.nan, pd.NA, NaT, and other pandas missing value sentinels.
This is needed for pandas 3.0+ compatibility where object-dtype columns use
np.nan instead of None for missing values.
"""
if value is None:
return True
try:
return bool(pd.isna(value))
except (TypeError, ValueError):
return False
ProtoFieldValue = ScalarProtoFieldValue | Sequence[ScalarProtoFieldValue] | None

_FILL_VALUES_BY_DTYPE: dict[type[np.dtype[Any]], Any] = {
Expand Down Expand Up @@ -107,7 +123,7 @@ def from_proto(self, value: ProtoFieldValue) -> int:
return value.seconds * 10**9 + value.nanos

def to_proto(self, value: DatetimeScalar) -> Timestamp | None:
if value is None or (isinstance(value, np.datetime64) and np.isnat(value)):
if _is_missing(value) or (isinstance(value, np.datetime64) and np.isnat(value)):
return None
# we use pandas to_datetime function to handle a variety of input types that can be coerced to datetimes
seconds, nanos = divmod(to_datetime(value, utc=True).value, 10**9)
Expand All @@ -124,7 +140,7 @@ def from_proto(self, value: ProtoFieldValue) -> int:
return value.seconds * 10**9 + value.nanos

def to_proto(self, value: str | float | timedelta | np.timedelta64) -> Duration | None:
if value is None or (isinstance(value, np.timedelta64) and np.isnat(value)):
if _is_missing(value) or (isinstance(value, np.timedelta64) and np.isnat(value)):
return None
# we use pandas to_timedelta function to handle a variety of input types that can be coerced to timedeltas
seconds, nanos = divmod(to_timedelta(value).value, 10**9) # type: ignore[arg-type]
Expand All @@ -141,7 +157,7 @@ def from_proto(self, value: ProtoFieldValue) -> str:
return str(UUID(bytes=value.uuid))

def to_proto(self, value: str | UUID) -> UUIDMessage | None:
if not value: # None or empty string
if _is_missing(value) or value == "": # missing or empty string
return None

if isinstance(value, str):
Expand All @@ -160,7 +176,7 @@ def from_proto(self, value: ProtoFieldValue) -> Any:
return from_wkb(value.wkb)

def to_proto(self, value: Any) -> Geometry | None:
if value is None:
if _is_missing(value):
return None
return Geometry(wkb=value.wkb)

Expand All @@ -175,7 +191,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
return value.x, value.y, value.z

def to_proto(self, value: tuple[float, float, float]) -> Vec3 | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return Vec3(x=value[0], y=value[1], z=value[2])

Expand All @@ -190,7 +206,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float, float
return value.q1, value.q2, value.q3, value.q4

def to_proto(self, value: tuple[float, float, float, float]) -> Quaternion | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return Quaternion(q1=value[0], q2=value[1], q3=value[2], q4=value[3])

Expand All @@ -205,7 +221,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float]:
return value.latitude, value.longitude

def to_proto(self, value: tuple[float, float]) -> LatLon | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return LatLon(latitude=value[0], longitude=value[1])

Expand All @@ -221,7 +237,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
return value.latitude, value.longitude, value.altitude

def to_proto(self, value: tuple[float, float, float]) -> LatLonAlt | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return LatLonAlt(latitude=value[0], longitude=value[1], altitude=value[2])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,29 @@ def columnar_to_row_based(
yield datapoint


def _is_scalar_missing(value: Any) -> bool:
"""Check if a scalar value is missing (None, NaN, NA, NaT).

Handles both scalar and array-like values safely - for arrays, returns False
since pd.isna would return an array which can't be used in a boolean context.
"""
if value is None:
return True
try:
result = pd.isna(value)
# pd.isna returns an array for array-like inputs; we only want scalar True/False
if isinstance(result, (bool, np.bool_)):
return bool(result)
return False
except (TypeError, ValueError):
return False


def convert_values_to_proto(
    values: np.ndarray | pd.Series, field_type: ProtobufFieldType, filter_none: bool = False
) -> list[ProtoFieldValue]:
    """Convert a column of values to their protobuf representations.

    Args:
        values: Column values to convert (numpy array or pandas Series).
        field_type: Field converter whose ``to_proto`` is applied per value.
        filter_none: When True, skip missing entries instead of converting
            them. Missing-ness is decided by ``_is_scalar_missing`` so that
            NaN/NA/NaT fills (used by pandas 3.0+ object columns) are
            filtered the same way plain ``None`` is.

    Returns:
        List of converted protobuf field values, in input order.
    """
    if filter_none:
        return [field_type.to_proto(value) for value in values if not _is_scalar_missing(value)]
    return [field_type.to_proto(value) for value in values]


Expand Down
2 changes: 1 addition & 1 deletion tilebox-datasets/tilebox/datasets/query/time_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# A type alias for the different types that can be used to specify a time interval
TimeIntervalLike: TypeAlias = (
DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | "TimeInterval"
"DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | TimeInterval"
)


Expand Down
3 changes: 1 addition & 2 deletions tilebox-grpc/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ dependencies = [
]


[dependency-groups]
dev = ["pytest-asyncio>=0.24.0", "pytest-cov>=5.0.0", "pytest>=8.3.2"]


[project.urls]
Homepage = "https://tilebox.com"
Expand Down
9 changes: 1 addition & 8 deletions tilebox-storage/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,7 @@ dependencies = [
"obstore>=0.8.0",
]

[dependency-groups]
dev = [
"hypothesis>=6.112.1",
"pytest-httpx>=0.30.0",
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest>=8.3.2",
]


[project.urls]
Homepage = "https://tilebox.com"
Expand Down
3 changes: 1 addition & 2 deletions tilebox-workflows/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ dependencies = [
"python-dateutil>=2.9.0.post0",
]

[dependency-groups]
dev = ["hypothesis>=6.112.1", "pytest-cov>=5.0.0", "pytest>=8.3.2", "moto>=5"]


[project.urls]
Homepage = "https://tilebox.com"
Expand Down
2 changes: 2 additions & 0 deletions tilebox-workflows/tilebox/workflows/jobs/client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from typing import Any, TypeAlias
from uuid import UUID

Expand Down
Loading
Loading