Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions matrix.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Matrix test configuration for testing pandas compatibility across Python versions
# Run with: pymatrix --config matrix.toml
#
# Split into scenarios per package due to pytest conftest collision when running
# multiple packages together (each has tests/conftest.py).

[[scenarios]]
name = "datasets-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
test-args = ["tilebox-datasets/tests/", "-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "datasets-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-datasets/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "storage-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
test-args = ["tilebox-storage/tests/", "-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "storage-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-storage/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "grpc-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
test-args = ["tilebox-grpc/tests/", "-v"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "grpc-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-grpc/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]

[[scenarios]]
name = "workflows-pandas2"
python = ["3.10", "3.11", "3.12", "3.13"]
test-command = "pytest"
# Ignore FutureWarning: google-cloud-storage raises deprecation warning on Python 3.10
test-args = ["tilebox-workflows/tests/", "-v", "-W", "ignore::FutureWarning"]

[scenarios.packages]
pandas = ["2.2.3"]

[[scenarios]]
name = "workflows-pandas3"
python = ["3.11", "3.12", "3.13"] # pandas 3.0 requires Python 3.11+
test-command = "pytest"
test-args = ["tilebox-workflows/tests/", "-v"]

[scenarios.packages]
pandas = ["3.0.0"]
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ dev = [
"junitparser>=3.2.0",
"ty>=0.0.11",
"prek>=0.2.27",
# testing
"pytest>=8.3.2",
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest-httpx>=0.30.0",
"hypothesis>=6.112.1",
"moto>=5",
]

[project.scripts]
Expand Down
8 changes: 1 addition & 7 deletions tilebox-datasets/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,7 @@ dependencies = [
"promise>=2.3",
]

[dependency-groups]
dev = [
"hypothesis>=6.112.1",
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest>=8.3.2",
]



[project.urls]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from uuid import UUID

import pandas as pd
import pytest
from hypothesis import given, settings
from hypothesis.strategies import lists
Expand Down Expand Up @@ -152,21 +153,21 @@ def test_convert_datapoints(datapoints: list[ExampleDatapoint]) -> None: # noqa
for uuid in dataset.some_id.to_numpy():
assert isinstance(uuid, str)

# strings should be stored as object arrays, with None as the fill value if missing
# strings should be stored as object arrays, with missing values (None or NaN) as fill
if "some_string" in dataset:
for string in dataset.some_string.to_numpy():
assert string is None or isinstance(string, str)
assert pd.isna(string) or isinstance(string, str)
if "some_repeated_string" in dataset:
for string in dataset.some_repeated_string.to_numpy().ravel():
assert string is None or isinstance(string, str)
assert pd.isna(string) or isinstance(string, str)

# bytes should be stored as object arrays, with None as the fill value if missing
# bytes should be stored as object arrays, with missing values (None or NaN) as fill
if "some_bytes" in dataset:
for bytes_ in dataset.some_bytes.to_numpy():
assert bytes_ is None or isinstance(bytes_, bytes)
assert pd.isna(bytes_) or isinstance(bytes_, bytes)
if "some_repeated_bytes" in dataset:
for bytes_ in dataset.some_repeated_bytes.to_numpy().ravel():
assert bytes_ is None or isinstance(bytes_, bytes)
assert pd.isna(bytes_) or isinstance(bytes_, bytes)


@given(lists(example_datapoints(missing_fields=True), min_size=1, max_size=10))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from uuid import UUID

import numpy as np
import pandas as pd
from google.protobuf.descriptor import FieldDescriptor
from google.protobuf.duration_pb2 import Duration
from google.protobuf.message import Message
Expand All @@ -17,6 +18,21 @@
from tilebox.datasets.datasets.v1.well_known_types_pb2 import Geometry, LatLon, LatLonAlt, Quaternion, Vec3

ScalarProtoFieldValue = Message | float | str | bool | bytes


def _is_missing(value: Any) -> bool:
"""Check if a value represents a missing/null value.

Handles None, np.nan, pd.NA, NaT, and other pandas missing value sentinels.
This is needed for pandas 3.0+ compatibility where object-dtype columns use
np.nan instead of None for missing values.
"""
if value is None:
return True
try:
return bool(pd.isna(value))
except (TypeError, ValueError):
return False
ProtoFieldValue = ScalarProtoFieldValue | Sequence[ScalarProtoFieldValue] | None

_FILL_VALUES_BY_DTYPE: dict[type[np.dtype[Any]], Any] = {
Expand Down Expand Up @@ -107,7 +123,7 @@ def from_proto(self, value: ProtoFieldValue) -> int:
return value.seconds * 10**9 + value.nanos

def to_proto(self, value: DatetimeScalar) -> Timestamp | None:
if value is None or (isinstance(value, np.datetime64) and np.isnat(value)):
if _is_missing(value) or (isinstance(value, np.datetime64) and np.isnat(value)):
return None
# we use pandas to_datetime function to handle a variety of input types that can be coerced to datetimes
seconds, nanos = divmod(to_datetime(value, utc=True).value, 10**9)
Expand All @@ -124,7 +140,7 @@ def from_proto(self, value: ProtoFieldValue) -> int:
return value.seconds * 10**9 + value.nanos

def to_proto(self, value: str | float | timedelta | np.timedelta64) -> Duration | None:
if value is None or (isinstance(value, np.timedelta64) and np.isnat(value)):
if _is_missing(value) or (isinstance(value, np.timedelta64) and np.isnat(value)):
return None
# we use pandas to_timedelta function to handle a variety of input types that can be coerced to timedeltas
seconds, nanos = divmod(to_timedelta(value).value, 10**9) # type: ignore[arg-type]
Expand All @@ -141,7 +157,7 @@ def from_proto(self, value: ProtoFieldValue) -> str:
return str(UUID(bytes=value.uuid))

def to_proto(self, value: str | UUID) -> UUIDMessage | None:
if not value: # None or empty string
if _is_missing(value) or value == "": # missing or empty string
return None

if isinstance(value, str):
Expand All @@ -160,7 +176,7 @@ def from_proto(self, value: ProtoFieldValue) -> Any:
return from_wkb(value.wkb)

def to_proto(self, value: Any) -> Geometry | None:
if value is None:
if _is_missing(value):
return None
return Geometry(wkb=value.wkb)

Expand All @@ -175,7 +191,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
return value.x, value.y, value.z

def to_proto(self, value: tuple[float, float, float]) -> Vec3 | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return Vec3(x=value[0], y=value[1], z=value[2])

Expand All @@ -190,7 +206,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float, float
return value.q1, value.q2, value.q3, value.q4

def to_proto(self, value: tuple[float, float, float, float]) -> Quaternion | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return Quaternion(q1=value[0], q2=value[1], q3=value[2], q4=value[3])

Expand All @@ -205,7 +221,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float]:
return value.latitude, value.longitude

def to_proto(self, value: tuple[float, float]) -> LatLon | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return LatLon(latitude=value[0], longitude=value[1])

Expand All @@ -221,7 +237,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
return value.latitude, value.longitude, value.altitude

def to_proto(self, value: tuple[float, float, float]) -> LatLonAlt | None:
if value is None or np.all(np.isnan(value)):
if _is_missing(value) or np.all(np.isnan(value)):
return None
return LatLonAlt(latitude=value[0], longitude=value[1], altitude=value[2])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,29 @@ def columnar_to_row_based(
yield datapoint


def _is_scalar_missing(value: Any) -> bool:
"""Check if a scalar value is missing (None, NaN, NA, NaT).

Handles both scalar and array-like values safely - for arrays, returns False
since pd.isna would return an array which can't be used in a boolean context.
"""
if value is None:
return True
try:
result = pd.isna(value)
# pd.isna returns an array for array-like inputs; we only want scalar True/False
if isinstance(result, (bool, np.bool_)):
return bool(result)
return False
except (TypeError, ValueError):
return False


def convert_values_to_proto(
    values: np.ndarray | pd.Series, field_type: ProtobufFieldType, filter_none: bool = False
) -> list[ProtoFieldValue]:
    """Convert a column of values to their protobuf representations.

    Args:
        values: Column values to convert (numpy array or pandas Series).
        field_type: Field converter whose ``to_proto`` is applied per value.
        filter_none: When True, skip missing entries instead of converting
            them. Missing-ness is decided by ``_is_scalar_missing`` so that
            NaN/NA/NaT fills (used by pandas 3.0+ object columns) are
            filtered the same way plain ``None`` is.

    Returns:
        List of converted protobuf field values, in input order.
    """
    if filter_none:
        return [field_type.to_proto(value) for value in values if not _is_scalar_missing(value)]
    return [field_type.to_proto(value) for value in values]


Expand Down
2 changes: 1 addition & 1 deletion tilebox-datasets/tilebox/datasets/query/time_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# A type alias for the different types that can be used to specify a time interval
TimeIntervalLike: TypeAlias = (
DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | "TimeInterval"
"DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | TimeInterval"
)


Expand Down
3 changes: 1 addition & 2 deletions tilebox-grpc/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ dependencies = [
]


[dependency-groups]
dev = ["pytest-asyncio>=0.24.0", "pytest-cov>=5.0.0", "pytest>=8.3.2"]


[project.urls]
Homepage = "https://tilebox.com"
Expand Down
9 changes: 1 addition & 8 deletions tilebox-storage/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,7 @@ dependencies = [
"obstore>=0.8.0",
]

[dependency-groups]
dev = [
"hypothesis>=6.112.1",
"pytest-httpx>=0.30.0",
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest>=8.3.2",
]


[project.urls]
Homepage = "https://tilebox.com"
Expand Down
3 changes: 1 addition & 2 deletions tilebox-workflows/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ dependencies = [
"python-dateutil>=2.9.0.post0",
]

[dependency-groups]
dev = ["hypothesis>=6.112.1", "pytest-cov>=5.0.0", "pytest>=8.3.2", "moto>=5"]


[project.urls]
Homepage = "https://tilebox.com"
Expand Down
2 changes: 2 additions & 0 deletions tilebox-workflows/tilebox/workflows/jobs/client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from typing import Any, TypeAlias
from uuid import UUID

Expand Down
Loading
Loading