From d7f9aa20edb145f9eda714dbaa5344ff68f2c0ca Mon Sep 17 00:00:00 2001
From: Samuel Aboderin
Date: Wed, 10 Jun 2026 08:58:39 +0100
Subject: [PATCH 1/2] GH-49907: [Python] Implement FixedShapeTensorType.to_pandas_dtype

---
 python/pyarrow/tests/test_extension_type.py | 49 +++++++++++++++++++++
 python/pyarrow/types.pxi                    | 24 ++++++++++
 2 files changed, 73 insertions(+)

diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index 1adbd4e98070..b3604beb6857 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -1786,6 +1786,55 @@ def test_tensor_type_cast():
     assert storage_result.equals(storage)
 
 
+@pytest.mark.pandas
+@pytest.mark.parametrize("value_type", [pa.int8(), pa.float32(), pa.float64()])
+@pytest.mark.parametrize("shape,permutation", [
+    ([2, 2], None),
+    ([2, 3], None),
+    ([2, 2, 3], [0, 2, 1]),
+])
+def test_tensor_type_to_pandas(value_type, shape, permutation):
+    # GH-49907: to_pandas_dtype should return a pandas dtype instead of
+    # raising NotImplementedError, and enable Table.to_pandas(split_blocks=True)
+    import pandas as pd
+
+    if Version(pd.__version__) < Version("2.1.0"):
+        # pd.ArrowDtype extension blocks are only reliable from 2.1.0,
+        # see GH-35821
+        pytest.skip("requires pandas >= 2.1.0")
+
+    tensor_type = pa.fixed_shape_tensor(
+        value_type, shape, permutation=permutation)
+
+    # The type maps to a pandas ArrowDtype wrapping the extension type
+    dtype = tensor_type.to_pandas_dtype()
+    assert isinstance(dtype, pd.ArrowDtype)
+    assert dtype.pyarrow_dtype == tensor_type
+
+    # Build an extension array of a few tensors via the storage type so the
+    # explicit permutation is preserved exactly
+    size = 3
+    n = int(np.prod(shape))
+    storage = pa.array(
+        [list(range(i * n, (i + 1) * n)) for i in range(size)],
+        pa.list_(value_type, n))
+    arr = pa.ExtensionArray.from_storage(tensor_type, storage)
+
+    # Array.to_pandas uses the ArrowDtype
+    series = arr.to_pandas()
+    assert isinstance(series.dtype, pd.ArrowDtype)
+    assert series.dtype.pyarrow_dtype == tensor_type
+    assert len(series) == size
+
+    # Table.to_pandas, including the split_blocks=True path from GH-49907
+    table = pa.table({"tensor": arr})
+    for split_blocks in [False, True]:
+        result = table.to_pandas(split_blocks=split_blocks)
+        assert isinstance(result["tensor"].dtype, pd.ArrowDtype)
+        assert result["tensor"].dtype.pyarrow_dtype == tensor_type
+        assert len(result) == size
+
+
 @pytest.mark.pandas
 def test_extension_to_pandas_storage_type(registered_period_type):
     period_type, _ = registered_period_type
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index ec1a5a2ba9a3..8ace6ecd7b5b 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -2049,6 +2049,30 @@ cdef class FixedShapeTensorType(BaseExtensionType):
         else:
             return None
 
+    def to_pandas_dtype(self):
+        """
+        Return the equivalent pandas dtype, an instance of
+        :class:`pandas.ArrowDtype` wrapping this extension type.
+
+        Each value of the resulting pandas column is a tensor with this
+        type's ``shape``. Returning a pandas extension dtype (rather than a
+        NumPy dtype) is what lets ``Table.to_pandas(split_blocks=True)``
+        build an extension block for this type.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> pa.fixed_shape_tensor(pa.int32(), [2, 2]).to_pandas_dtype()
+        extension<arrow.fixed_shape_tensor[value_type=int32, shape=[2,2]]>[pyarrow]
+        """
+        import pandas as pd
+        if not hasattr(pd, "ArrowDtype"):
+            # pandas < 1.5 has no ArrowDtype able to hold tensors, so keep the
+            # documented fallback. Conversion code catches this and produces an
+            # object-dtype column instead.
+            raise NotImplementedError(str(self))
+        return pd.ArrowDtype(self)
+
     def __arrow_ext_class__(self):
         return FixedShapeTensorArray
 

From 5c1fbd0eb53a4484f7e551c7867b51a6880006a1 Mon Sep 17 00:00:00 2001
From: Samuel Aboderin
Date: Wed, 10 Jun 2026 09:23:12 +0100
Subject: [PATCH 2/2] GH-49907: [Python] Gate FixedShapeTensorType.to_pandas_dtype on pandas >= 2.1.0

---
 python/pyarrow/types.pxi | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 8ace6ecd7b5b..8f4440411217 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -2059,18 +2059,24 @@ cdef class FixedShapeTensorType(BaseExtensionType):
         NumPy dtype) is what lets ``Table.to_pandas(split_blocks=True)``
         build an extension block for this type.
 
+        This requires pandas >= 2.1.0, the first version with reliable
+        ``ArrowDtype`` extension blocks (see GH-35821). On older pandas it
+        raises ``NotImplementedError`` and conversion falls back to the
+        object dtype.
+
         Examples
         --------
         >>> import pyarrow as pa
         >>> pa.fixed_shape_tensor(pa.int32(), [2, 2]).to_pandas_dtype()
         extension<arrow.fixed_shape_tensor[value_type=int32, shape=[2,2]]>[pyarrow]
         """
+        if not _pandas_api.is_ge_v21():
+            # pandas.ArrowDtype extension blocks are only reliable from 2.1.0
+            # (GH-35821); on older pandas keep the documented fallback so the
+            # conversion code produces an object-dtype column instead.
+            raise NotImplementedError(
+                f"{self} requires pandas >= 2.1.0 to map to pandas.ArrowDtype")
         import pandas as pd
-        if not hasattr(pd, "ArrowDtype"):
-            # pandas < 1.5 has no ArrowDtype able to hold tensors, so keep the
-            # documented fallback. Conversion code catches this and produces an
-            # object-dtype column instead.
-            raise NotImplementedError(str(self))
         return pd.ArrowDtype(self)
 
     def __arrow_ext_class__(self):