diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index e5d367958dd..bdd5ea428a8 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -239,6 +239,9 @@ jobs:
- name: Test
shell: bash
run: ci/scripts/python_test.sh $(pwd) $(pwd)/build
+ - name: Test annotations
+ shell: bash
+ run: ci/scripts/python_test_type_annotations.sh $(pwd)/python
windows:
name: AMD64 Windows 2022 Python 3.13
@@ -296,3 +299,7 @@ jobs:
shell: cmd
run: |
call "ci\scripts\python_test.bat" %cd%
+ - name: Test annotations
+ shell: cmd
+ run: |
+ call "ci\scripts\python_test_type_annotations.bat" %cd%\python
diff --git a/ci/scripts/python_test_type_annotations.bat b/ci/scripts/python_test_type_annotations.bat
new file mode 100644
index 00000000000..3446e329a89
--- /dev/null
+++ b/ci/scripts/python_test_type_annotations.bat
@@ -0,0 +1,38 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+set PYARROW_DIR=%1
+
+echo Annotation testing on Windows ...
+
+@REM Install library stubs
+%PYTHON_CMD% -m pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil || exit /B 1
+
+@REM Install other dependencies for type checking
+%PYTHON_CMD% -m pip install fsspec || exit /B 1
+
+@REM Install type checkers
+%PYTHON_CMD% -m pip install mypy pyright ty || exit /B 1
+
+@REM Run type checkers from the PyArrow directory; abort on the first failure
+pushd %PYARROW_DIR% || exit /B 1
+
+mypy || exit /B 1
+pyright || exit /B 1
+ty check || exit /B 1
diff --git a/ci/scripts/python_test_type_annotations.sh b/ci/scripts/python_test_type_annotations.sh
new file mode 100755
index 00000000000..5c3c831d4bd
--- /dev/null
+++ b/ci/scripts/python_test_type_annotations.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+pyarrow_dir=${1}
+
+# Install library stubs
+pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil
+
+# Install type checkers
+pip install mypy pyright ty
+
+# Install other dependencies for type checking
+pip install fsspec
+
+# Run type checkers
+pushd ${pyarrow_dir}
+mypy
+pyright
+ty check;
+fi
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index 8d113312927..8d63679de08 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -175,6 +175,11 @@ export CMAKE_PREFIX_PATH=${build_dir}/install
export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}
pushd ${source_dir}/python
+# We first populate stub docstrings and then build the wheel
+python setup.py build_ext --inplace
+python -m pip install griffe libcst
+python ../dev/update_stub_docstrings.py pyarrow-stubs
+
python setup.py bdist_wheel
popd
diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py
index 84fcaba42e6..ee4a31aedb8 100644
--- a/ci/scripts/python_wheel_validate_contents.py
+++ b/ci/scripts/python_wheel_validate_contents.py
@@ -35,6 +35,11 @@ def validate_wheel(path):
assert not outliers, f"Unexpected contents in wheel: {sorted(outliers)}"
print(f"The wheel: {wheels[0]} seems valid.")
+ candidates = [info for info in f.filelist if info.filename.endswith('compute.pyi')]
+ assert candidates, "compute.pyi not found in wheel"
+ content = f.read(candidates[0]).decode('utf-8', errors='replace')
+ assert '"""' in content, "compute.pyi missing docstrings (no triple quotes found)"
+
def main():
parser = argparse.ArgumentParser()
diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat
index b4b7fed99fd..3da7f60f182 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -135,6 +135,11 @@ pushd C:\arrow\python
+@REM We first populate stub docstrings and then build the wheel
+%PYTHON_CMD% setup.py build_ext --inplace || exit /B 1
+%PYTHON_CMD% -m pip install griffe libcst || exit /B 1
+%PYTHON_CMD% ..\dev\update_stub_docstrings.py pyarrow-stubs || exit /B 1
+
 @REM Build wheel
 %PYTHON_CMD% setup.py bdist_wheel || exit /B 1
@REM Repair the wheel with delvewheel
@REM
@REM Since we bundled the Arrow C++ libraries ourselves, we only need to
diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh
index a3fbeb3c0b3..977ef64e008 100755
--- a/ci/scripts/python_wheel_xlinux_build.sh
+++ b/ci/scripts/python_wheel_xlinux_build.sh
@@ -167,6 +167,11 @@ export ARROW_HOME=/tmp/arrow-dist
export CMAKE_PREFIX_PATH=/tmp/arrow-dist
pushd /arrow/python
+# We first populate stub docstrings and then build the wheel
+python setup.py build_ext --inplace
+python -m pip install griffe libcst
+python ../dev/update_stub_docstrings.py pyarrow-stubs
+
python setup.py bdist_wheel
echo "=== Strip symbols from wheel ==="
diff --git a/compose.yaml b/compose.yaml
index 84481e1af76..21136ec3c6c 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -924,7 +924,8 @@ services:
["
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
- /arrow/ci/scripts/python_test.sh /arrow"]
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]
conda-python-emscripten:
# Usage:
@@ -1008,7 +1009,8 @@ services:
/bin/bash -c "
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
- /arrow/ci/scripts/python_test.sh /arrow"
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"
debian-python:
# Usage:
@@ -1510,7 +1512,8 @@ services:
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
mamba uninstall -y numpy &&
- /arrow/ci/scripts/python_test.sh /arrow"]
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]
conda-python-docs:
# Usage:
@@ -1536,7 +1539,8 @@ services:
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 &&
- /arrow/ci/scripts/python_test.sh /arrow"]
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]
conda-python-dask:
# Possible $DASK parameters:
diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst
index d03b2439b10..c23891e94d0 100644
--- a/docs/source/developers/python/development.rst
+++ b/docs/source/developers/python/development.rst
@@ -42,7 +42,7 @@ Unit Testing
============
We are using `pytest `_ to develop our unit
-test suite. After `building the project `_ you can run its unit tests
+test suite. After `building the project `_ you can run its unit tests
like so:
.. code-block::
@@ -101,6 +101,74 @@ The test groups currently include:
* ``s3``: Tests for Amazon S3
* ``tensorflow``: Tests that involve TensorFlow
+Type Checking
+=============
+
+PyArrow provides type stubs (``*.pyi`` files) for static type checking. These
+stubs are located in the ``pyarrow-stubs/`` directory and are automatically
+included in the distributed wheel packages.
+
+Running Type Checkers
+---------------------
+
+We support multiple type checkers. Their configurations are in
+``pyproject.toml``.
+
+**mypy**
+
+To run mypy on the PyArrow codebase:
+
+.. code-block::
+
+ $ cd arrow/python
+ $ mypy
+
+The mypy configuration is in the ``[tool.mypy]`` section of ``pyproject.toml``.
+
+**pyright**
+
+To run pyright:
+
+.. code-block::
+
+ $ cd arrow/python
+ $ pyright
+
+The pyright configuration is in the ``[tool.pyright]`` section of ``pyproject.toml``.
+
+**ty**
+
+To run ty (note: currently only partially configured):
+
+.. code-block::
+
+ $ cd arrow/python
+ $ ty check
+
+Maintaining Type Stubs
+-----------------------
+
+Type stubs for PyArrow are maintained in the ``pyarrow-stubs/``
+directory. These stubs mirror the structure of the main ``pyarrow/`` package.
+
+When adding or modifying public APIs:
+
+1. **Update the corresponding ``.pyi`` stub file** in ``pyarrow-stubs/``
+ to reflect the new or changed function/class signatures.
+
+2. **Include type annotations** where possible. For Cython modules or
+ dynamically generated APIs such as compute kernels add the corresponding
+ stub in ``pyarrow-stubs/``.
+
+3. **Run type checkers** to ensure the stubs are correct and complete.
+
+The stub files are automatically copied into the built wheel during the build
+process and will be included when users install PyArrow, enabling type checking
+in downstream projects and for users' IDEs.
+
+Note: The ``py.typed`` marker file in the ``pyarrow/`` directory indicates to
+type checkers that PyArrow supports type checking according to :pep:`561`.
+
Doctest
=======
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index ed7012e4b70..2840ba74128 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -4,6 +4,7 @@ include ../NOTICE.txt
global-include CMakeLists.txt
graft pyarrow
+graft pyarrow-stubs
graft cmake_modules
global-exclude *.so
diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi
new file mode 100644
index 00000000000..2a68a513099
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/__init__.pyi
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Type stubs for PyArrow.
+
+This is a placeholder stub file.
+Complete type annotations will be added in subsequent PRs.
+"""
+
+from typing import Any
+
+def __getattr__(name: str) -> Any: ...
diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi
new file mode 100644
index 00000000000..0715012fddc
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi
@@ -0,0 +1,133 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+
+from collections.abc import Collection, Iterator, Sequence
+from decimal import Decimal
+from typing import Any, Literal, Protocol, TypeAlias, TypeVar
+
+import numpy as np
+
+from numpy.typing import NDArray
+
+from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray
+
+ArrayLike: TypeAlias = Any
+ScalarLike: TypeAlias = Any
+Order: TypeAlias = Literal["ascending", "descending"]
+JoinType: TypeAlias = Literal[
+ "left semi",
+ "right semi",
+ "left anti",
+ "right anti",
+ "inner",
+ "left outer",
+ "right outer",
+ "full outer",
+]
+Compression: TypeAlias = Literal[
+ "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy"
+]
+NullEncoding: TypeAlias = Literal["mask", "encode"]
+NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"]
+TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"]
+Mask: TypeAlias = (
+ Sequence[bool | None]
+ | NDArray[np.bool_]
+ | BooleanArray
+ | ChunkedArray[Any]
+)
+Indices: TypeAlias = (
+ Sequence[int | None]
+ | NDArray[np.integer[Any]]
+ | IntegerArray
+ | ChunkedArray[Any]
+)
+
+PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes |
+ dt.date | dt.datetime | dt.time | dt.timedelta)
+
+_T = TypeVar("_T")
+_V = TypeVar("_V", covariant=True)
+
+SingleOrList: TypeAlias = list[_T] | _T
+
+
+class SupportEq(Protocol):
+ def __eq__(self, other) -> bool: ...
+
+
+class SupportLt(Protocol):
+ def __lt__(self, other) -> bool: ...
+
+
+class SupportGt(Protocol):
+ def __gt__(self, other) -> bool: ...
+
+
+class SupportLe(Protocol):
+ def __le__(self, other) -> bool: ...
+
+
+class SupportGe(Protocol):
+ def __ge__(self, other) -> bool: ...
+
+
+FilterTuple: TypeAlias = (
+ tuple[str, Literal["=", "==", "!="], SupportEq]
+ | tuple[str, Literal["<"], SupportLt]
+ | tuple[str, Literal[">"], SupportGt]
+ | tuple[str, Literal["<="], SupportLe]
+ | tuple[str, Literal[">="], SupportGe]
+ | tuple[str, Literal["in", "not in"], Collection]
+ | tuple[str, str, Any] # Allow general str for operator to avoid type errors
+)
+
+
+class Buffer(Protocol):
+ ...
+
+
+class SupportPyBuffer(Protocol):
+ ...
+
+
+class SupportArrowStream(Protocol):
+ def __arrow_c_stream__(self, requested_schema=None) -> Any: ...
+
+
+class SupportPyArrowArray(Protocol):
+ def __arrow_array__(self, type=None) -> Any: ...
+
+
+class SupportArrowArray(Protocol):
+ def __arrow_c_array__(self, requested_schema=None) -> Any: ...
+
+
+class SupportArrowDeviceArray(Protocol):
+ def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ...
+
+
+class SupportArrowSchema(Protocol):
+ def __arrow_c_schema__(self) -> Any: ...
+
+
+class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse]
+ def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ...
+ def __len__(self) -> int: ...
+ def __contains__(self, item: Any, /) -> bool: ...
diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi
new file mode 100644
index 00000000000..3d802382ba1
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_types.pyi
@@ -0,0 +1,966 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt # noqa: F401
+import sys
+
+from collections.abc import Mapping, Sequence, Iterable, Iterator
+from decimal import Decimal # noqa: F401
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+from typing import Any, Generic, Literal
+
+import numpy as np
+import pandas as pd
+
+from pyarrow._stubs_typing import SupportArrowSchema
+from pyarrow.lib import ( # noqa: F401
+ Array,
+ ChunkedArray,
+ ExtensionArray,
+ MemoryPool,
+ MonthDayNano,
+ Table,
+)
+from typing_extensions import TypeVar, deprecated
+
+from .io import Buffer
+from .scalar import ExtensionScalar
+from ._stubs_typing import TimeUnit
+
+class _Weakrefable:
+ ...
+
+
+class _Metadata(_Weakrefable):
+ ...
+
+
+class DataType(_Weakrefable):
+ def field(self, i: int) -> Field: ...
+
+ @property
+ def id(self) -> int: ...
+ @property
+ def bit_width(self) -> int: ...
+
+ @property
+ def byte_width(self) -> int: ...
+
+ @property
+ def num_fields(self) -> int: ...
+
+ @property
+ def num_buffers(self) -> int: ...
+
+ @property
+ def has_variadic_buffers(self) -> bool: ...
+
+ # Properties that exist on specific subtypes but accessed generically
+ @property
+ def list_size(self) -> int: ...
+
+ def __hash__(self) -> int: ...
+
+ def equals(self, other: DataType | str, *,
+ check_metadata: bool = False) -> bool: ...
+
+ def to_pandas_dtype(self) -> np.generic: ...
+
+ def _export_to_c(self, out_ptr: int) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+ def __arrow_c_schema__(self) -> Any: ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, schema) -> Self: ...
+
+
+_AsPyType = TypeVar("_AsPyType")
+_DataTypeT = TypeVar("_DataTypeT", bound=DataType)
+
+
+class _BasicDataType(DataType, Generic[_AsPyType]):
+ ...
+
+
+class NullType(_BasicDataType[None]):
+ ...
+
+
+class BoolType(_BasicDataType[bool]):
+ ...
+
+
+class UInt8Type(_BasicDataType[int]):
+ ...
+
+
+class Int8Type(_BasicDataType[int]):
+ ...
+
+
+class UInt16Type(_BasicDataType[int]):
+ ...
+
+
+class Int16Type(_BasicDataType[int]):
+ ...
+
+
+class UInt32Type(_BasicDataType[int]):
+ ...
+
+
+class Int32Type(_BasicDataType[int]):
+ ...
+
+
+class UInt64Type(_BasicDataType[int]):
+ ...
+
+
+class Int64Type(_BasicDataType[int]):
+ ...
+
+
+class Float16Type(_BasicDataType[float]):
+ ...
+
+
+class Float32Type(_BasicDataType[float]):
+ ...
+
+
+class Float64Type(_BasicDataType[float]):
+ ...
+
+
+class Date32Type(_BasicDataType[dt.date]):
+ ...
+
+
+class Date64Type(_BasicDataType[dt.date]):
+ ...
+
+
+class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]):
+ ...
+
+
+class StringType(_BasicDataType[str]):
+ ...
+
+
+class LargeStringType(_BasicDataType[str]):
+ ...
+
+
+class StringViewType(_BasicDataType[str]):
+ ...
+
+
+class BinaryType(_BasicDataType[bytes]):
+ ...
+
+
+class LargeBinaryType(_BasicDataType[bytes]):
+ ...
+
+
+class BinaryViewType(_BasicDataType[bytes]):
+ ...
+
+
+_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"])
+_Tz = TypeVar("_Tz", str, None, default=None)
+
+
+class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]):
+
+ @property
+ def unit(self) -> _Unit: ...
+
+ @property
+ def tz(self) -> _Tz: ...
+
+
+_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"])
+
+
+class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]):
+ @property
+ def unit(self) -> _Time32Unit: ...
+
+
+_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"])
+
+
+class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]):
+ @property
+ def unit(self) -> _Time64Unit: ...
+
+
+class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]):
+ @property
+ def unit(self) -> _Unit: ...
+
+
+class FixedSizeBinaryType(_BasicDataType[Decimal]):
+ ...
+
+
+_Precision = TypeVar("_Precision", default=Any)
+_Scale = TypeVar("_Scale", default=Any)
+
+
+class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class ListType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class LargeListType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class ListViewType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class LargeListViewType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+ @property
+ def list_size(self) -> int: ...
+
+
+class DictionaryMemo(_Weakrefable):
+ ...
+
+
+_IndexT = TypeVar(
+ "_IndexT",
+ UInt8Type,
+ Int8Type,
+ UInt16Type,
+ Int16Type,
+ UInt32Type,
+ Int32Type,
+ UInt64Type,
+ Int64Type,
+)
+_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType)
+_ValueT = TypeVar("_ValueT", bound=DataType)
+_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False])
+
+
+class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]):
+ @property
+ def ordered(self) -> _Ordered: ...
+
+ @property
+ def index_type(self) -> _IndexT: ...
+
+ @property
+ def value_type(self) -> _BasicValueT: ...
+
+
+_K = TypeVar("_K", bound=DataType)
+
+
+class MapType(DataType, Generic[_K, _ValueT, _Ordered]):
+ @property
+ def key_field(self) -> Field[_K]: ...
+
+ @property
+ def key_type(self) -> _K: ...
+
+ @property
+ def item_field(self) -> Field[_ValueT]: ...
+
+ @property
+ def item_type(self) -> _ValueT: ...
+
+ @property
+ def keys_sorted(self) -> _Ordered: ...
+
+
+_Size = TypeVar("_Size", default=int)
+
+
+class StructType(DataType):
+ def get_field_index(self, name: str) -> int: ...
+
+ def field(self, i: int | str) -> Field: ...
+
+ def get_all_field_indices(self, name: str) -> list[int]: ...
+
+ def __len__(self) -> int: ...
+
+ def __iter__(self) -> Iterator[Field]: ...
+
+ __getitem__ = field
+ @property
+ def names(self) -> list[str]: ...
+
+ @property
+ def fields(self) -> list[Field]: ...
+
+
+class UnionType(DataType):
+ @property
+ def mode(self) -> Literal["sparse", "dense"]: ...
+
+ @property
+ def type_codes(self) -> list[int]: ...
+
+ def __len__(self) -> int: ...
+
+ def __iter__(self) -> Iterator[Field]: ...
+
+ def field(self, i: int) -> Field: ...
+
+ __getitem__ = field
+
+
+class SparseUnionType(UnionType):
+ @property
+ def mode(self) -> Literal["sparse"]: ...
+
+
+class DenseUnionType(UnionType):
+ @property
+ def mode(self) -> Literal["dense"]: ...
+
+
+_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type)
+
+
+class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]):
+ @property
+ def run_end_type(self) -> _RunEndType: ...
+ @property
+ def value_type(self) -> _BasicValueT: ...
+
+
+_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray)
+
+
+class BaseExtensionType(DataType):
+ def __arrow_ext_class__(self) -> type[ExtensionArray]: ...
+
+ def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ...
+
+ @property
+ def extension_name(self) -> str: ...
+
+ @property
+ def storage_type(self) -> DataType: ...
+
+ def wrap_array(self, storage: _StorageT) -> _StorageT: ...
+
+
+class ExtensionType(BaseExtensionType):
+ def __init__(self, storage_type: DataType, extension_name: str) -> None: ...
+
+ def __arrow_ext_serialize__(self) -> bytes: ...
+
+ @classmethod
+ def __arrow_ext_deserialize__(
+ cls, storage_type: DataType, serialized: bytes) -> Self: ...
+
+
+class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]):
+ @property
+ def value_type(self) -> _ValueT: ...
+
+ @property
+ def shape(self) -> list[int]: ...
+
+ @property
+ def dim_names(self) -> list[str] | None: ...
+
+ @property
+ def permutation(self) -> list[int] | None: ...
+
+
+class Bool8Type(BaseExtensionType):
+ ...
+
+
+class UuidType(BaseExtensionType):
+ ...
+
+
+class JsonType(BaseExtensionType):
+ ...
+
+
+class OpaqueType(BaseExtensionType):
+ @property
+ def type_name(self) -> str: ...
+
+ @property
+ def vendor_name(self) -> str: ...
+
+
+class UnknownExtensionType(ExtensionType):
+ def __init__(self, storage_type: DataType, serialized: bytes) -> None: ...
+
+
+def register_extension_type(ext_type: ExtensionType) -> None: ...
+
+
+def unregister_extension_type(type_name: str) -> None: ...
+
+
+class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]):
+ def __init__(
+ self, __arg0__: Mapping[str | bytes, str | bytes]
+ | Iterable[tuple[str, str]]
+ | KeyValueMetadata
+ | None = None, **kwargs: str
+ ) -> None: ...
+
+ def equals(self, other: KeyValueMetadata) -> bool: ...
+
+ def __len__(self) -> int: ...
+
+ def __contains__(self, /, __key: object) -> bool: ... # type: ignore[override]
+
+ def __getitem__(self, /, __key: Any) -> Any: ... # type: ignore[override]
+
+ def __iter__(self) -> Iterator[bytes]: ...
+
+ def get_all(self, key: str) -> list[bytes]: ...
+
+ def to_dict(self) -> dict[bytes, bytes]: ...
+
+
+class Field(_Weakrefable, Generic[_DataTypeT]):
+ def equals(self, other: Field, check_metadata: bool = False) -> bool: ...
+
+ def __hash__(self) -> int: ...
+
+ @property
+ def nullable(self) -> bool: ...
+
+ @property
+ def name(self) -> str: ...
+
+ @property
+ def metadata(self) -> dict[bytes, bytes] | None: ...
+
+ @property
+ def type(self) -> _DataTypeT: ...
+ def with_metadata(self, metadata: dict[bytes | str, bytes | str] |
+ Mapping[bytes | str, bytes | str] | Any) -> Self: ...
+
+ def remove_metadata(self) -> Self: ...
+
+ def with_type(self, new_type: DataType) -> Field: ...
+
+ def with_name(self, name: str) -> Self: ...
+
+ def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ...
+
+ def flatten(self) -> list[Field]: ...
+
+ def _export_to_c(self, out_ptr: int) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+ def __arrow_c_schema__(self) -> Any: ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, schema) -> Self: ...
+
+
+class Schema(_Weakrefable):
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, key: str | int) -> Field: ...
+
+ _field = __getitem__
+ def __iter__(self) -> Iterator[Field]: ...
+
+ def __hash__(self) -> int: ...
+
+ def __sizeof__(self) -> int: ...
+ @property
+ def pandas_metadata(self) -> dict: ...
+
+ @property
+ def names(self) -> list[str]: ...
+
+ @property
+ def types(self) -> list[DataType]: ...
+
+ @property
+ def metadata(self) -> dict[bytes, bytes]: ...
+
+ def empty_table(self) -> Table: ...
+
+ def equals(self, other: Schema, check_metadata: bool = False) -> bool: ...
+
+ @classmethod
+ def from_pandas(cls, df: pd.DataFrame, preserve_index: bool |
+ None = None) -> Schema: ...
+
+ def field(self, i: int | str | bytes) -> Field: ...
+
+ @deprecated("Use 'field' instead")
+ def field_by_name(self, name: str) -> Field: ...
+
+ def get_field_index(self, name: str) -> int: ...
+
+ def get_all_field_indices(self, name: str) -> list[int]: ...
+
+ def append(self, field: Field) -> Schema: ...
+
+ def insert(self, i: int, field: Field) -> Schema: ...
+
+ def remove(self, i: int) -> Schema: ...
+
+ def set(self, i: int, field: Field) -> Schema: ...
+
+ @deprecated("Use 'with_metadata' instead")
+ def add_metadata(self, metadata: dict) -> Schema: ...
+
+ def with_metadata(self, metadata: dict) -> Schema: ...
+
+ def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ...
+
+ def remove_metadata(self) -> Schema: ...
+
+ def to_string(
+ self,
+ truncate_metadata: bool = True,
+ show_field_metadata: bool = True,
+ show_schema_metadata: bool = True,
+ element_size_limit: int | None = None,
+ ) -> str: ...
+
+ def _export_to_c(self, out_ptr: int) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int) -> Schema: ...
+
+ def __arrow_c_schema__(self) -> Any: ...
+
+ @staticmethod
+ def _import_from_c_capsule(schema: Any) -> Schema: ...
+
+
+def unify_schemas(
+ schemas: Sequence[Schema],
+ *,
+ promote_options: Literal["default", "permissive"] = "default"
+) -> Schema: ...
+
+
+def field(
+ name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None,
+ nullable: bool = ...,
+ metadata: dict[Any, Any] | None = None
+) -> Field[_DataTypeT] | Field[Any]: ...
+
+
+def null() -> NullType: ...
+
+
+def bool_() -> BoolType: ...
+
+
+def uint8() -> UInt8Type: ...
+
+
+def int8() -> Int8Type: ...
+
+
+def uint16() -> UInt16Type: ...
+
+
+def int16() -> Int16Type: ...
+
+
+def uint32() -> UInt32Type: ...
+
+
+def int32() -> Int32Type: ...
+
+
+def int64() -> Int64Type: ...
+
+
+def uint64() -> UInt64Type: ...
+
+
+def timestamp(
+ unit: _Unit | str, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ...
+
+
+def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ...
+
+
+def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ...
+
+
+def duration(unit: _Unit | str) -> DurationType[_Unit]: ...
+
+
+def month_day_nano_interval() -> MonthDayNanoIntervalType: ...
+
+
+def date32() -> Date32Type: ...
+
+
+def date64() -> Date64Type: ...
+
+
+def float16() -> Float16Type: ...
+
+
+def float32() -> Float32Type: ...
+
+
+def float64() -> Float64Type: ...
+
+
+def decimal32(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal64(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal128(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal256(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def string() -> StringType: ...
+
+
+utf8 = string
+
+
+def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ...
+
+
+def large_binary() -> LargeBinaryType: ...
+
+
+def large_string() -> LargeStringType: ...
+
+
+large_utf8 = large_string
+
+
+def binary_view() -> BinaryViewType: ...
+
+
+def string_view() -> StringViewType: ...
+
+
+def list_(
+ value_type: _DataTypeT | Field[_DataTypeT] | None = None,
+ list_size: Literal[-1] | _Size | None = None
+) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ...
+
+
+def large_list(value_type: _DataTypeT |
+ Field[_DataTypeT] | None = None) -> LargeListType[_DataTypeT]: ...
+
+
+def list_view(value_type: _DataTypeT |
+ Field[_DataTypeT] | None = None) -> ListViewType[_DataTypeT]: ...
+
+
+def large_list_view(
+ value_type: _DataTypeT | Field[_DataTypeT] | None = None
+) -> LargeListViewType[_DataTypeT]: ...
+
+
+def map_(
+ key_type: _K | Field | str | None = None,
+ item_type: _ValueT | Field | str | None = None,
+ keys_sorted: bool | None = None
+) -> MapType[_K, _ValueT, Literal[False]]: ...
+
+
+def dictionary(
+ index_type: _IndexT | str,
+ value_type: _BasicValueT | str,
+ ordered: _Ordered | None = None
+) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ...
+
+
+def struct(
+ fields: Iterable[
+ Field[Any]
+ | tuple[str, Field[Any] | None]
+ | tuple[str, DataType | None]
+ ] | Mapping[str, Field[Any] | DataType | None],
+) -> StructType: ...
+
+
+def sparse_union(
+ child_fields: list[Field[Any]], type_codes: list[int] | None = None
+) -> SparseUnionType: ...
+
+
+def dense_union(
+ child_fields: list[Field[Any]], type_codes: list[int] | None = None
+) -> DenseUnionType: ...
+
+
+def union(
+ child_fields: list[Field[Any]], mode: Literal["sparse", "dense"] | int | str,
+ type_codes: list[int] | None = None) -> SparseUnionType | DenseUnionType: ...
+
+
+def run_end_encoded(
+ run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None
+) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ...
+
+
+def json_(storage_type: DataType = ...) -> JsonType: ...
+
+
+def uuid() -> UuidType: ...
+
+
+def fixed_shape_tensor(
+ value_type: _ValueT,
+ shape: Sequence[int],
+ dim_names: Sequence[str] | None = None,
+ permutation: Sequence[int] | None = None,
+) -> FixedShapeTensorType[_ValueT]: ...
+
+
+def bool8() -> Bool8Type: ...
+
+
+def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ...
+
+
+def type_for_alias(name: Any) -> DataType: ...
+
+
+def schema(
+ fields: (
+ Iterable[Field[Any]]
+ | Iterable[tuple[str, DataType | str | None]]
+ | Mapping[Any, DataType | str | None]
+ ),
+ metadata: Mapping[bytes, bytes]
+ | Mapping[str, str]
+ | Mapping[bytes, str]
+ | Mapping[str, bytes] | None = None,
+) -> Schema: ...
+
+
+def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ...
+
+
+__all__ = [
+ "_Weakrefable",
+ "_Metadata",
+ "DataType",
+ "_BasicDataType",
+ "NullType",
+ "BoolType",
+ "UInt8Type",
+ "Int8Type",
+ "UInt16Type",
+ "Int16Type",
+ "UInt32Type",
+ "Int32Type",
+ "UInt64Type",
+ "Int64Type",
+ "Float16Type",
+ "Float32Type",
+ "Float64Type",
+ "Date32Type",
+ "Date64Type",
+ "MonthDayNanoIntervalType",
+ "StringType",
+ "LargeStringType",
+ "StringViewType",
+ "BinaryType",
+ "LargeBinaryType",
+ "BinaryViewType",
+ "TimestampType",
+ "Time32Type",
+ "Time64Type",
+ "DurationType",
+ "FixedSizeBinaryType",
+ "Decimal32Type",
+ "Decimal64Type",
+ "Decimal128Type",
+ "Decimal256Type",
+ "ListType",
+ "LargeListType",
+ "ListViewType",
+ "LargeListViewType",
+ "FixedSizeListType",
+ "DictionaryMemo",
+ "DictionaryType",
+ "MapType",
+ "StructType",
+ "UnionType",
+ "SparseUnionType",
+ "DenseUnionType",
+ "RunEndEncodedType",
+ "BaseExtensionType",
+ "ExtensionType",
+ "FixedShapeTensorType",
+ "Bool8Type",
+ "UuidType",
+ "JsonType",
+ "OpaqueType",
+ "UnknownExtensionType",
+ "register_extension_type",
+ "unregister_extension_type",
+ "KeyValueMetadata",
+ "Field",
+ "Schema",
+ "unify_schemas",
+ "field",
+ "null",
+ "bool_",
+ "uint8",
+ "int8",
+ "uint16",
+ "int16",
+ "uint32",
+ "int32",
+ "int64",
+ "uint64",
+ "timestamp",
+ "time32",
+ "time64",
+ "duration",
+ "month_day_nano_interval",
+ "date32",
+ "date64",
+ "float16",
+ "float32",
+ "float64",
+ "decimal32",
+ "decimal64",
+ "decimal128",
+ "decimal256",
+ "string",
+ "utf8",
+ "binary",
+ "large_binary",
+ "large_string",
+ "large_utf8",
+ "binary_view",
+ "string_view",
+ "list_",
+ "large_list",
+ "list_view",
+ "large_list_view",
+ "map_",
+ "dictionary",
+ "struct",
+ "sparse_union",
+ "dense_union",
+ "union",
+ "run_end_encoded",
+ "json_",
+ "uuid",
+ "fixed_shape_tensor",
+ "bool8",
+ "opaque",
+ "type_for_alias",
+ "schema",
+ "from_numpy_dtype",
+ "_Unit",
+ "_Tz",
+ "_Time32Unit",
+ "_Time64Unit",
+ "_DataTypeT",
+]
diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi
new file mode 100644
index 00000000000..eac936afcb5
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/error.pyi
@@ -0,0 +1,104 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+
+class ArrowException(Exception):
+ ...
+
+
+class ArrowInvalid(ValueError, ArrowException):
+ ...
+
+
+class ArrowMemoryError(MemoryError, ArrowException):
+ ...
+
+
+class ArrowKeyError(KeyError, ArrowException):
+ ...
+
+
+class ArrowTypeError(TypeError, ArrowException):
+ ...
+
+
+class ArrowNotImplementedError(NotImplementedError, ArrowException):
+ ...
+
+
+class ArrowCapacityError(ArrowException):
+ ...
+
+
+class ArrowIndexError(IndexError, ArrowException):
+ ...
+
+
+class ArrowSerializationError(ArrowException):
+ ...
+
+
+class ArrowCancelled(ArrowException):
+ signum: int | None
+ def __init__(self, message: str, signum: int | None = None) -> None: ...
+
+
+ArrowIOError = IOError
+
+
+class StopToken:
+ ...
+
+
+def enable_signal_handlers(enable: bool) -> None: ...
+
+
+have_signal_refcycle: bool
+
+
+class SignalStopHandler:
+ def __enter__(self) -> Self: ...
+ def __exit__(self, exc_type, exc_value, exc_tb) -> None: ...
+ def __dealloc__(self) -> None: ...
+ @property
+ def stop_token(self) -> StopToken: ...
+
+
+__all__ = [
+ "ArrowException",
+ "ArrowInvalid",
+ "ArrowMemoryError",
+ "ArrowKeyError",
+ "ArrowTypeError",
+ "ArrowNotImplementedError",
+ "ArrowCapacityError",
+ "ArrowIndexError",
+ "ArrowSerializationError",
+ "ArrowCancelled",
+ "ArrowIOError",
+ "StopToken",
+ "enable_signal_handlers",
+ "have_signal_refcycle",
+ "SignalStopHandler",
+]
diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi
new file mode 100644
index 00000000000..467ec48cc76
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/io.pyi
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Placeholder stub - complete annotations in future PR."""
+
+from typing import Any
+
+def __getattr__(name: str) -> Any: ...
diff --git a/python/pyarrow-stubs/pyarrow/lib.pyi b/python/pyarrow-stubs/pyarrow/lib.pyi
new file mode 100644
index 00000000000..775434be2ea
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/lib.pyi
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Placeholder stub for pyarrow.lib C extension module.
+
+Complete type annotations will be added in subsequent PRs.
+"""
+
+from typing import Any
+
+def __getattr__(name: str) -> Any: ...
diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi
new file mode 100644
index 00000000000..467ec48cc76
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/scalar.pyi
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Placeholder stub - complete annotations in future PR."""
+
+from typing import Any
+
+def __getattr__(name: str) -> Any: ...
diff --git a/python/pyarrow/py.typed b/python/pyarrow/py.typed
new file mode 100644
index 00000000000..13a83393a91
--- /dev/null
+++ b/python/pyarrow/py.typed
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 0a730fd4f78..aed9b391e8c 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -84,11 +84,11 @@ zip-safe=false
include-package-data=true
[tool.setuptools.packages.find]
-include = ["pyarrow"]
+include = ["pyarrow", "pyarrow.*"]
namespaces = false
[tool.setuptools.package-data]
-pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd"]
+pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd", "py.typed"]
[tool.setuptools_scm]
root = '..'
@@ -96,3 +96,20 @@ version_file = 'pyarrow/_generated_version.py'
version_scheme = 'guess-next-dev'
git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"'
fallback_version = '23.0.0a0'
+
+# TODO: Enable type checking once stubs are merged
+[tool.mypy]
+files = ["pyarrow-stubs"]
+mypy_path = "$MYPY_CONFIG_FILE_DIR/pyarrow-stubs"
+
+# TODO: Enable type checking once stubs are merged
+[tool.pyright]
+pythonPlatform = "All"
+pythonVersion = "3.10"
+include = ["pyarrow-stubs"]
+stubPath = "pyarrow-stubs"
+typeCheckingMode = "basic"
+
+# TODO: Enable type checking once stubs are merged
+[tool.ty.src]
+include = ["pyarrow-stubs"]
diff --git a/python/setup.py b/python/setup.py
index a27bd3baefd..a25d2d76b36 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -121,8 +121,35 @@ def build_extensions(self):
def run(self):
self._run_cmake()
+ self._copy_stubs()
_build_ext.run(self)
+ def _copy_stubs(self):
+ """Copy .pyi stub files from pyarrow-stubs to the build directory."""
+ build_cmd = self.get_finalized_command('build')
+ build_lib = os.path.abspath(build_cmd.build_lib)
+
+ stubs_src = pjoin(setup_dir, 'pyarrow-stubs', 'pyarrow')
+ stubs_dest = pjoin(build_lib, 'pyarrow')
+
+ if os.path.exists(stubs_src):
+ print(f"-- Copying stub files from {stubs_src} to {stubs_dest}")
+ for root, dirs, files in os.walk(stubs_src):
+ # Calculate relative path from stubs_src
+ rel_dir = os.path.relpath(root, stubs_src)
+ dest_dir = pjoin(stubs_dest, rel_dir) if rel_dir != '.' else stubs_dest
+
+ # Create destination directory if needed
+ if not os.path.exists(dest_dir):
+ os.makedirs(dest_dir)
+
+ # Copy .pyi files
+ for file in files:
+ if file.endswith('.pyi'):
+ src_file = pjoin(root, file)
+ dest_file = pjoin(dest_dir, file)
+ shutil.copy2(src_file, dest_file)
+
# adapted from cmake_build_ext in dynd-python
# github.com/libdynd/dynd-python