From 3b49f62b04e1515f67f3f3ec7839a354a96975c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Sun, 7 Dec 2025 18:28:39 -0500 Subject: [PATCH 01/20] Fix StringDType helper declaration and initialize UTF8 --- .../pyarrow/src/arrow/python/numpy_convert.cc | 13 ++ .../pyarrow/src/arrow/python/numpy_convert.h | 2 + .../src/arrow/python/numpy_to_arrow.cc | 126 ++++++++++++++++++ python/pyarrow/tests/test_array.py | 36 +++++ 4 files changed, 177 insertions(+) diff --git a/python/pyarrow/src/arrow/python/numpy_convert.cc b/python/pyarrow/src/arrow/python/numpy_convert.cc index 4113cc67d2f..d5faef66193 100644 --- a/python/pyarrow/src/arrow/python/numpy_convert.cc +++ b/python/pyarrow/src/arrow/python/numpy_convert.cc @@ -122,6 +122,15 @@ Result> NumPyScalarToArrowDataType(PyObject* scalar) { return NumPyDtypeToArrow(descr); } +#if NPY_ABI_VERSION >= 0x02000000 +bool IsStringDType(PyArray_Descr* descr) { + // NumPy's variable-width StringDType exposes a dedicated dtype number. + return descr != nullptr && descr->type_num == NPY_VSTRING; +} +#else +bool IsStringDType(PyArray_Descr* /*descr*/) { return false; } +#endif + Result> NumPyDtypeToArrow(PyObject* dtype) { if (!PyObject_TypeCheck(dtype, &PyArrayDescr_Type)) { return Status::TypeError("Did not pass numpy.dtype object"); @@ -133,6 +142,10 @@ Result> NumPyDtypeToArrow(PyObject* dtype) { Result> NumPyDtypeToArrow(PyArray_Descr* descr) { int type_num = fix_numpy_type_num(descr->type_num); + if (IsStringDType(descr)) { + return utf8(); + } + switch (type_num) { TO_ARROW_TYPE_CASE(BOOL, boolean); TO_ARROW_TYPE_CASE(INT8, int8); diff --git a/python/pyarrow/src/arrow/python/numpy_convert.h b/python/pyarrow/src/arrow/python/numpy_convert.h index 2d1086e1355..cac389d17a1 100644 --- a/python/pyarrow/src/arrow/python/numpy_convert.h +++ b/python/pyarrow/src/arrow/python/numpy_convert.h @@ -55,6 +55,8 @@ Result> NumPyDtypeToArrow(PyArray_Descr* descr); ARROW_PYTHON_EXPORT Result> NumPyScalarToArrowDataType(PyObject* scalar); +ARROW_PYTHON_EXPORT bool IsStringDType(PyArray_Descr* descr); + ARROW_PYTHON_EXPORT Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, const std::vector& dim_names, std::shared_ptr* out); diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index 5647e895d0f..b4598d4f3b6 100644 --- a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ #include "arrow/util/endian.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/scope_guard.h" #include "arrow/util/string.h" #include "arrow/util/utf8.h" #include "arrow/visit_type_inline.h" @@ -59,6 +61,10 @@ #include "arrow/python/type_traits.h" #include "arrow/python/vendored/pythoncapi_compat.h" +#if NPY_ABI_VERSION >= 0x02000000 +#include +#endif + namespace arrow { using internal::checked_cast; @@ -233,6 +239,13 @@ class NumPyConverter { Status Visit(const LargeStringType& type); Status Visit(const StringViewType& type); +#if NPY_ABI_VERSION >= 0x02000000 + template + Status AppendStringDTypeValues(Builder* builder); + + Status ConvertStringDType(); +#endif + Status Visit(const StructType& type); Status Visit(const FixedSizeBinaryType& type); @@ -338,6 +351,25 @@ Status NumPyConverter::Convert() { return Status::OK(); } + if (IsStringDType(dtype_)) { +#if NPY_ABI_VERSION >= 0x02000000 + RETURN_NOT_OK(ConvertStringDType()); + 
return Status::OK(); +#else + // Fall back to the generic Python sequence conversion path when the StringDType + // C API is unavailable. + PyConversionOptions py_options; + py_options.type = type_; + py_options.from_pandas = from_pandas_; + ARROW_ASSIGN_OR_RAISE( + auto chunked_array, + ConvertPySequence(reinterpret_cast(arr_), + reinterpret_cast(mask_), py_options, pool_)); + out_arrays_ = chunked_array->chunks(); + return Status::OK(); +#endif + } + if (type_ == nullptr) { return Status::Invalid("Must pass data type for non-object arrays"); } @@ -815,6 +847,100 @@ Status NumPyConverter::Visit(const StringViewType& type) { return Status::OK(); } +#if NPY_ABI_VERSION >= 0x02000000 + +template +Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { + auto* descr = reinterpret_cast(dtype_); + + PyAcquireGIL gil_lock; + + npy_string_allocator* allocator = NpyString_acquire_allocator(descr); + if (allocator == nullptr) { + return Status::Invalid("Failed to acquire NumPy StringDType allocator"); + } + + auto release_allocator = ::arrow::internal::MakeScopeGuard( + [&]() { NpyString_release_allocator(allocator); }); + + npy_static_string value = {0, nullptr}; + + auto append_value = [&](const npy_packed_static_string* packed) -> Status { + int rc = NpyString_load(allocator, packed, &value); + if (rc == -1) { + RETURN_IF_PYERROR(); + return Status::Invalid("Failed to unpack NumPy StringDType value"); + } + if (rc == 1) { + return builder->AppendNull(); + } + return builder->Append(std::string_view{value.buf, value.size}); + }; + + char* data = PyArray_BYTES(arr_); + + if (mask_ != nullptr) { + Ndarray1DIndexer mask_values(mask_); + for (int64_t i = 0; i < length_; ++i) { + if (mask_values[i]) { + RETURN_NOT_OK(builder->AppendNull()); + } else { + const auto* packed = + reinterpret_cast(data + i * stride_); + RETURN_NOT_OK(append_value(packed)); + } + } + } else { + for (int64_t i = 0; i < length_; ++i) { + const auto* packed = reinterpret_cast(data); + RETURN_NOT_OK(append_value(packed)); + data += stride_; + } + } + + return Status::OK(); +} + +Status NumPyConverter::ConvertStringDType() { + util::InitializeUTF8(); + + if (type_ == nullptr) { + type_ = utf8(); + } + + switch (type_->id()) { + case Type::STRING: { + internal::ChunkedStringBuilder builder(kBinaryChunksize, pool_); + RETURN_NOT_OK(builder.Reserve(length_)); + RETURN_NOT_OK(AppendStringDTypeValues(&builder)); + + ArrayVector chunks; + RETURN_NOT_OK(builder.Finish(&chunks)); + for (const auto& chunk : chunks) { + RETURN_NOT_OK(PushArray(chunk->data())); + } + return Status::OK(); + } + case Type::LARGE_STRING: { + LargeStringBuilder builder(pool_); + RETURN_NOT_OK(builder.Reserve(length_)); + RETURN_NOT_OK(AppendStringDTypeValues(&builder)); + return PushBuilderResult(&builder); + } + case Type::STRING_VIEW: { + StringViewBuilder builder(pool_); + RETURN_NOT_OK(builder.Reserve(length_)); + RETURN_NOT_OK(AppendStringDTypeValues(&builder)); + return PushBuilderResult(&builder); + } + default: + return Status::TypeError( + "NumPy StringDType can only be converted to Arrow string types"); + } +} + +#endif + Status NumPyConverter::Visit(const StructType& type) { std::vector sub_converters; std::vector sub_arrays; diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index ec361159c5f..a83e65bdf1c 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2758,6 +2758,42 @@ def test_array_from_numpy_unicode(string_type): assert arrow_arr.equals(expected) 
+@pytest.mark.numpy +def test_array_from_numpy_string_dtype(): + StringDType = getattr(np.dtypes, "StringDType", None) + if StringDType is None: + pytest.skip("NumPy StringDType not available") + + arr = np.array(["some", "strings"], dtype=StringDType()) + + arrow_arr = pa.array(arr) + + assert arrow_arr.type == pa.utf8() + assert arrow_arr.to_pylist() == ["some", "strings"] + + arrow_arr = pa.array(arr, type=pa.large_string()) + assert arrow_arr.type == pa.large_string() + assert arrow_arr.to_pylist() == ["some", "strings"] + + +@pytest.mark.numpy +def test_array_from_numpy_string_dtype_nulls_and_mask(): + StringDType = getattr(np.dtypes, "StringDType", None) + if StringDType is None: + pytest.skip("NumPy StringDType not available") + + dtype = StringDType(na_object=None) + arr = np.array(["this array has", None, "as an entry"], dtype=dtype) + + arrow_arr = pa.array(arr) + assert arrow_arr.type == pa.utf8() + assert arrow_arr.to_pylist() == ["this array has", None, "as an entry"] + + mask = np.array([False, True, False]) + arrow_arr = pa.array(arr, mask=mask) + assert arrow_arr.to_pylist() == ["this array has", None, None] + + @pytest.mark.numpy def test_array_string_from_non_string(): # ARROW-5682 - when converting to string raise on non string-like dtype From 6e4c3c64c4278fa3138320370deec07d05476720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Sun, 7 Dec 2025 19:28:45 -0500 Subject: [PATCH 02/20] Fix NumPy string dtype allocator guard --- python/pyarrow/src/arrow/python/numpy_to_arrow.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index b4598d4f3b6..5a6be35f5f0 100644 --- a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -44,7 +44,6 @@ #include "arrow/util/endian.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" -#include "arrow/util/scope_guard.h" #include "arrow/util/string.h" #include "arrow/util/utf8.h" #include "arrow/visit_type_inline.h" @@ -860,8 +859,8 @@ Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { return Status::Invalid("Failed to acquire NumPy StringDType allocator"); } - auto release_allocator = ::arrow::internal::MakeScopeGuard( - [&]() { NpyString_release_allocator(allocator); }); + std::unique_ptr + allocator_guard(allocator, &NpyString_release_allocator); npy_static_string value = {0, nullptr}; From a90ea23f5f006e3b07c9bf7d54de5c186a0b88a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Sun, 7 Dec 2025 21:45:34 -0500 Subject: [PATCH 03/20] Remove StringDType header comment --- python/pyarrow/src/arrow/python/numpy_to_arrow.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index 5a6be35f5f0..7e624c62751 100644 --- a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -60,10 +60,6 @@ #include "arrow/python/type_traits.h" #include "arrow/python/vendored/pythoncapi_compat.h" -#if NPY_ABI_VERSION >= 0x02000000 -#include -#endif - namespace arrow { using internal::checked_cast; From 8729eb3ca37413b60c3aa3c86bfda8481e1d4319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Sun, 7 Dec 2025 22:36:49 -0500 Subject: [PATCH 04/20] Format numpy_to_arrow include --- .../src/arrow/python/numpy_to_arrow.cc | 22 
++++++++++++++----- python/pyarrow/tests/test_array.py | 14 ++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index 7e624c62751..c6e9e549f14 100644 --- a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -60,6 +60,12 @@ #include "arrow/python/type_traits.h" #include "arrow/python/vendored/pythoncapi_compat.h" +#if NPY_ABI_VERSION >= 0x02000000 +// Needed for NpyString_acquire_allocator / NpyString_load / +// NpyString_release_allocator +# include +#endif + namespace arrow { using internal::checked_cast; @@ -848,22 +854,26 @@ template Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { auto* descr = reinterpret_cast(dtype_); - PyAcquireGIL gil_lock; - npy_string_allocator* allocator = NpyString_acquire_allocator(descr); if (allocator == nullptr) { return Status::Invalid("Failed to acquire NumPy StringDType allocator"); } - std::unique_ptr - allocator_guard(allocator, &NpyString_release_allocator); + struct AllocatorGuard { + npy_string_allocator* ptr; + explicit AllocatorGuard(npy_string_allocator* p) : ptr(p) {} + ~AllocatorGuard() { + if (ptr != nullptr) { + NpyString_release_allocator(ptr); + } + } + } guard(allocator); npy_static_string value = {0, nullptr}; auto append_value = [&](const npy_packed_static_string* packed) -> Status { int rc = NpyString_load(allocator, packed, &value); if (rc == -1) { - RETURN_IF_PYERROR(); return Status::Invalid("Failed to unpack NumPy StringDType value"); } if (rc == 1) { @@ -905,7 +915,7 @@ Status NumPyConverter::ConvertStringDType() { switch (type_->id()) { case Type::STRING: { - internal::ChunkedStringBuilder builder(kBinaryChunksize, pool_); + arrow::internal::ChunkedStringBuilder builder(kBinaryChunksize, pool_); RETURN_NOT_OK(builder.Reserve(length_)); RETURN_NOT_OK(AppendStringDTypeValues(&builder)); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index a83e65bdf1c..987c9f6621b 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2771,10 +2771,24 @@ def test_array_from_numpy_string_dtype(): assert arrow_arr.type == pa.utf8() assert arrow_arr.to_pylist() == ["some", "strings"] + arrow_arr = pa.array(arr, type=pa.string()) + assert arrow_arr.type == pa.string() + assert arrow_arr.to_pylist() == ["some", "strings"] + arrow_arr = pa.array(arr, type=pa.large_string()) assert arrow_arr.type == pa.large_string() assert arrow_arr.to_pylist() == ["some", "strings"] + arrow_arr = pa.array(arr, type=pa.string_view()) + assert arrow_arr.type == pa.string_view() + assert arrow_arr.to_pylist() == ["some", "strings"] + + arr_full = np.array(["a", "b", "c", "d", "e"], dtype=StringDType()) + arr = arr_full[::2] + arrow_arr = pa.array(arr) + assert arrow_arr.type == pa.utf8() + assert arrow_arr.to_pylist() == ["a", "c", "e"] + @pytest.mark.numpy def test_array_from_numpy_string_dtype_nulls_and_mask(): From f49ba675b6c55b6b7da283b3a33fb387359a2ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Sun, 7 Dec 2025 23:38:07 -0500 Subject: [PATCH 05/20] Run clang-format on numpy_to_arrow --- .../src/arrow/python/numpy_to_arrow.cc | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index c6e9e549f14..90d4a805d12 100644 --- 
a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -81,6 +81,37 @@ using internal::NumPyTypeSize; namespace { +#if NPY_ABI_VERSION >= 0x02000000 + +// NumPy exposes StringDType helpers in the C-API table from version 2.0 onward, +// but the corresponding macros are only available when compiling against a +// 2.0+ feature level. Arrow still targets an older feature level, so provide +// local wrappers that call the C-API entries directly. + +inline npy_string_allocator* ArrowNpyString_acquire_allocator( + const PyArray_StringDTypeObject* descr) { + using Func = npy_string_allocator* (*)(const PyArray_StringDTypeObject*); + auto func = reinterpret_cast(PyArray_API[316]); + return func(descr); +} + +inline void ArrowNpyString_release_allocator(npy_string_allocator* allocator) { + using Func = void (*)(npy_string_allocator*); + auto func = reinterpret_cast(PyArray_API[318]); + func(allocator); +} + +inline int ArrowNpyString_load(npy_string_allocator* allocator, + const npy_packed_static_string* packed, + npy_static_string* out) { + using Func = + int (*)(npy_string_allocator*, const npy_packed_static_string*, npy_static_string*); + auto func = reinterpret_cast(PyArray_API[313]); + return func(allocator, packed, out); +} + +#endif // NPY_ABI_VERSION >= 0x02000000 + Status AllocateNullBitmap(MemoryPool* pool, int64_t length, std::shared_ptr* out) { int64_t null_bytes = bit_util::BytesForBits(length); @@ -854,7 +885,7 @@ template Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { auto* descr = reinterpret_cast(dtype_); - npy_string_allocator* allocator = NpyString_acquire_allocator(descr); + npy_string_allocator* allocator = ArrowNpyString_acquire_allocator(descr); if (allocator == nullptr) { return Status::Invalid("Failed to acquire NumPy StringDType allocator"); } @@ -864,7 +895,7 @@ Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { explicit AllocatorGuard(npy_string_allocator* p) : ptr(p) {} ~AllocatorGuard() { if (ptr != nullptr) { - NpyString_release_allocator(ptr); + ArrowNpyString_release_allocator(ptr); } } } guard(allocator); @@ -872,8 +903,9 @@ Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { npy_static_string value = {0, nullptr}; auto append_value = [&](const npy_packed_static_string* packed) -> Status { - int rc = NpyString_load(allocator, packed, &value); + int rc = ArrowNpyString_load(allocator, packed, &value); if (rc == -1) { + RETURN_IF_PYERROR(); return Status::Invalid("Failed to unpack NumPy StringDType value"); } if (rc == 1) { From 050ca867ad1a74d9b98f2aa1c321fc359562f875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Mon, 8 Dec 2025 00:24:03 -0500 Subject: [PATCH 06/20] Handle missing NumPy dtypes module in StringDType tests --- python/pyarrow/tests/test_array.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 987c9f6621b..f4d85904b3a 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2760,11 +2760,17 @@ def test_array_from_numpy_unicode(string_type): @pytest.mark.numpy def test_array_from_numpy_string_dtype(): - StringDType = getattr(np.dtypes, "StringDType", None) + dtypes_mod = getattr(np, "dtypes", None) + if dtypes_mod is None: + pytest.skip("NumPy dtypes module not available") + + StringDType = getattr(dtypes_mod, "StringDType", None) if StringDType is None: 
pytest.skip("NumPy StringDType not available") - arr = np.array(["some", "strings"], dtype=StringDType()) + dtype = StringDType() + + arr = np.array(["some", "strings"], dtype=dtype) arrow_arr = pa.array(arr) @@ -2783,7 +2789,7 @@ def test_array_from_numpy_string_dtype(): assert arrow_arr.type == pa.string_view() assert arrow_arr.to_pylist() == ["some", "strings"] - arr_full = np.array(["a", "b", "c", "d", "e"], dtype=StringDType()) + arr_full = np.array(["a", "b", "c", "d", "e"], dtype=dtype) arr = arr_full[::2] arrow_arr = pa.array(arr) assert arrow_arr.type == pa.utf8() @@ -2792,10 +2798,15 @@ def test_array_from_numpy_string_dtype(): @pytest.mark.numpy def test_array_from_numpy_string_dtype_nulls_and_mask(): - StringDType = getattr(np.dtypes, "StringDType", None) + dtypes_mod = getattr(np, "dtypes", None) + if dtypes_mod is None: + pytest.skip("NumPy dtypes module not available") + + StringDType = getattr(dtypes_mod, "StringDType", None) if StringDType is None: pytest.skip("NumPy StringDType not available") + # Real StringDType, use its NA sentinel dtype = StringDType(na_object=None) arr = np.array(["this array has", None, "as an entry"], dtype=dtype) @@ -2803,7 +2814,10 @@ def test_array_from_numpy_string_dtype_nulls_and_mask(): assert arrow_arr.type == pa.utf8() assert arrow_arr.to_pylist() == ["this array has", None, "as an entry"] - mask = np.array([False, True, False]) + # Test interplay of NA sentinel and an explicit mask: + # - index 1 is null because of na_object / Python None + # - index 2 is forced null by the mask + mask = np.array([False, False, True]) arrow_arr = pa.array(arr, mask=mask) assert arrow_arr.to_pylist() == ["this array has", None, None] From da255c9ec0f8ec0f09cede930064c508866e3faa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Fri, 12 Dec 2025 00:08:23 -0500 Subject: [PATCH 07/20] Make StringDType support unconditional --- .../pyarrow/src/arrow/python/numpy_convert.cc | 9 +- .../src/arrow/python/numpy_to_arrow.cc | 93 +++++++------------ python/pyarrow/tests/test_array.py | 28 ++++++ 3 files changed, 64 insertions(+), 66 deletions(-) diff --git a/python/pyarrow/src/arrow/python/numpy_convert.cc b/python/pyarrow/src/arrow/python/numpy_convert.cc index d5faef66193..facad8adfc8 100644 --- a/python/pyarrow/src/arrow/python/numpy_convert.cc +++ b/python/pyarrow/src/arrow/python/numpy_convert.cc @@ -37,6 +37,10 @@ namespace arrow { namespace py { +#ifndef NPY_VSTRING +# define NPY_VSTRING 2056 +#endif + NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) { PyAcquireGIL lock; arr_ = ao; @@ -122,14 +126,9 @@ Result> NumPyScalarToArrowDataType(PyObject* scalar) { return NumPyDtypeToArrow(descr); } -#if NPY_ABI_VERSION >= 0x02000000 bool IsStringDType(PyArray_Descr* descr) { - // NumPy's variable-width StringDType exposes a dedicated dtype number. 
return descr != nullptr && descr->type_num == NPY_VSTRING; } -#else -bool IsStringDType(PyArray_Descr* /*descr*/) { return false; } -#endif Result> NumPyDtypeToArrow(PyObject* dtype) { if (!PyObject_TypeCheck(dtype, &PyArrayDescr_Type)) { diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index 90d4a805d12..e39fdadea2f 100644 --- a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -60,11 +60,7 @@ #include "arrow/python/type_traits.h" #include "arrow/python/vendored/pythoncapi_compat.h" -#if NPY_ABI_VERSION >= 0x02000000 -// Needed for NpyString_acquire_allocator / NpyString_load / -// NpyString_release_allocator -# include -#endif +#include namespace arrow { @@ -81,24 +77,15 @@ using internal::NumPyTypeSize; namespace { -#if NPY_ABI_VERSION >= 0x02000000 - -// NumPy exposes StringDType helpers in the C-API table from version 2.0 onward, -// but the corresponding macros are only available when compiling against a -// 2.0+ feature level. Arrow still targets an older feature level, so provide -// local wrappers that call the C-API entries directly. - inline npy_string_allocator* ArrowNpyString_acquire_allocator( const PyArray_StringDTypeObject* descr) { using Func = npy_string_allocator* (*)(const PyArray_StringDTypeObject*); - auto func = reinterpret_cast(PyArray_API[316]); - return func(descr); + return reinterpret_cast(PyArray_API[316])(descr); } inline void ArrowNpyString_release_allocator(npy_string_allocator* allocator) { using Func = void (*)(npy_string_allocator*); - auto func = reinterpret_cast(PyArray_API[318]); - func(allocator); + reinterpret_cast(PyArray_API[318])(allocator); } inline int ArrowNpyString_load(npy_string_allocator* allocator, @@ -106,12 +93,9 @@ inline int ArrowNpyString_load(npy_string_allocator* allocator, npy_static_string* out) { using Func = int (*)(npy_string_allocator*, const npy_packed_static_string*, npy_static_string*); - auto func = reinterpret_cast(PyArray_API[313]); - return func(allocator, packed, out); + return reinterpret_cast(PyArray_API[313])(allocator, packed, out); } -#endif // NPY_ABI_VERSION >= 0x02000000 - Status AllocateNullBitmap(MemoryPool* pool, int64_t length, std::shared_ptr* out) { int64_t null_bytes = bit_util::BytesForBits(length); @@ -271,12 +255,10 @@ class NumPyConverter { Status Visit(const LargeStringType& type); Status Visit(const StringViewType& type); -#if NPY_ABI_VERSION >= 0x02000000 template Status AppendStringDTypeValues(Builder* builder); Status ConvertStringDType(); -#endif Status Visit(const StructType& type); @@ -384,22 +366,8 @@ Status NumPyConverter::Convert() { } if (IsStringDType(dtype_)) { -#if NPY_ABI_VERSION >= 0x02000000 RETURN_NOT_OK(ConvertStringDType()); return Status::OK(); -#else - // Fall back to the generic Python sequence conversion path when the StringDType - // C API is unavailable. 
- PyConversionOptions py_options; - py_options.type = type_; - py_options.from_pandas = from_pandas_; - ARROW_ASSIGN_OR_RAISE( - auto chunked_array, - ConvertPySequence(reinterpret_cast(arr_), - reinterpret_cast(mask_), py_options, pool_)); - out_arrays_ = chunked_array->chunks(); - return Status::OK(); -#endif } if (type_ == nullptr) { @@ -879,8 +847,6 @@ Status NumPyConverter::Visit(const StringViewType& type) { return Status::OK(); } -#if NPY_ABI_VERSION >= 0x02000000 - template Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { auto* descr = reinterpret_cast(dtype_); @@ -901,19 +867,6 @@ Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { } guard(allocator); npy_static_string value = {0, nullptr}; - - auto append_value = [&](const npy_packed_static_string* packed) -> Status { - int rc = ArrowNpyString_load(allocator, packed, &value); - if (rc == -1) { - RETURN_IF_PYERROR(); - return Status::Invalid("Failed to unpack NumPy StringDType value"); - } - if (rc == 1) { - return builder->AppendNull(); - } - return builder->Append(std::string_view{value.buf, value.size}); - }; - char* data = PyArray_BYTES(arr_); if (mask_ != nullptr) { @@ -921,18 +874,38 @@ Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { for (int64_t i = 0; i < length_; ++i) { if (mask_values[i]) { RETURN_NOT_OK(builder->AppendNull()); + continue; + } + + const auto* packed = + reinterpret_cast(data + i * stride_); + const int is_null = ArrowNpyString_load(allocator, packed, &value); + if (is_null == -1) { + RETURN_IF_PYERROR(); + return Status::Invalid("Failed to unpack NumPy StringDType value"); + } + if (is_null) { + RETURN_NOT_OK(builder->AppendNull()); } else { - const auto* packed = - reinterpret_cast(data + i * stride_); - RETURN_NOT_OK(append_value(packed)); + RETURN_NOT_OK(builder->Append(std::string_view{value.buf, value.size})); } } - } else { - for (int64_t i = 0; i < length_; ++i) { - const auto* packed = reinterpret_cast(data); - RETURN_NOT_OK(append_value(packed)); - data += stride_; + return Status::OK(); + } + + for (int64_t i = 0; i < length_; ++i) { + const auto* packed = reinterpret_cast(data); + const int is_null = ArrowNpyString_load(allocator, packed, &value); + if (is_null == -1) { + RETURN_IF_PYERROR(); + return Status::Invalid("Failed to unpack NumPy StringDType value"); + } + if (is_null) { + RETURN_NOT_OK(builder->AppendNull()); + } else { + RETURN_NOT_OK(builder->Append(std::string_view{value.buf, value.size})); } + data += stride_; } return Status::OK(); @@ -976,8 +949,6 @@ Status NumPyConverter::ConvertStringDType() { } } -#endif - Status NumPyConverter::Visit(const StructType& type) { std::vector sub_converters; std::vector sub_arrays; diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index f4d85904b3a..a7377477dbe 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2796,6 +2796,28 @@ def test_array_from_numpy_string_dtype(): assert arrow_arr.to_pylist() == ["a", "c", "e"] +@pytest.mark.numpy +def test_numpy_stringdtype_thresholds_and_unicode(): + dtypes_mod = getattr(np, "dtypes", None) + if dtypes_mod is None: + pytest.skip("NumPy dtypes module not available") + + StringDType = getattr(dtypes_mod, "StringDType", None) + if StringDType is None: + pytest.skip("NumPy StringDType not available") + + dtype = StringDType() + + short = "hello" + medium = "a" * 100 + long_ = "b" * 300 + unicode_ = "árvíztűrő tükörfúrógép 🥐 你好" + long_unicode = "🥐" * 200 + + arr = 
np.array([short, medium, long_, unicode_, long_unicode], dtype=dtype) + assert pa.array(arr).to_pylist() == [short, medium, long_, unicode_, long_unicode] + + @pytest.mark.numpy def test_array_from_numpy_string_dtype_nulls_and_mask(): dtypes_mod = getattr(np, "dtypes", None) @@ -2822,6 +2844,12 @@ def test_array_from_numpy_string_dtype_nulls_and_mask(): assert arrow_arr.to_pylist() == ["this array has", None, None] +@pytest.mark.numpy +def test_numpy_object_str_still_works(): + arr_obj = np.array(["x", "y", None], dtype=object) + assert pa.array(arr_obj).to_pylist() == ["x", "y", None] + + @pytest.mark.numpy def test_array_string_from_non_string(): # ARROW-5682 - when converting to string raise on non string-like dtype From 80a3aca59adb658533c2406920f3de8299c702ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Fri, 12 Dec 2025 00:38:55 -0500 Subject: [PATCH 08/20] Remove StringDType endif comments --- python/pyarrow/src/arrow/python/numpy_to_arrow.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index e39fdadea2f..b3e0dc0c17d 100644 --- a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -77,6 +77,7 @@ using internal::NumPyTypeSize; namespace { +#ifdef npy_string_allocator inline npy_string_allocator* ArrowNpyString_acquire_allocator( const PyArray_StringDTypeObject* descr) { using Func = npy_string_allocator* (*)(const PyArray_StringDTypeObject*); @@ -95,6 +96,7 @@ inline int ArrowNpyString_load(npy_string_allocator* allocator, int (*)(npy_string_allocator*, const npy_packed_static_string*, npy_static_string*); return reinterpret_cast(PyArray_API[313])(allocator, packed, out); } +#endif Status AllocateNullBitmap(MemoryPool* pool, int64_t length, std::shared_ptr* out) { @@ -255,10 +257,12 @@ class NumPyConverter { Status Visit(const LargeStringType& type); Status Visit(const StringViewType& type); +#ifdef npy_string_allocator template Status AppendStringDTypeValues(Builder* builder); Status ConvertStringDType(); +#endif Status Visit(const StructType& type); @@ -366,8 +370,13 @@ Status NumPyConverter::Convert() { } if (IsStringDType(dtype_)) { +#ifdef npy_string_allocator RETURN_NOT_OK(ConvertStringDType()); return Status::OK(); +#else + return Status::NotImplemented( + "NumPy StringDType requires building PyArrow with NumPy >= 2.0"); +#endif } if (type_ == nullptr) { @@ -847,6 +856,7 @@ Status NumPyConverter::Visit(const StringViewType& type) { return Status::OK(); } +#ifdef npy_string_allocator template Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { auto* descr = reinterpret_cast(dtype_); @@ -948,6 +958,7 @@ Status NumPyConverter::ConvertStringDType() { "NumPy StringDType can only be converted to Arrow string types"); } } +#endif Status NumPyConverter::Visit(const StructType& type) { std::vector sub_converters; From bef2c71b3d45baae280a4496cb78382a9ffd2e37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Fri, 12 Dec 2025 01:23:33 -0500 Subject: [PATCH 09/20] Add StringDType mask coverage and sentinel test --- .../src/arrow/python/numpy_to_arrow.cc | 8 ++--- python/pyarrow/tests/test_array.py | 29 ++++++++++++++++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc index b3e0dc0c17d..dfbdd25a026 100644 --- 
a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc @@ -77,7 +77,7 @@ using internal::NumPyTypeSize; namespace { -#ifdef npy_string_allocator +#if NPY_ABI_VERSION >= 0x02000000 inline npy_string_allocator* ArrowNpyString_acquire_allocator( const PyArray_StringDTypeObject* descr) { using Func = npy_string_allocator* (*)(const PyArray_StringDTypeObject*); @@ -257,7 +257,7 @@ class NumPyConverter { Status Visit(const LargeStringType& type); Status Visit(const StringViewType& type); -#ifdef npy_string_allocator +#if NPY_ABI_VERSION >= 0x02000000 template Status AppendStringDTypeValues(Builder* builder); @@ -370,7 +370,7 @@ Status NumPyConverter::Convert() { } if (IsStringDType(dtype_)) { -#ifdef npy_string_allocator +#if NPY_ABI_VERSION >= 0x02000000 RETURN_NOT_OK(ConvertStringDType()); return Status::OK(); #else @@ -856,7 +856,7 @@ Status NumPyConverter::Visit(const StringViewType& type) { return Status::OK(); } -#ifdef npy_string_allocator +#if NPY_ABI_VERSION >= 0x02000000 template Status NumPyConverter::AppendStringDTypeValues(Builder* builder) { auto* descr = reinterpret_cast(dtype_); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index a7377477dbe..74ef81646ed 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2839,15 +2839,36 @@ def test_array_from_numpy_string_dtype_nulls_and_mask(): # Test interplay of NA sentinel and an explicit mask: # - index 1 is null because of na_object / Python None # - index 2 is forced null by the mask - mask = np.array([False, False, True]) + mask = np.array([False, False, True], dtype=bool) arrow_arr = pa.array(arr, mask=mask) + assert arrow_arr.type == pa.utf8() + assert arrow_arr.null_count == 2 assert arrow_arr.to_pylist() == ["this array has", None, None] + mask = np.array([True, False, True], dtype=bool) + assert pa.array(arr, mask=mask).to_pylist() == [None, None, None] + @pytest.mark.numpy -def test_numpy_object_str_still_works(): - arr_obj = np.array(["x", "y", None], dtype=object) - assert pa.array(arr_obj).to_pylist() == ["x", "y", None] +def test_array_from_numpy_string_dtype_string_sentinel_and_mask(): + dtypes_mod = getattr(np, "dtypes", None) + if dtypes_mod is None: + pytest.skip("NumPy dtypes module not available") + + StringDType = getattr(dtypes_mod, "StringDType", None) + if StringDType is None: + pytest.skip("NumPy StringDType not available") + + sentinel = "__placeholder__" + dtype = StringDType(na_object=sentinel) + arr = np.array(["this array has", sentinel, "as an entry"], dtype=dtype) + + arrow_arr = pa.array(arr) + assert arrow_arr.type == pa.utf8() + assert arrow_arr.to_pylist() == ["this array has", None, "as an entry"] + + mask = np.array([False, False, True], dtype=bool) + assert pa.array(arr, mask=mask).to_pylist() == ["this array has", None, None] @pytest.mark.numpy From 7b48c9928ddb0136fe971ac9e155ad797ff1486a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 00:59:22 -0500 Subject: [PATCH 10/20] Adjust NumPy StringDType availability check --- python/pyarrow/array.pxi | 40 +++- python/pyarrow/includes/libarrow_python.pxd | 8 + .../src/arrow/python/arrow_to_pandas.cc | 219 +++++++++++++++++- .../src/arrow/python/arrow_to_pandas.h | 8 + python/pyarrow/table.pxi | 38 ++- python/pyarrow/tests/test_array.py | 32 +++ 6 files changed, 339 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 
ec58ac727e5..592d0863c23 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -16,6 +16,7 @@ # under the License. from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer, PyCapsule_New +from pyarrow.includes.libarrow_python cimport HasNumPyStringDType, StringConversionMode from collections.abc import Sequence import os @@ -65,6 +66,30 @@ def _ndarray_to_arrow_type(object values, DataType type): return pyarrow_wrap_data_type(_ndarray_to_type(values, type)) +cdef inline StringConversionMode _resolve_string_conversion_mode(object string_dtype): + if string_dtype is True: + return StringConversionMode.STRING_DTYPE + if string_dtype is False: + return StringConversionMode.PYTHON_OBJECT + + if string_dtype is None: + return StringConversionMode.PYTHON_OBJECT + + if isinstance(string_dtype, str): + option = string_dtype.lower() + if option == "auto": + return StringConversionMode.PYTHON_OBJECT + if option in ("numpy", "string", "stringdtype"): + return StringConversionMode.STRING_DTYPE + if option in ("python", "object"): + return StringConversionMode.PYTHON_OBJECT + + raise ValueError( + "string_dtype must be one of 'auto', 'numpy', 'python', 'object', " + "True or False" + ) + + cdef shared_ptr[CDataType] _ndarray_to_type(object values, DataType type) except *: cdef shared_ptr[CDataType] c_type @@ -1734,7 +1759,7 @@ cdef class Array(_PandasConvertible): return values return np.asarray(values, dtype=dtype) - def to_numpy(self, zero_copy_only=True, writable=False): + def to_numpy(self, zero_copy_only=True, writable=False, *, string_dtype="auto"): """ Return a NumPy view or copy of this array. @@ -1757,6 +1782,14 @@ cdef class Array(_PandasConvertible): By setting this to True, a copy of the array is made to ensure it is writable. + string_dtype : {"auto", "numpy", "python", "object", True, False}, default "auto" + Controls how string-like arrays are converted when NumPy 2.0's + :class:`~numpy.typing.StringDType` is available. ``"numpy"`` or + ``True`` will request StringDType (copying), ``"python"``/``"object"`` + or ``False`` will force Python object dtype. ``"auto"`` preserves the + default object dtype unless StringDType is explicitly requested. + Converting to NumPy's StringDType always copies string data. + Returns ------- array : numpy.ndarray @@ -1775,6 +1808,11 @@ cdef class Array(_PandasConvertible): raise ValueError( "Cannot return a writable array if asking for zero-copy") + c_options.string_conversion_mode = _resolve_string_conversion_mode(string_dtype) + if c_options.string_conversion_mode == StringConversionMode.STRING_DTYPE: + if not HasNumPyStringDType(): + raise NotImplementedError("NumPy StringDType not available") + # If there are nulls and the array is a DictionaryArray # decoding the dictionary will make sure nulls are correctly handled. 
# Decoding a dictionary does imply a copy by the way, diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 4724c52ccb5..c5661357217 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -161,6 +161,8 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: shared_ptr[CTable] table, PyObject** out) + c_bool HasNumPyStringDType() + void c_set_default_memory_pool \ " arrow::py::set_default_memory_pool"(CMemoryPool* pool)\ @@ -182,6 +184,11 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: cdef cppclass PyOutputStream(COutputStream): PyOutputStream(object fo) + cdef enum StringConversionMode "arrow::py::PandasOptions::StringConversionMode": + AUTO + STRING_DTYPE + PYTHON_OBJECT + cdef cppclass PandasOptions: CMemoryPool* pool c_bool strings_to_categorical @@ -201,6 +208,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: shared_ptr[const unordered_set[c_string]] categorical_columns shared_ptr[const unordered_set[c_string]] extension_columns c_bool to_numpy + StringConversionMode string_conversion_mode cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil: diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index f163266f3b8..d939432c4fd 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -32,12 +32,14 @@ #include #include "arrow/array.h" +#include "arrow/array/array_binary.h" #include "arrow/buffer.h" #include "arrow/datum.h" #include "arrow/status.h" #include "arrow/table.h" #include "arrow/type.h" #include "arrow/type_traits.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/checked_cast.h" #include "arrow/util/hashing.h" #include "arrow/util/int_util.h" @@ -68,6 +70,15 @@ using internal::CheckIndexBounds; using internal::OptionalParallelFor; namespace py { + +ARROW_PYTHON_EXPORT bool HasNumPyStringDType() { +#if NPY_ABI_VERSION >= 0x02000000 + return PyArray_StringDType != nullptr; +#else + return false; +#endif +} + namespace { // Fix options for conversion of an inner (child) array. 
@@ -344,6 +355,7 @@ class PandasWriter { public: enum type { OBJECT, + STRING_DTYPE, UINT8, INT8, UINT16, @@ -1405,6 +1417,189 @@ class ObjectWriter : public TypedPandasWriter { } }; +#if NPY_ABI_VERSION >= 0x02000000 +Status PackStringValue(npy_string_allocator* allocator, npy_packed_static_string* packed, + const std::string_view& view) { + const int result = NpyString_pack(allocator, packed, view.data(), view.size()); + if (result == -1) { + RETURN_IF_PYERROR(); + return Status::Invalid("Failed to pack NumPy StringDType value"); + } + return Status::OK(); +} + +Status PackNullString(npy_string_allocator* allocator, npy_packed_static_string* packed) { + const int result = NpyString_pack_null(allocator, packed); + if (result == -1) { + RETURN_IF_PYERROR(); + return Status::Invalid("Failed to pack NumPy StringDType value"); + } + return Status::OK(); +} + +template +Status WriteOffsetStringValues(const ArrayType& arr, npy_string_allocator* allocator, + char* data, npy_intp stride) { + using offset_type = typename ArrayType::offset_type; + + const offset_type* offsets = arr.raw_value_offsets(); + const auto base_offset = offsets[0]; + const uint8_t* value_data = arr.value_data()->data(); + const uint8_t* validity = arr.null_bitmap_data(); + + auto pack_values = [&](int64_t position, int64_t length) -> Status { + for (int64_t i = 0; i < length; ++i) { + const auto start = static_cast(offsets[position + i] - base_offset); + const auto end = static_cast(offsets[position + i + 1] - base_offset); + auto* packed = + reinterpret_cast(data + (position + i) * stride); + RETURN_NOT_OK(PackStringValue( + allocator, packed, + std::string_view(reinterpret_cast(value_data + start), + end - start))); + } + return Status::OK(); + }; + + auto pack_nulls = [&](int64_t position, int64_t length) -> Status { + for (int64_t i = 0; i < length; ++i) { + auto* packed = + reinterpret_cast(data + (position + i) * stride); + RETURN_NOT_OK(PackNullString(allocator, packed)); + } + return Status::OK(); + }; + + if (arr.null_count() == 0) { + return pack_values(/*position=*/0, arr.length()); + } + + internal::BitRunReader reader(validity, arr.offset(), arr.length()); + auto run = reader.NextRun(); + while (run.length > 0) { + if (run.set) { + RETURN_NOT_OK(pack_values(run.position - arr.offset(), run.length)); + } else { + RETURN_NOT_OK(pack_nulls(run.position - arr.offset(), run.length)); + } + run = reader.NextRun(); + } + + return Status::OK(); +} + +template +Status WriteViewStringValues(const ArrayType& arr, npy_string_allocator* allocator, + char* data, npy_intp stride) { + const auto* values = arr.raw_values(); + const uint8_t* validity = arr.null_bitmap_data(); + + auto pack_values = [&](int64_t position, int64_t length) -> Status { + for (int64_t i = 0; i < length; ++i) { + auto* packed = + reinterpret_cast(data + (position + i) * stride); + RETURN_NOT_OK(PackStringValue(allocator, packed, values[position + i])); + } + return Status::OK(); + }; + + auto pack_nulls = [&](int64_t position, int64_t length) -> Status { + for (int64_t i = 0; i < length; ++i) { + auto* packed = + reinterpret_cast(data + (position + i) * stride); + RETURN_NOT_OK(PackNullString(allocator, packed)); + } + return Status::OK(); + }; + + if (arr.null_count() == 0) { + return pack_values(/*position=*/0, arr.length()); + } + + internal::BitRunReader reader(validity, arr.offset(), arr.length()); + auto run = reader.NextRun(); + while (run.length > 0) { + if (run.set) { + RETURN_NOT_OK(pack_values(run.position - arr.offset(), run.length)); + } 
else { + RETURN_NOT_OK(pack_nulls(run.position - arr.offset(), run.length)); + } + run = reader.NextRun(); + } + + return Status::OK(); +} + +class StringDTypeWriter : public PandasWriter { + public: + using PandasWriter::PandasWriter; + + Status TransferSingle(std::shared_ptr data, PyObject* py_ref) override { + ARROW_UNUSED(py_ref); + RETURN_NOT_OK(CheckNotZeroCopyOnly(*data)); + RETURN_NOT_OK(EnsureAllocated()); + return CopyInto(std::move(data), /*rel_placement=*/0); + } + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + RETURN_NOT_OK(CheckNotZeroCopyOnly(*data)); + + PyAcquireGIL lock; + auto* np_arr = reinterpret_cast(block_arr_.obj()); + auto* descr = reinterpret_cast(PyArray_DESCR(np_arr)); + + npy_string_allocator* allocator = NpyString_acquire_allocator(descr); + if (allocator == nullptr) { + return Status::Invalid("Failed to acquire NumPy StringDType allocator"); + } + struct AllocatorGuard { + npy_string_allocator* allocator; + explicit AllocatorGuard(npy_string_allocator* alloc) : allocator(alloc) {} + ~AllocatorGuard() { NpyString_release_allocator(allocator); } + } guard(allocator); + + const npy_intp row_stride = PyArray_STRIDES(np_arr)[1]; + char* data_start = PyArray_BYTES(np_arr) + rel_placement * PyArray_STRIDES(np_arr)[0]; + int64_t offset = 0; + + for (const auto& chunk : data->chunks()) { + char* chunk_data = data_start + offset * row_stride; + switch (data->type()->id()) { + case Type::STRING: { + const auto& arr = checked_cast(*chunk); + RETURN_NOT_OK(WriteOffsetStringValues(arr, allocator, chunk_data, row_stride)); + break; + } + case Type::LARGE_STRING: { + const auto& arr = checked_cast(*chunk); + RETURN_NOT_OK(WriteOffsetStringValues(arr, allocator, chunk_data, row_stride)); + break; + } + case Type::STRING_VIEW: { + const auto& arr = checked_cast(*chunk); + RETURN_NOT_OK(WriteViewStringValues(arr, allocator, chunk_data, row_stride)); + break; + } + case Type::LARGE_STRING_VIEW: { + const auto& arr = checked_cast(*chunk); + RETURN_NOT_OK(WriteViewStringValues(arr, allocator, chunk_data, row_stride)); + break; + } + default: + return Status::TypeError("Expected an Arrow string array, got ", + data->type()->ToString()); + } + offset += chunk->length(); + } + + return Status::OK(); + } + + protected: + Status Allocate() override { return AllocateNDArray(NPY_VSTRING); } +}; +#endif + static inline bool IsNonNullContiguous(const ChunkedArray& data) { return data.num_chunks() == 1 && data.null_count() == 0; } @@ -2056,6 +2251,11 @@ Status MakeWriter(const PandasOptions& options, PandasWriter::type writer_type, case PandasWriter::EXTENSION: *writer = std::make_shared(options, num_rows, num_columns); break; +#if NPY_ABI_VERSION >= 0x02000000 + case PandasWriter::STRING_DTYPE: + *writer = std::make_shared(options, num_rows, num_columns); + break; +#endif BLOCK_CASE(OBJECT, ObjectWriter); BLOCK_CASE(UINT8, UInt8Writer); BLOCK_CASE(INT8, Int8Writer); @@ -2130,10 +2330,21 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& case Type::DOUBLE: *output_type = PandasWriter::DOUBLE; break; - case Type::STRING: // fall through - case Type::LARGE_STRING: // fall through - case Type::STRING_VIEW: // fall through - case Type::BINARY: // fall through + case Type::STRING: // fall through + case Type::LARGE_STRING: // fall through + case Type::STRING_VIEW: // fall through + case Type::LARGE_STRING_VIEW: { // fall through +#if NPY_ABI_VERSION >= 0x02000000 + if (options.to_numpy && options.string_conversion_mode == + 
PandasOptions::StringConversionMode::STRING_DTYPE) { + *output_type = PandasWriter::STRING_DTYPE; + break; + } +#endif + *output_type = PandasWriter::OBJECT; + break; + } + case Type::BINARY: // fall through case Type::LARGE_BINARY: case Type::BINARY_VIEW: case Type::NA: // fall through diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.h b/python/pyarrow/src/arrow/python/arrow_to_pandas.h index b4e91e6cf5a..ce45f4f3456 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.h +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.h @@ -140,6 +140,12 @@ struct PandasOptions { // Used internally to decipher between to_numpy() and to_pandas() when // the expected output differs bool to_numpy = false; + + enum class StringConversionMode { AUTO, STRING_DTYPE, PYTHON_OBJECT }; + + // Controls how string-like Arrow arrays are converted when calling + // Array.to_numpy/ChunkedArray.to_numpy + StringConversionMode string_conversion_mode = StringConversionMode::PYTHON_OBJECT; }; ARROW_PYTHON_EXPORT @@ -161,5 +167,7 @@ ARROW_PYTHON_EXPORT Status ConvertTableToPandas(const PandasOptions& options, std::shared_ptr table, PyObject** out); +ARROW_PYTHON_EXPORT bool HasNumPyStringDType(); + } // namespace py } // namespace arrow diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 9136f252980..10aa5916680 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -17,10 +17,35 @@ # under the License. from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer, PyCapsule_New +from pyarrow.includes.libarrow_python cimport HasNumPyStringDType, StringConversionMode import warnings from cython import sizeof + +cdef inline StringConversionMode _resolve_string_conversion_mode(object string_dtype): + if string_dtype is True: + return StringConversionMode.STRING_DTYPE + if string_dtype is False: + return StringConversionMode.PYTHON_OBJECT + + if string_dtype is None: + return StringConversionMode.PYTHON_OBJECT + + if isinstance(string_dtype, str): + option = string_dtype.lower() + if option == "auto": + return StringConversionMode.PYTHON_OBJECT + if option in ("numpy", "string", "stringdtype"): + return StringConversionMode.STRING_DTYPE + if option in ("python", "object"): + return StringConversionMode.PYTHON_OBJECT + + raise ValueError( + "string_dtype must be one of 'auto', 'numpy', 'python', 'object', " + "True or False" + ) + cdef class ChunkedArray(_PandasConvertible): """ An array-like composed from a (possibly empty) collection of pyarrow.Arrays @@ -491,7 +516,7 @@ cdef class ChunkedArray(_PandasConvertible): self._assert_cpu() return _array_like_to_pandas(self, options, types_mapper=types_mapper) - def to_numpy(self, zero_copy_only=False): + def to_numpy(self, zero_copy_only=False, *, string_dtype="auto"): """ Return a NumPy copy of this array (experimental). @@ -500,6 +525,13 @@ cdef class ChunkedArray(_PandasConvertible): zero_copy_only : bool, default False Introduced for signature consistence with pyarrow.Array.to_numpy. This must be False here since NumPy arrays' buffer must be contiguous. + string_dtype : {"auto", "numpy", "python", "object", True, False}, default "auto" + Controls how string-like arrays are converted when NumPy 2.0's + :class:`~numpy.typing.StringDType` is available. ``"numpy"`` or + ``True`` will request StringDType (copying), ``"python"``/``"object"`` + or ``False`` will force Python object dtype. ``"auto"`` preserves the + default object dtype unless StringDType is explicitly requested. 
+ Converting to NumPy's StringDType always copies string data. Returns ------- @@ -526,6 +558,10 @@ cdef class ChunkedArray(_PandasConvertible): object values c_options.to_numpy = True + c_options.string_conversion_mode = _resolve_string_conversion_mode(string_dtype) + if c_options.string_conversion_mode == StringConversionMode.STRING_DTYPE: + if not HasNumPyStringDType(): + raise NotImplementedError("NumPy StringDType not available") with nogil: check_status( diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 74ef81646ed..58a62a40284 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2331,6 +2331,38 @@ def test_to_numpy_roundtrip(): np.testing.assert_array_equal(narr[2:6], arr[2:6].to_numpy()) +@pytest.mark.numpy +@pytest.mark.parametrize( + "arrow_type", + [pa.string(), pa.large_string(), pa.string_view(), pa.large_string_view()], +) +@pytest.mark.parametrize("scenario", ["no_nulls", "with_nulls", "sliced", "empty"]) +def test_to_numpy_stringdtype(arrow_type, scenario): + dtypes_mod = getattr(np, "dtypes", None) + if dtypes_mod is None: + pytest.skip("NumPy dtypes module not available") + + StringDType = getattr(dtypes_mod, "StringDType", None) + if StringDType is None: + pytest.skip("NumPy StringDType not available") + + values = { + "no_nulls": ["a", "b", "c"], + "with_nulls": ["a", None, "c"], + "sliced": ["z", "a", None, "c", "q"], + "empty": [], + } + + arr = pa.array(values[scenario], type=arrow_type) + if scenario == "sliced": + arr = arr.slice(1, 3) + + result = arr.to_numpy(zero_copy_only=False, string_dtype="numpy") + + assert result.dtype == np.dtype(StringDType()) + assert result.tolist() == arr.to_pylist() + + @pytest.mark.numpy def test_array_uint64_from_py_over_range(): arr = pa.array([2 ** 63], type=pa.uint64()) From 38b2ee1dea915af29ecfc0d8ea89c59b5361d29f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 01:27:05 -0500 Subject: [PATCH 11/20] Clarify StringDType copy path and view packing --- python/pyarrow/src/arrow/python/arrow_to_pandas.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index d939432c4fd..473a4e7e8ea 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -1498,7 +1498,10 @@ Status WriteViewStringValues(const ArrayType& arr, npy_string_allocator* allocat for (int64_t i = 0; i < length; ++i) { auto* packed = reinterpret_cast(data + (position + i) * stride); - RETURN_NOT_OK(PackStringValue(allocator, packed, values[position + i])); + const auto view = values[position + i]; + RETURN_NOT_OK(PackStringValue( + allocator, packed, + std::string_view(reinterpret_cast(view.data()), view.size()))); } return Status::OK(); }; @@ -2337,6 +2340,8 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& #if NPY_ABI_VERSION >= 0x02000000 if (options.to_numpy && options.string_conversion_mode == PandasOptions::StringConversionMode::STRING_DTYPE) { + // NumPy's StringDType allocator always copies string data, so zero-copy + // requests must continue to route through the object-dtype path. 
*output_type = PandasWriter::STRING_DTYPE; break; } From 6633e1e6b97c8127a2c0770a1134c59fe4c05024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 01:27:09 -0500 Subject: [PATCH 12/20] Enable NumPy StringDType API and fix writer build --- python/pyarrow/src/arrow/python/arrow_to_pandas.cc | 4 ++-- python/pyarrow/src/arrow/python/numpy_interop.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 473a4e7e8ea..820d0b61d68 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -1474,7 +1474,7 @@ Status WriteOffsetStringValues(const ArrayType& arr, npy_string_allocator* alloc return pack_values(/*position=*/0, arr.length()); } - internal::BitRunReader reader(validity, arr.offset(), arr.length()); + arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); auto run = reader.NextRun(); while (run.length > 0) { if (run.set) { @@ -1519,7 +1519,7 @@ Status WriteViewStringValues(const ArrayType& arr, npy_string_allocator* allocat return pack_values(/*position=*/0, arr.length()); } - internal::BitRunReader reader(validity, arr.offset(), arr.length()); + arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); auto run = reader.NextRun(); while (run.length > 0) { if (run.set) { diff --git a/python/pyarrow/src/arrow/python/numpy_interop.h b/python/pyarrow/src/arrow/python/numpy_interop.h index a83ae4a62b9..b897912427d 100644 --- a/python/pyarrow/src/arrow/python/numpy_interop.h +++ b/python/pyarrow/src/arrow/python/numpy_interop.h @@ -21,6 +21,10 @@ #include // IWYU pragma: export +#if NPY_ABI_VERSION >= 0x02000000 +# define NPY_EXPERIMENTAL_DTYPE_API 1 +#endif + // Don't use the deprecated Numpy functions #ifdef NPY_1_7_API_VERSION # define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION From 70a389d6040a0d3a610d488423b5f010a1ea2590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 01:27:17 -0500 Subject: [PATCH 13/20] Use PyArray_API table for NumPy StringDType --- .../src/arrow/python/arrow_to_pandas.cc | 36 ++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 820d0b61d68..80f66267c48 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -73,7 +73,8 @@ namespace py { ARROW_PYTHON_EXPORT bool HasNumPyStringDType() { #if NPY_ABI_VERSION >= 0x02000000 - return PyArray_StringDType != nullptr; + auto* dtype_table = reinterpret_cast(PyArray_API + 320); + return dtype_table[39] != nullptr; #else return false; #endif @@ -1418,9 +1419,34 @@ class ObjectWriter : public TypedPandasWriter { }; #if NPY_ABI_VERSION >= 0x02000000 +inline npy_string_allocator* ArrowNpyString_acquire_allocator( + const PyArray_StringDTypeObject* descr) { + using Func = npy_string_allocator* (*)(const PyArray_StringDTypeObject*); + return reinterpret_cast(PyArray_API[316])(descr); +} + +inline void ArrowNpyString_release_allocator(npy_string_allocator* allocator) { + using Func = void (*)(npy_string_allocator*); + reinterpret_cast(PyArray_API[318])(allocator); +} + +inline int ArrowNpyString_pack(npy_string_allocator* allocator, + npy_packed_static_string* packed, const char* data, + size_t length) { + using Func = + int 
(*)(npy_string_allocator*, npy_packed_static_string*, const char*, size_t); + return reinterpret_cast(PyArray_API[314])(allocator, packed, data, length); +} + +inline int ArrowNpyString_pack_null(npy_string_allocator* allocator, + npy_packed_static_string* packed) { + using Func = int (*)(npy_string_allocator*, npy_packed_static_string*); + return reinterpret_cast(PyArray_API[315])(allocator, packed); +} + Status PackStringValue(npy_string_allocator* allocator, npy_packed_static_string* packed, const std::string_view& view) { - const int result = NpyString_pack(allocator, packed, view.data(), view.size()); + const int result = ArrowNpyString_pack(allocator, packed, view.data(), view.size()); if (result == -1) { RETURN_IF_PYERROR(); return Status::Invalid("Failed to pack NumPy StringDType value"); @@ -1429,7 +1455,7 @@ Status PackStringValue(npy_string_allocator* allocator, npy_packed_static_string } Status PackNullString(npy_string_allocator* allocator, npy_packed_static_string* packed) { - const int result = NpyString_pack_null(allocator, packed); + const int result = ArrowNpyString_pack_null(allocator, packed); if (result == -1) { RETURN_IF_PYERROR(); return Status::Invalid("Failed to pack NumPy StringDType value"); @@ -1551,14 +1577,14 @@ class StringDTypeWriter : public PandasWriter { auto* np_arr = reinterpret_cast(block_arr_.obj()); auto* descr = reinterpret_cast(PyArray_DESCR(np_arr)); - npy_string_allocator* allocator = NpyString_acquire_allocator(descr); + npy_string_allocator* allocator = ArrowNpyString_acquire_allocator(descr); if (allocator == nullptr) { return Status::Invalid("Failed to acquire NumPy StringDType allocator"); } struct AllocatorGuard { npy_string_allocator* allocator; explicit AllocatorGuard(npy_string_allocator* alloc) : allocator(alloc) {} - ~AllocatorGuard() { NpyString_release_allocator(allocator); } + ~AllocatorGuard() { ArrowNpyString_release_allocator(allocator); } } guard(allocator); const npy_intp row_stride = PyArray_STRIDES(np_arr)[1]; From c150bfbe3a9c68b9c1e25e4b2051b9e42ac8c0f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 16:47:56 -0500 Subject: [PATCH 14/20] Remove unnecessary experimental dtype define --- python/pyarrow/src/arrow/python/numpy_interop.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/pyarrow/src/arrow/python/numpy_interop.h b/python/pyarrow/src/arrow/python/numpy_interop.h index b897912427d..a83ae4a62b9 100644 --- a/python/pyarrow/src/arrow/python/numpy_interop.h +++ b/python/pyarrow/src/arrow/python/numpy_interop.h @@ -21,10 +21,6 @@ #include // IWYU pragma: export -#if NPY_ABI_VERSION >= 0x02000000 -# define NPY_EXPERIMENTAL_DTYPE_API 1 -#endif - // Don't use the deprecated Numpy functions #ifdef NPY_1_7_API_VERSION # define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION From 5a21d1c006be15e25eb22d065c9aa471dd4bea62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 16:52:45 -0500 Subject: [PATCH 15/20] Fix StringDType writer run handling --- .../src/arrow/python/arrow_to_pandas.cc | 31 ++++++++----------- python/pyarrow/tests/test_array.py | 2 +- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 80f66267c48..514a49b3c0c 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -1501,13 +1501,15 @@ Status WriteOffsetStringValues(const ArrayType& arr, 
npy_string_allocator* alloc } arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); + int64_t position = 0; auto run = reader.NextRun(); while (run.length > 0) { if (run.set) { - RETURN_NOT_OK(pack_values(run.position - arr.offset(), run.length)); + RETURN_NOT_OK(pack_values(position, run.length)); } else { - RETURN_NOT_OK(pack_nulls(run.position - arr.offset(), run.length)); + RETURN_NOT_OK(pack_nulls(position, run.length)); } + position += run.length; run = reader.NextRun(); } @@ -1517,17 +1519,14 @@ Status WriteOffsetStringValues(const ArrayType& arr, npy_string_allocator* alloc template Status WriteViewStringValues(const ArrayType& arr, npy_string_allocator* allocator, char* data, npy_intp stride) { - const auto* values = arr.raw_values(); const uint8_t* validity = arr.null_bitmap_data(); auto pack_values = [&](int64_t position, int64_t length) -> Status { for (int64_t i = 0; i < length; ++i) { auto* packed = reinterpret_cast(data + (position + i) * stride); - const auto view = values[position + i]; - RETURN_NOT_OK(PackStringValue( - allocator, packed, - std::string_view(reinterpret_cast(view.data()), view.size()))); + const auto view = arr.GetView(position + i); + RETURN_NOT_OK(PackStringValue(allocator, packed, view)); } return Status::OK(); }; @@ -1546,13 +1545,15 @@ Status WriteViewStringValues(const ArrayType& arr, npy_string_allocator* allocat } arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); + int64_t position = 0; auto run = reader.NextRun(); while (run.length > 0) { if (run.set) { - RETURN_NOT_OK(pack_values(run.position - arr.offset(), run.length)); + RETURN_NOT_OK(pack_values(position, run.length)); } else { - RETURN_NOT_OK(pack_nulls(run.position - arr.offset(), run.length)); + RETURN_NOT_OK(pack_nulls(position, run.length)); } + position += run.length; run = reader.NextRun(); } @@ -1609,11 +1610,6 @@ class StringDTypeWriter : public PandasWriter { RETURN_NOT_OK(WriteViewStringValues(arr, allocator, chunk_data, row_stride)); break; } - case Type::LARGE_STRING_VIEW: { - const auto& arr = checked_cast(*chunk); - RETURN_NOT_OK(WriteViewStringValues(arr, allocator, chunk_data, row_stride)); - break; - } default: return Status::TypeError("Expected an Arrow string array, got ", data->type()->ToString()); @@ -2359,10 +2355,9 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& case Type::DOUBLE: *output_type = PandasWriter::DOUBLE; break; - case Type::STRING: // fall through - case Type::LARGE_STRING: // fall through - case Type::STRING_VIEW: // fall through - case Type::LARGE_STRING_VIEW: { // fall through + case Type::STRING: // fall through + case Type::LARGE_STRING: // fall through + case Type::STRING_VIEW: { // fall through #if NPY_ABI_VERSION >= 0x02000000 if (options.to_numpy && options.string_conversion_mode == PandasOptions::StringConversionMode::STRING_DTYPE) { diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 58a62a40284..7344b9b5b5d 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2334,7 +2334,7 @@ def test_to_numpy_roundtrip(): @pytest.mark.numpy @pytest.mark.parametrize( "arrow_type", - [pa.string(), pa.large_string(), pa.string_view(), pa.large_string_view()], + [pa.string(), pa.large_string(), pa.string_view()], ) @pytest.mark.parametrize("scenario", ["no_nulls", "with_nulls", "sliced", "empty"]) def test_to_numpy_stringdtype(arrow_type, scenario): From bdd2706fe4cc99907372f01fb846785b10cbe019 Mon Sep 
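The corrected loops work because arrow::internal::BitRunReader reports each run only as a length plus a set/unset flag, so the caller has to carry the destination index itself rather than deriving it from the run. Condensed, the idiom both writers now follow looks like this, with ProcessValues and ProcessNulls standing in for the pack_values and pack_nulls lambdas:

    // Sketch of the run-consumption idiom: `position` is relative to the start
    // of the (possibly sliced) array, matching how the packing lambdas index
    // the destination buffer.
    arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length());
    int64_t position = 0;
    for (auto run = reader.NextRun(); run.length > 0; run = reader.NextRun()) {
      if (run.set) {
        RETURN_NOT_OK(ProcessValues(position, run.length));
      } else {
        RETURN_NOT_OK(ProcessNulls(position, run.length));
      }
      position += run.length;
    }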
17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 17:14:27 -0500 Subject: [PATCH 16/20] Fix StringConversionMode scoping and helper duplication --- python/pyarrow/includes/libarrow_python.pxd | 11 ++++++----- python/pyarrow/table.pxi | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index c5661357217..5139e4a4952 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -184,12 +184,11 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: cdef cppclass PyOutputStream(COutputStream): PyOutputStream(object fo) - cdef enum StringConversionMode "arrow::py::PandasOptions::StringConversionMode": - AUTO - STRING_DTYPE - PYTHON_OBJECT - cdef cppclass PandasOptions: + cdef enum StringConversionMode: + AUTO + STRING_DTYPE + PYTHON_OBJECT CMemoryPool* pool c_bool strings_to_categorical c_bool zero_copy_only @@ -210,6 +209,8 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: c_bool to_numpy StringConversionMode string_conversion_mode +ctypedef PandasOptions.StringConversionMode StringConversionMode + cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil: cdef cppclass CTimePoint "arrow::py::internal::TimePoint": diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 10aa5916680..f9856ebc3c8 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -23,7 +23,7 @@ import warnings from cython import sizeof -cdef inline StringConversionMode _resolve_string_conversion_mode(object string_dtype): +cdef inline StringConversionMode _resolve_table_string_conversion_mode(object string_dtype): if string_dtype is True: return StringConversionMode.STRING_DTYPE if string_dtype is False: @@ -558,7 +558,7 @@ cdef class ChunkedArray(_PandasConvertible): object values c_options.to_numpy = True - c_options.string_conversion_mode = _resolve_string_conversion_mode(string_dtype) + c_options.string_conversion_mode = _resolve_table_string_conversion_mode(string_dtype) if c_options.string_conversion_mode == StringConversionMode.STRING_DTYPE: if not HasNumPyStringDType(): raise NotImplementedError("NumPy StringDType not available") From ff3eaa923a7209f2b1a0a6017fdc48f513c2595a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 21:21:03 -0500 Subject: [PATCH 17/20] Fix PandasOptions StringConversionMode declaration --- python/pyarrow/array.pxi | 14 +++++++------- python/pyarrow/includes/libarrow_python.pxd | 11 +++++------ python/pyarrow/table.pxi | 14 +++++++------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 592d0863c23..6d04278eb25 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -68,21 +68,21 @@ def _ndarray_to_arrow_type(object values, DataType type): cdef inline StringConversionMode _resolve_string_conversion_mode(object string_dtype): if string_dtype is True: - return StringConversionMode.STRING_DTYPE + return StringConversionMode_STRING_DTYPE if string_dtype is False: - return StringConversionMode.PYTHON_OBJECT + return StringConversionMode_PYTHON_OBJECT if string_dtype is None: - return StringConversionMode.PYTHON_OBJECT + return StringConversionMode_PYTHON_OBJECT if isinstance(string_dtype, str): option = string_dtype.lower() if option == "auto": - return StringConversionMode.PYTHON_OBJECT + return 
StringConversionMode_PYTHON_OBJECT if option in ("numpy", "string", "stringdtype"): - return StringConversionMode.STRING_DTYPE + return StringConversionMode_STRING_DTYPE if option in ("python", "object"): - return StringConversionMode.PYTHON_OBJECT + return StringConversionMode_PYTHON_OBJECT raise ValueError( "string_dtype must be one of 'auto', 'numpy', 'python', 'object', " @@ -1809,7 +1809,7 @@ cdef class Array(_PandasConvertible): "Cannot return a writable array if asking for zero-copy") c_options.string_conversion_mode = _resolve_string_conversion_mode(string_dtype) - if c_options.string_conversion_mode == StringConversionMode.STRING_DTYPE: + if c_options.string_conversion_mode == StringConversionMode_STRING_DTYPE: if not HasNumPyStringDType(): raise NotImplementedError("NumPy StringDType not available") diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 5139e4a4952..127a456d7ab 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -184,11 +184,12 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: cdef cppclass PyOutputStream(COutputStream): PyOutputStream(object fo) + ctypedef enum StringConversionMode "arrow::py::PandasOptions::StringConversionMode": + StringConversionMode_AUTO "arrow::py::PandasOptions::StringConversionMode::AUTO" + StringConversionMode_STRING_DTYPE "arrow::py::PandasOptions::StringConversionMode::STRING_DTYPE" + StringConversionMode_PYTHON_OBJECT "arrow::py::PandasOptions::StringConversionMode::PYTHON_OBJECT" + cdef cppclass PandasOptions: - cdef enum StringConversionMode: - AUTO - STRING_DTYPE - PYTHON_OBJECT CMemoryPool* pool c_bool strings_to_categorical c_bool zero_copy_only @@ -209,8 +210,6 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: c_bool to_numpy StringConversionMode string_conversion_mode -ctypedef PandasOptions.StringConversionMode StringConversionMode - cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil: cdef cppclass CTimePoint "arrow::py::internal::TimePoint": diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index f9856ebc3c8..502bfa25563 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -25,21 +25,21 @@ from cython import sizeof cdef inline StringConversionMode _resolve_table_string_conversion_mode(object string_dtype): if string_dtype is True: - return StringConversionMode.STRING_DTYPE + return StringConversionMode_STRING_DTYPE if string_dtype is False: - return StringConversionMode.PYTHON_OBJECT + return StringConversionMode_PYTHON_OBJECT if string_dtype is None: - return StringConversionMode.PYTHON_OBJECT + return StringConversionMode_PYTHON_OBJECT if isinstance(string_dtype, str): option = string_dtype.lower() if option == "auto": - return StringConversionMode.PYTHON_OBJECT + return StringConversionMode_PYTHON_OBJECT if option in ("numpy", "string", "stringdtype"): - return StringConversionMode.STRING_DTYPE + return StringConversionMode_STRING_DTYPE if option in ("python", "object"): - return StringConversionMode.PYTHON_OBJECT + return StringConversionMode_PYTHON_OBJECT raise ValueError( "string_dtype must be one of 'auto', 'numpy', 'python', 'object', " @@ -559,7 +559,7 @@ cdef class ChunkedArray(_PandasConvertible): c_options.to_numpy = True c_options.string_conversion_mode = _resolve_table_string_conversion_mode(string_dtype) - if c_options.string_conversion_mode == StringConversionMode.STRING_DTYPE: + if 
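These verbatim-qualified names only resolve if the C++ side really scopes the enum inside PandasOptions. That header is not part of this series, so the following is an assumed sketch of the shape the names imply, limited to the members this series touches:

    // Assumed shape of the C++ declaration the .pxd names refer to; the real
    // header may differ in member order, defaults, and whether the enum is
    // scoped (enum class) or not.
    struct PandasOptions {
      enum class StringConversionMode { AUTO, STRING_DTYPE, PYTHON_OBJECT };

      bool to_numpy = false;
      StringConversionMode string_conversion_mode = StringConversionMode::AUTO;
      // ... pool, zero_copy_only, and the other options elided ...
    };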
c_options.string_conversion_mode == StringConversionMode_STRING_DTYPE: if not HasNumPyStringDType(): raise NotImplementedError("NumPy StringDType not available") From aff479b9a5285d5d94ffb3d9a2bdc0f5cac0942e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 21:53:24 -0500 Subject: [PATCH 18/20] Fix StringConversionMode enum mapping --- python/pyarrow/includes/libarrow_python.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 127a456d7ab..a1cb237ad7c 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -184,7 +184,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: cdef cppclass PyOutputStream(COutputStream): PyOutputStream(object fo) - ctypedef enum StringConversionMode "arrow::py::PandasOptions::StringConversionMode": + cdef enum StringConversionMode "arrow::py::PandasOptions::StringConversionMode": StringConversionMode_AUTO "arrow::py::PandasOptions::StringConversionMode::AUTO" StringConversionMode_STRING_DTYPE "arrow::py::PandasOptions::StringConversionMode::STRING_DTYPE" StringConversionMode_PYTHON_OBJECT "arrow::py::PandasOptions::StringConversionMode::PYTHON_OBJECT" From 25343c259de113de258cb9bae47cb2bf0db40038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 21:53:49 -0500 Subject: [PATCH 19/20] Apply hook formatting fixes --- python/pyarrow/src/arrow/python/arrow_to_pandas.cc | 4 ++-- python/pyarrow/table.pxi | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 514a49b3c0c..2f026d211ee 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -2355,8 +2355,8 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& case Type::DOUBLE: *output_type = PandasWriter::DOUBLE; break; - case Type::STRING: // fall through - case Type::LARGE_STRING: // fall through + case Type::STRING: // fall through + case Type::LARGE_STRING: // fall through case Type::STRING_VIEW: { // fall through #if NPY_ABI_VERSION >= 0x02000000 if (options.to_numpy && options.string_conversion_mode == diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 502bfa25563..a2bd1edd114 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -558,7 +558,8 @@ cdef class ChunkedArray(_PandasConvertible): object values c_options.to_numpy = True - c_options.string_conversion_mode = _resolve_table_string_conversion_mode(string_dtype) + c_options.string_conversion_mode = _resolve_table_string_conversion_mode( + string_dtype) if c_options.string_conversion_mode == StringConversionMode_STRING_DTYPE: if not HasNumPyStringDType(): raise NotImplementedError("NumPy StringDType not available") From 78e592c4b9fadedc335aac24d4a1f5e56aff4449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81d=C3=A1m=20Lippai?= Date: Thu, 25 Dec 2025 21:54:56 -0500 Subject: [PATCH 20/20] Handle null validity when packing NumPy StringDType --- .../src/arrow/python/arrow_to_pandas.cc | 68 +++++++++++++------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 2f026d211ee..4e699381b65 100644 --- 
a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -1500,17 +1500,33 @@ Status WriteOffsetStringValues(const ArrayType& arr, npy_string_allocator* alloc return pack_values(/*position=*/0, arr.length()); } - arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); - int64_t position = 0; - auto run = reader.NextRun(); - while (run.length > 0) { - if (run.set) { - RETURN_NOT_OK(pack_values(position, run.length)); - } else { - RETURN_NOT_OK(pack_nulls(position, run.length)); + if (validity == nullptr) { + for (int64_t i = 0; i < arr.length(); ++i) { + auto* packed = reinterpret_cast(data + i * stride); + if (arr.IsNull(i)) { + RETURN_NOT_OK(PackNullString(allocator, packed)); + } else { + const auto start = static_cast(offsets[i] - base_offset); + const auto end = static_cast(offsets[i + 1] - base_offset); + RETURN_NOT_OK(PackStringValue( + allocator, packed, + std::string_view(reinterpret_cast(value_data + start), + end - start))); + } + } + } else { + arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); + int64_t position = 0; + auto run = reader.NextRun(); + while (run.length > 0) { + if (run.set) { + RETURN_NOT_OK(pack_values(position, run.length)); + } else { + RETURN_NOT_OK(pack_nulls(position, run.length)); + } + position += run.length; + run = reader.NextRun(); } - position += run.length; - run = reader.NextRun(); } return Status::OK(); @@ -1544,17 +1560,29 @@ Status WriteViewStringValues(const ArrayType& arr, npy_string_allocator* allocat return pack_values(/*position=*/0, arr.length()); } - arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); - int64_t position = 0; - auto run = reader.NextRun(); - while (run.length > 0) { - if (run.set) { - RETURN_NOT_OK(pack_values(position, run.length)); - } else { - RETURN_NOT_OK(pack_nulls(position, run.length)); + if (validity == nullptr) { + for (int64_t i = 0; i < arr.length(); ++i) { + auto* packed = reinterpret_cast(data + i * stride); + if (arr.IsNull(i)) { + RETURN_NOT_OK(PackNullString(allocator, packed)); + } else { + const auto view = arr.GetView(i); + RETURN_NOT_OK(PackStringValue(allocator, packed, view)); + } + } + } else { + arrow::internal::BitRunReader reader(validity, arr.offset(), arr.length()); + int64_t position = 0; + auto run = reader.NextRun(); + while (run.length > 0) { + if (run.set) { + RETURN_NOT_OK(pack_values(position, run.length)); + } else { + RETURN_NOT_OK(pack_nulls(position, run.length)); + } + position += run.length; + run = reader.NextRun(); } - position += run.length; - run = reader.NextRun(); } return Status::OK();
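With this change both writers carry the same element-wise fallback for chunks that expose no validity bitmap. If that duplication grows it could be hoisted into a small shared helper along these lines; PackElementWise is an illustrative name, and the only requirements on ArrayType are IsNull(i) and a GetView(i) returning std::string_view, which the offset-based and view-based string arrays both provide:

    // Sketch of a shared element-wise fallback; not part of the patch.
    template <typename ArrayType>
    Status PackElementWise(const ArrayType& arr, npy_string_allocator* allocator,
                           char* data, npy_intp stride) {
      for (int64_t i = 0; i < arr.length(); ++i) {
        auto* packed =
            reinterpret_cast<npy_packed_static_string*>(data + i * stride);
        if (arr.IsNull(i)) {
          RETURN_NOT_OK(PackNullString(allocator, packed));
        } else {
          RETURN_NOT_OK(PackStringValue(allocator, packed, arr.GetView(i)));
        }
      }
      return Status::OK();
    }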