From 65df68304152e6cc1c99d68153a6a3037aff0d2a Mon Sep 17 00:00:00 2001 From: Akshith Kandivanam Date: Sun, 23 Nov 2025 17:55:54 -0500 Subject: [PATCH 1/7] proposed fix for issue #61026 --- pandas/core/construction.py | 18 +++++++ .../frame/indexing/test_setitem_2d_object.py | 52 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 pandas/tests/frame/indexing/test_setitem_2d_object.py diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 5868bdaa1225b..1c24efcda6bf1 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -611,6 +611,24 @@ def sanitize_array( data = data.A if dtype is None: + # GH#61026: special-case 2D+ object ndarrays when dtype is None. + if data.dtype == object and data.ndim > 1: + if data.ndim == 2 and data.shape[1] == 1: + # allow assigning a (n, 1) object array to a single column, flatten it: + data = data[:, 0] + elif data.ndim == 2: + # More than one column but caller is behaving as if this is a single-column assignment. + raise ValueError( + "Setting a DataFrame column with a 2D object array " + f"requires shape (n, 1); got shape {data.shape}." + ) + else: + # ndim >= 3 + raise ValueError( + f"Setting a DataFrame column with ndim {data.ndim} " + "object array is not supported." 
+ ) + subarr = data if data.dtype == object and infer_object: subarr = lib.maybe_convert_objects( diff --git a/pandas/tests/frame/indexing/test_setitem_2d_object.py b/pandas/tests/frame/indexing/test_setitem_2d_object.py new file mode 100644 index 0000000000000..c59db1d856fcd --- /dev/null +++ b/pandas/tests/frame/indexing/test_setitem_2d_object.py @@ -0,0 +1,52 @@ +import numpy as np +import pandas as pd +import pandas._testing as tm +import pytest + + +class TestSetItem2DObjectArray: + # GH#61026 + def test_setitem_2d_object_array_single_column_unravel(self): + df = pd.DataFrame({"c1": [1, 2, 3]}) + t = np.array([["A"], ["B"], ["C"]], dtype=object) + + df["c1"] = t + + expected = pd.Series(["A", "B", "C"], name="c1") + tm.assert_series_equal(df["c1"], expected) + + # GH#61026 + def test_setitem_2d_object_array_wrong_shape_raises(self): + df = pd.DataFrame({"c1": [1, 2, 3]}) + t = np.array([["A", "B"], ["C", "D"], ["E", "F"]], dtype=object) + + with pytest.raises(ValueError, match="requires shape"): + df["c1"] = t + + # GH#61026 + def test_setitem_3d_object_array_raises(self): + df = pd.DataFrame({"c1": [1, 2, 3]}) + t = np.array([[["A"]], [["B"]], [["C"]]], dtype=object) + + with pytest.raises(ValueError, match="ndim 3"): + df["c1"] = t + + # GH#61026 + def test_setitem_2d_string_array_regression(self): + df = pd.DataFrame({"c1": [1, 2, 3]}) + t = np.array([["A"], ["B"], ["C"]]) # dtype ' Date: Sun, 23 Nov 2025 20:37:09 -0500 Subject: [PATCH 2/7] applied linting --- pandas/core/construction.py | 6 +++--- pandas/tests/frame/indexing/test_setitem_2d_object.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 1c24efcda6bf1..270a62c453a9d 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -614,10 +614,10 @@ def sanitize_array( # GH#61026: special-case 2D+ object ndarrays when dtype is None. 
if data.dtype == object and data.ndim > 1: if data.ndim == 2 and data.shape[1] == 1: - # allow assigning a (n, 1) object array to a single column, flatten it: + # allow assigning a (n, 1) object array to a single column. data = data[:, 0] elif data.ndim == 2: - # More than one column but caller is behaving as if this is a single-column assignment. + # more than 1 column, now allowed. raise ValueError( "Setting a DataFrame column with a 2D object array " f"requires shape (n, 1); got shape {data.shape}." @@ -628,7 +628,7 @@ def sanitize_array( f"Setting a DataFrame column with ndim {data.ndim} " "object array is not supported." ) - + subarr = data if data.dtype == object and infer_object: subarr = lib.maybe_convert_objects( diff --git a/pandas/tests/frame/indexing/test_setitem_2d_object.py b/pandas/tests/frame/indexing/test_setitem_2d_object.py index c59db1d856fcd..8ed0b5c3acd97 100644 --- a/pandas/tests/frame/indexing/test_setitem_2d_object.py +++ b/pandas/tests/frame/indexing/test_setitem_2d_object.py @@ -1,7 +1,8 @@ import numpy as np +import pytest + import pandas as pd import pandas._testing as tm -import pytest class TestSetItem2DObjectArray: From 62f7c4b9d99af94a4cefa20aeed1a69e9a96a730 Mon Sep 17 00:00:00 2001 From: Akshith Kandivanam Date: Sun, 23 Nov 2025 20:46:14 -0500 Subject: [PATCH 3/7] documenting my changes --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4ab20623cc561..375f788cb12b7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1150,6 +1150,7 @@ Indexing - Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block 
(:issue:`60980`, :issue:`60273`) +- Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting a column with a 2D object array of shape ``(n, 1)`` (:issue:`61026`) - Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`) - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`) - Bug in :meth:`Index.get_indexer` not casting missing values correctly for new string datatype (:issue:`55833`) From aa707b1ce3d8d7201883134125afd4b605748504 Mon Sep 17 00:00:00 2001 From: Akshith Kandivanam Date: Sun, 23 Nov 2025 21:16:34 -0500 Subject: [PATCH 4/7] fix comment error --- pandas/core/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 270a62c453a9d..d0b7a8232af66 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -617,7 +617,7 @@ def sanitize_array( # allow assigning a (n, 1) object array to a single column. data = data[:, 0] elif data.ndim == 2: - # more than 1 column, now allowed. + # more than 1 column, not allowed. raise ValueError( "Setting a DataFrame column with a 2D object array " f"requires shape (n, 1); got shape {data.shape}." 
From c5c89536b28d8eda13a00ffe680f5f11004112a3 Mon Sep 17 00:00:00 2001 From: Akshith Kandivanam Date: Sun, 23 Nov 2025 22:09:16 -0500 Subject: [PATCH 5/7] fixing pyarrow errors in control flow logic --- pandas/core/construction.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index d0b7a8232af66..98d1c0a384518 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -566,6 +566,14 @@ def sanitize_array( # extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray data = extract_array(data, extract_numpy=True, extract_range=True) + # GH#61026: when 2D input is allowed (e.g. DataFrame column assignment), + # treat a (n, 1) numpy array as a 1D array of length n so downstream code + # (including pyarrow-backed StringArray) always sees 1D. + if allow_2d and isinstance(data, np.ndarray) and data.ndim == 2: + rows, cols = data.shape + if cols == 1: + data = data[:, 0] + if isinstance(data, np.ndarray) and data.ndim == 0: if dtype is None: dtype = data.dtype @@ -612,7 +620,7 @@ def sanitize_array( if dtype is None: # GH#61026: special-case 2D+ object ndarrays when dtype is None. - if data.dtype == object and data.ndim > 1: + if allow_2d and data.dtype == object and data.ndim > 1: if data.ndim == 2 and data.shape[1] == 1: # allow assigning a (n, 1) object array to a single column. 
data = data[:, 0] From 78f8ce7d476bb28650a21f74f6ee4d10be4edad9 Mon Sep 17 00:00:00 2001 From: Akshith Kandivanam Date: Sun, 23 Nov 2025 22:53:42 -0500 Subject: [PATCH 6/7] new fix --- pandas/core/construction.py | 26 -------------------------- pandas/core/frame.py | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 98d1c0a384518..5868bdaa1225b 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -566,14 +566,6 @@ def sanitize_array( # extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray data = extract_array(data, extract_numpy=True, extract_range=True) - # GH#61026: when 2D input is allowed (e.g. DataFrame column assignment), - # treat a (n, 1) numpy array as a 1D array of length n so downstream code - # (including pyarrow-backed StringArray) always sees 1D. - if allow_2d and isinstance(data, np.ndarray) and data.ndim == 2: - rows, cols = data.shape - if cols == 1: - data = data[:, 0] - if isinstance(data, np.ndarray) and data.ndim == 0: if dtype is None: dtype = data.dtype @@ -619,24 +611,6 @@ def sanitize_array( data = data.A if dtype is None: - # GH#61026: special-case 2D+ object ndarrays when dtype is None. - if allow_2d and data.dtype == object and data.ndim > 1: - if data.ndim == 2 and data.shape[1] == 1: - # allow assigning a (n, 1) object array to a single column. - data = data[:, 0] - elif data.ndim == 2: - # more than 1 column, not allowed. - raise ValueError( - "Setting a DataFrame column with a 2D object array " - f"requires shape (n, 1); got shape {data.shape}." - ) - else: - # ndim >= 3 - raise ValueError( - f"Setting a DataFrame column with ndim {data.ndim} " - "object array is not supported." 
- ) - subarr = data if data.dtype == object and infer_object: subarr = lib.maybe_convert_objects( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c8c246434f6d8..a53f43eb7a35d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5501,7 +5501,28 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]: return _reindex_for_setitem(value, self.index) if is_list_like(value): + # GH#61026: this method is only used for *single-column* assignment. + # Reject 2D/3D arrays here, except the (n, 1) case which we treat as 1D. + if isinstance(value, np.ndarray) and value.ndim > 1: + if value.ndim == 2: + if value.shape[1] == 1: + # (n, 1) → length-n 1D array + value = value[:, 0] + else: + # More than one column: users should use df[[...]] = value + raise ValueError( + "Setting a DataFrame column with a 2D array requires " + f"shape (n, 1); got shape {value.shape}." + ) + else: + # ndim >= 3 + raise ValueError( + f"Setting a DataFrame column with ndim {value.ndim} " + "array is not supported." 
+ ) + com.require_length_match(value, self.index) + return sanitize_array(value, self.index, copy=True, allow_2d=True), None @property From e2ad3fba60b832ba21dde24063006be479d6a40b Mon Sep 17 00:00:00 2001 From: Akshith Kandivanam Date: Mon, 24 Nov 2025 00:31:47 -0500 Subject: [PATCH 7/7] trying new patch location and logic & revamped test infra --- pandas/core/frame.py | 44 +++++++-------- pandas/tests/frame/indexing/test_setitem.py | 18 +++++++ .../frame/indexing/test_setitem_2d_object.py | 53 ------------------- 3 files changed, 41 insertions(+), 74 deletions(-) delete mode 100644 pandas/tests/frame/indexing/test_setitem_2d_object.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a53f43eb7a35d..273e70140538b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5501,29 +5501,31 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]: return _reindex_for_setitem(value, self.index) if is_list_like(value): - # GH#61026: this method is only used for *single-column* assignment. - # Reject 2D/3D arrays here, except the (n, 1) case which we treat as 1D. - if isinstance(value, np.ndarray) and value.ndim > 1: - if value.ndim == 2: - if value.shape[1] == 1: - # (n, 1) → length-n 1D array - value = value[:, 0] - else: - # More than one column: users should use df[[...]] = value - raise ValueError( - "Setting a DataFrame column with a 2D array requires " - f"shape (n, 1); got shape {value.shape}." - ) - else: - # ndim >= 3 - raise ValueError( - f"Setting a DataFrame column with ndim {value.ndim} " - "array is not supported." - ) - com.require_length_match(value, self.index) - return sanitize_array(value, self.index, copy=True, allow_2d=True), None + # GH#61026: special-case 2D inputs for single-column assignment. 
+ # - accept shape (n, 1) by flattening to 1D + # - disallow 2D *object* arrays with more than one column, since those + # correspond to a single column key and should be rejected + arr = value + + # np.matrix is always 2D; gonna convert to regular ndarray + if isinstance(arr, np.matrix): + arr = np.asarray(arr) + + if isinstance(arr, np.ndarray) and arr.ndim == 2: + if arr.shape[1] == 1: + # treating (n, 1) as a length-n 1D array + arr = arr[:, 0] + elif arr.dtype == object: + # single-column setitem with a 2D object array is not allowed. + msg = ( + "Setting a DataFrame column with a 2D array requires " + f"shape (n, 1); got shape {arr.shape}." + ) + raise ValueError(msg) + subarr = sanitize_array(arr, self.index, copy=True, allow_2d=True) + return subarr, None @property def _series(self): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 0de2455079027..9d631026fc074 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -816,6 +816,24 @@ def test_setitem_index_object_dtype_not_inferring(self): ) tm.assert_frame_equal(df, expected) + def test_setitem_2d_object_array(self): + # GH#61026 + df = DataFrame( + { + "c1": [1, 2, 3, 4, 5], + } + ) + + arr = np.array([["A"], ["B"], ["C"], ["D"], ["E"]], dtype=object) + df["c1"] = arr + + expected = DataFrame( + { + "c1": ["A", "B", "C", "D", "E"], + } + ) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture diff --git a/pandas/tests/frame/indexing/test_setitem_2d_object.py b/pandas/tests/frame/indexing/test_setitem_2d_object.py deleted file mode 100644 index 8ed0b5c3acd97..0000000000000 --- a/pandas/tests/frame/indexing/test_setitem_2d_object.py +++ /dev/null @@ -1,53 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -import pandas._testing as tm - - -class TestSetItem2DObjectArray: - # GH#61026 - def test_setitem_2d_object_array_single_column_unravel(self): - 
df = pd.DataFrame({"c1": [1, 2, 3]}) - t = np.array([["A"], ["B"], ["C"]], dtype=object) - - df["c1"] = t - - expected = pd.Series(["A", "B", "C"], name="c1") - tm.assert_series_equal(df["c1"], expected) - - # GH#61026 - def test_setitem_2d_object_array_wrong_shape_raises(self): - df = pd.DataFrame({"c1": [1, 2, 3]}) - t = np.array([["A", "B"], ["C", "D"], ["E", "F"]], dtype=object) - - with pytest.raises(ValueError, match="requires shape"): - df["c1"] = t - - # GH#61026 - def test_setitem_3d_object_array_raises(self): - df = pd.DataFrame({"c1": [1, 2, 3]}) - t = np.array([[["A"]], [["B"]], [["C"]]], dtype=object) - - with pytest.raises(ValueError, match="ndim 3"): - df["c1"] = t - - # GH#61026 - def test_setitem_2d_string_array_regression(self): - df = pd.DataFrame({"c1": [1, 2, 3]}) - t = np.array([["A"], ["B"], ["C"]]) # dtype '