diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1d554d4dc5de4..11510a10a990f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1326,6 +1326,7 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`) +- Bug in :func:`array` where it did not always raise an error when the passed data was not a 1D list-like (:issue:`63112`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) - Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index e50b301c34868..9563a448713ce 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -165,6 +165,7 @@ def maybe_indices_to_slice( indices: npt.NDArray[np.intp], max_len: int, ) -> slice | npt.NDArray[np.intp]: ... +def is_all_scalar(obj: list | tuple) -> bool: ... def is_all_arraylike(obj: list) -> bool: ... 
def is_all_scalar(obj: list | tuple) -> bool:
    """
    Check whether every element of ``obj`` is a scalar (non-iterable) value.

    ``str`` and ``bytes`` elements are counted as scalars even though they
    are iterable. Any other element that exposes ``__iter__`` is treated as
    a nested container.

    Parameters
    ----------
    obj : list or tuple
        The sequence whose elements are inspected.

    Returns
    -------
    bool
        ``False`` as soon as one element is a nested iterable, ``True``
        otherwise (including for an empty sequence).
    """
    for item in obj:
        # str/bytes define __iter__ but are single values for this check.
        if isinstance(item, (bytes, str)):
            continue
        if hasattr(item, "__iter__"):
            # Stop at the first nested container; no need to scan the rest.
            return False

    return True
@pytest.mark.parametrize(
    "data",
    [
        [["a"], ["b"]],  # 2D, strings
        [[1], [2]],  # 2D, ints
        [[1.0], [2.0]],  # 2D, floats
        [[1, 2], ["a", "b"]],  # 2D, mixed element types
        [[[1]], [["a"]], [[3.14]]],  # 3D, mixed element types
    ],
)
def test_not_1D_like_raises(data):
    """Nested (non-1D) list input with a string dtype must raise TypeError."""
    string_dtype = pd.StringDtype()
    expected_msg = "Values must be a 1D list-like"

    with pytest.raises(TypeError, match=expected_msg):
        pd.array(data, dtype=string_dtype)