From b7e7ebb3ae49e1a0183e6250f3dcbf4866ad462c Mon Sep 17 00:00:00 2001
From: Aniket Singh Yadav
Date: Sat, 6 Dec 2025 10:21:52 +0530
Subject: [PATCH] ERR: consistent error messages for unsupported reduction
 operations

---
 pandas/core/arrays/base.py                 | 19 +++++++++-
 pandas/core/arrays/categorical.py          | 26 +++++++++----
 pandas/core/arrays/datetimelike.py         | 11 ++++--
 pandas/core/arrays/datetimes.py            |  8 ++++
 pandas/core/arrays/period.py               |  8 ++++
 pandas/core/arrays/string_.py              |  7 ++++
 pandas/core/arrays/timedeltas.py           | 17 ++++++++-
 pandas/core/indexes/base.py                |  8 ++++
 pandas/core/nanops.py                      |  4 +-
 pandas/tests/groupby/test_groupby.py       |  2 +-
 pandas/tests/reductions/test_reductions.py | 43 ++++++++--------------
 pandas/tests/resample/test_resample_api.py |  2 +-
 12 files changed, 109 insertions(+), 46 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 84ec38e2f75d1..e6ffdb4e279d1 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -958,6 +958,22 @@ def argmax(self, skipna: bool = True) -> int:
             raise ValueError("Encountered an NA value with skipna=False")
         return nargminmax(self, "argmax")
 
+    def _supports_reduction(self, op_name: str) -> bool:
+        """
+        Return whether the reduction operation is supported for this array.
+
+        Parameters
+        ----------
+        op_name : str
+            Name of the reduction operation (e.g., 'sum', 'mean', 'min', etc.)
+
+        Returns
+        -------
+        bool
+            True if supported, False otherwise.
+        """
+        return False
+
     def interpolate(
         self,
         *,
@@ -2186,8 +2202,7 @@ def _reduce(
         meth = getattr(self, name, None)
         if meth is None:
             raise TypeError(
-                f"'{type(self).__name__}' with dtype {self.dtype} "
-                f"does not support operation '{name}'"
+                f"operation '{name}' is not supported for dtype '{self.dtype}'"
             )
         result = meth(skipna=skipna, **kwargs)
         if keepdims:
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 4b5d2acf008a8..32d4f6212302e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1933,9 +1933,7 @@ def check_for_ordered(self, op) -> None:
         """assert that we are ordered"""
         if not self.ordered:
             raise TypeError(
-                f"Categorical is not ordered for operation {op}\n"
-                "you can use .as_ordered() to change the "
-                "Categorical to an ordered one\n"
+                f"operation '{op}' is not supported for dtype '{self.dtype}'"
             )
 
     def argsort(
@@ -2419,9 +2417,17 @@ def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]:
     # ------------------------------------------------------------------
     # Reductions
 
+    def _supports_reduction(self, op_name: str) -> bool:
+        return op_name in {"min", "max", "mode"}
+
     def _reduce(
         self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
     ):
+        if not self._supports_reduction(name):
+            raise TypeError(
+                f"operation '{name}' is not supported for dtype '{self.dtype}'"
+            )
+
         result = super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
         if name in ["argmax", "argmin"]:
             # don't wrap in Categorical!
@@ -2566,7 +2572,9 @@ def _accumulate(self, name: str, skipna: bool = True, **kwargs) -> Self:
         elif name == "cummax":
             func = np.maximum.accumulate
         else:
-            raise TypeError(f"Accumulation {name} not supported for {type(self)}")
+            raise TypeError(
+                f"operation '{name}' is not supported for dtype '{self.dtype}'"
+            )
 
         self.check_for_ordered(name)
         codes = self.codes.copy()
@@ -2766,12 +2774,12 @@ def _groupby_op(
         dtype = self.dtype
         if how in ["sum", "prod", "cumsum", "cumprod", "skew", "kurt"]:
-            raise TypeError(f"{dtype} type does not support {how} operations")
+            raise TypeError(f"operation '{how}' is not supported for dtype '{dtype}'")
         if how in ["min", "max", "rank", "idxmin", "idxmax"] and not dtype.ordered:
             # raise TypeError instead of NotImplementedError to ensure we
             #  don't go down a group-by-group path, since in the empty-groups
             #  case that would fail to raise
-            raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
+            raise TypeError(f"operation '{how}' is not supported for dtype '{dtype}'")
         if how not in [
             "rank",
             "any",
             "all",
@@ -2784,8 +2792,10 @@
             "idxmax",
         ]:
             if kind == "transform":
-                raise TypeError(f"{dtype} type does not support {how} operations")
-            raise TypeError(f"{dtype} dtype does not support aggregation '{how}'")
+                raise TypeError(
+                    f"operation '{how}' is not supported for dtype '{dtype}'"
+                )
+            raise TypeError(f"operation '{how}' is not supported for dtype '{dtype}'")
 
         result_mask = None
         mask = self.isna()
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index b93d1ae408400..d48eb8e3133cb 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1365,7 +1365,9 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op) -> np.ndarray
 
     def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self:
         if name not in {"cummin", "cummax"}:
-            raise TypeError(f"Accumulation {name} not supported for {type(self)}")
+            raise TypeError(
+                f"operation '{name}' is not supported for dtype '{self.dtype}'"
+            )
 
         op = getattr(datetimelike_accumulations, name)
         result = op(self.copy(), skipna=skipna, **kwargs)
@@ -1697,12 +1699,13 @@ def _groupby_op(
         if dtype.kind == "M":
             # Adding/multiplying datetimes is not valid
             if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew", "kurt"]:
-                raise TypeError(f"datetime64 type does not support operation '{how}'")
+                raise TypeError(
+                    f"operation '{how}' is not supported for dtype '{self.dtype}'"
+                )
             if how in ["any", "all"]:
                 # GH#34479
                 raise TypeError(
-                    f"'{how}' with datetime64 dtypes is no longer supported. "
-                    f"Use (obj != pd.Timestamp(0)).{how}() instead."
+ f"operation '{how}' is not supported for dtype '{self.dtype}'" ) elif isinstance(dtype, PeriodDtype): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 38be038efcaa5..fc9fda749fd41 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -222,6 +222,9 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): Length: 2, dtype: datetime64[s] """ + def _supports_reduction(self, op_name: str) -> bool: + return op_name in {"min", "max", "mean", "median", "std"} + _typ = "datetimearray" _internal_fill_value = np.datetime64("NaT", "ns") _recognized_scalars = (datetime, np.datetime64) @@ -2298,6 +2301,11 @@ def to_julian_date(self) -> npt.NDArray[np.float64]: def _reduce( self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs ): + if not self._supports_reduction(name): + raise TypeError( + f"operation '{name}' is not supported for dtype '{self.dtype}'" + ) + result = super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs) if keepdims and isinstance(result, np.ndarray): if name == "std": diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 180080da4cd00..e4274f9a33195 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1133,9 +1133,17 @@ def _check_timedeltalike_freq_compat(self, other): # ------------------------------------------------------------------ # Reductions + def _supports_reduction(self, op_name: str) -> bool: + return op_name in {"min", "max"} + def _reduce( self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs ): + if not self._supports_reduction(name): + raise TypeError( + f"operation '{name}' is not supported for dtype '{self.dtype}'" + ) + result = super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs) if keepdims and isinstance(result, np.ndarray): return self._from_sequence(result, dtype=self.dtype) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 7a61a252d86a6..aa40fbd3e38cd 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -897,6 +897,9 @@ def astype(self, dtype, copy: bool = True): return super().astype(dtype, copy) + def _supports_reduction(self, op_name: str) -> bool: + return op_name in {"min", "max"} + def _reduce( self, name: str, @@ -906,6 +909,10 @@ def _reduce( axis: AxisInt | None = 0, **kwargs, ): + if not self._supports_reduction(name): + raise TypeError( + f"operation '{name}' is not supported for dtype '{self.dtype}'" + ) if self.dtype.na_value is np.nan and name in ["any", "all"]: if name == "any": return nanops.nanany(self._ndarray, skipna=skipna) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 64c2e1779aba7..2dc537a53fc1e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -387,6 +387,9 @@ def __iter__(self) -> Iterator: # ---------------------------------------------------------------- # Reductions + def _supports_reduction(self, op_name: str) -> bool: + return op_name in {"min", "max", "sum", "mean", "median", "std", "var"} + def sum( self, *, @@ -398,6 +401,11 @@ def sum( skipna: bool = True, min_count: int = 0, ): + if not self._supports_reduction("sum"): + raise TypeError( + f"operation 'sum' is not supported for dtype '{self.dtype}'" + ) + nv.validate_sum( (), {"dtype": dtype, "out": out, "keepdims": keepdims, "initial": initial} ) @@ -417,6 +425,11 @@ def std( keepdims: bool = False, skipna: bool = True, ): + if not 
+        if not self._supports_reduction("std"):
+            raise TypeError(
+                f"operation 'std' is not supported for dtype '{self.dtype}'"
+            )
+
         nv.validate_stat_ddof_func(
             (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
         )
@@ -436,7 +449,9 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwargs):
             return type(self)._simple_new(result, freq=None, dtype=self.dtype)
 
         elif name == "cumprod":
-            raise TypeError("cumprod not supported for Timedelta.")
+            raise TypeError(
+                f"operation 'cumprod' is not supported for dtype '{self.dtype}'"
+            )
 
         else:
             return super()._accumulate(name, skipna=skipna, **kwargs)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index d75479da70d11..9a7881e522f07 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -7419,6 +7419,10 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
             return self._na_value
 
         if not self._is_multi and not isinstance(self._values, np.ndarray):
+            if not self._values._supports_reduction("min"):
+                raise TypeError(
+                    f"operation 'min' is not supported for dtype '{self.dtype}'"
+                )
             return self._values._reduce(name="min", skipna=skipna)
 
         return nanops.nanmin(self._values, skipna=skipna)
@@ -7483,6 +7487,10 @@ def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs)
             return self._na_value
 
         if not self._is_multi and not isinstance(self._values, np.ndarray):
+            if not self._values._supports_reduction("max"):
+                raise TypeError(
+                    f"operation 'max' is not supported for dtype '{self.dtype}'"
+                )
             return self._values._reduce(name="max", skipna=skipna)
 
         return nanops.nanmax(self._values, skipna=skipna)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 58bcc60f9274e..6c022b1917333 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -523,7 +523,7 @@ def nanany(
 
     if values.dtype.kind == "M":
         # GH#34479
-        raise TypeError("datetime64 type does not support operation 'any'")
+        raise TypeError(f"operation 'any' is not supported for dtype '{values.dtype}'")
 
     values, _ = _get_values(values, skipna, fill_value=False, mask=mask)
 
@@ -579,7 +579,7 @@ def nanall(
 
     if values.dtype.kind == "M":
         # GH#34479
-        raise TypeError("datetime64 type does not support operation 'all'")
+        raise TypeError(f"operation 'all' is not supported for dtype '{values.dtype}'")
 
     values, _ = _get_values(values, skipna, fill_value=True, mask=mask)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 4955b1fe0da54..1d484c8c3897f 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -670,7 +670,7 @@ def test_raises_on_nuisance(df, using_infer_string):
     df = df.loc[:, ["A", "C", "D"]]
     df["E"] = datetime.now()
     grouped = df.groupby("A")
-    msg = "datetime64 type does not support operation 'sum'"
+    msg = r"operation 'sum' is not supported for dtype 'datetime64\[ns\]'"
     with pytest.raises(TypeError, match=msg):
         grouped.agg("sum")
     with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index db27572b9da26..b64e58486ebfc 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -374,13 +374,7 @@ def test_invalid_td64_reductions(self, opname):
         )
         td = s.diff()
 
-        msg = "|".join(
-            [
-                f"reduction operation '{opname}' not allowed for this dtype",
-                rf"cannot perform {opname} with type timedelta64\[ns\]",
-                f"does not support operation '{opname}'",
-            ]
-        )
+        msg = rf"operation '{opname}' is not supported for dtype 'timedelta64\[ns\]'"
         with pytest.raises(TypeError, match=msg):
             getattr(td, opname)()
 
@@ -711,13 +705,7 @@ def test_ops_consistency_on_empty(self, method):
         # timedelta64[ns]
         tdser = Series([], dtype="m8[ns]")
         if method == "var":
-            msg = "|".join(
-                [
-                    "operation 'var' not allowed",
-                    r"cannot perform var with type timedelta64\[ns\]",
-                    "does not support operation 'var'",
-                ]
-            )
+            msg = r"operation 'var' is not supported for dtype 'timedelta64\[ns\]'"
             with pytest.raises(TypeError, match=msg):
                 getattr(tdser, method)()
         else:
@@ -1019,39 +1007,40 @@ def test_any_all_datetimelike(self):
         df = DataFrame(ser)
 
         # GH#34479
-        msg = "datetime64 type does not support operation '(any|all)'"
-        with pytest.raises(TypeError, match=msg):
+        msg_all = "operation 'all' is not supported for dtype 'datetime64"
+        msg_any = "operation 'any' is not supported for dtype 'datetime64"
+        with pytest.raises(TypeError, match=msg_all):
             dta.all()
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_any):
             dta.any()
 
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_all):
             ser.all()
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_any):
             ser.any()
 
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_any):
             df.any().all()
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_all):
             df.all().all()
 
         dta = dta.tz_localize("UTC")
         ser = Series(dta)
         df = DataFrame(ser)
         # GH#34479
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_all):
             dta.all()
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_any):
             dta.any()
 
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_all):
             ser.all()
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_any):
             ser.any()
 
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_any):
             df.any().all()
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg_all):
             df.all().all()
 
         tda = dta - dta[0]
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 845b5ad7acc00..7878573a99fc7 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -710,7 +710,7 @@ def test_selection_api_validation():
    exp.index.name = "d"
 
    with pytest.raises(
-        TypeError, match="datetime64 type does not support operation 'sum'"
+        TypeError, match=r"operation 'sum' is not supported for dtype 'datetime64\[ns\]'"
    ):
        df.resample("2D", level="d").sum()
    result = df.resample("2D", level="d").sum(numeric_only=True)
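
Illustrative usage (a sketch, not part of the diff): with this patch applied, an
unsupported reduction is expected to raise the unified message below. The exact
dtype string in the message depends on the array's resolution; 'datetime64[ns]'
is assumed here.

    import pandas as pd

    ser = pd.Series(pd.to_datetime(["2024-01-01", "2024-01-02"]))
    try:
        # 'sum' is not in DatetimeArray._supports_reduction (min/max/mean/median/std are)
        ser.sum()
    except TypeError as err:
        print(err)  # operation 'sum' is not supported for dtype 'datetime64[ns]'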