pandas-dev · pandeconscious · Oct 23, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -211,6 +211,7 @@ Other enhancements
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
+- :meth:`Series.corr`, :meth:`DataFrame.corr` and :meth:`DataFrame.corrwith` with ``method="kendall"`` or ``method="spearman"`` now support ordered categorical dtypes (:issue:`60306`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
 - :meth:`Series.rank` and :meth:`DataFrame.rank` with numpy-nullable dtypes preserve ``NA`` values and return ``UInt64`` dtype where appropriate instead of casting ``NA`` to ``NaN`` with ``float64`` dtype (:issue:`62043`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -185,6 +185,7 @@
     treat_as_nested,
 )
 from pandas.core.methods import selectn
+from pandas.core.methods.corr import transform_ord_cat_cols_to_coded_cols
 from pandas.core.reshape.melt import melt
 from pandas.core.series import Series
 from pandas.core.shared_docs import _shared_docs
@@ -11724,6 +11725,10 @@ def corr(
         data = self._get_numeric_data() if numeric_only else self
         cols = data.columns
         idx = cols.copy()
+
+        if method in ("spearman", "kendall"):
+            data = transform_ord_cat_cols_to_coded_cols(data)
+
         mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
 
         if method == "pearson":
@@ -12013,6 +12018,8 @@ def corrwith(
             correl = num / dom
 
         elif method in ["kendall", "spearman"] or callable(method):
+            left = transform_ord_cat_cols_to_coded_cols(left)
+            right = transform_ord_cat_cols_to_coded_cols(right)
 
             def c(x):
                 return nanops.nancorr(x[0], x[1], method=method)

diff --git a/pandas/core/methods/corr.py b/pandas/core/methods/corr.py
@@ -0,0 +1,32 @@
+"""
+Helpers for correlation-related computations.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from pandas.core.dtypes.dtypes import CategoricalDtype
+
+if TYPE_CHECKING:
+    from pandas import DataFrame
+
+
+def transform_ord_cat_cols_to_coded_cols(df: DataFrame) -> DataFrame:
+    """
+    Return ``df`` with ordered categorical columns replaced by their codes (-1 -> NaN).
+    """
+    # Shallow-copy lazily: ``df`` itself is returned when no column needs replacing.
+    result = df
+    made_copy = False
+    for idx, dtype in enumerate(df.dtypes):
+        if not isinstance(dtype, CategoricalDtype) or not dtype.ordered:
+            continue
+        col = result._ixs(idx, axis=1)
+        if not made_copy:
+            made_copy = True
+            result = result.copy(deep=False)
+        result._iset_item(idx, col.cat.codes.replace(-1, np.nan))
+    return result
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2738,6 +2738,12 @@ def corr(
         if len(this) == 0:
             return np.nan
 
+        if method in ("spearman", "kendall"):
+            if this.dtype == "category" and this.cat.ordered:
+                this = this.cat.codes.replace(-1, np.nan)
+            if other.dtype == "category" and other.cat.ordered:
+                other = other.cat.codes.replace(-1, np.nan)
+
         this_values = this.to_numpy(dtype=float, na_value=np.nan, copy=False)
         other_values = other.to_numpy(dtype=float, na_value=np.nan, copy=False)
 

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -1,3 +1,5 @@
+from itertools import combinations
+
 import numpy as np
 import pytest
 
@@ -252,6 +254,63 @@ def test_corr_numeric_only(self, meth, numeric_only):
             with pytest.raises(ValueError, match="could not convert string to float"):
                 df.corr(meth, numeric_only=numeric_only)
 
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @td.skip_if_no("scipy")
+    def test_corr_rank_ordered_categorical(
+        self,
+        method,
+    ):
+        # GH #60306: each DataFrame.corr entry must match the pairwise Series.corr
+        df = DataFrame(
+            {
+                "ord_cat": pd.Categorical(
+                    ["low", "m", "h", "vh"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+                "ord_cat_none": pd.Categorical(
+                    ["low", "m", "h", None],
+                    categories=["low", "m", "h"],
+                    ordered=True,
+                ),
+                "ord_cat_shuff": pd.Categorical(
+                    ["m", "h", "vh", "low"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+            }
+        )
+        corr_calc = df.corr(method=method)
+        for col1, col2 in combinations(df.columns, r=2):
+            corr_expected = df[col1].corr(df[col2], method=method)
+            tm.assert_almost_equal(corr_calc[col1][col2], corr_expected)
+
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @td.skip_if_no("scipy")
+    def test_corr_rank_ordered_categorical_duplicate_columns(
+        self,
+        method,
+    ):
+        # GH #60306: same check by position, since labels "a" and "c" are duplicated
+        cat = pd.CategoricalDtype(categories=[4, 3, 2, 1], ordered=True)
+        df = DataFrame(
+            {
+                "a": pd.array([1, 2, 3, 4], dtype=cat),
+                "b": pd.array([4, 3, 2, 1], dtype=cat),
+                "c": [4, 3, 2, 1],
+                "d": [10, 20, 30, 40],
+                "e": [100, 200, 300, 400],
+            }
+        )
+        df.columns = ["a", "a", "c", "c", "e"]
+
+        corr_calc = df.corr(method=method)
+        for col1_idx, col2_idx in combinations(range(len(df.columns)), r=2):
+            corr_expected = df.iloc[:, col1_idx].corr(
+                df.iloc[:, col2_idx], method=method
+            )
+            tm.assert_almost_equal(corr_calc.iloc[col1_idx, col2_idx], corr_expected)
+
 
 class TestDataFrameCorrWith:
     @pytest.mark.parametrize(
@@ -493,3 +552,51 @@ def test_cov_with_missing_values(self):
         result2 = df.dropna().cov()
         tm.assert_frame_equal(result1, expected)
         tm.assert_frame_equal(result2, expected)
+
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    def test_corr_rank_ordered_categorical(
+        self,
+        method,
+    ):
+        # GH #60306: corrwith must match column-wise Series.corr for every column
+        pytest.importorskip("scipy")
+        df1 = DataFrame(
+            {
+                "a": Series(
+                    pd.Categorical(
+                        ["low", "m", "h", "vh"],
+                        categories=["low", "m", "h", "vh"],
+                        ordered=True,
+                    )
+                ),
+                "b": Series(
+                    pd.Categorical(
+                        ["low", "m", "h", None],
+                        categories=["low", "m", "h"],
+                        ordered=True,
+                    )
+                ),
+                "c": Series([0, 1, 2, 3]),
+                "d": Series([2.0, 3.0, 4.5, 6.5]),
+            }
+        )
+
+        df2 = DataFrame(
+            {
+                "a": Series([2.0, 3.0, 4.5, np.nan]),
+                "b": Series(
+                    pd.Categorical(
+                        ["m", "h", "vh", "low"],
+                        categories=["low", "m", "h", "vh"],
+                        ordered=True,
+                    )
+                ),
+                "c": Series([2, 3, 0, 1]),
+                "d": Series([2.0, 3.0, 4.5, 6.5]),
+            }
+        )
+
+        corr_calc = df1.corrwith(df2, method=method)
+        for col in df1.columns:
+            corr_expected = df1[col].corr(df2[col], method=method)
+            tm.assert_almost_equal(corr_calc.get(col), corr_expected)
diff --git a/pandas/tests/methods/corr.py b/pandas/tests/methods/corr.py
@@ -0,0 +1,150 @@
+"""
+Tests for core/methods/corr.py
+"""
+
+import numpy as np
+import pytest
+
+from pandas import (
+    Categorical,
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+from pandas.core.methods.corr import transform_ord_cat_cols_to_coded_cols
+
+
+@pytest.mark.parametrize(
+    ("input_df", "expected_df"),
+    [
+        pytest.param(
+            # 1) Simple: two ordered categorical columns (with and without None)
+            DataFrame(
+                {
+                    "ord_cat": Series(
+                        Categorical(
+                            ["low", "m", "h", "vh"],
+                            categories=["low", "m", "h", "vh"],
+                            ordered=True,
+                        )
+                    ),
+                    "ord_cat_none": Series(
+                        Categorical(
+                            ["low", "m", "h", None],
+                            categories=["low", "m", "h"],
+                            ordered=True,
+                        )
+                    ),
+                }
+            ),
+            DataFrame(
+                {
+                    # codes: low=0, m=1, h=2, vh=3
+                    "ord_cat": Series([0, 1, 2, 3], dtype="int8"),
+                    # codes: low=0, m=1, h=2, None -> NaN
+                    "ord_cat_none": Series([0, 1.0, 2.0, np.nan]),
+                }
+            ),
+            id="ordered-categoricals-basic",
+        ),
+        pytest.param(
+            # 2) Mixed dtypes: only the ordered categorical should change
+            DataFrame(
+                {
+                    "ordered": Series(
+                        Categorical(
+                            ["a", "c", "b"],
+                            categories=["a", "b", "c"],
+                            ordered=True,
+                        )
+                    ),
+                    "unordered": Series(Categorical(["x", "y", "x"], ordered=False)),
+                    "num": Series([10, 20, 30]),
+                    "text": Series(["u", "v", "w"]),
+                }
+            ),
+            DataFrame(
+                {
+                    # codes: a=0, c=2, b=1
+                    "ordered": Series([0, 2, 1], dtype="int8"),
+                    # unordered categorical should be untouched (still categorical)
+                    "unordered": Series(Categorical(["x", "y", "x"], ordered=False)),
+                    "num": Series([10, 20, 30]),
+                    "text": Series(["u", "v", "w"]),
+                }
+            ),
+            id="mixed-types-only-ordered-changes",
+        ),
+        pytest.param(
+            # 3) Duplicate column names: first 'dup' is ordered categorical,
+            # second 'dup' is non-categorical
+            DataFrame(
+                {
+                    "dup_1": Series(
+                        Categorical(
+                            ["low", "m", "h"],
+                            categories=["low", "m", "h"],
+                            ordered=True,
+                        )
+                    ),
+                    "dup_2": Series([5, 6, 7]),  # duplicate name, later column
+                }
+            ),
+            DataFrame(
+                {
+                    # After transform: position 0 (ordered cat) becomes codes [0,1,2],
+                    # position 1 remains untouched numbers [5,6,7].
+                    "dup_1": Series([0, 1, 2], dtype="int8"),
+                    "dup_2": Series([5, 6, 7]),
+                }
+            ),
+            id="duplicate-names-ordered-first",
+        ),
+        pytest.param(
+            # 4) Duplicate column names: first 'dup' is non-categorical,
+            # second and third 'dup' are ordered categoricals (second has a None)
+            DataFrame(
+                {
+                    "dup_1": Series(["a", "b", "c"]),  # non-categorical (object)
+                    "dup_2": Series(
+                        Categorical(
+                            ["p", "q", None],
+                            categories=["p", "q"],
+                            ordered=True,
+                        )
+                    ),
+                    "dup_3": Series(
+                        Categorical(
+                            ["low", "m", "h"],
+                            categories=["low", "m", "h"],
+                            ordered=True,
+                        )
+                    ),
+                }
+            ),
+            DataFrame(
+                {
+                    # First stays object; second turns into codes [0, 1, NaN]
+                    # and third changes into codes [0, 1, 2]
+                    "dup_1": Series(["a", "b", "c"]),
+                    "dup_2": Series([0.0, 1.0, np.nan]),
+                    "dup_3": Series([0, 1, 2], dtype="int8"),
+                }
+            ),
+            id="duplicate-names-ordered-and-non-categorical-and-none",
+        ),
+    ],
+)
+def test_transform_ord_cat_cols_to_coded_cols(
+    input_df: DataFrame, expected_df: DataFrame
+) -> None:
+    # GH #60306
+    # Frames with a "dup_1" column are relabelled so every column is named "dup".
+    if "dup_1" in input_df.columns:
+        input_df.columns = ["dup" for _ in range(len(input_df.columns))]
+        expected_df.columns = ["dup" for _ in range(len(expected_df.columns))]
+
+    out_df = transform_ord_cat_cols_to_coded_cols(input_df)
+    assert list(out_df.columns) == list(expected_df.columns)
+    for i, col in enumerate(out_df.columns):
+        tm.assert_series_equal(out_df.iloc[:, i], expected_df.iloc[:, i])