diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fc8edb1473c..562314e1b17 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3060,6 +3060,19 @@ def unstack( >>> arr.identical(roundtripped) True + Notes + ----- + **Sort order** + + When the stacked dimension's ``MultiIndex`` was created by pandas (for + example, by wrapping a ``pd.Series`` indexed by a ``pd.MultiIndex``), pandas + typically stores ``MultiIndex.levels`` in **sorted order** regardless of + insertion order. In that case the coordinates of the new dimensions + after unstacking will also be sorted. When the ``MultiIndex`` was + created by :py:meth:`DataArray.stack`, xarray preserves insertion + order instead. Use :py:meth:`DataArray.sel` with an explicit list of + labels after unstacking if a specific order is needed. + See Also -------- DataArray.stack diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1ce84904623..dfe6a105248 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5624,6 +5624,19 @@ def unstack( unstacked : Dataset Dataset with unstacked data. + Notes + ----- + **Sort order** + + When the stacked dimension's ``MultiIndex`` was created by pandas (for + example, by wrapping a ``pd.DataFrame`` or ``pd.Series`` indexed by a + ``pd.MultiIndex``), pandas typically stores ``MultiIndex.levels`` in **sorted + order** regardless of insertion order. In that case the coordinates + of the new dimensions after unstacking will also be sorted. When the + ``MultiIndex`` was created by :py:meth:`Dataset.stack`, xarray + preserves insertion order instead. Use :py:meth:`Dataset.sel` with an + explicit list of labels after unstacking if a specific order is needed. 
+ See Also -------- Dataset.stack diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 377fd2f8a8b..2817b81afc6 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2811,6 +2811,32 @@ def test_stack_unstack_decreasing_coordinate(self) -> None: actual = stacked.unstack("allpoints") assert_identical(orig, actual) + def test_unstack_coords_are_sorted(self) -> None: + # When a DataArray is built from a pandas MultiIndex (e.g. from a + # pandas Series), the MultiIndex.levels stored by pandas are in sorted + # order regardless of insertion order. Consequently, the coordinates + # of the new dimensions after unstack() are sorted, not in the + # original insertion order. + # + # This is distinct from xarray's own stack(), which preserves + # insertion order via factorize() for non-monotonic inputs. + arrays: list[np.ndarray] = [ + np.array(["c", "a", "b"]),  # deliberately not in sorted order + np.array([1, 0, 0]),  # deliberately not in sorted order + ] + midx = pd.MultiIndex.from_arrays(arrays, names=["x", "y"]) + s = pd.Series([10, 20, 30], index=midx, name="val") + + da = DataArray(s, dims="z") + unstacked = da.unstack("z") + + # pandas sorted the MultiIndex levels, so unstacked coords are sorted + assert list(unstacked["x"].values) == ["a", "b", "c"] + assert list(unstacked["y"].values) == [0, 1] + + # .sel() with an explicit label list restores the insertion order + assert list(unstacked.sel(x=["c", "a", "b"])["x"].values) == ["c", "a", "b"] + def test_unstack_pandas_consistency(self) -> None: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) s = df.set_index(["x", "y"])["foo"]