From 4eeba15b3857405aef7218ee10907d070a42d148 Mon Sep 17 00:00:00 2001 From: C1-BA-B1-F3 Date: Fri, 26 Jun 2026 09:26:34 +0800 Subject: [PATCH 1/3] Fix MultiIndex sel with tuple-valued levels (GH#11341) Problem: When a MultiIndex level contains tuple-valued entries (e.g., (1,1)), selecting with a nested tuple key like ((1,1), 2) incorrectly preserved the dimension instead of collapsing it to a scalar result. Root cause: _is_nested_tuple() was checking for 'tuple' in addition to 'list' and 'slice', which caused it to misidentify tuple-valued keys as nested selection tuples. Fix: Remove 'tuple' from the isinstance check in _is_nested_tuple() so that only 'list' and 'slice' are treated as indicators of nested selections. Tuple-valued keys in MultiIndex levels are now correctly handled as scalar key values. Added regression test for selecting with nested tuple keys on MultiIndex with tuple-valued levels. --- xarray/core/indexes.py | 2 +- xarray/tests/test_indexes.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 2242e57e482..123bc75a21b 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -610,7 +610,7 @@ def _asarray_tuplesafe(values): def _is_nested_tuple(possible_tuple): return isinstance(possible_tuple, tuple) and any( - isinstance(value, tuple | list | slice) for value in possible_tuple + isinstance(value, list | slice) for value in possible_tuple ) diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index 94adcc3b935..0dbdaba5043 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -509,6 +509,28 @@ def test_sel(self) -> None: with pytest.raises(IndexError): index.sel({"x": (slice(None), 1, "no_level")}) + def test_sel_nested_tuple_key(self) -> None: + """Test that tuple-valued MultiIndex levels can be selected with a single key. 
+ + Regression test for GH#11341: when a MultiIndex level contains tuples, + selecting with a nested tuple key ((1, 1), 2) should collapse the dimension + just like selecting with a non-nested tuple key (1, 2). + """ + # Create a MultiIndex where the first level contains tuples + nested_level_0 = pd.Index( + [(1, 1), (1, 1), (2, 2), (3, 3)], name="a", tupleize_cols=False + ) + nested_level_1 = pd.Index([1, 2, 10, 20], name="b") + nested_mi = pd.MultiIndex.from_arrays([nested_level_0, nested_level_1]) + + index = PandasMultiIndex(nested_mi, "index") + + # Select with a nested tuple key - should return scalar indexer + actual = index.sel({"index": ((1, 1), 2)}) + # pandas.get_loc returns an integer for exact match + expected_dim_indexers = {"index": 1} + assert actual.dim_indexers == expected_dim_indexers + def test_join(self): midx = pd.MultiIndex.from_product([["a", "aa"], [1, 2]], names=("one", "two")) level_coords_dtype = {"one": "=U2", "two": "i"} From 8095f83e6f5e172fec5ec5fe0363a3465763e9bf Mon Sep 17 00:00:00 2001 From: C1-BA-B1-F3 Date: Fri, 26 Jun 2026 09:35:39 +0800 Subject: [PATCH 2/3] fix: handle non-numpy dtypes in PandasIndex.concat() and join() When concatenating indexes with mixed string types (e.g., numpy string dtype and pandas StringDtype), np.result_type() fails because it cannot interpret extension dtypes. This fix checks if all dtypes are valid numpy dtypes before calling np.result_type(), falling back to object dtype if not. 
Fixes GH#11317 --- xarray/core/indexes.py | 13 +++++++++++-- xarray/tests/test_concat.py | 10 ++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 123bc75a21b..55af4fe2d61 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -26,6 +26,7 @@ is_allowed_extension_array_dtype, is_dict_like, is_scalar, + is_valid_numpy_dtype, ) if TYPE_CHECKING: @@ -785,7 +786,12 @@ def concat( if len(indexes_coord_dtypes) == 1: coord_dtype = next(iter(indexes_coord_dtypes)) else: - coord_dtype = np.result_type(*indexes_coord_dtypes) + # Check if all dtypes are valid numpy dtypes before using np.result_type + # (e.g., pandas StringDtype is not a valid numpy dtype, GH#11317) + if all(is_valid_numpy_dtype(dt) for dt in indexes_coord_dtypes): + coord_dtype = np.result_type(*indexes_coord_dtypes) + else: + coord_dtype = np.dtype("O") return cls(new_pd_index, dim=dim, coord_dtype=coord_dtype) @@ -914,7 +920,10 @@ def join( index = self.index.intersection(other.index) if is_allowed_extension_array_dtype(index.dtype): return type(self)(index, self.dim) - coord_dtype = np.result_type(self.coord_dtype, other.coord_dtype) + if is_valid_numpy_dtype(self.coord_dtype) and is_valid_numpy_dtype(other.coord_dtype): + coord_dtype = np.result_type(self.coord_dtype, other.coord_dtype) + else: + coord_dtype = np.dtype("O") return type(self)(index, self.dim, coord_dtype=coord_dtype) def reindex_like( diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index bc98d72d50c..4ed0315dfca 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1811,3 +1811,13 @@ def test_concat_different_dims_in_different_child(self): actual = concat([dt1, dt2], dim="x") expected = DataTree.from_dict(coords={"/first/x": [1, 3], "/second/x": [2, 4]}) assert actual.identical(expected) + + +def test_concat_string_dtype_from_pd_index(): + # Regression test for GH#11317: concat fails due to StringDtype 
introduced by pd.Index + da = DataArray([0], dims=["dim_a"], coords=dict(dim_a=["a"])) + db = DataArray([0]) + db2 = concat([db], pd.Index(["b"], name="dim_a")) + result = concat([da, db2], dim="dim_a") + assert result.sizes["dim_a"] == 2 + assert list(result.coords["dim_a"].values) == ["a", "b"] From c19bf8e3fe6299aed4f6a0e357f499401ea7af65 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Jun 2026 01:37:45 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/indexes.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 55af4fe2d61..33e974fec50 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -785,13 +785,12 @@ def concat( indexes_coord_dtypes = {idx.coord_dtype for idx in indexes} if len(indexes_coord_dtypes) == 1: coord_dtype = next(iter(indexes_coord_dtypes)) + # Check if all dtypes are valid numpy dtypes before using np.result_type + # (e.g., pandas StringDtype is not a valid numpy dtype, GH#11317) + elif all(is_valid_numpy_dtype(dt) for dt in indexes_coord_dtypes): + coord_dtype = np.result_type(*indexes_coord_dtypes) else: - # Check if all dtypes are valid numpy dtypes before using np.result_type - # (e.g., pandas StringDtype is not a valid numpy dtype, GH#11317) - if all(is_valid_numpy_dtype(dt) for dt in indexes_coord_dtypes): - coord_dtype = np.result_type(*indexes_coord_dtypes) - else: - coord_dtype = np.dtype("O") + coord_dtype = np.dtype("O") return cls(new_pd_index, dim=dim, coord_dtype=coord_dtype) @@ -920,7 +919,9 @@ def join( index = self.index.intersection(other.index) if is_allowed_extension_array_dtype(index.dtype): return type(self)(index, self.dim) - if is_valid_numpy_dtype(self.coord_dtype) and is_valid_numpy_dtype(other.coord_dtype): + if is_valid_numpy_dtype(self.coord_dtype) and 
is_valid_numpy_dtype( + other.coord_dtype + ): coord_dtype = np.result_type(self.coord_dtype, other.coord_dtype) else: coord_dtype = np.dtype("O")