From 1a03e391843afb457ee109b4903ffda434f85520 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 13 Jun 2026 18:35:58 +1200 Subject: [PATCH 01/10] Support CuPy-backed arrays in DaskManager.from_array method The "meta" argument passed to dask.array.from_array should not be hardcoded to just `numpy.ndarray`, but allow for `cupy.ndarray` too. --- xarray/namedarray/daskmanager.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index eb01a150c18..8d45a5d0480 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -5,6 +5,7 @@ import numpy as np +from xarray.compat.array_api_compat import get_array_namespace from xarray.core.indexing import ImplicitToExplicitIndexingAdapter from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray from xarray.namedarray.utils import is_duck_dask_array, module_available @@ -68,8 +69,9 @@ def from_array( import dask.array as da if isinstance(data, ImplicitToExplicitIndexingAdapter): - # lazily loaded backend array classes should use NumPy array operations. - kwargs["meta"] = np.ndarray + # lazily loaded backend array classes should use NumPy or CuPy array operations. + xp = get_array_namespace(data.get_duck_array()) + kwargs["meta"] = xp.ndarray return da.from_array( data, From 4465ea4375b59e883dae1b5fed1661e833d7911d Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 13 Jun 2026 19:33:03 +1200 Subject: [PATCH 02/10] Fix no-untyped-call Not sure how to type-hint np | cp | ??, so just use Any for output of get_array_namespace. 
--- xarray/compat/array_api_compat.py | 4 +++- xarray/core/indexing.py | 2 +- xarray/namedarray/pycompat.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/compat/array_api_compat.py b/xarray/compat/array_api_compat.py index e1e5d5c5bdc..ddbab7212b1 100644 --- a/xarray/compat/array_api_compat.py +++ b/xarray/compat/array_api_compat.py @@ -1,3 +1,5 @@ +from typing import Any + import numpy as np from xarray.namedarray.pycompat import array_type @@ -46,7 +48,7 @@ def result_type(*arrays_and_dtypes, xp) -> np.dtype: return _future_array_api_result_type(*arrays_and_dtypes, xp=xp) -def get_array_namespace(*values): +def get_array_namespace(*values) -> Any: def _get_single_namespace(x): if hasattr(x, "__array_namespace__"): return x.__array_namespace__() diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index bb12704e55c..c0cb9a5777f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -687,7 +687,7 @@ def __array__( else: return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): + def get_duck_array(self) -> duckarray: return self.array.get_duck_array() def __getitem__(self, key: Any): diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 5832f7cc9e7..b41711e07ab 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -140,7 +140,7 @@ def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, return loaded_data if isinstance(data, ExplicitlyIndexed | ImplicitToExplicitIndexingAdapter): - return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] + return data.get_duck_array() elif is_duck_array(data): return data else: From b77cc570f9d52cbd436b1c28a085d43e9ba5ae71 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 15 Jun 2026 13:12:50 +1200 Subject: [PATCH 03/10] Implement __array_namespace__ method on NDArrayMixin Centralize retrieving of the 
__array_namespace__ through several subclassed layers, to avoid having to go through `.get_duck_array()`. Need to put `from xarray.compat.array_api_compat import get_array_namespace` import within the method to avoid circular import. Also type-hinted output of `get_array_namespace` as ModuleType following https://github.com/numpy/numpy/pull/20719. --- xarray/compat/array_api_compat.py | 4 ++-- xarray/core/utils.py | 5 +++++ xarray/namedarray/daskmanager.py | 3 +-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/xarray/compat/array_api_compat.py b/xarray/compat/array_api_compat.py index ddbab7212b1..575f8cdf07d 100644 --- a/xarray/compat/array_api_compat.py +++ b/xarray/compat/array_api_compat.py @@ -1,4 +1,4 @@ -from typing import Any +from types import ModuleType import numpy as np @@ -48,7 +48,7 @@ def result_type(*arrays_and_dtypes, xp) -> np.dtype: return _future_array_api_result_type(*arrays_and_dtypes, xp=xp) -def get_array_namespace(*values) -> Any: +def get_array_namespace(*values) -> ModuleType: def _get_single_namespace(x): if hasattr(x, "__array_namespace__"): return x.__array_namespace__() diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 100c256fa9d..b9e0ebf5442 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -704,6 +704,11 @@ def dtype(self: Any) -> np.dtype: def shape(self: Any) -> tuple[int, ...]: return self.array.shape + def __array_namespace__(self: Any) -> ModuleType: + from xarray.compat.array_api_compat import get_array_namespace + + return get_array_namespace(self.array) + def __getitem__(self: Any, key): return self.array[key] diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 8d45a5d0480..c03b9a4da13 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -5,7 +5,6 @@ import numpy as np -from xarray.compat.array_api_compat import get_array_namespace from xarray.core.indexing import ImplicitToExplicitIndexingAdapter from 
xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray from xarray.namedarray.utils import is_duck_dask_array, module_available @@ -70,7 +69,7 @@ def from_array( if isinstance(data, ImplicitToExplicitIndexingAdapter): # lazily loaded backend array classes should use NumPy or CuPy array operations. - xp = get_array_namespace(data.get_duck_array()) + xp = data.__array_namespace__() kwargs["meta"] = xp.ndarray return da.from_array( From 286053fdc102a49f807c426e3f47d9eaa3aa8905 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Wed, 17 Jun 2026 22:26:13 +1200 Subject: [PATCH 04/10] Fix reprs for LazilyIndexedArray, MemoryCachedArray and IndexingAdapter To fix repr AssertionError mismatches on: - TestVariable::test_repr_lazy_data - test_repr_pandas_multi_index - test_repr_pandas_range_index - test_display_nbytes - test_repr_file_collapsed - test_coordinate_transform_variable_repr by preventing specific xarray internal array types from going through the is_duck_array repr path. 
--- xarray/core/formatting.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index b6a6bd2c4b4..36d20c545b9 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -23,6 +23,8 @@ from xarray.core.indexing import ( BasicIndexer, ExplicitlyIndexed, + IndexingAdapter, + LazilyIndexedArray, MemoryCachedArray, ) from xarray.core.options import OPTIONS, _get_boolean_with_default @@ -700,7 +702,9 @@ def short_data_repr(array): if isinstance(array, np.ndarray): return short_array_repr(array) - elif is_duck_array(internal_data): + elif not isinstance( + internal_data, (LazilyIndexedArray, MemoryCachedArray, IndexingAdapter) + ) and is_duck_array(internal_data): return limit_lines(repr(array.data), limit=40) elif getattr(array, "_in_memory", None): return short_array_repr(array) From 551abe887e7c95a29b29f31305cb8d9bc17a1a00 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Wed, 17 Jun 2026 22:40:48 +1200 Subject: [PATCH 05/10] Fix mypy unused-ignore --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 23d55ee0a11..eab7503f262 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -843,7 +843,7 @@ def chunk( # Using OuterIndexer is a pragmatic choice: dask does not yet handle # different indexing types in an explicit way: # https://github.com/dask/dask/issues/2883 - ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[assignment] + ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) if is_dict_like(chunks): chunks = tuple(starmap(chunks.get, enumerate(ndata.shape))) From 7132f73ac00053196cb9e642e8596f60558269b3 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 18 Jun 2026 13:25:23 +1200 Subject: [PATCH 06/10] Put __array_namespace__ on 
ImplicitToExplicitIndexingAdapter instead Move `__array_namespace__` method from NDArrayMixin to just ImplicitToExplicitIndexingAdapter. --- xarray/core/formatting.py | 6 +----- xarray/core/indexing.py | 5 +++++ xarray/core/utils.py | 5 ----- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 36d20c545b9..b6a6bd2c4b4 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -23,8 +23,6 @@ from xarray.core.indexing import ( BasicIndexer, ExplicitlyIndexed, - IndexingAdapter, - LazilyIndexedArray, MemoryCachedArray, ) from xarray.core.options import OPTIONS, _get_boolean_with_default @@ -702,9 +700,7 @@ def short_data_repr(array): if isinstance(array, np.ndarray): return short_array_repr(array) - elif not isinstance( - internal_data, (LazilyIndexedArray, MemoryCachedArray, IndexingAdapter) - ) and is_duck_array(internal_data): + elif is_duck_array(internal_data): return limit_lines(repr(array.data), limit=40) elif getattr(array, "_in_memory", None): return short_array_repr(array) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 23ae15eeefe..fa6c94c7e10 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -9,6 +9,7 @@ from contextlib import suppress from dataclasses import dataclass, field from datetime import timedelta +from types import ModuleType from typing import TYPE_CHECKING, Any, cast, overload import numpy as np @@ -16,6 +17,7 @@ from numpy.typing import DTypeLike from packaging.version import Version +from xarray.compat.array_api_compat import get_array_namespace from xarray.compat.npcompat import HAS_STRING_DTYPE from xarray.core import duck_array_ops from xarray.core.coordinate_transform import CoordinateTransform @@ -693,6 +695,9 @@ def __array__( else: return np.asarray(to_numpy(self.get_duck_array()), dtype=dtype) + def __array_namespace__(self: Any) -> ModuleType: + return get_array_namespace(self.array) + def 
get_duck_array(self) -> duckarray: return self.array.get_duck_array() diff --git a/xarray/core/utils.py b/xarray/core/utils.py index b9e0ebf5442..100c256fa9d 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -704,11 +704,6 @@ def dtype(self: Any) -> np.dtype: def shape(self: Any) -> tuple[int, ...]: return self.array.shape - def __array_namespace__(self: Any) -> ModuleType: - from xarray.compat.array_api_compat import get_array_namespace - - return get_array_namespace(self.array) - def __getitem__(self: Any, key): return self.array[key] From 39909d5fbbe3f30a34f9acc5c23e688bcd34d55f Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 26 Jun 2026 22:03:58 +1200 Subject: [PATCH 07/10] Test to check duck_arrays that are chunked then computed return the same An xarray.Dataset backed by duck arrays that get chunked into dask.Arrays should have their meta type show as that duck array type. On calling .compute(), the underlying duck array should be returned, and not a numpy array. 
--- xarray/tests/test_duck_array_wrapping.py | 26 +++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_duck_array_wrapping.py b/xarray/tests/test_duck_array_wrapping.py index 9bbc3d9b06a..f39dfae53fb 100644 --- a/xarray/tests/test_duck_array_wrapping.py +++ b/xarray/tests/test_duck_array_wrapping.py @@ -3,6 +3,7 @@ import pytest import xarray as xr +from xarray.tests import requires_dask # Don't run cupy in CI because it requires a GPU NAMESPACE_ARRAYS = { @@ -22,6 +23,7 @@ "argsort": "no argsort", "conjugate": "conj but no conjugate", "searchsorted": "dask.array.searchsorted but no Array.searchsorted", + "dask_chunk_compute_roundtrip": "no need to test dask with dask", }, }, "jax.numpy": { @@ -123,7 +125,7 @@ def setup_for_test(self, request, namespace): reason = NAMESPACE_ARRAYS[namespace]["xfails"][xarray_method] pytest.xfail(f"xfail for {self.namespace}: {reason}") - def get_test_dataarray(self): + def get_test_dataarray(self) -> xr.DataArray: data = np.asarray([[1, 2, 3, np.nan, 5]]) x = np.arange(5) data = self.constructor(data) @@ -516,3 +518,25 @@ def test_sortby(self): def test_broadcast_like(self): result = self.x.broadcast_like(self.x) assert isinstance(result.data, self.Array) + + +@pytest.mark.parametrize("namespace", NAMESPACE_ARRAYS) +class TestDatasetMethods(_BaseTest): + @pytest.fixture(autouse=True) + def setUp(self, request, namespace): + self.setup_for_test(request, namespace) + self.ds = self.get_test_dataarray().to_dataset() + + @requires_dask + def test_dask_chunk_compute_roundtrip(self): + """ + Ensure duck arrays chunked into a dask.Array get returned as duck arrays + (and not numpy array) after calling `.compute()`. 
+ """ + chunked_ds = self.ds.chunk(x=2, chunked_array_type="dask") + assert isinstance(chunked_ds.foo.data._meta, self.Array) + + computed_ds = chunked_ds.compute() + assert isinstance(computed_ds.foo.data, self.Array), ( + f"Expected: {self.Array}, got {computed_ds.foo.data.__class__}" + ) From 84cc75711d53ff4a9ff203cb135b19ff844638f6 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 27 Jun 2026 12:43:37 +1200 Subject: [PATCH 08/10] Still need __array_namespace__ method on ExplicitlyIndexed So that backends loading duck arrays lazily have the correct array type metadata. Needed for https://github.com/xarray-contrib/cupy-xarray/pull/81, e.g. this will make the xarray/dask repr show cupy.ndarray instead of numpy.ndarray for the chunk types. Partially reverts 7132f73ac00053196cb9e642e8596f60558269b3 --- xarray/core/formatting.py | 4 +++- xarray/core/indexing.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index b6a6bd2c4b4..60123e318ba 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -700,7 +700,9 @@ def short_data_repr(array): if isinstance(array, np.ndarray): return short_array_repr(array) - elif is_duck_array(internal_data): + elif not isinstance(internal_data, (ExplicitlyIndexed)) and is_duck_array( + internal_data + ): return limit_lines(repr(array.data), limit=40) elif getattr(array, "_in_memory", None): return short_array_repr(array) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index fa6c94c7e10..d7a12788221 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -612,6 +612,9 @@ def __array__( else: return np.asarray(self.get_duck_array(), dtype=dtype) + def __array_namespace__(self: Any) -> ModuleType: + return get_array_namespace(self.array) + def get_duck_array(self): return self.array From f0e74e73ccfca9a49611a7284afdb76757690808 Mon Sep 17 00:00:00 2001 From: Wei Ji 
<23487320+weiji14@users.noreply.github.com> Date: Sat, 27 Jun 2026 13:21:22 +1200 Subject: [PATCH 09/10] Put __array_namespace__ on CopyOnWriteArray instead Patches 84cc75711d53ff4a9ff203cb135b19ff844638f6, narrowed it down further from ExplicitlyIndexed -> ExplicitlyIndexedNDArrayMixin -> CopyOnWriteArray for the purposes of https://github.com/xarray-contrib/cupy-xarray/pull/81. Can revert the formatting fixes now. --- xarray/core/formatting.py | 4 +--- xarray/core/indexing.py | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 60123e318ba..b6a6bd2c4b4 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -700,9 +700,7 @@ def short_data_repr(array): if isinstance(array, np.ndarray): return short_array_repr(array) - elif not isinstance(internal_data, (ExplicitlyIndexed)) and is_duck_array( - internal_data - ): + elif is_duck_array(internal_data): return limit_lines(repr(array.data), limit=40) elif getattr(array, "_in_memory", None): return short_array_repr(array) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index d7a12788221..71fd262c259 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -612,9 +612,6 @@ def __array__( else: return np.asarray(self.get_duck_array(), dtype=dtype) - def __array_namespace__(self: Any) -> ModuleType: - return get_array_namespace(self.array) - def get_duck_array(self): return self.array @@ -940,6 +937,9 @@ def get_duck_array(self): async def async_get_duck_array(self): return await self.array.async_get_duck_array() + def __array_namespace__(self: Any) -> ModuleType: + return get_array_namespace(self.array) + def _oindex_get(self, indexer: OuterIndexer): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) From b67ad6223a545db12da95d1690f9358d7e881e80 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sat, 27 Jun 2026 13:55:34 +1200 Subject: [PATCH 
10/10] Also need __array_namespace__ on NdArrayLikeIndexingAdapter Additionally to f0e74e73ccfca9a49611a7284afdb76757690808, because I have a CopyOnWriteArray that wraps an NdArrayLikeIndexingAdapter, so need __array_namespace__ on both in order for the xarray/dask repr to show cupy.ndarray instead of numpy.ndarray. Hopefully this is the last of it! --- xarray/core/indexing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 71fd262c259..acefe6243c9 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1771,6 +1771,9 @@ def __init__(self, array): ) self.array = array + def __array_namespace__(self: Any) -> ModuleType: + return get_array_namespace(self.array) + class ArrayApiIndexingAdapter(IndexingAdapter): """Wrap an array API array to use explicit indexing."""