From be58818ca28695b287f35592bb4a65fac5689d77 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Nov 2025 09:59:37 -0700 Subject: [PATCH 1/4] Fix CoordinateTransformIndexingAdapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- xarray/core/indexing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index eead0a1b8af..b1d72baf495 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -2181,7 +2181,7 @@ def _oindex_get(self, indexer: OuterIndexer): dim_positions = dict(zip(self._dims, positions, strict=False)) result = self._transform.forward(dim_positions) - return np.asarray(result[self._coord_name]).squeeze() + return np.asarray(result[self._coord_name]) def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: raise TypeError( @@ -2215,7 +2215,11 @@ def __getitem__(self, indexer: ExplicitIndexer): self._check_and_raise_if_non_basic_indexer(indexer) # also works with basic indexing - return self._oindex_get(OuterIndexer(indexer.tuple)) + res = self._oindex_get(OuterIndexer(indexer.tuple)) + squeeze_axes = tuple( + ax for ax, idxr in enumerate(indexer.tuple) if isinstance(idxr, int) + ) + return res.squeeze(squeeze_axes) if squeeze_axes else res def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: raise TypeError( From be580c1c7918137dec36c5969f26cdd70c130f31 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Nov 2025 10:05:28 -0700 Subject: [PATCH 2/4] Add strategies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- properties/test_coordinate_transform.py | 105 +++++++++++++ properties/test_indexing.py | 66 ++++++++ xarray/testing/strategies.py | 195 ++++++++++++++++++++++++ xarray/tests/test_strategies.py | 74 +++++++++ 4 files changed, 440 insertions(+) create mode 100644 properties/test_coordinate_transform.py create mode 100644 properties/test_indexing.py diff --git a/properties/test_coordinate_transform.py b/properties/test_coordinate_transform.py new file mode 100644 index 00000000000..0b3c41205c8 --- /dev/null +++ b/properties/test_coordinate_transform.py @@ -0,0 +1,105 @@ +"""Property tests comparing CoordinateTransformIndex to PandasIndex.""" + +from collections.abc import Hashable +from typing import Any + +import numpy as np +import pytest + +pytest.importorskip("hypothesis") + +import hypothesis.strategies as st +from hypothesis import given + +import xarray as xr +import xarray.testing.strategies as xrst +from xarray.core.coordinate_transform import CoordinateTransform +from xarray.core.indexes import CoordinateTransformIndex +from xarray.testing import assert_identical + +DATA_VAR_NAME = "_test_data_" + + +class IdentityTransform(CoordinateTransform): + """Identity transform that returns dimension positions as coordinate labels.""" + + def forward(self, dim_positions: dict[str, Any]) -> dict[Hashable, Any]: + return {name: dim_positions[name] for name in self.coord_names} + + def reverse(self, coord_labels: dict[Hashable, Any]) -> dict[str, Any]: + return {dim: coord_labels[dim] for dim in self.dims} + + def equals( + self, other: CoordinateTransform, exclude: frozenset[Hashable] | None = None + ) -> bool: + if not isinstance(other, IdentityTransform): + return False + return 
self.dim_size == other.dim_size + + +def create_transform_da(sizes: dict[str, int]) -> xr.DataArray: + """Create a DataArray with IdentityTransform CoordinateTransformIndex.""" + dims = list(sizes.keys()) + shape = tuple(sizes.values()) + data = np.arange(np.prod(shape)).reshape(shape) + + # Create dataset with transform index for each dimension + ds = xr.Dataset({DATA_VAR_NAME: (dims, data)}) + for dim, size in sizes.items(): + transform = IdentityTransform([dim], {dim: size}, dtype=np.dtype(np.int64)) + index = CoordinateTransformIndex(transform) + ds = ds.assign_coords(xr.Coordinates.from_xindex(index)) + + return ds[DATA_VAR_NAME] + + +def create_pandas_da(sizes: dict[str, int]) -> xr.DataArray: + """Create a DataArray with standard PandasIndex (range index).""" + shape = tuple(sizes.values()) + data = np.arange(np.prod(shape)).reshape(shape) + coords = {dim: np.arange(size) for dim, size in sizes.items()} + return xr.DataArray( + data, dims=list(sizes.keys()), coords=coords, name=DATA_VAR_NAME + ) + + +@given( + st.data(), + xrst.dimension_sizes(min_dims=1, max_dims=3, min_side=1, max_side=5), +) +def test_basic_indexing(data, sizes): + """Test basic indexing produces identical results for transform and pandas index.""" + pandas_da = create_pandas_da(sizes) + transform_da = create_transform_da(sizes) + idxr = data.draw(xrst.basic_indexers(sizes=sizes)) + pandas_result = pandas_da.isel(idxr) + transform_result = transform_da.isel(idxr) + assert_identical(pandas_result, transform_result) + + +@given( + st.data(), + xrst.dimension_sizes(min_dims=1, max_dims=3, min_side=1, max_side=5), +) +def test_outer_indexing(data, sizes): + """Test outer indexing produces identical results for transform and pandas index.""" + pandas_da = create_pandas_da(sizes) + transform_da = create_transform_da(sizes) + idxr = data.draw(xrst.outer_array_indexers(sizes=sizes, min_dims=1)) + pandas_result = pandas_da.isel(idxr) + transform_result = transform_da.isel(idxr) + assert_identical(pandas_result, transform_result) + + +@given( + st.data(), + xrst.dimension_sizes(min_dims=2, max_dims=3, min_side=1, max_side=5), +) +def test_vectorized_indexing(data, sizes): + """Test vectorized indexing produces identical results for transform and pandas index.""" + pandas_da = create_pandas_da(sizes) + transform_da = create_transform_da(sizes) + idxr = data.draw(xrst.vectorized_indexers(sizes=sizes)) + pandas_result = pandas_da.isel(idxr) + transform_result = transform_da.isel(idxr) + assert_identical(pandas_result, transform_result) diff --git a/properties/test_indexing.py b/properties/test_indexing.py new file mode 100644 index 00000000000..bce17f503d6 --- /dev/null +++ b/properties/test_indexing.py @@ -0,0 +1,66 @@ +import pytest + +pytest.importorskip("hypothesis") + +import hypothesis.strategies as st +from hypothesis import given + +import xarray as xr +import xarray.testing.strategies as xrst + + +def _slice_size(s: slice, dim_size: int) -> int: + """Compute the size of a slice applied to a dimension.""" + return len(range(*s.indices(dim_size))) + + +@given( + st.data(), + xrst.variables(dims=xrst.dimension_sizes(min_dims=1, max_dims=4, min_side=1)), +) +def test_basic_indexing(data, var): + """Test that basic indexers produce expected output shape.""" + idxr = data.draw(xrst.basic_indexers(sizes=var.sizes)) + result = var.isel(idxr) + expected_shape = tuple( + _slice_size(idxr[d], var.sizes[d]) if d in idxr else var.sizes[d] + for d in result.dims + ) + assert result.shape == expected_shape + + +@given( + st.data(), 
+ xrst.variables(dims=xrst.dimension_sizes(min_dims=1, max_dims=4, min_side=1)), +) +def test_outer_indexing(data, var): + """Test that outer array indexers produce expected output shape.""" + idxr = data.draw(xrst.outer_array_indexers(sizes=var.sizes, min_dims=1)) + result = var.isel(idxr) + expected_shape = tuple( + len(idxr[d]) if d in idxr else var.sizes[d] for d in result.dims + ) + assert result.shape == expected_shape + + +@given( + st.data(), + xrst.variables(dims=xrst.dimension_sizes(min_dims=2, max_dims=4, min_side=1)), +) +def test_vectorized_indexing(data, var): + """Test that vectorized indexers produce expected output shape.""" + da = xr.DataArray(var) + idxr = data.draw(xrst.vectorized_indexers(sizes=var.sizes)) + result = da.isel(idxr) + + # TODO: this logic works because the dims in idxr don't overlap with da.dims + # Compute expected shape from result dims + # Non-indexed dims keep their original size, indexed dims get broadcast size + broadcast_result = xr.broadcast(*idxr.values()) + broadcast_sizes = dict( + zip(broadcast_result[0].dims, broadcast_result[0].shape, strict=True) + ) + expected_shape = tuple( + var.sizes[d] if d in var.sizes else broadcast_sizes[d] for d in result.dims + ) + assert result.shape == expected_shape diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 19caf16cca9..c0871ce577c 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,6 +1,7 @@ import datetime import warnings from collections.abc import Hashable, Iterable, Mapping, Sequence +from itertools import compress from typing import TYPE_CHECKING, Any, Protocol, overload import hypothesis.extra.numpy as npst @@ -22,15 +23,18 @@ __all__ = [ "attrs", + "basic_indexers", "cftime_datetimes", "datetimes", "dimension_names", "dimension_sizes", "names", + "outer_array_indexers", "pandas_index_dtypes", "supported_dtypes", "unique_subset_of", "variables", + "vectorized_indexers", ] @@ -536,3 +540,194 @@ def cftime_datetimes(draw: st.DrawFn): microseconds_offset = draw(st.integers(0, timespan_microseconds)) return min_value + datetime.timedelta(microseconds=microseconds_offset) + + +@st.composite +def basic_indexers( + draw, + /, + *, + sizes: dict[Hashable, int], + min_dims: int = 1, + max_dims: int | None = None, +) -> dict[Hashable, int | slice]: + """Generate basic indexers using ``hypothesis.extra.numpy.basic_indices``. + + Parameters + ---------- + draw : callable + sizes : dict[Hashable, int] + Dictionary mapping dimension names to their sizes. + min_dims : int, optional + Minimum number of dimensions to index. + max_dims : int or None, optional + Maximum number of dimensions to index. + + Returns + ------- + dict[Hashable, int | slice] + Indexers as a dict with keys randomly selected from ``sizes.keys()``. + + See Also + -------- + hypothesis.strategies.slices + """ + selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims)) + + # Generate one basic index (int or slice) per selected dimension + idxr = { + dim: draw( + st.one_of( + st.integers(min_value=-size, max_value=size - 1), + st.slices(size), + ) + ) + for dim, size in selected_dims.items() + } + return idxr + + +@st.composite +def outer_array_indexers( + draw, + /, + *, + sizes: dict[Hashable, int], + min_dims: int = 0, + max_dims: int | None = None, + max_size: int = 10, +) -> dict[Hashable, np.ndarray]: + """Generate outer array indexers (vectorized/orthogonal indexing). 
+ + Parameters + ---------- + draw : callable + The Hypothesis draw function (automatically provided by @st.composite). + sizes : dict[Hashable, int] + Dictionary mapping dimension names to their sizes. + min_dims : int, optional + Minimum number of dimensions to index + max_dims : int or None, optional + Maximum number of dimensions to index + + Returns + ------- + dict[Hashable, np.ndarray] + Indexers as a dict with keys randomly selected from ``sizes.keys()``. + Values are 1D numpy arrays of integer indices for each dimension. + + See Also + -------- + hypothesis.extra.numpy.arrays + """ + selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims)) + idxr = { + dim: draw( + npst.arrays( + dtype=np.int64, + shape=st.integers(min_value=1, max_value=min(size, max_size)), + elements=st.integers(min_value=-size, max_value=size - 1), + ) + ) + for dim, size in selected_dims.items() + } + return idxr + + +@st.composite +def vectorized_indexers( + draw, + /, + *, + sizes: dict[Hashable, int], + min_dims: int = 2, + max_dims: int | None = None, + min_ndim: int = 1, + max_ndim: int = 3, + min_size: int = 1, + max_size: int = 5, +) -> dict[Hashable, xr.DataArray]: + """Generate vectorized (fancy) indexers where all arrays are broadcastable. + + In vectorized indexing, all array indexers must have compatible shapes + that can be broadcast together, and the result shape is determined by + broadcasting the indexer arrays. + + Parameters + ---------- + draw : callable + The Hypothesis draw function (automatically provided by @st.composite). + sizes : dict[Hashable, int] + Dictionary mapping dimension names to their sizes. + min_dims : int, optional + Minimum number of dimensions to index. Default is 2, so that we always have a "trajectory". + Use ``outer_array_indexers`` for the ``min_dims==1`` case. + max_dims : int or None, optional + Maximum number of dimensions to index. + min_ndim : int, optional + Minimum number of dimensions for the result arrays. + max_ndim : int, optional + Maximum number of dimensions for the result arrays. + min_size : int, optional + Minimum size for each dimension in the result arrays. + max_size : int, optional + Maximum size for each dimension in the result arrays. + + Returns + ------- + dict[Hashable, xr.DataArray] + Indexers as a dict with keys randomly selected from sizes.keys(). + Values are DataArrays of integer indices that are all broadcastable + to a common shape. 
+ + See Also + -------- + hypothesis.extra.numpy.arrays + """ + selected_dims = draw(unique_subset_of(sizes, min_size=min_dims, max_size=max_dims)) + + # Generate a common broadcast shape for all arrays + # Use min_ndim to max_ndim dimensions for the result shape + result_shape = draw( + st.lists( + st.integers(min_value=min_size, max_value=max_size), + min_size=min_ndim, + max_size=max_ndim, + ) + ) + result_ndim = len(result_shape) + + # Create dimension names for the vectorized result + vec_dims = tuple(f"vec_{i}" for i in range(result_ndim)) + + # Generate array indexers for each selected dimension + # All arrays must be broadcastable to the same result_shape + idxr = {} + for dim, size in selected_dims.items(): + array_shape = draw( + npst.broadcastable_shapes( + shape=tuple(result_shape), + min_dims=min_ndim, + max_dims=result_ndim, + ) + ) + + # For xarray broadcasting, drop dimensions where size differs from result_shape + # (numpy broadcasts size-1, but xarray requires matching sizes or missing dims) + # Right-align array_shape with result_shape for comparison + aligned_dims = vec_dims[-len(array_shape) :] if array_shape else () + aligned_result = result_shape[-len(array_shape) :] if array_shape else [] + keep_mask = [s == r for s, r in zip(array_shape, aligned_result, strict=True)] + filtered_shape = tuple(compress(array_shape, keep_mask)) + filtered_dims = tuple(compress(aligned_dims, keep_mask)) + + # Generate array of valid indices for this dimension + indices = draw( + npst.arrays( + dtype=np.int64, + shape=filtered_shape, + elements=st.integers(min_value=-size, max_value=size - 1), + ) + ) + idxr[dim] = xr.DataArray(indices, dims=filtered_dims) + return idxr diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 48819333ca2..b4cf8bf2c16 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -13,15 +13,20 @@ from hypothesis import given from hypothesis.extra.array_api import make_strategies_namespace +import xarray as xr +from xarray import broadcast from xarray.core.options import set_options from xarray.core.variable import Variable from xarray.testing.strategies import ( attrs, + basic_indexers, dimension_names, dimension_sizes, + outer_array_indexers, supported_dtypes, unique_subset_of, variables, + vectorized_indexers, ) ALLOWED_ATTRS_VALUES_TYPES = (int, bool, str, np.ndarray) @@ -279,3 +284,72 @@ def test_mean(self, data, var): # assert property is always satisfied result = var.mean(dim=reduction_dims).data npt.assert_equal(expected, result) + + +class TestBasicIndexers: + @given(st.data(), dimension_sizes(min_dims=1)) + def test_types(self, data, sizes): + idxr = data.draw(basic_indexers(sizes=sizes)) + assert idxr + assert isinstance(idxr, dict) + for key, value in idxr.items(): + assert key in sizes + assert isinstance(value, (int, slice)) + + @given(st.data(), dimension_sizes(min_dims=2)) + def test_min_max_dims(self, data, sizes): + min_dims = data.draw(st.integers(min_value=1, max_value=len(sizes))) + max_dims = data.draw(st.integers(min_value=min_dims, max_value=len(sizes))) + idxr = data.draw( + basic_indexers(sizes=sizes, min_dims=min_dims, max_dims=max_dims) + ) + assert min_dims <= len(idxr) <= max_dims + + +class TestOuterArrayIndexers: + @given(st.data(), dimension_sizes(min_dims=1, min_side=1)) + def test_types(self, data, sizes): + idxr = data.draw(outer_array_indexers(sizes=sizes, min_dims=1)) + assert idxr + assert isinstance(idxr, dict) + for key, value in idxr.items(): + assert key in sizes 
+ assert isinstance(value, np.ndarray) + assert value.dtype == np.int64 + assert value.ndim == 1 + # Check indices in bounds (negative indices valid) + assert np.all((value >= -sizes[key]) & (value < sizes[key])) + + @given(st.data(), dimension_sizes(min_dims=2, min_side=1)) + def test_min_max_dims(self, data, sizes): + min_dims = data.draw(st.integers(min_value=1, max_value=len(sizes))) + max_dims = data.draw(st.integers(min_value=min_dims, max_value=len(sizes))) + idxr = data.draw( + outer_array_indexers(sizes=sizes, min_dims=min_dims, max_dims=max_dims) + ) + assert min_dims <= len(idxr) <= max_dims + + +class TestVectorizedIndexers: + @given(st.data(), dimension_sizes(min_dims=2, min_side=1)) + def test_types(self, data, sizes): + idxr = data.draw(vectorized_indexers(sizes=sizes)) + assert isinstance(idxr, dict) + assert idxr # not empty + # All DataArrays should be broadcastable together + broadcast(*idxr.values()) + for key, value in idxr.items(): + assert key in sizes + assert isinstance(value, xr.DataArray) + assert value.dtype == np.int64 + # Check indices in bounds (negative indices valid) + assert np.all((value.values >= -sizes[key]) & (value.values < sizes[key])) + + @given(st.data(), dimension_sizes(min_dims=3, min_side=1)) + def test_min_max_dims(self, data, sizes): + min_dims = data.draw(st.integers(min_value=2, max_value=len(sizes))) + max_dims = data.draw(st.integers(min_value=min_dims, max_value=len(sizes))) + idxr = data.draw( + vectorized_indexers(sizes=sizes, min_dims=min_dims, max_dims=max_dims) + ) + assert min_dims <= len(idxr) <= max_dims From eae0798419790235060f9531b4ece1f5d9f5e88a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 3 Dec 2025 21:18:30 -0700 Subject: [PATCH 3/4] Fix indentation bug in create_transform_da MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return statement was incorrectly indented inside the for loop, causing only the first coordinate to be assigned to the DataArray. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- properties/test_coordinate_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/properties/test_coordinate_transform.py b/properties/test_coordinate_transform.py index 0b3c41205c8..e2b44f3f611 100644 --- a/properties/test_coordinate_transform.py +++ b/properties/test_coordinate_transform.py @@ -50,7 +50,7 @@ def create_transform_da(sizes: dict[str, int]) -> xr.DataArray: index = CoordinateTransformIndex(transform) ds = ds.assign_coords(xr.Coordinates.from_xindex(index)) - return ds[DATA_VAR_NAME] + return ds[DATA_VAR_NAME] def create_pandas_da(sizes: dict[str, int]) -> xr.DataArray: From 630d01036d519d3ab115b1b88ee174d3746bd0c1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 4 Dec 2025 08:52:32 -0700 Subject: [PATCH 4/4] Add unit test --- xarray/tests/test_coordinate_transform.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/tests/test_coordinate_transform.py b/xarray/tests/test_coordinate_transform.py index 2aed66cfb09..9e38763d251 100644 --- a/xarray/tests/test_coordinate_transform.py +++ b/xarray/tests/test_coordinate_transform.py @@ -142,6 +142,10 @@ def test_coordinate_transform_variable_basic_outer_indexing() -> None: assert var[0, -1] == 6.0 np.testing.assert_array_equal(var[:, 0:2], [[0.0, 2.0]] * 4) + expected = var.values[[0], :][:, [0, -1]] + actual = var.isel(y=[0], x=[0, -1]).values + np.testing.assert_array_equal(actual, expected) + with pytest.raises(IndexError, match="out of bounds index"): var[5]
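
Note: a minimal sketch (not part of the diffs above) of the indexing semantics
that patch 1 aligns CoordinateTransformIndexingAdapter with, and that the unit
test in patch 4 checks. It uses a plain numpy-backed Variable purely for
illustration; only the shapes matter here:

    import numpy as np
    import xarray as xr

    var = xr.Variable(("y", "x"), np.arange(8.0).reshape(4, 2))

    # Integer indexers drop the indexed dimension entirely ...
    assert var[0, -1].shape == ()
    # ... while list and slice indexers keep it, even at length 1.
    assert var.isel(y=[0], x=[0, -1]).shape == (1, 2)

The blanket ``.squeeze()`` previously applied in ``_oindex_get`` would have
collapsed the size-1 "y" axis in the second case as well; the replacement
squeezes only the axes indexed with plain integers.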
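
For the strategies added in patch 2, single draws look roughly like the
following (values vary between runs; the outputs shown in the comments are
assumed examples, not fixed results):

    import xarray.testing.strategies as xrst

    sizes = {"x": 3, "y": 4}

    # int or slice per selected dimension
    xrst.basic_indexers(sizes=sizes).example()
    # e.g. {"x": 1, "y": slice(0, 3, 2)}

    # 1-D integer arrays, negative indices allowed, values within bounds
    xrst.outer_array_indexers(sizes=sizes, min_dims=1).example()
    # e.g. {"y": array([0, -1, 2])}

    # broadcastable DataArrays over new "vec_*" dimensions
    xrst.vectorized_indexers(sizes=sizes).example()
    # e.g. {"x": <xarray.DataArray (vec_0: 2)>, "y": <xarray.DataArray (vec_0: 2)>}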