From d302e83bd4d5bcbffb6fb801958c76704df198b4 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Thu, 4 Dec 2025 15:30:34 +0100 Subject: [PATCH 1/3] remove _iter_chunk functions/methods --- src/zarr/core/array.py | 166 +---------------------------------------- tests/test_array.py | 14 ++-- tests/test_indexing.py | 2 +- 3 files changed, 10 insertions(+), 172 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6b20ee950d..804e2ec7c8 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1382,36 +1382,6 @@ async def example(): async def nbytes_stored(self) -> int: return await self.store_path.store.getsize_prefix(self.store_path.path) - def _iter_chunk_coords( - self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None - ) -> Iterator[tuple[int, ...]]: - """ - Create an iterator over the coordinates of chunks in chunk grid space. - - If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. - The default behavior is to start at the origin of the grid coordinate space. - If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region - ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as - per python indexing conventions. - - Parameters - ---------- - origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. - - Yields - ------ - chunk_coords: tuple[int, ...] - The coordinates of each chunk in the selection. 
- """ - return _iter_chunk_coords( - array=self, - origin=origin, - selection_shape=selection_shape, - ) - def _iter_shard_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: @@ -1469,30 +1439,6 @@ def _iter_shard_keys( selection_shape=selection_shape, ) - def _iter_chunk_regions( - self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None - ) -> Iterator[tuple[slice, ...]]: - """ - Iterate over the regions spanned by each chunk. - - Parameters - ---------- - origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. - - Yields - ------ - region: tuple[slice, ...] - A tuple of slice objects representing the region spanned by each chunk in the selection. - """ - return _iter_chunk_regions( - array=self, - origin=origin, - selection_shape=selection_shape, - ) - def _iter_shard_regions( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[slice, ...]]: @@ -2605,32 +2551,6 @@ def _iter_shard_keys( """ return self.async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape) - def _iter_chunk_coords( - self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None - ) -> Iterator[tuple[int, ...]]: - """ - Create an iterator over the coordinates of chunks in chunk grid space. - - If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. - The default behavior is to start at the origin of the grid coordinate space. - If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region - ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as - per python indexing conventions. 
- - Parameters - ---------- - origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. - - Yields - ------ - tuple[int, ...] - The coordinates of each chunk in the selection. - """ - return self.async_array._iter_chunk_coords(origin=origin, selection_shape=selection_shape) - def _iter_shard_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: @@ -2657,26 +2577,6 @@ def _iter_shard_coords( """ return self.async_array._iter_shard_coords(origin=origin, selection_shape=selection_shape) - def _iter_chunk_regions( - self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None - ) -> Iterator[tuple[slice, ...]]: - """ - Iterate over the regions spanned by each chunk. - - Parameters - ---------- - origin : Sequence[int] | None, default=None - The origin of the selection relative to the array's chunk grid. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in chunk grid coordinates. - - Yields - ------ - tuple[slice, ...] - A tuple of slice objects representing the region spanned by each chunk in the selection. - """ - return self.async_array._iter_chunk_regions(origin=origin, selection_shape=selection_shape) - def _iter_shard_regions( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[slice, ...]]: @@ -5337,37 +5237,6 @@ def _parse_data_params( return data, shape_out, dtype_out -def _iter_chunk_coords( - array: AnyArray | AnyAsyncArray, - *, - origin: Sequence[int] | None = None, - selection_shape: Sequence[int] | None = None, -) -> Iterator[tuple[int, ...]]: - """ - Create an iterator over the coordinates of chunks in chunk grid space. 
If the `origin` - keyword is used, iteration will start at the chunk index specified by `origin`. - The default behavior is to start at the origin of the grid coordinate space. - If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region - ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as - per python indexing conventions. - - Parameters - ---------- - array : Array | AsyncArray - The array to iterate over. - origin : Sequence[int] | None, default=None - The origin of the selection in grid coordinates. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in grid coordinates. - - Yields - ------ - chunk_coords: tuple[int, ...] - The coordinates of each chunk in the selection. - """ - return _iter_grid(array._chunk_grid_shape, origin=origin, selection_shape=selection_shape) - - def _iter_shard_coords( array: AnyArray | AnyAsyncArray, *, @@ -5396,7 +5265,9 @@ def _iter_shard_coords( chunk_coords: tuple[int, ...] The coordinates of each shard in the selection. """ - return _iter_grid(array._shard_grid_shape, origin=origin, selection_shape=selection_shape) + if array._shard_grid_shape: + return _iter_grid(array._shard_grid_shape, origin=origin, selection_shape=selection_shape) + return _iter_grid(array._chunk_grid_shape, origin=origin, selection_shape=selection_shape) def _iter_shard_keys( @@ -5461,34 +5332,3 @@ def _iter_shard_regions( return _iter_regions( array.shape, shard_shape, origin=origin, selection_shape=selection_shape, trim_excess=True ) - - -def _iter_chunk_regions( - array: AnyArray | AnyAsyncArray, - *, - origin: Sequence[int] | None = None, - selection_shape: Sequence[int] | None = None, -) -> Iterator[tuple[slice, ...]]: - """ - Iterate over the regions spanned by each shard. - - These are the smallest regions of the array that are efficient to read concurrently. - - Parameters - ---------- - array : Array | AsyncArray - The array to iterate over. 
- origin : Sequence[int] | None, default=None - The origin of the selection in grid coordinates. - selection_shape : Sequence[int] | None, default=None - The shape of the selection in grid coordinates. - - Returns - ------- - region: tuple[slice, ...] - A tuple of slice objects representing the region spanned by each shard in the selection. - """ - - return _iter_regions( - array.shape, array.chunks, origin=origin, selection_shape=selection_shape, trim_excess=True - ) diff --git a/tests/test_array.py b/tests/test_array.py index 67be294827..32fb32c40e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -32,8 +32,6 @@ AsyncArray, CompressorsLike, FiltersLike, - _iter_chunk_coords, - _iter_chunk_regions, _iter_shard_coords, _iter_shard_keys, _iter_shard_regions, @@ -2032,10 +2030,10 @@ def test_iter_chunk_coords( zarr_format=zarr_format, ) expected = tuple(_iter_grid(arr._shard_grid_shape)) - observed = tuple(_iter_chunk_coords(arr)) + observed = tuple(_iter_shard_coords(arr)) assert observed == expected - assert observed == tuple(arr._iter_chunk_coords()) - assert observed == tuple(arr.async_array._iter_chunk_coords()) + assert observed == tuple(arr._iter_shard_coords()) + assert observed == tuple(arr.async_array._iter_shard_coords()) @pytest.mark.parametrize( @@ -2165,10 +2163,10 @@ def test_iter_chunk_regions( ) expected = tuple(_iter_regions(arr.shape, chunk_shape)) - observed = tuple(_iter_chunk_regions(arr)) + observed = tuple(_iter_shard_regions(arr)) assert observed == expected - assert observed == tuple(arr._iter_chunk_regions()) - assert observed == tuple(arr.async_array._iter_chunk_regions()) + assert observed == tuple(arr._iter_shard_regions()) + assert observed == tuple(arr.async_array._iter_shard_regions()) @pytest.mark.parametrize("num_shards", [1, 3]) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index c0bf7dd270..d338707ad0 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1962,7 +1962,7 @@ def 
test_iter_chunk_regions(): chunks = (2, 3) a = zarr.create((10, 10), chunks=chunks) a[:] = 1 - for region in a._iter_chunk_regions(): + for region in a._iter_shard_regions(): assert_array_equal(a[region], np.ones_like(a[region])) a[region] = 0 assert_array_equal(a[region], np.zeros_like(a[region])) From 38f70287795551ce12debc546708979eabbc4225 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Thu, 4 Dec 2025 16:02:57 +0100 Subject: [PATCH 2/3] adjust docstrings --- src/zarr/core/array.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 804e2ec7c8..582d527816 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1445,6 +1445,8 @@ def _iter_shard_regions( """ Iterate over the regions spanned by each shard. + If no shards are present, then it will fall back on chunks. + Parameters ---------- origin : Sequence[int] | None, default=None @@ -2563,6 +2565,9 @@ def _iter_shard_coords( ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as per python indexing conventions. + If no shard grid space is available, e.g., like in zarr version 2, the method will fall back + to chunk grid space. + Parameters ---------- origin : Sequence[int] | None, default=None @@ -2583,6 +2588,8 @@ def _iter_shard_regions( """ Iterate over the regions spanned by each shard. + If no shard is present, then it will fall back on chunks. + Parameters ---------- origin : Sequence[int] | None, default=None @@ -5308,7 +5315,8 @@ def _iter_shard_regions( """ Iterate over the regions spanned by each shard. - These are the smallest regions of the array that are safe to write concurrently. + These are the smallest regions of the array that are safe to write concurrently. If no shards + are present it will fall back on chunks. 
Parameters ---------- From eded573248ffb98ddebca50406857daf8cc4318b Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Thu, 4 Dec 2025 16:58:03 +0100 Subject: [PATCH 3/3] revert removal of chunk methods/functions --- src/zarr/core/array.py | 162 +++++++++++++++++++++++++++++++++++++++++ tests/test_array.py | 14 ++-- tests/test_indexing.py | 2 +- 3 files changed, 171 insertions(+), 7 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 582d527816..28b381c13d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1382,6 +1382,36 @@ async def example(): async def nbytes_stored(self) -> int: return await self.store_path.store.getsize_prefix(self.store_path.path) + def _iter_chunk_coords( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[int, ...]]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. + + If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + chunk_coords: tuple[int, ...] + The coordinates of each chunk in the selection. 
+ """ + return _iter_chunk_coords( + array=self, + origin=origin, + selection_shape=selection_shape, + ) + def _iter_shard_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: @@ -1414,6 +1444,30 @@ def _iter_shard_coords( selection_shape=selection_shape, ) + def _iter_chunk_regions( + self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + return _iter_chunk_regions( + array=self, + origin=origin, + selection_shape=selection_shape, + ) + def _iter_shard_keys( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: @@ -2553,6 +2607,32 @@ def _iter_shard_keys( """ return self.async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape) + def _iter_chunk_coords( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[int, ...]]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. + + If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. 
+ + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + tuple[int, ...] + The coordinates of each chunk in the selection. + """ + return self.async_array._iter_chunk_coords(origin=origin, selection_shape=selection_shape) + def _iter_shard_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: @@ -2582,6 +2662,26 @@ def _iter_shard_coords( """ return self.async_array._iter_shard_coords(origin=origin, selection_shape=selection_shape) + def _iter_chunk_regions( + self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None + ) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. + + Parameters + ---------- + origin : Sequence[int] | None, default=None + The origin of the selection relative to the array's chunk grid. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in chunk grid coordinates. + + Yields + ------ + tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. + """ + return self.async_array._iter_chunk_regions(origin=origin, selection_shape=selection_shape) + def _iter_shard_regions( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[slice, ...]]: @@ -5244,6 +5344,37 @@ def _parse_data_params( return data, shape_out, dtype_out +def _iter_chunk_coords( + array: AnyArray | AnyAsyncArray, + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, +) -> Iterator[tuple[int, ...]]: + """ + Create an iterator over the coordinates of chunks in chunk grid space. 
If the `origin` + keyword is used, iteration will start at the chunk index specified by `origin`. + The default behavior is to start at the origin of the grid coordinate space. + If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region + ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as + per python indexing conventions. + + Parameters + ---------- + array : Array | AsyncArray + The array to iterate over. + origin : Sequence[int] | None, default=None + The origin of the selection in grid coordinates. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in grid coordinates. + + Yields + ------ + chunk_coords: tuple[int, ...] + The coordinates of each chunk in the selection. + """ + return _iter_grid(array._chunk_grid_shape, origin=origin, selection_shape=selection_shape) + + def _iter_shard_coords( array: AnyArray | AnyAsyncArray, *, @@ -5340,3 +5471,34 @@ def _iter_shard_regions( return _iter_regions( array.shape, shard_shape, origin=origin, selection_shape=selection_shape, trim_excess=True ) + + +def _iter_chunk_regions( + array: AnyArray | AnyAsyncArray, + *, + origin: Sequence[int] | None = None, + selection_shape: Sequence[int] | None = None, +) -> Iterator[tuple[slice, ...]]: + """ + Iterate over the regions spanned by each chunk. + + These are the smallest regions of the array that are efficient to read concurrently. + + Parameters + ---------- + array : Array | AsyncArray + The array to iterate over. + origin : Sequence[int] | None, default=None + The origin of the selection in grid coordinates. + selection_shape : Sequence[int] | None, default=None + The shape of the selection in grid coordinates. + + Returns + ------- + region: tuple[slice, ...] + A tuple of slice objects representing the region spanned by each chunk in the selection. 
+ """ + + return _iter_regions( + array.shape, array.chunks, origin=origin, selection_shape=selection_shape, trim_excess=True + ) diff --git a/tests/test_array.py b/tests/test_array.py index 32fb32c40e..67be294827 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -32,6 +32,8 @@ AsyncArray, CompressorsLike, FiltersLike, + _iter_chunk_coords, + _iter_chunk_regions, _iter_shard_coords, _iter_shard_keys, _iter_shard_regions, @@ -2030,10 +2032,10 @@ def test_iter_chunk_coords( zarr_format=zarr_format, ) expected = tuple(_iter_grid(arr._shard_grid_shape)) - observed = tuple(_iter_shard_coords(arr)) + observed = tuple(_iter_chunk_coords(arr)) assert observed == expected - assert observed == tuple(arr._iter_shard_coords()) - assert observed == tuple(arr.async_array._iter_shard_coords()) + assert observed == tuple(arr._iter_chunk_coords()) + assert observed == tuple(arr.async_array._iter_chunk_coords()) @pytest.mark.parametrize( @@ -2163,10 +2165,10 @@ def test_iter_chunk_regions( ) expected = tuple(_iter_regions(arr.shape, chunk_shape)) - observed = tuple(_iter_shard_regions(arr)) + observed = tuple(_iter_chunk_regions(arr)) assert observed == expected - assert observed == tuple(arr._iter_shard_regions()) - assert observed == tuple(arr.async_array._iter_shard_regions()) + assert observed == tuple(arr._iter_chunk_regions()) + assert observed == tuple(arr.async_array._iter_chunk_regions()) @pytest.mark.parametrize("num_shards", [1, 3]) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index d338707ad0..c0bf7dd270 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -1962,7 +1962,7 @@ def test_iter_chunk_regions(): chunks = (2, 3) a = zarr.create((10, 10), chunks=chunks) a[:] = 1 - for region in a._iter_shard_regions(): + for region in a._iter_chunk_regions(): assert_array_equal(a[region], np.ones_like(a[region])) a[region] = 0 assert_array_equal(a[region], np.zeros_like(a[region]))