From e3ee33befc03978450864cac453dcbf7e053ace1 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 3 Dec 2025 16:31:24 +0100 Subject: [PATCH 01/19] add copy_store convenience method --- src/zarr/core/group.py | 54 ++++++++++++++++++++++++++++++++++++++++++ tests/test_group.py | 30 +++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 9b5fee275b..1221fb454a 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -697,6 +697,60 @@ def from_dict( store_path=store_path, ) + async def copy_store( + self, + store: StoreLike, + *, + overwrite: bool = False, + use_consolidated: bool | None = None, + ) -> AsyncGroup: + target_zarr_format = self.metadata.zarr_format + + new_group = await AsyncGroup.from_store( + store, + overwrite=overwrite, + attributes=self.metadata.attributes, + zarr_format=target_zarr_format, + ) + + async for _, member in self.members(max_depth=None): + child_path = member.store_path.path + target_path = StorePath(store=new_group.store, path=child_path) + if isinstance(member, AsyncGroup): + await async_api.group( + store=target_path, + overwrite=overwrite, + attributes=member.metadata.attributes, + zarr_format=target_zarr_format, + ) + else: + # Serializer done this way in case of having zarr_format 2. 
+ new_array = await new_group.create_array( + name=child_path, + shape=member.shape, + dtype=member.dtype, + chunks=member.chunks, + shards=member.shards, + filters=member.filters, + compressors=member.compressors, + serializer=member.serializer if member.serializer is not None else "auto", + fill_value=member.metadata.fill_value, + attributes=member.attrs, + chunk_key_encoding=member.metadata.chunk_key_encoding, + dimension_names=member.metadata.dimension_names, + overwrite=overwrite, + config={"order": member.order}, + ) + + for region in member._iter_shard_regions(): + data = await member.getitem(selection=region) + await new_array.setitem(selection=region, value=data) + + if use_consolidated: + await async_api.consolidate_metadata(new_group.store) + + return new_group + async def setitem(self, key: str, value: Any) -> None: """ Fastpath for creating a new array diff --git a/tests/test_group.py b/tests/test_group.py index 6f1f4e68fa..0ed899bc28 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -250,6 +250,36 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad members_observed = group.members(max_depth=-1) +async def test_copy_store(): + src_store = MemoryStore() + src = await AsyncGroup.from_store(src_store, attributes={"root": True}) + + await src.create_group("subgroup") + + arr_data = np.arange(100) + await src.create_array( + "dataset", + shape=(100,), + chunks=(10,), + shards=(50,), + dtype=arr_data.dtype, + ) + dataset = await src.getitem("dataset") + await dataset.setitem(selection=slice(None), value=arr_data) + + dst_store = MemoryStore() + dst = await src.copy_store(dst_store, overwrite=True) + + assert dst.attrs.get("root") is True + + subgroup = await dst.getitem("subgroup") + assert isinstance(subgroup, AsyncGroup) + + copied_arr = await dst.getitem("dataset") + copied_data = await copied_arr.getitem(selection=slice(None)) + assert np.array_equal(copied_data, arr_data) + + def test_group(store: Store, 
zarr_format: ZarrFormat) -> None: """ Test basic Group routines. From 83550a3e7f5d8cbe04a6e5f9f32165660517112e Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 3 Dec 2025 16:49:26 +0100 Subject: [PATCH 02/19] add synchronous call --- src/zarr/core/group.py | 15 +++++++++++++++ tests/test_group.py | 19 +++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 1221fb454a..59056c7a73 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1928,6 +1928,21 @@ def open( obj = sync(AsyncGroup.open(store, zarr_format=zarr_format)) return cls(obj) + def copy_store( + self, + store: StoreLike, + *, + overwrite: bool = False, + use_consolidated: bool | None = None, + ) -> Group: + return Group( + sync( + self._async_group.copy_store( + store=store, overwrite=overwrite, use_consolidated=use_consolidated + ) + ) + ) + def __getitem__(self, path: str) -> AnyArray | Group: """Obtain a group member. 
diff --git a/tests/test_group.py b/tests/test_group.py index 0ed899bc28..520aa26bc4 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -252,31 +252,30 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad async def test_copy_store(): src_store = MemoryStore() - src = await AsyncGroup.from_store(src_store, attributes={"root": True}) + src = Group.from_store(src_store, attributes={"root": True}) - await src.create_group("subgroup") + src.create_group("subgroup") arr_data = np.arange(100) - await src.create_array( + src.create_array( "dataset", shape=(100,), chunks=(10,), shards=(50,), dtype=arr_data.dtype, ) - dataset = await src.getitem("dataset") - await dataset.setitem(selection=slice(None), value=arr_data) + src["dataset"] = arr_data dst_store = MemoryStore() - dst = await src.copy_store(dst_store, overwrite=True) + dst = src.copy_store(dst_store, overwrite=True) assert dst.attrs.get("root") is True - subgroup = await dst.getitem("subgroup") - assert isinstance(subgroup, AsyncGroup) + subgroup = dst["subgroup"] + assert isinstance(subgroup, Group) - copied_arr = await dst.getitem("dataset") - copied_data = await copied_arr.getitem(selection=slice(None)) + copied_arr = dst["dataset"] + copied_data = copied_arr[:] assert np.array_equal(copied_data, arr_data) From d473d6eddf835969e2de1394945dc7ac71bf50dc Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 3 Dec 2025 17:35:02 +0100 Subject: [PATCH 03/19] change comment, test not async --- src/zarr/core/group.py | 2 +- tests/test_group.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 59056c7a73..dd88f6e1d3 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -724,7 +724,7 @@ async def copy_store( zarr_format=target_zarr_format, ) else: - # Serializer done this way in case of having zarr_format 2. 
+ # Serializer done this way in case of having zarr_format 2, otherwise mypy complains. new_array = await new_group.create_array( name=child_path, shape=member.shape, diff --git a/tests/test_group.py b/tests/test_group.py index 520aa26bc4..f4bc0b5ec6 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -250,9 +250,9 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad members_observed = group.members(max_depth=-1) -async def test_copy_store(): +def test_copy_store(): src_store = MemoryStore() - src = Group.from_store(src_store, attributes={"root": True}) + src = Group.from_store(src_store, attributes={"root": True}, zarr_format=2) src.create_group("subgroup") @@ -261,7 +261,7 @@ async def test_copy_store(): "dataset", shape=(100,), chunks=(10,), - shards=(50,), + # shards=(50,), dtype=arr_data.dtype, ) src["dataset"] = arr_data From bb1405e6ec64bfdd9dbc3d6e70ba2fe203bdde59 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 3 Dec 2025 17:37:36 +0100 Subject: [PATCH 04/19] fix test --- tests/test_group.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_group.py b/tests/test_group.py index f4bc0b5ec6..6cbf0d7c08 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -252,7 +252,7 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad def test_copy_store(): src_store = MemoryStore() - src = Group.from_store(src_store, attributes={"root": True}, zarr_format=2) + src = Group.from_store(src_store, attributes={"root": True}) src.create_group("subgroup") @@ -261,7 +261,7 @@ def test_copy_store(): "dataset", shape=(100,), chunks=(10,), - # shards=(50,), + shards=(50,), dtype=arr_data.dtype, ) src["dataset"] = arr_data From c62543acb0e153b53585b8dd7582ee19624b0fdd Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 3 Dec 2025 18:52:28 +0100 Subject: [PATCH 05/19] support zarr v2 --- src/zarr/core/group.py | 33 
+++++++++++++++++++++++---------- tests/test_group.py | 31 +++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index dd88f6e1d3..8e1a460c48 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -702,7 +702,7 @@ async def copy_store( store: StoreLike, *, overwrite: bool = False, - use_consolidated: bool | None = None, + consolidate_metadata: bool | None = None, ) -> AsyncGroup: target_zarr_format = self.metadata.zarr_format @@ -724,7 +724,16 @@ async def copy_store( zarr_format=target_zarr_format, ) else: - # Serializer done this way in case of having zarr_format 2, otherwise mypy complains. + kwargs = {} + if target_zarr_format == 3: + kwargs["chunk_key_encoding"] = member.metadata.chunk_key_encoding + kwargs["dimension_names"] = member.metadata.dimension_names + else: + kwargs["chunk_key_encoding"] = { + "name": "v2", + "separator": member.metadata.dimension_separator, + } + # Serializer done this way in case of having zarr_format 2, otherwise mypy complains. 
new_array = await new_group.create_array( name=child_path, shape=member.shape, @@ -736,17 +745,21 @@ async def copy_store( serializer=member.serializer if member.serializer is not None else "auto", fill_value=member.metadata.fill_value, attributes=member.attrs, - chunk_key_encoding=member.metadata.chunk_key_encoding, - dimension_names=member.metadata.dimension_names, overwrite=overwrite, config={"order": member.order}, + **kwargs, ) - for region in member._iter_shard_regions(): - data = await member.getitem(selection=region) - await new_array.setitem(selection=region, value=data) + if target_zarr_format == 3: + for region in member._iter_shard_regions(): + data = await member.getitem(selection=region) + await new_array.setitem(selection=region, value=data) + else: + for region in member._iter_chunk_regions(): + data = await member.getitem(selection=region) + await new_array.setitem(selection=region, value=data) - if use_consolidated: + if consolidate_metadata: await async_api.consolidate_metadata(new_group.store) return new_group @@ -1933,12 +1946,12 @@ def copy_store( store: StoreLike, *, overwrite: bool = False, - use_consolidated: bool | None = None, + consolidate_metadata: bool | None = None, ) -> Group: return Group( sync( self._async_group.copy_store( - store=store, overwrite=overwrite, use_consolidated=use_consolidated + store=store, overwrite=overwrite, consolidate_metadata=consolidate_metadata ) ) ) diff --git a/tests/test_group.py b/tests/test_group.py index 6cbf0d7c08..858687a9a4 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -250,9 +250,18 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad members_observed = group.members(max_depth=-1) -def test_copy_store(): +@pytest.mark.parametrize( + ("zarr_format", "shards", "consolidate_metadata"), + [ + (2, None, False), + (2, None, True), + (3, (50,), False), + (3, (50,), True), + ], +) +def test_copy_store(zarr_format: int, shards: tuple[int, ...], 
consolidate_metadata: bool) -> None: src_store = MemoryStore() - src = Group.from_store(src_store, attributes={"root": True}) + src = Group.from_store(src_store, attributes={"root": True}, zarr_format=zarr_format) src.create_group("subgroup") @@ -261,13 +270,22 @@ def test_copy_store(): "dataset", shape=(100,), chunks=(10,), - shards=(50,), + shards=shards, dtype=arr_data.dtype, ) src["dataset"] = arr_data dst_store = MemoryStore() - dst = src.copy_store(dst_store, overwrite=True) + if zarr_format == 3 and consolidate_metadata: + with pytest.warns( + ZarrUserWarning, + match="Consolidated metadata is currently not part in the Zarr format 3 specification.", + ): + dst = src.copy_store( + dst_store, overwrite=True, consolidate_metadata=consolidate_metadata + ) + else: + dst = src.copy_store(dst_store, overwrite=True, consolidate_metadata=consolidate_metadata) assert dst.attrs.get("root") is True @@ -278,6 +296,11 @@ def test_copy_store(): copied_data = copied_arr[:] assert np.array_equal(copied_data, arr_data) + if consolidate_metadata: + assert zarr.open_group(dst_store).metadata.consolidated_metadata + else: + assert not zarr.open_group(dst_store).metadata.consolidated_metadata + def test_group(store: Store, zarr_format: ZarrFormat) -> None: """ From cdbc2f7fe69ff85e119eaa189fdcfa517788c7c3 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 3 Dec 2025 22:44:16 +0100 Subject: [PATCH 06/19] remove use of _iter_chunk_regions --- src/zarr/core/group.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 8e1a460c48..bfeb2b12cd 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -750,14 +750,9 @@ async def copy_store( **kwargs, ) - if target_zarr_format == 3: - for region in member._iter_shard_regions(): - data = await member.getitem(selection=region) - await new_array.setitem(selection=region, value=data) - else: - for region in member._iter_chunk_regions(): - 
data = await member.getitem(selection=region) - await new_array.setitem(selection=region, value=data) + for region in member._iter_shard_regions(): + data = await member.getitem(selection=region) + await new_array.setitem(selection=region, value=data) if consolidate_metadata: await async_api.consolidate_metadata(new_group.store) From e6e10dfc99fb6f6d44e4c9519e439c7fed4e6751 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 3 Dec 2025 23:06:29 +0100 Subject: [PATCH 07/19] change method name --- src/zarr/core/group.py | 6 +++--- tests/test_group.py | 8 +++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index bfeb2b12cd..700171b32e 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -697,7 +697,7 @@ def from_dict( store_path=store_path, ) - async def copy_store( + async def copy_to( self, store: StoreLike, *, @@ -1936,7 +1936,7 @@ def open( obj = sync(AsyncGroup.open(store, zarr_format=zarr_format)) return cls(obj) - def copy_store( + def copy_to( self, store: StoreLike, *, @@ -1945,7 +1945,7 @@ def copy_store( ) -> Group: return Group( sync( - self._async_group.copy_store( + self._async_group.copy_to( store=store, overwrite=overwrite, consolidate_metadata=consolidate_metadata ) ) diff --git a/tests/test_group.py b/tests/test_group.py index 858687a9a4..f1febab496 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -259,7 +259,7 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad (3, (50,), True), ], ) -def test_copy_store(zarr_format: int, shards: tuple[int, ...], consolidate_metadata: bool) -> None: +def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata: bool) -> None: src_store = MemoryStore() src = Group.from_store(src_store, attributes={"root": True}, zarr_format=zarr_format) @@ -281,11 +281,9 @@ def test_copy_store(zarr_format: int, shards: tuple[int, ...], consolidate_metad ZarrUserWarning, 
match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): - dst = src.copy_store( - dst_store, overwrite=True, consolidate_metadata=consolidate_metadata - ) + dst = src.copy_to(dst_store, overwrite=True, consolidate_metadata=consolidate_metadata) else: - dst = src.copy_store(dst_store, overwrite=True, consolidate_metadata=consolidate_metadata) + dst = src.copy_to(dst_store, overwrite=True, consolidate_metadata=consolidate_metadata) assert dst.attrs.get("root") is True From 9c4256758ecb640582da1b14c76eba54e8667288 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Thu, 4 Dec 2025 00:30:50 +0100 Subject: [PATCH 08/19] remove consolidate_metadata argument --- src/zarr/core/group.py | 14 ++------------ tests/test_group.py | 17 +++++------------ 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 700171b32e..531c25447b 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -702,7 +702,6 @@ async def copy_to( store: StoreLike, *, overwrite: bool = False, - consolidate_metadata: bool | None = None, ) -> AsyncGroup: target_zarr_format = self.metadata.zarr_format @@ -716,6 +715,7 @@ async def copy_to( async for _, member in self.members(max_depth=None): child_path = member.store_path.path target_path = StorePath(store=new_group.store, path=child_path) + if isinstance(member, AsyncGroup): await async_api.group( store=target_path, @@ -754,9 +754,6 @@ async def copy_to( data = await member.getitem(selection=region) await new_array.setitem(selection=region, value=data) - if consolidate_metadata: - await async_api.consolidate_metadata(new_group.store) - return new_group async def setitem(self, key: str, value: Any) -> None: @@ -1941,15 +1938,8 @@ def copy_to( store: StoreLike, *, overwrite: bool = False, - consolidate_metadata: bool | None = None, ) -> Group: - return Group( - sync( - self._async_group.copy_to( - store=store, overwrite=overwrite, 
consolidate_metadata=consolidate_metadata - ) - ) - ) + return Group(sync(self._async_group.copy_to(store=store, overwrite=overwrite))) def __getitem__(self, path: str) -> AnyArray | Group: """Obtain a group member. diff --git a/tests/test_group.py b/tests/test_group.py index f1febab496..4f8c699679 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -276,14 +276,7 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata src["dataset"] = arr_data dst_store = MemoryStore() - if zarr_format == 3 and consolidate_metadata: - with pytest.warns( - ZarrUserWarning, - match="Consolidated metadata is currently not part in the Zarr format 3 specification.", - ): - dst = src.copy_to(dst_store, overwrite=True, consolidate_metadata=consolidate_metadata) - else: - dst = src.copy_to(dst_store, overwrite=True, consolidate_metadata=consolidate_metadata) + dst = src.copy_to(dst_store, overwrite=True) assert dst.attrs.get("root") is True @@ -294,10 +287,10 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata copied_data = copied_arr[:] assert np.array_equal(copied_data, arr_data) - if consolidate_metadata: - assert zarr.open_group(dst_store).metadata.consolidated_metadata - else: - assert not zarr.open_group(dst_store).metadata.consolidated_metadata + # if consolidate_metadata: + # assert zarr.open_group(dst_store).metadata.consolidated_metadata + # else: + # assert not zarr.open_group(dst_store).metadata.consolidated_metadata def test_group(store: Store, zarr_format: ZarrFormat) -> None: From 63c652e8f6b55819987b486d8ad076702c6ebee5 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Thu, 4 Dec 2025 10:33:00 +0100 Subject: [PATCH 09/19] consolidate if consolidated --- src/zarr/core/group.py | 4 ++++ tests/test_group.py | 20 +++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 531c25447b..4a2c32a1b8 100644 --- 
a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -754,6 +754,10 @@ async def copy_to( data = await member.getitem(selection=region) await new_array.setitem(selection=region, value=data) + group = await self.open(self.store, zarr_format=target_zarr_format) + if group.metadata.consolidated_metadata: + await async_api.consolidate_metadata(new_group.store) + return new_group async def setitem(self, key: str, value: Any) -> None: diff --git a/tests/test_group.py b/tests/test_group.py index 4f8c699679..76640e4217 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -274,9 +274,19 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata dtype=arr_data.dtype, ) src["dataset"] = arr_data + if consolidate_metadata: + if zarr_format == 3: + with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"): + zarr.consolidate_metadata(src_store) + else: + zarr.consolidate_metadata(src_store) dst_store = MemoryStore() - dst = src.copy_to(dst_store, overwrite=True) + if zarr_format == 3 and consolidate_metadata: + with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"): + dst = src.copy_to(dst_store, overwrite=True) + else: + dst = src.copy_to(dst_store, overwrite=True) assert dst.attrs.get("root") is True @@ -287,10 +297,10 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata copied_data = copied_arr[:] assert np.array_equal(copied_data, arr_data) - # if consolidate_metadata: - # assert zarr.open_group(dst_store).metadata.consolidated_metadata - # else: - # assert not zarr.open_group(dst_store).metadata.consolidated_metadata + if consolidate_metadata: + assert zarr.open_group(dst_store).metadata.consolidated_metadata + else: + assert not zarr.open_group(dst_store).metadata.consolidated_metadata def test_group(store: Store, zarr_format: ZarrFormat) -> None: From d4924f55b73bd64d7cd088c95660532f99d182b4 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 
Dec 2025 01:43:23 +0100 Subject: [PATCH 10/19] add consolidated_metadata argument --- src/zarr/core/group.py | 18 +++++++++++++----- tests/test_group.py | 7 ++----- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 4a2c32a1b8..3d0bb85b7e 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -472,6 +472,7 @@ async def from_store( store: StoreLike, *, attributes: dict[str, Any] | None = None, + consolidated_metadata: ConsolidatedMetadata | None = None, overwrite: bool = False, zarr_format: ZarrFormat = 3, ) -> AsyncGroup: @@ -486,7 +487,11 @@ async def from_store( await ensure_no_existing_node(store_path, zarr_format=zarr_format) attributes = attributes or {} group = cls( - metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format), + metadata=GroupMetadata( + attributes=attributes, + consolidated_metadata=consolidated_metadata, + zarr_format=zarr_format, + ), store_path=store_path, ) await group._save_metadata(ensure_parents=True) @@ -704,11 +709,14 @@ async def copy_to( overwrite: bool = False, ) -> AsyncGroup: target_zarr_format = self.metadata.zarr_format + group = await self.open(self.store, zarr_format=target_zarr_format) + consolidated_metadata = group.metadata.consolidated_metadata new_group = await AsyncGroup.from_store( store, overwrite=overwrite, attributes=self.metadata.attributes, + consolidated_metadata=consolidated_metadata, zarr_format=target_zarr_format, ) @@ -754,10 +762,6 @@ async def copy_to( data = await member.getitem(selection=region) await new_array.setitem(selection=region, value=data) - group = await self.open(self.store, zarr_format=target_zarr_format) - if group.metadata.consolidated_metadata: - await async_api.consolidate_metadata(new_group.store) - return new_group async def setitem(self, key: str, value: Any) -> None: @@ -1008,6 +1012,7 @@ async def create_group( *, overwrite: bool = False, attributes: dict[str, Any] | None = None, + 
consolidated_metadata: ConsolidatedMetadata | None = None, ) -> AsyncGroup: """Create a sub-group. @@ -1028,6 +1033,7 @@ async def create_group( return await type(self).from_store( self.store_path / name, attributes=attributes, + consolidated_metadata=consolidated_metadata, overwrite=overwrite, zarr_format=self.metadata.zarr_format, ) @@ -1873,6 +1879,7 @@ def from_store( store: StoreLike, *, attributes: dict[str, Any] | None = None, + consolidated_metadata: ConsolidatedMetadata | None = None, zarr_format: ZarrFormat = 3, overwrite: bool = False, ) -> Group: @@ -1905,6 +1912,7 @@ def from_store( AsyncGroup.from_store( store, attributes=attributes, + consolidated_metadata=consolidated_metadata, overwrite=overwrite, zarr_format=zarr_format, ), diff --git a/tests/test_group.py b/tests/test_group.py index 76640e4217..9975670209 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -282,11 +282,8 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata zarr.consolidate_metadata(src_store) dst_store = MemoryStore() - if zarr_format == 3 and consolidate_metadata: - with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"): - dst = src.copy_to(dst_store, overwrite=True) - else: - dst = src.copy_to(dst_store, overwrite=True) + + dst = src.copy_to(dst_store, overwrite=True) assert dst.attrs.get("root") is True From e83dda56e7ff15da7fb5d605d224455b9a09c21c Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 Dec 2025 02:10:39 +0100 Subject: [PATCH 11/19] add docstring and argument --- src/zarr/core/group.py | 54 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 3d0bb85b7e..a5c6a52f39 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -707,7 +707,27 @@ async def copy_to( store: StoreLike, *, overwrite: bool = False, + use_consolidated_for_children: bool = True, ) -> AsyncGroup: + """ 
+ Copy this group and all its contents to a new store. + + Parameters + ---------- + store : StoreLike + The store to copy to. + overwrite : bool, optional + If True, overwrite any existing data in the target store. Default is False. + use_consolidated_for_children : bool, default True + Whether to use the consolidated metadata of child groups when iterating over the store contents. + Note that this only affects groups loaded from the store. If the current Group already has + consolidated metadata, it will always be used. + + Returns + ------- + AsyncGroup + The new group in the target store. + """ target_zarr_format = self.metadata.zarr_format group = await self.open(self.store, zarr_format=target_zarr_format) consolidated_metadata = group.metadata.consolidated_metadata @@ -720,7 +740,9 @@ async def copy_to( zarr_format=target_zarr_format, ) - async for _, member in self.members(max_depth=None): + async for _, member in self.members( + max_depth=None, use_consolidated_for_children=use_consolidated_for_children + ): child_path = member.store_path.path target_path = StorePath(store=new_group.store, path=child_path) @@ -1950,8 +1972,36 @@ def copy_to( store: StoreLike, *, overwrite: bool = False, + use_consolidated_for_children: bool = True, ) -> Group: - return Group(sync(self._async_group.copy_to(store=store, overwrite=overwrite))) + """ + Copy this group and all its contents to a new store. + + Parameters + ---------- + store : StoreLike + The store to copy to. + overwrite : bool, optional + If True, overwrite any existing data in the target store. Default is False. + use_consolidated_for_children : bool, default True + Whether to use the consolidated metadata of child groups when iterating over the store contents. + Note that this only affects groups loaded from the store. If the current Group already has + consolidated metadata, it will always be used. + + Returns + ------- + AsyncGroup + The new group in the target store. 
+ """ + return Group( + sync( + self._async_group.copy_to( + store=store, + overwrite=overwrite, + use_consolidated_for_children=use_consolidated_for_children, + ) + ) + ) def __getitem__(self, path: str) -> AnyArray | Group: """Obtain a group member. From 59b18ea2f084c4fe55f7e258e40afccb6f8a39b7 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 Dec 2025 17:56:23 +0100 Subject: [PATCH 12/19] add support subgroup consolidated metadata --- src/zarr/core/group.py | 5 +++-- tests/test_group.py | 24 +++++++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index a5c6a52f39..3ea3492c53 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -747,11 +747,12 @@ async def copy_to( target_path = StorePath(store=new_group.store, path=child_path) if isinstance(member, AsyncGroup): - await async_api.group( + await AsyncGroup.from_store( store=target_path, + zarr_format=target_zarr_format, overwrite=overwrite, attributes=member.metadata.attributes, - zarr_format=target_zarr_format, + consolidated_metadata=member.metadata.consolidated_metadata, ) else: kwargs = {} diff --git a/tests/test_group.py b/tests/test_group.py index 9975670209..387f92b3c7 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -263,7 +263,17 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata src_store = MemoryStore() src = Group.from_store(src_store, attributes={"root": True}, zarr_format=zarr_format) - src.create_group("subgroup") + subgroup = src.create_group("subgroup", attributes={"subgroup": True}) + + subgroup_arr_data = np.arange(50) + subgroup.create_array( + "subgroup_dataset", + shape=(50,), + chunks=(10,), + shards=shards, + dtype=subgroup_arr_data.dtype, + ) + subgroup["subgroup_dataset"] = subgroup_arr_data arr_data = np.arange(100) src.create_array( @@ -274,12 +284,16 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], 
consolidate_metadata dtype=arr_data.dtype, ) src["dataset"] = arr_data + if consolidate_metadata: if zarr_format == 3: with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"): zarr.consolidate_metadata(src_store) + with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"): + zarr.consolidate_metadata(src_store, path="subgroup") else: zarr.consolidate_metadata(src_store) + zarr.consolidate_metadata(src_store, path="subgroup") dst_store = MemoryStore() @@ -289,15 +303,23 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata subgroup = dst["subgroup"] assert isinstance(subgroup, Group) + assert subgroup.attrs.get("subgroup") is True copied_arr = dst["dataset"] copied_data = copied_arr[:] assert np.array_equal(copied_data, arr_data) + copied_subgroup_arr = subgroup["subgroup_dataset"] + copied_subgroup_data = copied_subgroup_arr[:] + assert np.array_equal(copied_subgroup_data, subgroup_arr_data) + if consolidate_metadata: assert zarr.open_group(dst_store).metadata.consolidated_metadata + if zarr_format == 3: + assert zarr.open_group(dst_store, path="subgroup").metadata.consolidated_metadata else: assert not zarr.open_group(dst_store).metadata.consolidated_metadata + assert not zarr.open_group(dst_store, path="subgroup").metadata.consolidated_metadata def test_group(store: Store, zarr_format: ZarrFormat) -> None: From b65d257da6d21810c3799950aa988480c49dc145 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 Dec 2025 18:02:13 +0100 Subject: [PATCH 13/19] add argument to docstring --- src/zarr/core/group.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 3ea3492c53..71a58211f0 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1916,6 +1916,8 @@ def from_store( for a description of all valid StoreLike values. attributes : dict, optional A dictionary of JSON-serializable values with user-defined attributes. 
+ consolidated_metadata : ConsolidatedMetadata, optional + Consolidated Metadata for this Group. This should contain metadata of child nodes below this group. zarr_format : {2, 3}, optional Zarr storage format version. overwrite : bool, optional From 1056b9ee3cfa5a0a51c75124d02d41f6733c5f65 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 Dec 2025 18:57:32 +0100 Subject: [PATCH 14/19] add example to docs groups --- docs/user-guide/groups.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/user-guide/groups.md b/docs/user-guide/groups.md index 57201216b6..6a845a6a79 100644 --- a/docs/user-guide/groups.md +++ b/docs/user-guide/groups.md @@ -134,4 +134,11 @@ print(root.tree()) ``` !!! note - [`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra. \ No newline at end of file + [`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra. + +You can copy a Group including consolidated metadata to a new destination using the `copy_to` method: + +```python exec="true" session="groups" source="above" result="ansi" +destination_store = zarr.storage.MemoryStore() +new_group = root.copy_to(destination_store, overwrite=True) +``` \ No newline at end of file From eadb647960be0857a2a0edca40ee3a28a0ea0b31 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 Dec 2025 23:27:49 +0100 Subject: [PATCH 15/19] adjust docs --- docs/user-guide/groups.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user-guide/groups.md b/docs/user-guide/groups.md index 6a845a6a79..78f1f309b9 100644 --- a/docs/user-guide/groups.md +++ b/docs/user-guide/groups.md @@ -136,7 +136,8 @@ print(root.tree()) !!! note [`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. 
It can be installed with the `[tree]` extra. -You can copy a Group including consolidated metadata to a new destination using the `copy_to` method: +You can copy a Group including consolidated metadata to a new destination store +(type of store can differ from the source store) using the `copy_to` method: ```python exec="true" session="groups" source="above" result="ansi" destination_store = zarr.storage.MemoryStore() From 8c3471cd602f3d6cb5eb7ba67bf7102e247f9465 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 Dec 2025 23:43:18 +0100 Subject: [PATCH 16/19] partial fix pre-commit --- src/zarr/testing/strategies.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 5eb17214fe..41deb04e6e 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -352,7 +352,8 @@ def basic_indices( allow_ellipsis: bool = True, ) -> Any: """Basic indices without unsupported negative slices.""" - strategy = npst.basic_indices( + # We can ignore this here, as it is only due to the numpy type hints being Literal[False | True] for overload variants + strategy = npst.basic_indices( # type: ignore[call-overload] shape=shape, min_dims=min_dims, max_dims=max_dims, @@ -362,7 +363,7 @@ def basic_indices( lambda idxr: ( not ( is_negative_slice(idxr) - or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) # type: ignore[redundant-expr] + or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) ) ) ) From 2cbb9b9189ea13999973220b2fec2561f8ac91d4 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Fri, 5 Dec 2025 23:46:56 +0100 Subject: [PATCH 17/19] add to changes --- changes/3612.feature.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changes/3612.feature.md diff --git a/changes/3612.feature.md b/changes/3612.feature.md new file mode 100644 index 0000000000..821159d418 --- /dev/null +++ b/changes/3612.feature.md @@
-0,0 +1,3 @@ +Added the `copy_to` convenience method to `zarr.Group` for copying a group to a destination store, which +can be of a different type than the original store of the `zarr.Group` being +copied. The metadata is also copied over as is. \ No newline at end of file From 128b9248ec37209969d4ca5fb7da6b5529ec424a Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 10 Dec 2025 12:26:52 +0100 Subject: [PATCH 18/19] change to call using self --- src/zarr/core/group.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 71a58211f0..2b421fe5f6 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -732,7 +732,7 @@ async def copy_to( group = await self.open(self.store, zarr_format=target_zarr_format) consolidated_metadata = group.metadata.consolidated_metadata - new_group = await AsyncGroup.from_store( + new_group = await self.from_store( store, overwrite=overwrite, attributes=self.metadata.attributes, @@ -747,7 +747,7 @@ async def copy_to( target_path = StorePath(store=new_group.store, path=child_path) if isinstance(member, AsyncGroup): - await AsyncGroup.from_store( + await self.from_store( store=target_path, zarr_format=target_zarr_format, overwrite=overwrite, From fa95e9c100a836aeab1eea31d2c4e65bf74a0837 Mon Sep 17 00:00:00 2001 From: Wouter-Michiel Vierdag Date: Wed, 10 Dec 2025 15:43:23 +0100 Subject: [PATCH 19/19] obtain consolidated metadata from self --- src/zarr/core/group.py | 4 +--- tests/test_group.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 2b421fe5f6..120e2d9299 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -729,14 +729,12 @@ async def copy_to( The new group in the target store.
""" target_zarr_format = self.metadata.zarr_format - group = await self.open(self.store, zarr_format=target_zarr_format) - consolidated_metadata = group.metadata.consolidated_metadata new_group = await self.from_store( store, overwrite=overwrite, attributes=self.metadata.attributes, - consolidated_metadata=consolidated_metadata, + consolidated_metadata=self.metadata.consolidated_metadata, zarr_format=target_zarr_format, ) diff --git a/tests/test_group.py b/tests/test_group.py index 387f92b3c7..7ab54633cf 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -288,11 +288,11 @@ def test_copy_to(zarr_format: int, shards: tuple[int, ...], consolidate_metadata if consolidate_metadata: if zarr_format == 3: with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"): - zarr.consolidate_metadata(src_store) + src = zarr.consolidate_metadata(src_store) with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"): zarr.consolidate_metadata(src_store, path="subgroup") else: - zarr.consolidate_metadata(src_store) + src = zarr.consolidate_metadata(src_store) zarr.consolidate_metadata(src_store, path="subgroup") dst_store = MemoryStore()