3 changes: 3 additions & 0 deletions changes/3612.feature.md
@@ -0,0 +1,3 @@
Added a convenience method for `zarr.Group` to copy itself to a destination store, which
can be of a different type than the original store of the `zarr.Group` being copied.
The group's metadata is also copied over as-is.
10 changes: 9 additions & 1 deletion docs/user-guide/groups.md
@@ -134,4 +134,12 @@ print(root.tree())
```

!!! note
[`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra.

You can copy a group, including its consolidated metadata, to a new destination store
(whose type may differ from the source store) using the `copy_to` method:

```python exec="true" session="groups" source="above" result="ansi"
destination_store = zarr.storage.MemoryStore()
new_group = root.copy_to(destination_store, overwrite=True)
```
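
The copy is independent of the original group. As a quick check (reusing `destination_store`
from the example above), the copied group can be opened from the destination store like any
other group:

```python exec="true" session="groups" source="above" result="ansi"
copied = zarr.open_group(destination_store)
print(copied.tree())
```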
134 changes: 133 additions & 1 deletion src/zarr/core/group.py
@@ -472,6 +472,7 @@ async def from_store(
store: StoreLike,
*,
attributes: dict[str, Any] | None = None,
consolidated_metadata: ConsolidatedMetadata | None = None,
overwrite: bool = False,
zarr_format: ZarrFormat = 3,
) -> AsyncGroup:
@@ -486,7 +487,11 @@
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
attributes = attributes or {}
group = cls(
metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format),
metadata=GroupMetadata(
attributes=attributes,
consolidated_metadata=consolidated_metadata,
zarr_format=zarr_format,
),
store_path=store_path,
)
await group._save_metadata(ensure_parents=True)
@@ -697,6 +702,91 @@ def from_dict(
store_path=store_path,
)

async def copy_to(
self,
store: StoreLike,
*,
overwrite: bool = False,
use_consolidated_for_children: bool = True,
) -> AsyncGroup:
"""
Copy this group and all its contents to a new store.

Parameters
----------
store : StoreLike
The store to copy to.
overwrite : bool, optional
If True, overwrite any existing data in the target store. Default is False.
use_consolidated_for_children : bool, default True
Whether to use the consolidated metadata of child groups when iterating over the store contents.
Note that this only affects groups loaded from the store. If the current Group already has
consolidated metadata, it will always be used.

Returns
-------
AsyncGroup
The new group in the target store.
"""
target_zarr_format = self.metadata.zarr_format
group = await self.open(self.store, zarr_format=target_zarr_format)
consolidated_metadata = group.metadata.consolidated_metadata

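# Create the root of the copy with the same attributes, zarr format, and (if present) consolidated metadata.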
new_group = await AsyncGroup.from_store(
store,
overwrite=overwrite,
attributes=self.metadata.attributes,
consolidated_metadata=consolidated_metadata,
zarr_format=target_zarr_format,
)

async for _, member in self.members(
max_depth=None, use_consolidated_for_children=use_consolidated_for_children
):
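# Recreate each child node at the same relative path in the destination store.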
child_path = member.store_path.path
target_path = StorePath(store=new_group.store, path=child_path)

if isinstance(member, AsyncGroup):
await AsyncGroup.from_store(
store=target_path,
zarr_format=target_zarr_format,
overwrite=overwrite,
attributes=member.metadata.attributes,
consolidated_metadata=member.metadata.consolidated_metadata,
)
else:
kwargs = {}
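# Format-specific keyword arguments: v3 arrays carry a chunk key encoding and
# dimension names, while v2 arrays only record a dimension separator.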
if target_zarr_format == 3:
kwargs["chunk_key_encoding"] = member.metadata.chunk_key_encoding
kwargs["dimension_names"] = member.metadata.dimension_names
else:
kwargs["chunk_key_encoding"] = {
"name": "v2",
"separator": member.metadata.dimension_separator,
}
# The serializer is passed this way to handle zarr_format 2; otherwise mypy complains.
new_array = await new_group.create_array(
name=child_path,
shape=member.shape,
dtype=member.dtype,
chunks=member.chunks,
shards=member.shards,
filters=member.filters,
compressors=member.compressors,
serializer=member.serializer if member.serializer is not None else "auto",
fill_value=member.metadata.fill_value,
attributes=member.attrs,
overwrite=overwrite,
config={"order": member.order},
**kwargs,
)

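# Copy the array data region by region from the source array into the new array.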
for region in member._iter_shard_regions():
data = await member.getitem(selection=region)
await new_array.setitem(selection=region, value=data)

return new_group

async def setitem(self, key: str, value: Any) -> None:
"""
Fastpath for creating a new array
@@ -945,6 +1035,7 @@ async def create_group(
*,
overwrite: bool = False,
attributes: dict[str, Any] | None = None,
consolidated_metadata: ConsolidatedMetadata | None = None,
) -> AsyncGroup:
"""Create a sub-group.

@@ -965,6 +1056,7 @@
return await type(self).from_store(
self.store_path / name,
attributes=attributes,
consolidated_metadata=consolidated_metadata,
overwrite=overwrite,
zarr_format=self.metadata.zarr_format,
)
@@ -1810,6 +1902,7 @@ def from_store(
store: StoreLike,
*,
attributes: dict[str, Any] | None = None,
consolidated_metadata: ConsolidatedMetadata | None = None,
zarr_format: ZarrFormat = 3,
overwrite: bool = False,
) -> Group:
@@ -1823,6 +1916,8 @@
for a description of all valid StoreLike values.
attributes : dict, optional
A dictionary of JSON-serializable values with user-defined attributes.
consolidated_metadata : ConsolidatedMetadata, optional
Consolidated metadata for this group. This should contain the metadata of the child nodes below this group.
zarr_format : {2, 3}, optional
Zarr storage format version.
overwrite : bool, optional
@@ -1842,6 +1937,7 @@
AsyncGroup.from_store(
store,
attributes=attributes,
consolidated_metadata=consolidated_metadata,
overwrite=overwrite,
zarr_format=zarr_format,
),
@@ -1874,6 +1970,42 @@ def open(
obj = sync(AsyncGroup.open(store, zarr_format=zarr_format))
return cls(obj)

def copy_to(
self,
store: StoreLike,
*,
overwrite: bool = False,
use_consolidated_for_children: bool = True,
) -> Group:
"""
Copy this group and all its contents to a new store.

Parameters
----------
store : StoreLike
The store to copy to.
overwrite : bool, optional
If True, overwrite any existing data in the target store. Default is False.
use_consolidated_for_children : bool, default True
Whether to use the consolidated metadata of child groups when iterating over the store contents.
Note that this only affects groups loaded from the store. If the current Group already has
consolidated metadata, it will always be used.

Returns
-------
Group
The new group in the target store.
"""
return Group(
sync(
self._async_group.copy_to(
store=store,
overwrite=overwrite,
use_consolidated_for_children=use_consolidated_for_children,
)
)
)

def __getitem__(self, path: str) -> AnyArray | Group:
"""Obtain a group member.

5 changes: 3 additions & 2 deletions src/zarr/testing/strategies.py
@@ -352,7 +352,8 @@ def basic_indices(
allow_ellipsis: bool = True,
) -> Any:
"""Basic indices without unsupported negative slices."""
strategy = npst.basic_indices(
# We can ignore this because numpy's type hints use Literal[True] / Literal[False] overload variants here
strategy = npst.basic_indices( # type: ignore[call-overload]
shape=shape,
min_dims=min_dims,
max_dims=max_dims,
@@ -362,7 +363,7 @@
lambda idxr: (
not (
is_negative_slice(idxr)
or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) # type: ignore[redundant-expr]
or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
)
)
)
72 changes: 72 additions & 0 deletions tests/test_group.py
@@ -250,6 +250,78 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad
members_observed = group.members(max_depth=-1)


@pytest.mark.parametrize(
("zarr_format", "shards", "consolidate_metadata"),
[
(2, None, False),
(2, None, True),
(3, (50,), False),
(3, (50,), True),
],
)
def test_copy_to(zarr_format: int, shards: tuple[int, ...] | None, consolidate_metadata: bool) -> None:
src_store = MemoryStore()
src = Group.from_store(src_store, attributes={"root": True}, zarr_format=zarr_format)

subgroup = src.create_group("subgroup", attributes={"subgroup": True})

subgroup_arr_data = np.arange(50)
subgroup.create_array(
"subgroup_dataset",
shape=(50,),
chunks=(10,),
shards=shards,
dtype=subgroup_arr_data.dtype,
)
subgroup["subgroup_dataset"] = subgroup_arr_data

arr_data = np.arange(100)
src.create_array(
"dataset",
shape=(100,),
chunks=(10,),
shards=shards,
dtype=arr_data.dtype,
)
src["dataset"] = arr_data

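# Optionally consolidate metadata at the root and at the subgroup to exercise both code paths.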
if consolidate_metadata:
if zarr_format == 3:
with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"):
zarr.consolidate_metadata(src_store)
with pytest.warns(ZarrUserWarning, match="Consolidated metadata is currently"):
zarr.consolidate_metadata(src_store, path="subgroup")
else:
zarr.consolidate_metadata(src_store)
zarr.consolidate_metadata(src_store, path="subgroup")

dst_store = MemoryStore()

dst = src.copy_to(dst_store, overwrite=True)

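# The copy should preserve group attributes, nested members, array data, and
# (when requested) the consolidated metadata.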
assert dst.attrs.get("root") is True

subgroup = dst["subgroup"]
assert isinstance(subgroup, Group)
assert subgroup.attrs.get("subgroup") is True

copied_arr = dst["dataset"]
copied_data = copied_arr[:]
assert np.array_equal(copied_data, arr_data)

copied_subgroup_arr = subgroup["subgroup_dataset"]
copied_subgroup_data = copied_subgroup_arr[:]
assert np.array_equal(copied_subgroup_data, subgroup_arr_data)

if consolidate_metadata:
assert zarr.open_group(dst_store).metadata.consolidated_metadata
if zarr_format == 3:
assert zarr.open_group(dst_store, path="subgroup").metadata.consolidated_metadata
else:
assert not zarr.open_group(dst_store).metadata.consolidated_metadata
assert not zarr.open_group(dst_store, path="subgroup").metadata.consolidated_metadata


def test_group(store: Store, zarr_format: ZarrFormat) -> None:
"""
Test basic Group routines.