Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ jobs:
# Use ruff-action so we get annotations in the GitHub UI.
- uses: astral-sh/ruff-action@v3

# The default step above runs `ruff check`; also enforce formatting.
- uses: astral-sh/ruff-action@v3
with:
args: "format --check"

docs:
name: Build docs
runs-on: ubuntu-latest
Expand Down
8 changes: 8 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ classifiers = [
"Typing :: Typed",
]
dynamic = ["version"]
dependencies = [
# Type-only package providing Zarr v3 metadata TypedDicts/Literals used in
# the `.pyi` stubs (data-type names, codec configs, array metadata).
"zarr-metadata>=0.3",
]

[tool.maturin]
features = ["pyo3/extension-module", "abi3-py311"]
Expand Down Expand Up @@ -68,6 +73,9 @@ ignore = [
"TRY003", # define exceptions in the exception class
]

[tool.ruff.lint.isort]
known-first-party = ["zarrista"]

[tool.ruff.lint.per-file-ignores]
"*.pyi" = [
"A002", # Function argument `bytes` is shadowing a Python builtin
Expand Down
24 changes: 1 addition & 23 deletions python/zarrista/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""A low-level Zarr API for Python, binding to Rust's Zarrs."""

from typing import Literal, TypeAlias
from typing import TypeAlias

from . import codec, exceptions
from ._zarrista import (
Expand Down Expand Up @@ -29,27 +29,6 @@
type before using layout-specific methods.
"""

DataTypeName: TypeAlias = Literal[
"bool",
"int8",
"int16",
"int32",
"int64",
"uint8",
"uint16",
"uint32",
"uint64",
"float16",
"float32",
"float64",
"complex64",
"complex128",
"string",
"bytes",
]
"""The Zarr v3 names of the built-in fixed data types.
"""

Comment on lines -32 to -52

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, upstream `zarr-metadata` will add its own union of data type names; cc @d-v-b


__all__ = [
"Array",
Expand All @@ -58,7 +37,6 @@
"AsyncGroup",
"ChunkGrid",
"DataType",
"DataTypeName",
"DecodedArray",
"FilesystemStore",
"FillValue",
Expand Down
32 changes: 21 additions & 11 deletions python/zarrista/_array.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from types import EllipsisType
from typing import Any, TypeAlias, Unpack
from typing import TypeAlias, Unpack

from zarr_metadata import ArrayMetadataV3, JSONValue

from zarrista.codec import CodecOptions

Expand All @@ -26,7 +28,7 @@ class Array:
def open(store: FilesystemStore | MemoryStore, path: str = "/") -> Array:
"""Open the array stored at `path` in `store`."""
@property
def attrs(self) -> dict[str, Any]:
def attrs(self) -> dict[str, JSONValue]:
"""The array's user attributes as a dict."""
@property
def chunk_grid(self) -> ChunkGrid:
Expand All @@ -41,16 +43,18 @@ class Array:
def dtype(self) -> DataType:
"""The Zarr data type."""
@property
def metadata(self) -> dict[str, Any]:
"""The array's full Zarr v3 metadata as a dict."""
def metadata(self) -> ArrayMetadataV3:
"""The array's full Zarr v3 metadata."""
@property
def ndim(self) -> int:
"""The number of dimensions."""
@property
def path(self) -> str:
"""The array's path in the store."""
def retrieve_array_subset(
self, selection: Selection, **codec_options: Unpack[CodecOptions],
self,
selection: Selection,
**codec_options: Unpack[CodecOptions],
) -> DecodedArray:
"""Read and decode an array region selected with numpy-style basic indexing.

Expand All @@ -60,7 +64,9 @@ class Array:
Keyword arguments are passed as [`CodecOptions`][zarrista.codec.CodecOptions].
"""
def retrieve_chunk(
self, chunk_indices: list[int], **codec_options: Unpack[CodecOptions],
self,
chunk_indices: list[int],
**codec_options: Unpack[CodecOptions],
) -> DecodedArray:
"""Read and decode the chunk at the given chunk grid indices.

Expand All @@ -85,7 +91,7 @@ class AsyncArray:
`store` may be an obstore `ObjectStore` or an icechunk `Session`.
"""
@property
def attrs(self) -> dict[str, Any]:
def attrs(self) -> dict[str, JSONValue]:
"""The array's user attributes as a dict."""
@property
def chunk_grid(self) -> ChunkGrid:
Expand All @@ -100,16 +106,18 @@ class AsyncArray:
def dtype(self) -> DataType:
"""The Zarr data type."""
@property
def metadata(self) -> dict[str, Any]:
"""The array's full Zarr v3 metadata as a dict."""
def metadata(self) -> ArrayMetadataV3:
"""The array's full Zarr v3 metadata."""
@property
def ndim(self) -> int:
"""The number of dimensions."""
@property
def path(self) -> str:
"""The array's path in the store."""
async def retrieve_array_subset(
self, selection: Selection, **codec_options: Unpack[CodecOptions],
self,
selection: Selection,
**codec_options: Unpack[CodecOptions],
) -> DecodedArray:
"""Read and decode an array region selected with numpy-style basic indexing.

Expand All @@ -119,7 +127,9 @@ class AsyncArray:
Keyword arguments are passed as [`CodecOptions`][zarrista.codec.CodecOptions].
"""
async def retrieve_chunk(
self, chunk_indices: list[int], **codec_options: Unpack[CodecOptions],
self,
chunk_indices: list[int],
**codec_options: Unpack[CodecOptions],
) -> DecodedArray:
"""Read and decode the chunk at the given chunk grid indices.

Expand Down
67 changes: 46 additions & 21 deletions python/zarrista/_dtype.pyi
Original file line number Diff line number Diff line change
@@ -1,31 +1,56 @@
from typing import Any, Literal, TypeAlias
from typing import TypeAlias

DataTypeName: TypeAlias = Literal[
"bool",
"int8",
"int16",
"int32",
"int64",
"uint8",
"uint16",
"uint32",
"uint64",
"float16",
"float32",
"float64",
"complex64",
"complex128",
"string",
"bytes",
]
"""The Zarr v3 names of the built-in fixed data types.
from zarr_metadata import NamedConfigV3
from zarr_metadata.v3.data_type import (
BoolDataTypeName,
BytesDataTypeName,
Complex64DataTypeName,
Complex128DataTypeName,
Float16DataTypeName,
Float32DataTypeName,
Float64DataTypeName,
Int8DataTypeName,
Int16DataTypeName,
Int32DataTypeName,
Int64DataTypeName,
RawBytesDataTypeName,
StringDataTypeName,
Uint8DataTypeName,
Uint16DataTypeName,
Uint32DataTypeName,
Uint64DataTypeName,
)

DataTypeName: TypeAlias = (
BoolDataTypeName
| Int8DataTypeName
| Int16DataTypeName
| Int32DataTypeName
| Int64DataTypeName
| Uint8DataTypeName
| Uint16DataTypeName
| Uint32DataTypeName
| Uint64DataTypeName
| Float16DataTypeName
| Float32DataTypeName
| Float64DataTypeName
| Complex64DataTypeName
| Complex128DataTypeName
| StringDataTypeName
| BytesDataTypeName
| RawBytesDataTypeName
)
"""The Zarr v3 names of the data types `from_string` can build.

Composed from the per-dtype name literals in `zarr_metadata.v3.data_type`, so
it stays in sync with the spec rather than being hand-maintained here.
Comment on lines +24 to +46

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, we define our own explicit dtype union; cc @d-v-b

"""

class DataType:
"""A Zarr v3 data type."""

@staticmethod
def from_metadata(metadata: dict[str, Any]) -> DataType:
def from_metadata(metadata: NamedConfigV3) -> DataType:
"""Construct a data type from its Zarr v3 metadata."""
@staticmethod
def from_string(name: DataTypeName | str) -> DataType:
Expand Down
6 changes: 3 additions & 3 deletions python/zarrista/_group.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any
from zarr_metadata import JSONValue

from ._array import Array, AsyncArray
from ._store import AsyncStore, FilesystemStore, MemoryStore
Expand All @@ -10,7 +10,7 @@ class Group:
def open(store: FilesystemStore | MemoryStore, path: str = "/") -> Group:
"""Open the group stored at `path` in `store`."""
@property
def attrs(self) -> dict[str, Any]:
def attrs(self) -> dict[str, JSONValue]:
"""The group's user attributes as a dict."""
def array_keys(self) -> list[str]:
"""Names of the direct child arrays."""
Expand All @@ -29,7 +29,7 @@ class AsyncGroup:
`store` may be an obstore `ObjectStore` or an icechunk `Session`.
"""
@property
def attrs(self) -> dict[str, Any]:
def attrs(self) -> dict[str, JSONValue]:
"""The group's user attributes as a dict."""
async def array_keys(self) -> list[str]:
"""Names of the direct child arrays."""
Expand Down
6 changes: 4 additions & 2 deletions python/zarrista/codec/_bytes_to_bytes/_blosc.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Any, Literal, TypeAlias
from typing import Literal, TypeAlias

from zarr_metadata.v3.codec.blosc import BloscCodecConfiguration

from zarrista.codec._bytes_to_bytes import BytesToBytesCodec

Expand Down Expand Up @@ -35,7 +37,7 @@ class Blosc(BytesToBytesCodec):
automatically when `blocksize` is `None` or `0`.
"""
@staticmethod
def from_config(config: dict[str, Any]) -> Blosc:
def from_config(config: BloscCodecConfiguration) -> Blosc:
"""Construct a `blosc` codec from a configuration mapping.

For example `{"cname": "lz4", "clevel": 5, "shuffle": "shuffle",
Expand Down
11 changes: 8 additions & 3 deletions python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Any
from typing import TypedDict

from zarrista.codec._bytes_to_bytes import BytesToBytesCodec

class Crc32cConfig(TypedDict): ...

class Crc32c(BytesToBytesCodec):
"""The `crc32c` bytes-to-bytes codec."""

Expand All @@ -11,5 +13,8 @@ class Crc32c(BytesToBytesCodec):
Appends a CRC32C checksum to the encoded bytestream.
"""
@staticmethod
def from_config(config: dict[str, Any]) -> Crc32c:
"""Construct a `crc32c` codec from a configuration mapping, e.g. `{}`."""
def from_config(config: Crc32cConfig) -> Crc32c:
"""Construct a `crc32c` codec from a configuration mapping, e.g. `{}`.

The `crc32c` codec takes no configuration, so the mapping is empty.
"""
4 changes: 2 additions & 2 deletions python/zarrista/codec/_bytes_to_bytes/_gzip.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any
from zarr_metadata.v3.codec.gzip import GzipCodecConfiguration

from zarrista.codec._bytes_to_bytes import BytesToBytesCodec

Expand All @@ -12,5 +12,5 @@ class Gzip(BytesToBytesCodec):
to 9 (most compression).
"""
@staticmethod
def from_config(config: dict[str, Any]) -> Gzip:
def from_config(config: GzipCodecConfiguration) -> Gzip:
"""Construct a `gzip` codec from a config mapping, e.g. `{"level": 5}`."""
4 changes: 2 additions & 2 deletions python/zarrista/codec/_bytes_to_bytes/_zstd.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any
from zarr_metadata.v3.codec.zstd import ZstdCodecConfiguration

from zarrista.codec._bytes_to_bytes import BytesToBytesCodec

Expand All @@ -12,7 +12,7 @@ class Zstd(BytesToBytesCodec):
is written to (and verified on decode from) the encoded bytestream.
"""
@staticmethod
def from_config(config: dict[str, Any]) -> Zstd:
def from_config(config: ZstdCodecConfiguration) -> Zstd:
"""Construct a `zstd` codec from a configuration mapping.

For example, `{"level": 5, "checksum": False}`.
Expand Down
6 changes: 3 additions & 3 deletions python/zarrista/codec/_codec_chain.pyi
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Any
from zarr_metadata import MetadataV3, NamedConfigV3

class CodecChain:
"""The ordered chain of codecs used to encode and decode an array's chunks."""

def __init__(self, metadatas: list[dict[str, Any]]) -> None:
def __init__(self, metadatas: list[MetadataV3]) -> None:

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This `list` should probably be a `Sequence`.

"""Construct a codec chain from a list of Zarr v3 codec metadata."""
def create_metadatas(self) -> list[dict[str, Any]]:
def create_metadatas(self) -> list[NamedConfigV3]:
"""Return the Zarr v3 metadata for each codec in the chain."""
1 change: 1 addition & 0 deletions tests/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import zarr
from arro3.core import Array as Arro3Array
from arro3.core import DataType

from zarrista import Array, FilesystemStore, VariableArray


Expand Down
1 change: 1 addition & 0 deletions tests/test_dtype.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from zarrista import DataType


Expand Down
1 change: 1 addition & 0 deletions tests/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pytest
import zarr

from zarrista import FilesystemStore, Group
from zarrista import exceptions as exc

Expand Down
6 changes: 5 additions & 1 deletion tests/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pytest
import zarr
from numpy.typing import NDArray

from zarrista import AsyncArray, AsyncGroup

# The session is serialized with the Python icechunk and reconstructed by the
Expand All @@ -38,7 +39,10 @@ def icechunk_session(tmp_path: Path) -> tuple[icechunk.Session, NDArray[np.int32
data = np.arange(9 * 64 * 100, dtype="int32").reshape(9, 64, 100)
root = zarr.group(store=session.store)
z = root.create_array(
"embeddings", shape=data.shape, chunks=(3, 16, 50), dtype=data.dtype,
"embeddings",
shape=data.shape,
chunks=(3, 16, 50),
dtype=data.dtype,
)
z[:] = data
session.commit("write embeddings")
Expand Down
Loading
Loading