diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 401fca0..33ed0c3 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -20,6 +20,11 @@ jobs: # Use ruff-action so we get annotations in the GitHub UI. - uses: astral-sh/ruff-action@v3 + # The default step above runs `ruff check`; also enforce formatting. + - uses: astral-sh/ruff-action@v3 + with: + args: "format --check" + docs: name: Build docs runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index 31cb6dd..0365061 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,11 @@ classifiers = [ "Typing :: Typed", ] dynamic = ["version"] +dependencies = [ + # Type-only package providing Zarr v3 metadata TypedDicts/Literals used in + # the `.pyi` stubs (data-type names, codec configs, array metadata). + "zarr-metadata>=0.3", +] [tool.maturin] features = ["pyo3/extension-module", "abi3-py311"] @@ -68,6 +73,9 @@ ignore = [ "TRY003", # define exceptions in the exception class ] +[tool.ruff.lint.isort] +known-first-party = ["zarrista"] + [tool.ruff.lint.per-file-ignores] "*.pyi" = [ "A002", # Function argument `bytes` is shadowing a Python builtin diff --git a/python/zarrista/__init__.py b/python/zarrista/__init__.py index a0509db..00d0d8c 100644 --- a/python/zarrista/__init__.py +++ b/python/zarrista/__init__.py @@ -1,6 +1,6 @@ """A low-level Zarr API for Python, binding to Rust's Zarrs.""" -from typing import Literal, TypeAlias +from typing import TypeAlias from . import codec, exceptions from ._zarrista import ( @@ -29,27 +29,6 @@ type before using layout-specific methods. """ -DataTypeName: TypeAlias = Literal[ - "bool", - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", - "float16", - "float32", - "float64", - "complex64", - "complex128", - "string", - "bytes", -] -"""The Zarr v3 names of the built-in fixed data types. -""" - __all__ = [ "Array", @@ -58,7 +37,6 @@ "AsyncGroup", "ChunkGrid", "DataType", - "DataTypeName", "DecodedArray", "FilesystemStore", "FillValue", diff --git a/python/zarrista/_array.pyi b/python/zarrista/_array.pyi index ee36646..9edbdf9 100644 --- a/python/zarrista/_array.pyi +++ b/python/zarrista/_array.pyi @@ -1,5 +1,7 @@ from types import EllipsisType -from typing import Any, TypeAlias, Unpack +from typing import TypeAlias, Unpack + +from zarr_metadata import ArrayMetadataV3, JSONValue from zarrista.codec import CodecOptions @@ -26,7 +28,7 @@ class Array: def open(store: FilesystemStore | MemoryStore, path: str = "/") -> Array: """Open the array stored at `path` in `store`.""" @property - def attrs(self) -> dict[str, Any]: + def attrs(self) -> dict[str, JSONValue]: """The array's user attributes as a dict.""" @property def chunk_grid(self) -> ChunkGrid: @@ -41,8 +43,8 @@ class Array: def dtype(self) -> DataType: """The Zarr data type.""" @property - def metadata(self) -> dict[str, Any]: - """The array's full Zarr v3 metadata as a dict.""" + def metadata(self) -> ArrayMetadataV3: + """The array's full Zarr v3 metadata.""" @property def ndim(self) -> int: """The number of dimensions.""" @@ -50,7 +52,9 @@ class Array: def path(self) -> str: """The array's path in the store.""" def retrieve_array_subset( - self, selection: Selection, **codec_options: Unpack[CodecOptions], + self, + selection: Selection, + **codec_options: Unpack[CodecOptions], ) -> DecodedArray: """Read and decode an array region selected with numpy-style basic indexing. @@ -60,7 +64,9 @@ class Array: Keyword arguments are passed as [`CodecOptions`][zarrista.codec.CodecOptions]. """ def retrieve_chunk( - self, chunk_indices: list[int], **codec_options: Unpack[CodecOptions], + self, + chunk_indices: list[int], + **codec_options: Unpack[CodecOptions], ) -> DecodedArray: """Read and decode the chunk at the given chunk grid indices. @@ -85,7 +91,7 @@ class AsyncArray: `store` may be an obstore `ObjectStore` or an icechunk `Session`. """ @property - def attrs(self) -> dict[str, Any]: + def attrs(self) -> dict[str, JSONValue]: """The array's user attributes as a dict.""" @property def chunk_grid(self) -> ChunkGrid: @@ -100,8 +106,8 @@ class AsyncArray: def dtype(self) -> DataType: """The Zarr data type.""" @property - def metadata(self) -> dict[str, Any]: - """The array's full Zarr v3 metadata as a dict.""" + def metadata(self) -> ArrayMetadataV3: + """The array's full Zarr v3 metadata.""" @property def ndim(self) -> int: """The number of dimensions.""" @@ -109,7 +115,9 @@ class AsyncArray: def path(self) -> str: """The array's path in the store.""" async def retrieve_array_subset( - self, selection: Selection, **codec_options: Unpack[CodecOptions], + self, + selection: Selection, + **codec_options: Unpack[CodecOptions], ) -> DecodedArray: """Read and decode an array region selected with numpy-style basic indexing. @@ -119,7 +127,9 @@ class AsyncArray: Keyword arguments are passed as [`CodecOptions`][zarrista.codec.CodecOptions]. """ async def retrieve_chunk( - self, chunk_indices: list[int], **codec_options: Unpack[CodecOptions], + self, + chunk_indices: list[int], + **codec_options: Unpack[CodecOptions], ) -> DecodedArray: """Read and decode the chunk at the given chunk grid indices. diff --git a/python/zarrista/_dtype.pyi b/python/zarrista/_dtype.pyi index c7eee92..dabe20e 100644 --- a/python/zarrista/_dtype.pyi +++ b/python/zarrista/_dtype.pyi @@ -1,31 +1,56 @@ -from typing import Any, Literal, TypeAlias +from typing import TypeAlias -DataTypeName: TypeAlias = Literal[ - "bool", - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", - "float16", - "float32", - "float64", - "complex64", - "complex128", - "string", - "bytes", -] -"""The Zarr v3 names of the built-in fixed data types. +from zarr_metadata import NamedConfigV3 +from zarr_metadata.v3.data_type import ( + BoolDataTypeName, + BytesDataTypeName, + Complex64DataTypeName, + Complex128DataTypeName, + Float16DataTypeName, + Float32DataTypeName, + Float64DataTypeName, + Int8DataTypeName, + Int16DataTypeName, + Int32DataTypeName, + Int64DataTypeName, + RawBytesDataTypeName, + StringDataTypeName, + Uint8DataTypeName, + Uint16DataTypeName, + Uint32DataTypeName, + Uint64DataTypeName, +) + +DataTypeName: TypeAlias = ( + BoolDataTypeName + | Int8DataTypeName + | Int16DataTypeName + | Int32DataTypeName + | Int64DataTypeName + | Uint8DataTypeName + | Uint16DataTypeName + | Uint32DataTypeName + | Uint64DataTypeName + | Float16DataTypeName + | Float32DataTypeName + | Float64DataTypeName + | Complex64DataTypeName + | Complex128DataTypeName + | StringDataTypeName + | BytesDataTypeName + | RawBytesDataTypeName +) +"""The Zarr v3 names of the data types `from_string` can build. + +Composed from the per-dtype name literals in `zarr_metadata.v3.data_type`, so +it stays in sync with the spec rather than being hand-maintained here. """ class DataType: """A Zarr v3 data type.""" @staticmethod - def from_metadata(metadata: dict[str, Any]) -> DataType: + def from_metadata(metadata: NamedConfigV3) -> DataType: """Construct a data type from its Zarr v3 metadata.""" @staticmethod def from_string(name: DataTypeName | str) -> DataType: diff --git a/python/zarrista/_group.pyi b/python/zarrista/_group.pyi index 0557fbc..bec3a23 100644 --- a/python/zarrista/_group.pyi +++ b/python/zarrista/_group.pyi @@ -1,4 +1,4 @@ -from typing import Any +from zarr_metadata import JSONValue from ._array import Array, AsyncArray from ._store import AsyncStore, FilesystemStore, MemoryStore @@ -10,7 +10,7 @@ class Group: def open(store: FilesystemStore | MemoryStore, path: str = "/") -> Group: """Open the group stored at `path` in `store`.""" @property - def attrs(self) -> dict[str, Any]: + def attrs(self) -> dict[str, JSONValue]: """The group's user attributes as a dict.""" def array_keys(self) -> list[str]: """Names of the direct child arrays.""" @@ -29,7 +29,7 @@ class AsyncGroup: `store` may be an obstore `ObjectStore` or an icechunk `Session`. """ @property - def attrs(self) -> dict[str, Any]: + def attrs(self) -> dict[str, JSONValue]: """The group's user attributes as a dict.""" async def array_keys(self) -> list[str]: """Names of the direct child arrays.""" diff --git a/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi b/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi index 5cd2010..cf92e19 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi @@ -1,4 +1,6 @@ -from typing import Any, Literal, TypeAlias +from typing import Literal, TypeAlias + +from zarr_metadata.v3.codec.blosc import BloscCodecConfiguration from zarrista.codec._bytes_to_bytes import BytesToBytesCodec @@ -35,7 +37,7 @@ class Blosc(BytesToBytesCodec): automatically when `blocksize` is `None` or `0`. """ @staticmethod - def from_config(config: dict[str, Any]) -> Blosc: + def from_config(config: BloscCodecConfiguration) -> Blosc: """Construct a `blosc` codec from a configuration mapping. For example `{"cname": "lz4", "clevel": 5, "shuffle": "shuffle", diff --git a/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi b/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi index 13137d2..a7a56a4 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi @@ -1,7 +1,9 @@ -from typing import Any +from typing import TypedDict from zarrista.codec._bytes_to_bytes import BytesToBytesCodec +class Crc32cConfig(TypedDict): ... + class Crc32c(BytesToBytesCodec): """The `crc32c` bytes-to-bytes codec.""" @@ -11,5 +13,8 @@ class Crc32c(BytesToBytesCodec): Appends a CRC32C checksum to the encoded bytestream. """ @staticmethod - def from_config(config: dict[str, Any]) -> Crc32c: - """Construct a `crc32c` codec from a configuration mapping, e.g. `{}`.""" + def from_config(config: Crc32cConfig) -> Crc32c: + """Construct a `crc32c` codec from a configuration mapping, e.g. `{}`. + + The `crc32c` codec takes no configuration, so the mapping is empty. + """ diff --git a/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi b/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi index 9fa344f..0fae25c 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi @@ -1,4 +1,4 @@ -from typing import Any +from zarr_metadata.v3.codec.gzip import GzipCodecConfiguration from zarrista.codec._bytes_to_bytes import BytesToBytesCodec @@ -12,5 +12,5 @@ class Gzip(BytesToBytesCodec): to 9 (most compression). """ @staticmethod - def from_config(config: dict[str, Any]) -> Gzip: + def from_config(config: GzipCodecConfiguration) -> Gzip: """Construct a `gzip` codec from a config mapping, e.g. `{"level": 5}`.""" diff --git a/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi b/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi index 04ee21c..845c385 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi @@ -1,4 +1,4 @@ -from typing import Any +from zarr_metadata.v3.codec.zstd import ZstdCodecConfiguration from zarrista.codec._bytes_to_bytes import BytesToBytesCodec @@ -12,7 +12,7 @@ class Zstd(BytesToBytesCodec): is written to (and verified on decode from) the encoded bytestream. """ @staticmethod - def from_config(config: dict[str, Any]) -> Zstd: + def from_config(config: ZstdCodecConfiguration) -> Zstd: """Construct a `zstd` codec from a configuration mapping. For example, `{"level": 5, "checksum": false}`. diff --git a/python/zarrista/codec/_codec_chain.pyi b/python/zarrista/codec/_codec_chain.pyi index 8219224..672cb65 100644 --- a/python/zarrista/codec/_codec_chain.pyi +++ b/python/zarrista/codec/_codec_chain.pyi @@ -1,9 +1,9 @@ -from typing import Any +from zarr_metadata import MetadataV3, NamedConfigV3 class CodecChain: """The ordered chain of codecs used to encode and decode an array's chunks.""" - def __init__(self, metadatas: list[dict[str, Any]]) -> None: + def __init__(self, metadatas: list[MetadataV3]) -> None: """Construct a codec chain from a list of Zarr v3 codec metadata.""" - def create_metadatas(self) -> list[dict[str, Any]]: + def create_metadatas(self) -> list[NamedConfigV3]: """Return the Zarr v3 metadata for each codec in the chain.""" diff --git a/tests/test_arrow.py b/tests/test_arrow.py index d464439..37cb35f 100644 --- a/tests/test_arrow.py +++ b/tests/test_arrow.py @@ -11,6 +11,7 @@ import zarr from arro3.core import Array as Arro3Array from arro3.core import DataType + from zarrista import Array, FilesystemStore, VariableArray diff --git a/tests/test_dtype.py b/tests/test_dtype.py index 94d2aa9..dfefff7 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype.py @@ -1,4 +1,5 @@ import pytest + from zarrista import DataType diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 5ac9b2f..6bd3e96 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -5,6 +5,7 @@ import pytest import zarr + from zarrista import FilesystemStore, Group from zarrista import exceptions as exc diff --git a/tests/test_icechunk.py b/tests/test_icechunk.py index 7ca2a3a..381e3b3 100644 --- a/tests/test_icechunk.py +++ b/tests/test_icechunk.py @@ -16,6 +16,7 @@ import pytest import zarr from numpy.typing import NDArray + from zarrista import AsyncArray, AsyncGroup # The session is serialized with the Python icechunk and reconstructed by the @@ -38,7 +39,10 @@ def icechunk_session(tmp_path: Path) -> tuple[icechunk.Session, NDArray[np.int32 data = np.arange(9 * 64 * 100, dtype="int32").reshape(9, 64, 100) root = zarr.group(store=session.store) z = root.create_array( - "embeddings", shape=data.shape, chunks=(3, 16, 50), dtype=data.dtype, + "embeddings", + shape=data.shape, + chunks=(3, 16, 50), + dtype=data.dtype, ) z[:] = data session.commit("write embeddings") diff --git a/tests/test_indexing.py b/tests/test_indexing.py index f9502f2..14e39d6 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -12,6 +12,7 @@ import zarr from numpy.typing import NDArray from obstore.store import LocalStore + from zarrista import Array, AsyncArray, FilesystemStore, Tensor @@ -21,7 +22,10 @@ def int32_array(tmp_path: Path) -> tuple[Path, NDArray[np.int32]]: path = tmp_path / "a.zarr" data = np.arange(9 * 64 * 100, dtype="int32").reshape(9, 64, 100) z = zarr.create_array( - store=str(path), shape=data.shape, chunks=(3, 16, 50), dtype=data.dtype, + store=str(path), + shape=data.shape, + chunks=(3, 16, 50), + dtype=data.dtype, ) z[:] = data return path, data @@ -84,7 +88,8 @@ def test_getitem_matches_retrieve_array_subset( key = (slice(0, 2), slice(None), slice(5, 7)) np.testing.assert_array_equal( - arr[key].to_numpy(), arr.retrieve_array_subset(key).to_numpy(), + arr[key].to_numpy(), + arr.retrieve_array_subset(key).to_numpy(), ) @@ -136,7 +141,10 @@ def test_float64_dtype(tmp_path): path = tmp_path / "f.zarr" data = (np.arange(4 * 5, dtype="float64") * 0.5).reshape(4, 5) z = zarr.create_array( - store=str(path), shape=data.shape, chunks=(2, 5), dtype=data.dtype, + store=str(path), + shape=data.shape, + chunks=(2, 5), + dtype=data.dtype, ) z[:] = data diff --git a/tests/test_store_input.py b/tests/test_store_input.py index c219835..b36ca5e 100644 --- a/tests/test_store_input.py +++ b/tests/test_store_input.py @@ -9,6 +9,7 @@ import numpy as np import pytest import zarr + from zarrista import Array, FilesystemStore, Group @@ -24,7 +25,8 @@ def array_path(tmp_path: Path) -> Path: def test_filesystem_store_opens_and_reads(array_path: Path): array = Array.open(FilesystemStore(str(array_path))) np.testing.assert_array_equal( - array.retrieve_chunk([0]).to_numpy(), np.array([0, 1], dtype="int32"), + array.retrieve_chunk([0]).to_numpy(), + np.array([0, 1], dtype="int32"), ) diff --git a/uv.lock b/uv.lock index 162ad35..60f138a 100644 --- a/uv.lock +++ b/uv.lock @@ -1883,9 +1883,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/0a/469e2bd01be1490336e6c8707386845655d59261543315778a3ccc7e8019/zarr-3.2.1-py3-none-any.whl", hash = "sha256:f78cdd3d9687ad0e9f9cba2c5683b64f0c52589c19f685eeabe872e93cc0d2c7", size = 319617, upload-time = "2026-05-05T12:37:20.66Z" }, ] +[[package]] +name = "zarr-metadata" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/9c/cfd5aa02a27c63ecec702a77834b395411518da5c748414d7e6a323638ed/zarr_metadata-0.3.0.tar.gz", hash = "sha256:d8fe02feef43380056ea0429ceb50974b7b5afe6f0386853977506b034e89d53", size = 36398, upload-time = "2026-06-19T13:17:38.276Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/96/64137930fb40b96b4d207eb1f1e4e42c5d6c9e682a5a7c3e6feff6eb0e29/zarr_metadata-0.3.0-py3-none-any.whl", hash = "sha256:e6651f418fcc89cc3c6fc11aa852fb6f8dd6f31d62913abc0d4d37ce7302d671", size = 45636, upload-time = "2026-06-19T13:17:37.097Z" }, +] + [[package]] name = "zarrista" source = { editable = "." } +dependencies = [ + { name = "zarr-metadata" }, +] [package.dev-dependencies] dev = [ @@ -1913,6 +1928,7 @@ docs = [ ] [package.metadata] +requires-dist = [{ name = "zarr-metadata", specifier = ">=0.3" }] [package.metadata.requires-dev] dev = [