From 6db92c34b756107c8245062f080d37205dcd34b7 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 22 Jun 2026 16:47:21 -0400 Subject: [PATCH 01/27] chunk grid --- src/chunks.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/chunks.rs b/src/chunks.rs index 256544a..920a667 100644 --- a/src/chunks.rs +++ b/src/chunks.rs @@ -1,11 +1,24 @@ use pyo3::prelude::*; use zarrs::array::ChunkGrid; +use crate::error::ZarristaResult; +use crate::metadata::PyMetadataV3; + #[pyclass(module = "zarrista", frozen, name = "ChunkGrid")] pub struct PyChunkGrid(ChunkGrid); #[pymethods] impl PyChunkGrid { + #[staticmethod] + fn from_metadata(metadata: PyMetadataV3, shape: Vec) -> ZarristaResult { + Ok(Self(ChunkGrid::from_metadata(metadata.as_ref(), &shape)?)) + } + + #[getter] + fn metadata(&self) -> PyMetadataV3 { + self.0.metadata().into() + } + #[getter] fn ndim(&self) -> usize { self.0.dimensionality() From ac3054eb77860e364f6a9981d1dbc25fc869c358 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 22 Jun 2026 17:01:28 -0400 Subject: [PATCH 02/27] wip: create array --- src/array/sync.rs | 54 +++++++++++++++++++++++++++++++-- src/chunks.rs | 3 +- src/codec/array_to_array.rs | 14 ++++++++- src/codec/bytes_to_bytes/mod.rs | 8 ++++- src/dtype.rs | 6 +++- src/fill_value.rs | 7 ++++- 6 files changed, 85 insertions(+), 7 deletions(-) diff --git a/src/array/sync.rs b/src/array/sync.rs index 32387c2..96a1f3b 100644 --- a/src/array/sync.rs +++ b/src/array/sync.rs @@ -3,17 +3,18 @@ use crate::array::selection::PySelection; use crate::array::util::PyChunkIndices; use crate::chunks::PyChunkGrid; -use crate::codec::{PyCodecChain, PyCodecOptions}; +use crate::codec::{PyArrayToArrayCodec, PyBytesToBytesCodec, PyCodecChain, PyCodecOptions}; use crate::decoded_array::DecodedArray; use crate::dtype::PyDataType; use crate::error::ZarristaResult; +use crate::fill_value::PyFillValue; use crate::node::PyNodePath; use crate::storage::PySyncStorage; use pyo3::prelude::*; use 
pyo3_bytes::PyBytes; use pythonize::pythonize; use pythonize::Result as PythonizeResult; -use zarrs::array::Array; +use zarrs::array::{Array, ArrayBuilder}; use zarrs::storage::ReadableWritableListableStorageTraits; /// A Zarr array. @@ -43,6 +44,55 @@ impl PyArray { ) } + /// Create a new array + #[staticmethod] + #[pyo3( + signature = (store, dtype, chunk_grid, fill_value, *, path="/", subchunk_shape=None, array_to_array_codecs=None, bytes_to_bytes_codecs=None), + text_signature = "(store, dtype, chunk_grid, fill_value, *, path='/', subchunk_shape=None, array_to_array_codecs=None, bytes_to_bytes_codecs=None)" + )] + #[expect(clippy::too_many_arguments)] + fn create( + store: PySyncStorage, + dtype: PyDataType, + chunk_grid: PyChunkGrid, + fill_value: PyFillValue, + path: &str, + subchunk_shape: Option>, + array_to_array_codecs: Option>, + bytes_to_bytes_codecs: Option>, + ) -> ZarristaResult { + let store = store.into_inner(); + let mut builder = ArrayBuilder::new_with_chunk_grid( + chunk_grid, + dtype.into_inner(), + fill_value.into_inner(), + ); + + if let Some(subchunk_shape) = subchunk_shape { + builder.subchunk_shape(subchunk_shape); + } + if let Some(array_to_array_codecs) = array_to_array_codecs { + builder.array_to_array_codecs( + array_to_array_codecs + .into_iter() + .map(|c| c.into_inner()) + .collect(), + ); + } + if let Some(bytes_to_bytes_codecs) = bytes_to_bytes_codecs { + builder.bytes_to_bytes_codecs( + bytes_to_bytes_codecs + .into_iter() + .map(|c| c.into_inner()) + .collect(), + ); + } + + Ok(Self { + inner: builder.build(store, path)?, + }) + } + /// Open the array stored at `path` in `store`. 
#[staticmethod] #[pyo3( diff --git a/src/chunks.rs b/src/chunks.rs index 920a667..b4359af 100644 --- a/src/chunks.rs +++ b/src/chunks.rs @@ -4,7 +4,8 @@ use zarrs::array::ChunkGrid; use crate::error::ZarristaResult; use crate::metadata::PyMetadataV3; -#[pyclass(module = "zarrista", frozen, name = "ChunkGrid")] +#[derive(Debug, Clone)] +#[pyclass(module = "zarrista", frozen, name = "ChunkGrid", from_py_object)] pub struct PyChunkGrid(ChunkGrid); #[pymethods] diff --git a/src/codec/array_to_array.rs b/src/codec/array_to_array.rs index 967edd8..b55ed9d 100644 --- a/src/codec/array_to_array.rs +++ b/src/codec/array_to_array.rs @@ -22,9 +22,21 @@ pub fn bitround(keepbits: u32) -> PyArrayToArrayCodec { PyArrayToArrayCodec(Arc::new(codec)) } -#[pyclass(module = "zarrista.codec", frozen, name = "ArrayToArrayCodec")] +#[derive(Debug, Clone)] +#[pyclass( + module = "zarrista.codec", + frozen, + name = "ArrayToArrayCodec", + from_py_object +)] pub struct PyArrayToArrayCodec(Arc); +impl PyArrayToArrayCodec { + pub fn into_inner(self) -> Arc { + self.0 + } +} + #[pymethods] impl PyArrayToArrayCodec { fn encoded_data_type(&self, decoded_data_type: &PyDataType) -> ZarristaResult { diff --git a/src/codec/bytes_to_bytes/mod.rs b/src/codec/bytes_to_bytes/mod.rs index 6133346..ddcacfd 100644 --- a/src/codec/bytes_to_bytes/mod.rs +++ b/src/codec/bytes_to_bytes/mod.rs @@ -12,11 +12,13 @@ use zarrs::array::{BytesToBytesCodecTraits, CodecOptions}; use crate::error::ZarristaResult; +#[derive(Debug, Clone)] #[pyclass( module = "zarrista.codec", subclass, frozen, - name = "BytesToBytesCodec" + name = "BytesToBytesCodec", + from_py_object )] pub struct PyBytesToBytesCodec(Arc); @@ -24,6 +26,10 @@ impl PyBytesToBytesCodec { pub fn new(codec: Arc) -> Self { Self(codec) } + + pub fn into_inner(self) -> Arc { + self.0 + } } #[pymethods] diff --git a/src/dtype.rs b/src/dtype.rs index 1caa871..41142af 100644 --- a/src/dtype.rs +++ b/src/dtype.rs @@ -10,7 +10,7 @@ use zarrs::array::{DataType, 
DataTypeSize}; use zarrs::metadata::v3::MetadataV3; #[derive(Debug, Clone)] -#[pyclass(module = "zarrista", frozen, name = "DataType", skip_from_py_object)] +#[pyclass(module = "zarrista", frozen, name = "DataType", from_py_object)] pub struct PyDataType { inner: DataType, } @@ -19,6 +19,10 @@ impl PyDataType { pub(crate) fn inner(&self) -> &DataType { &self.inner } + + pub fn into_inner(self) -> DataType { + self.inner + } } #[pymethods] diff --git a/src/fill_value.rs b/src/fill_value.rs index 8b8aee8..fd53f04 100644 --- a/src/fill_value.rs +++ b/src/fill_value.rs @@ -2,13 +2,18 @@ use pyo3::prelude::*; use pyo3_bytes::PyBytes; use zarrs::array::FillValue; -#[pyclass(module = "zarrista", frozen, name = "FillValue")] +#[derive(Debug, Clone)] +#[pyclass(module = "zarrista", frozen, name = "FillValue", from_py_object)] pub struct PyFillValue(FillValue); impl PyFillValue { pub(crate) fn inner(&self) -> &FillValue { &self.0 } + + pub fn into_inner(self) -> FillValue { + self.0 + } } #[pymethods] From 5a4e9708935949d4e453d2dfdb6a6025f927d510 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 22 Jun 2026 18:23:07 -0400 Subject: [PATCH 03/27] wip create mod --- src/array/create.rs | 0 src/array/mod.rs | 1 + 2 files changed, 1 insertion(+) create mode 100644 src/array/create.rs diff --git a/src/array/create.rs b/src/array/create.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/array/mod.rs b/src/array/mod.rs index eccdc51..cfa4978 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,4 +1,5 @@ mod r#async; +mod create; mod selection; mod sync; mod util; From 7ca4fd6da16e86959dc23c20cde3d6dac2eabc22 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 10:39:39 -0400 Subject: [PATCH 04/27] edit --- src/array/sync.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/array/sync.rs b/src/array/sync.rs index 6b6101c..d1ffb10 100644 --- a/src/array/sync.rs +++ b/src/array/sync.rs @@ -8,6 +8,7 @@ use crate::array::util::PyChunkIndices; use 
crate::chunks::PyChunkGrid; use crate::codec::{PyArrayToArrayCodec, PyBytesToBytesCodec, PyCodecChain, PyCodecOptions}; use crate::decoded_array::DecodedArray; +use crate::dtype::PyDataType; use crate::error::ZarristaResult; use crate::fill_value::PyFillValue; use crate::node::PyNodePath; From a248748488dc26c01989c36a481ffd5c04ad160e Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 13:24:49 -0400 Subject: [PATCH 05/27] ArrayBuilder.like --- src/array/async.rs | 4 ++++ src/array/create.rs | 27 +++++++++++++++++++++++++++ src/array/sync.rs | 6 +++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/array/async.rs b/src/array/async.rs index 09d4cbf..ec17d44 100644 --- a/src/array/async.rs +++ b/src/array/async.rs @@ -27,6 +27,10 @@ impl PyAsyncArray { pub(crate) fn new(inner: Arc>) -> Self { Self { inner } } + + pub fn inner(&self) -> &Arc> { + &self.inner + } } // Metadata accessors shared with `PyArray`; see `array/shared.rs`. diff --git a/src/array/create.rs b/src/array/create.rs index e69de29..2e64157 100644 --- a/src/array/create.rs +++ b/src/array/create.rs @@ -0,0 +1,27 @@ +use pyo3::exceptions::PyTypeError; +use pyo3::prelude::*; +use zarrs::array::ArrayBuilder; + +use crate::array::{PyArray, PyAsyncArray}; +use crate::error::ZarristaResult; + +#[pyclass(module = "zarrista.array", frozen, name = "Config")] +pub struct PyArrayBuilder(ArrayBuilder); + +#[pymethods] +impl PyArrayBuilder { + #[staticmethod] + fn like<'py>(array: Bound<'py, PyAny>) -> ZarristaResult { + if let Ok(array) = array.cast::() { + Ok(Self(ArrayBuilder::from_array(array.get().inner()))) + } else if let Ok(array) = array.cast::() { + Ok(Self(ArrayBuilder::from_array(array.get().inner()))) + } else { + Err(PyTypeError::new_err(format!( + "expected an Array or AsyncArray, got {}", + array.get_type().name()? 
+ )) + .into()) + } + } +} diff --git a/src/array/sync.rs b/src/array/sync.rs index d1ffb10..a79588d 100644 --- a/src/array/sync.rs +++ b/src/array/sync.rs @@ -6,7 +6,7 @@ use crate::array::selection::PySelection; use crate::array::shared::array_metadata_accessors; use crate::array::util::PyChunkIndices; use crate::chunks::PyChunkGrid; -use crate::codec::{PyArrayToArrayCodec, PyBytesToBytesCodec, PyCodecChain, PyCodecOptions}; +use crate::codec::{PyArrayToArrayCodec, PyBytesToBytesCodec, PyCodecOptions}; use crate::decoded_array::DecodedArray; use crate::dtype::PyDataType; use crate::error::ZarristaResult; @@ -28,6 +28,10 @@ impl PyArray { pub(crate) fn new(inner: Arc>) -> Self { Self { inner } } + + pub fn inner(&self) -> &Arc> { + &self.inner + } } // Metadata accessors shared with `PyAsyncArray`; see `array/shared.rs`. From c02df792f51944cfe9953df8998195c9e69a71ad Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 13:31:29 -0400 Subject: [PATCH 06/27] patch zarrs --- Cargo.lock | 54 ++++++++++++++++++++++++++++++------------------------ Cargo.toml | 4 ++++ 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3ce066..074fe59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5270,8 +5270,7 @@ dependencies = [ [[package]] name = "zarrs" version = "0.23.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8132307b8fc041fd21f68c7987103fb6e038b11f9838c16ec43b798f5480ccf5" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "async-generic", "async-lock", @@ -5315,15 +5314,14 @@ dependencies = [ "zarrs_metadata", "zarrs_metadata_ext", "zarrs_plugin", - "zarrs_storage", + "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", "zstd", ] [[package]] name = "zarrs_chunk_grid" version = "0.5.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cf67386fd96a0336cd3e5ab5ca6cb14e0e05aee80f1acae8c4d3cf562a8bb65" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "derive_more", "inventory", @@ -5338,21 +5336,19 @@ dependencies = [ [[package]] name = "zarrs_chunk_key_encoding" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9040e7feaa92d1904d492acd0cd91b97214f1791c5b5738e6c05b2ca4145a382" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "derive_more", "inventory", "zarrs_metadata", "zarrs_plugin", - "zarrs_storage", + "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", ] [[package]] name = "zarrs_codec" version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383a129a6a0cbb2c80cdba23809e5cab85159756464b7d0f112468a495c128da" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "async-trait", "bytemuck", @@ -5367,14 +5363,13 @@ dependencies = [ "zarrs_data_type", "zarrs_metadata", "zarrs_plugin", - "zarrs_storage", + "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", ] [[package]] name = "zarrs_data_type" version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc7c594c9363278fcd9db4c205514f009944206eb093ea7ad40b85f50009f31" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "derive_more", "half", @@ -5391,8 +5386,7 @@ dependencies = [ [[package]] name = "zarrs_filesystem" version = "0.3.9" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "270efeb0181651aee5460b3232f2fc83e91bd646cefe75001d1c8f9a4f3abf81" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "bytes", "derive_more", @@ -5403,7 +5397,7 @@ dependencies = [ "positioned-io", "thiserror 2.0.18", "walkdir", - "zarrs_storage", + "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", ] [[package]] @@ -5416,14 +5410,13 @@ dependencies = [ "futures", "icechunk", "tokio", - "zarrs_storage", + "zarrs_storage 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "zarrs_metadata" version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60c4c363a8a302d7babb3c29017850a7b4e0af6ca5f9ba2946263a185b62fea" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "derive_more", "half", @@ -5436,8 +5429,7 @@ dependencies = [ [[package]] name = "zarrs_metadata_ext" version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2048e07848ca99c7450518e0584929300b1b6a3cf442f18b26ffd3520814bd5b" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "derive_more", "monostate", @@ -5458,14 +5450,13 @@ dependencies = [ "async-trait", "futures", "object_store", - "zarrs_storage", + "zarrs_storage 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "zarrs_plugin" version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cbe0ed432aee86856f70ca33be36eaf4a0dae21ab730750d9280a7ca1e95046" +source = 
"git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" dependencies = [ "paste", "regex", @@ -5489,6 +5480,21 @@ dependencies = [ "unsafe_cell_slice", ] +[[package]] +name = "zarrs_storage" +version = "0.4.3" +source = "git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1#e68838becd223ddb7f522b265c1b65b90fe577b1" +dependencies = [ + "async-trait", + "auto_impl", + "bytes", + "derive_more", + "futures", + "itertools", + "thiserror 2.0.18", + "unsafe_cell_slice", +] + [[package]] name = "zerocopy" version = "0.8.52" diff --git a/Cargo.toml b/Cargo.toml index 37df6c7..872bdbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,3 +55,7 @@ pyo3 = { version = "0.29", features = ["auto-initialize"] } [profile.release] lto = true codegen-units = 1 + +[patch.crates-io] +# Branch kyle/v0.23-derive-clone +zarrs = { git = "https://github.com/kylebarron/zarrs", rev = "e68838becd223ddb7f522b265c1b65b90fe577b1" } From 2a466a405ef14b9d4c674d6f383cbb29a03036bc Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 13:52:30 -0400 Subject: [PATCH 07/27] start on builder pattern --- Cargo.toml | 7 +++- src/array/create.rs | 83 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 872bdbb..4f3a949 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,12 @@ pyo3-object_store = { version = "0.11.0", default-features = false } pythonize = "0.29" serde_json = "1" thiserror = "2.0.18" -zarrs = { version = "0.23", features = ["async", "bitround", "ndarray"] } +zarrs = { version = "0.23", features = [ + "async", + "bitround", + "ndarray", + "sharding", +] } zarrs_icechunk = "0.5" zarrs_object_store = { version = "0.6.2" } diff --git a/src/array/create.rs b/src/array/create.rs index 2e64157..6a425da 100644 --- a/src/array/create.rs +++ b/src/array/create.rs @@ -2,12 +2,25 @@ use 
pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use zarrs::array::ArrayBuilder; +use crate::array::util::PyArrayShape; use crate::array::{PyArray, PyAsyncArray}; +use crate::codec::{PyArrayToArrayCodec, PyArrayToBytesCodec, PyBytesToBytesCodec}; +use crate::dtype::PyDataType; use crate::error::ZarristaResult; +use crate::metadata::PyArrayMetadataV3; +use crate::storage::{PyAsyncStorage, PySyncStorage}; #[pyclass(module = "zarrista.array", frozen, name = "Config")] pub struct PyArrayBuilder(ArrayBuilder); +impl PyArrayBuilder { + fn with(&self, f: impl FnOnce(&mut ArrayBuilder)) -> Self { + let mut b = self.0.clone(); + f(&mut b); + Self(b) + } +} + #[pymethods] impl PyArrayBuilder { #[staticmethod] @@ -24,4 +37,74 @@ impl PyArrayBuilder { .into()) } } + + fn attrs(&self, attrs: Bound<'_, PyAny>) -> PyResult { + let attributes = pythonize::depythonize(&attrs)?; + Ok(self.with(|builder| { + builder.attributes(attributes); + })) + } + + // TODO: + // fn codec_options + + fn compressors(&self, compressors: Vec) -> Self { + self.with(|builder| { + builder + .bytes_to_bytes_codecs(compressors.into_iter().map(|c| c.into_inner()).collect()); + }) + } + + fn create(&self, store: PySyncStorage, path: &str) -> ZarristaResult { + Ok(self.0.build_arc(store.into_inner(), path)?.into()) + } + + fn create_async(&self, store: PyAsyncStorage, path: &str) -> ZarristaResult { + Ok(self.0.build_arc(store.into_inner(), path)?.into()) + } + + fn create_metadata(&self) -> ZarristaResult { + Ok(self.0.build_metadata()?.into()) + } + + /// Set the data type of the array to be built. 
+ fn data_type(&self, data_type: PyDataType) -> Self { + self.with(|builder| { + builder.data_type(data_type.into_inner()); + }) + } + + fn dimension_names(&self, dimension_names: Option>>) -> Self { + self.with(|builder| { + builder.dimension_names(dimension_names); + }) + } + + fn filters(&self, filters: Vec) -> Self { + self.with(|builder| { + builder.array_to_array_codecs(filters.into_iter().map(|f| f.into_inner()).collect()); + }) + } + + fn serializer(&self, serializer: PyArrayToBytesCodec) -> Self { + self.with(|builder| { + builder.array_to_bytes_codec(serializer.into_inner()); + }) + } + + /// Set the shape of the array to be built. + fn shape(&self, shape: PyArrayShape) -> Self { + self.with(|builder| { + builder.shape(shape); + }) + } + + fn subchunk_shape(&self, subchunk_shape: Option) -> Self { + self.with(|builder| { + builder.subchunk_shape(subchunk_shape); + }) + } + + // TODO: + // fn storage_transformers } From b02a018bd417ba4dbeb15b4d016b1345b5892b40 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 13:54:11 -0400 Subject: [PATCH 08/27] fix compile --- src/array/async.rs | 6 ++++++ src/array/create.rs | 2 +- src/array/util.rs | 21 +++++++++++++++++++++ src/codec/array_to_bytes/mod.rs | 12 +++++++++++- 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/array/async.rs b/src/array/async.rs index ec17d44..bd0220e 100644 --- a/src/array/async.rs +++ b/src/array/async.rs @@ -135,3 +135,9 @@ impl From> for PyAsyncArra Self::new(Arc::new(inner)) } } + +impl From>> for PyAsyncArray { + fn from(inner: Arc>) -> Self { + Self::new(inner) + } +} diff --git a/src/array/create.rs b/src/array/create.rs index 6a425da..53bbdf4 100644 --- a/src/array/create.rs +++ b/src/array/create.rs @@ -101,7 +101,7 @@ impl PyArrayBuilder { fn subchunk_shape(&self, subchunk_shape: Option) -> Self { self.with(|builder| { - builder.subchunk_shape(subchunk_shape); + builder.subchunk_shape(subchunk_shape.map(|s| s.into())); }) } diff --git 
a/src/array/util.rs b/src/array/util.rs index a5ab379..f32d540 100644 --- a/src/array/util.rs +++ b/src/array/util.rs @@ -8,3 +8,24 @@ impl AsRef<[u64]> for PyChunkIndices { &self.0 } } + +#[derive(IntoPyObject, FromPyObject, Clone, Debug)] +pub struct PyArrayShape(Vec); + +impl From> for PyArrayShape { + fn from(shape: Vec) -> Self { + Self(shape) + } +} + +impl From for Vec { + fn from(shape: PyArrayShape) -> Self { + shape.0 + } +} + +impl AsRef<[u64]> for PyArrayShape { + fn as_ref(&self) -> &[u64] { + &self.0 + } +} diff --git a/src/codec/array_to_bytes/mod.rs b/src/codec/array_to_bytes/mod.rs index 2b6fd30..4b6128b 100644 --- a/src/codec/array_to_bytes/mod.rs +++ b/src/codec/array_to_bytes/mod.rs @@ -11,13 +11,23 @@ use zarrs::array::{ArrayToBytesCodecTraits, Codec}; use crate::error::ZarristaResult; use crate::metadata::{PyConfiguration, PyMetadataV3}; -#[pyclass(module = "zarrista.codec", frozen, name = "ArrayToBytesCodec")] +#[derive(Debug, Clone)] +#[pyclass( + module = "zarrista.codec", + frozen, + name = "ArrayToBytesCodec", + from_py_object +)] pub struct PyArrayToBytesCodec(Arc); impl PyArrayToBytesCodec { pub fn new(codec: Arc) -> Self { Self(codec) } + + pub fn into_inner(self) -> Arc { + self.0 + } } #[pymethods] From bbf4545a3024d6285faf1c1c7fb88a2329e48293 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 13:59:30 -0400 Subject: [PATCH 09/27] chunk key encoding --- src/array/chunk_key_encoding.rs | 47 +++++++++++++++++++++++++++++++++ src/array/mod.rs | 2 ++ 2 files changed, 49 insertions(+) create mode 100644 src/array/chunk_key_encoding.rs diff --git a/src/array/chunk_key_encoding.rs b/src/array/chunk_key_encoding.rs new file mode 100644 index 0000000..3b58110 --- /dev/null +++ b/src/array/chunk_key_encoding.rs @@ -0,0 +1,47 @@ +use std::borrow::Cow; + +use pyo3::prelude::*; +use zarrs::array::ChunkKeyEncoding; + +use crate::error::ZarristaResult; +use crate::metadata::PyMetadataV3; + +#[derive(Debug, Clone)] +#[pyclass(module = 
"zarrista", frozen, name = "ChunkKeyEncoding", from_py_object)] +pub struct PyChunkKeyEncoding(ChunkKeyEncoding); + +impl PyChunkKeyEncoding { + pub fn into_inner(self) -> ChunkKeyEncoding { + self.0 + } + + pub fn new(encoding: ChunkKeyEncoding) -> Self { + Self(encoding) + } +} + +#[pymethods] +impl PyChunkKeyEncoding { + fn __repr__(&self) -> String { + format!("ChunkKeyEncoding({:?})", self.0) + } + + #[staticmethod] + fn from_metadata(metadata: PyMetadataV3) -> ZarristaResult { + Ok(Self::new(ChunkKeyEncoding::from_metadata( + metadata.as_ref(), + )?)) + } + + /// The codec's Zarr v3 metadata + #[getter] + fn metadata(&self) -> PyMetadataV3 { + self.0.metadata().into() + } + + /// The codec's Zarr v3 name if it has one. + #[getter] + fn name(&self) -> Option> { + self.0.name_v3() + } +} diff --git a/src/array/mod.rs b/src/array/mod.rs index 5f4835d..07a272d 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,9 +1,11 @@ mod r#async; +mod chunk_key_encoding; mod create; mod selection; mod shared; mod sync; mod util; +pub use chunk_key_encoding::PyChunkKeyEncoding; pub use r#async::PyAsyncArray; pub use sync::PyArray; From b401e15a3fdae2d1fb81a2fc2a798146488f1f33 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:05:44 -0400 Subject: [PATCH 10/27] chunk_key_encoding --- src/array/chunk_key_encoding.rs | 32 +++++++++++++++++++++++++++++++- src/array/create.rs | 8 +++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/array/chunk_key_encoding.rs b/src/array/chunk_key_encoding.rs index 3b58110..ff4f04b 100644 --- a/src/array/chunk_key_encoding.rs +++ b/src/array/chunk_key_encoding.rs @@ -1,7 +1,11 @@ use std::borrow::Cow; +use std::sync::Arc; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use zarrs::array::ChunkKeyEncoding; +use pyo3::pybacked::PyBackedStr; +use zarrs::array::chunk_key_encoding::DefaultChunkKeyEncoding; +use zarrs::array::{ChunkKeyEncoding, ChunkKeySeparator}; use 
crate::error::ZarristaResult; use crate::metadata::PyMetadataV3; @@ -26,6 +30,13 @@ impl PyChunkKeyEncoding { format!("ChunkKeyEncoding({:?})", self.0) } + // TODO: not sure whether we want constructors as classmethods or as free functions. + #[staticmethod] + fn default(sep: PyChunkKeySeparator) -> Self { + let encoding = DefaultChunkKeyEncoding::new(sep.0); + Self(Arc::new(encoding).into()) + } + #[staticmethod] fn from_metadata(metadata: PyMetadataV3) -> ZarristaResult { Ok(Self::new(ChunkKeyEncoding::from_metadata( @@ -45,3 +56,22 @@ impl PyChunkKeyEncoding { self.0.name_v3() } } + +#[derive(Debug, Clone)] +pub struct PyChunkKeySeparator(ChunkKeySeparator); + +impl FromPyObject<'_, '_> for PyChunkKeySeparator { + type Error = PyErr; + + fn extract(obj: Borrowed<'_, '_, PyAny>) -> Result { + let s = obj.extract::()?; + match s.to_ascii_lowercase().as_str() { + "." => Ok(Self(ChunkKeySeparator::Dot)), + "/" => Ok(Self(ChunkKeySeparator::Slash)), + _ => Err(PyValueError::new_err(format!( + "Invalid chunk key separator: {}", + s + ))), + } + } +} diff --git a/src/array/create.rs b/src/array/create.rs index 53bbdf4..dbf7118 100644 --- a/src/array/create.rs +++ b/src/array/create.rs @@ -3,7 +3,7 @@ use pyo3::prelude::*; use zarrs::array::ArrayBuilder; use crate::array::util::PyArrayShape; -use crate::array::{PyArray, PyAsyncArray}; +use crate::array::{PyArray, PyAsyncArray, PyChunkKeyEncoding}; use crate::codec::{PyArrayToArrayCodec, PyArrayToBytesCodec, PyBytesToBytesCodec}; use crate::dtype::PyDataType; use crate::error::ZarristaResult; @@ -45,6 +45,12 @@ impl PyArrayBuilder { })) } + fn chunk_key_encoding(&self, chunk_key_encoding: PyChunkKeyEncoding) -> Self { + self.with(|builder| { + builder.chunk_key_encoding(chunk_key_encoding.into_inner()); + }) + } + // TODO: // fn codec_options From 8cefc8c72a88c7b3905f596702e56c2ab07925a1 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:11:58 -0400 Subject: [PATCH 11/27] update claude.md with design 
philosophy --- CLAUDE.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index c5cb60b..e1a043d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -3,6 +3,23 @@ A small, prototypical zarrita-like Python Zarr implementation on top of `zarrs`, exposed to Python via `pyo3`. +## Design philosophy + +- **Type-driven design, "parse, don't validate."** Encode invariants in types so + that illegal states are unrepresentable, rather than accepting loose inputs and + checking them afterward. +- **The `FromPyObject` extractor is the validator.** Parse each input at the pyo3 + boundary into its final, already-valid typed form (use `#[derive(FromPyObject)]` + enums / unions for inputs that can take several shapes). The rest of the code + then handles only well-formed values, and the parsing logic lives once on the + type and is reused by every entry point. +- **No manual validation in function bodies.** Prefer a richly-typed single + argument over several nullable, mutually-dependent keywords (which force + cross-field checks). Example: sharding is an array→bytes codec, so it occupies + the single `serializer` slot — there is no separate `shards` keyword to + cross-check against it. Avoid `Option`-everything-then-validate; reserve + `Option` for genuinely optional settings with meaningful defaults. 
+ ## Rust / pyo3 conventions - **Prefix every `#[pyclass]` type with `Py`, and set the macro `name` to the From 58d186adaa1b786f130a1f30565aa74fe1e65be5 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:29:49 -0400 Subject: [PATCH 12/27] start defining chunk grids --- src/{chunks.rs => array/chunk_grid.rs} | 24 ++++++++++++++++++ src/array/create.rs | 12 ++++++++- src/array/mod.rs | 3 +++ src/array/util.rs | 35 ++++++++++++++++++++++++++ src/lib.rs | 4 +-- 5 files changed, 74 insertions(+), 4 deletions(-) rename src/{chunks.rs => array/chunk_grid.rs} (62%) diff --git a/src/chunks.rs b/src/array/chunk_grid.rs similarity index 62% rename from src/chunks.rs rename to src/array/chunk_grid.rs index b4359af..caa1ea7 100644 --- a/src/chunks.rs +++ b/src/array/chunk_grid.rs @@ -1,6 +1,10 @@ +use std::sync::Arc; + use pyo3::prelude::*; +use zarrs::array::chunk_grid::{RectilinearChunkGrid, RegularBoundedChunkGrid}; use zarrs::array::ChunkGrid; +use crate::array::{PyArrayShape, PyChunkShape}; use crate::error::ZarristaResult; use crate::metadata::PyMetadataV3; @@ -8,8 +12,28 @@ use crate::metadata::PyMetadataV3; #[pyclass(module = "zarrista", frozen, name = "ChunkGrid", from_py_object)] pub struct PyChunkGrid(ChunkGrid); +impl PyChunkGrid { + pub fn new(chunk_grid: ChunkGrid) -> Self { + Self(chunk_grid) + } + + pub fn into_inner(self) -> ChunkGrid { + self.0 + } +} + #[pymethods] impl PyChunkGrid { + #[staticmethod] + fn regular_bounded( + array_shape: PyArrayShape, + chunk_shape: PyChunkShape, + ) -> ZarristaResult { + let chunk_grid = + RegularBoundedChunkGrid::new(array_shape.into_inner(), chunk_shape.into_inner())?; + Ok(Self(Arc::new(chunk_grid).into())) + } + #[staticmethod] fn from_metadata(metadata: PyMetadataV3, shape: Vec) -> ZarristaResult { Ok(Self(ChunkGrid::from_metadata(metadata.as_ref(), &shape)?)) diff --git a/src/array/create.rs b/src/array/create.rs index dbf7118..cb552c0 100644 --- a/src/array/create.rs +++ b/src/array/create.rs @@ 
-3,10 +3,11 @@ use pyo3::prelude::*; use zarrs::array::ArrayBuilder; use crate::array::util::PyArrayShape; -use crate::array::{PyArray, PyAsyncArray, PyChunkKeyEncoding}; +use crate::array::{PyArray, PyAsyncArray, PyChunkGrid, PyChunkKeyEncoding}; use crate::codec::{PyArrayToArrayCodec, PyArrayToBytesCodec, PyBytesToBytesCodec}; use crate::dtype::PyDataType; use crate::error::ZarristaResult; +use crate::fill_value::PyFillValue; use crate::metadata::PyArrayMetadataV3; use crate::storage::{PyAsyncStorage, PySyncStorage}; @@ -23,6 +24,15 @@ impl PyArrayBuilder { #[pymethods] impl PyArrayBuilder { + #[new] + fn py_new(chunk_grid: PyChunkGrid, dtype: PyDataType, fill_value: PyFillValue) -> Self { + Self(ArrayBuilder::new_with_chunk_grid( + chunk_grid.into_inner(), + dtype.into_inner(), + fill_value.into_inner(), + )) + } + #[staticmethod] fn like<'py>(array: Bound<'py, PyAny>) -> ZarristaResult { if let Ok(array) = array.cast::() { diff --git a/src/array/mod.rs b/src/array/mod.rs index 07a272d..f2927e4 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,4 +1,5 @@ mod r#async; +mod chunk_grid; mod chunk_key_encoding; mod create; mod selection; @@ -6,6 +7,8 @@ mod shared; mod sync; mod util; +pub use chunk_grid::PyChunkGrid; pub use chunk_key_encoding::PyChunkKeyEncoding; pub use r#async::PyAsyncArray; pub use sync::PyArray; +pub use util::{PyArrayShape, PyChunkIndices, PyChunkShape}; diff --git a/src/array/util.rs b/src/array/util.rs index f32d540..c4ee8c9 100644 --- a/src/array/util.rs +++ b/src/array/util.rs @@ -1,3 +1,5 @@ +use std::num::NonZeroU64; + use pyo3::prelude::*; #[derive(IntoPyObject, FromPyObject, Clone, Debug)] @@ -12,6 +14,12 @@ impl AsRef<[u64]> for PyChunkIndices { #[derive(IntoPyObject, FromPyObject, Clone, Debug)] pub struct PyArrayShape(Vec); +impl PyArrayShape { + pub fn into_inner(self) -> Vec { + self.0 + } +} + impl From> for PyArrayShape { fn from(shape: Vec) -> Self { Self(shape) @@ -29,3 +37,30 @@ impl AsRef<[u64]> for PyArrayShape { 
&self.0 } } + +#[derive(IntoPyObject, FromPyObject, Clone, Debug)] +pub struct PyChunkShape(Vec); + +impl PyChunkShape { + pub fn into_inner(self) -> Vec { + self.0 + } +} + +impl From> for PyChunkShape { + fn from(shape: Vec) -> Self { + Self(shape) + } +} + +impl From for Vec { + fn from(shape: PyChunkShape) -> Self { + shape.0 + } +} + +impl AsRef<[NonZeroU64]> for PyChunkShape { + fn as_ref(&self) -> &[NonZeroU64] { + &self.0 + } +} diff --git a/src/lib.rs b/src/lib.rs index 42481c0..b8a99e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,6 @@ mod array; mod array_bytes; -mod chunks; mod codec; mod decoded_array; mod dtype; @@ -16,9 +15,8 @@ mod storage; use pyo3::prelude::*; -use crate::array::{PyArray, PyAsyncArray}; +use crate::array::{PyArray, PyAsyncArray, PyChunkGrid}; use crate::array_bytes::PyArrayBytes; -use crate::chunks::PyChunkGrid; use crate::codec::register_codec_module; use crate::decoded_array::{PyMaskedTensor, PyMaskedVariableArray, PyTensor, PyVariableArray}; use crate::dtype::PyDataType; From e99b0715bc4b8c20a853acfdd3a5adb2f9e6eeda Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:39:46 -0400 Subject: [PATCH 13/27] more chunk grid options --- src/array/chunk_grid.rs | 54 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/array/chunk_grid.rs b/src/array/chunk_grid.rs index caa1ea7..45045f6 100644 --- a/src/array/chunk_grid.rs +++ b/src/array/chunk_grid.rs @@ -1,7 +1,11 @@ use std::sync::Arc; use pyo3::prelude::*; -use zarrs::array::chunk_grid::{RectilinearChunkGrid, RegularBoundedChunkGrid}; +use pyo3::types::PyInt; +use zarrs::array::chunk_grid::{ + ChunkEdgeLengths, RectilinearChunkGrid, RegularBoundedChunkGrid, RegularChunkGrid, + RunLengthElement, +}; use zarrs::array::ChunkGrid; use crate::array::{PyArrayShape, PyChunkShape}; @@ -24,6 +28,23 @@ impl PyChunkGrid { #[pymethods] impl PyChunkGrid { + #[staticmethod] + fn rectilinear( + array_shape: PyArrayShape, + 
chunk_shapes: Vec, + ) -> ZarristaResult { + let chunk_shapes = chunk_shapes.into_iter().map(|c| c.0).collect::>(); + let chunk_grid = RectilinearChunkGrid::new(array_shape.into_inner(), &chunk_shapes)?; + Ok(Self(Arc::new(chunk_grid).into())) + } + + #[staticmethod] + fn regular(array_shape: PyArrayShape, chunk_shape: PyChunkShape) -> ZarristaResult { + let chunk_grid = RegularChunkGrid::new(array_shape.into_inner(), chunk_shape.into_inner())?; + Ok(Self(Arc::new(chunk_grid).into())) + } + + /// This chunk grid is experimental and may be incompatible with other Zarr V3 implementations. #[staticmethod] fn regular_bounded( array_shape: PyArrayShape, @@ -71,3 +92,34 @@ impl From for ChunkGrid { py_chunk_grid.0 } } + +pub struct PyChunkEdgeLengths(ChunkEdgeLengths); + +impl FromPyObject<'_, '_> for PyChunkEdgeLengths { + type Error = PyErr; + + fn extract(obj: Borrowed<'_, '_, PyAny>) -> Result { + if obj.is_instance_of::() { + Ok(Self(ChunkEdgeLengths::Scalar(obj.extract()?))) + } else { + let elements = obj.extract::>()?; + Ok(Self(ChunkEdgeLengths::Varying( + elements.into_iter().map(|e| e.0).collect(), + ))) + } + } +} + +pub struct PyRunLengthElement(RunLengthElement); + +impl FromPyObject<'_, '_> for PyRunLengthElement { + type Error = PyErr; + + fn extract(obj: Borrowed<'_, '_, PyAny>) -> Result { + if obj.is_instance_of::() { + Ok(Self(RunLengthElement::Single(obj.extract()?))) + } else { + Ok(Self(RunLengthElement::Repeated(obj.extract()?))) + } + } +} From 3387bb192a7e990a575758e7daab770dff15aca5 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:41:36 -0400 Subject: [PATCH 14/27] remove builder out of array/sync --- src/array/sync.rs | 56 ++--------------------------------------------- 1 file changed, 2 insertions(+), 54 deletions(-) diff --git a/src/array/sync.rs b/src/array/sync.rs index a79588d..fc1c9be 100644 --- a/src/array/sync.rs +++ b/src/array/sync.rs @@ -5,17 +5,14 @@ use std::sync::Arc; use crate::array::selection::PySelection; 
use crate::array::shared::array_metadata_accessors; use crate::array::util::PyChunkIndices; -use crate::chunks::PyChunkGrid; -use crate::codec::{PyArrayToArrayCodec, PyBytesToBytesCodec, PyCodecOptions}; +use crate::codec::PyCodecOptions; use crate::decoded_array::DecodedArray; -use crate::dtype::PyDataType; use crate::error::ZarristaResult; -use crate::fill_value::PyFillValue; use crate::node::PyNodePath; use crate::storage::PySyncStorage; use pyo3::prelude::*; use pyo3_bytes::PyBytes; -use zarrs::array::{Array, ArrayBuilder}; +use zarrs::array::Array; use zarrs::storage::ReadableWritableListableStorageTraits; /// A Zarr array. @@ -52,55 +49,6 @@ impl PyArray { ) } - /// Create a new array - #[staticmethod] - #[pyo3( - signature = (store, dtype, chunk_grid, fill_value, *, path="/", subchunk_shape=None, array_to_array_codecs=None, bytes_to_bytes_codecs=None), - text_signature = "(store, dtype, chunk_grid, fill_value, *, path='/', subchunk_shape=None, array_to_array_codecs=None, bytes_to_bytes_codecs=None)" - )] - #[expect(clippy::too_many_arguments)] - fn create( - store: PySyncStorage, - dtype: PyDataType, - chunk_grid: PyChunkGrid, - fill_value: PyFillValue, - path: &str, - subchunk_shape: Option>, - array_to_array_codecs: Option>, - bytes_to_bytes_codecs: Option>, - ) -> ZarristaResult { - let store = store.into_inner(); - let mut builder = ArrayBuilder::new_with_chunk_grid( - chunk_grid, - dtype.into_inner(), - fill_value.into_inner(), - ); - - if let Some(subchunk_shape) = subchunk_shape { - builder.subchunk_shape(subchunk_shape); - } - if let Some(array_to_array_codecs) = array_to_array_codecs { - builder.array_to_array_codecs( - array_to_array_codecs - .into_iter() - .map(|c| c.into_inner()) - .collect(), - ); - } - if let Some(bytes_to_bytes_codecs) = bytes_to_bytes_codecs { - builder.bytes_to_bytes_codecs( - bytes_to_bytes_codecs - .into_iter() - .map(|c| c.into_inner()) - .collect(), - ); - } - - Ok(Self { - inner: builder.build(store, path)?, - }) - } - 
/// Open the array stored at `path` in `store`. #[staticmethod] #[pyo3( From 042c2edb8699426317903478dc6a7725b42ea0af Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:43:09 -0400 Subject: [PATCH 15/27] define more exceptions --- src/error.rs | 21 ++++++++++++++++++++- src/exceptions.rs | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/error.rs b/src/error.rs index 67b05a0..c721882 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,8 +9,9 @@ use pyo3::prelude::*; use pythonize::PythonizeError; use thiserror::Error; +use zarrs::array::chunk_grid::{RectilinearChunkGridCreateError, RegularChunkGridCreateError}; use zarrs::array::codec::TransposeOrderError; -use zarrs::array::{ArrayCreateError, ArrayError, CodecError}; +use zarrs::array::{ArrayCreateError, ArrayError, CodecError, IncompatibleDimensionalityError}; use zarrs::filesystem::FilesystemStoreCreateError; use zarrs::group::GroupCreateError; use zarrs::node::{NodeCreateError, NodePathError}; @@ -67,6 +68,15 @@ pub enum ZarristaError { /// Failed to create a codec (or other plugin) from its configuration. #[error(transparent)] PluginCreate(#[from] PluginCreateError), + /// Failed to create a regular chunk grid. + #[error(transparent)] + RegularChunkGridCreate(#[from] RegularChunkGridCreateError), + /// Failed to create a rectilinear chunk grid. + #[error(transparent)] + RectilinearChunkGridCreate(#[from] RectilinearChunkGridCreateError), + /// A shape's dimensionality is incompatible with another. 
+ #[error(transparent)] + IncompatibleDimensionality(#[from] IncompatibleDimensionalityError), } impl From for PyErr { @@ -89,6 +99,15 @@ impl From for PyErr { exc::TransposeOrderError::new_err(err.to_string()) } ZarristaError::PluginCreate(err) => exc::PluginCreateError::new_err(err.to_string()), + ZarristaError::RegularChunkGridCreate(err) => { + exc::ChunkGridCreateError::new_err(err.to_string()) + } + ZarristaError::RectilinearChunkGridCreate(err) => { + exc::ChunkGridCreateError::new_err(err.to_string()) + } + ZarristaError::IncompatibleDimensionality(err) => { + exc::IncompatibleDimensionalityError::new_err(err.to_string()) + } } } } diff --git a/src/exceptions.rs b/src/exceptions.rs index 379a54b..0a7b04f 100644 --- a/src/exceptions.rs +++ b/src/exceptions.rs @@ -78,6 +78,18 @@ create_exception!( ZarristaError, "Raised when (de)serializing JSON or converting to/from Python objects fails." ); +create_exception!( + zarrista.exceptions, + ChunkGridCreateError, + ZarristaError, + "Raised when a chunk grid cannot be created from the given shapes." +); +create_exception!( + zarrista.exceptions, + IncompatibleDimensionalityError, + ZarristaError, + "Raised when a shape's dimensionality is incompatible with another." +); /// Build the `zarrista.exceptions` submodule and attach it to `parent`. /// @@ -100,6 +112,14 @@ pub fn register_exceptions_module(parent: &Bound<'_, PyModule>) -> PyResult<()> exceptions.add("TransposeOrderError", py.get_type::())?; exceptions.add("PluginCreateError", py.get_type::())?; exceptions.add("SerializationError", py.get_type::())?; + exceptions.add( + "ChunkGridCreateError", + py.get_type::(), + )?; + exceptions.add( + "IncompatibleDimensionalityError", + py.get_type::(), + )?; py.import("sys")? .getattr("modules")? 
From 9fe4cb67bdd5758519a6957361705f23d414a496 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:43:30 -0400 Subject: [PATCH 16/27] fix import --- src/array/shared.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/shared.rs b/src/array/shared.rs index 023d8bb..0e90f25 100644 --- a/src/array/shared.rs +++ b/src/array/shared.rs @@ -17,7 +17,7 @@ macro_rules! array_metadata_accessors { } #[getter] - fn chunk_grid(&self) -> $crate::chunks::PyChunkGrid { + fn chunk_grid(&self) -> $crate::array::PyChunkGrid { self.inner.chunk_grid().clone().into() } From 6a05517be9b74535e8cb57adfb68d307585ecd07 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:44:07 -0400 Subject: [PATCH 17/27] fix lint --- src/array/async.rs | 2 +- src/array/sync.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/array/async.rs b/src/array/async.rs index bd0220e..c345dcb 100644 --- a/src/array/async.rs +++ b/src/array/async.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use crate::array::selection::PySelection; use crate::array::shared::array_metadata_accessors; -use crate::array::util::PyChunkIndices; +use crate::array::PyChunkIndices; use crate::codec::PyCodecOptions; use crate::decoded_array::DecodedArray; use crate::error::ZarristaError; diff --git a/src/array/sync.rs b/src/array/sync.rs index fc1c9be..c0fe7e8 100644 --- a/src/array/sync.rs +++ b/src/array/sync.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use crate::array::selection::PySelection; use crate::array::shared::array_metadata_accessors; -use crate::array::util::PyChunkIndices; +use crate::array::PyChunkIndices; use crate::codec::PyCodecOptions; use crate::decoded_array::DecodedArray; use crate::error::ZarristaResult; From 2cd775c8e9c6c8d9f39410aeab552384ddb38628 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:46:55 -0400 Subject: [PATCH 18/27] rename create.rs to builder.rs --- src/array/{create.rs => builder.rs} | 0 src/array/mod.rs | 2 +- 2 
files changed, 1 insertion(+), 1 deletion(-) rename src/array/{create.rs => builder.rs} (100%) diff --git a/src/array/create.rs b/src/array/builder.rs similarity index 100% rename from src/array/create.rs rename to src/array/builder.rs diff --git a/src/array/mod.rs b/src/array/mod.rs index f2927e4..e4b00a3 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -1,7 +1,7 @@ mod r#async; +mod builder; mod chunk_grid; mod chunk_key_encoding; -mod create; mod selection; mod shared; mod sync; From 1cda13771db3c61b7914a1972f52d0b43d68cc21 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:50:50 -0400 Subject: [PATCH 19/27] fix pin of zarrs_storage --- Cargo.lock | 28 ++++++---------------------- Cargo.toml | 1 + 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 074fe59..6952ab1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5314,7 +5314,7 @@ dependencies = [ "zarrs_metadata", "zarrs_metadata_ext", "zarrs_plugin", - "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", + "zarrs_storage", "zstd", ] @@ -5342,7 +5342,7 @@ dependencies = [ "inventory", "zarrs_metadata", "zarrs_plugin", - "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", + "zarrs_storage", ] [[package]] @@ -5363,7 +5363,7 @@ dependencies = [ "zarrs_data_type", "zarrs_metadata", "zarrs_plugin", - "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", + "zarrs_storage", ] [[package]] @@ -5397,7 +5397,7 @@ dependencies = [ "positioned-io", "thiserror 2.0.18", "walkdir", - "zarrs_storage 0.4.3 (git+https://github.com/kylebarron/zarrs?rev=e68838becd223ddb7f522b265c1b65b90fe577b1)", + "zarrs_storage", ] [[package]] @@ -5410,7 +5410,7 @@ dependencies = [ "futures", "icechunk", "tokio", - "zarrs_storage 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "zarrs_storage", ] 
[[package]] @@ -5450,7 +5450,7 @@ dependencies = [ "async-trait", "futures", "object_store", - "zarrs_storage 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "zarrs_storage", ] [[package]] @@ -5464,22 +5464,6 @@ dependencies = [ "thiserror 2.0.18", ] -[[package]] -name = "zarrs_storage" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d098796d2ed4cf94896569615101e0432e870a7665396da5cc32300fb68f7c1" -dependencies = [ - "async-trait", - "auto_impl", - "bytes", - "derive_more", - "futures", - "itertools", - "thiserror 2.0.18", - "unsafe_cell_slice", -] - [[package]] name = "zarrs_storage" version = "0.4.3" diff --git a/Cargo.toml b/Cargo.toml index 4f3a949..f801fcc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,3 +64,4 @@ codegen-units = 1 [patch.crates-io] # Branch kyle/v0.23-derive-clone zarrs = { git = "https://github.com/kylebarron/zarrs", rev = "e68838becd223ddb7f522b265c1b65b90fe577b1" } +zarrs_storage = { git = "https://github.com/kylebarron/zarrs", rev = "e68838becd223ddb7f522b265c1b65b90fe577b1" } From acb547fcd65380cd048560baaaec99b244c8cfad Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:51:43 -0400 Subject: [PATCH 20/27] define chunk grid on builder --- src/array/builder.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/array/builder.rs b/src/array/builder.rs index cb552c0..e14f02f 100644 --- a/src/array/builder.rs +++ b/src/array/builder.rs @@ -55,6 +55,12 @@ impl PyArrayBuilder { })) } + fn chunk_grid(&self, chunk_grid: PyChunkGrid) -> Self { + self.with(|builder| { + builder.chunk_grid(chunk_grid.into_inner()); + }) + } + fn chunk_key_encoding(&self, chunk_key_encoding: PyChunkKeyEncoding) -> Self { self.with(|builder| { builder.chunk_key_encoding(chunk_key_encoding.into_inner()); From 664883cdddbdaa6b77250282f371ed42e6c53ce3 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 14:56:54 -0400 Subject: [PATCH 21/27] cleaner use of pythonize 
--- src/array/builder.rs | 11 +++++++---- src/array/shared.rs | 7 ++----- src/group/shared.rs | 7 ++----- src/metadata.rs | 2 ++ 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/array/builder.rs b/src/array/builder.rs index e14f02f..1015722 100644 --- a/src/array/builder.rs +++ b/src/array/builder.rs @@ -8,7 +8,7 @@ use crate::codec::{PyArrayToArrayCodec, PyArrayToBytesCodec, PyBytesToBytesCodec use crate::dtype::PyDataType; use crate::error::ZarristaResult; use crate::fill_value::PyFillValue; -use crate::metadata::PyArrayMetadataV3; +use crate::metadata::{PyArrayMetadataV3, PyAttributes}; use crate::storage::{PyAsyncStorage, PySyncStorage}; #[pyclass(module = "zarrista.array", frozen, name = "Config")] @@ -48,10 +48,9 @@ impl PyArrayBuilder { } } - fn attrs(&self, attrs: Bound<'_, PyAny>) -> PyResult { - let attributes = pythonize::depythonize(&attrs)?; + fn attrs(&self, attrs: PyAttributes) -> PyResult { Ok(self.with(|builder| { - builder.attributes(attributes); + builder.attributes(attrs.into_inner()); })) } @@ -78,10 +77,14 @@ impl PyArrayBuilder { } fn create(&self, store: PySyncStorage, path: &str) -> ZarristaResult { + // TODO: should this additionally store the metadata? Or make the user call store_metadata + // on the result themselves? Ok(self.0.build_arc(store.into_inner(), path)?.into()) } fn create_async(&self, store: PyAsyncStorage, path: &str) -> ZarristaResult { + // TODO: should this additionally store the metadata? Or make the user call store_metadata + // on the result themselves? Ok(self.0.build_arc(store.into_inner(), path)?.into()) } diff --git a/src/array/shared.rs b/src/array/shared.rs index 0e90f25..9980338 100644 --- a/src/array/shared.rs +++ b/src/array/shared.rs @@ -9,11 +9,8 @@ macro_rules! array_metadata_accessors { impl $ty { /// The array's user attributes as a dict. 
#[getter] - fn attrs<'py>( - &self, - py: ::pyo3::Python<'py>, - ) -> ::pythonize::Result<::pyo3::Bound<'py, ::pyo3::PyAny>> { - ::pythonize::pythonize(py, self.inner.attributes()) + fn attrs(&self) -> $crate::metadata::PyAttributes { + self.inner.attributes().clone().into() } #[getter] diff --git a/src/group/shared.rs b/src/group/shared.rs index cdc594d..8ad51ce 100644 --- a/src/group/shared.rs +++ b/src/group/shared.rs @@ -9,11 +9,8 @@ macro_rules! group_metadata_accessors { impl $ty { /// The group's user attributes as a dict. #[getter] - fn attrs<'py>( - &self, - py: ::pyo3::Python<'py>, - ) -> ::pythonize::Result<::pyo3::Bound<'py, ::pyo3::PyAny>> { - ::pythonize::pythonize(py, self.inner.attributes()) + fn attrs(&self) -> $crate::metadata::PyAttributes { + self.inner.attributes().clone().into() } /// The group's metadata, always exported as Zarr V3. diff --git a/src/metadata.rs b/src/metadata.rs index 1534418..d4cb9ff 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -1,5 +1,6 @@ use pyo3::prelude::*; use pythonize::{depythonize, pythonize, PythonizeError}; +use serde_json::{Map, Value}; use zarrs::metadata::v2::{ArrayMetadataV2, GroupMetadataV2, MetadataV2}; use zarrs::metadata::v3::{ArrayMetadataV3, GroupMetadataV3, MetadataV3}; use zarrs::metadata::{ArrayMetadata, Configuration, GroupMetadata}; @@ -66,3 +67,4 @@ pythonized_metadata!(PyGroupMetadataV2, GroupMetadataV2); pythonized_metadata!(PyGroupMetadataV3, GroupMetadataV3); pythonized_metadata!(PyConsolidatedMetadata, ConsolidatedMetadata); pythonized_metadata!(PyConfiguration, Configuration); +pythonized_metadata!(PyAttributes, Map); From 40e0bac686ccfcf146b64b11e5a86a8c6305a0e4 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 15:01:45 -0400 Subject: [PATCH 22/27] fix python name --- src/array/builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/builder.rs b/src/array/builder.rs index 1015722..467877c 100644 --- a/src/array/builder.rs +++ 
b/src/array/builder.rs @@ -11,7 +11,7 @@ use crate::fill_value::PyFillValue; use crate::metadata::{PyArrayMetadataV3, PyAttributes}; use crate::storage::{PyAsyncStorage, PySyncStorage}; -#[pyclass(module = "zarrista.array", frozen, name = "Config")] +#[pyclass(module = "zarrista.array", frozen, name = "ArrayBuilder")] pub struct PyArrayBuilder(ArrayBuilder); impl PyArrayBuilder { From 3701a223845bf52392e4aaa890684809008afb5f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 15:08:38 -0400 Subject: [PATCH 23/27] expose classes --- src/array/mod.rs | 1 + src/lib.rs | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/array/mod.rs b/src/array/mod.rs index e4b00a3..1b7c1dd 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -7,6 +7,7 @@ mod shared; mod sync; mod util; +pub use builder::PyArrayBuilder; pub use chunk_grid::PyChunkGrid; pub use chunk_key_encoding::PyChunkKeyEncoding; pub use r#async::PyAsyncArray; diff --git a/src/lib.rs b/src/lib.rs index b8a99e9..575c76b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ mod storage; use pyo3::prelude::*; -use crate::array::{PyArray, PyAsyncArray, PyChunkGrid}; +use crate::array::{PyArray, PyArrayBuilder, PyAsyncArray, PyChunkGrid, PyChunkKeyEncoding}; use crate::array_bytes::PyArrayBytes; use crate::codec::register_codec_module; use crate::decoded_array::{PyMaskedTensor, PyMaskedVariableArray, PyTensor, PyVariableArray}; @@ -31,10 +31,12 @@ fn _zarrista(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; From 0f4ac9e7c9c571ce14c7b53c5503de71c0c086eb Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 15:10:33 -0400 Subject: [PATCH 24/27] update type hints --- python/zarrista/__init__.py | 4 ++ python/zarrista/_builder.pyi | 85 
+++++++++++++++++++++++++ python/zarrista/_chunk_key_encoding.pyi | 19 ++++++ python/zarrista/_chunks.pyi | 40 ++++++++++++ python/zarrista/_zarrista.pyi | 4 ++ python/zarrista/exceptions.pyi | 8 +++ 6 files changed, 160 insertions(+) create mode 100644 python/zarrista/_builder.pyi create mode 100644 python/zarrista/_chunk_key_encoding.pyi diff --git a/python/zarrista/__init__.py b/python/zarrista/__init__.py index 00d0d8c..8ee8465 100644 --- a/python/zarrista/__init__.py +++ b/python/zarrista/__init__.py @@ -5,10 +5,12 @@ from . import codec, exceptions from ._zarrista import ( Array, + ArrayBuilder, ArrayBytes, AsyncArray, AsyncGroup, ChunkGrid, + ChunkKeyEncoding, DataType, FilesystemStore, FillValue, @@ -32,10 +34,12 @@ __all__ = [ "Array", + "ArrayBuilder", "ArrayBytes", "AsyncArray", "AsyncGroup", "ChunkGrid", + "ChunkKeyEncoding", "DataType", "DecodedArray", "FilesystemStore", diff --git a/python/zarrista/_builder.pyi b/python/zarrista/_builder.pyi new file mode 100644 index 0000000..2fe981a --- /dev/null +++ b/python/zarrista/_builder.pyi @@ -0,0 +1,85 @@ +from collections.abc import Mapping, Sequence + +from zarr_metadata import ArrayMetadataV3, JSONValue + +from zarrista.codec import ( + ArrayToArrayCodec, + ArrayToBytesCodec, + BytesToBytesCodec, +) + +from ._array import Array, AsyncArray +from ._chunk_key_encoding import ChunkKeyEncoding +from ._chunks import ChunkGrid +from ._dtype import DataType +from ._fill_value import FillValue +from ._store import AsyncStore, FilesystemStore, MemoryStore + +class ArrayBuilder: + """A chained, immutable builder for creating Zarr arrays. + + Every setter returns a *new* `ArrayBuilder` and leaves the receiver + unchanged, so a builder can be safely shared and specialized. 
Seed one with + the constructor or [`ArrayBuilder.like`][zarrista.ArrayBuilder.like], chain + setters to configure it, then materialize the array with + [`create`][zarrista.ArrayBuilder.create] / + [`create_async`][zarrista.ArrayBuilder.create_async], or produce only its + metadata with + [`create_metadata`][zarrista.ArrayBuilder.create_metadata]. + """ + + def __init__( + self, + chunk_grid: ChunkGrid, + dtype: DataType, + fill_value: FillValue, + ) -> None: + """Create a builder from a chunk grid, data type, and fill value.""" + @staticmethod + def like(array: Array | AsyncArray) -> ArrayBuilder: + """Create a builder copying the configuration of an existing array.""" + def attrs(self, attrs: Mapping[str, JSONValue]) -> ArrayBuilder: + """Return a new builder with the given user attributes set.""" + def chunk_grid(self, chunk_grid: ChunkGrid) -> ArrayBuilder: + """Return a new builder with the chunk grid set. + + This may also change the array shape, since the grid carries one. + """ + def chunk_key_encoding( + self, + chunk_key_encoding: ChunkKeyEncoding, + ) -> ArrayBuilder: + """Return a new builder with the chunk key encoding set.""" + def compressors( + self, + compressors: Sequence[BytesToBytesCodec], + ) -> ArrayBuilder: + """Return a new builder with the bytes-to-bytes codecs ("compressors") set.""" + def data_type(self, data_type: DataType) -> ArrayBuilder: + """Return a new builder with the data type set.""" + def dimension_names( + self, + dimension_names: Sequence[str | None] | None, + ) -> ArrayBuilder: + """Return a new builder with the dimension names set (or cleared).""" + def filters(self, filters: Sequence[ArrayToArrayCodec]) -> ArrayBuilder: + """Return a new builder with the array-to-array codecs ("filters") set.""" + def serializer(self, serializer: ArrayToBytesCodec) -> ArrayBuilder: + """Return a new builder with the array-to-bytes codec ("serializer") set. 
+ + Sharding is itself an array-to-bytes codec, so a sharding serializer is + passed here too. + """ + def shape(self, shape: Sequence[int]) -> ArrayBuilder: + """Return a new builder with the array shape set.""" + def subchunk_shape( + self, + subchunk_shape: Sequence[int] | None, + ) -> ArrayBuilder: + """Return a new builder with the inner (subchunk) shape set, enabling sharding.""" + def create(self, store: FilesystemStore | MemoryStore, path: str) -> Array: + """Build the array in `store` at `path` and return it.""" + def create_async(self, store: AsyncStore, path: str) -> AsyncArray: + """Build the array in an async `store` at `path` and return it.""" + def create_metadata(self) -> ArrayMetadataV3: + """Build the array's Zarr v3 metadata without touching a store.""" diff --git a/python/zarrista/_chunk_key_encoding.pyi b/python/zarrista/_chunk_key_encoding.pyi new file mode 100644 index 0000000..f8f3847 --- /dev/null +++ b/python/zarrista/_chunk_key_encoding.pyi @@ -0,0 +1,19 @@ +from typing import Literal + +from zarr_metadata import NamedConfigV3 + +class ChunkKeyEncoding: + """How an array maps chunk grid indices to store keys.""" + + @staticmethod + def default(sep: Literal[".", "/"]) -> ChunkKeyEncoding: + """The `default` chunk key encoding with the given separator.""" + @staticmethod + def from_metadata(metadata: NamedConfigV3) -> ChunkKeyEncoding: + """Build a chunk key encoding from its Zarr v3 metadata.""" + @property + def metadata(self) -> NamedConfigV3: + """The chunk key encoding's Zarr v3 metadata.""" + @property + def name(self) -> str | None: + """The chunk key encoding's Zarr v3 name (e.g. 
`"default"`), if any.""" diff --git a/python/zarrista/_chunks.pyi b/python/zarrista/_chunks.pyi index bc96356..236b498 100644 --- a/python/zarrista/_chunks.pyi +++ b/python/zarrista/_chunks.pyi @@ -1,6 +1,46 @@ +from collections.abc import Sequence +from typing import TypeAlias + +from zarr_metadata import NamedConfigV3 + +_RunLength: TypeAlias = int | tuple[int, int] +"""One run of a rectilinear chunk edge: a single chunk size, or a +`(size, count)` pair meaning `count` consecutive chunks of `size`.""" + +_ChunkEdgeLengths: TypeAlias = int | Sequence[_RunLength] +"""Chunk sizes along one dimension: a scalar (regular along that axis) or a +sequence of runs (varying sizes).""" + class ChunkGrid: """The chunk grid of an array: how its shape is partitioned into chunks.""" + @staticmethod + def regular( + array_shape: Sequence[int], chunk_shape: Sequence[int] + ) -> ChunkGrid: + """A regular chunk grid with a fixed `chunk_shape` over `array_shape`.""" + @staticmethod + def rectilinear( + array_shape: Sequence[int], + chunk_shapes: Sequence[_ChunkEdgeLengths], + ) -> ChunkGrid: + """A rectilinear grid with per-dimension (possibly varying) chunk sizes.""" + @staticmethod + def regular_bounded( + array_shape: Sequence[int], chunk_shape: Sequence[int] + ) -> ChunkGrid: + """A regular grid whose final chunks are clipped to the array bounds. + + Experimental and may be incompatible with other Zarr V3 implementations. 
+ """ + @staticmethod + def from_metadata( + metadata: NamedConfigV3, shape: Sequence[int] + ) -> ChunkGrid: + """Build a chunk grid from its Zarr v3 metadata and the array shape.""" + @property + def metadata(self) -> NamedConfigV3: + """The chunk grid's Zarr v3 metadata.""" @property def ndim(self) -> int: """The number of dimensions.""" diff --git a/python/zarrista/_zarrista.pyi b/python/zarrista/_zarrista.pyi index 2e9f480..7925a85 100644 --- a/python/zarrista/_zarrista.pyi +++ b/python/zarrista/_zarrista.pyi @@ -1,5 +1,7 @@ from ._array import Array, AsyncArray from ._array_bytes import ArrayBytes +from ._builder import ArrayBuilder +from ._chunk_key_encoding import ChunkKeyEncoding from ._chunks import ChunkGrid from ._decoded_array import MaskedTensor, MaskedVariableArray, Tensor, VariableArray from ._dtype import DataType @@ -11,10 +13,12 @@ __version__: str __all__ = [ "Array", + "ArrayBuilder", "ArrayBytes", "AsyncArray", "AsyncGroup", "ChunkGrid", + "ChunkKeyEncoding", "DataType", "FilesystemStore", "FillValue", diff --git a/python/zarrista/exceptions.pyi b/python/zarrista/exceptions.pyi index b559ec7..86bb6fe 100644 --- a/python/zarrista/exceptions.pyi +++ b/python/zarrista/exceptions.pyi @@ -34,11 +34,19 @@ class PluginCreateError(ZarristaError): class SerializationError(ZarristaError): """Raised when (de)serializing JSON or converting to/from Python objects fails.""" +class ChunkGridCreateError(ZarristaError): + """Raised when a chunk grid cannot be created from the given shapes.""" + +class IncompatibleDimensionalityError(ZarristaError): + """Raised when a shape's dimensionality is incompatible with another.""" + __all__ = [ "ArrayCreateError", "ArrayError", + "ChunkGridCreateError", "CodecError", "GroupCreateError", + "IncompatibleDimensionalityError", "NodeCreateError", "NodePathError", "PluginCreateError", From d4230b7c3722c15b74cb3ac1a78aae892651c942 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 15:12:02 -0400 Subject: [PATCH 
25/27] update exceptions --- python/zarrista/exceptions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/zarrista/exceptions.py b/python/zarrista/exceptions.py index 3450434..5799897 100644 --- a/python/zarrista/exceptions.py +++ b/python/zarrista/exceptions.py @@ -3,8 +3,10 @@ from zarrista._zarrista.exceptions import ( ArrayCreateError, ArrayError, + ChunkGridCreateError, CodecError, GroupCreateError, + IncompatibleDimensionalityError, NodeCreateError, NodePathError, PluginCreateError, @@ -17,8 +19,10 @@ __all__ = [ "ArrayCreateError", "ArrayError", + "ChunkGridCreateError", "CodecError", "GroupCreateError", + "IncompatibleDimensionalityError", "NodeCreateError", "NodePathError", "PluginCreateError", From 8cf48c8b5fcd5d20e059c80834316f900bba8743 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 15:14:11 -0400 Subject: [PATCH 26/27] update tests --- python/zarrista/_builder.pyi | 2 +- python/zarrista/_chunks.pyi | 11 +-- tests/test_builder.py | 153 +++++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+), 8 deletions(-) create mode 100644 tests/test_builder.py diff --git a/python/zarrista/_builder.pyi b/python/zarrista/_builder.pyi index 2fe981a..1721d41 100644 --- a/python/zarrista/_builder.pyi +++ b/python/zarrista/_builder.pyi @@ -76,7 +76,7 @@ class ArrayBuilder: self, subchunk_shape: Sequence[int] | None, ) -> ArrayBuilder: - """Return a new builder with the inner (subchunk) shape set, enabling sharding.""" + """Return a new builder with the inner (subchunk) shape, enabling sharding.""" def create(self, store: FilesystemStore | MemoryStore, path: str) -> Array: """Build the array in `store` at `path` and return it.""" def create_async(self, store: AsyncStore, path: str) -> AsyncArray: diff --git a/python/zarrista/_chunks.pyi b/python/zarrista/_chunks.pyi index 236b498..b179e6d 100644 --- a/python/zarrista/_chunks.pyi +++ b/python/zarrista/_chunks.pyi @@ -15,9 +15,7 @@ class ChunkGrid: """The chunk grid of an array:
how its shape is partitioned into chunks.""" @staticmethod - def regular( - array_shape: Sequence[int], chunk_shape: Sequence[int] - ) -> ChunkGrid: + def regular(array_shape: Sequence[int], chunk_shape: Sequence[int]) -> ChunkGrid: """A regular chunk grid with a fixed `chunk_shape` over `array_shape`.""" @staticmethod def rectilinear( @@ -27,16 +25,15 @@ class ChunkGrid: """A rectilinear grid with per-dimension (possibly varying) chunk sizes.""" @staticmethod def regular_bounded( - array_shape: Sequence[int], chunk_shape: Sequence[int] + array_shape: Sequence[int], + chunk_shape: Sequence[int], ) -> ChunkGrid: """A regular grid whose final chunks are clipped to the array bounds. Experimental and may be incompatible with other Zarr V3 implementations. """ @staticmethod - def from_metadata( - metadata: NamedConfigV3, shape: Sequence[int] - ) -> ChunkGrid: + def from_metadata(metadata: NamedConfigV3, shape: Sequence[int]) -> ChunkGrid: """Build a chunk grid from its Zarr v3 metadata and the array shape.""" @property def metadata(self) -> NamedConfigV3: diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..42b1233 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,153 @@ +"""Tests for the chained `ArrayBuilder` array-creation API.""" + +import pytest + +from zarrista import ( + ArrayBuilder, + ChunkGrid, + ChunkKeyEncoding, + DataType, + FillValue, + MemoryStore, + codec, +) +from zarrista.codec import ArrayToBytesCodec +from zarrista.exceptions import ChunkGridCreateError, ZarristaError + + +def _builder() -> ArrayBuilder: + """A minimal int8 builder: 8x8 array, 4x4 regular chunks, fill value 0.""" + return ArrayBuilder( + ChunkGrid.regular([8, 8], [4, 4]), + DataType.from_string("int8"), + FillValue(b"\x00"), + ) + + +def test_create_metadata_without_store(): + """`create_metadata` produces v3 metadata without touching a store.""" + meta = _builder().create_metadata() + + assert meta["zarr_format"] == 3 + assert 
meta["node_type"] == "array" + assert meta["shape"] == [8, 8] + assert meta["data_type"] == "int8" + assert meta["chunk_grid"] == { + "name": "regular", + "configuration": {"chunk_shape": [4, 4]}, + } + assert meta["fill_value"] == 0 + # No serializer set -> default `bytes` codec. + assert meta["codecs"] == [{"name": "bytes", "configuration": {"endian": "little"}}] + + +def test_setters_return_new_instances(): + """Each setter returns a new builder and leaves the receiver unchanged.""" + base = _builder() + modified = base.shape([16, 16]) + + assert modified is not base + assert base.create_metadata()["shape"] == [8, 8] + assert modified.create_metadata()["shape"] == [16, 16] + + +def test_create_returns_configured_array(): + """`create` returns an array reflecting the builder's configuration.""" + array = ( + _builder() + .shape([16, 16]) + .dimension_names(["y", "x"]) + .create(MemoryStore(), "/a") + ) + + assert array.shape == [16, 16] + assert array.dtype == DataType.from_string("int8") + assert array.dimension_names == ["y", "x"] + + +def test_dimension_names_can_be_cleared(): + meta = ( + _builder().dimension_names(["y", "x"]).dimension_names(None).create_metadata() + ) + assert "dimension_names" not in meta or meta["dimension_names"] is None + + +def test_filters_and_compressors(): + array = ( + _builder() + .filters([codec.transpose([1, 0])]) + .compressors([codec.zstd(3, checksum=False)]) + .create(MemoryStore(), "/a") + ) + + assert [f.name for f in array.filters] == ["transpose"] + assert [c.name for c in array.compressors] == ["zstd"] + + +def test_serializer(): + array = ( + _builder() + .serializer( + ArrayToBytesCodec.from_config( + {"name": "bytes", "configuration": {"endian": "big"}}, + ), + ) + .create(MemoryStore(), "/a") + ) + + assert array.serializer.name == "bytes" + assert array.serializer.config == {"endian": "big"} + + +def test_subchunk_shape_enables_sharding(): + """Setting a subchunk shape selects the sharding serializer.""" + meta = 
_builder().subchunk_shape([2, 2]).create_metadata() + assert meta["codecs"][0]["name"] == "sharding_indexed" + + +def test_chunk_key_encoding(): + cke = ChunkKeyEncoding.default(".") + meta = _builder().chunk_key_encoding(cke).create_metadata() + assert meta["chunk_key_encoding"] == { + "name": "default", + "configuration": {"separator": "."}, + } + + +def test_attrs(): + meta = _builder().attrs({"units": "m", "scale": 2}).create_metadata() + assert meta["attributes"] == {"units": "m", "scale": 2} + + +def test_like_copies_configuration(): + """`like` reproduces an existing array's metadata.""" + source = _builder().dimension_names(["y", "x"]).create(MemoryStore(), "/a") + copied = ArrayBuilder.like(source).create_metadata() + assert copied == source.metadata + + +def test_like_with_override(): + """`like` followed by a setter overrides only that field.""" + source = ( + _builder() + .compressors([codec.zstd(3, checksum=False)]) + .create(MemoryStore(), "/a") + ) + overridden = ( + ArrayBuilder.like(source) + .compressors([codec.gzip(5)]) + .create(MemoryStore(), "/b") + ) + + assert [c.name for c in source.compressors] == ["zstd"] + assert [c.name for c in overridden.compressors] == ["gzip"] + + +def test_chunk_grid_dimension_mismatch_raises(): + """A chunk shape with the wrong dimensionality is rejected at parse time.""" + with pytest.raises(ChunkGridCreateError): + ChunkGrid.regular([8, 8], [4, 4, 4]) + + +def test_chunk_grid_create_error_is_zarrista_error(): + assert issubclass(ChunkGridCreateError, ZarristaError) From 9121eb076a725b9a7b2f60db59d3103e78ce0278 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 23 Jun 2026 15:18:04 -0400 Subject: [PATCH 27/27] update typing --- python/zarrista/_chunk_key_encoding.pyi | 2 +- python/zarrista/_chunks.pyi | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/zarrista/_chunk_key_encoding.pyi b/python/zarrista/_chunk_key_encoding.pyi index f8f3847..837f41d 100644 --- 
a/python/zarrista/_chunk_key_encoding.pyi +++ b/python/zarrista/_chunk_key_encoding.pyi @@ -7,7 +7,7 @@ class ChunkKeyEncoding: @staticmethod def default(sep: Literal[".", "/"]) -> ChunkKeyEncoding: - """The `default` chunk key encoding with the given separator.""" + """Construct the `default` chunk key encoding with the given separator.""" @staticmethod def from_metadata(metadata: NamedConfigV3) -> ChunkKeyEncoding: """Build a chunk key encoding from its Zarr v3 metadata.""" diff --git a/python/zarrista/_chunks.pyi b/python/zarrista/_chunks.pyi index b179e6d..022be56 100644 --- a/python/zarrista/_chunks.pyi +++ b/python/zarrista/_chunks.pyi @@ -16,19 +16,19 @@ class ChunkGrid: @staticmethod def regular(array_shape: Sequence[int], chunk_shape: Sequence[int]) -> ChunkGrid: - """A regular chunk grid with a fixed `chunk_shape` over `array_shape`.""" + """Construct a regular grid with a fixed `chunk_shape` over `array_shape`.""" @staticmethod def rectilinear( array_shape: Sequence[int], chunk_shapes: Sequence[_ChunkEdgeLengths], ) -> ChunkGrid: - """A rectilinear grid with per-dimension (possibly varying) chunk sizes.""" + """Construct a rectilinear grid with per-dimension chunk sizes.""" @staticmethod def regular_bounded( array_shape: Sequence[int], chunk_shape: Sequence[int], ) -> ChunkGrid: - """A regular grid whose final chunks are clipped to the array bounds. + """Construct a regular grid whose final chunks are clipped to the array bounds. Experimental and may be incompatible with other Zarr V3 implementations. """