From 95cd65c66446faccfc6b4ce0b0a419cb7fd5e930 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 22 Jun 2026 21:39:46 -0400 Subject: [PATCH 1/2] init config --- src/config.rs | 26 ++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 27 insertions(+) create mode 100644 src/config.rs diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..61f660f --- /dev/null +++ b/src/config.rs @@ -0,0 +1,26 @@ +use pyo3::prelude::*; +use zarrs::config::{global_config, global_config_mut}; + +#[pyclass(module = "zarrista", name = "Config")] +pub struct PyConfig; + +#[pymethods] +impl PyConfig { + #[getter] + fn chunk_concurrent_minimum(&self) -> usize { + let config = global_config(); + config.chunk_concurrent_minimum() + } + + #[setter] + fn set_validate_checksums(&mut self, value: bool) { + let mut config = global_config_mut(); + config.set_validate_checksums(value); + } + + #[getter] + fn validate_checksums(&self) -> bool { + let config = global_config(); + config.validate_checksums() + } +} diff --git a/src/lib.rs b/src/lib.rs index 42481c0..d9d0a14 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ mod array; mod array_bytes; mod chunks; mod codec; +mod config; mod decoded_array; mod dtype; mod error; From ef31d89b5f38bd8f52a0da5a74bdc4f718b8950d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 22 Jun 2026 22:11:04 -0400 Subject: [PATCH 2/2] feat: Expose global zarrs config --- python/zarrista/__init__.py | 4 + python/zarrista/_config.pyi | 113 +++++++++++++++++++++ python/zarrista/_zarrista.pyi | 6 ++ src/config.rs | 185 ++++++++++++++++++++++++++++++++-- src/lib.rs | 3 + 5 files changed, 302 insertions(+), 9 deletions(-) create mode 100644 python/zarrista/_config.pyi diff --git a/python/zarrista/__init__.py b/python/zarrista/__init__.py index 00d0d8c..60e538c 100644 --- a/python/zarrista/__init__.py +++ b/python/zarrista/__init__.py @@ -9,6 +9,7 @@ AsyncArray, AsyncGroup, ChunkGrid, + Config, DataType, FilesystemStore, FillValue, @@ -19,6 +20,7 @@ Tensor, VariableArray, __version__, + config, ) DecodedArray: TypeAlias = Tensor | VariableArray | MaskedTensor | MaskedVariableArray @@ -36,6 +38,7 @@ "AsyncArray", "AsyncGroup", "ChunkGrid", + "Config", "DataType", "DecodedArray", "FilesystemStore", @@ -48,5 +51,6 @@ "VariableArray", "__version__", "codec", + "config", "exceptions", ] diff --git a/python/zarrista/_config.pyi b/python/zarrista/_config.pyi new file mode 100644 index 0000000..469d5c7 --- /dev/null +++ b/python/zarrista/_config.pyi @@ -0,0 +1,113 @@ +from typing import Literal + +MetadataConvertVersion = Literal["default", "v3"] +"""The Zarr version to write when converting metadata. + +- `"default"`: write the same version as the input metadata. +- `"v3"`: write Zarr V3 metadata (existing V2 metadata is not removed). +""" + +MetadataEraseVersion = Literal["default", "all", "v3", "v2"] +"""The Zarr version of metadata to erase. + +- `"default"`: erase the same version as the input metadata. +- `"all"`: erase all metadata. +- `"v3"`: erase Zarr V3 metadata. +- `"v2"`: erase Zarr V2 metadata. +""" + +UseConsolidatedMetadata = Literal["auto", "must", "never"] +"""Whether to use a root group's consolidated metadata when opening a hierarchy. + +- `"auto"`: use consolidated metadata if present, otherwise list storage. +- `"must"`: require consolidated metadata to be present, else fail. +- `"never"`: never use consolidated metadata, always re-discover from storage. +""" + +class Config: + """A proxy to the `zarrs` global configuration. + + This type is not constructable; use the module-level singleton + [`zarrista.config`][zarrista.config] instead. Its getters and setters read + from and write to the process-wide global configuration. + """ + + @property + def validate_checksums(self) -> bool: + """Whether checksum codecs (e.g. `crc32c`, `fletcher32`) validate that + encoded data matches stored checksums. Default `True`.""" + + @validate_checksums.setter + def validate_checksums(self, value: bool) -> None: ... + @property + def store_empty_chunks(self) -> bool: + """If `False`, chunks where every element equals the fill value are not + stored. If `True`, the fill-value check is skipped and empty chunks are + stored. Default `False`.""" + + @store_empty_chunks.setter + def store_empty_chunks(self, value: bool) -> None: ... + @property + def codec_concurrent_target(self) -> int: + """The default number of concurrent operations to target for codec + encoding and decoding. Zero means unconstrained. Defaults to the number + of available threads.""" + + @codec_concurrent_target.setter + def codec_concurrent_target(self, value: int) -> None: ... + @property + def chunk_concurrent_minimum(self) -> int: + """The preferred minimum chunk concurrency for array operations spanning + multiple chunks. Default `4`.""" + + @chunk_concurrent_minimum.setter + def chunk_concurrent_minimum(self, value: int) -> None: ... + @property + def codec_store_metadata_if_encode_only(self) -> bool: + """Whether codecs performing irreversible encode-only transformations + (currently only `bitround`) write their metadata. Default `True`.""" + + @codec_store_metadata_if_encode_only.setter + def codec_store_metadata_if_encode_only(self, value: bool) -> None: ... + @property + def include_zarrs_metadata(self) -> bool: + """Whether generated array metadata includes the `_zarrs` attribute + recording the `zarrs` version and source repository. Default `True`.""" + + @include_zarrs_metadata.setter + def include_zarrs_metadata(self, value: bool) -> None: ... + @property + def experimental_partial_encoding(self) -> bool: + """Whether `store_chunk_subset` / `store_array_subset` may use partial + encoding (relevant to the sharding codec). Experimental. Default + `False`.""" + + @experimental_partial_encoding.setter + def experimental_partial_encoding(self, value: bool) -> None: ... + @property + def convert_aliased_extension_names(self) -> bool: + """Whether aliased extension names are replaced by their standard name + when metadata is resaved. Default `False`.""" + + @convert_aliased_extension_names.setter + def convert_aliased_extension_names(self, value: bool) -> None: ... + @property + def metadata_convert_version(self) -> MetadataConvertVersion: + """The Zarr version to write when converting metadata. Default + `"default"`.""" + + @metadata_convert_version.setter + def metadata_convert_version(self, value: MetadataConvertVersion) -> None: ... + @property + def metadata_erase_version(self) -> MetadataEraseVersion: + """The Zarr version of metadata to erase. Default `"default"`.""" + + @metadata_erase_version.setter + def metadata_erase_version(self, value: MetadataEraseVersion) -> None: ... + @property + def use_consolidated_metadata(self) -> UseConsolidatedMetadata: + """Whether to use a root group's consolidated metadata when opening a + hierarchy. Default `"auto"`.""" + + @use_consolidated_metadata.setter + def use_consolidated_metadata(self, value: UseConsolidatedMetadata) -> None: ... diff --git a/python/zarrista/_zarrista.pyi b/python/zarrista/_zarrista.pyi index 2e9f480..30771e2 100644 --- a/python/zarrista/_zarrista.pyi +++ b/python/zarrista/_zarrista.pyi @@ -1,6 +1,7 @@ from ._array import Array, AsyncArray from ._array_bytes import ArrayBytes from ._chunks import ChunkGrid +from ._config import Config from ._decoded_array import MaskedTensor, MaskedVariableArray, Tensor, VariableArray from ._dtype import DataType from ._fill_value import FillValue @@ -9,12 +10,16 @@ from ._store import FilesystemStore, MemoryStore __version__: str +config: Config +"""The `zarrs` global configuration singleton.""" + __all__ = [ "Array", "ArrayBytes", "AsyncArray", "AsyncGroup", "ChunkGrid", + "Config", "DataType", "FilesystemStore", "FillValue", @@ -25,4 +30,5 @@ __all__ = [ "Tensor", "VariableArray", "__version__", + "config", ] diff --git a/src/config.rs b/src/config.rs index 61f660f..069676b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,26 +1,193 @@ +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use zarrs::config::{global_config, global_config_mut}; +use pyo3::pybacked::PyBackedStr; +use zarrs::config::{ + global_config, global_config_mut, MetadataConvertVersion, MetadataEraseVersion, + UseConsolidatedMetadata, +}; +/// A proxy to the `zarrs` global configuration. +/// +/// This type is not constructable from Python; use the module-level singleton +/// [`zarrista.config`] instead. Its getters and setters read from and write to +/// the process-wide global configuration. #[pyclass(module = "zarrista", name = "Config")] pub struct PyConfig; #[pymethods] impl PyConfig { #[getter] - fn chunk_concurrent_minimum(&self) -> usize { - let config = global_config(); - config.chunk_concurrent_minimum() + fn validate_checksums(&self) -> bool { + global_config().validate_checksums() } #[setter] fn set_validate_checksums(&mut self, value: bool) { - let mut config = global_config_mut(); - config.set_validate_checksums(value); + global_config_mut().set_validate_checksums(value); } #[getter] - fn validate_checksums(&self) -> bool { - let config = global_config(); - config.validate_checksums() + fn store_empty_chunks(&self) -> bool { + global_config().store_empty_chunks() + } + + #[setter] + fn set_store_empty_chunks(&mut self, value: bool) { + global_config_mut().set_store_empty_chunks(value); + } + + #[getter] + fn codec_concurrent_target(&self) -> usize { + global_config().codec_concurrent_target() + } + + #[setter] + fn set_codec_concurrent_target(&mut self, value: usize) { + global_config_mut().set_codec_concurrent_target(value); + } + + #[getter] + fn chunk_concurrent_minimum(&self) -> usize { + global_config().chunk_concurrent_minimum() + } + + #[setter] + fn set_chunk_concurrent_minimum(&mut self, value: usize) { + global_config_mut().set_chunk_concurrent_minimum(value); + } + + #[getter] + fn codec_store_metadata_if_encode_only(&self) -> bool { + global_config().codec_store_metadata_if_encode_only() + } + + #[setter] + fn set_codec_store_metadata_if_encode_only(&mut self, value: bool) { + global_config_mut().set_codec_store_metadata_if_encode_only(value); + } + + #[getter] + fn include_zarrs_metadata(&self) -> bool { + global_config().include_zarrs_metadata() + } + + #[setter] + fn set_include_zarrs_metadata(&mut self, value: bool) { + global_config_mut().set_include_zarrs_metadata(value); + } + + #[getter] + fn experimental_partial_encoding(&self) -> bool { + global_config().experimental_partial_encoding() + } + + #[setter] + fn set_experimental_partial_encoding(&mut self, value: bool) { + global_config_mut().set_experimental_partial_encoding(value); + } + + #[getter] + fn convert_aliased_extension_names(&self) -> bool { + global_config().convert_aliased_extension_names() + } + + #[setter] + fn set_convert_aliased_extension_names(&mut self, value: bool) { + global_config_mut().set_convert_aliased_extension_names(value); + } + + #[getter] + fn metadata_convert_version(&self) -> &'static str { + metadata_convert_version_to_str(global_config().metadata_convert_version()) + } + + #[setter] + fn set_metadata_convert_version(&mut self, value: PyBackedStr) -> PyResult<()> { + let version = parse_metadata_convert_version(&value)?; + global_config_mut().set_metadata_convert_version(version); + Ok(()) + } + + #[getter] + fn metadata_erase_version(&self) -> &'static str { + metadata_erase_version_to_str(global_config().metadata_erase_version()) + } + + #[setter] + fn set_metadata_erase_version(&mut self, value: PyBackedStr) -> PyResult<()> { + let version = parse_metadata_erase_version(&value)?; + global_config_mut().set_metadata_erase_version(version); + Ok(()) + } + + #[getter] + fn use_consolidated_metadata(&self) -> &'static str { + use_consolidated_metadata_to_str(global_config().use_consolidated_metadata()) + } + + #[setter] + fn set_use_consolidated_metadata(&mut self, value: PyBackedStr) -> PyResult<()> { + let mode = parse_use_consolidated_metadata(&value)?; + global_config_mut().set_use_consolidated_metadata(mode); + Ok(()) + } +} + +fn metadata_convert_version_to_str(version: MetadataConvertVersion) -> &'static str { + match version { + MetadataConvertVersion::Default => "default", + MetadataConvertVersion::V3 => "v3", + } +} + +fn parse_metadata_convert_version(value: &str) -> PyResult { + match value.to_ascii_lowercase().as_str() { + "default" => Ok(MetadataConvertVersion::Default), + "v3" => Ok(MetadataConvertVersion::V3), + other => Err(PyValueError::new_err(format!( + "unknown metadata convert version {other:?}; expected one of 'default', 'v3'" + ))), + } +} + +fn metadata_erase_version_to_str(version: MetadataEraseVersion) -> &'static str { + match version { + MetadataEraseVersion::Default => "default", + MetadataEraseVersion::All => "all", + MetadataEraseVersion::V3 => "v3", + MetadataEraseVersion::V2 => "v2", + } +} + +fn parse_metadata_erase_version(value: &str) -> PyResult { + match value.to_ascii_lowercase().as_str() { + "default" => Ok(MetadataEraseVersion::Default), + "all" => Ok(MetadataEraseVersion::All), + "v3" => Ok(MetadataEraseVersion::V3), + "v2" => Ok(MetadataEraseVersion::V2), + other => Err(PyValueError::new_err(format!( + "unknown metadata erase version {other:?}; expected one of \ + 'default', 'all', 'v3', 'v2'" + ))), + } +} + +fn use_consolidated_metadata_to_str(mode: UseConsolidatedMetadata) -> &'static str { + match mode { + UseConsolidatedMetadata::Auto => "auto", + UseConsolidatedMetadata::Must => "must", + UseConsolidatedMetadata::Never => "never", + } +} + +fn parse_use_consolidated_metadata(value: &str) -> PyResult { + match value.to_ascii_lowercase().as_str() { + "auto" => Ok(UseConsolidatedMetadata::Auto), + "must" => Ok(UseConsolidatedMetadata::Must), + "never" => Ok(UseConsolidatedMetadata::Never), + other => Err(PyValueError::new_err(format!( + "unknown use consolidated metadata mode {other:?}; expected one of \ + 'auto', 'must', 'never'" + ))), } } diff --git a/src/lib.rs b/src/lib.rs index d9d0a14..3369194 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ use crate::array::{PyArray, PyAsyncArray}; use crate::array_bytes::PyArrayBytes; use crate::chunks::PyChunkGrid; use crate::codec::register_codec_module; +use crate::config::PyConfig; use crate::decoded_array::{PyMaskedTensor, PyMaskedVariableArray, PyTensor, PyVariableArray}; use crate::dtype::PyDataType; use crate::exceptions::register_exceptions_module; @@ -38,6 +39,8 @@ fn _zarrista(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add("config", Bound::new(m.py(), PyConfig)?)?; m.add_class::()?; m.add_class::()?; m.add_class::()?;