diff --git a/python/zarrista/_array.pyi b/python/zarrista/_array.pyi index 9edbdf9..5b6036d 100644 --- a/python/zarrista/_array.pyi +++ b/python/zarrista/_array.pyi @@ -3,10 +3,14 @@ from typing import TypeAlias, Unpack from zarr_metadata import ArrayMetadataV3, JSONValue -from zarrista.codec import CodecOptions +from zarrista.codec import ( + ArrayToArrayCodec, + ArrayToBytesCodec, + BytesToBytesCodec, + CodecOptions, +) from ._chunks import ChunkGrid -from ._codec import CodecChain from ._decoded_array import DecodedArray from ._dtype import DataType from ._store import AsyncStore, FilesystemStore, MemoryStore @@ -34,8 +38,14 @@ class Array: def chunk_grid(self) -> ChunkGrid: """The chunk grid of the array.""" @property - def codecs(self) -> CodecChain: - """The codec chain used to encode and decode the array's chunks.""" + def compressors(self) -> list[BytesToBytesCodec]: + """The bytes-to-bytes codecs ("compressors").""" + @property + def filters(self) -> list[ArrayToArrayCodec]: + """The array-to-array codecs ("filters").""" + @property + def serializer(self) -> ArrayToBytesCodec: + """The array-to-bytes codec ("serializer").""" @property def dimension_names(self) -> list[str | None] | None: """The dimension names, if any were specified.""" @@ -97,8 +107,14 @@ class AsyncArray: def chunk_grid(self) -> ChunkGrid: """The chunk grid of the array.""" @property - def codecs(self) -> CodecChain: - """The codec chain used to encode and decode the array's chunks.""" + def compressors(self) -> list[BytesToBytesCodec]: + """The bytes-to-bytes codecs ("compressors").""" + @property + def filters(self) -> list[ArrayToArrayCodec]: + """The array-to-array codecs ("filters").""" + @property + def serializer(self) -> ArrayToBytesCodec: + """The array-to-bytes codec ("serializer").""" @property def dimension_names(self) -> list[str | None] | None: """The dimension names, if any were specified.""" diff --git a/python/zarrista/codec/__init__.py b/python/zarrista/codec/__init__.py index 131fc7a..c0471ac 100644 --- a/python/zarrista/codec/__init__.py +++ b/python/zarrista/codec/__init__.py @@ -2,24 +2,24 @@ from zarrista._zarrista.codec import ( ArrayToArrayCodec, - Blosc, + ArrayToBytesCodec, BytesToBytesCodec, - CodecChain, - Crc32c, - Gzip, - Zstd, bitround, + blosc, + crc32c, + gzip, transpose, + zstd, ) __all__ = [ "ArrayToArrayCodec", - "Blosc", + "ArrayToBytesCodec", "BytesToBytesCodec", - "CodecChain", - "Crc32c", - "Gzip", - "Zstd", "bitround", + "blosc", + "crc32c", + "gzip", "transpose", + "zstd", ] diff --git a/python/zarrista/codec/__init__.pyi b/python/zarrista/codec/__init__.pyi index 00b26cb..4cd26e2 100644 --- a/python/zarrista/codec/__init__.pyi +++ b/python/zarrista/codec/__init__.pyi @@ -1,10 +1,10 @@ from zarrista.codec._array_to_array import ArrayToArrayCodec as ArrayToArrayCodec from zarrista.codec._array_to_array import bitround as bitround from zarrista.codec._array_to_array import transpose as transpose +from zarrista.codec._array_to_bytes import ArrayToBytesCodec as ArrayToBytesCodec from zarrista.codec._bytes_to_bytes import BytesToBytesCodec as BytesToBytesCodec -from zarrista.codec._bytes_to_bytes._blosc import Blosc as Blosc -from zarrista.codec._bytes_to_bytes._crc32c import Crc32c as Crc32c -from zarrista.codec._bytes_to_bytes._gzip import Gzip as Gzip -from zarrista.codec._bytes_to_bytes._zstd import Zstd as Zstd -from zarrista.codec._codec_chain import CodecChain as CodecChain +from zarrista.codec._bytes_to_bytes._blosc import blosc as blosc +from zarrista.codec._bytes_to_bytes._crc32c import crc32c as crc32c +from zarrista.codec._bytes_to_bytes._gzip import gzip as gzip +from zarrista.codec._bytes_to_bytes._zstd import zstd as zstd from zarrista.codec._options import CodecOptions as CodecOptions diff --git a/python/zarrista/codec/_array_to_array.pyi b/python/zarrista/codec/_array_to_array.pyi index b52bf31..6bede9c 100644 --- a/python/zarrista/codec/_array_to_array.pyi +++ b/python/zarrista/codec/_array_to_array.pyi @@ -1,3 +1,5 @@ +from zarr_metadata import JSONValue + from zarrista._array_bytes import ArrayBytes from zarrista._dtype import DataType from zarrista._fill_value import FillValue @@ -5,6 +7,18 @@ from zarrista._fill_value import FillValue class ArrayToArrayCodec: """A Zarr v3 array-to-array codec.""" + @property + def name(self) -> str | None: + """The codec's Zarr v3 name (e.g. `"transpose"`), if any.""" + @property + def config(self) -> JSONValue | None: + """The codec's Zarr v3 configuration as a dict, if any.""" + @staticmethod + def from_config(metadata: JSONValue) -> ArrayToArrayCodec: + """Build a codec from its Zarr v3 metadata. + + For example `{"name": "transpose", "configuration": {"order": [1, 0]}}`. + """ def encoded_data_type(self, decoded_data_type: DataType) -> DataType: """Return the data type produced by encoding `decoded_data_type`.""" def encoded_fill_value( diff --git a/python/zarrista/codec/_array_to_bytes.pyi b/python/zarrista/codec/_array_to_bytes.pyi new file mode 100644 index 0000000..15359d2 --- /dev/null +++ b/python/zarrista/codec/_array_to_bytes.pyi @@ -0,0 +1,17 @@ +from zarr_metadata import JSONValue + +class ArrayToBytesCodec: + """A Zarr v3 array-to-bytes codec (the "serializer").""" + + @property + def name(self) -> str | None: + """The codec's Zarr v3 name (e.g. `"bytes"`, `"sharding_indexed"`), if any.""" + @property + def config(self) -> JSONValue | None: + """The codec's Zarr v3 configuration as a dict, if any.""" + @staticmethod + def from_config(metadata: JSONValue) -> ArrayToBytesCodec: + """Build a codec from its Zarr v3 metadata. + + For example `{"name": "bytes", "configuration": {"endian": "little"}}`. + """ diff --git a/python/zarrista/codec/_bytes_to_bytes/__init__.pyi b/python/zarrista/codec/_bytes_to_bytes/__init__.pyi index 5c090ee..30819bc 100644 --- a/python/zarrista/codec/_bytes_to_bytes/__init__.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/__init__.pyi @@ -1,5 +1,19 @@ +from zarr_metadata import JSONValue + class BytesToBytesCodec: """A Zarr v3 bytes-to-bytes codec.""" + @property + def name(self) -> str | None: + """The codec's Zarr v3 name (e.g. `"blosc"`), if any.""" + @property + def config(self) -> JSONValue | None: + """The codec's Zarr v3 configuration as a dict, if any.""" + @staticmethod + def from_config(metadata: JSONValue) -> BytesToBytesCodec: + """Build a codec from its Zarr v3 metadata. + + For example `{"name": "gzip", "configuration": {"level": 5}}`. + """ def encode(self, decoded_value: bytes) -> bytes: """Encode chunk bytes for this codec.""" diff --git a/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi b/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi index cf92e19..3d9548a 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_blosc.pyi @@ -1,7 +1,5 @@ from typing import Literal, TypeAlias -from zarr_metadata.v3.codec.blosc import BloscCodecConfiguration - from zarrista.codec._bytes_to_bytes import BytesToBytesCodec BloscCompressor: TypeAlias = Literal[ @@ -17,29 +15,18 @@ BloscCompressor: TypeAlias = Literal[ BloscShuffle: TypeAlias = Literal["noshuffle", "shuffle", "bitshuffle"] """A `blosc` shuffle mode.""" -class Blosc(BytesToBytesCodec): - """The `blosc` bytes-to-bytes codec.""" - - def __init__( - self, - cname: BloscCompressor, - clevel: int, - shuffle_mode: BloscShuffle, - *, - blocksize: int | None = None, - typesize: int | None = None, - ) -> None: - """Construct a `blosc` codec from its parameters. - - `clevel` is the compression level, an integer from 0 (no compression) - to 9 (most compression). `typesize` is required (a positive integer) - whenever `shuffle_mode` is not `"noshuffle"`. The block size is chosen - automatically when `blocksize` is `None` or `0`. - """ - @staticmethod - def from_config(config: BloscCodecConfiguration) -> Blosc: - """Construct a `blosc` codec from a configuration mapping. - - For example `{"cname": "lz4", "clevel": 5, "shuffle": "shuffle", - "typesize": 4, "blocksize": 0}`. - """ +def blosc( + cname: BloscCompressor, + clevel: int, + shuffle_mode: BloscShuffle, + *, + blocksize: int | None = None, + typesize: int | None = None, +) -> BytesToBytesCodec: + """Construct a `blosc` codec from its parameters. + + `clevel` is the compression level, an integer from 0 (no compression) to 9 + (most compression). `typesize` is required (a positive integer) whenever + `shuffle_mode` is not `"noshuffle"`. The block size is chosen automatically + when `blocksize` is `None` or `0`. + """ diff --git a/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi b/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi index a7a56a4..b4b9ac0 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_crc32c.pyi @@ -1,20 +1,7 @@ -from typing import TypedDict - from zarrista.codec._bytes_to_bytes import BytesToBytesCodec -class Crc32cConfig(TypedDict): ... - -class Crc32c(BytesToBytesCodec): - """The `crc32c` bytes-to-bytes codec.""" - - def __init__(self) -> None: - """Construct a `crc32c` codec. - - Appends a CRC32C checksum to the encoded bytestream. - """ - @staticmethod - def from_config(config: Crc32cConfig) -> Crc32c: - """Construct a `crc32c` codec from a configuration mapping, e.g. `{}`. +def crc32c() -> BytesToBytesCodec: + """Construct a `crc32c` codec. - The `crc32c` codec takes no configuration, so the mapping is empty. - """ + Appends a CRC32C checksum to the encoded bytestream. + """ diff --git a/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi b/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi index 0fae25c..b632117 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_gzip.pyi @@ -1,16 +1,8 @@ -from zarr_metadata.v3.codec.gzip import GzipCodecConfiguration - from zarrista.codec._bytes_to_bytes import BytesToBytesCodec -class Gzip(BytesToBytesCodec): - """The `gzip` bytes-to-bytes codec.""" - - def __init__(self, level: int) -> None: - """Construct a `gzip` codec. +def gzip(level: int) -> BytesToBytesCodec: + """Construct a `gzip` codec. - `level` is the compression level, an integer from 0 (no compression) - to 9 (most compression). - """ - @staticmethod - def from_config(config: GzipCodecConfiguration) -> Gzip: - """Construct a `gzip` codec from a config mapping, e.g. `{"level": 5}`.""" + `level` is the compression level, an integer from 0 (no compression) to 9 + (most compression). + """ diff --git a/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi b/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi index 845c385..40a354d 100644 --- a/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi +++ b/python/zarrista/codec/_bytes_to_bytes/_zstd.pyi @@ -1,19 +1,8 @@ -from zarr_metadata.v3.codec.zstd import ZstdCodecConfiguration - from zarrista.codec._bytes_to_bytes import BytesToBytesCodec -class Zstd(BytesToBytesCodec): - """The `zstd` bytes-to-bytes codec.""" - - def __init__(self, level: int, checksum: bool) -> None: - """Construct a `zstd` codec. - - `level` is the compression level. When `checksum` is true, a checksum - is written to (and verified on decode from) the encoded bytestream. - """ - @staticmethod - def from_config(config: ZstdCodecConfiguration) -> Zstd: - """Construct a `zstd` codec from a configuration mapping. +def zstd(level: int, checksum: bool) -> BytesToBytesCodec: + """Construct a `zstd` codec. - For example, `{"level": 5, "checksum": false}`. - """ + `level` is the compression level. When `checksum` is true, a checksum is + written to (and verified on decode from) the encoded bytestream. + """ diff --git a/python/zarrista/codec/_codec_chain.pyi b/python/zarrista/codec/_codec_chain.pyi deleted file mode 100644 index 672cb65..0000000 --- a/python/zarrista/codec/_codec_chain.pyi +++ /dev/null @@ -1,9 +0,0 @@ -from zarr_metadata import MetadataV3, NamedConfigV3 - -class CodecChain: - """The ordered chain of codecs used to encode and decode an array's chunks.""" - - def __init__(self, metadatas: list[MetadataV3]) -> None: - """Construct a codec chain from a list of Zarr v3 codec metadata.""" - def create_metadatas(self) -> list[NamedConfigV3]: - """Return the Zarr v3 metadata for each codec in the chain.""" diff --git a/src/array/shared.rs b/src/array/shared.rs index 0314d2e..023d8bb 100644 --- a/src/array/shared.rs +++ b/src/array/shared.rs @@ -21,9 +21,15 @@ macro_rules! array_metadata_accessors { self.inner.chunk_grid().clone().into() } + /// The bytes-to-bytes codecs ("compressors"). #[getter] - fn codecs(&self) -> $crate::codec::PyCodecChain { - self.inner.codecs().into() + fn compressors(&self) -> Vec<$crate::codec::PyBytesToBytesCodec> { + let codecs = self.inner.codecs(); + codecs + .bytes_to_bytes_codecs() + .iter() + .map(|c| $crate::codec::PyBytesToBytesCodec::new(c.clone())) + .collect() } /// The dimension names, if any were specified. @@ -38,6 +44,17 @@ macro_rules! array_metadata_accessors { self.inner.data_type().clone().into() } + /// The array-to-array codecs ("filters"). + #[getter] + fn filters(&self) -> Vec<$crate::codec::PyArrayToArrayCodec> { + let codecs = self.inner.codecs(); + codecs + .array_to_array_codecs() + .iter() + .map(|f| $crate::codec::PyArrayToArrayCodec::new(f.clone())) + .collect() + } + #[getter] fn metadata(&self) -> $crate::metadata::PyArrayMetadata { self.inner.metadata().clone().into() @@ -55,6 +72,13 @@ macro_rules! array_metadata_accessors { self.inner.path().as_str() } + /// The array-to-bytes codec ("serializer"). + #[getter] + fn serializer(&self) -> $crate::codec::PyArrayToBytesCodec { + let codecs = self.inner.codecs(); + $crate::codec::PyArrayToBytesCodec::new(codecs.array_to_bytes_codec().clone()) + } + /// The array shape. #[getter] fn shape(&self) -> &[u64] { diff --git a/src/codec/array_to_array.rs b/src/codec/array_to_array.rs index 967edd8..c535e7a 100644 --- a/src/codec/array_to_array.rs +++ b/src/codec/array_to_array.rs @@ -1,14 +1,18 @@ +use std::borrow::Cow; use std::num::NonZeroU64; use std::sync::Arc; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use zarrs::array::codec::api::CodecMetadata; use zarrs::array::codec::{BitroundCodec, TransposeCodec, TransposeOrder}; -use zarrs::array::{ArrayToArrayCodecTraits, CodecOptions}; +use zarrs::array::{ArrayToArrayCodecTraits, Codec, CodecOptions}; use crate::array_bytes::PyArrayBytes; use crate::dtype::PyDataType; use crate::error::ZarristaResult; use crate::fill_value::PyFillValue; +use crate::metadata::{PyConfiguration, PyMetadataV3}; #[pyfunction] pub fn transpose(order: Vec) -> ZarristaResult { @@ -25,8 +29,38 @@ pub fn bitround(keepbits: u32) -> PyArrayToArrayCodec { #[pyclass(module = "zarrista.codec", frozen, name = "ArrayToArrayCodec")] pub struct PyArrayToArrayCodec(Arc); +impl PyArrayToArrayCodec { + pub fn new(codec: Arc) -> Self { + Self(codec) + } +} + #[pymethods] impl PyArrayToArrayCodec { + fn __repr__(&self) -> String { + format!("ArrayToArrayCodec({:?})", self.0) + } + + /// Build a codec from its Zarr v3 metadata, + #[staticmethod] + fn from_config(metadata: PyMetadataV3) -> ZarristaResult { + let codec = Codec::from_metadata(CodecMetadata::V3(metadata.as_ref()))?; + match codec { + Codec::ArrayToArray(c) => Ok(Self::new(c)), + _ => Err( + PyValueError::new_err("metadata does not describe an ArrayToArray codec").into(), + ), + } + } + + /// The codec's Zarr v3 configuration + #[getter] + fn config(&self) -> Option { + self.0 + .configuration_v3(&Default::default()) + .map(|config| config.into()) + } + fn encoded_data_type(&self, decoded_data_type: &PyDataType) -> ZarristaResult { Ok(self.0.encoded_data_type(decoded_data_type.inner())?.into()) } @@ -87,7 +121,9 @@ impl PyArrayToArrayCodec { Ok(self.0.decoded_shape(&encoded_shape)?) } - fn __repr__(&self) -> String { - format!("ArrayToArrayCodec({:?})", self.0) + /// The codec's Zarr v3 name if it has one. + #[getter] + fn name(&self) -> Option> { + self.0.name_v3() } } diff --git a/src/codec/array_to_bytes/mod.rs b/src/codec/array_to_bytes/mod.rs new file mode 100644 index 0000000..2b6fd30 --- /dev/null +++ b/src/codec/array_to_bytes/mod.rs @@ -0,0 +1,54 @@ +//! Array to bytes codecs, or "serializers". + +use std::borrow::Cow; +use std::sync::Arc; + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use zarrs::array::codec::api::CodecMetadata; +use zarrs::array::{ArrayToBytesCodecTraits, Codec}; + +use crate::error::ZarristaResult; +use crate::metadata::{PyConfiguration, PyMetadataV3}; + +#[pyclass(module = "zarrista.codec", frozen, name = "ArrayToBytesCodec")] +pub struct PyArrayToBytesCodec(Arc); + +impl PyArrayToBytesCodec { + pub fn new(codec: Arc) -> Self { + Self(codec) + } +} + +#[pymethods] +impl PyArrayToBytesCodec { + fn __repr__(&self) -> String { + format!("ArrayToBytesCodec({:?})", self.0) + } + + /// Build a codec from its Zarr v3 metadata, + #[staticmethod] + fn from_config(metadata: PyMetadataV3) -> ZarristaResult { + let codec = Codec::from_metadata(CodecMetadata::V3(metadata.as_ref()))?; + match codec { + Codec::ArrayToBytes(c) => Ok(Self::new(c)), + _ => Err( + PyValueError::new_err("metadata does not describe an ArrayToBytes codec").into(), + ), + } + } + + /// The codec's Zarr v3 configuration + #[getter] + fn config(&self) -> Option { + self.0 + .configuration_v3(&Default::default()) + .map(|config| config.into()) + } + + /// The codec's Zarr v3 name if it has one. + #[getter] + fn name(&self) -> Option> { + self.0.name_v3() + } +} diff --git a/src/codec/bytes_to_bytes/blosc.rs b/src/codec/bytes_to_bytes/blosc.rs index 44231d1..52aaf32 100644 --- a/src/codec/bytes_to_bytes/blosc.rs +++ b/src/codec/bytes_to_bytes/blosc.rs @@ -1,14 +1,14 @@ +use std::sync::Arc; + use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; -use pyo3::{Borrowed, FromPyObject, PyClassInitializer}; -use pythonize::depythonize; +use pyo3::{Borrowed, FromPyObject}; use zarrs::array::codec::{BloscCodec, BloscCompressionLevel, BloscCompressor, BloscShuffleMode}; +use crate::codec::PyBytesToBytesCodec; use crate::error::ZarristaResult; -pub use sealed::PyBlosc; - /// The `blosc` compressor. /// /// Extracted from a Python string: one of `"blosclz"`, `"lz4"`, `"lz4hc"`, @@ -82,69 +82,27 @@ impl FromPyObject<'_, '_> for PyBloscShuffleMode { } } -/// `PyBlosc` lives in a private module with a private `()` field, so it can only -/// be constructed via [`PyBlosc::new`], enforcing correct submodule instantiation -mod sealed { - use std::sync::Arc; - - use pyo3::prelude::*; - use pyo3::PyClassInitializer; - use zarrs::array::codec::BloscCodec; - - use crate::codec::PyBytesToBytesCodec; - - /// The `blosc` bytes-to-bytes codec. - /// - /// A subclass of `BytesToBytesCodec`, so it inherits the codec methods (e.g. - /// `encode`) while adding `blosc`-specific constructors. - // - // See https://pyo3.rs/v0.29.0/class.html#inheritance for docs on subclassing in pyo3 - #[pyclass(module = "zarrista.codec", extends = PyBytesToBytesCodec, frozen, name = "Blosc")] - pub struct PyBlosc(()); - - impl PyBlosc { - /// Wrap a [`BloscCodec`] as an initializer for the `PyBlosc` subclass: the - /// codec is stored in the [`PyBytesToBytesCodec`] base, with `PyBlosc` - /// itself carrying no extra state. - pub(super) fn new(codec: BloscCodec) -> PyClassInitializer { - PyClassInitializer::from(PyBytesToBytesCodec::new(Arc::new(codec))) - .add_subclass(PyBlosc(())) - } - } -} - -#[pymethods] -impl PyBlosc { - /// Create a `blosc` codec from its parameters. - /// - /// `typesize` is required (a positive integer) whenever `shuffle_mode` is - /// not `"noshuffle"`. The block size is chosen automatically when - /// `blocksize` is `None` or `0`. - #[new] - #[pyo3(signature = ( - cname, - clevel, - shuffle_mode, - *, - blocksize = None, - typesize = None, - ))] - fn py_new( - cname: PyBloscCompressor, - clevel: PyBloscCompressionLevel, - shuffle_mode: PyBloscShuffleMode, - blocksize: Option, - typesize: Option, - ) -> ZarristaResult> { - let codec = BloscCodec::new(cname.0, clevel.0, blocksize, shuffle_mode.0, typesize)?; - Ok(Self::new(codec)) - } - - /// Create a `blosc` codec from a configuration mapping, e.g. - /// `{"cname": "lz4", "clevel": 5, "shuffle": "shuffle", "typesize": 4, "blocksize": 0}`. - #[staticmethod] - fn from_config(config: &Bound<'_, PyAny>) -> ZarristaResult> { - let codec = BloscCodec::new_with_configuration(&depythonize(config)?)?; - Ok(Py::new(config.py(), Self::new(codec))?) - } +/// Create a `blosc` codec from its parameters. +/// +/// `typesize` is required (a positive integer) whenever `shuffle_mode` is not +/// `"noshuffle"`. The block size is chosen automatically when `blocksize` is +/// `None` or `0`. +#[pyfunction] +#[pyo3(signature = ( + cname, + clevel, + shuffle_mode, + *, + blocksize = None, + typesize = None, +))] +pub fn blosc( + cname: PyBloscCompressor, + clevel: PyBloscCompressionLevel, + shuffle_mode: PyBloscShuffleMode, + blocksize: Option, + typesize: Option, +) -> ZarristaResult { + let codec = BloscCodec::new(cname.0, clevel.0, blocksize, shuffle_mode.0, typesize)?; + Ok(PyBytesToBytesCodec::new(Arc::new(codec))) } diff --git a/src/codec/bytes_to_bytes/crc32c.rs b/src/codec/bytes_to_bytes/crc32c.rs index d1d2ab0..ea5a1e8 100644 --- a/src/codec/bytes_to_bytes/crc32c.rs +++ b/src/codec/bytes_to_bytes/crc32c.rs @@ -1,55 +1,13 @@ +use std::sync::Arc; + use pyo3::prelude::*; -use pythonize::depythonize; use zarrs::array::codec::Crc32cCodec; -use crate::error::ZarristaResult; - -pub use sealed::PyCrc32c; - -/// `PyCrc32c` lives in a private module with a private `()` field, so it can only -/// be constructed via [`PyCrc32c::new`], enforcing correct submodule instantiation -mod sealed { - use std::sync::Arc; - - use pyo3::prelude::*; - use pyo3::PyClassInitializer; - use zarrs::array::codec::Crc32cCodec; - - use crate::codec::PyBytesToBytesCodec; - - /// The `crc32c` bytes-to-bytes codec. - /// - /// A subclass of `BytesToBytesCodec`, so it inherits the codec methods (e.g. - /// `encode`) while adding `crc32c`-specific constructors. - // - // See https://pyo3.rs/v0.29.0/class.html#inheritance for docs on subclassing in pyo3 - #[pyclass(module = "zarrista.codec", extends = PyBytesToBytesCodec, frozen, name = "Crc32c")] - pub struct PyCrc32c(()); - - impl PyCrc32c { - /// Wrap a [`Crc32cCodec`] as an initializer for the `PyCrc32c` subclass: - /// the codec is stored in the [`PyBytesToBytesCodec`] base, with - /// `PyCrc32c` itself carrying no extra state. - pub(super) fn new(codec: Crc32cCodec) -> PyClassInitializer { - PyClassInitializer::from(PyBytesToBytesCodec::new(Arc::new(codec))) - .add_subclass(PyCrc32c(())) - } - } -} - -#[pymethods] -impl PyCrc32c { - /// Create a `crc32c` codec, which appends a CRC32C checksum to the encoded - /// bytestream. - #[new] - fn py_new() -> PyClassInitializer { - Self::new(Crc32cCodec::new()) - } +use crate::codec::PyBytesToBytesCodec; - /// Create a `crc32c` codec from a configuration mapping, e.g. `{}`. - #[staticmethod] - fn from_config(config: &Bound<'_, PyAny>) -> ZarristaResult> { - let codec = Crc32cCodec::new_with_configuration(&depythonize(config)?); - Ok(Py::new(config.py(), Self::new(codec))?) - } +/// Create a `crc32c` codec, which appends a CRC32C checksum to the encoded +/// bytestream. +#[pyfunction] +pub fn crc32c() -> PyBytesToBytesCodec { + PyBytesToBytesCodec::new(Arc::new(Crc32cCodec::new())) } diff --git a/src/codec/bytes_to_bytes/gzip.rs b/src/codec/bytes_to_bytes/gzip.rs index b757ca7..447ffb6 100644 --- a/src/codec/bytes_to_bytes/gzip.rs +++ b/src/codec/bytes_to_bytes/gzip.rs @@ -1,63 +1,22 @@ +use std::sync::Arc; + use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use pythonize::depythonize; use zarrs::array::codec::GzipCodec; +use crate::codec::PyBytesToBytesCodec; use crate::error::ZarristaResult; -pub use sealed::PyGzip; - -/// `PyGzip` lives in a private module with a private `()` field, so it can only -/// be constructed via [`PyGzip::new`], enforcing correct submodule instantiation -mod sealed { - use std::sync::Arc; - - use pyo3::prelude::*; - use pyo3::PyClassInitializer; - use zarrs::array::codec::GzipCodec; - - use crate::codec::PyBytesToBytesCodec; - - /// The `gzip` bytes-to-bytes codec. - /// - /// A subclass of `BytesToBytesCodec`, so it inherits the codec methods (e.g. - /// `encode`) while adding `gzip`-specific constructors. - // - // See https://pyo3.rs/v0.29.0/class.html#inheritance for docs on subclassing in pyo3 - #[pyclass(module = "zarrista.codec", extends = PyBytesToBytesCodec, frozen, name = "Gzip")] - pub struct PyGzip(()); - - impl PyGzip { - /// Wrap a [`GzipCodec`] as an initializer for the `PyGzip` subclass: the - /// codec is stored in the [`PyBytesToBytesCodec`] base, with `PyGzip` - /// itself carrying no extra state. - pub(super) fn new(codec: GzipCodec) -> PyClassInitializer { - PyClassInitializer::from(PyBytesToBytesCodec::new(Arc::new(codec))) - .add_subclass(PyGzip(())) - } - } -} - -#[pymethods] -impl PyGzip { - /// Create a `gzip` codec. - /// - /// `level` is the compression level, an integer from 0 (no compression) to - /// 9 (most compression). - #[new] - fn py_new(level: u32) -> ZarristaResult> { - let codec = GzipCodec::new(level).map_err(|_| { - PyValueError::new_err(format!( - "invalid gzip compression level {level}; must be between 0 and 9" - )) - })?; - Ok(Self::new(codec)) - } - - /// Create a `gzip` codec from a configuration mapping, e.g. `{"level": 5}`. - #[staticmethod] - fn from_config(config: &Bound<'_, PyAny>) -> ZarristaResult> { - let codec = GzipCodec::new_with_configuration(&depythonize(config)?)?; - Ok(Py::new(config.py(), Self::new(codec))?) - } +/// Create a `gzip` codec. +/// +/// `level` is the compression level, an integer from 0 (no compression) to 9 +/// (most compression). +#[pyfunction] +pub fn gzip(level: u32) -> ZarristaResult { + let codec = GzipCodec::new(level).map_err(|_| { + PyValueError::new_err(format!( + "invalid gzip compression level {level}; must be between 0 and 9" + )) + })?; + Ok(PyBytesToBytesCodec::new(Arc::new(codec))) } diff --git a/src/codec/bytes_to_bytes/mod.rs b/src/codec/bytes_to_bytes/mod.rs index 6133346..efbc2ce 100644 --- a/src/codec/bytes_to_bytes/mod.rs +++ b/src/codec/bytes_to_bytes/mod.rs @@ -1,3 +1,5 @@ +//! Bytes to bytes codecs, or "compressors". + pub(super) mod blosc; pub(super) mod crc32c; pub(super) mod gzip; @@ -6,18 +8,16 @@ pub(super) mod zstd; use std::borrow::Cow; use std::sync::Arc; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3_bytes::PyBytes; -use zarrs::array::{BytesToBytesCodecTraits, CodecOptions}; +use zarrs::array::codec::api::CodecMetadata; +use zarrs::array::{BytesToBytesCodecTraits, Codec, CodecOptions}; use crate::error::ZarristaResult; +use crate::metadata::{PyConfiguration, PyMetadataV3}; -#[pyclass( - module = "zarrista.codec", - subclass, - frozen, - name = "BytesToBytesCodec" -)] +#[pyclass(module = "zarrista.codec", frozen, name = "BytesToBytesCodec")] pub struct PyBytesToBytesCodec(Arc); impl PyBytesToBytesCodec { @@ -28,6 +28,30 @@ impl PyBytesToBytesCodec { #[pymethods] impl PyBytesToBytesCodec { + fn __repr__(&self) -> String { + format!("BytesToBytesCodec({:?})", self.0) + } + + /// Build a codec from its Zarr v3 metadata, + #[staticmethod] + fn from_config(metadata: PyMetadataV3) -> ZarristaResult { + let codec = Codec::from_metadata(CodecMetadata::V3(metadata.as_ref()))?; + match codec { + Codec::BytesToBytes(c) => Ok(Self::new(c)), + _ => { + Err(PyValueError::new_err("metadata does not describe a BytesToBytes codec").into()) + } + } + } + + /// The codec's Zarr v3 configuration + #[getter] + fn config(&self) -> Option { + self.0 + .configuration_v3(&Default::default()) + .map(|config| config.into()) + } + fn encode(&self, decoded_value: PyBytes) -> ZarristaResult { let encoded = self.0.encode( Cow::Borrowed(decoded_value.as_ref()), @@ -35,4 +59,10 @@ impl PyBytesToBytesCodec { )?; Ok(PyBytes::new(encoded.into_owned().into())) } + + /// The codec's Zarr v3 name if it has one. + #[getter] + fn name(&self) -> Option> { + self.0.name_v3() + } } diff --git a/src/codec/bytes_to_bytes/zstd.rs b/src/codec/bytes_to_bytes/zstd.rs index ac1fbcb..2dab04c 100644 --- a/src/codec/bytes_to_bytes/zstd.rs +++ b/src/codec/bytes_to_bytes/zstd.rs @@ -1,58 +1,15 @@ +use std::sync::Arc; + use pyo3::prelude::*; -use pythonize::depythonize; use zarrs::array::codec::ZstdCodec; -use crate::error::ZarristaResult; - -pub use sealed::PyZstd; - -/// `PyZstd` lives in a private module with a private `()` field, so it can only -/// be constructed via [`PyZstd::new`], enforcing correct submodule instantiation -mod sealed { - use std::sync::Arc; - - use pyo3::prelude::*; - use pyo3::PyClassInitializer; - use zarrs::array::codec::ZstdCodec; - - use crate::codec::PyBytesToBytesCodec; - - /// The `zstd` bytes-to-bytes codec. - /// - /// A subclass of `BytesToBytesCodec`, so it inherits the codec methods (e.g. - /// `encode`) while adding `zstd`-specific constructors. - // - // See https://pyo3.rs/v0.29.0/class.html#inheritance for docs on subclassing in pyo3 - #[pyclass(module = "zarrista.codec", extends = PyBytesToBytesCodec, frozen, name = "Zstd")] - pub struct PyZstd(()); - - impl PyZstd { - /// Wrap a [`ZstdCodec`] as an initializer for the `PyZstd` subclass: the - /// codec is stored in the [`PyBytesToBytesCodec`] base, with `PyZstd` - /// itself carrying no extra state. - pub(super) fn new(codec: ZstdCodec) -> PyClassInitializer { - PyClassInitializer::from(PyBytesToBytesCodec::new(Arc::new(codec))) - .add_subclass(PyZstd(())) - } - } -} - -#[pymethods] -impl PyZstd { - /// Create a `zstd` codec. - /// - /// `level` is the compression level. When `checksum` is true, a checksum is - /// written to (and verified on decode from) the encoded bytestream. - #[new] - fn py_new(level: i32, checksum: bool) -> PyClassInitializer { - Self::new(ZstdCodec::new(level, checksum)) - } +use crate::codec::PyBytesToBytesCodec; - /// Create a `zstd` codec from a configuration mapping, e.g. - /// `{"level": 5, "checksum": false}`. - #[staticmethod] - fn from_config(config: &Bound<'_, PyAny>) -> ZarristaResult> { - let codec = ZstdCodec::new_with_configuration(&depythonize(config)?)?; - Ok(Py::new(config.py(), Self::new(codec))?) - } +/// Create a `zstd` codec. +/// +/// `level` is the compression level. When `checksum` is true, a checksum is +/// written to (and verified on decode from) the encoded bytestream. +#[pyfunction] +pub fn zstd(level: i32, checksum: bool) -> PyBytesToBytesCodec { + PyBytesToBytesCodec::new(Arc::new(ZstdCodec::new(level, checksum))) } diff --git a/src/codec/codec_chain.rs b/src/codec/codec_chain.rs deleted file mode 100644 index ed9555a..0000000 --- a/src/codec/codec_chain.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::sync::Arc; - -use pyo3::prelude::*; -use zarrs::array::CodecChain; - -use crate::metadata::PyMetadataV3; - -#[pyclass(module = "zarrista.codec", frozen, name = "CodecChain")] -pub struct PyCodecChain(Arc); - -#[pymethods] -impl PyCodecChain { - #[new] - fn new(metadatas: Vec) -> Self { - let metadatas = metadatas - .into_iter() - .map(|m| m.into_inner()) - .collect::>(); - let codec_chain = CodecChain::from_metadata(&metadatas).unwrap(); - PyCodecChain(Arc::new(codec_chain)) - } - - fn create_metadatas(&self) -> Vec { - self.0 - .create_metadatas(&Default::default()) - .into_iter() - .map(|m| m.into()) - .collect() - } -} - -impl From> for PyCodecChain { - fn from(codec_chain: Arc) -> Self { - PyCodecChain(codec_chain) - } -} diff --git a/src/codec/mod.rs b/src/codec/mod.rs index 71c549e..ee424b8 100644 --- a/src/codec/mod.rs +++ b/src/codec/mod.rs @@ -1,17 +1,17 @@ mod array_to_array; +mod array_to_bytes; mod bytes_to_bytes; -mod codec_chain; mod options; use pyo3::prelude::*; pub use array_to_array::{bitround, transpose, PyArrayToArrayCodec}; -pub use bytes_to_bytes::blosc::PyBlosc; -pub use bytes_to_bytes::crc32c::PyCrc32c; -pub use bytes_to_bytes::gzip::PyGzip; -pub use bytes_to_bytes::zstd::PyZstd; +pub use array_to_bytes::PyArrayToBytesCodec; +pub use bytes_to_bytes::blosc::blosc; +pub use bytes_to_bytes::crc32c::crc32c; +pub use bytes_to_bytes::gzip::gzip; +pub use bytes_to_bytes::zstd::zstd; pub use bytes_to_bytes::PyBytesToBytesCodec; -pub use codec_chain::PyCodecChain; pub use options::PyCodecOptions; /// Build the `zarrista.codec` submodule and attach it to `parent`. @@ -24,14 +24,14 @@ pub fn register_codec_module(parent: &Bound<'_, PyModule>) -> PyResult<()> { let codec = PyModule::new(py, "codec")?; codec.add_class::()?; + codec.add_class::()?; codec.add_class::()?; - codec.add_class::()?; - codec.add_class::()?; - codec.add_class::()?; - codec.add_class::()?; - codec.add_class::()?; codec.add_function(wrap_pyfunction!(transpose, &codec)?)?; codec.add_function(wrap_pyfunction!(bitround, &codec)?)?; + codec.add_function(wrap_pyfunction!(blosc, &codec)?)?; + codec.add_function(wrap_pyfunction!(crc32c, &codec)?)?; + codec.add_function(wrap_pyfunction!(gzip, &codec)?)?; + codec.add_function(wrap_pyfunction!(zstd, &codec)?)?; py.import("sys")? .getattr("modules")? diff --git a/src/metadata.rs b/src/metadata.rs index f04a673..1534418 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -2,7 +2,7 @@ use pyo3::prelude::*; use pythonize::{depythonize, pythonize, PythonizeError}; use zarrs::metadata::v2::{ArrayMetadataV2, GroupMetadataV2, MetadataV2}; use zarrs::metadata::v3::{ArrayMetadataV3, GroupMetadataV3, MetadataV3}; -use zarrs::metadata::{ArrayMetadata, GroupMetadata}; +use zarrs::metadata::{ArrayMetadata, Configuration, GroupMetadata}; use zarrs::metadata_ext::group::consolidated_metadata::ConsolidatedMetadata; /// Generate a pythonize-compatible newtype wrapper around a zarrs metadata type. @@ -65,3 +65,4 @@ pythonized_metadata!(PyGroupMetadata, GroupMetadata); pythonized_metadata!(PyGroupMetadataV2, GroupMetadataV2); pythonized_metadata!(PyGroupMetadataV3, GroupMetadataV3); pythonized_metadata!(PyConsolidatedMetadata, ConsolidatedMetadata); +pythonized_metadata!(PyConfiguration, Configuration);