From e07b3ce03d4e6fa6d47738801fa53787567b8da5 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 23 Mar 2026 14:22:04 +0200 Subject: [PATCH 1/4] Draft LC codec --- Cargo.toml | 3 + codecs/lc/Cargo.toml | 30 ++ codecs/lc/LICENSE | 1 + codecs/lc/README.md | 38 ++ codecs/lc/src/lib.rs | 396 ++++++++++++++++++ .../numcodecs-wasm-builder/buildenv/flake.nix | 2 + crates/numcodecs-wasm-builder/src/main.rs | 3 + 7 files changed, 473 insertions(+) create mode 100644 codecs/lc/Cargo.toml create mode 120000 codecs/lc/LICENSE create mode 100644 codecs/lc/README.md create mode 100644 codecs/lc/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index feed4a121..18c5324b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ members = [ "codecs/fourier-network", "codecs/identity", "codecs/jpeg2000", + "codecs/lc", "codecs/linear-quantize", "codecs/log", "codecs/pco", @@ -64,6 +65,7 @@ numcodecs-fixed-offset-scale = { version = "0.4", path = "codecs/fixed-offset-sc numcodecs-fourier-network = { version = "0.3", path = "codecs/fourier-network", default-features = false } numcodecs-identity = { version = "0.4", path = "codecs/identity", default-features = false } numcodecs-jpeg2000 = { version = "0.3", path = "codecs/jpeg2000", default-features = false } +numcodecs-lc = { version = "0.1", path = "codecs/lc", default-features = false } numcodecs-linear-quantize = { version = "0.5", path = "codecs/linear-quantize", default-features = false } numcodecs-log = { version = "0.5", path = "codecs/log", default-features = false } numcodecs-pco = { version = "0.3", path = "codecs/pco", default-features = false } @@ -91,6 +93,7 @@ ebcc = { version = "0.1", default-features = false } format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } +lc-framework = { version = "0.1", git = "ssh://github.com/juntyr/lc-framework-rs.git", rev = "10a6e0d", default-features = 
false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/lc/Cargo.toml b/codecs/lc/Cargo.toml new file mode 100644 index 000000000..71a95cd5a --- /dev/null +++ b/codecs/lc/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "numcodecs-lc" +version = "0.1.0" +edition = { workspace = true } +authors = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +rust-version = { workspace = true } + +description = "LC codec implementation for the numcodecs API" +readme = "README.md" +categories = ["compression", "encoding"] +keywords = ["lc", "numcodecs", "compression", "encoding"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lc-framework = { workspace = true } +ndarray = { workspace = true } +numcodecs = { workspace = true } +postcard = { workspace = true } +schemars = { workspace = true, features = ["derive", "preserve_order"] } +serde = { workspace = true, features = ["std", "derive"] } +thiserror = { workspace = true } + +[dev-dependencies] +ndarray = { workspace = true, features = ["std"] } + +[lints] +workspace = true diff --git a/codecs/lc/LICENSE b/codecs/lc/LICENSE new file mode 120000 index 000000000..30cff7403 --- /dev/null +++ b/codecs/lc/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/codecs/lc/README.md b/codecs/lc/README.md new file mode 100644 index 000000000..3511eeaf2 --- /dev/null +++ b/codecs/lc/README.md @@ -0,0 +1,38 @@ +[![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![PyPi Release]][pypi] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] [![Read the Docs]][rtdocs] + +[CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +[workflow]: 
https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain + +[MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue +[repo]: https://github.com/juntyr/numcodecs-rs + +[Latest Version]: https://img.shields.io/crates/v/numcodecs-lc +[crates.io]: https://crates.io/crates/numcodecs-lc + +[PyPi Release]: https://img.shields.io/pypi/v/numcodecs-wasm-lc.svg +[pypi]: https://pypi.python.org/pypi/numcodecs-wasm-lc + +[Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-lc +[docs.rs]: https://docs.rs/numcodecs-lc/ + +[Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +[docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_lc + +[Read the Docs]: https://img.shields.io/readthedocs/numcodecs-wasm?label=readthedocs +[rtdocs]: https://numcodecs-wasm.readthedocs.io/en/stable/api/numcodecs_wasm_lc/ + +# numcodecs-lc + +LC codec implementation for the [`numcodecs`] API. + +[`numcodecs`]: https://docs.rs/numcodecs/0.2/numcodecs/ + +## License + +Licensed under the Mozilla Public License, Version 2.0 ([LICENSE](LICENSE) or https://www.mozilla.org/en-US/MPL/2.0/). + +## Funding + +The `numcodecs-lc` crate has been developed as part of [ESiWACE3](https://www.esiwace.eu), the third phase of the Centre of Excellence in Simulation of Weather and Climate in Europe. + +Funded by the European Union. This work has received funding from the European High Performance Computing Joint Undertaking (JU) under grant agreement No 101093054. diff --git a/codecs/lc/src/lib.rs b/codecs/lc/src/lib.rs new file mode 100644 index 000000000..8c2cfa7f4 --- /dev/null +++ b/codecs/lc/src/lib.rs @@ -0,0 +1,396 @@ +//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] +//! +//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain +//! +//! 
[MSRV]: https://img.shields.io/badge/MSRV-1.87.0-blue +//! [repo]: https://github.com/juntyr/numcodecs-rs +//! +//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-lc +//! [crates.io]: https://crates.io/crates/numcodecs-lc +//! +//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-lc +//! [docs.rs]: https://docs.rs/numcodecs-lc/ +//! +//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_lc +//! +//! LC codec implementation for the [`numcodecs`] API. + +#![allow(clippy::multiple_crate_versions)] // embedded-io + +use std::{borrow::Cow, ffi::CString, io}; + +use ndarray::Array1; +use numcodecs::{ + AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, + Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +type LcCodecVersion = StaticCodecVersion<0, 1, 0>; + +#[derive(Clone, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] +/// Codec providing compression using LC +pub struct LcCodec { + /// LC preprocessor + pub preprocessor: String, + /// LC components + pub components: String, + /// The codec's encoding format version. Do not provide this parameter explicitly. 
+ #[serde(default, rename = "_version")] + pub version: LcCodecVersion, +} + +impl Codec for LcCodec { + type Error = LcCodecError; + + fn encode(&self, data: AnyCowArray) -> Result { + compress(data.view(), &self.preprocessor, &self.components) + .map(|bytes| AnyArray::U8(Array1::from_vec(bytes).into_dyn())) + } + + fn decode(&self, encoded: AnyCowArray) -> Result { + let AnyCowArray::U8(encoded) = encoded else { + return Err(LcCodecError::EncodedDataNotBytes { + dtype: encoded.dtype(), + }); + }; + + if !matches!(encoded.shape(), [_]) { + return Err(LcCodecError::EncodedDataNotOneDimensional { + shape: encoded.shape().to_vec(), + }); + } + + decompress( + &self.preprocessor, + &self.components, + &AnyCowArray::U8(encoded).as_bytes(), + ) + } + + fn decode_into( + &self, + encoded: AnyArrayView, + decoded: AnyArrayViewMut, + ) -> Result<(), Self::Error> { + let AnyArrayView::U8(encoded) = encoded else { + return Err(LcCodecError::EncodedDataNotBytes { + dtype: encoded.dtype(), + }); + }; + + if !matches!(encoded.shape(), [_]) { + return Err(LcCodecError::EncodedDataNotOneDimensional { + shape: encoded.shape().to_vec(), + }); + } + + decompress_into( + &self.preprocessor, + &self.components, + &AnyArrayView::U8(encoded).as_bytes(), + decoded, + ) + } +} + +impl StaticCodec for LcCodec { + const CODEC_ID: &'static str = "lc.rs"; + + type Config<'de> = Self; + + fn from_config(config: Self::Config<'_>) -> Self { + config + } + + fn get_config(&self) -> StaticCodecConfig<'_, Self> { + StaticCodecConfig::from(self) + } +} + +#[derive(Debug, Error)] +/// Errors that may occur when applying the [`LcCodec`]. 
+pub enum LcCodecError { + /// [`LcCodec`] failed to encode the header + #[error("Lc failed to encode the header")] + HeaderEncodeFailed { + /// Opaque source error + source: LcHeaderError, + }, + /// [`LcCodec`] failed to encode the encoded data + #[error("Lc failed to encode the encoded data")] + LcEncodeFailed { + /// Opaque source error + source: LcCodingError, + }, + /// [`LcCodec`] can only decode one-dimensional byte arrays but received + /// an array of a different dtype + #[error( + "Lc can only decode one-dimensional byte arrays but received an array of dtype {dtype}" + )] + EncodedDataNotBytes { + /// The unexpected dtype of the encoded array + dtype: AnyArrayDType, + }, + /// [`LcCodec`] can only decode one-dimensional byte arrays but received + /// an array of a different shape + #[error( + "Lc can only decode one-dimensional byte arrays but received a byte array of shape {shape:?}" + )] + EncodedDataNotOneDimensional { + /// The unexpected shape of the encoded array + shape: Vec, + }, + /// [`LcCodec`] failed to decode the header + #[error("Lc failed to decode the header")] + HeaderDecodeFailed { + /// Opaque source error + source: LcHeaderError, + }, + /// [`LcCodec`] decode produced a different number of bytes than expected + #[error("Lc decode produced a different number of bytes than expected")] + DecodeDataLengthMismatch, + /// [`LcCodec`] failed to decode the encoded data + #[error("Lc failed to decode the encoded data")] + LcDecodeFailed { + /// Opaque source error + source: LcCodingError, + }, + /// [`LcCodec`] cannot decode into the provided array + #[error("Lc cannot decode into the provided array")] + MismatchedDecodeIntoArray { + /// The source of the error + #[from] + source: AnyArrayAssignError, + }, +} + +#[derive(Debug, Error)] +#[error(transparent)] +/// Opaque error for when encoding or decoding the header fails +pub struct LcHeaderError(postcard::Error); + +#[derive(Debug, Error)] +#[error(transparent)] +/// Opaque error for when 
encoding or decoding with LC fails +pub struct LcCodingError(io::Error); + +#[expect(clippy::needless_pass_by_value)] +/// Compress the `array` using LC with the provided `preprocessor` and +/// `components`. +/// +/// # Errors +/// +/// Errors with +/// - [`LcCodecError::HeaderEncodeFailed`] if encoding the header to the +/// output bytevec failed +/// - [`LcCodecError::LcEncodeFailed`] if an opaque encoding error occurred +pub fn compress( + array: AnyArrayView, + preprocessor: &str, + components: &str, +) -> Result, LcCodecError> { + let mut encoded = postcard::to_extend( + &CompressionHeader { + dtype: array.dtype(), + shape: Cow::Borrowed(array.shape()), + version: StaticCodecVersion, + }, + Vec::new(), + ) + .map_err(|err| LcCodecError::HeaderEncodeFailed { + source: LcHeaderError(err), + })?; + + // LC does not support empty input, so skip encoding + if array.is_empty() { + return Ok(encoded); + } + + let preprocessor = CString::new(preprocessor).unwrap(); + let components = CString::new(components).unwrap(); + + encoded.append( + &mut lc_framework::compress(&preprocessor, &components, &*array.as_bytes()).map_err( + |()| LcCodecError::LcEncodeFailed { + source: LcCodingError(io::Error::other("todo")), + }, + )?, + ); + + Ok(encoded) +} + +/// Decompress the `encoded` data into an array using LC. 
+/// +/// # Errors +/// +/// Errors with +/// - [`LcCodecError::HeaderDecodeFailed`] if decoding the header failed +/// - [`LcCodecError::DecodeDataLengthMismatch`] if decoding produced a +/// different number of bytes than expected +/// - [`LcCodecError::LcDecodeFailed`] if an opaque decoding error occurred +pub fn decompress( + preprocessor: &str, + components: &str, + encoded: &[u8], +) -> Result { + let (header, encoded) = + postcard::take_from_bytes::(encoded).map_err(|err| { + LcCodecError::HeaderDecodeFailed { + source: LcHeaderError(err), + } + })?; + + let (decoded, result) = AnyArray::with_zeros_bytes(header.dtype, &header.shape, |decoded| { + decompress_into_bytes(preprocessor, components, encoded, decoded) + }); + + result.map(|()| decoded) +} + +/// Decompress the `encoded` data into a `decoded` array using LC. +/// +/// # Errors +/// +/// Errors with +/// - [`LcCodecError::HeaderDecodeFailed`] if decoding the header failed +/// - [`LcCodecError::MismatchedDecodeIntoArray`] if the `decoded` array is of +/// the wrong dtype or shape +/// - [`LcCodecError::HeaderDecodeFailed`] if decoding the header failed +/// - [`LcCodecError::DecodeDataLengthMismatch`] if decoding produced a +/// different number of bytes than expected +/// - [`LcCodecError::LcDecodeFailed`] if an opaque decoding error occurred +pub fn decompress_into( + preprocessor: &str, + components: &str, + encoded: &[u8], + mut decoded: AnyArrayViewMut, +) -> Result<(), LcCodecError> { + let (header, encoded) = + postcard::take_from_bytes::(encoded).map_err(|err| { + LcCodecError::HeaderDecodeFailed { + source: LcHeaderError(err), + } + })?; + + if header.dtype != decoded.dtype() { + return Err(LcCodecError::MismatchedDecodeIntoArray { + source: AnyArrayAssignError::DTypeMismatch { + src: header.dtype, + dst: decoded.dtype(), + }, + }); + } + + if header.shape != decoded.shape() { + return Err(LcCodecError::MismatchedDecodeIntoArray { + source: AnyArrayAssignError::ShapeMismatch { + src: 
header.shape.into_owned(), + dst: decoded.shape().to_vec(), + }, + }); + } + + decoded + .with_bytes_mut(|decoded| decompress_into_bytes(preprocessor, components, encoded, decoded)) +} + +fn decompress_into_bytes( + preprocessor: &str, + components: &str, + encoded: &[u8], + decoded: &mut [u8], +) -> Result<(), LcCodecError> { + // LC does not support empty input, so skip decoding + if decoded.is_empty() && encoded.is_empty() { + return Ok(()); + } + + let preprocessor = CString::new(preprocessor).unwrap(); + let components = CString::new(components).unwrap(); + + let dec = lc_framework::decompress(&preprocessor, &components, encoded).map_err(|()| { + LcCodecError::LcDecodeFailed { + source: LcCodingError(io::Error::other("todod")), + } + })?; + + if dec.len() != decoded.len() { + return Err(LcCodecError::DecodeDataLengthMismatch); + } + + decoded.copy_from_slice(&dec); + + Ok(()) +} + +#[derive(Serialize, Deserialize)] +struct CompressionHeader<'a> { + dtype: AnyArrayDType, + #[serde(borrow)] + shape: Cow<'a, [usize]>, + version: LcCodecVersion, +} + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::panic)] +mod tests { + use super::*; + + #[test] + fn lossless() { + let data = ndarray::linspace(0.0, std::f32::consts::PI, 100) + .collect::>() + .into_shape_with_order((10, 10)) + .unwrap() + .cos(); + + let preprocessor = ""; + let components = "BIT_4 RLE_4"; + + let compressed = compress( + AnyArrayView::F32(data.view().into_dyn()), + preprocessor, + components, + ) + .unwrap(); + let decompressed = decompress(preprocessor, components, &compressed).unwrap(); + + assert_eq!(decompressed, AnyArray::F32(data.into_dyn())); + } + + #[test] + fn abs_error() { + let data = ndarray::linspace(0.0, std::f32::consts::PI, 100) + .collect::>() + .into_shape_with_order((10, 10)) + .unwrap() + .cos(); + + let preprocessor = "QUANT_ABS_0_f32(0.1)"; + let components = "BIT_4 RLE_4"; + + let compressed = compress( + AnyArrayView::F32(data.view().into_dyn()), + preprocessor, + 
components, + ) + .unwrap(); + let decompressed = decompress(preprocessor, components, &compressed).unwrap(); + + let AnyArray::F32(decompressed) = decompressed else { + panic!("unexpected decompressed dtype {}", decompressed.dtype()); + }; + assert_eq!(decompressed.shape(), data.shape()); + + for (o, d) in data.into_iter().zip(decompressed) { + assert!((o - d).abs() <= 0.1); + } + } +} diff --git a/crates/numcodecs-wasm-builder/buildenv/flake.nix b/crates/numcodecs-wasm-builder/buildenv/flake.nix index 67b4f57c1..b02efb029 100644 --- a/crates/numcodecs-wasm-builder/buildenv/flake.nix +++ b/crates/numcodecs-wasm-builder/buildenv/flake.nix @@ -66,6 +66,7 @@ pkgs.cmake pkgs.binaryen pkgs.pkg-config + pkgs.python3 ]; env = { MY_LLVM_VERSION = "${llvmVersion}"; @@ -82,6 +83,7 @@ MY_LIBCLANG_RT = "${libclang_rt}"; MY_WASM_OPT = "${pkgs.binaryen}/bin/wasm-opt"; MY_PKG_CONFIG = "${pkgs.pkg-config}/bin/pkg-config"; + MY_PYTHON3 = "${pkgs.python3}/bin/python3"; }; }; }); diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index 035120029..329d6acf0 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -201,6 +201,8 @@ struct NixEnv { libclang_rt: PathBuf, wasm_opt: PathBuf, pkg_config: PathBuf, + #[expect(dead_code)] + python3: PathBuf, } impl NixEnv { @@ -267,6 +269,7 @@ impl NixEnv { libclang_rt: try_read_env(&env, "MY_LIBCLANG_RT")?, wasm_opt: try_read_env(&env, "MY_WASM_OPT")?, pkg_config: try_read_env(&env, "MY_PKG_CONFIG")?, + python3: try_read_env(&env, "MY_PYTHON3")?, }) } } From 03c75cc92208398c6d86276fa4bf232a1c4678a2 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 24 Mar 2026 10:37:59 +0200 Subject: [PATCH 2/4] Update the LC codec --- Cargo.toml | 2 +- codecs/lc/Cargo.toml | 2 + codecs/lc/src/lib.rs | 515 +++++++++++++++++++++++++++++++--- codecs/lc/tests/config.rs | 57 ++++ codecs/lc/tests/schema.json | 540 ++++++++++++++++++++++++++++++++++++ 
codecs/lc/tests/schema.rs | 24 ++ 6 files changed, 1099 insertions(+), 41 deletions(-) create mode 100644 codecs/lc/tests/config.rs create mode 100644 codecs/lc/tests/schema.json create mode 100644 codecs/lc/tests/schema.rs diff --git a/Cargo.toml b/Cargo.toml index 18c5324b5..f9800287d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,7 +93,7 @@ ebcc = { version = "0.1", default-features = false } format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } -lc-framework = { version = "0.1", git = "ssh://github.com/juntyr/lc-framework-rs.git", rev = "10a6e0d", default-features = false } +lc-framework = { version = "0.1", git = "https://github.com/juntyr/lc-framework-rs.git", rev = "2ca64b6", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/lc/Cargo.toml b/codecs/lc/Cargo.toml index 71a95cd5a..dcc606f9d 100644 --- a/codecs/lc/Cargo.toml +++ b/codecs/lc/Cargo.toml @@ -21,9 +21,11 @@ numcodecs = { workspace = true } postcard = { workspace = true } schemars = { workspace = true, features = ["derive", "preserve_order"] } serde = { workspace = true, features = ["std", "derive"] } +serde_repr = { workspace = true } thiserror = { workspace = true } [dev-dependencies] +serde_json = { workspace = true, features = ["std"] } ndarray = { workspace = true, features = ["std"] } [lints] diff --git a/codecs/lc/src/lib.rs b/codecs/lc/src/lib.rs index 8c2cfa7f4..6c4ba3116 100644 --- a/codecs/lc/src/lib.rs +++ b/codecs/lc/src/lib.rs @@ -19,37 +19,435 @@ #![allow(clippy::multiple_crate_versions)] // embedded-io -use std::{borrow::Cow, ffi::CString, io}; +use std::borrow::Cow; use ndarray::Array1; use numcodecs::{ AnyArray, AnyArrayAssignError, AnyArrayDType, 
AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, }; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use schemars::{JsonSchema, JsonSchema_repr}; +use serde::{Deserialize, Deserializer, Serialize}; +use serde_repr::{Deserialize_repr, Serialize_repr}; use thiserror::Error; +#[cfg(test)] +use ::serde_json as _; + type LcCodecVersion = StaticCodecVersion<0, 1, 0>; #[derive(Clone, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] /// Codec providing compression using LC pub struct LcCodec { - /// LC preprocessor - pub preprocessor: String, + /// LC preprocessors + #[serde(default)] + pub preprocessors: Vec, /// LC components - pub components: String, + #[serde(deserialize_with = "deserialize_components")] + #[schemars(length(min = 1, max = lc_framework::MAX_COMPONENTS))] + pub components: Vec, /// The codec's encoding format version. Do not provide this parameter explicitly. #[serde(default, rename = "_version")] pub version: LcCodecVersion, } +fn deserialize_components<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + let components = Vec::::deserialize(deserializer)?; + + if components.is_empty() { + return Err(serde::de::Error::custom("expected at least one component")); + } + + if components.len() > lc_framework::MAX_COMPONENTS { + return Err(serde::de::Error::custom(format_args!( + "expected at most {} components", + lc_framework::MAX_COMPONENTS + ))); + } + + Ok(components) +} + +#[expect(missing_docs)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] +#[serde(tag = "id")] +/// LC preprocessor +pub enum LcPreprocessor { + #[serde(rename = "NUL")] + Noop, + #[serde(rename = "LOR")] + Lorenzo1D { dtype: LcLorenzoDtype }, + #[serde(rename = "QUANT")] + QuantizeErrorBound { + dtype: LcQuantizeDType, + kind: LcErrorKind, + error_bound: f64, + threshold: Option, + decorrelation: LcDecorrelation, + }, +} + 
+impl LcPreprocessor { + const fn into_lc(self) -> lc_framework::Preprocessor { + match self { + Self::Noop => lc_framework::Preprocessor::Noop, + Self::Lorenzo1D { dtype } => lc_framework::Preprocessor::Lorenzo1D { + dtype: dtype.into_lc(), + }, + Self::QuantizeErrorBound { + dtype, + kind, + error_bound, + threshold, + decorrelation, + } => lc_framework::Preprocessor::QuantizeErrorBound { + dtype: dtype.into_lc(), + kind: kind.into_lc(), + error_bound, + threshold, + decorrelation: decorrelation.into_lc(), + }, + } + } +} + +#[derive( + Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, +)] +/// LC error bound kind +pub enum LcErrorKind { + /// pointwise absolute error bound + #[serde(rename = "ABS")] + Abs, + /// pointwise normalised absolute / data-range-relative error bound + #[serde(rename = "NOA")] + Noa, + /// pointwise relative error bound + #[serde(rename = "REL")] + Rel, +} + +impl LcErrorKind { + const fn into_lc(self) -> lc_framework::ErrorKind { + match self { + Self::Abs => lc_framework::ErrorKind::Abs, + Self::Noa => lc_framework::ErrorKind::Noa, + Self::Rel => lc_framework::ErrorKind::Rel, + } + } +} + +#[expect(missing_docs)] +#[derive( + Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, +)] +/// LC quantisation decorrelation mode +pub enum LcDecorrelation { + #[serde(rename = "0")] + Zero, + #[serde(rename = "R")] + Random, +} + +impl LcDecorrelation { + const fn into_lc(self) -> lc_framework::Decorrelation { + match self { + Self::Zero => lc_framework::Decorrelation::Zero, + Self::Random => lc_framework::Decorrelation::Random, + } + } +} + +#[expect(missing_docs)] +#[derive( + Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, +)] +/// LC Lorenzo preprocessor dtype +pub enum LcLorenzoDtype { + #[serde(rename = "i32")] + I32, +} + +impl LcLorenzoDtype { + const fn into_lc(self) -> lc_framework::LorenzoDtype { + match 
self { + Self::I32 => lc_framework::LorenzoDtype::I32, + } + } +} + +#[expect(missing_docs)] +#[derive( + Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, +)] +/// LC quantization dtype +pub enum LcQuantizeDType { + #[serde(rename = "f32")] + F32, + #[serde(rename = "f64")] + F64, +} + +impl LcQuantizeDType { + const fn into_lc(self) -> lc_framework::QuantizeDType { + match self { + Self::F32 => lc_framework::QuantizeDType::F32, + Self::F64 => lc_framework::QuantizeDType::F64, + } + } +} + +#[expect(missing_docs)] +#[derive( + Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, +)] +#[serde(deny_unknown_fields)] +#[serde(tag = "id")] +/// LC component +pub enum LcComponent { + #[serde(rename = "NUL")] + Noop, + // mutators + #[serde(rename = "TCMS")] + TwosComplementToSignMagnitude { size: LcElemSize }, + #[serde(rename = "TCNB")] + TwosComplementToNegaBinary { size: LcElemSize }, + #[serde(rename = "DBEFS")] + DebiasedExponentFractionSign { size: LcFloatSize }, + #[serde(rename = "DBESF")] + DebiasedExponentSignFraction { size: LcFloatSize }, + // shufflers + #[serde(rename = "BIT")] + BitShuffle { size: LcElemSize }, + #[serde(rename = "TUPL")] + Tuple { size: LcTupleSize }, + // predictors + #[serde(rename = "DIFF")] + Delta { size: LcElemSize }, + #[serde(rename = "DIFFMS")] + DeltaAsSignMagnitude { size: LcElemSize }, + #[serde(rename = "DIFFNB")] + DeltaAsNegaBinary { size: LcElemSize }, + // reducers + #[serde(rename = "CLOG")] + Clog { size: LcElemSize }, + #[serde(rename = "HCLOG")] + HClog { size: LcElemSize }, + #[serde(rename = "RARE")] + Rare { size: LcElemSize }, + #[serde(rename = "RAZE")] + Raze { size: LcElemSize }, + #[serde(rename = "RLE")] + RunLengthEncoding { size: LcElemSize }, + #[serde(rename = "RRE")] + RepetitionRunBitmapEncoding { size: LcElemSize }, + #[serde(rename = "RZE")] + ZeroRunBitmapEncoding { size: LcElemSize }, +} + +impl LcComponent { + 
const fn into_lc(self) -> lc_framework::Component { + match self { + Self::Noop => lc_framework::Component::Noop, + // mutators + Self::TwosComplementToSignMagnitude { size } => { + lc_framework::Component::TwosComplementToSignMagnitude { + size: size.into_lc(), + } + } + Self::TwosComplementToNegaBinary { size } => { + lc_framework::Component::TwosComplementToNegaBinary { + size: size.into_lc(), + } + } + Self::DebiasedExponentFractionSign { size } => { + lc_framework::Component::DebiasedExponentFractionSign { + size: size.into_lc(), + } + } + Self::DebiasedExponentSignFraction { size } => { + lc_framework::Component::DebiasedExponentSignFraction { + size: size.into_lc(), + } + } + // shufflers + Self::BitShuffle { size } => lc_framework::Component::BitShuffle { + size: size.into_lc(), + }, + Self::Tuple { size } => lc_framework::Component::Tuple { + size: size.into_lc(), + }, + // predictors + Self::Delta { size } => lc_framework::Component::Delta { + size: size.into_lc(), + }, + Self::DeltaAsSignMagnitude { size } => lc_framework::Component::DeltaAsSignMagnitude { + size: size.into_lc(), + }, + Self::DeltaAsNegaBinary { size } => lc_framework::Component::DeltaAsNegaBinary { + size: size.into_lc(), + }, + // reducers + Self::Clog { size } => lc_framework::Component::Clog { + size: size.into_lc(), + }, + Self::HClog { size } => lc_framework::Component::HClog { + size: size.into_lc(), + }, + Self::Rare { size } => lc_framework::Component::Rare { + size: size.into_lc(), + }, + Self::Raze { size } => lc_framework::Component::Raze { + size: size.into_lc(), + }, + Self::RunLengthEncoding { size } => lc_framework::Component::RunLengthEncoding { + size: size.into_lc(), + }, + Self::RepetitionRunBitmapEncoding { size } => { + lc_framework::Component::RepetitionRunBitmapEncoding { + size: size.into_lc(), + } + } + Self::ZeroRunBitmapEncoding { size } => { + lc_framework::Component::ZeroRunBitmapEncoding { + size: size.into_lc(), + } + } + } + } +} + 
+#[expect(missing_docs)] +#[derive( + Copy, + Clone, + Debug, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Serialize_repr, + Deserialize_repr, + JsonSchema_repr, +)] +/// LC component element size, in bytes +#[repr(u8)] +pub enum LcElemSize { + S1 = 1, + S2 = 2, + S4 = 4, + S8 = 8, +} + +impl LcElemSize { + const fn into_lc(self) -> lc_framework::ElemSize { + match self { + Self::S1 => lc_framework::ElemSize::S1, + Self::S2 => lc_framework::ElemSize::S2, + Self::S4 => lc_framework::ElemSize::S4, + Self::S8 => lc_framework::ElemSize::S8, + } + } +} + +#[expect(missing_docs)] +#[derive( + Copy, + Clone, + Debug, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Serialize_repr, + Deserialize_repr, + JsonSchema_repr, +)] +/// LC component float element size, in bytes +#[repr(u8)] +pub enum LcFloatSize { + S4 = 4, + S8 = 8, +} + +impl LcFloatSize { + const fn into_lc(self) -> lc_framework::FloatSize { + match self { + Self::S4 => lc_framework::FloatSize::S4, + Self::S8 => lc_framework::FloatSize::S8, + } + } +} + +#[expect(missing_docs)] +#[derive( + Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, +)] +/// LC tuple component element size, in bytes x tuple length +#[schemars(description = "LC tuple component element size, in tuple length _ bytes")] +pub enum LcTupleSize { + #[serde(rename = "2_1")] + S1x2, + #[serde(rename = "3_1")] + S1x3, + #[serde(rename = "4_1")] + S1x4, + #[serde(rename = "6_1")] + S1x6, + #[serde(rename = "8_1")] + S1x8, + #[serde(rename = "12_1")] + S1x12, + #[serde(rename = "2_2")] + S2x2, + #[serde(rename = "3_2")] + S2x3, + #[serde(rename = "4_2")] + S2x4, + #[serde(rename = "6_2")] + S2x6, + #[serde(rename = "2_4")] + S4x2, + #[serde(rename = "6_4")] + S4x6, + #[serde(rename = "3_8")] + S8x3, + #[serde(rename = "6_8")] + S8x6, +} + +impl LcTupleSize { + const fn into_lc(self) -> lc_framework::TupleSize { + match self { + Self::S1x2 => lc_framework::TupleSize::S1x2, + Self::S1x3 => 
lc_framework::TupleSize::S1x3, + Self::S1x4 => lc_framework::TupleSize::S1x4, + Self::S1x6 => lc_framework::TupleSize::S1x6, + Self::S1x8 => lc_framework::TupleSize::S1x8, + Self::S1x12 => lc_framework::TupleSize::S1x12, + Self::S2x2 => lc_framework::TupleSize::S2x2, + Self::S2x3 => lc_framework::TupleSize::S2x3, + Self::S2x4 => lc_framework::TupleSize::S2x4, + Self::S2x6 => lc_framework::TupleSize::S2x6, + Self::S4x2 => lc_framework::TupleSize::S4x2, + Self::S4x6 => lc_framework::TupleSize::S4x6, + Self::S8x3 => lc_framework::TupleSize::S8x3, + Self::S8x6 => lc_framework::TupleSize::S8x6, + } + } +} + impl Codec for LcCodec { type Error = LcCodecError; fn encode(&self, data: AnyCowArray) -> Result { - compress(data.view(), &self.preprocessor, &self.components) + compress(data.view(), &self.preprocessors, &self.components) .map(|bytes| AnyArray::U8(Array1::from_vec(bytes).into_dyn())) } @@ -67,7 +465,7 @@ impl Codec for LcCodec { } decompress( - &self.preprocessor, + &self.preprocessors, &self.components, &AnyCowArray::U8(encoded).as_bytes(), ) @@ -91,7 +489,7 @@ impl Codec for LcCodec { } decompress_into( - &self.preprocessor, + &self.preprocessors, &self.components, &AnyArrayView::U8(encoded).as_bytes(), decoded, @@ -178,10 +576,10 @@ pub struct LcHeaderError(postcard::Error); #[derive(Debug, Error)] #[error(transparent)] /// Opaque error for when encoding or decoding with LC fails -pub struct LcCodingError(io::Error); +pub struct LcCodingError(lc_framework::Error); #[expect(clippy::needless_pass_by_value)] -/// Compress the `array` using LC with the provided `preprocessor` and +/// Compress the `array` using LC with the provided `preprocessors` and /// `components`. 
/// /// # Errors @@ -192,8 +590,8 @@ pub struct LcCodingError(io::Error); /// - [`LcCodecError::LcEncodeFailed`] if an opaque encoding error occurred pub fn compress( array: AnyArrayView, - preprocessor: &str, - components: &str, + preprocessors: &[LcPreprocessor], + components: &[LcComponent], ) -> Result, LcCodecError> { let mut encoded = postcard::to_extend( &CompressionHeader { @@ -212,13 +610,21 @@ pub fn compress( return Ok(encoded); } - let preprocessor = CString::new(preprocessor).unwrap(); - let components = CString::new(components).unwrap(); + let preprocessors = preprocessors + .iter() + .cloned() + .map(LcPreprocessor::into_lc) + .collect::>(); + let components = components + .iter() + .copied() + .map(LcComponent::into_lc) + .collect::>(); encoded.append( - &mut lc_framework::compress(&preprocessor, &components, &*array.as_bytes()).map_err( - |()| LcCodecError::LcEncodeFailed { - source: LcCodingError(io::Error::other("todo")), + &mut lc_framework::compress(&preprocessors, &components, &array.as_bytes()).map_err( + |err| LcCodecError::LcEncodeFailed { + source: LcCodingError(err), }, )?, ); @@ -236,8 +642,8 @@ pub fn compress( /// different number of bytes than expected /// - [`LcCodecError::LcDecodeFailed`] if an opaque decoding error occurred pub fn decompress( - preprocessor: &str, - components: &str, + preprocessors: &[LcPreprocessor], + components: &[LcComponent], encoded: &[u8], ) -> Result { let (header, encoded) = @@ -248,7 +654,7 @@ pub fn decompress( })?; let (decoded, result) = AnyArray::with_zeros_bytes(header.dtype, &header.shape, |decoded| { - decompress_into_bytes(preprocessor, components, encoded, decoded) + decompress_into_bytes(preprocessors, components, encoded, decoded) }); result.map(|()| decoded) @@ -267,8 +673,8 @@ pub fn decompress( /// different number of bytes than expected /// - [`LcCodecError::LcDecodeFailed`] if an opaque decoding error occurred pub fn decompress_into( - preprocessor: &str, - components: &str, + 
preprocessors: &[LcPreprocessor], + components: &[LcComponent], encoded: &[u8], mut decoded: AnyArrayViewMut, ) -> Result<(), LcCodecError> { @@ -297,13 +703,14 @@ pub fn decompress_into( }); } - decoded - .with_bytes_mut(|decoded| decompress_into_bytes(preprocessor, components, encoded, decoded)) + decoded.with_bytes_mut(|decoded| { + decompress_into_bytes(preprocessors, components, encoded, decoded) + }) } fn decompress_into_bytes( - preprocessor: &str, - components: &str, + preprocessors: &[LcPreprocessor], + components: &[LcComponent], encoded: &[u8], decoded: &mut [u8], ) -> Result<(), LcCodecError> { @@ -312,12 +719,20 @@ fn decompress_into_bytes( return Ok(()); } - let preprocessor = CString::new(preprocessor).unwrap(); - let components = CString::new(components).unwrap(); - - let dec = lc_framework::decompress(&preprocessor, &components, encoded).map_err(|()| { + let preprocessors = preprocessors + .iter() + .cloned() + .map(LcPreprocessor::into_lc) + .collect::>(); + let components = components + .iter() + .copied() + .map(LcComponent::into_lc) + .collect::>(); + + let dec = lc_framework::decompress(&preprocessors, &components, encoded).map_err(|err| { LcCodecError::LcDecodeFailed { - source: LcCodingError(io::Error::other("todod")), + source: LcCodingError(err), } })?; @@ -351,16 +766,23 @@ mod tests { .unwrap() .cos(); - let preprocessor = ""; - let components = "BIT_4 RLE_4"; + let preprocessors = &[]; + let components = &[ + LcComponent::BitShuffle { + size: LcElemSize::S4, + }, + LcComponent::RunLengthEncoding { + size: LcElemSize::S4, + }, + ]; let compressed = compress( AnyArrayView::F32(data.view().into_dyn()), - preprocessor, + preprocessors, components, ) .unwrap(); - let decompressed = decompress(preprocessor, components, &compressed).unwrap(); + let decompressed = decompress(preprocessors, components, &compressed).unwrap(); assert_eq!(decompressed, AnyArray::F32(data.into_dyn())); } @@ -373,16 +795,29 @@ mod tests { .unwrap() .cos(); - let 
preprocessor = "QUANT_ABS_0_f32(0.1)"; - let components = "BIT_4 RLE_4"; + let preprocessors = &[LcPreprocessor::QuantizeErrorBound { + dtype: LcQuantizeDType::F32, + kind: LcErrorKind::Abs, + error_bound: 0.1, + threshold: None, + decorrelation: LcDecorrelation::Zero, + }]; + let components = &[ + LcComponent::BitShuffle { + size: LcElemSize::S4, + }, + LcComponent::RunLengthEncoding { + size: LcElemSize::S4, + }, + ]; let compressed = compress( AnyArrayView::F32(data.view().into_dyn()), - preprocessor, + preprocessors, components, ) .unwrap(); - let decompressed = decompress(preprocessor, components, &compressed).unwrap(); + let decompressed = decompress(preprocessors, components, &compressed).unwrap(); let AnyArray::F32(decompressed) = decompressed else { panic!("unexpected decompressed dtype {}", decompressed.dtype()); diff --git a/codecs/lc/tests/config.rs b/codecs/lc/tests/config.rs new file mode 100644 index 000000000..fa419d27a --- /dev/null +++ b/codecs/lc/tests/config.rs @@ -0,0 +1,57 @@ +#![expect(missing_docs)] +#![expect(clippy::unwrap_used)] + +use ::{ + lc_framework as _, ndarray as _, postcard as _, schemars as _, serde_repr as _, thiserror as _, +}; + +use numcodecs::StaticCodec; +use numcodecs_lc::LcCodec; +use serde::Deserialize; +use serde_json::json; + +#[test] +#[should_panic(expected = "missing field `components`")] +fn empty_config() { + let _ = LcCodec::from_config(Deserialize::deserialize(json!({})).unwrap()); +} + +#[test] +#[should_panic(expected = "expected at least one component")] +fn no_components() { + let _ = LcCodec::from_config( + Deserialize::deserialize(json!({ + "components": [] + })) + .unwrap(), + ); +} + +#[test] +#[should_panic(expected = "expected at most 8 components")] +fn too_many_components() { + let _ = LcCodec::from_config( + Deserialize::deserialize(json!({ + "components": [ + { "id": "NUL" }, { "id": "NUL" }, { "id": "NUL" }, { "id": "NUL" }, + { "id": "NUL" }, { "id": "NUL" }, { "id": "NUL" }, { "id": "NUL" }, + 
{ "id": "NUL" }, { "id": "NUL" }, { "id": "NUL" }, { "id": "NUL" }, + ], + })) + .unwrap(), + ); +} + +#[test] +fn with_preprocessors() { + let _ = LcCodec::from_config(Deserialize::deserialize(json!({ + "preprocessors": [ + { "id": "QUANT", "dtype": "f32", "kind": "REL", "error_bound": 0.1, "decorrelation": "R" }, + { "id": "LOR", "dtype": "i32" }, + ], + "components": [ + { "id": "BIT", "size": 4 }, + { "id": "RLE", "size": 4 }, + ], + })).unwrap()); +} diff --git a/codecs/lc/tests/schema.json b/codecs/lc/tests/schema.json new file mode 100644 index 000000000..3de092cbf --- /dev/null +++ b/codecs/lc/tests/schema.json @@ -0,0 +1,540 @@ +{ + "type": "object", + "additionalProperties": false, + "properties": { + "preprocessors": { + "type": "array", + "items": { + "description": "LC preprocessor", + "oneOf": [ + { + "type": "object", + "properties": { + "id": { + "type": "string", + "const": "NUL" + } + }, + "required": [ + "id" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "dtype": { + "description": "LC Lorenzo preprocessor dtype", + "type": "string", + "enum": [ + "i32" + ] + }, + "id": { + "type": "string", + "const": "LOR" + } + }, + "additionalProperties": false, + "required": [ + "id", + "dtype" + ] + }, + { + "type": "object", + "properties": { + "dtype": { + "description": "LC quantization dtype", + "type": "string", + "enum": [ + "f32", + "f64" + ] + }, + "kind": { + "description": "LC error bound kind", + "oneOf": [ + { + "description": "pointwise absolute error bound", + "type": "string", + "const": "ABS" + }, + { + "description": "pointwise normalised absolute / data-range-relative error bound", + "type": "string", + "const": "NOA" + }, + { + "description": "pointwise relative error bound", + "type": "string", + "const": "REL" + } + ] + }, + "error_bound": { + "type": "number", + "format": "double" + }, + "threshold": { + "type": [ + "number", + "null" + ], + "format": "double" + }, + "decorrelation": { + 
"description": "LC quantisation decorrelation mode", + "type": "string", + "enum": [ + "0", + "R" + ] + }, + "id": { + "type": "string", + "const": "QUANT" + } + }, + "additionalProperties": false, + "required": [ + "id", + "dtype", + "kind", + "error_bound", + "decorrelation" + ] + } + ] + }, + "description": "LC preprocessors", + "default": [] + }, + "components": { + "type": "array", + "items": { + "description": "LC component", + "oneOf": [ + { + "type": "object", + "properties": { + "id": { + "type": "string", + "const": "NUL" + } + }, + "required": [ + "id" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "TCMS" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "TCNB" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component float element size, in bytes", + "type": "integer", + "enum": [ + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "DBEFS" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component float element size, in bytes", + "type": "integer", + "enum": [ + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "DBESF" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { 
+ "type": "string", + "const": "BIT" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC tuple component element size, in tuple length _ bytes", + "type": "string", + "enum": [ + "2_1", + "3_1", + "4_1", + "6_1", + "8_1", + "12_1", + "2_2", + "3_2", + "4_2", + "6_2", + "2_4", + "6_4", + "3_8", + "6_8" + ] + }, + "id": { + "type": "string", + "const": "TUPL" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "DIFF" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "DIFFMS" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "DIFFNB" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "CLOG" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "HCLOG" + } + }, + 
"additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "RARE" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "RAZE" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "RLE" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "RRE" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + }, + { + "type": "object", + "properties": { + "size": { + "description": "LC component element size, in bytes", + "type": "integer", + "enum": [ + 1, + 2, + 4, + 8 + ] + }, + "id": { + "type": "string", + "const": "RZE" + } + }, + "additionalProperties": false, + "required": [ + "id", + "size" + ] + } + ] + }, + "description": "LC components", + "minItems": 1, + "maxItems": 8 + }, + "_version": { + "type": "string", + "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$", + "description": "The codec's encoding format version. 
Do not provide this parameter explicitly.", + "default": "0.1.0" + } + }, + "required": [ + "components" + ], + "description": "Codec providing compression using LC", + "title": "LcCodec", + "$schema": "https://json-schema.org/draft/2020-12/schema" +} \ No newline at end of file diff --git a/codecs/lc/tests/schema.rs b/codecs/lc/tests/schema.rs new file mode 100644 index 000000000..db1bee299 --- /dev/null +++ b/codecs/lc/tests/schema.rs @@ -0,0 +1,24 @@ +#![expect(missing_docs)] + +use ::{ + lc_framework as _, ndarray as _, postcard as _, schemars as _, serde as _, serde_json as _, + serde_repr as _, thiserror as _, +}; + +use numcodecs::{DynCodecType, StaticCodecType}; +use numcodecs_lc::LcCodec; + +#[test] +fn schema() { + let schema = format!( + "{:#}", + StaticCodecType::::of() + .codec_config_schema() + .to_value() + ); + + #[expect(clippy::manual_assert, clippy::panic)] + if schema != include_str!("schema.json") { + panic!("Lc schema has changed\n===\n{schema}\n==="); + } +} From cfd5c919d18e87b8fcb388f9775c1b9c3ff4ca50 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 24 Mar 2026 10:57:53 +0200 Subject: [PATCH 3/4] Don't check the lc codec in wasm CI --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3b909a7e9..3fb965be2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,6 +92,7 @@ jobs: --exclude numcodecs-python \ --exclude numcodecs-ebcc \ --exclude numcodecs-jpeg2000 \ + --exclude numcodecs-lc \ --exclude numcodecs-qpet-sperr \ --exclude numcodecs-sperr \ --exclude numcodecs-sz3 \ @@ -227,6 +228,7 @@ jobs: --exclude numcodecs-python \ --exclude numcodecs-ebcc \ --exclude numcodecs-jpeg2000 \ + --exclude numcodecs-lc \ --exclude numcodecs-qpet-sperr \ --exclude numcodecs-sperr \ --exclude numcodecs-sz3 \ @@ -245,6 +247,7 @@ jobs: --exclude numcodecs-python \ --exclude numcodecs-ebcc \ --exclude numcodecs-jpeg2000 \ + --exclude 
numcodecs-lc \ --exclude numcodecs-qpet-sperr \ --exclude numcodecs-sperr \ --exclude numcodecs-sz3 \ From c725d9c1a5d58455eb7be926a3da55460d1a5f21 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 27 Mar 2026 06:24:20 +0200 Subject: [PATCH 4/4] Switch to published lc-framework-rs --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f9800287d..1608d699c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,7 +93,7 @@ ebcc = { version = "0.1", default-features = false } format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } -lc-framework = { version = "0.1", git = "https://github.com/juntyr/lc-framework-rs.git", rev = "2ca64b6", default-features = false } +lc-framework = { version = "0.1", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy