From 60964d0e93579fc79cb0af18ef6b2d8f84a5af6e Mon Sep 17 00:00:00 2001 From: Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Thu, 24 Apr 2025 18:24:12 +0000 Subject: [PATCH 01/43] Start work on a libpressio codec wrapper --- Cargo.toml | 2 + codecs/pressio/Cargo.toml | 27 +++++++++++++ codecs/pressio/LICENSE | 1 + codecs/pressio/README.md | 38 ++++++++++++++++++ codecs/pressio/src/lib.rs | 82 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 150 insertions(+) create mode 100644 codecs/pressio/Cargo.toml create mode 120000 codecs/pressio/LICENSE create mode 100644 codecs/pressio/README.md create mode 100644 codecs/pressio/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 1608d699c..b329464b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ members = [ "codecs/linear-quantize", "codecs/log", "codecs/pco", + "codecs/pressio", "codecs/qpet-sperr", "codecs/random-projection", "codecs/reinterpret", @@ -69,6 +70,7 @@ numcodecs-lc = { version = "0.1", path = "codecs/lc", default-features = false } numcodecs-linear-quantize = { version = "0.5", path = "codecs/linear-quantize", default-features = false } numcodecs-log = { version = "0.5", path = "codecs/log", default-features = false } numcodecs-pco = { version = "0.3", path = "codecs/pco", default-features = false } +numcodecs-pressio = { version = "0.1", path = "codecs/pressio", default-features = false } numcodecs-qpet-sperr = { version = "0.2.2", path = "codecs/qpet-sperr", default-features = false } numcodecs-random-projection = { version = "0.4", path = "codecs/random-projection", default-features = false } numcodecs-reinterpret = { version = "0.4", path = "codecs/reinterpret", default-features = false } diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml new file mode 100644 index 000000000..2a8830c3f --- /dev/null +++ b/codecs/pressio/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "numcodecs-pressio" +version = "0.1.0" +edition = { workspace = true } +authors = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +rust-version = { workspace = true } + +description = "libpressio codec wrapper for the numcodecs API" +readme = "README.md" +categories = ["compression", "encoding"] +keywords = ["libpressio", "numcodecs", "compression", "encoding"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +numcodecs = { workspace = true } +schemars = { workspace = true, features = ["derive", "preserve_order"] } +serde = { workspace = true, features = ["std", "derive"] } +thiserror = { workspace = true } + +# FIXME: move into workspace dependencies +libpressio = { git = "https://github.com/juntyr/libpressio-rs.git", rev = "ddceba6" } + +[lints] +workspace = true diff --git a/codecs/pressio/LICENSE b/codecs/pressio/LICENSE new file mode 120000 index 000000000..30cff7403 --- /dev/null +++ b/codecs/pressio/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/codecs/pressio/README.md b/codecs/pressio/README.md new file mode 100644 index 000000000..2a211f673 --- /dev/null +++ b/codecs/pressio/README.md @@ -0,0 +1,38 @@ +[![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![PyPi Release]][pypi] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] [![Read the Docs]][rtdocs] + +[CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +[workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain + +[MSRV]: https://img.shields.io/badge/MSRV-1.85.0-blue +[repo]: https://github.com/juntyr/numcodecs-rs + +[Latest Version]: https://img.shields.io/crates/v/numcodecs-pressio +[crates.io]: https://crates.io/crates/numcodecs-pressio + +[PyPi Release]: https://img.shields.io/pypi/v/numcodecs-wasm-pressio.svg +[pypi]: https://pypi.python.org/pypi/numcodecs-wasm-pressio + +[Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-pressio +[docs.rs]: https://docs.rs/numcodecs-pressio/ + +[Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +[docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_pressio + +[Read the Docs]: https://img.shields.io/readthedocs/numcodecs-wasm?label=readthedocs +[rtdocs]: https://numcodecs-wasm.readthedocs.io/en/stable/api/numcodecs_wasm_pressio/ + +# numcodecs-pressio + +libpressio codec wrapper for the [`numcodecs`] API. + +[`numcodecs`]: https://docs.rs/numcodecs/0.2/numcodecs/ + +## License + +Licensed under the Mozilla Public License, Version 2.0 ([LICENSE](LICENSE) or https://www.mozilla.org/en-US/MPL/2.0/). + +## Funding + +The `numcodecs-pressio` crate has been developed as part of [ESiWACE3](https://www.esiwace.eu), the third phase of the Centre of Excellence in Simulation of Weather and Climate in Europe. + +Funded by the European Union. This work has received funding from the European High Performance Computing Joint Undertaking (JU) under grant agreement No 101093054. diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs new file mode 100644 index 000000000..cfe500cb9 --- /dev/null +++ b/codecs/pressio/src/lib.rs @@ -0,0 +1,82 @@ +//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs] +//! +//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main +//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain +//! +//! [MSRV]: https://img.shields.io/badge/MSRV-1.85.0-blue +//! [repo]: https://github.com/juntyr/numcodecs-rs +//! +//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-pressio +//! [crates.io]: https://crates.io/crates/numcodecs-pressio +//! +//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-pressio +//! [docs.rs]: https://docs.rs/numcodecs-pressio/ +//! +//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue +//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs_pressio +//! +//! libpressio codec wrapper for the [`numcodecs`] API. + +use numcodecs::{ + AnyArray, AnyArrayAssignError, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, + StaticCodecConfig, StaticCodecVersion, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Clone, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] +/// Identity codec which applies the identity function, i.e. passes through the +/// input unchanged during encoding and decoding. +pub struct IdentityCodec { + /// The codec's encoding format version. Do not provide this parameter explicitly. + #[serde(default, rename = "_version")] + pub version: StaticCodecVersion<1, 0, 0>, +} + +impl Codec for IdentityCodec { + type Error = IdentityCodecError; + + fn encode(&self, data: AnyCowArray) -> Result { + Ok(data.into_owned()) + } + + fn decode(&self, encoded: AnyCowArray) -> Result { + Ok(encoded.into_owned()) + } + + fn decode_into( + &self, + encoded: AnyArrayView, + mut decoded: AnyArrayViewMut, + ) -> Result<(), Self::Error> { + Ok(decoded.assign(&encoded)?) + } +} + +impl StaticCodec for IdentityCodec { + const CODEC_ID: &'static str = "identity.rs"; + + type Config<'de> = Self; + + fn from_config(config: Self::Config<'_>) -> Self { + config + } + + fn get_config(&self) -> StaticCodecConfig { + StaticCodecConfig::from(self) + } +} + +#[derive(Debug, Error)] +/// Errors that may occur when applying the [`IdentityCodec`]. +pub enum IdentityCodecError { + /// [`IdentityCodec`] cannot decode into the provided array + #[error("Identity cannot decode into the provided array")] + MismatchedDecodeIntoArray { + /// The source of the error + #[from] + source: AnyArrayAssignError, + }, +} From d8db7fa8cb485680e6a222074c677b873ba8446b Mon Sep 17 00:00:00 2001 From: Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Fri, 25 Apr 2025 07:50:21 +0000 Subject: [PATCH 02/43] Try with some libpressio fixes --- codecs/pressio/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index 2a8830c3f..bec78ed0a 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -21,7 +21,7 @@ serde = { workspace = true, features = ["std", "derive"] } thiserror = { workspace = true } # FIXME: move into workspace dependencies -libpressio = { git = "https://github.com/juntyr/libpressio-rs.git", rev = "ddceba6" } +libpressio = { git = "https://github.com/juntyr/libpressio-rs.git", rev = "82afbea" } [lints] workspace = true From 027d78d31464c17bbe06d8b296dcecf0d21204bf Mon Sep 17 00:00:00 2001 From: Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Fri, 25 Apr 2025 08:10:37 +0000 Subject: [PATCH 03/43] Skip numcodecs-pressio in non-builder WASM CI --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3fb965be2..195cdbb63 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,6 +93,7 @@ jobs: --exclude numcodecs-ebcc \ --exclude numcodecs-jpeg2000 \ --exclude numcodecs-lc \ + --exclude numcodecs-pressio \ --exclude numcodecs-qpet-sperr \ --exclude numcodecs-sperr \ --exclude numcodecs-sz3 \ @@ -229,6 +230,7 @@ jobs: --exclude numcodecs-ebcc \ --exclude numcodecs-jpeg2000 \ --exclude numcodecs-lc \ + --exclude numcodecs-pressio \ --exclude numcodecs-qpet-sperr \ --exclude numcodecs-sperr \ --exclude numcodecs-sz3 \ @@ -248,6 +250,7 @@ jobs: --exclude numcodecs-ebcc \ --exclude numcodecs-jpeg2000 \ --exclude numcodecs-lc \ + --exclude numcodecs-pressio \ --exclude numcodecs-qpet-sperr \ --exclude numcodecs-sperr \ --exclude numcodecs-sz3 \ From c3803b3d5c3c1a722d16eed107499a1b61b78f57 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 25 Apr 2025 12:12:29 +0300 Subject: [PATCH 04/43] Update libpressio rev --- codecs/pressio/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index bec78ed0a..b3b7233ef 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -21,7 +21,7 @@ serde = { workspace = true, features = ["std", "derive"] } thiserror = { workspace = true } # FIXME: move into workspace dependencies -libpressio = { git = "https://github.com/juntyr/libpressio-rs.git", rev = "82afbea" } +libpressio = { git = "https://github.com/juntyr/libpressio-rs.git", rev = "6d25c06", default-features = false } [lints] workspace = true From f4f12f091dbc547d84b091b3d1304ca90ad13c6a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 25 Apr 2025 12:29:15 +0300 Subject: [PATCH 05/43] Fix clippy lints --- codecs/pressio/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index cfe500cb9..4026c773d 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -17,6 +17,7 @@ //! //! libpressio codec wrapper for the [`numcodecs`] API. +use ::libpressio as _; use numcodecs::{ AnyArray, AnyArrayAssignError, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, From 878f3513d92bdeb67d362b4ef7d750267dbd828c Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 12 Feb 2026 10:56:19 +0200 Subject: [PATCH 06/43] Some WASM hacks --- Cargo.toml | 1 + codecs/pressio/Cargo.toml | 4 +--- codecs/pressio/src/lib.rs | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b329464b0..1213efb37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,6 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "cad5bcc", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index b3b7233ef..d4cc6179a 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -15,13 +15,11 @@ keywords = ["libpressio", "numcodecs", "compression", "encoding"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +libpressio = { workspace = true } numcodecs = { workspace = true } schemars = { workspace = true, features = ["derive", "preserve_order"] } serde = { workspace = true, features = ["std", "derive"] } thiserror = { workspace = true } -# FIXME: move into workspace dependencies -libpressio = { git = "https://github.com/juntyr/libpressio-rs.git", rev = "6d25c06", default-features = false } - [lints] workspace = true diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 4026c773d..814f054be 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -28,16 +28,16 @@ use thiserror::Error; #[derive(Clone, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] -/// Identity codec which applies the identity function, i.e. passes through the +/// Pressio codec which applies the identity function, i.e. passes through the /// input unchanged during encoding and decoding. -pub struct IdentityCodec { +pub struct PressioCodec { /// The codec's encoding format version. Do not provide this parameter explicitly. #[serde(default, rename = "_version")] pub version: StaticCodecVersion<1, 0, 0>, } -impl Codec for IdentityCodec { - type Error = IdentityCodecError; +impl Codec for PressioCodec { + type Error = PressioCodecError; fn encode(&self, data: AnyCowArray) -> Result { Ok(data.into_owned()) @@ -56,8 +56,8 @@ impl Codec for IdentityCodec { } } -impl StaticCodec for IdentityCodec { - const CODEC_ID: &'static str = "identity.rs"; +impl StaticCodec for PressioCodec { + const CODEC_ID: &'static str = "pressio.rs"; type Config<'de> = Self; @@ -65,16 +65,16 @@ impl StaticCodec for IdentityCodec { config } - fn get_config(&self) -> StaticCodecConfig { + fn get_config(&self) -> StaticCodecConfig<'_, Self> { StaticCodecConfig::from(self) } } #[derive(Debug, Error)] -/// Errors that may occur when applying the [`IdentityCodec`]. -pub enum IdentityCodecError { - /// [`IdentityCodec`] cannot decode into the provided array - #[error("Identity cannot decode into the provided array")] +/// Errors that may occur when applying the [`PressioCodec`]. +pub enum PressioCodecError { + /// [`PressioCodec`] cannot decode into the provided array + #[error("Pressio cannot decode into the provided array")] MismatchedDecodeIntoArray { /// The source of the error #[from] From 96bf7f86dcd5a21375917770a5734818677cbbf4 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 12 Feb 2026 11:03:24 +0200 Subject: [PATCH 07/43] Clean up libpressio dependencies --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1213efb37..fd9773327 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "cad5bcc", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "3a00062", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From be6b2f76aff19a85e7d217f57921da6d28db7158 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 12 Feb 2026 12:25:26 +0200 Subject: [PATCH 08/43] Some experimentation to produce more link errors --- codecs/pressio/src/lib.rs | 183 ++++++++++++++++++++++++++++++++++---- 1 file changed, 164 insertions(+), 19 deletions(-) diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 814f054be..d35129594 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -17,42 +17,191 @@ //! //! libpressio codec wrapper for the [`numcodecs`] API. -use ::libpressio as _; +use std::{borrow::Cow, collections::BTreeMap, sync::LazyLock}; + use numcodecs::{ - AnyArray, AnyArrayAssignError, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, - StaticCodecConfig, StaticCodecVersion, + AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, StaticCodecConfig, + StaticCodecVersion, }; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use schemars::{JsonSchema, Schema, SchemaGenerator}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use thiserror::Error; +static PRESSIO: LazyLock = LazyLock::new(Pressio::new); + #[derive(Clone, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] /// Pressio codec which applies the identity function, i.e. passes through the /// input unchanged during encoding and decoding. pub struct PressioCodec { + /// The Pressio compressor + #[serde(flatten)] + pub compressor: PressioCompressor, /// The codec's encoding format version. Do not provide this parameter explicitly. #[serde(default, rename = "_version")] pub version: StaticCodecVersion<1, 0, 0>, } +/// Pressio compressor +pub struct PressioCompressor { + format: PressioCompressorFormat, + compressor: libpressio::PressioCompressor, +} + +// FIXME: UNSOUND +#[expect(unsafe_code, clippy::non_send_fields_in_send_ty)] +unsafe impl Send for PressioCompressor {} +#[expect(unsafe_code)] +unsafe impl Sync for PressioCompressor {} + +impl Clone for PressioCompressor { + #[expect(clippy::unwrap_used)] + fn clone(&self) -> Self { + let pressio = PRESSIO.get_or_unwrap(); + let compressor = pressio.get_compressor(self.format.id.as_str()).unwrap(); + let options = self.compressor.get_options().unwrap(); + compressor.set_options(&options).unwrap(); + + Self { + format: self.format.clone(), + compressor, + } + } +} + +impl Serialize for PressioCompressor { + fn serialize(&self, serializer: S) -> Result { + self.format.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for PressioCompressor { + fn deserialize>(deserializer: D) -> Result { + let pressio = PRESSIO + .get() + .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + + // TODO: better error handling + let format = PressioCompressorFormat::deserialize(deserializer)?; + + let compressor = pressio + .get_compressor(format.id.as_str()) + .map_err(|err| serde::de::Error::custom(err.message))?; + let mut options = compressor + .get_options() + .map_err(|err| serde::de::Error::custom(err.message))?; + + for (key, value) in &format.options { + options = options + .set( + key, + match value { + PressioOption::U8(x) => libpressio::PressioOption::uint8(Some(*x)), + PressioOption::I8(x) => libpressio::PressioOption::int8(Some(*x)), + PressioOption::U16(x) => libpressio::PressioOption::uint16(Some(*x)), + PressioOption::I16(x) => libpressio::PressioOption::int16(Some(*x)), + PressioOption::U32(x) => libpressio::PressioOption::uint32(Some(*x)), + PressioOption::I32(x) => libpressio::PressioOption::int32(Some(*x)), + PressioOption::U64(x) => libpressio::PressioOption::uint64(Some(*x)), + PressioOption::I64(x) => libpressio::PressioOption::int64(Some(*x)), + PressioOption::F32(x) => libpressio::PressioOption::float32(Some(*x)), + PressioOption::F64(x) => libpressio::PressioOption::float64(Some(*x)), + PressioOption::String(x) => { + libpressio::PressioOption::string(Some(x.clone())) + } + PressioOption::VecString(x) => { + libpressio::PressioOption::vec_string(Some(x.clone())) + } + }, + ) + .map_err(|err| serde::de::Error::custom(err.message))?; + } + + Ok(Self { format, compressor }) + } +} + +impl JsonSchema for PressioCompressor { + fn schema_name() -> Cow<'static, str> { + PressioCompressorFormat::schema_name() + } + + fn json_schema(generator: &mut SchemaGenerator) -> Schema { + PressioCompressorFormat::json_schema(generator) + } +} + +#[derive(Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename = "PressioCompressor")] +struct PressioCompressorFormat { + id: String, + #[serde(flatten)] + options: BTreeMap, +} + +#[expect(missing_docs)] +#[derive(Clone, Serialize, Deserialize, JsonSchema)] +#[serde(untagged)] +/// Pressio option value +pub enum PressioOption { + U8(u8), + I8(i8), + U16(u16), + I16(i16), + U32(u32), + I32(i32), + U64(u64), + I64(i64), + F32(f32), + F64(f64), + String(String), + VecString(Vec), +} + +struct Pressio { + pressio: Result, +} + +impl Pressio { + fn new() -> Self { + Self { + pressio: libpressio::Pressio::new(), + } + } + + const fn get(&self) -> Result<&libpressio::Pressio, &libpressio::PressioError> { + self.pressio.as_ref() + } + + #[expect(clippy::unwrap_used)] + fn get_or_unwrap(&self) -> &libpressio::Pressio { + self.pressio.as_ref().unwrap() + } +} + +// FIXME: UNSOUND +#[expect(unsafe_code, clippy::non_send_fields_in_send_ty)] +unsafe impl Send for Pressio {} +#[expect(unsafe_code)] +unsafe impl Sync for Pressio {} + impl Codec for PressioCodec { type Error = PressioCodecError; - fn encode(&self, data: AnyCowArray) -> Result { - Ok(data.into_owned()) + fn encode(&self, _data: AnyCowArray) -> Result { + Err(PressioCodecError::Unimplemented) } - fn decode(&self, encoded: AnyCowArray) -> Result { - Ok(encoded.into_owned()) + fn decode(&self, _encoded: AnyCowArray) -> Result { + Err(PressioCodecError::Unimplemented) } fn decode_into( &self, - encoded: AnyArrayView, - mut decoded: AnyArrayViewMut, + _encoded: AnyArrayView, + _decoded: AnyArrayViewMut, ) -> Result<(), Self::Error> { - Ok(decoded.assign(&encoded)?) + Err(PressioCodecError::Unimplemented) } } @@ -73,11 +222,7 @@ impl StaticCodec for PressioCodec { #[derive(Debug, Error)] /// Errors that may occur when applying the [`PressioCodec`]. pub enum PressioCodecError { - /// [`PressioCodec`] cannot decode into the provided array - #[error("Pressio cannot decode into the provided array")] - MismatchedDecodeIntoArray { - /// The source of the error - #[from] - source: AnyArrayAssignError, - }, + /// [`PressioCodec`] does not yet implement this functionality + #[error("Pressio does not yet implement this functionality")] + Unimplemented, } From 20f76058170b164e4e137b9e4af655bd46d1c5f4 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 12 Feb 2026 19:03:37 +0200 Subject: [PATCH 09/43] Upgrade to upstream std_compat --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fd9773327..56cc6eeb0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "3a00062", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5d91490", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From 64fe90bf990132ddea22b70594e94c1056051d2a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 12 Feb 2026 21:19:54 +0200 Subject: [PATCH 10/43] Only link libstdc++ in libpressio on Linux target --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 56cc6eeb0..bb4f9040b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5d91490", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "818f30d", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index d35129594..e69c734b5 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -226,3 +226,8 @@ pub enum PressioCodecError { #[error("Pressio does not yet implement this functionality")] Unimplemented, } + +// FIXME: don't stub +#[expect(unsafe_code)] +#[unsafe(no_mangle)] +const extern "C" fn pressio_register_all() {} From e573721dc2378a4ed7b69c34c7fa99108f3871a0 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 12 Feb 2026 22:15:54 +0200 Subject: [PATCH 11/43] Fix target_os check in libpressio build.rs --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bb4f9040b..dd71b85c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "818f30d", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "a1f99af", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From 7b04c21d104b335673a735761c3d52c4ce5a8792 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 17 Feb 2026 09:50:48 +0200 Subject: [PATCH 12/43] no really really don't ask --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd71b85c5..2ab2760c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "a1f99af", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "a664de1", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index e69c734b5..d35129594 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -226,8 +226,3 @@ pub enum PressioCodecError { #[error("Pressio does not yet implement this functionality")] Unimplemented, } - -// FIXME: don't stub -#[expect(unsafe_code)] -#[unsafe(no_mangle)] -const extern "C" fn pressio_register_all() {} From 67bec0c564485eb915bdf79c3f61a36296f11146 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 17 Feb 2026 14:19:26 +0200 Subject: [PATCH 13/43] add some option parsing and inspection support --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 67 +++++++++++++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2ab2760c9..ff268c4ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "a664de1", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "435c82a", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index d35129594..40bcbbeda 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -30,7 +30,7 @@ use thiserror::Error; static PRESSIO: LazyLock = LazyLock::new(Pressio::new); #[derive(Clone, Serialize, Deserialize, JsonSchema)] -#[serde(deny_unknown_fields)] +#[schemars(deny_unknown_fields)] /// Pressio codec which applies the identity function, i.e. passes through the /// input unchanged during encoding and decoding. pub struct PressioCodec { @@ -58,7 +58,9 @@ impl Clone for PressioCompressor { #[expect(clippy::unwrap_used)] fn clone(&self) -> Self { let pressio = PRESSIO.get_or_unwrap(); - let compressor = pressio.get_compressor(self.format.id.as_str()).unwrap(); + let compressor = pressio + .get_compressor(self.format.compressor.as_str()) + .unwrap(); let options = self.compressor.get_options().unwrap(); compressor.set_options(&options).unwrap(); @@ -80,13 +82,26 @@ impl<'de> Deserialize<'de> for PressioCompressor { let pressio = PRESSIO .get() .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; - // TODO: better error handling let format = PressioCompressorFormat::deserialize(deserializer)?; - let compressor = pressio - .get_compressor(format.id.as_str()) - .map_err(|err| serde::de::Error::custom(err.message))?; + .get_compressor(format.compressor.as_str()) + .map_err(|err| { + let supported_compressors = + pressio + .supported_compressors() + .map_or(String::from(""), |x| { + x.iter() + .map(|x| format!("`{x}`")) + .collect::>() + .join(", ") + }); + + serde::de::Error::custom(format_args!( + "{}, choose one of: {}", + err.message, supported_compressors + )) + })?; let mut options = compressor .get_options() .map_err(|err| serde::de::Error::custom(err.message))?; @@ -96,6 +111,7 @@ impl<'de> Deserialize<'de> for PressioCompressor { .set( key, match value { + PressioOption::Bool(x) => libpressio::PressioOption::bool(Some(*x)), PressioOption::U8(x) => libpressio::PressioOption::uint8(Some(*x)), PressioOption::I8(x) => libpressio::PressioOption::int8(Some(*x)), PressioOption::U16(x) => libpressio::PressioOption::uint16(Some(*x)), @@ -117,6 +133,35 @@ impl<'de> Deserialize<'de> for PressioCompressor { .map_err(|err| serde::de::Error::custom(err.message))?; } + let mut format = format; + if let Ok(format_options) = options.get_options() { + format.options = format_options + .into_iter() + .filter_map(|(k, v)| match v { + libpressio::PressioOption::bool(Some(x)) => Some((k, PressioOption::Bool(x))), + libpressio::PressioOption::int8(Some(x)) => Some((k, PressioOption::I8(x))), + libpressio::PressioOption::int16(Some(x)) => Some((k, PressioOption::I16(x))), + libpressio::PressioOption::int32(Some(x)) => Some((k, PressioOption::I32(x))), + libpressio::PressioOption::int64(Some(x)) => Some((k, PressioOption::I64(x))), + libpressio::PressioOption::uint8(Some(x)) => Some((k, PressioOption::U8(x))), + libpressio::PressioOption::uint16(Some(x)) => Some((k, PressioOption::U16(x))), + libpressio::PressioOption::uint32(Some(x)) => Some((k, PressioOption::U32(x))), + libpressio::PressioOption::uint64(Some(x)) => Some((k, PressioOption::U64(x))), + libpressio::PressioOption::float32(Some(x)) => Some((k, PressioOption::F32(x))), + libpressio::PressioOption::float64(Some(x)) => Some((k, PressioOption::F64(x))), + libpressio::PressioOption::string(Some(x)) => { + Some((k, PressioOption::String(x))) + } + // FIXME: seems to return strings as a single joined string + libpressio::PressioOption::vec_string(Some(x)) => { + Some((k, PressioOption::VecString(x))) + } + _ => None, + }) + .collect(); + } + let format = format; + Ok(Self { format, compressor }) } } @@ -131,19 +176,21 @@ impl JsonSchema for PressioCompressor { } } -#[derive(Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename = "PressioCompressor")] struct PressioCompressorFormat { - id: String, - #[serde(flatten)] + compressor: String, + // TODO: flatten + #[serde(default)] options: BTreeMap, } #[expect(missing_docs)] -#[derive(Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] #[serde(untagged)] /// Pressio option value pub enum PressioOption { + Bool(bool), U8(u8), I8(i8), U16(u16), From 11d0dd0691e51c945b714740b09bbbbeea876296 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 18 Feb 2026 08:43:40 +0200 Subject: [PATCH 14/43] some more plugins --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ff268c4ef..297eddf6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "435c82a", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "e562964", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From 760913d295b47c3a28262f1a2ce09d45c9fff877 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 18 Feb 2026 15:04:06 +0200 Subject: [PATCH 15/43] enable more plugins --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 297eddf6d..eea5f15ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "e562964", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "f142487", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From 2da115cde51132f5d86707ec06f710d030d78a4a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 21 Feb 2026 06:36:13 +0200 Subject: [PATCH 16/43] Simple encode/decode implementation --- Cargo.toml | 2 +- codecs/pressio/Cargo.toml | 1 + codecs/pressio/src/lib.rs | 167 +++++++++++++++++++++++++++++++++++--- 3 files changed, 157 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index eea5f15ad..508f8cb5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "f142487", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "4e7b44f", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index d4cc6179a..e664605e6 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -16,6 +16,7 @@ keywords = ["libpressio", "numcodecs", "compression", "encoding"] [dependencies] libpressio = { workspace = true } +ndarray = { workspace = true } numcodecs = { workspace = true } schemars = { workspace = true, features = ["derive", "preserve_order"] } serde = { workspace = true, features = ["std", "derive"] } diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 40bcbbeda..56fd0f3ea 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -19,9 +19,10 @@ use std::{borrow::Cow, collections::BTreeMap, sync::LazyLock}; +use ndarray::{CowArray, IxDyn}; use numcodecs::{ - AnyArray, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, StaticCodecConfig, - StaticCodecVersion, + AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, + Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, }; use schemars::{JsonSchema, Schema, SchemaGenerator}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -235,20 +236,126 @@ unsafe impl Sync for Pressio {} impl Codec for PressioCodec { type Error = PressioCodecError; - fn encode(&self, _data: AnyCowArray) -> Result { - Err(PressioCodecError::Unimplemented) + fn encode(&self, data: AnyCowArray) -> Result { + fn encode_typed( + compressor: &libpressio::PressioCompressor, + data: CowArray, + ) -> Result { + let data = match data.try_into_owned_nocopy() { + Ok(data) => libpressio::PressioData::new(data), + Err(data) => libpressio::PressioData::new_copied(data.view()), + }; + + let compressed_data = + libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); + + let compressed_data = compressor.compress(&data, compressed_data).map_err(|err| { + PressioCodecError::PressioEncodeFailed { + source: PressioCodingError(err), + } + })?; + + let Some(compressed_data) = compressed_data.clone_into_array() else { + return Err(PressioCodecError::EncodeToUnknownDtype); + }; + + match compressed_data { + libpressio::PressioArray::Bool(_) => Err(PressioCodecError::EncodeToBoolArray), + libpressio::PressioArray::U8(a) | libpressio::PressioArray::Byte(a) => { + Ok(AnyArray::U8(a)) + } + libpressio::PressioArray::U16(a) => Ok(AnyArray::U16(a)), + libpressio::PressioArray::U32(a) => Ok(AnyArray::U32(a)), + libpressio::PressioArray::U64(a) => Ok(AnyArray::U64(a)), + libpressio::PressioArray::I8(a) => Ok(AnyArray::I8(a)), + libpressio::PressioArray::I16(a) => Ok(AnyArray::I16(a)), + libpressio::PressioArray::I32(a) => Ok(AnyArray::I32(a)), + libpressio::PressioArray::I64(a) => Ok(AnyArray::I64(a)), + libpressio::PressioArray::F32(a) => Ok(AnyArray::F32(a)), + libpressio::PressioArray::F64(a) => Ok(AnyArray::F64(a)), + } + } + + match data { + AnyCowArray::U8(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::U16(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::U32(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::U64(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::I8(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::I16(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::I32(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::I64(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::F32(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::F64(data) => encode_typed(&self.compressor.compressor, data), + data => Err(PressioCodecError::UnsupportedDtype(data.dtype())), + } } - fn decode(&self, _encoded: AnyCowArray) -> Result { - Err(PressioCodecError::Unimplemented) + fn decode(&self, encoded: AnyCowArray) -> Result { + fn decode_typed( + compressor: &libpressio::PressioCompressor, + encoded: CowArray, + ) -> Result { + let encoded = match encoded.try_into_owned_nocopy() { + Ok(encoded) => libpressio::PressioData::new(encoded), + Err(encoded) => libpressio::PressioData::new_copied(encoded.view()), + }; + + let decompressed_data = + libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); + + let decompressed_data = + compressor + .compress(&encoded, decompressed_data) + .map_err(|err| PressioCodecError::PressioDecodeFailed { + source: PressioCodingError(err), + })?; + + let Some(decompressed_data) = decompressed_data.clone_into_array() else { + return Err(PressioCodecError::DecodeToUnknownDtype); + }; + + match decompressed_data { + libpressio::PressioArray::Bool(_) => Err(PressioCodecError::DecodeToBoolArray), + libpressio::PressioArray::U8(a) | libpressio::PressioArray::Byte(a) => { + Ok(AnyArray::U8(a)) + } + libpressio::PressioArray::U16(a) => Ok(AnyArray::U16(a)), + libpressio::PressioArray::U32(a) => Ok(AnyArray::U32(a)), + libpressio::PressioArray::U64(a) => Ok(AnyArray::U64(a)), + libpressio::PressioArray::I8(a) => Ok(AnyArray::I8(a)), + libpressio::PressioArray::I16(a) => Ok(AnyArray::I16(a)), + libpressio::PressioArray::I32(a) => Ok(AnyArray::I32(a)), + libpressio::PressioArray::I64(a) => Ok(AnyArray::I64(a)), + libpressio::PressioArray::F32(a) => Ok(AnyArray::F32(a)), + libpressio::PressioArray::F64(a) => Ok(AnyArray::F64(a)), + } + } + + match encoded { + AnyCowArray::U8(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::U16(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::U32(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::U64(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::I8(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::I16(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::I32(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::I64(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::F32(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::F64(encoded) => decode_typed(&self.compressor.compressor, encoded), + encoded => Err(PressioCodecError::UnsupportedDtype(encoded.dtype())), + } } fn decode_into( &self, - _encoded: AnyArrayView, - _decoded: AnyArrayViewMut, + encoded: AnyArrayView, + mut decoded: AnyArrayViewMut, ) -> Result<(), Self::Error> { - Err(PressioCodecError::Unimplemented) + // TODO: optimize + let decoded_in = self.decode(encoded.cow())?; + + Ok(decoded.assign(&decoded_in)?) } } @@ -269,7 +376,43 @@ impl StaticCodec for PressioCodec { #[derive(Debug, Error)] /// Errors that may occur when applying the [`PressioCodec`]. pub enum PressioCodecError { - /// [`PressioCodec`] does not yet implement this functionality - #[error("Pressio does not yet implement this functionality")] - Unimplemented, + /// [`PressioCodec`] does not support the dtype + #[error("Pressio does not support the dtype {0}")] + UnsupportedDtype(AnyArrayDType), + /// [`PressioCodec`] failed to encode the data + #[error("Pressio failed to encode the data")] + PressioEncodeFailed { + /// Opaque source error + source: PressioCodingError, + }, + /// [`PressioCodec`] encoded to an unknown unsupported dtype + #[error("Pressio encoded to an unknown unsupported dtype")] + EncodeToUnknownDtype, + /// [`PressioCodec`] encoded to a bool array, which is unsupported + #[error("Pressio encoded to a bool array, which is unsupported")] + EncodeToBoolArray, + /// [`PressioCodec`] failed to decode the data + #[error("Pressio failed to decode the data")] + PressioDecodeFailed { + /// Opaque source error + source: PressioCodingError, + }, + /// [`PressioCodec`] decoded to an unknown unsupported dtype + #[error("Pressio decoded to an unknown unsupported dtype")] + DecodeToUnknownDtype, + /// [`PressioCodec`] decoded to a bool array, which is unsupported + #[error("Pressio decoded to a bool array, which is unsupported")] + DecodeToBoolArray, + /// [`PressioCodec`] cannot decode into the provided array + #[error("Pressio cannot decode into the provided array")] + MismatchedDecodeIntoArray { + /// The source of the error + #[from] + source: AnyArrayAssignError, + }, } + +#[derive(Debug, Error)] +#[error(transparent)] +/// Opaque error for when encoding or decoding with libpressio fails +pub struct PressioCodingError(libpressio::PressioError); From 48cbe5a8689aad06aeecb557e1d7a51b038fc6ef Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 23 Feb 2026 12:37:52 +0200 Subject: [PATCH 17/43] add host libcxx to the nix flake --- Cargo.toml | 2 +- .../numcodecs-wasm-builder/buildenv/flake.nix | 2 ++ crates/numcodecs-wasm-builder/src/main.rs | 20 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 508f8cb5c..56d45c501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "4e7b44f", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "3b90d94", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/crates/numcodecs-wasm-builder/buildenv/flake.nix b/crates/numcodecs-wasm-builder/buildenv/flake.nix index b02efb029..3e4b8bab1 100644 --- a/crates/numcodecs-wasm-builder/buildenv/flake.nix +++ b/crates/numcodecs-wasm-builder/buildenv/flake.nix @@ -61,6 +61,7 @@ packages = [ (pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain) pkgs."llvmPackages_${llvmVersion}".libclang + pkgs."llvmPackages_${llvmVersion}".libcxx wasi-sysroot libclang_rt pkgs.cmake @@ -84,6 +85,7 @@ MY_WASM_OPT = "${pkgs.binaryen}/bin/wasm-opt"; MY_PKG_CONFIG = "${pkgs.pkg-config}/bin/pkg-config"; MY_PYTHON3 = "${pkgs.python3}/bin/python3"; + MY_HOST_LIBCXX = "${pkgs."llvmPackages_${llvmVersion}".libcxx}"; }; }; }); diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index 329d6acf0..577da25c0 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -203,6 +203,8 @@ struct NixEnv { pkg_config: PathBuf, #[expect(dead_code)] python3: PathBuf, + host_libcxx: PathBuf, + host_sysroot: PathBuf, } impl NixEnv { @@ -270,6 +272,11 @@ impl NixEnv { wasm_opt: try_read_env(&env, "MY_WASM_OPT")?, pkg_config: try_read_env(&env, "MY_PKG_CONFIG")?, python3: try_read_env(&env, "MY_PYTHON3")?, + host_libcxx: try_read_env(&env, "MY_HOST_LIBCXX")?, + // FIXME + host_sysroot: PathBuf::from( + "/nix/store/5gfsv5n8zhpnl9yhggjpxrxg0jyflwja-apple-sdk-11.3/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk", + ), }) } } @@ -295,6 +302,8 @@ fn configure_cargo_cmd( wasi_sysroot, libclang_rt, pkg_config, + host_libcxx, + host_sysroot, .. } = nix_env; @@ -379,6 +388,17 @@ fn configure_cargo_cmd( cpp_include_path = crate_dir.join("include.hpp").display(), debug = if debug { "-g" } else { "" }, )); + cmd.arg(format!( + "CXXFLAGSHOST=-isysroot {host_sysroot} -isystem {host_libcxx_include} \ + -isystem {clang_include}", + host_sysroot = host_sysroot.display(), + host_libcxx_include = host_libcxx.join("include").join("c++").join("v1").display(), + clang_include = libclang + .join("clang") + .join(llvm_version) + .join("include") + .display(), + )); cmd.arg(format!( "BINDGEN_EXTRA_CLANG_ARGS=--target=wasm32-wasip1 -nodefaultlibs -resource-dir \ {resource_dir} --sysroot={wasi_sysroot} -isystem {wasm32_wasi_cxx_include} -isystem \ From ccdc4d668c3850a20d49f0ea46cd217cea3a77ac Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 23 Feb 2026 13:04:04 +0200 Subject: [PATCH 18/43] fix cmake env lookup --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 56d45c501..88f0202b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "3b90d94", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5bc8ff0", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From 33510b640a046c369c7f9bca2a55a798340061e8 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 23 Feb 2026 14:52:10 +0200 Subject: [PATCH 19/43] Fix clippy lint --- codecs/pressio/src/lib.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 56fd0f3ea..ad5a58597 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -88,15 +88,15 @@ impl<'de> Deserialize<'de> for PressioCompressor { let compressor = pressio .get_compressor(format.compressor.as_str()) .map_err(|err| { - let supported_compressors = - pressio - .supported_compressors() - .map_or(String::from(""), |x| { - x.iter() - .map(|x| format!("`{x}`")) - .collect::>() - .join(", ") - }); + let supported_compressors = pressio.supported_compressors().map_or_else( + |_| String::from(""), + |x| { + x.iter() + .map(|x| format!("`{x}`")) + .collect::>() + .join(", ") + }, + ); serde::de::Error::custom(format_args!( "{}, choose one of: {}", From 2b19eccf46f9e32281a3b75fd741e87f49c34ab7 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 1 Mar 2026 23:08:36 +0200 Subject: [PATCH 20/43] Pressio and PressioCompressor are Send + !Sync --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 148 ++++++++++++++++++++------------------ 2 files changed, 80 insertions(+), 70 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 88f0202b5..3d31188ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5bc8ff0", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "fea47ef", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index ad5a58597..40b008b06 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -17,7 +17,11 @@ //! //! libpressio codec wrapper for the [`numcodecs`] API. -use std::{borrow::Cow, collections::BTreeMap, sync::LazyLock}; +use std::{ + borrow::Cow, + collections::BTreeMap, + sync::{LazyLock, Mutex}, +}; use ndarray::{CowArray, IxDyn}; use numcodecs::{ @@ -46,28 +50,24 @@ pub struct PressioCodec { /// Pressio compressor pub struct PressioCompressor { format: PressioCompressorFormat, - compressor: libpressio::PressioCompressor, + compressor: Mutex, } -// FIXME: UNSOUND -#[expect(unsafe_code, clippy::non_send_fields_in_send_ty)] -unsafe impl Send for PressioCompressor {} -#[expect(unsafe_code)] -unsafe impl Sync for PressioCompressor {} - impl Clone for PressioCompressor { #[expect(clippy::unwrap_used)] fn clone(&self) -> Self { - let pressio = PRESSIO.get_or_unwrap(); - let compressor = pressio - .get_compressor(self.format.compressor.as_str()) - .unwrap(); - let options = self.compressor.get_options().unwrap(); + let mut compressor = { + let mut pressio = PRESSIO.get_or_unwrap().lock().unwrap(); + pressio + .get_compressor(self.format.compressor.as_str()) + .unwrap() + }; + let options = self.compressor.lock().unwrap().get_options().unwrap(); compressor.set_options(&options).unwrap(); Self { format: self.format.clone(), - compressor, + compressor: Mutex::new(compressor), } } } @@ -80,29 +80,32 @@ impl Serialize for PressioCompressor { impl<'de> Deserialize<'de> for PressioCompressor { fn deserialize>(deserializer: D) -> Result { - let pressio = PRESSIO - .get() - .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; // TODO: better error handling let format = PressioCompressorFormat::deserialize(deserializer)?; - let compressor = pressio - .get_compressor(format.compressor.as_str()) - .map_err(|err| { - let supported_compressors = pressio.supported_compressors().map_or_else( - |_| String::from(""), - |x| { - x.iter() - .map(|x| format!("`{x}`")) - .collect::>() - .join(", ") - }, - ); - - serde::de::Error::custom(format_args!( - "{}, choose one of: {}", - err.message, supported_compressors - )) - })?; + let compressor = { + let pressio = PRESSIO + .get() + .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + let mut pressio = pressio.lock().map_err(serde::de::Error::custom)?; + pressio + .get_compressor(format.compressor.as_str()) + .map_err(|err| { + let supported_compressors = pressio.supported_compressors().map_or_else( + |_| String::from(""), + |x| { + x.iter() + .map(|x| format!("`{x}`")) + .collect::>() + .join(", ") + }, + ); + + serde::de::Error::custom(format_args!( + "{}, choose one of: {}", + err.message, supported_compressors + )) + })? + }; let mut options = compressor .get_options() .map_err(|err| serde::de::Error::custom(err.message))?; @@ -161,9 +164,11 @@ impl<'de> Deserialize<'de> for PressioCompressor { }) .collect(); } - let format = format; - Ok(Self { format, compressor }) + Ok(Self { + format, + compressor: Mutex::new(compressor), + }) } } @@ -207,38 +212,32 @@ pub enum PressioOption { } struct Pressio { - pressio: Result, + pressio: Result, libpressio::PressioError>, } impl Pressio { fn new() -> Self { Self { - pressio: libpressio::Pressio::new(), + pressio: libpressio::Pressio::new().map(Mutex::new), } } - const fn get(&self) -> Result<&libpressio::Pressio, &libpressio::PressioError> { + const fn get(&self) -> Result<&Mutex, &libpressio::PressioError> { self.pressio.as_ref() } #[expect(clippy::unwrap_used)] - fn get_or_unwrap(&self) -> &libpressio::Pressio { + fn get_or_unwrap(&self) -> &Mutex { self.pressio.as_ref().unwrap() } } -// FIXME: UNSOUND -#[expect(unsafe_code, clippy::non_send_fields_in_send_ty)] -unsafe impl Send for Pressio {} -#[expect(unsafe_code)] -unsafe impl Sync for Pressio {} - impl Codec for PressioCodec { type Error = PressioCodecError; fn encode(&self, data: AnyCowArray) -> Result { fn encode_typed( - compressor: &libpressio::PressioCompressor, + compressor: &mut libpressio::PressioCompressor, data: CowArray, ) -> Result { let data = match data.try_into_owned_nocopy() { @@ -276,24 +275,28 @@ impl Codec for PressioCodec { } } + let Ok(mut compressor) = self.compressor.compressor.lock() else { + return Err(PressioCodecError::PressioPoisonedMutex); + }; + match data { - AnyCowArray::U8(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::U16(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::U32(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::U64(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::I8(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::I16(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::I32(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::I64(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::F32(data) => encode_typed(&self.compressor.compressor, data), - AnyCowArray::F64(data) => encode_typed(&self.compressor.compressor, data), + AnyCowArray::U8(data) => encode_typed(&mut compressor, data), + AnyCowArray::U16(data) => encode_typed(&mut compressor, data), + AnyCowArray::U32(data) => encode_typed(&mut compressor, data), + AnyCowArray::U64(data) => encode_typed(&mut compressor, data), + AnyCowArray::I8(data) => encode_typed(&mut compressor, data), + AnyCowArray::I16(data) => encode_typed(&mut compressor, data), + AnyCowArray::I32(data) => encode_typed(&mut compressor, data), + AnyCowArray::I64(data) => encode_typed(&mut compressor, data), + AnyCowArray::F32(data) => encode_typed(&mut compressor, data), + AnyCowArray::F64(data) => encode_typed(&mut compressor, data), data => Err(PressioCodecError::UnsupportedDtype(data.dtype())), } } fn decode(&self, encoded: AnyCowArray) -> Result { fn decode_typed( - compressor: &libpressio::PressioCompressor, + compressor: &mut libpressio::PressioCompressor, encoded: CowArray, ) -> Result { let encoded = match encoded.try_into_owned_nocopy() { @@ -332,17 +335,21 @@ impl Codec for PressioCodec { } } + let Ok(mut compressor) = self.compressor.compressor.lock() else { + return Err(PressioCodecError::PressioPoisonedMutex); + }; + match encoded { - AnyCowArray::U8(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::U16(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::U32(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::U64(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::I8(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::I16(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::I32(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::I64(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::F32(encoded) => decode_typed(&self.compressor.compressor, encoded), - AnyCowArray::F64(encoded) => decode_typed(&self.compressor.compressor, encoded), + AnyCowArray::U8(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::U16(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::U32(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::U64(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::I8(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::I16(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::I32(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::I64(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::F32(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::F64(encoded) => decode_typed(&mut compressor, encoded), encoded => Err(PressioCodecError::UnsupportedDtype(encoded.dtype())), } } @@ -379,6 +386,9 @@ pub enum PressioCodecError { /// [`PressioCodec`] does not support the dtype #[error("Pressio does not support the dtype {0}")] UnsupportedDtype(AnyArrayDType), + /// [`PressioCodec`] lock was poisoned + #[error("Pressio lock was poisoned")] + PressioPoisonedMutex, /// [`PressioCodec`] failed to encode the data #[error("Pressio failed to encode the data")] PressioEncodeFailed { From 144d3495be45351197c5be141ce62c679c0e675d Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Mar 2026 10:03:15 +0200 Subject: [PATCH 21/43] derive host sysroot in builder from clang include paths --- Cargo.toml | 2 +- .../numcodecs-wasm-builder/buildenv/flake.nix | 2 +- crates/numcodecs-wasm-builder/src/main.rs | 68 ++++++++++++++++--- 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3d31188ad..666a5f759 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "fea47ef", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "6329913", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/crates/numcodecs-wasm-builder/buildenv/flake.nix b/crates/numcodecs-wasm-builder/buildenv/flake.nix index 3e4b8bab1..22ce9af96 100644 --- a/crates/numcodecs-wasm-builder/buildenv/flake.nix +++ b/crates/numcodecs-wasm-builder/buildenv/flake.nix @@ -85,7 +85,7 @@ MY_WASM_OPT = "${pkgs.binaryen}/bin/wasm-opt"; MY_PKG_CONFIG = "${pkgs.pkg-config}/bin/pkg-config"; MY_PYTHON3 = "${pkgs.python3}/bin/python3"; - MY_HOST_LIBCXX = "${pkgs."llvmPackages_${llvmVersion}".libcxx}"; + MY_HOST_LIBCXX = "${pkgs."llvmPackages_${llvmVersion}".libcxx.dev}"; }; }; }); diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index 577da25c0..7fbb48295 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -3,9 +3,12 @@ use std::{ collections::HashMap, - env, fs, io, + env, + ffi::OsStr, + fs, io, + os::unix::ffi::OsStrExt, path::{Path, PathBuf}, - process::Command, + process::{Command, Stdio}, str::FromStr, }; @@ -69,9 +72,11 @@ fn main() -> io::Result<()> { copy_buildenv_to_crate(&crate_dir)?; let nix_env = NixEnv::new(&crate_dir)?; + let host_sysroot = find_clang_host_sysroot(&nix_env, &crate_dir)?; let wasm = build_wasm_codec( &nix_env, + &host_sysroot, &target_dir, &crate_dir, &format!("{}-wasm", args.crate_), @@ -204,7 +209,6 @@ struct NixEnv { #[expect(dead_code)] python3: PathBuf, host_libcxx: PathBuf, - host_sysroot: PathBuf, } impl NixEnv { @@ -273,17 +277,63 @@ impl NixEnv { pkg_config: try_read_env(&env, "MY_PKG_CONFIG")?, python3: try_read_env(&env, "MY_PYTHON3")?, host_libcxx: try_read_env(&env, "MY_HOST_LIBCXX")?, - // FIXME - host_sysroot: PathBuf::from( - "/nix/store/5gfsv5n8zhpnl9yhggjpxrxg0jyflwja-apple-sdk-11.3/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk", - ), }) } } +fn find_clang_host_sysroot(nix_env: &NixEnv, flake_parent_dir: &Path) -> io::Result { + let NixEnv { clang, .. } = nix_env; + + let mut cmd = Command::new("nix"); + cmd.current_dir(flake_parent_dir); + cmd.arg("develop"); + // cmd.arg("--store"); + // cmd.arg(nix_store_path); + cmd.arg("--no-update-lock-file"); + cmd.arg("--ignore-environment"); + cmd.arg("path:."); + cmd.arg("--command"); + cmd.arg(clang.join("clang")); + cmd.arg("-v"); + cmd.arg("-x"); + cmd.arg("c"); + cmd.arg("-c"); + cmd.arg("-"); + cmd.stdin(Stdio::null()); + + eprintln!("executing {cmd:?}"); + + let output = cmd.output()?; + let Some(include) = output + .stderr + .split(|x| *x == b'\n') + .skip_while(|x| x.trim_ascii() != b"#include <...> search starts here:") + .nth(1) + else { + return Err(io::Error::other( + "failed to find #include <...> search path for clang", + )); + }; + let include = Path::new(OsStr::from_bytes(include.trim_ascii())); + let include = if include.ends_with("include") + && let Some(include) = include.parent() + && include.ends_with("usr") + && let Some(include) = include.parent() + { + include + } else { + return Err(io::Error::other( + "clang #include <...> search path should end in /usr/include", + )); + }; + + Ok(PathBuf::from(include)) +} + #[expect(clippy::too_many_lines)] fn configure_cargo_cmd( nix_env: &NixEnv, + host_sysroot: &Path, target_dir: &Path, crate_dir: &Path, debug: bool, @@ -303,7 +353,6 @@ fn configure_cargo_cmd( libclang_rt, pkg_config, host_libcxx, - host_sysroot, .. } = nix_env; @@ -471,13 +520,14 @@ fn configure_cargo_cmd( fn build_wasm_codec( nix_env: &NixEnv, + host_sysroot: &Path, target_dir: &Path, crate_dir: &Path, crate_name: &str, debug: bool, verbose: bool, ) -> io::Result { - let mut cmd = configure_cargo_cmd(nix_env, target_dir, crate_dir, debug); + let mut cmd = configure_cargo_cmd(nix_env, host_sysroot, target_dir, crate_dir, debug); cmd.arg("rustc") .arg("--crate-type=cdylib") .arg("-Z") From 915a8ac9e9b17d979b38e5c00c12f13c2fb1c94c Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Mar 2026 10:08:46 +0200 Subject: [PATCH 22/43] debug clang include path on Linux --- crates/numcodecs-wasm-builder/src/main.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index 7fbb48295..632c691f7 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -304,6 +304,7 @@ fn find_clang_host_sysroot(nix_env: &NixEnv, flake_parent_dir: &Path) -> io::Res eprintln!("executing {cmd:?}"); let output = cmd.output()?; + eprintln!("output={:?}", String::from_utf8_lossy(&output.stderr)); let Some(include) = output .stderr .split(|x| *x == b'\n') @@ -314,7 +315,9 @@ fn find_clang_host_sysroot(nix_env: &NixEnv, flake_parent_dir: &Path) -> io::Res "failed to find #include <...> search path for clang", )); }; + eprintln!("include={:?}", String::from_utf8_lossy(include)); let include = Path::new(OsStr::from_bytes(include.trim_ascii())); + eprintln!("include={}", include.display()); let include = if include.ends_with("include") && let Some(include) = include.parent() && include.ends_with("usr") From 6d622b02ea50ef9841f9582d34642f8a3fbac972 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Mar 2026 10:22:13 +0200 Subject: [PATCH 23/43] more flexible sysroot finding --- crates/numcodecs-wasm-builder/src/main.rs | 40 +++++++++++++++-------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index 632c691f7..ffb4dd34f 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -304,30 +304,42 @@ fn find_clang_host_sysroot(nix_env: &NixEnv, flake_parent_dir: &Path) -> io::Res eprintln!("executing {cmd:?}"); let output = cmd.output()?; - eprintln!("output={:?}", String::from_utf8_lossy(&output.stderr)); - let Some(include) = output + let Some(full_include) = output .stderr .split(|x| *x == b'\n') .skip_while(|x| x.trim_ascii() != b"#include <...> search starts here:") .nth(1) else { - return Err(io::Error::other( - "failed to find #include <...> search path for clang", - )); + return Err(io::Error::other(format!( + "failed to find #include <...> search path for clang in {:?}", + String::from_utf8_lossy(&output.stderr) + ))); }; - eprintln!("include={:?}", String::from_utf8_lossy(include)); - let include = Path::new(OsStr::from_bytes(include.trim_ascii())); - eprintln!("include={}", include.display()); - let include = if include.ends_with("include") - && let Some(include) = include.parent() - && include.ends_with("usr") + let full_include = Path::new(OsStr::from_bytes(full_include.trim_ascii())); + let mut include = if full_include.ends_with("include") + && let Some(include) = full_include.parent() + { + include + } else { + return Err(io::Error::other(format!( + "clang #include <...> search path {} should end in /usr/.../include", + full_include.display() + ))); + }; + while !include.ends_with("usr") + && let Some(include_parent) = include.parent() + { + include = include_parent; + } + let include = if include.ends_with("usr") && let Some(include) = include.parent() { include } else { - return Err(io::Error::other( - "clang #include <...> search path should end in /usr/include", - )); + return Err(io::Error::other(format!( + "clang #include <...> search path {} should end in /usr/.../include", + full_include.display() + ))); }; Ok(PathBuf::from(include)) From eaedb626dd128148fe9eddc01fe07475107a0c47 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Mar 2026 10:57:55 +0200 Subject: [PATCH 24/43] why not? --- crates/numcodecs-wasm-builder/src/main.rs | 79 ++--------------------- 1 file changed, 4 insertions(+), 75 deletions(-) diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index ffb4dd34f..5ce8346f6 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -3,12 +3,9 @@ use std::{ collections::HashMap, - env, - ffi::OsStr, - fs, io, - os::unix::ffi::OsStrExt, + env, fs, io, path::{Path, PathBuf}, - process::{Command, Stdio}, + process::Command, str::FromStr, }; @@ -72,11 +69,9 @@ fn main() -> io::Result<()> { copy_buildenv_to_crate(&crate_dir)?; let nix_env = NixEnv::new(&crate_dir)?; - let host_sysroot = find_clang_host_sysroot(&nix_env, &crate_dir)?; let wasm = build_wasm_codec( &nix_env, - &host_sysroot, &target_dir, &crate_dir, &format!("{}-wasm", args.crate_), @@ -281,74 +276,9 @@ impl NixEnv { } } -fn find_clang_host_sysroot(nix_env: &NixEnv, flake_parent_dir: &Path) -> io::Result { - let NixEnv { clang, .. } = nix_env; - - let mut cmd = Command::new("nix"); - cmd.current_dir(flake_parent_dir); - cmd.arg("develop"); - // cmd.arg("--store"); - // cmd.arg(nix_store_path); - cmd.arg("--no-update-lock-file"); - cmd.arg("--ignore-environment"); - cmd.arg("path:."); - cmd.arg("--command"); - cmd.arg(clang.join("clang")); - cmd.arg("-v"); - cmd.arg("-x"); - cmd.arg("c"); - cmd.arg("-c"); - cmd.arg("-"); - cmd.stdin(Stdio::null()); - - eprintln!("executing {cmd:?}"); - - let output = cmd.output()?; - let Some(full_include) = output - .stderr - .split(|x| *x == b'\n') - .skip_while(|x| x.trim_ascii() != b"#include <...> search starts here:") - .nth(1) - else { - return Err(io::Error::other(format!( - "failed to find #include <...> search path for clang in {:?}", - String::from_utf8_lossy(&output.stderr) - ))); - }; - let full_include = Path::new(OsStr::from_bytes(full_include.trim_ascii())); - let mut include = if full_include.ends_with("include") - && let Some(include) = full_include.parent() - { - include - } else { - return Err(io::Error::other(format!( - "clang #include <...> search path {} should end in /usr/.../include", - full_include.display() - ))); - }; - while !include.ends_with("usr") - && let Some(include_parent) = include.parent() - { - include = include_parent; - } - let include = if include.ends_with("usr") - && let Some(include) = include.parent() - { - include - } else { - return Err(io::Error::other(format!( - "clang #include <...> search path {} should end in /usr/.../include", - full_include.display() - ))); - }; - - Ok(PathBuf::from(include)) -} - #[expect(clippy::too_many_lines)] fn configure_cargo_cmd( nix_env: &NixEnv, - host_sysroot: &Path, target_dir: &Path, crate_dir: &Path, debug: bool, @@ -455,7 +385,7 @@ fn configure_cargo_cmd( cmd.arg(format!( "CXXFLAGSHOST=-isysroot {host_sysroot} -isystem {host_libcxx_include} \ -isystem {clang_include}", - host_sysroot = host_sysroot.display(), + host_sysroot = wasi_sysroot.display(), // I mean, what could go wrong? host_libcxx_include = host_libcxx.join("include").join("c++").join("v1").display(), clang_include = libclang .join("clang") @@ -535,14 +465,13 @@ fn configure_cargo_cmd( fn build_wasm_codec( nix_env: &NixEnv, - host_sysroot: &Path, target_dir: &Path, crate_dir: &Path, crate_name: &str, debug: bool, verbose: bool, ) -> io::Result { - let mut cmd = configure_cargo_cmd(nix_env, host_sysroot, target_dir, crate_dir, debug); + let mut cmd = configure_cargo_cmd(nix_env, target_dir, crate_dir, debug); cmd.arg("rustc") .arg("--crate-type=cdylib") .arg("-Z") From 1c7e05404329d828666dcccc61b2d4d5d8e6d616 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Mar 2026 11:10:43 +0200 Subject: [PATCH 25/43] try again --- crates/numcodecs-wasm-builder/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index 5ce8346f6..d2a5851ad 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -385,7 +385,7 @@ fn configure_cargo_cmd( cmd.arg(format!( "CXXFLAGSHOST=-isysroot {host_sysroot} -isystem {host_libcxx_include} \ -isystem {clang_include}", - host_sysroot = wasi_sysroot.display(), // I mean, what could go wrong? + host_sysroot = wasi_sysroot.join("include").join("wasm32-wasi").display(), // I mean, what could go wrong? host_libcxx_include = host_libcxx.join("include").join("c++").join("v1").display(), clang_include = libclang .join("clang") From c4be1f542321f73347b124e658d1b3c500773cf1 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 2 Mar 2026 16:30:45 +0200 Subject: [PATCH 26/43] separate error for en/de-code to array without data --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 666a5f759..c5b0d52fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "6329913", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "7c38eb3", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 40b008b06..296d3d7d2 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -244,18 +244,43 @@ impl Codec for PressioCodec { Ok(data) => libpressio::PressioData::new(data), Err(data) => libpressio::PressioData::new_copied(data.view()), }; + eprintln!( + "data: {} {} {} {:?}", + data.has_data(), + data.len(), + data.ndim(), + data.dtype() + ); let compressed_data = libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); + eprintln!( + "compressed: {} {} {} {:?}", + compressed_data.has_data(), + compressed_data.len(), + compressed_data.ndim(), + compressed_data.dtype() + ); let compressed_data = compressor.compress(&data, compressed_data).map_err(|err| { PressioCodecError::PressioEncodeFailed { source: PressioCodingError(err), } })?; + eprintln!( + "compressed: {} {} {} {:?}", + compressed_data.has_data(), + compressed_data.len(), + compressed_data.ndim(), + compressed_data.dtype() + ); let Some(compressed_data) = compressed_data.clone_into_array() else { - return Err(PressioCodecError::EncodeToUnknownDtype); + if compressed_data.has_data() { + return Err(PressioCodecError::EncodeToUnknownDtype); + } + + return Err(PressioCodecError::EncodeToArrayWithoutData); }; match compressed_data { @@ -315,7 +340,11 @@ impl Codec for PressioCodec { })?; let Some(decompressed_data) = decompressed_data.clone_into_array() else { - return Err(PressioCodecError::DecodeToUnknownDtype); + if decompressed_data.has_data() { + return Err(PressioCodecError::DecodeToUnknownDtype); + } + + return Err(PressioCodecError::DecodeToArrayWithoutData); }; match decompressed_data { @@ -398,6 +427,9 @@ pub enum PressioCodecError { /// [`PressioCodec`] encoded to an unknown unsupported dtype #[error("Pressio encoded to an unknown unsupported dtype")] EncodeToUnknownDtype, + /// [`PressioCodec`] encoded to an array without data + #[error("Pressio encoded to an array without data")] + EncodeToArrayWithoutData, /// [`PressioCodec`] encoded to a bool array, which is unsupported #[error("Pressio encoded to a bool array, which is unsupported")] EncodeToBoolArray, @@ -410,6 +442,9 @@ pub enum PressioCodecError { /// [`PressioCodec`] decoded to an unknown unsupported dtype #[error("Pressio decoded to an unknown unsupported dtype")] DecodeToUnknownDtype, + /// [`PressioCodec`] decoded to an array without data + #[error("Pressio decoded to an array without data")] + DecodeToArrayWithoutData, /// [`PressioCodec`] decoded to a bool array, which is unsupported #[error("Pressio decoded to a bool array, which is unsupported")] DecodeToBoolArray, From 4cf08aeedf1dfbb519ec48d80d5833e8c563858b Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 9 Mar 2026 10:37:44 +0200 Subject: [PATCH 27/43] Enable more plugins --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 58 ++++++++++++++++----------------------- 2 files changed, 24 insertions(+), 36 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c5b0d52fb..490d32d41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "7c38eb3", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5e533d8", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 296d3d7d2..df38fad0f 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -240,33 +240,17 @@ impl Codec for PressioCodec { compressor: &mut libpressio::PressioCompressor, data: CowArray, ) -> Result { - let data = match data.try_into_owned_nocopy() { - Ok(data) => libpressio::PressioData::new(data), - Err(data) => libpressio::PressioData::new_copied(data.view()), - }; - eprintln!( - "data: {} {} {} {:?}", - data.has_data(), - data.len(), - data.ndim(), - data.dtype() - ); - - let compressed_data = - libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); - eprintln!( - "compressed: {} {} {} {:?}", - compressed_data.has_data(), - compressed_data.len(), - compressed_data.ndim(), - compressed_data.dtype() - ); + let compressed_data = libpressio::PressioData::new_with_shared(data, |data| { + let compressed_data = + libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); - let compressed_data = compressor.compress(&data, compressed_data).map_err(|err| { - PressioCodecError::PressioEncodeFailed { - source: PressioCodingError(err), - } + compressor.compress(data, compressed_data).map_err(|err| { + PressioCodecError::PressioEncodeFailed { + source: PressioCodingError(err), + } + }) })?; + eprintln!( "compressed: {} {} {} {:?}", compressed_data.has_data(), @@ -324,20 +308,24 @@ impl Codec for PressioCodec { compressor: &mut libpressio::PressioCompressor, encoded: CowArray, ) -> Result { - let encoded = match encoded.try_into_owned_nocopy() { - Ok(encoded) => libpressio::PressioData::new(encoded), - Err(encoded) => libpressio::PressioData::new_copied(encoded.view()), - }; + let decompressed_data = libpressio::PressioData::new_with_shared(encoded, |encoded| { + let decompressed_data = + libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); - let decompressed_data = - libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); - - let decompressed_data = compressor - .compress(&encoded, decompressed_data) + .compress(encoded, decompressed_data) .map_err(|err| PressioCodecError::PressioDecodeFailed { source: PressioCodingError(err), - })?; + }) + })?; + + eprintln!( + "decompressed: {} {} {} {:?}", + decompressed_data.has_data(), + decompressed_data.len(), + decompressed_data.ndim(), + decompressed_data.dtype() + ); let Some(decompressed_data) = decompressed_data.clone_into_array() else { if decompressed_data.has_data() { From 3061b57c38ddf193207149e717457d584efc2176 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 10 Mar 2026 09:31:27 +0200 Subject: [PATCH 28/43] port pressio_register_generator to CMake --- Cargo.toml | 2 +- crates/numcodecs-wasm-builder/buildenv/flake.nix | 2 -- crates/numcodecs-wasm-builder/src/main.rs | 14 -------------- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 490d32d41..0f9fb2837 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -146,7 +146,7 @@ zstd = { version = "0.13", default-features = false } zstd-sys = { version = "2.0.16", default-features = false } # git third-party dependencies with non-upstream fixes -wasm_component_layer = { git = "https://github.com/juntyr/wasm_component_layer.git", rev = "e923536", version = "0.1", default-features = false } +wasm_component_layer = { git = "https://github.com/juntyr/wasm_component_layer.git", rev = "1b8708e", version = "0.1", default-features = false } [workspace.lints.rust] unsafe_code = "deny" diff --git a/crates/numcodecs-wasm-builder/buildenv/flake.nix b/crates/numcodecs-wasm-builder/buildenv/flake.nix index 22ce9af96..b02efb029 100644 --- a/crates/numcodecs-wasm-builder/buildenv/flake.nix +++ b/crates/numcodecs-wasm-builder/buildenv/flake.nix @@ -61,7 +61,6 @@ packages = [ (pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain) pkgs."llvmPackages_${llvmVersion}".libclang - pkgs."llvmPackages_${llvmVersion}".libcxx wasi-sysroot libclang_rt pkgs.cmake @@ -85,7 +84,6 @@ MY_WASM_OPT = "${pkgs.binaryen}/bin/wasm-opt"; MY_PKG_CONFIG = "${pkgs.pkg-config}/bin/pkg-config"; MY_PYTHON3 = "${pkgs.python3}/bin/python3"; - MY_HOST_LIBCXX = "${pkgs."llvmPackages_${llvmVersion}".libcxx.dev}"; }; }; }); diff --git a/crates/numcodecs-wasm-builder/src/main.rs b/crates/numcodecs-wasm-builder/src/main.rs index d2a5851ad..329d6acf0 100644 --- a/crates/numcodecs-wasm-builder/src/main.rs +++ b/crates/numcodecs-wasm-builder/src/main.rs @@ -203,7 +203,6 @@ struct NixEnv { pkg_config: PathBuf, #[expect(dead_code)] python3: PathBuf, - host_libcxx: PathBuf, } impl NixEnv { @@ -271,7 +270,6 @@ impl NixEnv { wasm_opt: try_read_env(&env, "MY_WASM_OPT")?, pkg_config: try_read_env(&env, "MY_PKG_CONFIG")?, python3: try_read_env(&env, "MY_PYTHON3")?, - host_libcxx: try_read_env(&env, "MY_HOST_LIBCXX")?, }) } } @@ -297,7 +295,6 @@ fn configure_cargo_cmd( wasi_sysroot, libclang_rt, pkg_config, - host_libcxx, .. } = nix_env; @@ -382,17 +379,6 @@ fn configure_cargo_cmd( cpp_include_path = crate_dir.join("include.hpp").display(), debug = if debug { "-g" } else { "" }, )); - cmd.arg(format!( - "CXXFLAGSHOST=-isysroot {host_sysroot} -isystem {host_libcxx_include} \ - -isystem {clang_include}", - host_sysroot = wasi_sysroot.join("include").join("wasm32-wasi").display(), // I mean, what could go wrong? - host_libcxx_include = host_libcxx.join("include").join("c++").join("v1").display(), - clang_include = libclang - .join("clang") - .join(llvm_version) - .join("include") - .display(), - )); cmd.arg(format!( "BINDGEN_EXTRA_CLANG_ARGS=--target=wasm32-wasip1 -nodefaultlibs -resource-dir \ {resource_dir} --sysroot={wasi_sysroot} -isystem {wasm32_wasi_cxx_include} -isystem \ From c6472c0a266b245b5b76c046879fb27ebd391032 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 10 Mar 2026 09:33:15 +0200 Subject: [PATCH 29/43] Fix git rev --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0f9fb2837..1f38002b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5e533d8", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "1b8708e", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy @@ -146,7 +146,7 @@ zstd = { version = "0.13", default-features = false } zstd-sys = { version = "2.0.16", default-features = false } # git third-party dependencies with non-upstream fixes -wasm_component_layer = { git = "https://github.com/juntyr/wasm_component_layer.git", rev = "1b8708e", version = "0.1", default-features = false } +wasm_component_layer = { git = "https://github.com/juntyr/wasm_component_layer.git", rev = "e923536", version = "0.1", default-features = false } [workspace.lints.rust] unsafe_code = "deny" From dbb9407a0b77b3a2a8a87b4c3821833dba470c9d Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 10 Mar 2026 09:50:47 +0200 Subject: [PATCH 30/43] add LIBPRESSIO_WITH_EXTERNAL option --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1f38002b8..acee0f2d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "1b8708e", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "6b65157", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From f8b3493d915115195644bfc2ac8a557372bb0875 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 11 Mar 2026 10:53:12 +0200 Subject: [PATCH 31/43] Start bringing codec config closer to existing libpressio numcodecs interface --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 412 ++++++++++++++++++++++++++++---------- 2 files changed, 304 insertions(+), 110 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index acee0f2d2..310f565f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "6b65157", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "ec5df41", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index df38fad0f..fde1210f6 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -17,23 +17,17 @@ //! //! libpressio codec wrapper for the [`numcodecs`] API. -use std::{ - borrow::Cow, - collections::BTreeMap, - sync::{LazyLock, Mutex}, -}; +use std::{borrow::Cow, collections::BTreeMap, sync::Mutex}; -use ndarray::{CowArray, IxDyn}; +use ndarray::{ArrayView, ArrayViewMut, CowArray, IxDyn}; use numcodecs::{ AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, }; -use schemars::{JsonSchema, Schema, SchemaGenerator}; +use schemars::{JsonSchema, Schema, SchemaGenerator, json_schema}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use thiserror::Error; -static PRESSIO: LazyLock = LazyLock::new(Pressio::new); - #[derive(Clone, Serialize, Deserialize, JsonSchema)] #[schemars(deny_unknown_fields)] /// Pressio codec which applies the identity function, i.e. passes through the @@ -56,12 +50,10 @@ pub struct PressioCompressor { impl Clone for PressioCompressor { #[expect(clippy::unwrap_used)] fn clone(&self) -> Self { - let mut compressor = { - let mut pressio = PRESSIO.get_or_unwrap().lock().unwrap(); - pressio - .get_compressor(self.format.compressor.as_str()) - .unwrap() - }; + let mut pressio = libpressio::Pressio::new().unwrap(); + let mut compressor = pressio + .get_compressor(self.format.compressor_id.as_str()) + .unwrap(); let options = self.compressor.lock().unwrap().get_options().unwrap(); compressor.set_options(&options).unwrap(); @@ -80,89 +72,102 @@ impl Serialize for PressioCompressor { impl<'de> Deserialize<'de> for PressioCompressor { fn deserialize>(deserializer: D) -> Result { - // TODO: better error handling - let format = PressioCompressorFormat::deserialize(deserializer)?; - let compressor = { - let pressio = PRESSIO - .get() - .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; - let mut pressio = pressio.lock().map_err(serde::de::Error::custom)?; - pressio - .get_compressor(format.compressor.as_str()) - .map_err(|err| { - let supported_compressors = pressio.supported_compressors().map_or_else( - |_| String::from(""), - |x| { - x.iter() - .map(|x| format!("`{x}`")) - .collect::>() - .join(", ") - }, - ); - - serde::de::Error::custom(format_args!( - "{}, choose one of: {}", - err.message, supported_compressors - )) - })? - }; - let mut options = compressor - .get_options() - .map_err(|err| serde::de::Error::custom(err.message))?; - - for (key, value) in &format.options { - options = options - .set( - key, - match value { - PressioOption::Bool(x) => libpressio::PressioOption::bool(Some(*x)), - PressioOption::U8(x) => libpressio::PressioOption::uint8(Some(*x)), - PressioOption::I8(x) => libpressio::PressioOption::int8(Some(*x)), - PressioOption::U16(x) => libpressio::PressioOption::uint16(Some(*x)), - PressioOption::I16(x) => libpressio::PressioOption::int16(Some(*x)), - PressioOption::U32(x) => libpressio::PressioOption::uint32(Some(*x)), - PressioOption::I32(x) => libpressio::PressioOption::int32(Some(*x)), - PressioOption::U64(x) => libpressio::PressioOption::uint64(Some(*x)), - PressioOption::I64(x) => libpressio::PressioOption::int64(Some(*x)), - PressioOption::F32(x) => libpressio::PressioOption::float32(Some(*x)), - PressioOption::F64(x) => libpressio::PressioOption::float64(Some(*x)), + fn convert_to_pressio_options( + config: &BTreeMap, + ) -> Result { + let mut options = libpressio::PressioOptions::new()?; + + let mut entries = vec![(vec![], config)]; + + while let Some((path, entry)) = entries.pop() { + for (key, value) in entry { + let option = match value { + PressioOption::None(None) => Option::None, + PressioOption::Bool(x) => Some(libpressio::PressioOption::bool(Some(*x))), + PressioOption::U8(x) => Some(libpressio::PressioOption::uint8(Some(*x))), + PressioOption::I8(x) => Some(libpressio::PressioOption::int8(Some(*x))), + PressioOption::U16(x) => Some(libpressio::PressioOption::uint16(Some(*x))), + PressioOption::I16(x) => Some(libpressio::PressioOption::int16(Some(*x))), + PressioOption::U32(x) => Some(libpressio::PressioOption::uint32(Some(*x))), + PressioOption::I32(x) => Some(libpressio::PressioOption::int32(Some(*x))), + PressioOption::U64(x) => Some(libpressio::PressioOption::uint64(Some(*x))), + PressioOption::I64(x) => Some(libpressio::PressioOption::int64(Some(*x))), + PressioOption::F32(x) => Some(libpressio::PressioOption::float32(Some(*x))), + PressioOption::F64(x) => Some(libpressio::PressioOption::float64(Some(*x))), PressioOption::String(x) => { - libpressio::PressioOption::string(Some(x.clone())) + Some(libpressio::PressioOption::string(Some(x.clone()))) } PressioOption::VecString(x) => { - libpressio::PressioOption::vec_string(Some(x.clone())) + Some(libpressio::PressioOption::vec_string(Some(x.clone()))) } + PressioOption::Nested(entry) => { + let mut nested_path = path.clone(); + nested_path.push(key.clone()); + entries.push((nested_path, entry)); + continue; + } + }; + + let name = if path.is_empty() { + key.clone() + } else { + format!("{path}:{key}", path = path.join("/")) + }; + + if let Some(option) = option { + options = options.set(name, option)?; + } + } + } + + Ok(options) + } + + // TODO: better error handling + let format = PressioCompressorFormat::deserialize(deserializer)?; + + let mut pressio = libpressio::Pressio::new() + .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + let mut compressor = pressio + .get_compressor(format.compressor_id.as_str()) + .map_err(|err| { + let supported_compressors = pressio.supported_compressors().map_or_else( + |_| String::from(""), + |x| { + x.iter() + .map(|x| format!("`{x}`")) + .collect::>() + .join(", ") }, - ) - .map_err(|err| serde::de::Error::custom(err.message))?; + ); + + serde::de::Error::custom(format_args!( + "{}, choose one of: {}", + err.message, supported_compressors + )) + })?; + + if let Some(name) = &format.name { + compressor + .set_name(name) + .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; } - let mut format = format; - if let Ok(format_options) = options.get_options() { - format.options = format_options - .into_iter() - .filter_map(|(k, v)| match v { - libpressio::PressioOption::bool(Some(x)) => Some((k, PressioOption::Bool(x))), - libpressio::PressioOption::int8(Some(x)) => Some((k, PressioOption::I8(x))), - libpressio::PressioOption::int16(Some(x)) => Some((k, PressioOption::I16(x))), - libpressio::PressioOption::int32(Some(x)) => Some((k, PressioOption::I32(x))), - libpressio::PressioOption::int64(Some(x)) => Some((k, PressioOption::I64(x))), - libpressio::PressioOption::uint8(Some(x)) => Some((k, PressioOption::U8(x))), - libpressio::PressioOption::uint16(Some(x)) => Some((k, PressioOption::U16(x))), - libpressio::PressioOption::uint32(Some(x)) => Some((k, PressioOption::U32(x))), - libpressio::PressioOption::uint64(Some(x)) => Some((k, PressioOption::U64(x))), - libpressio::PressioOption::float32(Some(x)) => Some((k, PressioOption::F32(x))), - libpressio::PressioOption::float64(Some(x)) => Some((k, PressioOption::F64(x))), - libpressio::PressioOption::string(Some(x)) => { - Some((k, PressioOption::String(x))) - } - // FIXME: seems to return strings as a single joined string - libpressio::PressioOption::vec_string(Some(x)) => { - Some((k, PressioOption::VecString(x))) - } - _ => None, - }) - .collect(); + let early_options = convert_to_pressio_options(&format.early_config) + .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + compressor + .set_options(&early_options) + .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + + let _options_template = compressor + .get_options() + .map_err(|err| serde::de::Error::custom(err.message))?; + + if !format.compressor_config.is_empty() { + // TODO + return Err(serde::de::Error::custom( + "compressor_config is not yet supported", + )); } Ok(Self { @@ -185,10 +190,17 @@ impl JsonSchema for PressioCompressor { #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename = "PressioCompressor")] struct PressioCompressorFormat { - compressor: String, - // TODO: flatten + /// The id of the compressor + compressor_id: String, + /// Configuration for the structure of the compressor #[serde(default)] - options: BTreeMap, + early_config: BTreeMap, + /// Configuration for the compressor + #[serde(default)] + compressor_config: BTreeMap, + /// Optional name for the compressor when used in hierarchical mode + #[serde(default)] + name: Option, } #[expect(missing_docs)] @@ -196,6 +208,7 @@ struct PressioCompressorFormat { #[serde(untagged)] /// Pressio option value pub enum PressioOption { + None(None), Bool(bool), U8(u8), I8(i8), @@ -209,26 +222,50 @@ pub enum PressioOption { F64(f64), String(String), VecString(Vec), + Nested(BTreeMap), } -struct Pressio { - pressio: Result, libpressio::PressioError>, +#[derive(Copy, Clone, Debug)] +/// Equivalent of `Option::None` +pub struct None; + +impl Serialize for None { + fn serialize(&self, serializer: S) -> Result { + serializer.serialize_none() + } } -impl Pressio { - fn new() -> Self { - Self { - pressio: libpressio::Pressio::new().map(Mutex::new), +impl<'de> Deserialize<'de> for None { + fn deserialize>(deserializer: D) -> Result { + enum Never {} + + impl<'de> Deserialize<'de> for Never { + fn deserialize>(_deserializer: D) -> Result { + Err(serde::de::Error::custom("never")) + } } + + match Option::::deserialize(deserializer) { + Ok(Option::Some(x)) => match x {}, + Ok(Option::None) => Ok(Self), + Err(err) => Err(err), + } + } +} + +impl JsonSchema for None { + fn schema_name() -> Cow<'static, str> { + Cow::Borrowed("null") } - const fn get(&self) -> Result<&Mutex, &libpressio::PressioError> { - self.pressio.as_ref() + fn inline_schema() -> bool { + true } - #[expect(clippy::unwrap_used)] - fn get_or_unwrap(&self) -> &Mutex { - self.pressio.as_ref().unwrap() + fn json_schema(_generator: &mut SchemaGenerator) -> Schema { + json_schema!({ + "type": "null" + }) } } @@ -371,15 +408,172 @@ impl Codec for PressioCodec { } } + #[expect(clippy::too_many_lines)] // FIXME fn decode_into( &self, encoded: AnyArrayView, - mut decoded: AnyArrayViewMut, + decoded: AnyArrayViewMut, ) -> Result<(), Self::Error> { - // TODO: optimize - let decoded_in = self.decode(encoded.cow())?; + fn decompress_typed( + compressor: &mut libpressio::PressioCompressor, + encoded: ArrayView, + decoded_dtype: libpressio::PressioDtype, + decoded_shape: &[usize], + ) -> Result { + libpressio::PressioData::new_with_shared(encoded, |encoded| { + let decompressed_data = + libpressio::PressioData::new_empty(decoded_dtype, decoded_shape); + + compressor + .compress(encoded, decompressed_data) + .map_err(|err| PressioCodecError::PressioDecodeFailed { + source: PressioCodingError(err), + }) + }) + } + + fn decode_into_typed( + decompressed_data: &libpressio::PressioData, + mut decoded: ArrayViewMut, + ) -> Result<(), PressioCodecError> { + eprintln!( + "decompressed into: {} {} {} {:?}", + decompressed_data.has_data(), + decompressed_data.len(), + decompressed_data.ndim(), + decompressed_data.dtype() + ); + + if !decompressed_data.has_data() { + return Err(PressioCodecError::DecodeToArrayWithoutData); + } + + let dtype = match ::DTYPE { + libpressio::PressioDtype::Bool => { + return Err(PressioCodecError::DecodeToBoolArray); + } + libpressio::PressioDtype::Byte | libpressio::PressioDtype::U8 => AnyArrayDType::U8, + libpressio::PressioDtype::U16 => AnyArrayDType::U16, + libpressio::PressioDtype::U32 => AnyArrayDType::U32, + libpressio::PressioDtype::U64 => AnyArrayDType::U64, + libpressio::PressioDtype::I8 => AnyArrayDType::I8, + libpressio::PressioDtype::I16 => AnyArrayDType::I16, + libpressio::PressioDtype::I32 => AnyArrayDType::I32, + libpressio::PressioDtype::I64 => AnyArrayDType::I64, + libpressio::PressioDtype::F32 => AnyArrayDType::F32, + libpressio::PressioDtype::F64 => AnyArrayDType::F64, + }; + let decompressed_dtype = match decompressed_data.dtype() { + Option::None => return Err(PressioCodecError::DecodeToUnknownDtype), + Some(libpressio::PressioDtype::Bool) => { + return Err(PressioCodecError::DecodeToBoolArray); + } + Some(libpressio::PressioDtype::Byte | libpressio::PressioDtype::U8) => { + AnyArrayDType::U8 + } + Some(libpressio::PressioDtype::U16) => AnyArrayDType::U16, + Some(libpressio::PressioDtype::U32) => AnyArrayDType::U32, + Some(libpressio::PressioDtype::U64) => AnyArrayDType::U64, + Some(libpressio::PressioDtype::I8) => AnyArrayDType::I8, + Some(libpressio::PressioDtype::I16) => AnyArrayDType::I16, + Some(libpressio::PressioDtype::I32) => AnyArrayDType::I32, + Some(libpressio::PressioDtype::I64) => AnyArrayDType::I64, + Some(libpressio::PressioDtype::F32) => AnyArrayDType::F32, + Some(libpressio::PressioDtype::F64) => AnyArrayDType::F64, + }; + + if dtype != decompressed_dtype { + return Err(PressioCodecError::MismatchedDecodeIntoArray { + source: AnyArrayAssignError::DTypeMismatch { + src: decompressed_dtype, + dst: dtype, + }, + }); + } - Ok(decoded.assign(&decoded_in)?) + if decompressed_data + .with_shared::(decoded.dim(), |decompressed| { + decoded.assign(&decompressed); + }) + .is_none() + { + return Err(PressioCodecError::MismatchedDecodeIntoArray { + source: AnyArrayAssignError::ShapeMismatch { + src: decompressed_data.shape(), + dst: decoded.shape().to_vec(), + }, + }); + } + + Ok(()) + } + + let Ok(mut compressor) = self.compressor.compressor.lock() else { + return Err(PressioCodecError::PressioPoisonedMutex); + }; + + let decoded_dtype = match decoded.dtype() { + AnyArrayDType::U8 => libpressio::PressioDtype::U8, + AnyArrayDType::U16 => libpressio::PressioDtype::U16, + AnyArrayDType::U32 => libpressio::PressioDtype::U32, + AnyArrayDType::U64 => libpressio::PressioDtype::U64, + AnyArrayDType::I8 => libpressio::PressioDtype::I8, + AnyArrayDType::I16 => libpressio::PressioDtype::I16, + AnyArrayDType::I32 => libpressio::PressioDtype::I32, + AnyArrayDType::I64 => libpressio::PressioDtype::I64, + AnyArrayDType::F32 => libpressio::PressioDtype::F32, + AnyArrayDType::F64 => libpressio::PressioDtype::F64, + decoded_dtype => return Err(PressioCodecError::UnsupportedDtype(decoded_dtype)), + }; + let decoded_shape = decoded.shape(); + + let decompressed_data = match encoded { + AnyArrayView::U8(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::U16(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::U32(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::U64(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::I8(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::I16(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::I32(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::I64(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::F32(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + AnyArrayView::F64(encoded) => { + decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + } + encoded => return Err(PressioCodecError::UnsupportedDtype(encoded.dtype())), + }?; + + match decoded { + AnyArrayViewMut::U8(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::U16(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::U32(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::U64(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::I8(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::I16(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::I32(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::I64(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::F32(decoded) => decode_into_typed(&decompressed_data, decoded), + AnyArrayViewMut::F64(decoded) => decode_into_typed(&decompressed_data, decoded), + decoded => Err(PressioCodecError::UnsupportedDtype(decoded.dtype())), + } } } From 683eec97847f14d70fb94fc37ce0bdbe2675f6d0 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 11 Mar 2026 11:52:07 +0200 Subject: [PATCH 32/43] Fix clippy lint --- codecs/pressio/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index fde1210f6..7c2723349 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -222,7 +222,7 @@ pub enum PressioOption { F64(f64), String(String), VecString(Vec), - Nested(BTreeMap), + Nested(BTreeMap), } #[derive(Copy, Clone, Debug)] From bad2acd11339412a7586cc93650255784724bee0 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 12 Mar 2026 12:11:19 +0200 Subject: [PATCH 33/43] Progress with serialising the compressor config --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 197 +++++++++++++++++++++++++++++++------- 2 files changed, 165 insertions(+), 34 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 310f565f5..15c8c26d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "ec5df41", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "bed5dc3", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 7c2723349..a664e0acc 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -17,7 +17,11 @@ //! //! libpressio codec wrapper for the [`numcodecs`] API. -use std::{borrow::Cow, collections::BTreeMap, sync::Mutex}; +use std::{ + borrow::Cow, + collections::{BTreeMap, btree_map::Entry}, + sync::Mutex, +}; use ndarray::{ArrayView, ArrayViewMut, CowArray, IxDyn}; use numcodecs::{ @@ -43,39 +47,137 @@ pub struct PressioCodec { /// Pressio compressor pub struct PressioCompressor { - format: PressioCompressorFormat, compressor: Mutex, + compressor_id: String, + early_config: BTreeMap, + name: Option, } impl Clone for PressioCompressor { #[expect(clippy::unwrap_used)] fn clone(&self) -> Self { let mut pressio = libpressio::Pressio::new().unwrap(); - let mut compressor = pressio - .get_compressor(self.format.compressor_id.as_str()) - .unwrap(); + let mut compressor = pressio.get_compressor(self.compressor_id.as_str()).unwrap(); + if let Some(name) = &self.name { + compressor.set_name(name).unwrap(); + } let options = self.compressor.lock().unwrap().get_options().unwrap(); compressor.set_options(&options).unwrap(); Self { - format: self.format.clone(), compressor: Mutex::new(compressor), + compressor_id: self.compressor_id.clone(), + early_config: self.early_config.clone(), + name: self.name.clone(), } } } impl Serialize for PressioCompressor { fn serialize(&self, serializer: S) -> Result { - self.format.serialize(serializer) + fn convert_from_pressio_options( + options: BTreeMap, + ) -> Result, E> { + let mut config = BTreeMap::new(); + + for (name, option) in options { + let value = match option { + libpressio::PressioOption::bool(Some(x)) => PressioOption::Bool(x), + libpressio::PressioOption::int8(Some(x)) => PressioOption::I8(x), + libpressio::PressioOption::int16(Some(x)) => PressioOption::I16(x), + libpressio::PressioOption::int32(Some(x)) => PressioOption::I32(x), + libpressio::PressioOption::int64(Some(x)) => PressioOption::I64(x), + libpressio::PressioOption::uint8(Some(x)) => PressioOption::U8(x), + libpressio::PressioOption::uint16(Some(x)) => PressioOption::U16(x), + libpressio::PressioOption::uint32(Some(x)) => PressioOption::U32(x), + libpressio::PressioOption::uint64(Some(x)) => PressioOption::U64(x), + libpressio::PressioOption::float32(Some(x)) => PressioOption::F32(x), + libpressio::PressioOption::float64(Some(x)) => PressioOption::F64(x), + libpressio::PressioOption::string(Some(x)) => PressioOption::String(x), + // FIXME: seems to return strings as a single joined string + libpressio::PressioOption::vec_string(Some(x)) => PressioOption::VecString(x), + libpressio::PressioOption::data(_) + | libpressio::PressioOption::user_ptr(_) + | libpressio::PressioOption::unset + | _ /* non-exhaustive */ => continue, + }; + + let Some(nested_name) = name.strip_prefix('/') else { + // global option + if config.insert(name.clone(), value).is_some() { + return Err(serde::ser::Error::custom(format!( + "duplicate option {name:?}" + ))); + } + continue; + }; + + // hierarchical option + let mut parts = nested_name.split(':').peekable(); + + let Some(first) = parts.next() else { + return Err(serde::ser::Error::custom(format!( + "invalid hierarchical config name {name:?}" + ))); + }; + let paths = first.split('/'); + + if parts.peek().is_none() { + return Err(serde::ser::Error::custom(format!( + "invalid hierarchical config name {name:?}" + ))); + } + let option_name = parts.map(String::from).collect::>().join(":"); + + let mut it = &mut config; + for path in paths { + if let Entry::Vacant(entry) = it.entry(String::from(path)) { + entry.insert(PressioOption::Nested(BTreeMap::new())); + } + + let Some(PressioOption::Nested(entry)) = it.get_mut(path) else { + return Err(serde::ser::Error::custom(format!( + "duplicate option {path:?}" + ))); + }; + it = entry; + } + if it.insert(option_name.clone(), value).is_some() { + return Err(serde::ser::Error::custom(format!( + "duplicate option {option_name:?}" + ))); + } + } + + Ok(config) + } + + let options = { + let compressor = self.compressor.lock().map_err(serde::ser::Error::custom)?; + compressor + .get_options() + .map_err(serde::ser::Error::custom)? + }; + let options = options.get_options().map_err(serde::ser::Error::custom)?; + + PressioCompressorBorrowedFormat { + compressor_id: self.compressor_id.as_str(), + early_config: &self.early_config, + compressor_config: &convert_from_pressio_options(options)?, + name: self.name.as_deref(), + } + .serialize(serializer) } } impl<'de> Deserialize<'de> for PressioCompressor { fn deserialize>(deserializer: D) -> Result { - fn convert_to_pressio_options( + fn convert_to_pressio_options( config: &BTreeMap, - ) -> Result { - let mut options = libpressio::PressioOptions::new()?; + template: Option<&libpressio::PressioOptions>, + ) -> Result { + let mut options = + libpressio::PressioOptions::new().map_err(serde::de::Error::custom)?; let mut entries = vec![(vec![], config)]; @@ -114,8 +216,25 @@ impl<'de> Deserialize<'de> for PressioCompressor { format!("{path}:{key}", path = path.join("/")) }; + if let Some(template) = template { + if !template + .has_option(&name) + .map_err(serde::de::Error::custom)? + { + return Err(serde::de::Error::custom(format!( + "unknown compressor configuration option: {name:?}" + ))); + } + } + + // TODO: handle conversion and type errors + // TODO: check if the options were actually set + // (e.g. compressor names are validated and fallback to noop) + if let Some(option) = option { - options = options.set(name, option)?; + options + .set(name, option) + .map_err(serde::de::Error::custom)?; } } } @@ -124,10 +243,9 @@ impl<'de> Deserialize<'de> for PressioCompressor { } // TODO: better error handling - let format = PressioCompressorFormat::deserialize(deserializer)?; + let format = PressioCompressorOwnedFormat::deserialize(deserializer)?; - let mut pressio = libpressio::Pressio::new() - .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + let mut pressio = libpressio::Pressio::new().map_err(serde::de::Error::custom)?; let mut compressor = pressio .get_compressor(format.compressor_id.as_str()) .map_err(|err| { @@ -150,46 +268,43 @@ impl<'de> Deserialize<'de> for PressioCompressor { if let Some(name) = &format.name { compressor .set_name(name) - .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + .map_err(serde::de::Error::custom)?; } - let early_options = convert_to_pressio_options(&format.early_config) - .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; + let early_options = convert_to_pressio_options(&format.early_config, Option::None)?; compressor .set_options(&early_options) - .map_err(|err| serde::de::Error::custom(err.message.as_str()))?; - - let _options_template = compressor - .get_options() - .map_err(|err| serde::de::Error::custom(err.message))?; + .map_err(serde::de::Error::custom)?; + let options_template = compressor.get_options().map_err(serde::de::Error::custom)?; - if !format.compressor_config.is_empty() { - // TODO - return Err(serde::de::Error::custom( - "compressor_config is not yet supported", - )); - } + let options = + convert_to_pressio_options(&format.compressor_config, Some(&options_template))?; + compressor + .set_options(&options) + .map_err(serde::de::Error::custom)?; Ok(Self { - format, compressor: Mutex::new(compressor), + compressor_id: format.compressor_id, + early_config: format.early_config, + name: format.name, }) } } impl JsonSchema for PressioCompressor { fn schema_name() -> Cow<'static, str> { - PressioCompressorFormat::schema_name() + PressioCompressorOwnedFormat::schema_name() } fn json_schema(generator: &mut SchemaGenerator) -> Schema { - PressioCompressorFormat::json_schema(generator) + PressioCompressorOwnedFormat::json_schema(generator) } } -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Deserialize, JsonSchema)] #[serde(rename = "PressioCompressor")] -struct PressioCompressorFormat { +struct PressioCompressorOwnedFormat { /// The id of the compressor compressor_id: String, /// Configuration for the structure of the compressor @@ -203,6 +318,22 @@ struct PressioCompressorFormat { name: Option, } +#[derive(Debug, Serialize)] +#[serde(rename = "PressioCompressor")] +struct PressioCompressorBorrowedFormat<'a> { + /// The id of the compressor + compressor_id: &'a str, + /// Configuration for the structure of the compressor + #[serde(default)] + early_config: &'a BTreeMap, + /// Configuration for the compressor + #[serde(default)] + compressor_config: &'a BTreeMap, + /// Optional name for the compressor when used in hierarchical mode + #[serde(default)] + name: Option<&'a str>, +} + #[expect(missing_docs)] #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] #[serde(untagged)] From 75a7c580068a9926e7135d0e12ff6beb35c7c27e Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 13 Mar 2026 09:51:52 +0200 Subject: [PATCH 34/43] small code updates --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 15c8c26d0..a212e2363 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "bed5dc3", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "095bd73", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index a664e0acc..6e2580128 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -76,11 +76,16 @@ impl Clone for PressioCompressor { impl Serialize for PressioCompressor { fn serialize(&self, serializer: S) -> Result { fn convert_from_pressio_options( - options: BTreeMap, + options: impl Iterator, Option)>, ) -> Result, E> { let mut config = BTreeMap::new(); for (name, option) in options { + // skip invalid option names and values + let (Some(name), Some(option)) = (name, option) else { + continue; + }; + let value = match option { libpressio::PressioOption::bool(Some(x)) => PressioOption::Bool(x), libpressio::PressioOption::int8(Some(x)) => PressioOption::I8(x), @@ -158,12 +163,11 @@ impl Serialize for PressioCompressor { .get_options() .map_err(serde::ser::Error::custom)? }; - let options = options.get_options().map_err(serde::ser::Error::custom)?; PressioCompressorBorrowedFormat { compressor_id: self.compressor_id.as_str(), early_config: &self.early_config, - compressor_config: &convert_from_pressio_options(options)?, + compressor_config: &convert_from_pressio_options(options.iter())?, name: self.name.as_deref(), } .serialize(serializer) @@ -249,7 +253,7 @@ impl<'de> Deserialize<'de> for PressioCompressor { let mut compressor = pressio .get_compressor(format.compressor_id.as_str()) .map_err(|err| { - let supported_compressors = pressio.supported_compressors().map_or_else( + let supported_compressors = libpressio::supported_compressors().map_or_else( |_| String::from(""), |x| { x.iter() From 825849de374b7d92f047c8074a2fca115f5437c0 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 13 Mar 2026 13:11:06 +0200 Subject: [PATCH 35/43] fix some config bugs --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 62 +++++++++++++++------------------------ 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a212e2363..d047975c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "095bd73", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "d928ca7", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 6e2580128..1bb50d640 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -101,6 +101,8 @@ impl Serialize for PressioCompressor { libpressio::PressioOption::string(Some(x)) => PressioOption::String(x), // FIXME: seems to return strings as a single joined string libpressio::PressioOption::vec_string(Some(x)) => PressioOption::VecString(x), + libpressio::PressioOption::dtype(Some(x)) => PressioOption::String(format!("{x}")), + libpressio::PressioOption::thread_safety(Some(x)) => PressioOption::String(format!("{x}")), libpressio::PressioOption::data(_) | libpressio::PressioOption::user_ptr(_) | libpressio::PressioOption::unset @@ -111,7 +113,7 @@ impl Serialize for PressioCompressor { // global option if config.insert(name.clone(), value).is_some() { return Err(serde::ser::Error::custom(format!( - "duplicate option {name:?}" + "duplicate global option: {name:?}" ))); } continue; @@ -142,14 +144,14 @@ impl Serialize for PressioCompressor { let Some(PressioOption::Nested(entry)) = it.get_mut(path) else { return Err(serde::ser::Error::custom(format!( - "duplicate option {path:?}" + "duplicate option nesting: {path:?} in {name:?}" ))); }; it = entry; } if it.insert(option_name.clone(), value).is_some() { return Err(serde::ser::Error::custom(format!( - "duplicate option {option_name:?}" + "duplicate nested option: {option_name:?} in {name:?}" ))); } } @@ -175,6 +177,7 @@ impl Serialize for PressioCompressor { } impl<'de> Deserialize<'de> for PressioCompressor { + #[expect(clippy::too_many_lines)] // FIXME fn deserialize>(deserializer: D) -> Result { fn convert_to_pressio_options( config: &BTreeMap, @@ -217,25 +220,32 @@ impl<'de> Deserialize<'de> for PressioCompressor { let name = if path.is_empty() { key.clone() } else { - format!("{path}:{key}", path = path.join("/")) + format!("/{path}:{key}", path = path.join("/")) }; if let Some(template) = template { - if !template - .has_option(&name) - .map_err(serde::de::Error::custom)? - { + let Some(option_template) = + template.get(&name).map_err(serde::de::Error::custom)? + else { return Err(serde::de::Error::custom(format!( "unknown compressor configuration option: {name:?}" ))); - } - } + }; - // TODO: handle conversion and type errors - // TODO: check if the options were actually set - // (e.g. compressor names are validated and fallback to noop) + options + .set(&name, option_template.copy_type_only()) + .map_err(serde::de::Error::custom)?; - if let Some(option) = option { + if let Some(option) = option { + options + .set_with_cast( + name, + option, + libpressio::PressioConversionSafety::Special, + ) + .map_err(serde::de::Error::custom)?; + } + } else if let Some(option) = option { options .set(name, option) .map_err(serde::de::Error::custom)?; @@ -423,14 +433,6 @@ impl Codec for PressioCodec { }) })?; - eprintln!( - "compressed: {} {} {} {:?}", - compressed_data.has_data(), - compressed_data.len(), - compressed_data.ndim(), - compressed_data.dtype() - ); - let Some(compressed_data) = compressed_data.clone_into_array() else { if compressed_data.has_data() { return Err(PressioCodecError::EncodeToUnknownDtype); @@ -491,14 +493,6 @@ impl Codec for PressioCodec { }) })?; - eprintln!( - "decompressed: {} {} {} {:?}", - decompressed_data.has_data(), - decompressed_data.len(), - decompressed_data.ndim(), - decompressed_data.dtype() - ); - let Some(decompressed_data) = decompressed_data.clone_into_array() else { if decompressed_data.has_data() { return Err(PressioCodecError::DecodeToUnknownDtype); @@ -571,14 +565,6 @@ impl Codec for PressioCodec { decompressed_data: &libpressio::PressioData, mut decoded: ArrayViewMut, ) -> Result<(), PressioCodecError> { - eprintln!( - "decompressed into: {} {} {} {:?}", - decompressed_data.has_data(), - decompressed_data.len(), - decompressed_data.ndim(), - decompressed_data.dtype() - ); - if !decompressed_data.has_data() { return Err(PressioCodecError::DecodeToArrayWithoutData); } From feb2d856bc7665667b7bb567d5b8ed214c25d3fe Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 14 Mar 2026 08:51:26 +0200 Subject: [PATCH 36/43] include metric results in config and add help when deserialising options fails --- codecs/pressio/src/lib.rs | 95 ++++++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 1bb50d640..8fcd29d35 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -113,7 +113,7 @@ impl Serialize for PressioCompressor { // global option if config.insert(name.clone(), value).is_some() { return Err(serde::ser::Error::custom(format!( - "duplicate global option: {name:?}" + "duplicate global option: `{name}`" ))); } continue; @@ -124,14 +124,14 @@ impl Serialize for PressioCompressor { let Some(first) = parts.next() else { return Err(serde::ser::Error::custom(format!( - "invalid hierarchical config name {name:?}" + "invalid hierarchical config name `{name}`" ))); }; let paths = first.split('/'); if parts.peek().is_none() { return Err(serde::ser::Error::custom(format!( - "invalid hierarchical config name {name:?}" + "invalid hierarchical config name `{name}`" ))); } let option_name = parts.map(String::from).collect::>().join(":"); @@ -144,14 +144,14 @@ impl Serialize for PressioCompressor { let Some(PressioOption::Nested(entry)) = it.get_mut(path) else { return Err(serde::ser::Error::custom(format!( - "duplicate option nesting: {path:?} in {name:?}" + "duplicate option nesting: `{path}` in `{name}`" ))); }; it = entry; } if it.insert(option_name.clone(), value).is_some() { return Err(serde::ser::Error::custom(format!( - "duplicate nested option: {option_name:?} in {name:?}" + "duplicate nested option: `{option_name}` in `{name}`" ))); } } @@ -159,20 +159,28 @@ impl Serialize for PressioCompressor { Ok(config) } - let options = { - let compressor = self.compressor.lock().map_err(serde::ser::Error::custom)?; - compressor - .get_options() - .map_err(serde::ser::Error::custom)? - }; + let compressor = self.compressor.lock().map_err(serde::ser::Error::custom)?; + let options = compressor + .get_options() + .map_err(serde::ser::Error::custom)?; + let metric_results = compressor + .get_metric_results() + .map_err(serde::ser::Error::custom)?; + let name = compressor.get_name().map_err(serde::ser::Error::custom)?; - PressioCompressorBorrowedFormat { + let result = PressioCompressorBorrowedFormat { compressor_id: self.compressor_id.as_str(), early_config: &self.early_config, compressor_config: &convert_from_pressio_options(options.iter())?, - name: self.name.as_deref(), + metric_results: &convert_from_pressio_options(metric_results.iter())?, + name: match name { + "" => Option::None, + name => Some(name), + }, } - .serialize(serializer) + .serialize(serializer); + std::mem::drop(compressor); + result } } @@ -182,6 +190,7 @@ impl<'de> Deserialize<'de> for PressioCompressor { fn convert_to_pressio_options( config: &BTreeMap, template: Option<&libpressio::PressioOptions>, + documentation: &libpressio::PressioOptions, ) -> Result { let mut options = libpressio::PressioOptions::new().map_err(serde::de::Error::custom)?; @@ -227,8 +236,15 @@ impl<'de> Deserialize<'de> for PressioCompressor { let Some(option_template) = template.get(&name).map_err(serde::de::Error::custom)? else { + let supported_options = template + .iter() + .filter_map(|(key, _value)| key) + .map(|x| format!("`{x}`")) + .collect::>() + .join(", "); + return Err(serde::de::Error::custom(format!( - "unknown compressor configuration option: {name:?}" + "unknown compressor configuration option: `{name}`, use one of {supported_options}" ))); }; @@ -239,11 +255,24 @@ impl<'de> Deserialize<'de> for PressioCompressor { if let Some(option) = option { options .set_with_cast( - name, + &name, option, libpressio::PressioConversionSafety::Special, ) - .map_err(serde::de::Error::custom)?; + .map_err(|err| { + let docs = match documentation.get(&name) { + Ok(Some(libpressio::PressioOption::string(Some(docs)))) => { + Some(docs) + } + _ => Option::None, + }; + + if let Some(docs) = docs { + serde::de::Error::custom(format_args!("{err} ({docs})")) + } else { + serde::de::Error::custom(err) + } + })?; } } else if let Some(option) = option { options @@ -258,6 +287,7 @@ impl<'de> Deserialize<'de> for PressioCompressor { // TODO: better error handling let format = PressioCompressorOwnedFormat::deserialize(deserializer)?; + std::mem::drop(format.metric_results); let mut pressio = libpressio::Pressio::new().map_err(serde::de::Error::custom)?; let mut compressor = pressio @@ -274,8 +304,7 @@ impl<'de> Deserialize<'de> for PressioCompressor { ); serde::de::Error::custom(format_args!( - "{}, choose one of: {}", - err.message, supported_compressors + "{err}, choose one of: {supported_compressors}" )) })?; @@ -285,14 +314,22 @@ impl<'de> Deserialize<'de> for PressioCompressor { .map_err(serde::de::Error::custom)?; } - let early_options = convert_to_pressio_options(&format.early_config, Option::None)?; + let documentation = compressor + .get_documentation() + .map_err(serde::de::Error::custom)?; + + let early_options = + convert_to_pressio_options(&format.early_config, Option::None, &documentation)?; compressor .set_options(&early_options) .map_err(serde::de::Error::custom)?; let options_template = compressor.get_options().map_err(serde::de::Error::custom)?; - let options = - convert_to_pressio_options(&format.compressor_config, Some(&options_template))?; + let options = convert_to_pressio_options( + &format.compressor_config, + Some(&options_template), + &documentation, + )?; compressor .set_options(&options) .map_err(serde::de::Error::custom)?; @@ -327,6 +364,9 @@ struct PressioCompressorOwnedFormat { /// Configuration for the compressor #[serde(default)] compressor_config: BTreeMap, + /// Results of the compressor metrics (output-only) + #[serde(default)] + metric_results: BTreeMap, /// Optional name for the compressor when used in hierarchical mode #[serde(default)] name: Option, @@ -338,13 +378,14 @@ struct PressioCompressorBorrowedFormat<'a> { /// The id of the compressor compressor_id: &'a str, /// Configuration for the structure of the compressor - #[serde(default)] early_config: &'a BTreeMap, /// Configuration for the compressor - #[serde(default)] compressor_config: &'a BTreeMap, + /// Results of the compressor metrics (output-only) + #[serde(skip_serializing_if = "BTreeMap::is_empty")] + metric_results: &'a BTreeMap, /// Optional name for the compressor when used in hierarchical mode - #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] name: Option<&'a str>, } @@ -487,7 +528,7 @@ impl Codec for PressioCodec { libpressio::PressioData::new_empty(libpressio::PressioDtype::Byte, []); compressor - .compress(encoded, decompressed_data) + .decompress(encoded, decompressed_data) .map_err(|err| PressioCodecError::PressioDecodeFailed { source: PressioCodingError(err), }) @@ -554,7 +595,7 @@ impl Codec for PressioCodec { libpressio::PressioData::new_empty(decoded_dtype, decoded_shape); compressor - .compress(encoded, decompressed_data) + .decompress(encoded, decompressed_data) .map_err(|err| PressioCodecError::PressioDecodeFailed { source: PressioCodingError(err), }) From 0c605803ad3d2597182cf0015d6a68ba477767ea Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 14 Mar 2026 13:40:18 +0200 Subject: [PATCH 37/43] support inline data nd arrays in the config --- Cargo.toml | 1 + codecs/pressio/Cargo.toml | 1 + codecs/pressio/src/lib.rs | 140 +++- codecs/pressio/tests/schema.json | 1331 ++++++++++++++++++++++++++++++ codecs/pressio/tests/schema.rs | 20 + 5 files changed, 1490 insertions(+), 3 deletions(-) create mode 100644 codecs/pressio/tests/schema.json create mode 100644 codecs/pressio/tests/schema.rs diff --git a/Cargo.toml b/Cargo.toml index d047975c4..632553493 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -118,6 +118,7 @@ schemars = { version = "1.0.3", default-features = false } scratch = { version = "1.0", default-features = false } semver = { version = "1.0.23", default-features = false } serde = { version = "1.0.218", default-features = false } +serde-ndim = { version = "=2.1.0", default-features = false } serde-transcode = { version = "1.1", default-features = false } serde_json = { version = "1.0.140", default-features = false } serde_repr = { version = "0.1.5", default-features = false } diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index e664605e6..5eb125e6f 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -20,6 +20,7 @@ ndarray = { workspace = true } numcodecs = { workspace = true } schemars = { workspace = true, features = ["derive", "preserve_order"] } serde = { workspace = true, features = ["std", "derive"] } +serde-ndim = { workspace = true, features = ["ndarray"] } thiserror = { workspace = true } [lints] diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 8fcd29d35..112897d53 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -23,7 +23,7 @@ use std::{ sync::Mutex, }; -use ndarray::{ArrayView, ArrayViewMut, CowArray, IxDyn}; +use ndarray::{Array, ArrayView, ArrayViewMut, CowArray, IxDyn}; use numcodecs::{ AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, @@ -74,6 +74,7 @@ impl Clone for PressioCompressor { } impl Serialize for PressioCompressor { + #[expect(clippy::too_many_lines)] fn serialize(&self, serializer: S) -> Result { fn convert_from_pressio_options( options: impl Iterator, Option)>, @@ -103,8 +104,21 @@ impl Serialize for PressioCompressor { libpressio::PressioOption::vec_string(Some(x)) => PressioOption::VecString(x), libpressio::PressioOption::dtype(Some(x)) => PressioOption::String(format!("{x}")), libpressio::PressioOption::thread_safety(Some(x)) => PressioOption::String(format!("{x}")), - libpressio::PressioOption::data(_) - | libpressio::PressioOption::user_ptr(_) + libpressio::PressioOption::data(Some(x)) => match x.clone_into_array() { + Option::None => continue, + Some(libpressio::PressioArray::Bool(x)) => PressioOption::DataBool(NdArray(x)), + Some(libpressio::PressioArray::Byte(x) | libpressio::PressioArray::U8(x)) => PressioOption::DataU8(NdArray(x)), + Some(libpressio::PressioArray::U16(x)) => PressioOption::DataU16(NdArray(x)), + Some(libpressio::PressioArray::U32(x)) => PressioOption::DataU32(NdArray(x)), + Some(libpressio::PressioArray::U64(x)) => PressioOption::DataU64(NdArray(x)), + Some(libpressio::PressioArray::I8(x)) => PressioOption::DataI8(NdArray(x)), + Some(libpressio::PressioArray::I16(x)) => PressioOption::DataI16(NdArray(x)), + Some(libpressio::PressioArray::I32(x)) => PressioOption::DataI32(NdArray(x)), + Some(libpressio::PressioArray::I64(x)) => PressioOption::DataI64(NdArray(x)), + Some(libpressio::PressioArray::F32(x)) => PressioOption::DataF32(NdArray(x)), + Some(libpressio::PressioArray::F64(x)) => PressioOption::DataF64(NdArray(x)), + }, + libpressio::PressioOption::user_ptr(_) | libpressio::PressioOption::unset | _ /* non-exhaustive */ => continue, }; @@ -218,6 +232,57 @@ impl<'de> Deserialize<'de> for PressioCompressor { PressioOption::VecString(x) => { Some(libpressio::PressioOption::vec_string(Some(x.clone()))) } + PressioOption::DataBool(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataU8(NdArray(x)) => Some(libpressio::PressioOption::data( + Some(libpressio::PressioData::new_copied(x)), + )), + PressioOption::DataU16(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataU32(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataU64(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataI8(NdArray(x)) => Some(libpressio::PressioOption::data( + Some(libpressio::PressioData::new_copied(x)), + )), + PressioOption::DataI16(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataI32(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataI64(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataF32(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } + PressioOption::DataF64(NdArray(x)) => { + Some(libpressio::PressioOption::data(Some( + libpressio::PressioData::new_copied(x), + ))) + } PressioOption::Nested(entry) => { let mut nested_path = path.clone(); nested_path.push(key.clone()); @@ -408,9 +473,78 @@ pub enum PressioOption { F64(f64), String(String), VecString(Vec), + DataBool(NdArray), + DataU8(NdArray), + DataU16(NdArray), + DataU32(NdArray), + DataU64(NdArray), + DataI8(NdArray), + DataI16(NdArray), + DataI32(NdArray), + DataI64(NdArray), + DataF32(NdArray), + DataF64(NdArray), Nested(BTreeMap), } +#[derive(Clone)] +/// Pressio n-dimensional data array +pub struct NdArray(Array); + +impl std::fmt::Debug for NdArray { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + self.0.fmt(fmt) + } +} + +impl Serialize for NdArray { + fn serialize(&self, serializer: S) -> Result { + serde_ndim::serialize(&self.0, serializer) + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for NdArray { + fn deserialize>(deserializer: D) -> Result { + serde_ndim::deserialize(deserializer).map(Self) + } +} + +impl JsonSchema for NdArray { + fn inline_schema() -> bool { + false + } + + fn schema_name() -> Cow<'static, str> { + Cow::Owned(format!("{}NdArray", std::any::type_name::())) + } + + fn schema_id() -> Cow<'static, str> { + Cow::Owned(format!( + "{}::NdArray<{}>", + module_path!(), + std::any::type_name::() + )) + } + + fn json_schema(generator: &mut SchemaGenerator) -> Schema { + let item = generator.subschema_for::(); + let nested = generator.subschema_for::(); + + json_schema!({ + "anyOf": [ + { + "type": "array", + "items": item, + }, + { + "type": "array", + "items": nested, + } + ] + }) + } +} + #[derive(Copy, Clone, Debug)] /// Equivalent of `Option::None` pub struct None; diff --git a/codecs/pressio/tests/schema.json b/codecs/pressio/tests/schema.json new file mode 100644 index 000000000..0cec2dfe5 --- /dev/null +++ b/codecs/pressio/tests/schema.json @@ -0,0 +1,1331 @@ +{ + "type": "object", + "additionalProperties": false, + "properties": { + "compressor_id": { + "type": "string", + "description": "The id of the compressor" + }, + "early_config": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + }, + { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + }, + { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + }, + { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + }, + { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + { + "type": "integer", + "format": "int32" + }, + { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + { + "type": "integer", + "format": "int64" + }, + { + "type": "number", + "format": "float" + }, + { + "type": "number", + "format": "double" + }, + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "boolean" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/boolNdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int32" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "float" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f64NdArray" + } + } + ] + }, + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/PressioOption" + } + } + ], + "description": "Pressio option value" + }, + "description": "Configuration for the structure of the compressor", + "default": {} + }, + "compressor_config": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + }, + { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + }, + { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + }, + { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + }, + { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + { + "type": "integer", + "format": "int32" + }, + { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + { + "type": "integer", + "format": "int64" + }, + { + "type": "number", + "format": "float" + }, + { + "type": "number", + "format": "double" + }, + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "boolean" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/boolNdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int32" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "float" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f64NdArray" + } + } + ] + }, + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/PressioOption" + } + } + ], + "description": "Pressio option value" + }, + "description": "Configuration for the compressor", + "default": {} + }, + "metric_results": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + }, + { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + }, + { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + }, + { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + }, + { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + { + "type": "integer", + "format": "int32" + }, + { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + { + "type": "integer", + "format": "int64" + }, + { + "type": "number", + "format": "float" + }, + { + "type": "number", + "format": "double" + }, + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "boolean" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/boolNdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int32" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "float" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f64NdArray" + } + } + ] + }, + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/PressioOption" + } + } + ], + "description": "Pressio option value" + }, + "description": "Results of the compressor metrics (output-only)", + "default": {} + }, + "name": { + "type": [ + "string", + "null" + ], + "description": "Optional name for the compressor when used in hierarchical mode", + "default": null + }, + "_version": { + "type": "string", + "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$", + "description": "The codec's encoding format version. Do not provide this parameter explicitly.", + "default": "1.0.0" + } + }, + "required": [ + "compressor_id" + ], + "description": "Pressio codec which applies the identity function, i.e. passes through the\ninput unchanged during encoding and decoding.", + "title": "PressioCodec", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "boolNdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "boolean" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/boolNdArray" + } + } + ] + }, + "u8NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u8NdArray" + } + } + ] + }, + "u16NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u16NdArray" + } + } + ] + }, + "u32NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u32NdArray" + } + } + ] + }, + "u64NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u64NdArray" + } + } + ] + }, + "i8NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i8NdArray" + } + } + ] + }, + "i16NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i16NdArray" + } + } + ] + }, + "i32NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int32" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i32NdArray" + } + } + ] + }, + "i64NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i64NdArray" + } + } + ] + }, + "f32NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "float" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f32NdArray" + } + } + ] + }, + "f64NdArray": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f64NdArray" + } + } + ] + }, + "PressioOption": { + "anyOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + }, + { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + }, + { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + }, + { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + }, + { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + { + "type": "integer", + "format": "int32" + }, + { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + { + "type": "integer", + "format": "int64" + }, + { + "type": "number", + "format": "float" + }, + { + "type": "number", + "format": "double" + }, + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "boolean" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/boolNdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint8", + "minimum": 0, + "maximum": 255 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint16", + "minimum": 0, + "maximum": 65535 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/u64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int8", + "minimum": -128, + "maximum": 127 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i8NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int16", + "minimum": -32768, + "maximum": 32767 + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i16NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int32" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/i64NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "float" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f32NdArray" + } + } + ] + }, + { + "anyOf": [ + { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + { + "type": "array", + "items": { + "$ref": "#/$defs/f64NdArray" + } + } + ] + }, + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/PressioOption" + } + } + ], + "description": "Pressio option value" + } + } +} \ No newline at end of file diff --git a/codecs/pressio/tests/schema.rs b/codecs/pressio/tests/schema.rs new file mode 100644 index 000000000..8945df79b --- /dev/null +++ b/codecs/pressio/tests/schema.rs @@ -0,0 +1,20 @@ +#![expect(missing_docs)] + +use ::{libpressio as _, ndarray as _, schemars as _, serde as _, serde_ndim as _, thiserror as _}; + +use numcodecs::{DynCodecType, StaticCodecType}; +use numcodecs_pressio::PressioCodec; + +#[test] +fn schema() { + let schema = format!( + "{:#}", + StaticCodecType::::of() + .codec_config_schema() + .to_value() + ); + + if schema != include_str!("schema.json") { + panic!("Pressio schema has changed\n===\n{schema}\n==="); + } +} From f354e63110cb1acbd53485e3d9b32f7e88d90bde Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 15 Mar 2026 07:07:06 +0200 Subject: [PATCH 38/43] add linear quantizer test --- Cargo.toml | 2 +- codecs/pressio/Cargo.toml | 4 +++ codecs/pressio/src/lib.rs | 49 +++++++++++++++++++++++++++++++++- codecs/pressio/tests/schema.rs | 5 +++- 4 files changed, 57 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 632553493..db2618e5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "d928ca7", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "1289c47", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index 5eb125e6f..074682eee 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -23,5 +23,9 @@ serde = { workspace = true, features = ["std", "derive"] } serde-ndim = { workspace = true, features = ["ndarray"] } thiserror = { workspace = true } +[dev-dependencies] +ndarray = { workspace = true, features = ["std"] } +serde_json = { workspace = true, features = ["std"] } + [lints] workspace = true diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 112897d53..13078ca77 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -100,7 +100,6 @@ impl Serialize for PressioCompressor { libpressio::PressioOption::float32(Some(x)) => PressioOption::F32(x), libpressio::PressioOption::float64(Some(x)) => PressioOption::F64(x), libpressio::PressioOption::string(Some(x)) => PressioOption::String(x), - // FIXME: seems to return strings as a single joined string libpressio::PressioOption::vec_string(Some(x)) => PressioOption::VecString(x), libpressio::PressioOption::dtype(Some(x)) => PressioOption::String(format!("{x}")), libpressio::PressioOption::thread_safety(Some(x)) => PressioOption::String(format!("{x}")), @@ -939,3 +938,51 @@ pub enum PressioCodecError { #[error(transparent)] /// Opaque error for when encoding or decoding with libpressio fails pub struct PressioCodingError(libpressio::PressioError); + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + use ndarray::Array1; + use serde_json::json; + + #[test] + fn linear_quantizer() { + let pressio = PressioCodec::deserialize(json!({ + "compressor_id": "linear_quantizer", + "early_config": { + "pressio:metric": "composite", + }, + "compressor_config": { + "pressio:abs": 10.0, + "pressio:metric": "composite", + "composite:plugins": ["printer", "size"], + } + })) + .unwrap(); + + let data = ndarray::linspace(0.0, 100.0, 50) + .collect::>() + .into_dyn(); + + let encoded = pressio + .encode(AnyCowArray::F64(CowArray::from(&data))) + .unwrap(); + + let decoded = pressio.decode(encoded.cow()); + assert!(matches!( + decoded, + Err(PressioCodecError::DecodeToArrayWithoutData) + )); + + let mut decoded = ndarray::Array::zeros(data.dim()); + pressio + .decode_into(encoded.view(), AnyArrayViewMut::F64(decoded.view_mut())) + .unwrap(); + + for (i, o) in data.iter().zip(decoded.iter()) { + assert!(((*i) - (*o)).abs() <= 10.0); + } + } +} diff --git a/codecs/pressio/tests/schema.rs b/codecs/pressio/tests/schema.rs index 8945df79b..ae8dde7ff 100644 --- a/codecs/pressio/tests/schema.rs +++ b/codecs/pressio/tests/schema.rs @@ -1,6 +1,9 @@ #![expect(missing_docs)] -use ::{libpressio as _, ndarray as _, schemars as _, serde as _, serde_ndim as _, thiserror as _}; +use ::{ + libpressio as _, ndarray as _, schemars as _, serde as _, serde_json as _, serde_ndim as _, + thiserror as _, +}; use numcodecs::{DynCodecType, StaticCodecType}; use numcodecs_pressio::PressioCodec; From 0e6bd7a10e9c25c576fa8ae25705f165adbc82ff Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 16 Mar 2026 01:05:53 +0200 Subject: [PATCH 39/43] fix pressio metrics results --- Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 153 ++++++++++++++++++++++++-------------- 2 files changed, 98 insertions(+), 57 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index db2618e5e..e88d3c0a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "1289c47", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5be5b1b", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 13078ca77..64400c9dc 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -20,7 +20,7 @@ use std::{ borrow::Cow, collections::{BTreeMap, btree_map::Entry}, - sync::Mutex, + sync::{Arc, Mutex, RwLock}, }; use ndarray::{Array, ArrayView, ArrayViewMut, CowArray, IxDyn}; @@ -47,28 +47,51 @@ pub struct PressioCodec { /// Pressio compressor pub struct PressioCompressor { + // get_config clones the compressor, but we want the config to include the + // compressor metrics + // so we make cheap shallow clones whenever possible and then later make + // the compressor unique with the clone-on-write `Arc::make_mut` + // we pinky-promise to only lock the inner `Mutex` for immutable access + // when we have read-only access, otherwise we can go through + // `Mutex::get_mut` + inner: RwLock>, +} + +impl Clone for PressioCompressor { + #[expect(clippy::unwrap_used)] + fn clone(&self) -> Self { + Self { + inner: RwLock::new(self.inner.read().unwrap().clone()), + } + } +} + +struct PressioCompressorInner { compressor: Mutex, compressor_id: String, early_config: BTreeMap, - name: Option, } -impl Clone for PressioCompressor { +impl Clone for PressioCompressorInner { #[expect(clippy::unwrap_used)] fn clone(&self) -> Self { let mut pressio = libpressio::Pressio::new().unwrap(); - let mut compressor = pressio.get_compressor(self.compressor_id.as_str()).unwrap(); - if let Some(name) = &self.name { - compressor.set_name(name).unwrap(); - } - let options = self.compressor.lock().unwrap().get_options().unwrap(); - compressor.set_options(&options).unwrap(); + let compressor = self.compressor.lock().unwrap(); + + let mut compressor_clone = pressio.get_compressor(self.compressor_id.as_str()).unwrap(); + compressor_clone + .set_name(compressor.get_name().unwrap()) + .unwrap(); + compressor_clone + .set_options(&compressor.get_options().unwrap()) + .unwrap(); + + std::mem::drop(compressor); Self { - compressor: Mutex::new(compressor), + compressor: Mutex::new(compressor_clone), compressor_id: self.compressor_id.clone(), early_config: self.early_config.clone(), - name: self.name.clone(), } } } @@ -172,7 +195,8 @@ impl Serialize for PressioCompressor { Ok(config) } - let compressor = self.compressor.lock().map_err(serde::ser::Error::custom)?; + let inner = self.inner.read().map_err(serde::ser::Error::custom)?; + let compressor = inner.compressor.lock().map_err(serde::ser::Error::custom)?; let options = compressor .get_options() .map_err(serde::ser::Error::custom)?; @@ -182,8 +206,8 @@ impl Serialize for PressioCompressor { let name = compressor.get_name().map_err(serde::ser::Error::custom)?; let result = PressioCompressorBorrowedFormat { - compressor_id: self.compressor_id.as_str(), - early_config: &self.early_config, + compressor_id: inner.compressor_id.as_str(), + early_config: &inner.early_config, compressor_config: &convert_from_pressio_options(options.iter())?, metric_results: &convert_from_pressio_options(metric_results.iter())?, name: match name { @@ -193,6 +217,7 @@ impl Serialize for PressioCompressor { } .serialize(serializer); std::mem::drop(compressor); + std::mem::drop(inner); result } } @@ -399,10 +424,11 @@ impl<'de> Deserialize<'de> for PressioCompressor { .map_err(serde::de::Error::custom)?; Ok(Self { - compressor: Mutex::new(compressor), - compressor_id: format.compressor_id, - early_config: format.early_config, - name: format.name, + inner: RwLock::new(Arc::new(PressioCompressorInner { + compressor: Mutex::new(compressor), + compressor_id: format.compressor_id, + early_config: format.early_config, + })), }) } } @@ -632,21 +658,25 @@ impl Codec for PressioCodec { } } - let Ok(mut compressor) = self.compressor.compressor.lock() else { - return Err(PressioCodecError::PressioPoisonedMutex); + let Ok(mut inner) = self.compressor.inner.write() else { + return Err(PressioCodecError::PressioPoisonedLock); + }; + + let Ok(compressor) = Arc::make_mut(&mut inner).compressor.get_mut() else { + return Err(PressioCodecError::PressioPoisonedLock); }; match data { - AnyCowArray::U8(data) => encode_typed(&mut compressor, data), - AnyCowArray::U16(data) => encode_typed(&mut compressor, data), - AnyCowArray::U32(data) => encode_typed(&mut compressor, data), - AnyCowArray::U64(data) => encode_typed(&mut compressor, data), - AnyCowArray::I8(data) => encode_typed(&mut compressor, data), - AnyCowArray::I16(data) => encode_typed(&mut compressor, data), - AnyCowArray::I32(data) => encode_typed(&mut compressor, data), - AnyCowArray::I64(data) => encode_typed(&mut compressor, data), - AnyCowArray::F32(data) => encode_typed(&mut compressor, data), - AnyCowArray::F64(data) => encode_typed(&mut compressor, data), + AnyCowArray::U8(data) => encode_typed(compressor, data), + AnyCowArray::U16(data) => encode_typed(compressor, data), + AnyCowArray::U32(data) => encode_typed(compressor, data), + AnyCowArray::U64(data) => encode_typed(compressor, data), + AnyCowArray::I8(data) => encode_typed(compressor, data), + AnyCowArray::I16(data) => encode_typed(compressor, data), + AnyCowArray::I32(data) => encode_typed(compressor, data), + AnyCowArray::I64(data) => encode_typed(compressor, data), + AnyCowArray::F32(data) => encode_typed(compressor, data), + AnyCowArray::F64(data) => encode_typed(compressor, data), data => Err(PressioCodecError::UnsupportedDtype(data.dtype())), } } @@ -692,21 +722,25 @@ impl Codec for PressioCodec { } } - let Ok(mut compressor) = self.compressor.compressor.lock() else { - return Err(PressioCodecError::PressioPoisonedMutex); + let Ok(mut inner) = self.compressor.inner.write() else { + return Err(PressioCodecError::PressioPoisonedLock); + }; + + let Ok(compressor) = Arc::make_mut(&mut inner).compressor.get_mut() else { + return Err(PressioCodecError::PressioPoisonedLock); }; match encoded { - AnyCowArray::U8(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::U16(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::U32(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::U64(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::I8(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::I16(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::I32(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::I64(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::F32(encoded) => decode_typed(&mut compressor, encoded), - AnyCowArray::F64(encoded) => decode_typed(&mut compressor, encoded), + AnyCowArray::U8(encoded) => decode_typed(compressor, encoded), + AnyCowArray::U16(encoded) => decode_typed(compressor, encoded), + AnyCowArray::U32(encoded) => decode_typed(compressor, encoded), + AnyCowArray::U64(encoded) => decode_typed(compressor, encoded), + AnyCowArray::I8(encoded) => decode_typed(compressor, encoded), + AnyCowArray::I16(encoded) => decode_typed(compressor, encoded), + AnyCowArray::I32(encoded) => decode_typed(compressor, encoded), + AnyCowArray::I64(encoded) => decode_typed(compressor, encoded), + AnyCowArray::F32(encoded) => decode_typed(compressor, encoded), + AnyCowArray::F64(encoded) => decode_typed(compressor, encoded), encoded => Err(PressioCodecError::UnsupportedDtype(encoded.dtype())), } } @@ -803,8 +837,12 @@ impl Codec for PressioCodec { Ok(()) } - let Ok(mut compressor) = self.compressor.compressor.lock() else { - return Err(PressioCodecError::PressioPoisonedMutex); + let Ok(mut inner) = self.compressor.inner.write() else { + return Err(PressioCodecError::PressioPoisonedLock); + }; + + let Ok(compressor) = Arc::make_mut(&mut inner).compressor.get_mut() else { + return Err(PressioCodecError::PressioPoisonedLock); }; let decoded_dtype = match decoded.dtype() { @@ -824,34 +862,34 @@ impl Codec for PressioCodec { let decompressed_data = match encoded { AnyArrayView::U8(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::U16(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::U32(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::U64(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::I8(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::I16(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::I32(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::I64(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::F32(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } AnyArrayView::F64(encoded) => { - decompress_typed(&mut compressor, encoded, decoded_dtype, decoded_shape) + decompress_typed(compressor, encoded, decoded_dtype, decoded_shape) } encoded => return Err(PressioCodecError::UnsupportedDtype(encoded.dtype())), }?; @@ -894,7 +932,7 @@ pub enum PressioCodecError { UnsupportedDtype(AnyArrayDType), /// [`PressioCodec`] lock was poisoned #[error("Pressio lock was poisoned")] - PressioPoisonedMutex, + PressioPoisonedLock, /// [`PressioCodec`] failed to encode the data #[error("Pressio failed to encode the data")] PressioEncodeFailed { @@ -957,7 +995,7 @@ mod tests { "compressor_config": { "pressio:abs": 10.0, "pressio:metric": "composite", - "composite:plugins": ["printer", "size"], + "composite:plugins": ["printer", "size", "time"], } })) .unwrap(); @@ -984,5 +1022,8 @@ mod tests { for (i, o) in data.iter().zip(decoded.iter()) { assert!(((*i) - (*o)).abs() <= 10.0); } + + let config = serde_json::to_string(&pressio.get_config()).unwrap(); + assert!(config.contains("\"size:compressed_size\":400")); } } From c710d1170cd9fb51a27de2519b924a19f94f4257 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 16 Mar 2026 10:59:48 +0200 Subject: [PATCH 40/43] add support for the libpressio bzip2 compressor --- Cargo.toml | 2 +- codecs/pressio/Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 41 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e88d3c0a6..d3a0678b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "5be5b1b", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "4508eb7", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index 074682eee..1a4966743 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -15,7 +15,7 @@ keywords = ["libpressio", "numcodecs", "compression", "encoding"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -libpressio = { workspace = true } +libpressio = { workspace = true, features = ["bzip2"] } ndarray = { workspace = true } numcodecs = { workspace = true } schemars = { workspace = true, features = ["derive", "preserve_order"] } diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 64400c9dc..eb6ccc0fa 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -986,7 +986,7 @@ mod tests { use serde_json::json; #[test] - fn linear_quantizer() { + fn linear_quantizer_noop() { let pressio = PressioCodec::deserialize(json!({ "compressor_id": "linear_quantizer", "early_config": { @@ -1026,4 +1026,43 @@ mod tests { let config = serde_json::to_string(&pressio.get_config()).unwrap(); assert!(config.contains("\"size:compressed_size\":400")); } + + #[test] + fn linear_quantizer_bzip2() { + let pressio = PressioCodec::deserialize(json!({ + "compressor_id": "linear_quantizer", + "early_config": { + "linear_quantizer:compressor": "bzip2", + }, + "compressor_config": { + "pressio:abs": 10.0, + "pressio:lossless": 9, + "pressio:metric": "size", + } + })) + .unwrap(); + + let data = ndarray::linspace(0.0, 100.0, 50) + .collect::>() + .into_dyn(); + + let encoded = pressio + .encode(AnyCowArray::F64(CowArray::from(&data))) + .unwrap(); + + let decoded = pressio.decode(encoded.cow()); + assert!(decoded.is_err()); + + let mut decoded = ndarray::Array::zeros(data.dim()); + pressio + .decode_into(encoded.view(), AnyArrayViewMut::F64(decoded.view_mut())) + .unwrap(); + + for (i, o) in data.iter().zip(decoded.iter()) { + assert!(((*i) - (*o)).abs() <= 10.0); + } + + let config = serde_json::to_string(&pressio.get_config()).unwrap(); + assert!(config.contains("\"size:compressed_size\":63")); + } } From 560ff85a0ebfcc268d997d9b1a19d598589dead4 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 17 Mar 2026 09:17:44 +0200 Subject: [PATCH 41/43] Try out Lua support for libpressio --- Cargo.toml | 2 +- codecs/pressio/Cargo.toml | 2 +- codecs/pressio/src/lib.rs | 52 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d3a0678b8..1aca865f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "4508eb7", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "22f085e", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index 1a4966743..ffe83260b 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -15,7 +15,7 @@ keywords = ["libpressio", "numcodecs", "compression", "encoding"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -libpressio = { workspace = true, features = ["bzip2"] } +libpressio = { workspace = true, features = ["bzip2", "lua"] } ndarray = { workspace = true } numcodecs = { workspace = true } schemars = { workspace = true, features = ["derive", "preserve_order"] } diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index eb6ccc0fa..8ccf0d0b3 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -1065,4 +1065,56 @@ mod tests { let config = serde_json::to_string(&pressio.get_config()).unwrap(); assert!(config.contains("\"size:compressed_size\":63")); } + + #[test] + fn lua_metrics() { + let pressio = PressioCodec::deserialize(json!({ + "compressor_id": "noop", + "early_config": { + "pressio:metric": "composite", + }, + "compressor_config": { + "pressio:metric": "composite", + "composite:plugins": ["size"], + "composite:scripts": [ + "return \"objective\", 1.2", + "return \"objective2\", metrics[\"size:compression_ratio\"] * 4.2", + ] + } + })) + .unwrap(); + + let config = serde_json::to_string(&pressio.get_config()).unwrap(); + assert!(!config.contains("\"size:compression_ratio\"")); + assert!(config.contains("\"composite:objective\":1.2")); + assert!(!config.contains("\"composite:objective2\"")); + + let data = ndarray::linspace(0.0, 100.0, 50) + .collect::>() + .into_dyn(); + + let encoded = pressio + .encode(AnyCowArray::F64(CowArray::from(&data))) + .unwrap(); + + let decoded = pressio.decode(encoded.cow()); + assert!(matches!( + decoded, + Err(PressioCodecError::DecodeToArrayWithoutData) + )); + + let mut decoded = ndarray::Array::zeros(data.dim()); + pressio + .decode_into(encoded.view(), AnyArrayViewMut::F64(decoded.view_mut())) + .unwrap(); + + for (i, o) in data.iter().zip(decoded.iter()) { + assert!(i.to_bits() == o.to_bits()); + } + + let config = serde_json::to_string(&pressio.get_config()).unwrap(); + assert!(config.contains("\"size:compression_ratio\":1.0")); + assert!(config.contains("\"composite:objective\":1.2")); + assert!(config.contains("\"composite:objective2\":4.2")); + } } From d8944df887abba3ada73e5a2111ea1526b12344c Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 22 Mar 2026 08:24:29 +0200 Subject: [PATCH 42/43] update libpressio-rs --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1aca865f1..dd860554e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ format_serde_error = { version = "0.3", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "22f085e", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "255ed51", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy From ff442bda45a7d3c9b81c32eb9bebace25acb18a6 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 26 Mar 2026 14:35:33 +0200 Subject: [PATCH 43/43] allow using non-sendable pressio compressors using fragile --- Cargo.toml | 3 ++- codecs/pressio/Cargo.toml | 1 + codecs/pressio/src/lib.rs | 56 +++++++++++++++++++++++++++++++++++---- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd860554e..658a5243b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,10 +93,11 @@ clap = { version = "4.5", default-features = false } convert_case = { version = "0.8", default-features = false } ebcc = { version = "0.1", default-features = false } format_serde_error = { version = "0.3", default-features = false } +fragile = { version = "2.0", default-features = false } indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } lc-framework = { version = "0.1", default-features = false } -libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "255ed51", default-features = false } +libpressio = { version = "0.1", git = "https://github.com/juntyr/libpressio-rs.git", rev = "30545dd", default-features = false } log = { version = "0.4.27", default-features = false } miniz_oxide = { version = "0.8.5", default-features = false } ndarray = { version = "0.16.1", default-features = false } # keep in sync with numpy diff --git a/codecs/pressio/Cargo.toml b/codecs/pressio/Cargo.toml index ffe83260b..87a49a490 100644 --- a/codecs/pressio/Cargo.toml +++ b/codecs/pressio/Cargo.toml @@ -15,6 +15,7 @@ keywords = ["libpressio", "numcodecs", "compression", "encoding"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +fragile = { workspace = true } libpressio = { workspace = true, features = ["bzip2", "lua"] } ndarray = { workspace = true } numcodecs = { workspace = true } diff --git a/codecs/pressio/src/lib.rs b/codecs/pressio/src/lib.rs index 8ccf0d0b3..2da4148a3 100644 --- a/codecs/pressio/src/lib.rs +++ b/codecs/pressio/src/lib.rs @@ -23,6 +23,7 @@ use std::{ sync::{Arc, Mutex, RwLock}, }; +use fragile::Fragile; use ndarray::{Array, ArrayView, ArrayViewMut, CowArray, IxDyn}; use numcodecs::{ AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, @@ -67,7 +68,7 @@ impl Clone for PressioCompressor { } struct PressioCompressorInner { - compressor: Mutex, + compressor: Mutex, compressor_id: String, early_config: BTreeMap, } @@ -76,7 +77,8 @@ impl Clone for PressioCompressorInner { #[expect(clippy::unwrap_used)] fn clone(&self) -> Self { let mut pressio = libpressio::Pressio::new().unwrap(); - let compressor = self.compressor.lock().unwrap(); + let compressor_guard = self.compressor.lock().unwrap(); + let compressor = compressor_guard.try_get().unwrap(); let mut compressor_clone = pressio.get_compressor(self.compressor_id.as_str()).unwrap(); compressor_clone @@ -86,7 +88,12 @@ impl Clone for PressioCompressorInner { .set_options(&compressor.get_options().unwrap()) .unwrap(); - std::mem::drop(compressor); + std::mem::drop(compressor_guard); + + let compressor_clone = match compressor_clone.try_into_sendable() { + Ok(compressor) => PressioCompressorSendable::Sendable(compressor), + Err((compressor, _err)) => PressioCompressorSendable::Fragile(Fragile::new(compressor)), + }; Self { compressor: Mutex::new(compressor_clone), @@ -96,6 +103,31 @@ impl Clone for PressioCompressorInner { } } +enum PressioCompressorSendable { + Sendable(libpressio::PressioSendableCompressor), + Fragile(Fragile), +} + +impl PressioCompressorSendable { + fn try_get(&self) -> Result<&libpressio::PressioCompressor, PressioCodecError> { + match self { + Self::Sendable(compressor) => Ok(compressor), + Self::Fragile(compressor) => compressor + .try_get() + .map_err(|_| PressioCodecError::PressioNonThreadsafeSend), + } + } + + fn try_get_mut(&mut self) -> Result<&mut libpressio::PressioCompressor, PressioCodecError> { + match self { + Self::Sendable(compressor) => Ok(compressor), + Self::Fragile(compressor) => compressor + .try_get_mut() + .map_err(|_| PressioCodecError::PressioNonThreadsafeSend), + } + } +} + impl Serialize for PressioCompressor { #[expect(clippy::too_many_lines)] fn serialize(&self, serializer: S) -> Result { @@ -196,7 +228,10 @@ impl Serialize for PressioCompressor { } let inner = self.inner.read().map_err(serde::ser::Error::custom)?; - let compressor = inner.compressor.lock().map_err(serde::ser::Error::custom)?; + let compressor_guard = inner.compressor.lock().map_err(serde::ser::Error::custom)?; + let compressor = compressor_guard + .try_get() + .map_err(serde::ser::Error::custom)?; let options = compressor .get_options() .map_err(serde::ser::Error::custom)?; @@ -216,7 +251,7 @@ impl Serialize for PressioCompressor { }, } .serialize(serializer); - std::mem::drop(compressor); + std::mem::drop(compressor_guard); std::mem::drop(inner); result } @@ -423,6 +458,11 @@ impl<'de> Deserialize<'de> for PressioCompressor { .set_options(&options) .map_err(serde::de::Error::custom)?; + let compressor = match compressor.try_into_sendable() { + Ok(compressor) => PressioCompressorSendable::Sendable(compressor), + Err((compressor, _err)) => PressioCompressorSendable::Fragile(Fragile::new(compressor)), + }; + Ok(Self { inner: RwLock::new(Arc::new(PressioCompressorInner { compressor: Mutex::new(compressor), @@ -665,6 +705,7 @@ impl Codec for PressioCodec { let Ok(compressor) = Arc::make_mut(&mut inner).compressor.get_mut() else { return Err(PressioCodecError::PressioPoisonedLock); }; + let compressor = compressor.try_get_mut()?; match data { AnyCowArray::U8(data) => encode_typed(compressor, data), @@ -729,6 +770,7 @@ impl Codec for PressioCodec { let Ok(compressor) = Arc::make_mut(&mut inner).compressor.get_mut() else { return Err(PressioCodecError::PressioPoisonedLock); }; + let compressor = compressor.try_get_mut()?; match encoded { AnyCowArray::U8(encoded) => decode_typed(compressor, encoded), @@ -844,6 +886,7 @@ impl Codec for PressioCodec { let Ok(compressor) = Arc::make_mut(&mut inner).compressor.get_mut() else { return Err(PressioCodecError::PressioPoisonedLock); }; + let compressor = compressor.try_get_mut()?; let decoded_dtype = match decoded.dtype() { AnyArrayDType::U8 => libpressio::PressioDtype::U8, @@ -933,6 +976,9 @@ pub enum PressioCodecError { /// [`PressioCodec`] lock was poisoned #[error("Pressio lock was poisoned")] PressioPoisonedLock, + /// [`PressioCodec`] was used on a different thread with a non-threadsafe compressor + #[error("Pressio was used on a different thread with a non-threadsafe compressor")] + PressioNonThreadsafeSend, /// [`PressioCodec`] failed to encode the data #[error("Pressio failed to encode the data")] PressioEncodeFailed {