From bb25dada5543b833a3e355e98e2d7230cd915fad Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Wed, 15 Oct 2025 18:01:39 -0700 Subject: [PATCH] feat(gzip): switch to zlib-rs for default gz backend --- CHANGELOG.md | 26 +++++++++++++++++++- Cargo.toml | 24 ++++++++++++++---- benches/gzip_decompress.rs | 46 +++++++++++++++++++++++++++++++++++ src/oneio/compressions/mod.rs | 6 ++--- src/oneio/mod.rs | 6 ++--- tests/async_integration.rs | 4 +-- tests/basic_integration.rs | 6 ++--- 7 files changed, 101 insertions(+), 17 deletions(-) create mode 100644 benches/gzip_decompress.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f64c71..56b63ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,30 @@ All notable changes to this project will be documented in this file. +## v0.19.1 -- 2025-10-15 + +### Changed +- Gzip backend selection via feature flags: + - Default feature `gz` switched to use flate2 with the `gz-zlib-rs` backend for improved performance. + - New selectors and aliases: + - `gz-zlib-rs` — enables `flate2/zlib-rs` (Rust, fast) + - `gz-miniz` — enables `flate2/miniz_oxide` (pure Rust, most portable) + - Disabled `flate2` default-features to allow explicit backend choice. + +### Added +- Criterion benchmark `benches/gzip_decompress.rs` to measure gzip decompression throughput across backends. + +### Usage +- Default (zlib-rs): + - cargo build + - cargo bench --bench gzip_decompress --features gz +- zlib-rs: + - cargo build --no-default-features --features gz-zlib-rs + - cargo bench --bench gzip_decompress --no-default-features --features gz-zlib-rs +- miniz_oxide (explicit): + - cargo build --no-default-features --features gz-miniz + - cargo bench --bench gzip_decompress --no-default-features --features gz-miniz + ## v0.19.0 -- 2025-08-31 ### Breaking Changes @@ -14,7 +38,7 @@ All notable changes to this project will be documented in this file. 
**Migration guide:** -```toml +``` # Before (v0.18.x) oneio = { version = "0.18", features = ["lib-core", "rustls"] } diff --git a/Cargo.toml b/Cargo.toml index fb4fcab..c5c69ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oneio" -version = "0.19.0" +version = "0.19.1" authors = ["Mingwei Zhang "] edition = "2021" readme = "README.md" @@ -30,7 +30,8 @@ reqwest = { version = "0.12", default-features = false, features = ["blocking", suppaftp = { version = "7.0", optional = true } # feature: compressions -flate2 = { version = "1", optional = true } +# Turn off flate2 default-features so we can explicitly choose backend via features +flate2 = { version = "1", optional = true, default-features = false } bzip2 = { version = "0.6.0", optional = true } lz4 = { version = "1.24", optional = true } xz2 = { version = "0.1", optional = true } @@ -45,7 +46,7 @@ serde = { version = "1.0", optional = true } serde_json = { version = "1.0", optional = true } # feature: s3 -rust-s3 = { version = "0.35", optional = true, default-features = false, features = [ +rust-s3 = { version = "0.37", optional = true, default-features = false, features = [ "sync", ] } @@ -75,8 +76,14 @@ https = ["http", "rustls"] # https needs http ftp = ["https", "suppaftp"] # ftp needs https s3 = ["rust-s3"] -# Compression features (independent) -gz = ["flate2"] +gz = ["gz-zlib-rs"] +# internal feature to enable gzip support +any_gz = [] +# fastest Rust impl with some unsafe code +gz-zlib-rs = ["any_gz", "flate2/zlib-rs"] +# slower pure safe Rust impl +gz-miniz = ["any_gz", "flate2/miniz_oxide", "flate2/any_impl"] + bz = ["bzip2"] lz = ["lz4"] xz = ["xz2"] @@ -111,6 +118,13 @@ tracing-subscriber = "0.3" tar = "0.4" tokio = { version = "1.0", features = ["macros", "rt"] } indicatif = "0.18" +criterion = { version = "0.5", default-features = false } + +# Benchmarks +[[bench]] +name = "gzip_decompress" +harness = false +required-features = ["any_gz"] # This list only includes examples 
which require additional features to run. These are more in the examples' directory.
 [[example]]
diff --git a/benches/gzip_decompress.rs b/benches/gzip_decompress.rs
new file mode 100644
index 0000000..a22e413
--- /dev/null
+++ b/benches/gzip_decompress.rs
@@ -0,0 +1,46 @@
+use std::hint::black_box;
+use std::io::Read;
+
+use criterion::{criterion_group, criterion_main, BatchSize, Criterion, Throughput};
+use flate2::read::GzDecoder;
+
+// Benchmark gzip decompression using flate2 with the selected backend.
+// To run with the miniz_oxide backend:
+//   cargo bench --bench gzip_decompress --no-default-features --features gz-miniz
+// To run with the zlib-rs backend (what the default `gz` feature selects):
+//   cargo bench --bench gzip_decompress --no-default-features --features gz-zlib-rs
+// To compare, run both commands and compare Criterion reports.
+
+/// Reads the whole gzip fixture into memory; panics if the file cannot be read.
+fn load_gz_bytes() -> Vec<u8> {
+    std::fs::read("tests/test_data.txt.gz").expect("missing tests/test_data.txt.gz")
+}
+
+/// Benchmarks decoding the in-memory fixture through `GzDecoder` into a fresh `Vec`.
+///
+/// Throughput is reported against the compressed input length.
+fn bench_gzip_decompress(c: &mut Criterion) {
+    let input = load_gz_bytes();
+
+    let mut group = c.benchmark_group("gzip_decompress");
+    group.throughput(Throughput::Bytes(input.len() as u64));
+
+    group.bench_function("flate2_gz_decode", |b| {
+        b.iter_batched(
+            // Setup closure: Criterion does not time this clone.
+            || input.clone(),
+            |bytes| {
+                let mut reader = GzDecoder::new(bytes.as_slice());
+                let mut out = Vec::with_capacity(128 * 1024);
+                reader.read_to_end(&mut out).unwrap();
+                black_box(out)
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_gzip_decompress);
+criterion_main!(benches);
diff --git a/src/oneio/compressions/mod.rs b/src/oneio/compressions/mod.rs
index fc7da78..95adcaa 100644
--- a/src/oneio/compressions/mod.rs
+++ b/src/oneio/compressions/mod.rs
@@ -11,7 +11,7 @@ use std::io::{BufWriter, Read, Write};
 #[cfg(feature = "bz")]
 pub(crate) mod bzip2;
-#[cfg(feature = "gz")]
+#[cfg(feature = "any_gz")]
 pub(crate) mod
gzip; #[cfg(feature = "lz")] pub(crate) mod lz4; @@ -50,7 +50,7 @@ pub(crate) fn get_compression_reader( file_suffix: &str, ) -> Result, OneIoError> { match file_suffix { - #[cfg(feature = "gz")] + #[cfg(feature = "any_gz")] "gz" | "gzip" | "tgz" => gzip::get_reader(raw_reader), #[cfg(feature = "bz")] "bz2" | "bz" => bzip2::get_reader(raw_reader), @@ -97,7 +97,7 @@ pub(crate) fn get_compression_writer( file_suffix: &str, ) -> Result, OneIoError> { match file_suffix { - #[cfg(feature = "gz")] + #[cfg(feature = "any_gz")] "gz" | "gzip" | "tgz" => gzip::get_writer(raw_writer), #[cfg(feature = "bz")] "bz2" | "bz" => bzip2::get_writer(raw_writer), diff --git a/src/oneio/mod.rs b/src/oneio/mod.rs index 9866795..15154ff 100644 --- a/src/oneio/mod.rs +++ b/src/oneio/mod.rs @@ -552,7 +552,7 @@ fn get_async_compression_reader( file_type: &str, ) -> Result, OneIoError> { match file_type { - #[cfg(all(feature = "async", feature = "gz"))] + #[cfg(all(feature = "async", feature = "any_gz"))] "gz" | "gzip" => { use async_compression::tokio::bufread::GzipDecoder; use tokio::io::BufReader; @@ -608,7 +608,7 @@ mod tests { const TEST_TEXT: &str = "OneIO test file.\nThis is a test."; - #[cfg(feature = "gz")] + #[cfg(feature = "any_gz")] #[test] fn test_progress_tracking_local() { use std::sync::{Arc, Mutex}; @@ -751,7 +751,7 @@ mod tests { } // Test with compression formats that support async - #[cfg(feature = "gz")] + #[cfg(feature = "any_gz")] { match get_reader_async("tests/test_data.txt.gz").await { Ok(mut reader) => { diff --git a/tests/async_integration.rs b/tests/async_integration.rs index ce4abf0..db36ad0 100644 --- a/tests/async_integration.rs +++ b/tests/async_integration.rs @@ -18,7 +18,7 @@ async fn async_read_local_plain() { assert_eq!(content, TEST_TEXT); } -#[cfg(feature = "gz")] +#[cfg(feature = "any_gz")] #[tokio::test] async fn async_read_local_gzip() { let mut reader = oneio::get_reader_async("tests/test_data.txt.gz") @@ -46,7 +46,7 @@ async fn 
async_read_http_plain() { } } -#[cfg(all(feature = "http", feature = "gz"))] +#[cfg(all(feature = "http", feature = "any_gz"))] #[tokio::test] async fn async_read_http_gzip() { match oneio::get_reader_async("https://spaces.bgpkit.org/oneio/test_data.txt.gz").await { diff --git a/tests/basic_integration.rs b/tests/basic_integration.rs index d16594f..d4416ba 100644 --- a/tests/basic_integration.rs +++ b/tests/basic_integration.rs @@ -53,7 +53,7 @@ fn test_local_files() { test_read("tests/test_data.txt"); // Test gzip (default feature) - #[cfg(feature = "gz")] + #[cfg(feature = "any_gz")] test_read("tests/test_data.txt.gz"); // Test bzip2 (default feature) @@ -66,7 +66,7 @@ fn test_writers() { // Test writing with default compression formats test_write("tests/test_write_data.txt", "tests/test_data.txt"); - #[cfg(feature = "gz")] + #[cfg(feature = "any_gz")] test_write("tests/test_write_data.txt.gz", "tests/test_data.txt.gz"); #[cfg(feature = "bz")] @@ -79,7 +79,7 @@ fn test_remote_files() { // Test HTTP reading (default feature) test_read("https://spaces.bgpkit.org/oneio/test_data.txt"); - #[cfg(feature = "gz")] + #[cfg(feature = "any_gz")] test_read("https://spaces.bgpkit.org/oneio/test_data.txt.gz"); #[cfg(feature = "bz")]