diff --git a/Cargo.lock b/Cargo.lock index 00b2e56836e..44f2049c13d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1690,12 +1690,12 @@ dependencies = [ "gix-trace", "gix-utils", "libc", - "libz-rs-sys", "once_cell", "parking_lot", "prodash", "thiserror 2.0.17", "walkdir", + "zlib-rs 0.5.2 (git+https://github.com/trifectatechfoundation/zlib-rs.git?rev=bc0c9fd8b29a5ba64869717f47cb241008621f30)", ] [[package]] @@ -3196,7 +3196,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" dependencies = [ - "zlib-rs", + "zlib-rs 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -5865,3 +5865,8 @@ name = "zlib-rs" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" + +[[package]] +name = "zlib-rs" +version = "0.5.2" +source = "git+https://github.com/trifectatechfoundation/zlib-rs.git?rev=bc0c9fd8b29a5ba64869717f47cb241008621f30#bc0c9fd8b29a5ba64869717f47cb241008621f30" diff --git a/gix-features/Cargo.toml b/gix-features/Cargo.toml index 672529edabc..bb4a5c734ad 100644 --- a/gix-features/Cargo.toml +++ b/gix-features/Cargo.toml @@ -57,7 +57,7 @@ crc32 = ["dep:crc32fast"] ## Enable the usage of zlib-related utilities to compress or decompress data. ## This enables and uses the high-performance `zlib-rs` backend. -zlib = ["dep:libz-rs-sys", "dep:thiserror"] +zlib = ["dep:zlib-rs", "dep:thiserror"] #! ### Other @@ -108,7 +108,7 @@ bytesize = { version = "2.3.1", optional = true } bytes = { version = "1.0.0", optional = true } # zlib module -libz-rs-sys = { version = "0.5.2", optional = true } +zlib-rs = { git = "https://github.com/trifectatechfoundation/zlib-rs.git", rev = "bc0c9fd8b29a5ba64869717f47cb241008621f30", optional = true, default-features = false, features = ["std", "rust-allocator"] } thiserror = { version = "2.0.17", optional = true } # Note: once_cell is kept for OnceCell type because std::sync::OnceLock::get_or_try_init() is not yet stable. diff --git a/gix-features/src/zlib/mod.rs b/gix-features/src/zlib/mod.rs index 810715f3b15..2ee70a6fcfd 100644 --- a/gix-features/src/zlib/mod.rs +++ b/gix-features/src/zlib/mod.rs @@ -1,79 +1,8 @@ use std::ffi::c_int; -/// A type to hold all state needed for decompressing a ZLIB encoded stream. -pub struct Decompress(libz_rs_sys::z_stream); - -unsafe impl Sync for Decompress {} -unsafe impl Send for Decompress {} - -impl Default for Decompress { - fn default() -> Self { - Self::new() - } -} - -impl Decompress { - /// The amount of bytes consumed from the input so far. - pub fn total_in(&self) -> u64 { - self.0.total_in as _ - } - - /// The amount of decompressed bytes that have been written to the output thus far. - pub fn total_out(&self) -> u64 { - self.0.total_out as _ - } - - /// Create a new instance. Note that it allocates in various ways and thus should be re-used. - pub fn new() -> Self { - let mut this = libz_rs_sys::z_stream::default(); - - unsafe { - libz_rs_sys::inflateInit_( - &mut this, - libz_rs_sys::zlibVersion(), - core::mem::size_of::() as core::ffi::c_int, - ); - } - - Self(this) - } - - /// Reset the state to allow handling a new stream. - pub fn reset(&mut self) { - unsafe { libz_rs_sys::inflateReset(&mut self.0) }; - } - - /// Decompress `input` and write all decompressed bytes into `output`, with `flush` defining some details about this. - pub fn decompress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushDecompress, - ) -> Result { - self.0.avail_in = input.len() as _; - self.0.avail_out = output.len() as _; - - self.0.next_in = input.as_ptr(); - self.0.next_out = output.as_mut_ptr(); - - match unsafe { libz_rs_sys::inflate(&mut self.0, flush as _) } { - libz_rs_sys::Z_OK => Ok(Status::Ok), - libz_rs_sys::Z_BUF_ERROR => Ok(Status::BufError), - libz_rs_sys::Z_STREAM_END => Ok(Status::StreamEnd), - - libz_rs_sys::Z_STREAM_ERROR => Err(DecompressError::StreamError), - libz_rs_sys::Z_DATA_ERROR => Err(DecompressError::DataError), - libz_rs_sys::Z_MEM_ERROR => Err(DecompressError::InsufficientMemory), - err => Err(DecompressError::Unknown { err }), - } - } -} - -impl Drop for Decompress { - fn drop(&mut self) { - unsafe { libz_rs_sys::inflateEnd(&mut self.0) }; - } -} +pub use zlib_rs::Inflate as Decompress; +pub use zlib_rs::InflateFlush as FlushDecompress; +pub use zlib_rs::Status; /// The error produced by [`Decompress::decompress()`]. #[derive(Debug, thiserror::Error)] @@ -89,43 +18,15 @@ pub enum DecompressError { Unknown { err: c_int }, } -/// The status returned by [`Decompress::decompress()`]. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Status { - /// The decompress operation went well. Not to be confused with `StreamEnd`, so one can continue - /// the decompression. - Ok, - /// An error occurred when decompression. - BufError, - /// The stream was fully decompressed. - StreamEnd, -} - -/// Values which indicate the form of flushing to be used when -/// decompressing in-memory data. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -#[non_exhaustive] -#[allow(clippy::unnecessary_cast)] -pub enum FlushDecompress { - /// A typical parameter for passing to compression/decompression functions, - /// this indicates that the underlying stream to decide how much data to - /// accumulate before producing output in order to maximize compression. - None = libz_rs_sys::Z_NO_FLUSH as isize, - - /// All pending output is flushed to the output buffer and the output is - /// aligned on a byte boundary so that the decompressor can get all input - /// data available so far. - /// - /// Flushing may degrade compression for some compression algorithms and so - /// it should only be used when necessary. This will complete the current - /// deflate block and follow it with an empty stored block. - Sync = libz_rs_sys::Z_SYNC_FLUSH as isize, - - /// Pending input is processed and pending output is flushed. - /// - /// The return value may indicate that the stream is not yet done and more - /// data has yet to be processed. - Finish = libz_rs_sys::Z_FINISH as isize, +impl From for DecompressError { + fn from(value: zlib_rs::InflateError) -> Self { + match value { + zlib_rs::InflateError::NeedDict { .. } => Self::Unknown { err: 2 }, + zlib_rs::InflateError::StreamError => Self::StreamError, + zlib_rs::InflateError::DataError => Self::DataError, + zlib_rs::InflateError::MemError => Self::InsufficientMemory, + } + } } /// non-streaming interfaces for decompression @@ -144,28 +45,57 @@ pub mod inflate { } /// Decompress a few bytes of a zlib stream without allocation -#[derive(Default)] pub struct Inflate { /// The actual decompressor doing all the work. - pub state: Decompress, + pub state: zlib_rs::Inflate, +} + +impl Default for Inflate { + fn default() -> Self { + Self { + state: zlib_rs::Inflate::new(true, 15), + } + } } impl Inflate { + /// The amount of bytes consumed from the input so far. + pub fn total_in(&self) -> u64 { + self.state.total_in() + } + + /// The amount of decompressed bytes that have been written to the output thus far. + pub fn total_out(&self) -> u64 { + self.state.total_out() + } + + /// Decompress `input` and write all decompressed bytes into `output`, with `flush` defining some details about this. + pub fn decompress( + &mut self, + input: &[u8], + output: &mut [u8], + flush: zlib_rs::InflateFlush, + ) -> Result { + self.state.decompress(input, output, flush) + } + /// Run the decompressor exactly once. Cannot be run multiple times pub fn once(&mut self, input: &[u8], out: &mut [u8]) -> Result<(Status, usize, usize), inflate::Error> { let before_in = self.state.total_in(); let before_out = self.state.total_out(); - let status = self.state.decompress(input, out, FlushDecompress::None)?; - Ok(( - status, - (self.state.total_in() - before_in) as usize, - (self.state.total_out() - before_out) as usize, - )) + match self.state.decompress(input, out, FlushDecompress::NoFlush) { + Ok(status) => Ok(( + status, + (self.state.total_in() - before_in) as usize, + (self.state.total_out() - before_out) as usize, + )), + Err(e) => Err(inflate::Error::Inflate(e.into())), + } } /// Ready this instance for decoding another data stream. pub fn reset(&mut self) { - self.state.reset(); + self.state.reset(true); } } diff --git a/gix-features/src/zlib/stream/deflate/mod.rs b/gix-features/src/zlib/stream/deflate/mod.rs index 9945288977a..ffbff861352 100644 --- a/gix-features/src/zlib/stream/deflate/mod.rs +++ b/gix-features/src/zlib/stream/deflate/mod.rs @@ -1,4 +1,3 @@ -use crate::zlib::Status; use std::ffi::c_int; const BUF_SIZE: usize = 4096 * 8; @@ -7,7 +6,7 @@ const BUF_SIZE: usize = 4096 * 8; /// /// Be sure to call `flush()` when done to finalize the deflate stream. pub struct Write { - compressor: Compress, + compressor: zlib_rs::Deflate, inner: W, buf: [u8; BUF_SIZE], } @@ -25,76 +24,6 @@ where } } -/// Hold all state needed for compressing data. -pub struct Compress(libz_rs_sys::z_stream); - -unsafe impl Sync for Compress {} -unsafe impl Send for Compress {} - -impl Default for Compress { - fn default() -> Self { - Self::new() - } -} - -impl Compress { - /// The number of bytes that were read from the input. - pub fn total_in(&self) -> u64 { - self.0.total_in as _ - } - - /// The number of compressed bytes that were written to the output. - pub fn total_out(&self) -> u64 { - self.0.total_out as _ - } - - /// Create a new instance - this allocates so should be done with care. - pub fn new() -> Self { - let mut this = libz_rs_sys::z_stream::default(); - - unsafe { - libz_rs_sys::deflateInit_( - &mut this, - libz_rs_sys::Z_BEST_SPEED, - libz_rs_sys::zlibVersion(), - core::mem::size_of::() as core::ffi::c_int, - ); - } - - Self(this) - } - - /// Prepare the instance for a new stream. - pub fn reset(&mut self) { - unsafe { libz_rs_sys::deflateReset(&mut self.0) }; - } - - /// Compress `input` and write compressed bytes to `output`, with `flush` controlling additional characteristics. - pub fn compress(&mut self, input: &[u8], output: &mut [u8], flush: FlushCompress) -> Result { - self.0.avail_in = input.len() as _; - self.0.avail_out = output.len() as _; - - self.0.next_in = input.as_ptr(); - self.0.next_out = output.as_mut_ptr(); - - match unsafe { libz_rs_sys::deflate(&mut self.0, flush as _) } { - libz_rs_sys::Z_OK => Ok(Status::Ok), - libz_rs_sys::Z_BUF_ERROR => Ok(Status::BufError), - libz_rs_sys::Z_STREAM_END => Ok(Status::StreamEnd), - - libz_rs_sys::Z_STREAM_ERROR => Err(CompressError::StreamError), - libz_rs_sys::Z_MEM_ERROR => Err(CompressError::InsufficientMemory), - err => Err(CompressError::Unknown { err }), - } - } -} - -impl Drop for Compress { - fn drop(&mut self) { - unsafe { libz_rs_sys::deflateEnd(&mut self.0) }; - } -} - /// The error produced by [`Compress::compress()`]. #[derive(Debug, thiserror::Error)] #[error("{msg}")] @@ -108,58 +37,24 @@ pub enum CompressError { Unknown { err: c_int }, } -/// Values which indicate the form of flushing to be used when compressing -/// in-memory data. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -#[non_exhaustive] -#[allow(clippy::unnecessary_cast)] -pub enum FlushCompress { - /// A typical parameter for passing to compression/decompression functions, - /// this indicates that the underlying stream to decide how much data to - /// accumulate before producing output in order to maximize compression. - None = libz_rs_sys::Z_NO_FLUSH as isize, - - /// All pending output is flushed to the output buffer, but the output is - /// not aligned to a byte boundary. - /// - /// All input data so far will be available to the decompressor (as with - /// `Flush::Sync`). This completes the current deflate block and follows it - /// with an empty fixed codes block that is 10 bits long, and it assures - /// that enough bytes are output in order for the decompressor to finish the - /// block before the empty fixed code block. - Partial = libz_rs_sys::Z_PARTIAL_FLUSH as isize, - - /// All pending output is flushed to the output buffer and the output is - /// aligned on a byte boundary so that the decompressor can get all input - /// data available so far. - /// - /// Flushing may degrade compression for some compression algorithms and so - /// it should only be used when necessary. This will complete the current - /// deflate block and follow it with an empty stored block. - Sync = libz_rs_sys::Z_SYNC_FLUSH as isize, - - /// All output is flushed as with `Flush::Sync` and the compression state is - /// reset so decompression can restart from this point if previous - /// compressed data has been damaged or if random access is desired. - /// - /// Using this option too often can seriously degrade compression. - Full = libz_rs_sys::Z_FULL_FLUSH as isize, - - /// Pending input is processed and pending output is flushed. - /// - /// The return value may indicate that the stream is not yet done and more - /// data has yet to be processed. - Finish = libz_rs_sys::Z_FINISH as isize, +impl From for CompressError { + fn from(value: zlib_rs::DeflateError) -> Self { + match value { + zlib_rs::DeflateError::StreamError => Self::StreamError, + zlib_rs::DeflateError::MemError => Self::InsufficientMemory, + zlib_rs::DeflateError::DataError => Self::Unknown { err: value as c_int }, + } + } } mod impls { use std::io; - use crate::zlib::stream::deflate::{self, Compress, FlushCompress}; + use crate::zlib::stream::deflate::{self, CompressError}; use crate::zlib::Status; - pub(crate) fn new_compress() -> Compress { - Compress::new() + pub(crate) fn new_compress() -> zlib_rs::Deflate { + zlib_rs::Deflate::new(1, true, 15) } impl deflate::Write @@ -187,7 +82,7 @@ mod impls { self.inner } - fn write_inner(&mut self, mut buf: &[u8], flush: FlushCompress) -> io::Result { + fn write_inner(&mut self, mut buf: &[u8], flush: zlib_rs::DeflateFlush) -> io::Result { let total_in_when_start = self.compressor.total_in(); loop { let last_total_in = self.compressor.total_in(); @@ -196,6 +91,7 @@ mod impls { let status = self .compressor .compress(buf, &mut self.buf, flush) + .map_err(CompressError::from) .map_err(io::Error::other)?; let written = self.compressor.total_out() - last_total_out; @@ -227,11 +123,11 @@ mod impls { impl io::Write for deflate::Write { fn write(&mut self, buf: &[u8]) -> io::Result { - self.write_inner(buf, FlushCompress::None) + self.write_inner(buf, zlib_rs::DeflateFlush::NoFlush) } fn flush(&mut self) -> io::Result<()> { - self.write_inner(&[], FlushCompress::Finish).map(|_| ()) + self.write_inner(&[], zlib_rs::DeflateFlush::Finish).map(|_| ()) } } } diff --git a/gix-features/src/zlib/stream/deflate/tests.rs b/gix-features/src/zlib/stream/deflate/tests.rs index 37e19e8e90c..0115a69ee4e 100644 --- a/gix-features/src/zlib/stream/deflate/tests.rs +++ b/gix-features/src/zlib/stream/deflate/tests.rs @@ -22,7 +22,7 @@ mod deflate_stream { { pub fn from_read(read: R) -> InflateReader { InflateReader { - decompressor: Decompress::new(), + decompressor: Decompress::new(true, 15), inner: read, } } diff --git a/gix-features/src/zlib/stream/inflate.rs b/gix-features/src/zlib/stream/inflate.rs index 3654fe6a538..8440d269d3b 100644 --- a/gix-features/src/zlib/stream/inflate.rs +++ b/gix-features/src/zlib/stream/inflate.rs @@ -1,9 +1,9 @@ use std::{io, io::BufRead}; -use crate::zlib::{Decompress, FlushDecompress, Status}; +use crate::zlib::{FlushDecompress, Status}; /// Read bytes from `rd` and decompress them using `state` into a pre-allocated fitting buffer `dst`, returning the amount of bytes written. -pub fn read(rd: &mut impl BufRead, state: &mut Decompress, mut dst: &mut [u8]) -> io::Result { +pub fn read(rd: &mut impl BufRead, state: &mut zlib_rs::Inflate, mut dst: &mut [u8]) -> io::Result { let mut total_written = 0; loop { let (written, consumed, ret, eof); @@ -15,7 +15,7 @@ pub fn read(rd: &mut impl BufRead, state: &mut Decompress, mut dst: &mut [u8]) - let flush = if eof { FlushDecompress::Finish } else { - FlushDecompress::None + FlushDecompress::NoFlush }; ret = state.decompress(input, dst, flush); written = (state.total_out() - before_out) as usize; diff --git a/gix-pack/src/data/input/bytes_to_entries.rs b/gix-pack/src/data/input/bytes_to_entries.rs index 4f563621dfe..5c7234079c7 100644 --- a/gix-pack/src/data/input/bytes_to_entries.rs +++ b/gix-pack/src/data/input/bytes_to_entries.rs @@ -1,6 +1,6 @@ use std::{fs, io}; -use gix_features::zlib::Decompress; +use gix_features::zlib::Inflate; use gix_hash::{Hasher, ObjectId}; use crate::data::input; @@ -10,7 +10,7 @@ use crate::data::input; /// The iterator used as part of [`Bundle::write_to_directory(…)`][crate::Bundle::write_to_directory()]. pub struct BytesToEntriesIter
{ read: BR, - decompressor: Decompress, + decompressor: Inflate, offset: u64, had_error: bool, version: crate::data::Version, @@ -62,7 +62,7 @@ where ); Ok(BytesToEntriesIter { read, - decompressor: Decompress::new(), + decompressor: Inflate::default(), compressed, offset: 12, had_error: false, @@ -288,7 +288,7 @@ pub struct DecompressRead<'a, R> { /// The reader from which bytes should be decompressed. pub inner: R, /// The decompressor doing all the work. - pub decompressor: &'a mut Decompress, + pub decompressor: &'a mut Inflate, } impl io::Read for DecompressRead<'_, R> @@ -296,7 +296,7 @@ where R: io::BufRead, { fn read(&mut self, into: &mut [u8]) -> io::Result { - gix_features::zlib::stream::inflate::read(&mut self.inner, self.decompressor, into) + gix_features::zlib::stream::inflate::read(&mut self.inner, &mut self.decompressor.state, into) } }