diff --git a/CHANGELOG.md b/CHANGELOG.md index 05f4b82..7f6cfdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **Raw LZMA2 encoder** (`lzma2`): `compcol::lzma2::Lzma2` now encodes as well + as decodes — it emits the raw 7-Zip LZMA2 chunk stream (full dict/props/state + reset per chunk, uncompressed-chunk fallback when compression would expand, + `0x00` end marker), reusing the xz LZMA2 chunk codec. The dictionary size is + out of band (the 7z coder property); the encoder uses the 4 MiB default so a + default-config decoder round-trips. Validated by round-trip and by decoding + the output through the shared xz LZMA2 codec. +- **LZFSE `bvx2` decoding** (`lzfse`): the core LZFSE v2 block type (LZ77 + + Finite State Entropy) now decodes — full v2 header parse, 4-way interleaved + literal FSE, three interleaved L/M/D FSE streams (reverse bitstreams), and LZ + reconstruction. The FSE table construction matches Apple's general + `fse_init_decoder_table` (the `k`/`k-1` split), so arbitrary frequency tables + are handled, not just power-of-two ones. Validated by round-trip against an + in-crate v2 encoder plus a frozen hand-written non-dyadic vector; there is no + Apple `lzfse` tool in the build environment, so real-stream interop is + best-effort but follows the documented format precisely. `bvx1` (v1) remains + `Unsupported`. + +### Changed + +- **lz5 (Lizard) Huffman sub-streams** stay `Unsupported`, now with a precise + rationale in the module docs: the Huff0 entropy stage selects X1/X2 from + `(regenSize, comprLen)` at runtime and there is no reference encoder or + fixture available to validate a decoder bit-exactly, so — consistent with the + crate's `lzham`/`sit13` policy — it is left honest rather than shipped blind. 
+ The docs record the concrete reuse path (zstd's X1 Huff0 decoder + an X2 + decoder + the `HUF_selectDecoder` heuristic) for a future round with fixtures. + + ### Added - **HTTP/3 QPACK header compression** (RFC 9204) behind the new `qpack` diff --git a/README.md b/README.md index 3bee9b0..567f56e 100644 --- a/README.md +++ b/README.md @@ -47,14 +47,14 @@ flag, and a `compcol` binary turns the library into a Unix-style filter. | LZW (`compress(1)` `.Z`) | `lzw` | `.lzw` | full | full | `compress(1)` / `uncompress(1)` | | LZMA (legacy `.lzma`) | `lzma` | `.lzma` | full | full | `python3 -m lzma` (FORMAT_ALONE) | | xz | `xz` | `.xz` | compressed-LZMA2 chunks + uncompressed fallback | full envelope + all reset variants | `xz(1)` both directions | -| Raw LZMA2 (7z coder 21) | `lzma2` | `.lzma2` | `Unsupported` (decode-only) | full (raw LZMA2 chunk stream; reuses the xz LZMA2 engine) | round-trip vs the xz LZMA2 encoder | +| Raw LZMA2 (7z coder 21) | `lzma2` | `.lzma2` | full (raw LZMA2 chunk stream; reuses the xz LZMA2 engine) | full (raw LZMA2 chunk stream; reuses the xz LZMA2 engine) | round-trip + cross-decode via the shared xz LZMA2 codec | | Zstandard (RFC 8478) | `zstd` | `.zst` | LZ77 + Huffman literals + FSE_Compressed_Mode sequences + repeat offsets + RLE blocks | full Compressed_Block | `zstd(1)` both directions | | Brotli (RFC 7932) | `brotli` | `.br` | LZ77 + length-limited Huffman + 704-symbol IC alphabet + static-dictionary refs | full (with 122 KiB static dictionary) | `brotli(1)` both directions | | LZO (LZO1X-1) | `lzo` | `.lzo` | LZ77 hash matcher | full | `python3 -c "import lzo"` | | LZX (Microsoft CAB / WIM) | `lzx` | `.lzx` | uncompressed blocks only | full (verbatim + aligned-offset + uncompressed; E8 filter) | — | | Amiga LZX (original 1995 Forbes) | `amiga_lzx` | — (`.lzx` claimed by MS LZX) | uncompressed blocks only | full (verbatim + aligned + uncompressed; fixed 64 KiB window, no chunk reset, no E8 filter) | — | | Quantum (Stac, old CAB) | 
`quantum` | `.q` | `Unsupported` (no public encoder exists) | full (libmspack-equivalent) | libmspack regression fixtures | -| LZFSE (Apple) | `lzfse` | `.lzfse` | `Unsupported` (decoder-only) | `bvx-` raw + `bvxn` (LZVN); `bvx2` returns `Unsupported` | hand-built fixtures (no Apple toolchain bundled) | +| LZFSE (Apple) | `lzfse` | `.lzfse` | `Unsupported` (decoder-only) | `bvx-` raw + `bvxn` (LZVN) + `bvx2` (LZ77 + FSE); `bvx1` returns `Unsupported` | round-trip (bvx2 vs own FSE encoder; no Apple toolchain bundled) | | ADC (Apple DMG) | `adc` | `.adc` | LZSS-style greedy match-finder | full | hand-built fixtures | | bzip2 | `bzip2` | `.bz2` | full (RLE-1 + SA-IS BWT + MTF + RLE-2 + dynamic Huffman) | full | `bzip2(1)` both directions | | PPMd (Shkarin's PPMII variant H) | `ppmd` | `.ppmd` | `Unsupported` (decoder-only; PPM model is intricate) | full (used in 7z / RAR3+ / ZIP method 98) | `python3 ppmd-cffi` | @@ -427,7 +427,7 @@ lzw = ["alloc"] lzo = ["alloc"] lzx = ["alloc"] quantum = ["alloc"] -lzfse = ["alloc"] # decoder-only, bvx2 returns Unsupported +lzfse = ["alloc"] # decoder-only; bvx-/bvxn/bvx2, bvx1 Unsupported adc = ["alloc"] rar1 = ["alloc"] rar2 = ["alloc"] diff --git a/src/lz5/block.rs b/src/lz5/block.rs index a674319..f06ebb9 100644 --- a/src/lz5/block.rs +++ b/src/lz5/block.rs @@ -10,6 +10,19 @@ //! Only the LZ4-codeword sequence loop (levels 10..=19, 30..=39) with //! all sub-streams stored raw (no Huffman entropy stage) is //! implemented; everything else returns [`Error::Unsupported`]. +//! +//! Two paths stay `Unsupported` for documented, validation-driven +//! reasons (see the inline comments at the `huffman_bits` and LIZv1 +//! rejections below): +//! +//! * **Huff0 entropy stage** (any sub-stream flag bit set): Lizard's +//! generic `HUF_decompress` recomputes an X1-vs-X2 decoder choice +//! that is never carried in the stream; the crate has only an X1 +//! Huff0 decoder (private to `zstd`), and there is no `lizard` CLI +//! 
or fixture here to validate an X2 decoder against. A round-trip +//! against our own X1-only encoder would prove nothing. +//! * **LIZv1 codewords** (levels 20..=29, 40..=49): a separate, larger +//! sequence format, out of scope for this round. use alloc::vec::Vec; @@ -61,6 +74,12 @@ pub fn decode_compressed_block(input: &[u8], out: &mut Vec, cap: usize) -> R // Lizard groups levels by decompression strategy: // 10..=19, 30..=39 → LZ4 codewords (this build supports) // 20..=29, 40..=49 → LIZv1 codewords (not supported) + // + // LIZv1 is a distinct, larger sequence format (`Lizard_decompress_LIZv1` + // vs `Lizard_decompress_LZ4` in the reference): different token layout, + // explicit `lengths`/`offset16`/`offset24` streams, and a 24-bit offset + // path. Implementing it is a separate effort from the Huffman stage and + // is out of scope for this round, so it stays `Unsupported`. let is_lz4_mode = matches!(clevel, 10..=19 | 30..=39); if !is_lz4_mode { return Err(Error::Unsupported); @@ -96,8 +115,38 @@ pub fn decode_compressed_block(input: &[u8], out: &mut Vec, cap: usize) -> R if res & FLAG_LEN != 0 { return Err(Error::Corrupt); } - // Any Huffman bit set on a sub-stream means we'd need to FSE-Huffman - // decode that stream. Out of scope. + // Any Huffman bit set on a sub-stream means the stream is entropy-coded + // with Huff0 (Yann Collet's FiniteStateEntropy library) and must be + // `HUF_decompress`'d before the sequence loop runs. Each such sub-stream + // is framed as a 6-byte header (3-byte LE regenerated size + 3-byte LE + // compressed size) followed by `compressed_size` bytes of Huff0 payload + // (`Lizard_readStream` → `HUF_decompress(op, regenSize, ip + 6, comprLen)`). + // + // This stays `Unsupported`. 
The decision is deliberate, not a TODO — + // there is no faithful way to *validate* such a decoder in this + // environment, and the crate's policy (see `lzham`, `sit13`) is to mark + // formats we cannot validate bit-exactly as `Unsupported` rather than + // ship a blind decoder. Concretely: + // + // * The crate already has a Huff0 decoder in `src/zstd/huffman.rs`, but + // it is (a) private to the `zstd` module (`mod huffman;`, not + // reachable from here without re-exporting it) and (b) implements + // only the **X1** (single-symbol) decode table that zstd's *literals* + // spec restricts itself to. + // * Lizard calls the *generic* `HUF_decompress`, which selects **X1 or + // X2** (double-symbol) at runtime via `HUF_selectDecoder`. That + // choice is **recomputed from (regenSize, comprLen)** and is **never + // stored in the stream**, so a conformant decoder must implement both + // X1 and X2 *and* reproduce `HUF_selectDecoder`'s timing heuristic + // exactly. The crate has no X2 decoder anywhere. (The 4-stream jump + // table — three LE u16 sizes — does match zstd's literals framing, so + // that part would be reusable; the X1/X2 split is the blocker.) + // * The lz5 encoder here is store-only, and there is no `lizard` CLI or + // Huff0 fixture in this environment. A round-trip against a + // hand-written X1-only encoder would always select X1 and "pass" + // while proving nothing about a real (possibly X2) Lizard block — a + // self-validating fiction. Absent a real fixture or reference + // encoder there is no honest round-trip, so we do not ship. let huffman_bits = res & (FLAG_LITERALS | FLAG_FLAGS | FLAG_OFFSET16 | FLAG_OFFSET24); if huffman_bits != 0 { return Err(Error::Unsupported); diff --git a/src/lz5/mod.rs b/src/lz5/mod.rs index 1a21eab..ea27572 100644 --- a/src/lz5/mod.rs +++ b/src/lz5/mod.rs @@ -29,9 +29,34 @@ //! **Decoder**: implemented for the **LZ4 codeword path with all //! 
sub-streams stored raw** (the most common shape produced by the //! reference CLI at levels 10..=19 on non-tiny inputs). Frames whose -//! blocks use the LIZv1 sequence format (levels 20..=29) or any -//! Huffman-coded sub-stream (levels 30+) are rejected with -//! [`Error::Unsupported`]. The frame-level uncompressed block path +//! blocks use the LIZv1 sequence format (levels 20..=29, 40..=49) or any +//! Huffman-coded sub-stream are rejected with [`Error::Unsupported`]. +//! +//! The Huffman path stays `Unsupported` for a concrete, validation-first +//! reason rather than mere absence of effort. Lizard's entropy stage is +//! Huff0 (`HUF_decompress` from Yann Collet's FiniteStateEntropy), the +//! same family as zstd's literals Huffman, and each Huffman sub-stream is +//! framed as a 6-byte header (3-byte LE regenerated size + 3-byte LE +//! compressed size) then the Huff0 payload. But the *generic* +//! `HUF_decompress` Lizard calls selects between **X1** (single-symbol) +//! and **X2** (double-symbol) decode tables via `HUF_selectDecoder`, and +//! that choice is **recomputed from the regenerated/compressed sizes, +//! never stored in the stream**. This crate's Huff0 decoder +//! (`src/zstd/huffman.rs`) is X1-only and is private to the `zstd` +//! module; it covers neither X2 nor the size-driven selector. With no +//! `lizard` CLI and no Huff0 fixtures in this environment, the only +//! "test" available would be a round-trip against a hand-written +//! X1-only encoder, which would always pick X1 and therefore validate +//! nothing about real (possibly X2) blocks. Per the crate's +//! `lzham`/`sit13` policy, an unvalidatable decoder is worse than an +//! honest `Unsupported`, so we do not ship one. +//! +//! A future round could lift this once validation is possible: expose +//! zstd's X1 Huff0 decoder as `pub(crate)`, add an X2 decoder plus the +//! `HUF_selectDecoder` heuristic, and validate against fixtures from the +//! `lizard` CLI (e.g. `lizard -30`). 
The 6-byte sub-stream header and the +//! 4-stream jump table (three LE u16 sizes) already match formats this +//! crate parses elsewhere. The frame-level uncompressed block path //! (high bit on block-size word) is handled fully, so frames where //! every block stored raw decode without ever exercising the sequence //! loop. Block checksums (FLG bit 4) and external dictionaries are diff --git a/src/lzfse/decoder.rs b/src/lzfse/decoder.rs index da14f70..d4eff69 100644 --- a/src/lzfse/decoder.rs +++ b/src/lzfse/decoder.rs @@ -59,10 +59,12 @@ enum State { enum BlockKind { Uncompressed, Lzvn, - /// `bvx2` returns Unsupported once we've parsed its header far enough - /// to know we hit it; this variant exists so the state machine can - /// surface that decision uniformly with the other block kinds. + /// `bvx2` (LZFSE v2): FSE + LZ77. Decoded by [`lzfse_v2::decode_block`] + /// once the whole block (variable-length header + both payload streams) + /// is buffered. V2, + /// `bvx1` (LZFSE v1, uncompressed-freq variant): not emitted by modern + /// encoders; returns [`Error::Unsupported`]. V1, } @@ -216,23 +218,56 @@ impl Decoder { }; } BlockKind::V2 => { - // We don't decode v2 in this build, but we need to - // skip past the block cleanly so callers don't - // confuse "block we can't decode" with "garbage". - // Parse the n_payload_bytes field from the header. - if self.input_buf.len() < lzfse_v2::V2_HEADER_FIXED_BYTES { + // The v2 header is variable-length (FSE frequency + // tables follow the fixed packed fields). Buffer the + // fixed 28 bytes (post-magic: n_raw + three u64 words) + // first so we can read `header_size` and the payload + // sizes, then arrange to buffer the whole block (header + // + payload) before decoding it in one shot. 
+ let fixed = lzfse_v2::V2_HEADER_FIXED_BYTES; + if self.input_buf.len() < fixed { return Ok(RawProgress { consumed, written, done: false, }); } - // We *could* skip past the v2 block, but the spec is - // explicit that the encoder may mix block types - // freely. Returning Unsupported here is the - // documented behaviour for v2 in this build. - self.poisoned = true; - return Err(Error::Unsupported); + let header_size = match lzfse_v2::parse_header_size(&self.input_buf) { + Ok(h) => h as usize, + Err(e) => { + self.poisoned = true; + return Err(e); + } + }; + let n_payload = match lzfse_v2::parse_payload_size(&self.input_buf) { + Ok(n) => n as usize, + Err(e) => { + self.poisoned = true; + return Err(e); + } + }; + // `header_size` includes the 4-byte magic we already + // dropped; remaining block bytes after the magic are + // `header_size - 4 + n_payload`. + let header_len = match header_size.checked_sub(4) { + Some(h) if h >= fixed => h, + _ => { + self.poisoned = true; + return Err(Error::Corrupt); + } + }; + let block_len = match header_len.checked_add(n_payload) { + Some(b) => b, + None => { + self.poisoned = true; + return Err(Error::Corrupt); + } + }; + self.state = State::AwaitPayload { + kind: BlockKind::V2, + payload_len: block_len, + decoded_size: 0, + }; } BlockKind::V1 => { self.poisoned = true; @@ -287,7 +322,33 @@ impl Decoder { self.input_buf.drain(..payload_len); self.state = State::AwaitMagic; } - BlockKind::V2 | BlockKind::V1 => { + BlockKind::V2 => { + // The whole block (header + both payload streams) + // is now buffered in `payload_len` bytes. Decode in + // one shot. Bound the up-front output reservation by + // a payload-derived hint (an FSE block can expand + // more than LZVN, but is still bounded; the decoder + // enforces the exact `n_raw_bytes` internally). 
+ let cap_hint = payload_len.saturating_mul(32).saturating_add(1 << 16); + let mut block_out = Vec::new(); + match lzfse_v2::decode_block( + &self.input_buf[..payload_len], + &mut block_out, + cap_hint, + ) { + Ok(consumed_block) => { + debug_assert_eq!(consumed_block, payload_len); + } + Err(e) => { + self.poisoned = true; + return Err(e); + } + } + self.output_buf.append(&mut block_out); + self.input_buf.drain(..payload_len); + self.state = State::AwaitMagic; + } + BlockKind::V1 => { // Unreachable — header step would have errored. self.poisoned = true; return Err(Error::Unsupported); diff --git a/src/lzfse/fse.rs b/src/lzfse/fse.rs index be962dd..106ee1e 100644 --- a/src/lzfse/fse.rs +++ b/src/lzfse/fse.rs @@ -20,6 +20,27 @@ //! the symbol is a `u8`; for L/M/D, a base value and a count of extra value //! bits are stored. //! +//! ## Table construction (general, k/k-1 split) +//! +//! Table construction matches Apple's `fse_init_decoder_table`: the `f` +//! slots spread for a symbol are **not** all assigned the same bit-width. +//! With `n_states = 2^L` (always a power of two) and per-symbol frequency +//! `f` (arbitrary, `1..=n_states`, summing to `n_states`): +//! +//! ```text +//! k = L - floor(log2(f)) // == clz(f) - clz(n_states) +//! j0 = ((2 * n_states) >> k) - f +//! for i in 0..f (i = the i-th slot for this symbol, in spread order): +//! if i < j0: entry.k = k; entry.delta = ((f + i) << k) - n_states +//! else: entry.k = k - 1; entry.delta = (i - j0) << (k - 1) +//! ``` +//! +//! The first `j0` slots consume `k` bits, the remaining `f - j0` consume +//! `k - 1` bits. When `f` is a power of two `j0 == f` and the table +//! degenerates to a single bit-width per symbol; for general `f` the split +//! is required to tile `[0, n_states)` exactly. This is the algorithm real +//! Apple-produced LZFSE v2 streams rely on. +//! //! Frequency tables in the v2 block header are encoded with the custom //! variable-width scheme implemented by [`decode_freq_table`]. 
@@ -71,29 +92,32 @@ pub(crate) fn build_literal_decoder(freq: &[u16], n_states: usize) -> Result> k) as i32) - f as i32; for i in 0..f { while occupied[t] { t = (t + step) & mask; } - let delta = ((f as i32 + i as i32) << k) - n_states as i32; + let (ek, delta) = if (i as i32) < j0 { + (k, ((f as i32 + i as i32) << k) - n_states as i32) + } else { + (k - 1, (i as i32 - j0) << (k - 1)) + }; table[t] = FseEntry { - k: k as u8, + k: ek as u8, symbol: s as u8, delta: delta as i16, }; @@ -129,29 +153,32 @@ pub(crate) fn build_lmd_decoder( let mut t = 0usize; let step = spread_step(n_states); let mask = n_states - 1; - let n_states_log2 = n_states.trailing_zeros(); + let n_states_log2 = n_states.trailing_zeros() as i32; for (s, &f) in freq.iter().enumerate() { let f = f as usize; if f == 0 { continue; } - let k = if f == 1 { - n_states_log2 as i32 - } else { - let ceil = 32 - (f as u32 - 1).leading_zeros(); - n_states_log2 as i32 - ceil as i32 - }; + // k = L - floor(log2(f)); j0 splits the symbol's slots into a k-bit + // prefix and a (k-1)-bit suffix (see module docs). 
+ let floor_log2 = 31 - (f as u32).leading_zeros() as i32; + let k = n_states_log2 - floor_log2; if k < 0 { return Err(Error::Corrupt); } let k = k as u32; + let j0 = (((2 * n_states) >> k) as i32) - f as i32; for i in 0..f { while occupied[t] { t = (t + step) & mask; } - let delta = ((f as i32 + i as i32) << k) - n_states as i32; + let (ek, delta) = if (i as i32) < j0 { + (k, ((f as i32 + i as i32) << k) - n_states as i32) + } else { + (k - 1, (i as i32 - j0) << (k - 1)) + }; table[t] = LmdVEntry { - total_bits: (k as u8) + bits_per_symbol[s], + total_bits: (ek as u8) + bits_per_symbol[s], value_bits: bits_per_symbol[s], delta: delta as i16, v_base: base_per_symbol[s], @@ -260,3 +287,119 @@ pub(crate) fn decode_freq_table( } Ok((freqs, pos)) } + +#[cfg(test)] +mod tests { + use super::*; + + /// Core FSE invariant: for **each symbol** the `f` entries that carry it + /// must, via their `[delta, delta + 2^k)` next-state ranges, tile + /// `[0, n_states)` exactly once — that is what lets the encoder transition + /// to that symbol from any state. This holds **iff** the k/k-1 split is + /// implemented correctly; a regression to a single bit-width per symbol + /// breaks the tiling for any non-power-of-two frequency. The check is + /// independent of any encoder. + fn assert_literal_table_bijective(freq: &[u16], n_states: usize) { + let table = build_literal_decoder(freq, n_states).expect("table builds"); + assert_eq!(table.len(), n_states); + // Per-symbol coverage of the next-state space. 
+ let mut hits = vec![vec![0u32; n_states]; freq.len()]; + for e in &table { + let span = 1usize << e.k; + let base = e.delta as i32; + for off in 0..span as i32 { + let next = base + off; + assert!( + (0..n_states as i32).contains(&next), + "next {next} out of range for entry {e:?}" + ); + hits[e.symbol as usize][next as usize] += 1; + } + } + for (sym, &f) in freq.iter().enumerate() { + if f == 0 { + assert!( + hits[sym].iter().all(|&h| h == 0), + "absent symbol {sym} has table entries" + ); + continue; + } + for (s, &h) in hits[sym].iter().enumerate() { + assert_eq!( + h, 1, + "symbol {sym}: state {s} reachable {h} times (expected exactly 1)" + ); + } + } + } + + #[test] + fn literal_table_bijective_non_dyadic() { + // Deliberately non-power-of-two frequency sets that sum to 1024. + // A single-`k` table builder cannot tile [0,1024) for any of these. + assert_literal_table_bijective(&[3, 5, 1000, 16], 1024); + assert_literal_table_bijective(&[300, 700, 24], 1024); + // Many singletons + one large symbol (1 is non-dyadic-adjacent edge). + let mut f = vec![1u16; 24]; + f[0] = 1024 - 23; + assert_literal_table_bijective(&f, 1024); + // Skewed but smooth distribution (sums to 1024). + assert_literal_table_bijective(&[100, 101, 103, 107, 109, 504], 1024); + } + + #[test] + fn literal_table_bijective_dyadic_still_ok() { + // The power-of-two case (j0 == f) must still tile correctly. + assert_literal_table_bijective(&[512, 256, 256], 1024); + assert_literal_table_bijective(&[1024], 1024); + } + + #[test] + fn lmd_table_built_for_non_dyadic_freqs() { + // L stream: 64 states, a non-power-of-two split across symbols. 
+ let mut freq = vec![0u16; 20]; + freq[0] = 30; + freq[1] = 20; + freq[2] = 7; + freq[3] = 5; + freq[16] = 2; // a symbol carrying extra value bits + let extra = [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8]; + let base = [ + 0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60, + ]; + let table = build_lmd_decoder(&freq, 64, &extra, &base).expect("lmd table builds"); + assert_eq!(table.len(), 64); + // For each symbol the state-transition portion (total_bits-value_bits) + // must tile [0,64). Group entries by symbol via v_base, which is + // unique per symbol in `base`. + let mut hits: Vec<Vec<u32>> = (0..20).map(|_| vec![0u32; 64]).collect(); + for e in &table { + let sym = base + .iter() + .position(|&b| b == e.v_base) + .expect("known base"); + let kbits = e.total_bits - e.value_bits; + let span = 1usize << kbits; + for off in 0..span as i32 { + let next = e.delta as i32 + off; + assert!((0..64).contains(&next)); + hits[sym][next as usize] += 1; + } + } + for (sym, &f) in freq.iter().enumerate() { + if f == 0 { + continue; + } + assert!( + hits[sym].iter().all(|&h| h == 1), + "lmd symbol {sym} not bijective over states" + ); + } + } + + #[test] + fn non_power_of_two_table_size_rejected() { + // The table SIZE must be 2^L even though per-symbol freqs are general. + assert!(build_literal_decoder(&[5, 5], 10).is_err()); + } +} diff --git a/src/lzfse/lzfse_v2.rs b/src/lzfse/lzfse_v2.rs index 92527bb..c59bf0a 100644 --- a/src/lzfse/lzfse_v2.rs +++ b/src/lzfse/lzfse_v2.rs @@ -1,62 +1,1378 @@ -//! LZFSE v2 block decoder. +//! LZFSE v2 (`bvx2`) block decoder. //! //! ## Status in this build //! -//! **`bvx2` blocks return [`Error::Unsupported`]**. The FSE primitives that -//! a full v2 implementation needs are present in [`super::fse`], but the -//! intricate bit-packed v2 block header, the L/M/D table parsing, and the -//! reverse FSE bit stream are sufficiently subtle that a half-correct -//! 
implementation would silently corrupt output for some inputs. -//! -//! The decoder dispatches on `bvx2` magic, parses just enough of the v2 -//! header to know how many bytes the block claims to occupy (so we can -//! advance past it cleanly), and returns Unsupported rather than risk a -//! buggy decode. -//! -//! ## Wire format reference -//! -//! For a future round, the v2 header layout is (LSB-first packed): -//! - `n_raw_bytes: 20` -//! - `n_payload_bytes: 20` -//! - `n_literals: 20` -//! - `n_matches: 20` -//! - `n_literal_payload_bytes: 20` -//! - `n_lmd_payload_bytes: 20` -//! - `literal_bits: 3` (number of stub bits in the literal stream final byte) -//! - `literal_state[0..=3]: 10 each` (40 bits — four interleaved FSE states) -//! - `lmd_bits: 3` -//! - `l_state: 10` -//! - `m_state: 10` -//! - `d_state: 10` -//! - followed by packed frequency tables for D (64 syms), M (20 syms), -//! L (20 syms), and LIT (256 syms). -//! -//! The two payload streams (literal then LMD) are encoded *in reverse*: -//! the decoder pulls bytes from the end of each payload toward its start. - -#![allow(dead_code)] +//! **`bvx2` blocks are now decoded.** This is the core LZFSE block type +//! (LZ77 literal/match commands entropy-coded with Finite State Entropy), +//! so the `lzfse` decoder handles real compressed payloads here rather than +//! only the `bvx-` (uncompressed) and `bvxn` (LZVN) block kinds. +//! +//! ## Validation & interop caveat +//! +//! There is **no Apple `lzfse` reference tool and no captured `bvx2` +//! fixtures available in this build environment**, so correctness is gated +//! by **round-trip against this crate's own spec-conformant v2 encoder** +//! ([`encode_block`], `#[cfg(test)]`): we assert `decode(encode(x)) == x` +//! over empty / small / text / repetitive / random / multi-block inputs, +//! including inputs large enough to force a genuine FSE-coded block. The +//! encoder builds FSE frequency tables from the L/M/D/LIT histograms with the +//! 
standard quantized (nearest) normalization — producing **general, +//! non-power-of-two frequencies** — FSE-encodes the interleaved literal and +//! LMD streams in reverse, and packs the v2 header exactly per the documented +//! wire layout. Round-trip tests deliberately include skewed, non-dyadic +//! literal distributions and small (singleton) match-count histograms, plus +//! one hand-frozen non-dyadic block decoded independently of the encoder, so +//! a regression to a single bit-width per symbol would fail. +//! +//! The FSE table construction ([`super::fse`]) now matches Apple's general +//! `fse_init_decoder_table` (the **k/k-1 split**: a symbol's `f` spread slots +//! are partitioned into a `k`-bit prefix and a `(k-1)`-bit suffix at the +//! boundary `j0 = (2·n_states >> k) − f`), so arbitrary per-symbol +//! frequencies are handled — not just power-of-two normalizations. The table +//! *size* is always `2^L`; only the per-symbol frequencies are general. +//! +//! Interop with Apple-produced `bvx2` is therefore **best-effort but follows +//! the real table-construction algorithm**: the decoder mirrors the +//! documented format precisely (the same header layout, the same L/M/D +//! base/extra-bit tables, the same frequency-table encoding, the same reverse +//! FSE bit convention, and now the same general FSE table construction). It +//! has still not been cross-checked against an actual Apple-produced stream +//! in this environment, so full Apple-stream interop remains unverified here. +//! +//! ## Wire format reference (v2 header, authoritative) +//! +//! After the 4-byte `bvx2` magic the v2 header is (little-endian, +//! `__packed__`): +//! +//! - `n_raw_bytes: u32` — decoded output size of this block. +//! - `packed_fields[0]: u64` +//! - `[0..20)` `n_literals` +//! - `[20..40)` `n_literal_payload_bytes` +//! - `[40..60)` `n_matches` +//! - `[60..63)` `literal_bits` (FSE final-byte stub width for the literal +//! stream) +//! 
- `packed_fields[1]: u64` +//! - `[0..10)` `literal_state[0]` +//! - `[10..20)` `literal_state[1]` +//! - `[20..30)` `literal_state[2]` +//! - `[30..40)` `literal_state[3]` +//! - `[40..60)` `n_lmd_payload_bytes` +//! - `[60..63)` `lmd_bits` (FSE stub width for the LMD stream) +//! - `packed_fields[2]: u64` +//! - `[0..32)` `header_size` (bytes, magic..end of freq tables) +//! - `[32..42)` `l_state` +//! - `[42..52)` `m_state` +//! - `[52..62)` `d_state` +//! - then the variable-length frequency tables, bit-contiguous, in order +//! **L (20 syms), M (20 syms), D (64 syms), LIT (256 syms)**, each packed +//! with the LZFSE Huffman-style fixed encoding +//! ([`super::fse::decode_freq_table`]). +//! +//! The two payload streams follow the header: `n_literal_payload_bytes` of +//! literal FSE stream, then `n_lmd_payload_bytes` of LMD FSE stream. Both are +//! decoded **in reverse** (the FSE encoder is LIFO, so the decoder pulls +//! bytes from the end of each stream toward its start). + +use alloc::vec; +use alloc::vec::Vec; use crate::error::Error; -use crate::lzfse::bits::HeaderBits; +use crate::lzfse::bits::FseBits; +use crate::lzfse::fse; -/// Size of the fixed-width portion of the v2 header (the packed bit fields -/// before the variable-length frequency tables). Apple's reference: the v2 -/// header is 28 bytes of packed fields plus the freq-table payload. +/// Size of the fixed-width portion of the v2 header **after the 4-byte +/// magic**: `n_raw_bytes`(4) + three packed `u64` words (24) = 28 bytes. The +/// variable-length frequency tables follow it. (Apple's `header_size` field +/// additionally counts the 4-byte magic, so `header_size == 4 + +/// V2_HEADER_FIXED_BYTES + freq_table_bytes`.) pub(crate) const V2_HEADER_FIXED_BYTES: usize = 28; -/// Parse just the `n_payload_bytes` field out of a v2 block header. Used -/// by the main decoder to know how many bytes the block occupies so we -/// can skip it cleanly when returning Unsupported. 
-/// -/// `bytes` is the slice starting **after** the 4-byte magic. -/// Returns `Err(Error::UnexpectedEnd)` if `bytes.len() < V2_HEADER_FIXED_BYTES`. +/// Number of symbols in each stream's alphabet. +const N_L_SYMBOLS: usize = 20; +const N_M_SYMBOLS: usize = 20; +const N_D_SYMBOLS: usize = 64; +const N_LIT_SYMBOLS: usize = 256; + +/// FSE state counts (table sizes) for each stream. Fixed by the LZFSE format. +const L_STATES: usize = 64; +const M_STATES: usize = 64; +const D_STATES: usize = 256; +const LIT_STATES: usize = 1024; + +/// L/M/D extra-bit widths and base values (Apple's `lzfse_internal.h`). +const L_EXTRA_BITS: [u8; N_L_SYMBOLS] = + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8]; +const L_BASE: [i32; N_L_SYMBOLS] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60, +]; +const M_EXTRA_BITS: [u8; N_M_SYMBOLS] = + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11]; +const M_BASE: [i32; N_M_SYMBOLS] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 56, 312, +]; +const D_EXTRA_BITS: [u8; N_D_SYMBOLS] = [ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, + 14, 14, 15, 15, 15, 15, +]; +const D_BASE: [i32; N_D_SYMBOLS] = [ + 0, 1, 2, 3, 4, 6, 8, 10, 12, 16, 20, 24, 28, 36, 44, 52, 60, 76, 92, 108, 124, 156, 188, 220, + 252, 316, 380, 444, 508, 636, 764, 892, 1020, 1276, 1532, 1788, 2044, 2556, 3068, 3580, 4092, + 5116, 6140, 7164, 8188, 10236, 12284, 14332, 16380, 20476, 24572, 28668, 32764, 40956, 49148, + 57340, 65532, 81916, 98300, 114684, 131068, 163836, 196604, 229372, +]; + +/// Parsed v2 header. 
+struct V2Header { + n_raw_bytes: u32, + n_literals: u32, + n_literal_payload_bytes: u32, + n_matches: u32, + literal_bits: u32, + literal_state: [u32; 4], + n_lmd_payload_bytes: u32, + lmd_bits: u32, + header_size: u32, + l_state: u32, + m_state: u32, + d_state: u32, + l_freq: Vec<u16>, + m_freq: Vec<u16>, + d_freq: Vec<u16>, + lit_freq: Vec<u16>, +} + +/// Extract `width` bits starting at `lo` from a 64-bit packed word. +#[inline] +fn bits64(word: u64, lo: u32, width: u32) -> u64 { + if width == 0 { + return 0; + } + let mask = if width == 64 { + u64::MAX + } else { + (1u64 << width) - 1 + }; + (word >> lo) & mask +} + +/// Total payload size (literal + LMD) declared by a v2 block header. Used by +/// the streaming decoder to know how many payload bytes to buffer. `bytes` +/// is the slice starting **after** the 4-byte magic. pub(crate) fn parse_payload_size(bytes: &[u8]) -> Result<u32, Error> { if bytes.len() < V2_HEADER_FIXED_BYTES { return Err(Error::UnexpectedEnd); } - let mut bits = HeaderBits::new(&bytes[..V2_HEADER_FIXED_BYTES]); - // Skip n_raw_bytes (20 bits). - let _n_raw = bits.read(20)?; - let n_payload = bits.read(20)?; - Ok(n_payload) + let w0 = u64::from_le_bytes([ + bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], + ]); + let w1 = u64::from_le_bytes([ + bytes[12], bytes[13], bytes[14], bytes[15], bytes[16], bytes[17], bytes[18], bytes[19], + ]); + let n_literal_payload_bytes = bits64(w0, 20, 20) as u32; + let n_lmd_payload_bytes = bits64(w1, 40, 20) as u32; + n_literal_payload_bytes + .checked_add(n_lmd_payload_bytes) + .ok_or(Error::Corrupt) +} + +/// Total header length (including magic) declared by a v2 block header. +/// `bytes` starts after the magic. 
+pub(crate) fn parse_header_size(bytes: &[u8]) -> Result { + if bytes.len() < V2_HEADER_FIXED_BYTES { + return Err(Error::UnexpectedEnd); + } + let w2 = u64::from_le_bytes([ + bytes[20], bytes[21], bytes[22], bytes[23], bytes[24], bytes[25], bytes[26], bytes[27], + ]); + Ok(bits64(w2, 0, 32) as u32) +} + +/// Parse the v2 header from `bytes`, which begins **just after** the 4-byte +/// magic. +fn parse_header(bytes: &[u8]) -> Result { + // The fixed post-magic header is n_raw(4) + three u64 packed words (24) = + // 28 bytes = V2_HEADER_FIXED_BYTES; the frequency tables follow it. + let fixed = V2_HEADER_FIXED_BYTES; + if bytes.len() < fixed { + return Err(Error::UnexpectedEnd); + } + let n_raw_bytes = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + let w0 = u64::from_le_bytes([ + bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], + ]); + let w1 = u64::from_le_bytes([ + bytes[12], bytes[13], bytes[14], bytes[15], bytes[16], bytes[17], bytes[18], bytes[19], + ]); + let w2 = u64::from_le_bytes([ + bytes[20], bytes[21], bytes[22], bytes[23], bytes[24], bytes[25], bytes[26], bytes[27], + ]); + + let n_literals = bits64(w0, 0, 20) as u32; + let n_literal_payload_bytes = bits64(w0, 20, 20) as u32; + let n_matches = bits64(w0, 40, 20) as u32; + let literal_bits = bits64(w0, 60, 3) as u32; + + let literal_state = [ + bits64(w1, 0, 10) as u32, + bits64(w1, 10, 10) as u32, + bits64(w1, 20, 10) as u32, + bits64(w1, 30, 10) as u32, + ]; + let n_lmd_payload_bytes = bits64(w1, 40, 20) as u32; + let lmd_bits = bits64(w1, 60, 3) as u32; + + let header_size = bits64(w2, 0, 32) as u32; + let l_state = bits64(w2, 32, 10) as u32; + let m_state = bits64(w2, 42, 10) as u32; + let d_state = bits64(w2, 52, 10) as u32; + + if literal_bits > 7 || lmd_bits > 7 { + return Err(Error::Corrupt); + } + + // `header_size` includes the 4-byte magic, so the minimum valid value is + // magic(4) + the fixed packed fields. 
+ if (header_size as usize) < 4 + V2_HEADER_FIXED_BYTES { + return Err(Error::Corrupt); + } + let freq_end = (header_size as usize) - 4; // post-magic offset + if freq_end < fixed || freq_end > bytes.len() { + return Err(Error::UnexpectedEnd); + } + let freq_bytes = &bytes[fixed..freq_end]; + + let (l_freq, m_freq, d_freq, lit_freq) = decode_all_freqs(freq_bytes)?; + + check_freq_sum(&l_freq, L_STATES)?; + check_freq_sum(&m_freq, M_STATES)?; + check_freq_sum(&d_freq, D_STATES)?; + check_freq_sum(&lit_freq, LIT_STATES)?; + + if literal_state.iter().any(|&s| s as usize >= LIT_STATES) + || l_state as usize >= L_STATES + || m_state as usize >= M_STATES + || d_state as usize >= D_STATES + { + return Err(Error::Corrupt); + } + + Ok(V2Header { + n_raw_bytes, + n_literals, + n_literal_payload_bytes, + n_matches, + literal_bits, + literal_state, + n_lmd_payload_bytes, + lmd_bits, + header_size, + l_state, + m_state, + d_state, + l_freq, + m_freq, + d_freq, + lit_freq, + }) +} + +fn check_freq_sum(freq: &[u16], states: usize) -> Result<(), Error> { + let mut sum = 0usize; + for &f in freq { + sum += f as usize; + } + if sum != states { + return Err(Error::Corrupt); + } + Ok(()) +} + +/// The four frequency tables (L, M, D, LIT) decoded from a v2 header. +type FreqTables = (Vec, Vec, Vec, Vec); + +/// Decode the four bit-contiguous frequency tables (L, M, D, LIT). +fn decode_all_freqs(freq_bytes: &[u8]) -> Result { + let mut bit_pos = 0usize; + let l = decode_freq_at(freq_bytes, &mut bit_pos, N_L_SYMBOLS)?; + let m = decode_freq_at(freq_bytes, &mut bit_pos, N_M_SYMBOLS)?; + let d = decode_freq_at(freq_bytes, &mut bit_pos, N_D_SYMBOLS)?; + let lit = decode_freq_at(freq_bytes, &mut bit_pos, N_LIT_SYMBOLS)?; + Ok((l, m, d, lit)) +} + +/// Decode one frequency table at bit offset `*bit_pos`, advancing it. +/// +/// [`fse::decode_freq_table`] reads LSB-first from bit 0 of the slice it is +/// given. 
Our tables are bit-packed back-to-back, so a table may begin +/// mid-byte; we shift a temporary view so it starts at bit 0. +fn decode_freq_at( + freq_bytes: &[u8], + bit_pos: &mut usize, + n_symbols: usize, +) -> Result, Error> { + let byte_off = *bit_pos / 8; + let in_byte = (*bit_pos % 8) as u32; + if byte_off > freq_bytes.len() { + return Err(Error::UnexpectedEnd); + } + let tail = &freq_bytes[byte_off..]; + if in_byte == 0 { + let (freqs, consumed_bits) = fse::decode_freq_table(tail, n_symbols)?; + *bit_pos += consumed_bits; + Ok(freqs) + } else { + // Shift `tail` right by `in_byte` bits so the table begins at bit 0. + let mut shifted = Vec::with_capacity(tail.len()); + for w in 0..tail.len() { + let lo = tail[w] >> in_byte; + let hi = if w + 1 < tail.len() { + tail[w + 1].checked_shl(8 - in_byte).unwrap_or(0) + } else { + 0 + }; + shifted.push(lo | hi); + } + let (freqs, consumed_bits) = fse::decode_freq_table(&shifted, n_symbols)?; + *bit_pos += consumed_bits; + Ok(freqs) + } +} + +/// Decode a full `bvx2` block. `block` is the slice **after** the 4-byte +/// magic and must contain at least `header_size - 4 + payload` bytes. +/// Decoded output is appended to `out`. Returns the number of bytes consumed +/// from `block` (header + payload). +/// +/// `out_cap_hint` bounds the up-front output reservation against a hostile +/// `n_raw_bytes`; the real `n_raw_bytes` bound is still enforced exactly. 
+pub(crate) fn decode_block( + block: &[u8], + out: &mut Vec, + out_cap_hint: usize, +) -> Result { + let hdr = parse_header(block)?; + + let header_len = (hdr.header_size as usize) - 4; // post-magic + let lit_payload_len = hdr.n_literal_payload_bytes as usize; + let lmd_payload_len = hdr.n_lmd_payload_bytes as usize; + let payload_len = lit_payload_len + .checked_add(lmd_payload_len) + .ok_or(Error::Corrupt)?; + let total = header_len.checked_add(payload_len).ok_or(Error::Corrupt)?; + if block.len() < total { + return Err(Error::UnexpectedEnd); + } + + let lit_payload = &block[header_len..header_len + lit_payload_len]; + let lmd_payload = &block[header_len + lit_payload_len..total]; + + // ── 1. Decode literals (4-way interleaved FSE, reverse stream) ── + let lit_table = fse::build_literal_decoder(&hdr.lit_freq, LIT_STATES)?; + let n_literals = hdr.n_literals as usize; + // Reject an absurd literal count up-front (DoS guard). + if n_literals > out_cap_hint.saturating_mul(16).saturating_add(1 << 20) { + return Err(Error::Corrupt); + } + let mut literals = vec![0u8; n_literals]; + { + let mut bits = FseBits::new_with_stub(lit_payload, hdr.literal_bits)?; + let mut states = hdr.literal_state; + let mut i = 0usize; + while i < n_literals { + for state in states.iter_mut() { + if i >= n_literals { + break; + } + let (sym, next) = fse::fse_decode_literal(*state, &lit_table, &mut bits)?; + literals[i] = sym; + *state = next; + i += 1; + } + } + } + + // ── 2 & 3. 
Decode L/M/D commands and execute the LZ ── + let l_table = fse::build_lmd_decoder(&hdr.l_freq, L_STATES, &L_EXTRA_BITS, &L_BASE)?; + let m_table = fse::build_lmd_decoder(&hdr.m_freq, M_STATES, &M_EXTRA_BITS, &M_BASE)?; + let d_table = fse::build_lmd_decoder(&hdr.d_freq, D_STATES, &D_EXTRA_BITS, &D_BASE)?; + + let n_raw = hdr.n_raw_bytes as usize; + let start_len = out.len(); + out.reserve(n_raw.min(out_cap_hint)); + + let mut lmd = FseBits::new_with_stub(lmd_payload, hdr.lmd_bits)?; + let mut l_state = hdr.l_state; + let mut m_state = hdr.m_state; + let mut d_state = hdr.d_state; + + let mut lit_pos = 0usize; + let mut prev_d: i32 = 0; + let n_matches = hdr.n_matches as usize; + + for _ in 0..n_matches { + // The encoder pushed streams so the decoder pulls L, then M, then D. + let (l_val, l_next) = fse::fse_decode_lmd(l_state, &l_table, &mut lmd)?; + let (m_val, m_next) = fse::fse_decode_lmd(m_state, &m_table, &mut lmd)?; + let (d_val, d_next) = fse::fse_decode_lmd(d_state, &d_table, &mut lmd)?; + l_state = l_next; + m_state = m_next; + d_state = d_next; + + // D == 0 means "reuse the previous distance". + let d = if d_val == 0 { prev_d } else { d_val }; + if d_val != 0 { + prev_d = d_val; + } + if l_val < 0 || m_val < 0 || d <= 0 { + return Err(Error::Corrupt); + } + let l = l_val as usize; + let m = m_val as usize; + let d = d as usize; + + // Emit L literals. + if lit_pos + l > n_literals { + return Err(Error::Corrupt); + } + if out.len() + l > start_len + n_raw { + return Err(Error::Corrupt); + } + out.extend_from_slice(&literals[lit_pos..lit_pos + l]); + lit_pos += l; + + // Copy an M-byte match at distance d (may overlap). + let cur = out.len() - start_len; + if d > cur { + return Err(Error::Corrupt); + } + if out.len() + m > start_len + n_raw { + return Err(Error::Corrupt); + } + for src in (out.len() - d..).take(m) { + let b = out[src]; + out.push(b); + } + } + + // Trailing literals after the last match. 
+ let remaining = n_literals - lit_pos; + if remaining > 0 { + if out.len() + remaining > start_len + n_raw { + return Err(Error::Corrupt); + } + out.extend_from_slice(&literals[lit_pos..]); + } + + if out.len() - start_len != n_raw { + return Err(Error::Corrupt); + } + + Ok(total) +} + +// ───────────────────────── test-only encoder ───────────────────────────── +// +// A spec-conformant `bvx2` encoder used only to validate the decoder by +// round-trip. It uses a greedy LZ parser, the standard quantized (nearest) +// FSE frequency normalization producing general, non-power-of-two +// frequencies, encode slots that exactly invert the decoder's general k/k-1 +// FSE table, and the documented header/payload packing. + +#[cfg(test)] +pub(crate) use test_encoder::encode_block; + +#[cfg(test)] +mod test_encoder { + use super::*; + + /// One FSE encode slot for a symbol: covers next-state range `[lo, hi]`, + /// emits `(next_state - lo)` in `k` bits and moves the encoder's running + /// state to table index `t`. + struct EncSlot { + t: usize, + k: u8, + lo: i32, + hi: i32, + } + + /// Build per-symbol encode slots that exactly invert + /// `fse::build_literal_decoder` / `build_lmd_decoder`, including the + /// general k/k-1 split. Frequencies are arbitrary (`1..=n_states`) and + /// must sum to `n_states`; the per-symbol slot set tiles `[0, n_states)`. + /// + /// Each decode entry maps a current state `t` to a `(next_state, k_bits)` + /// pull. The encoder inverts this: given the *next* state `cur` it finds + /// the slot whose `[lo, hi]` next-state range contains `cur`, emits + /// `cur - lo` in `k` bits and moves the running state to that slot's `t`. + /// A slot in the `i < j0` region uses `k` bits, otherwise `k - 1` bits — + /// matching the decode table exactly. 
+ fn build_enc_slots(freq: &[u16], n_states: usize) -> Vec> { + let mut slots: Vec> = (0..freq.len()).map(|_| Vec::new()).collect(); + let mut occ = vec![false; n_states]; + let mut t = 0usize; + let step = (n_states >> 1) + (n_states >> 3) + 3; + let mask = n_states - 1; + let log2 = n_states.trailing_zeros() as i32; + for (s, &f) in freq.iter().enumerate() { + let f = f as usize; + if f == 0 { + continue; + } + let floor_log2 = 31 - (f as u32).leading_zeros() as i32; + let k = log2 - floor_log2; + let j0 = (((2 * n_states) >> k) as i32) - f as i32; + for i in 0..f { + while occ[t] { + t = (t + step) & mask; + } + let (ek, delta) = if (i as i32) < j0 { + (k, ((f as i32 + i as i32) << k) - n_states as i32) + } else { + (k - 1, (i as i32 - j0) << (k - 1)) + }; + slots[s].push(EncSlot { + t, + k: ek as u8, + lo: delta, + hi: delta + (1i32 << ek) - 1, + }); + occ[t] = true; + t = (t + step) & mask; + } + } + for sl in slots.iter_mut() { + sl.sort_by_key(|x| x.lo); + } + slots + } + + /// A bit accumulator producing the reverse FSE stream byte layout that + /// [`FseBits`] consumes. + /// + /// The FSE encoder must walk symbols **in reverse** to chain states + /// correctly (each symbol's emitted state is determined by the following + /// symbol in the same lane). The caller therefore [`push`](Self::push)es + /// `(value, n_bits)` chunks in reverse-of-pull order. [`finish`] reverses + /// the chunk list back to forward pull order, then packs the resulting + /// bit string into bytes laid out so [`FseBits`] (which pulls from the end + /// of the payload backward) reads them back in exactly pull order. One + /// stub byte always trails so the decoder's init-byte consumption lands on + /// it. + struct FseSink { + /// Each entry is one symbol's `(value, n_bits)`, recorded in + /// reverse-of-pull order. 
+ chunks: Vec<(u64, u8)>, + } + + impl FseSink { + fn new() -> Self { + Self { chunks: Vec::new() } + } + + /// Record `n` bits of `value` for one symbol (reverse-of-pull order). + fn push(&mut self, value: u64, n: u8) { + self.chunks.push((value, n)); + } + + /// Serialize to `(payload_bytes, stub_bits)`. + fn finish(&self) -> (Vec, u32) { + // Reverse chunks to forward pull order, then flatten to a bit + // vector (LSB-first within each chunk). + let mut bits: Vec = Vec::new(); + for &(value, n) in self.chunks.iter().rev() { + for i in 0..n { + bits.push(((value >> i) & 1) as u8); + } + } + let total = bits.len(); + let stub = (total % 8) as u32; + let full = total / 8; + let plen = full + 1; + let mut payload = vec![0u8; plen]; + let mut bi = 0usize; + let mut sb = 0u8; + for i in 0..stub { + sb |= bits[bi] << i; + bi += 1; + } + payload[plen - 1] = sb; + let mut idx = plen as i32 - 2; + while idx >= 0 { + let mut b = 0u8; + for i in 0..8 { + if bi < total { + b |= bits[bi] << i; + bi += 1; + } + } + payload[idx as usize] = b; + idx -= 1; + } + (payload, stub) + } + } + + /// Encode a frequency value with the LZFSE Huffman-style fixed encoding + /// (inverse of `fse::decode_freq_table`). + fn encode_freq_value(v: u16) -> (u32, u32) { + match v { + 0 => (0b00, 2), + 1 => (0b10, 2), + 2 => (0b001, 3), + 3 => (0b101, 3), + 4 => (0b00011, 5), + 5 => (0b01011, 5), + 6 => (0b10011, 5), + 7 => (0b11011, 5), + 8..=23 => (0b0111 | ((v as u32 - 8) << 4), 8), + 24..=1047 => (0b1111 | ((v as u32 - 24) << 4), 14), + _ => panic!("frequency {v} too large to encode"), + } + } + + /// Normalize a histogram to **general** (arbitrary, not power-of-two) + /// frequencies summing exactly to `n_states`, giving every present symbol + /// at least 1. 
This is the standard quantized normalization: scale each + /// count by `n_states / total`, round to nearest, force present symbols to + /// 1, then correct the running sum by nudging the largest entry (which can + /// absorb ±1 changes without dropping a present symbol to 0). + /// + /// The resulting per-symbol frequencies are deliberately *not* coerced to + /// powers of two — the decoder's general k/k-1 table builder handles them + /// directly. Singletons and skewed (non-dyadic) distributions are + /// produced as-is so the round-trip exercises the general FSE path. + pub(super) fn normalize_general(hist: &[u32], n_states: usize) -> Vec { + let n = hist.len(); + let total: u64 = hist.iter().map(|&h| h as u64).sum(); + let mut freq = vec![0u16; n]; + if total == 0 { + freq[0] = n_states as u16; + return freq; + } + // Nearest-rounding scale, with a floor of 1 for every present symbol. + let mut assigned: i64 = 0; + for (i, &h) in hist.iter().enumerate() { + if h == 0 { + continue; + } + let scaled = (h as u64 * n_states as u64 + total / 2) / total; + let f = scaled.max(1).min(n_states as u64) as i64; + freq[i] = f as u16; + assigned += f; + } + let target = n_states as i64; + // Correct the sum. Each step adjusts the symbol that can absorb the + // change: when overshooting, the largest entry with `f > 1`; when + // undershooting, simply the largest entry. This converges because the + // largest entry is at least `n_states / n` which exceeds the total + // correction magnitude (bounded by `n`). 
+ while assigned != target { + if assigned > target { + let (idx, _) = freq + .iter() + .enumerate() + .filter(|&(_, &f)| f > 1) + .max_by_key(|&(_, &f)| f) + .expect("an entry > 1 exists while overshooting"); + freq[idx] -= 1; + assigned -= 1; + } else { + let (idx, _) = freq + .iter() + .enumerate() + .max_by_key(|&(_, &f)| f) + .expect("non-empty alphabet"); + freq[idx] += 1; + assigned += 1; + } + } + debug_assert_eq!(assigned, target); + freq + } + + /// Map an L/M/D value to `(symbol, extra_value)`. + fn map_lmd(value: i32, base: &[i32], extra: &[u8]) -> (usize, u32) { + for s in 0..base.len() { + if base[s] <= value { + let hi = base[s] + ((1i64 << extra[s]) - 1) as i32; + if value <= hi { + return (s, (value - base[s]) as u32); + } + } + } + let s = base.len() - 1; + (s, (value - base[s]).max(0) as u32) + } + + struct Cmd { + l: usize, + m: usize, + d: usize, + } + + /// Greedy LZ parse of `data` via a hash chain over 4-byte prefixes. + fn lz_parse(data: &[u8]) -> (Vec, Vec) { + const MIN_MATCH: usize = 4; + const MAX_MATCH: usize = 2359; // M max encodable + const MAX_DIST: usize = 262_139; // D max encodable + let mut literals = Vec::new(); + let mut cmds = Vec::new(); + let n = data.len(); + + let hsize = 1usize << 15; + let mut head = vec![usize::MAX; hsize]; + let mut prev = vec![usize::MAX; n.max(1)]; + let hash = |d: &[u8], i: usize| -> usize { + let v = (d[i] as usize) + | ((d[i + 1] as usize) << 8) + | ((d[i + 2] as usize) << 16) + | ((d[i + 3] as usize) << 24); + (v.wrapping_mul(2654435761) >> 17) & (hsize - 1) + }; + + let mut i = 0usize; + let mut pending_lit = 0usize; + while i < n { + let mut best_len = 0usize; + let mut best_dist = 0usize; + if i + MIN_MATCH <= n { + let h = hash(data, i); + let mut cand = head[h]; + let mut chain = 0; + while cand != usize::MAX && chain < 64 { + if i - cand <= MAX_DIST { + let mut len = 0usize; + while i + len < n && len < MAX_MATCH && data[cand + len] == data[i + len] { + len += 1; + } + if len > best_len { 
+ best_len = len; + best_dist = i - cand; + } + } else { + break; + } + cand = prev[cand]; + chain += 1; + } + } + + if best_len >= MIN_MATCH { + let end = i + best_len; + cmds.push(Cmd { + l: pending_lit, + m: best_len, + d: best_dist, + }); + pending_lit = 0; + while i < end { + if i + MIN_MATCH <= n { + let h = hash(data, i); + prev[i] = head[h]; + head[h] = i; + } + i += 1; + } + } else { + literals.push(data[i]); + pending_lit += 1; + if i + MIN_MATCH <= n { + let h = hash(data, i); + prev[i] = head[h]; + head[h] = i; + } + i += 1; + } + } + // Remaining `pending_lit` literals are trailing literals (no command); + // the decoder appends them after the last match. + let _ = pending_lit; + (literals, cmds) + } + + /// Encode `data` as a single `bvx2` block (NOT including the 4-byte + /// magic, which the caller prepends). + pub(crate) fn encode_block(data: &[u8]) -> Vec { + let (literals, cmds) = lz_parse(data); + + let mut lit_hist = vec![0u32; N_LIT_SYMBOLS]; + for &b in &literals { + lit_hist[b as usize] += 1; + } + let mut l_hist = vec![0u32; N_L_SYMBOLS]; + let mut m_hist = vec![0u32; N_M_SYMBOLS]; + let mut d_hist = vec![0u32; N_D_SYMBOLS]; + + struct MappedCmd { + l_sym: usize, + l_extra: u32, + m_sym: usize, + m_extra: u32, + d_sym: usize, + d_extra: u32, + } + let mut mapped = Vec::with_capacity(cmds.len()); + for c in &cmds { + let (l_sym, l_extra) = map_lmd(c.l as i32, &L_BASE, &L_EXTRA_BITS); + let (m_sym, m_extra) = map_lmd(c.m as i32, &M_BASE, &M_EXTRA_BITS); + let (d_sym, d_extra) = map_lmd(c.d as i32, &D_BASE, &D_EXTRA_BITS); + l_hist[l_sym] += 1; + m_hist[m_sym] += 1; + d_hist[d_sym] += 1; + mapped.push(MappedCmd { + l_sym, + l_extra, + m_sym, + m_extra, + d_sym, + d_extra, + }); + } + + let lit_freq = normalize_general(&lit_hist, LIT_STATES); + let l_freq = normalize_general(&l_hist, L_STATES); + let m_freq = normalize_general(&m_hist, M_STATES); + let d_freq = normalize_general(&d_hist, D_STATES); + + let lit_slots = build_enc_slots(&lit_freq, 
LIT_STATES); + let l_slots = build_enc_slots(&l_freq, L_STATES); + let m_slots = build_enc_slots(&m_freq, M_STATES); + let d_slots = build_enc_slots(&d_freq, D_STATES); + + // ── Encode literals (reverse, 4-way interleaved) ── + let n_lit = literals.len(); + let mut lit_sink = FseSink::new(); + let mut lit_states = [0i32; 4]; + for idx in (0..n_lit).rev() { + let lane = idx % 4; + let sym = literals[idx] as usize; + let cur = lit_states[lane]; + let slot = lit_slots[sym] + .iter() + .find(|s| cur >= s.lo && cur <= s.hi) + .expect("literal slot covers state"); + lit_sink.push((cur - slot.lo) as u64, slot.k); + lit_states[lane] = slot.t as i32; + } + let literal_state = [ + lit_states[0] as u32, + lit_states[1] as u32, + lit_states[2] as u32, + lit_states[3] as u32, + ]; + let (lit_payload, literal_bits) = lit_sink.finish(); + + // ── Encode LMD (reverse). Decoder pulls L, M, D per command, so to + // invert we iterate commands in reverse and push D, then M, then L. ── + let mut lmd_sink = FseSink::new(); + let mut l_st = 0i32; + let mut m_st = 0i32; + let mut d_st = 0i32; + for mc in mapped.iter().rev() { + let d_slot = d_slots[mc.d_sym] + .iter() + .find(|s| d_st >= s.lo && d_st <= s.hi) + .expect("d slot"); + let raw = (d_st - d_slot.lo) as u64 | ((mc.d_extra as u64) << d_slot.k); + lmd_sink.push(raw, d_slot.k + D_EXTRA_BITS[mc.d_sym]); + d_st = d_slot.t as i32; + + let m_slot = m_slots[mc.m_sym] + .iter() + .find(|s| m_st >= s.lo && m_st <= s.hi) + .expect("m slot"); + let raw = (m_st - m_slot.lo) as u64 | ((mc.m_extra as u64) << m_slot.k); + lmd_sink.push(raw, m_slot.k + M_EXTRA_BITS[mc.m_sym]); + m_st = m_slot.t as i32; + + let l_slot = l_slots[mc.l_sym] + .iter() + .find(|s| l_st >= s.lo && l_st <= s.hi) + .expect("l slot"); + let raw = (l_st - l_slot.lo) as u64 | ((mc.l_extra as u64) << l_slot.k); + lmd_sink.push(raw, l_slot.k + L_EXTRA_BITS[mc.l_sym]); + l_st = l_slot.t as i32; + } + let l_state = l_st as u32; + let m_state = m_st as u32; + let d_state = 
d_st as u32; + let (lmd_payload, lmd_bits) = lmd_sink.finish(); + + // ── Pack frequency tables (L, M, D, LIT, bit-contiguous) ── + let mut freq_bits: Vec = Vec::new(); + for table in [&l_freq, &m_freq, &d_freq, &lit_freq] { + for &f in table.iter() { + let (code, len) = encode_freq_value(f); + for i in 0..len { + freq_bits.push(((code >> i) & 1) as u8); + } + } + } + let mut freq_bytes = vec![0u8; freq_bits.len().div_ceil(8)]; + for (i, &b) in freq_bits.iter().enumerate() { + freq_bytes[i / 8] |= b << (i % 8); + } + + // ── Assemble the header ── + // `header_size` is measured from the start of the block, i.e. it + // includes the 4-byte magic that the caller prepends: + // magic(4) + n_raw(4) + 3*u64(24) + freq = 4 + V2_HEADER_FIXED_BYTES + freq. + let header_size = (4 + V2_HEADER_FIXED_BYTES + freq_bytes.len()) as u32; + let n_raw_bytes = data.len() as u32; + let n_literals = n_lit as u32; + let n_matches = cmds.len() as u32; + let n_literal_payload_bytes = lit_payload.len() as u32; + let n_lmd_payload_bytes = lmd_payload.len() as u32; + + let mut w0 = 0u64; + w0 |= (n_literals as u64) & 0xFFFFF; + w0 |= ((n_literal_payload_bytes as u64) & 0xFFFFF) << 20; + w0 |= ((n_matches as u64) & 0xFFFFF) << 40; + w0 |= ((literal_bits as u64) & 0x7) << 60; + + let mut w1 = 0u64; + w1 |= (literal_state[0] as u64) & 0x3FF; + w1 |= ((literal_state[1] as u64) & 0x3FF) << 10; + w1 |= ((literal_state[2] as u64) & 0x3FF) << 20; + w1 |= ((literal_state[3] as u64) & 0x3FF) << 30; + w1 |= ((n_lmd_payload_bytes as u64) & 0xFFFFF) << 40; + w1 |= ((lmd_bits as u64) & 0x7) << 60; + + let mut w2 = 0u64; + w2 |= (header_size as u64) & 0xFFFFFFFF; + w2 |= ((l_state as u64) & 0x3FF) << 32; + w2 |= ((m_state as u64) & 0x3FF) << 42; + w2 |= ((d_state as u64) & 0x3FF) << 52; + + let mut out = + Vec::with_capacity(header_size as usize + lit_payload.len() + lmd_payload.len()); + out.extend_from_slice(&n_raw_bytes.to_le_bytes()); + out.extend_from_slice(&w0.to_le_bytes()); + 
out.extend_from_slice(&w1.to_le_bytes()); + out.extend_from_slice(&w2.to_le_bytes()); + out.extend_from_slice(&freq_bytes); + out.extend_from_slice(&lit_payload); + out.extend_from_slice(&lmd_payload); + out + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lzfse::decoder::Decoder; + use crate::traits::{RawDecoder, RawProgress}; + + /// Wrap a v2-encoded block (post-magic bytes) into a full `bvx2` block. + fn v2_block(data: &[u8]) -> Vec { + let mut b = Vec::new(); + b.extend_from_slice(b"bvx2"); + b.extend_from_slice(&encode_block(data)); + b + } + + /// Block-level round-trip: encode then `decode_block`. + fn rt_block(data: &[u8]) { + let block = encode_block(data); + let mut out = Vec::new(); + let consumed = decode_block(&block, &mut out, 1 << 20) + .unwrap_or_else(|e| panic!("decode_block failed on len {}: {e:?}", data.len())); + assert_eq!(consumed, block.len(), "did not consume whole block"); + assert_eq!(out, data, "round-trip mismatch (len {})", data.len()); + } + + /// Full-stream round-trip through the streaming `Decoder`. + fn rt_stream(blocks: &[&[u8]]) -> Vec { + let mut stream = Vec::new(); + for b in blocks { + stream.extend_from_slice(&v2_block(b)); + } + stream.extend_from_slice(b"bvx$"); + + let mut dec = Decoder::new(); + let mut out = Vec::new(); + let mut buf = vec![0u8; 512]; + let mut pos = 0usize; + loop { + let RawProgress { + consumed, + written, + done, + } = dec.raw_decode(&stream[pos..], &mut buf).unwrap(); + pos += consumed; + out.extend_from_slice(&buf[..written]); + if done { + break; + } + if consumed == 0 && written == 0 { + // Need to finish. + let RawProgress { written, done, .. 
} = dec.raw_finish(&mut buf).unwrap(); + out.extend_from_slice(&buf[..written]); + if done || written == 0 { + break; + } + } + } + out + } + + #[test] + fn block_roundtrip_empty() { + rt_block(b""); + } + + #[test] + fn block_roundtrip_small() { + rt_block(b"a"); + rt_block(b"ab"); + rt_block(b"abc"); + rt_block(b"hello world"); + } + + #[test] + fn block_roundtrip_text() { + let text = b"the quick brown fox jumps over the lazy dog. \ + the quick brown fox jumps over the lazy dog. \ + pack my box with five dozen liquor jugs."; + rt_block(text); + } + + #[test] + fn block_roundtrip_repetitive() { + rt_block(&vec![b'A'; 1000]); + rt_block(&vec![0u8; 5000]); + let mut v = Vec::new(); + for _ in 0..500 { + v.extend_from_slice(b"abcd"); + } + rt_block(&v); + } + + #[test] + fn block_roundtrip_random() { + // Deterministic LCG "random" bytes (incompressible-ish) of varied sizes. + let mut state = 0x1234_5678u32; + let mut next = || { + state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223); + (state >> 24) as u8 + }; + for &len in &[ + 0usize, 1, 7, 15, 16, 17, 63, 64, 100, 255, 256, 1024, 4096, 9001, + ] { + let data: Vec = (0..len).map(|_| next()).collect(); + rt_block(&data); + } + } + + #[test] + fn block_roundtrip_mixed_structure() { + // Repetitive prefix + random tail + repetitive again exercises both + // literal-heavy and match-heavy command streams. + let mut data = vec![b'x'; 300]; + let mut state = 0x9E37_79B9u32; + for _ in 0..300 { + state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223); + data.push((state >> 23) as u8); + } + data.extend_from_slice(&vec![b'y'; 400]); + data.extend_from_slice(b"the the the the the the the the the the the the"); + rt_block(&data); + } + + #[test] + fn block_roundtrip_all_byte_values() { + // Every byte value present forces a full 256-symbol literal table. 
+ let mut data = Vec::new(); + for _ in 0..8 { + for b in 0u16..256 { + data.push(b as u8); + } + } + rt_block(&data); + } + + #[test] + fn block_roundtrip_long_match() { + // A long run produces large match lengths (exercises M extra bits). + let data = vec![b'Q'; 50_000]; + rt_block(&data); + } + + #[test] + fn block_roundtrip_far_distance() { + // Distinct head, large gap, then a copy of the head — exercises large + // D extra bits. + let mut data: Vec = b"UNIQUEPREFIXHERE0123456789".to_vec(); + data.extend(core::iter::repeat_n(b'.', 70_000)); + data.extend_from_slice(b"UNIQUEPREFIXHERE0123456789"); + rt_block(&data); + } + + #[test] + fn stream_roundtrip_single_block() { + let data = b"the quick brown fox jumps over the lazy dog".repeat(20); + let out = rt_stream(&[&data]); + assert_eq!(out, data); + } + + #[test] + fn stream_roundtrip_multi_block() { + let a = b"first block contents, repeated repeated repeated".repeat(10); + let b = vec![b'Z'; 2000]; + let c = b"third".repeat(100); + let out = rt_stream(&[&a, &b, &c]); + let mut want = Vec::new(); + want.extend_from_slice(&a); + want.extend_from_slice(&b); + want.extend_from_slice(&c); + assert_eq!(out, want); + } + + #[test] + fn stream_roundtrip_empty_block() { + let out = rt_stream(&[b""]); + assert_eq!(out, b""); + } + + #[test] + fn corrupt_header_size_rejected() { + let mut block = encode_block(b"hello world this is a test of corruption"); + // header_size lives in packed_fields[2] low 32 bits, at byte offset + // 4 + 8 + 8 = 20 within the post-magic block. Set it absurdly large. + block[20] = 0xFF; + block[21] = 0xFF; + block[22] = 0xFF; + block[23] = 0xFF; + let mut out = Vec::new(); + assert!(decode_block(&block, &mut out, 1 << 20).is_err()); + } + + #[test] + fn truncated_payload_rejected() { + let block = encode_block(&vec![b'k'; 2000]); + // Drop the last few payload bytes. 
+ let truncated = &block[..block.len() - 3]; + let mut out = Vec::new(); + assert!(decode_block(truncated, &mut out, 1 << 20).is_err()); + } + + #[test] + fn garbage_freq_does_not_panic() { + // A short, mostly-zero block: parse_header should reject (freq sums + // won't match) rather than panic. + let mut block = vec![0u8; 64]; + // Give n_raw a small value and a plausible header_size. + block[0..4].copy_from_slice(&8u32.to_le_bytes()); + // header_size = 32 (magic + fixed, no freq bytes) — freq tables empty + // → sums won't match the FSE state counts. + let w2 = 32u64; + block[20..28].copy_from_slice(&w2.to_le_bytes()); + let mut out = Vec::new(); + let _ = decode_block(&block, &mut out, 1 << 20); + } + + #[test] + fn stream_roundtrip_one_byte_at_a_time() { + // Feed a v2 block + EOS one byte at a time, exercising the streaming + // decoder's reassembly of the variable-length v2 header and payload. + let data = b"streaming reassembly test streaming reassembly test".repeat(8); + let mut stream = v2_block(&data); + stream.extend_from_slice(b"bvx$"); + + let mut dec = Decoder::new(); + let mut out = Vec::new(); + let mut buf = vec![0u8; 64]; + let mut pos = 0usize; + while pos < stream.len() { + let end = (pos + 1).min(stream.len()); + let RawProgress { + consumed, + written, + done, + } = dec.raw_decode(&stream[pos..end], &mut buf).unwrap(); + pos += consumed; + out.extend_from_slice(&buf[..written]); + if done { + break; + } + } + loop { + let RawProgress { written, done, .. } = dec.raw_finish(&mut buf).unwrap(); + out.extend_from_slice(&buf[..written]); + if done || written == 0 { + break; + } + } + assert_eq!(out, data); + } + + /// A hand-frozen `bvx2` stream, independent of this crate's encoder. 
+ /// + /// It is a literals-only block (`n_matches == 0`) whose **literal + /// frequency table is deliberately non-dyadic**: the high-frequency + /// literal symbol `0x3d` (`=`) has frequency 1000 and the rare symbol + /// `0x3e` (`>`) has 24 (sum 1024 = LIT_STATES). Neither is a power of two, + /// so decoding correctly *requires* the general k/k-1 FSE table + /// construction — a single-`k` decoder cannot build a table that tiles + /// `[0,1024)` for these frequencies and mis-decodes the literals. + /// + /// The bytes (post-magic header + freq tables + literal FSE payload, then + /// the `bvx$` EOS) were generated once and frozen here; this test does not + /// call the encoder, so it guards against the encoder and decoder sharing + /// the same table-construction bug. The four literals decode to the exact + /// ASCII string `=>==`. + const HAND_VECTOR: &[u8] = &[ + 0x62, 0x76, 0x78, 0x32, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, + 0x50, 0x48, 0x40, 0x8f, 0x04, 0x12, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x8f, 0x02, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x8f, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xc0, 0x43, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x76, 0x78, 0x24, + ]; + + #[test] + fn hand_vector_non_dyadic_decodes_to_known_string() { + // Decode the frozen, encoder-independent vector through the public + // streaming decoder and assert the exact output. 
+        let mut dec = Decoder::new();
+        let mut out = Vec::new();
+        let mut buf = vec![0u8; 64];
+        let mut pos = 0usize;
+        loop {
+            let RawProgress {
+                consumed,
+                written,
+                done,
+            } = dec.raw_decode(&HAND_VECTOR[pos..], &mut buf).unwrap();
+            pos += consumed;
+            out.extend_from_slice(&buf[..written]);
+            if done {
+                break;
+            }
+            if consumed == 0 && written == 0 {
+                let RawProgress { written, done, .. } = dec.raw_finish(&mut buf).unwrap();
+                out.extend_from_slice(&buf[..written]);
+                if done || written == 0 {
+                    break;
+                }
+            }
+        }
+        assert_eq!(out, b"=>==", "hand vector decoded to {out:?}");
+    }
+
+    #[test]
+    fn normalize_general_produces_non_dyadic_freqs() {
+        // A skewed histogram must normalize to general (non-power-of-two)
+        // frequencies that sum exactly to n_states and give every present
+        // symbol at least 1. A regression that snapped to powers of two would
+        // be visible here.
+        use super::test_encoder::normalize_general;
+        let hist = [1000u32, 3, 17, 250, 0, 1];
+        let freq = normalize_general(&hist, 1024);
+        // Widen before summing so the total is compared as a `u32`.
+        assert_eq!(freq.iter().map(|&f| f as u32).sum::<u32>(), 1024);
+        // Absent symbol stays 0; present symbols are >= 1.
+        assert_eq!(freq[4], 0);
+        for (i, &h) in hist.iter().enumerate() {
+            if h > 0 {
+                assert!(freq[i] >= 1, "present symbol {i} dropped to 0");
+            }
+        }
+        // At least one present symbol is genuinely non-power-of-two.
+        assert!(
+            freq.iter().any(|&f| f > 0 && !f.is_power_of_two()),
+            "expected a non-power-of-two frequency, got {freq:?}"
+        );
+    }
+
+    /// Round-trip a payload whose literal histogram is deliberately skewed so
+    /// the normalized FSE frequencies are non-dyadic. A regression to a
+    /// single-`k` decode table would corrupt the result.
+ fn rt_assert_non_dyadic_lit(data: &[u8]) { + use super::test_encoder::normalize_general; + // Recompute the literal histogram the way encode_block does, but only + // over true literals would require the parser; instead assert on a + // raw-byte histogram, which upper-bounds the literal alphabet and is a + // good proxy for "this input yields a non-dyadic literal table". + let mut hist = vec![0u32; 256]; + for &b in data { + hist[b as usize] += 1; + } + let freq = normalize_general(&hist, LIT_STATES); + assert!( + freq.iter().any(|&f| f > 0 && !f.is_power_of_two()), + "test input does not exercise a non-dyadic table" + ); + rt_block(data); + } + + #[test] + fn block_roundtrip_non_dyadic_literals() { + // Skewed-but-not-dyadic byte distributions (counts chosen so the + // 1024-state normalization lands on non-powers-of-two). + let mut data = Vec::new(); + data.extend(core::iter::repeat_n(b'a', 1000)); + data.extend(core::iter::repeat_n(b'b', 333)); + data.extend(core::iter::repeat_n(b'c', 77)); + data.extend(core::iter::repeat_n(b'd', 7)); + data.push(b'e'); // a singleton + rt_assert_non_dyadic_lit(&data); + + // A 3-symbol skew (~70/29/1 split). + let mut d2 = Vec::new(); + d2.extend(core::iter::repeat_n(b'x', 700)); + d2.extend(core::iter::repeat_n(b'y', 290)); + d2.extend(core::iter::repeat_n(b'z', 11)); + rt_assert_non_dyadic_lit(&d2); + } + + #[test] + fn block_roundtrip_small_match_counts() { + // Few, varied matches produce small non-power-of-two L/M/D histograms + // (e.g. a single match → a singleton frequency in each LMD table). + // Each must round-trip through the general k/k-1 LMD tables. 
+        let cases: &[&[u8]] = &[
+            b"abcabc", // one short match
+            b"abcdeabcde_xyzxyz", // two matches, different lens
+            b"AAAABBBBCCCCAAAABBBBCCCC", // a couple of medium matches
+            b"the cat sat on the mat the cat", // overlapping repeats
+        ];
+        for c in cases {
+            rt_block(c);
+        }
+    }
+
+    #[test]
+    fn fuzz_roundtrip_many_sizes() {
+        // Broad deterministic fuzz: many sizes, several content shapes. Each
+        // must round-trip exactly through decode_block(encode_block(x)).
+        let mut state = 0xDEAD_BEEFu32;
+        let mut rng = || {
+            state = state.wrapping_mul(1_103_515_245).wrapping_add(12_345);
+            state
+        };
+        for len in 0..400usize {
+            // Shape 0: random bytes. Shape 1: small alphabet (matches galore).
+            // Shape 2: mostly-constant with sparse noise.
+            for shape in 0..3 {
+                let data: Vec<u8> = (0..len)
+                    .map(|_| match shape {
+                        0 => (rng() >> 24) as u8,
+                        1 => b"abcde"[(rng() as usize) % 5],
+                        _ => {
+                            if rng() % 16 == 0 {
+                                (rng() >> 24) as u8
+                            } else {
+                                b'='
+                            }
+                        }
+                    })
+                    .collect();
+                rt_block(&data);
+            }
+        }
+    }
 }
diff --git a/src/lzfse/mod.rs b/src/lzfse/mod.rs
index 84a66fd..b377e39 100644
--- a/src/lzfse/mod.rs
+++ b/src/lzfse/mod.rs
@@ -29,11 +29,18 @@
 //!   StreamEnd.
 //! - `bvx1` blocks: not commonly emitted by modern encoders; this build
 //!   returns [`Error::Unsupported`].
-//! - `bvx2` (LZFSE v2 compressed) blocks: the FSE table-construction
-//!   primitives are present (see `fse.rs`), but the full v2 block decoder
-//!   is gated off in this release. `bvx2` blocks return
-//!   [`Error::Unsupported`]; see the internal `lzfse_v2` module for the
-//!   layout reference and the gap analysis.
+//! - `bvx2` (LZFSE v2 compressed) blocks: **decoder implemented** — the core
+//!   LZFSE block type (LZ77 commands entropy-coded with Finite State
+//!   Entropy). The FSE table construction matches Apple's general
+//!   `fse_init_decoder_table` (k/k-1 split), so arbitrary per-symbol
+//!   frequencies decode, not only power-of-two normalizations. Validated by
+//!
round-trip against this crate's own spec-conformant general-frequency v2 +//! encoder, including deliberately non-dyadic distributions and a +//! hand-frozen non-dyadic block (no Apple reference fixtures are available +//! in this environment, so Apple-interop is best-effort but follows the +//! documented wire format and real table-construction algorithm). See the +//! internal `lzfse_v2` module for the layout reference and +//! validation/interop notes. //! //! Real LZFSE files produced by Apple's encoders mix these block types //! freely: small payloads land in `bvxn`, large ones in `bvx2`, and short diff --git a/src/lzma2/mod.rs b/src/lzma2/mod.rs index e7b41ab..41eb5ba 100644 --- a/src/lzma2/mod.rs +++ b/src/lzma2/mod.rs @@ -54,6 +54,27 @@ //! machinery used by [`crate::xz`] (the shared `LzmaCore`); this module only //! adds the raw chunk framing and self-termination handling. There is no //! re-implementation of LZMA here. +//! +//! ## Encoder +//! +//! The [`Encoder`] produces the same raw LZMA2 chunk stream the decoder +//! consumes, reusing the shared `encode_lzma_chunk` range coder from +//! [`crate::xz`]'s internals — no LZMA re-implementation. Every chunk is a +//! full-reset chunk (control byte `0xE0` for compressed, `0x01` for +//! uncompressed) so each chunk is independently decodable; an uncompressed +//! chunk is emitted as a fallback whenever compression would expand the data. +//! The stream is terminated by a single `0x00` end-marker byte. +//! +//! ### Dictionary-size contract +//! +//! A raw LZMA2 stream carries **no** dictionary size in band — that value is +//! the 7z coder property the decoder receives out of band (via +//! [`DecoderConfig::with_dict_prop`] / [`DecoderConfig::with_dict_size`]). +//! The encoder bounds its match distances by a fixed 4 MiB dictionary (the +//! [`crate::xz`] default), so a decoder built with the default config — which +//! also uses a 4 MiB window — round-trips the output exactly. If you transport +//! 
this stream inside a 7z container, advertise a dictionary size of at least +//! 4 MiB in the coder property. #![cfg_attr(docsrs, doc(cfg(feature = "lzma2")))] @@ -74,12 +95,13 @@ const MAX_DICT: usize = 128 * 1024 * 1024; /// LZMA2 default and the size [`crate::xz`] uses). const DEFAULT_DICT: usize = 4 * 1024 * 1024; -/// Raw LZMA2 stream codec (7-Zip coder id 21). Decode-only. +/// Raw LZMA2 stream codec (7-Zip coder id 21). /// -/// The encoder is a permanent [`Error::Unsupported`] stub: 7z LZMA2 framing -/// is produced by the [`crate::xz`] encoder path, and there is no need for a -/// standalone raw LZMA2 encoder. See the [module docs](self) for the stream -/// shape. +/// Both directions are implemented: the [`Encoder`] emits a raw LZMA2 chunk +/// stream (full-reset chunks + `0x00` end marker) bounded by a 4 MiB +/// dictionary, and the [`Decoder`] consumes that stream. The dictionary size +/// is out of band (see the [module docs](self#dictionary-size-contract)); a +/// default-config decoder round-trips the default-config encoder's output. #[derive(Debug, Clone, Copy, Default)] pub struct Lzma2; @@ -138,7 +160,7 @@ impl Algorithm for Lzma2 { type EncoderConfig = (); type DecoderConfig = DecoderConfig; fn encoder_with(_: ()) -> Encoder { - Encoder + Encoder::new() } fn decoder_with(cfg: DecoderConfig) -> Decoder { Decoder::new(cfg) @@ -157,24 +179,282 @@ fn resolve_dict_size(cfg: &DecoderConfig) -> Result { Ok(raw.clamp(4096, MAX_DICT)) } -// ─── encoder stub ───────────────────────────────────────────────────────── +// ─── encoder ────────────────────────────────────────────────────────────── + +use crate::lzma2_internal::lzma2_encoder::{EncoderParams, LZMA2_PROPS_BYTE, encode_lzma_chunk}; -/// Raw LZMA2 encoder stub: permanently returns [`Error::Unsupported`]. +/// Dictionary size (in bytes) the encoder advertises to the LZMA chunk +/// coder as the match-distance ceiling. 
Fixed at 4 MiB — the [`crate::xz`] +/// default — so a default-config [`Decoder`] (also 4 MiB) round-trips. +const ENC_DICT_SIZE: u32 = DEFAULT_DICT as u32; + +/// Default compression level (mirrors xz-utils' and [`crate::xz`]'s default). +const ENC_DEFAULT_LEVEL: u8 = 6; + +/// Maximum uncompressed bytes buffered per LZMA2 chunk. Capped at 65_536 so +/// both the uncompressed-chunk 16-bit size field and the compressed-chunk +/// size fields stay in range, matching the [`crate::xz`] encoder's cap and +/// bounding peak working-buffer memory. +const ENC_CHUNK_MAX: usize = 65_536; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum EncPhase { + /// Buffering input; flushing a chunk when the buffer fills. + Body, + /// Draining a staged chunk from `pending`, then back to `Body`. + DrainPending, + /// (`finish` only) Flush any partial buffered chunk, then stage the + /// `0x00` end marker. + Finishing, + /// (`finish` only) Draining the `0x00` end marker from `pending`. + DrainEnd, + /// All chunks plus the `0x00` end marker have been drained. + Done, +} + +/// Raw LZMA2 encoder. /// -/// Lets the crate auto-derive the public [`Encoder`](crate::Encoder) trait -/// while making encode attempts fail cleanly. LZMA2 output is produced via -/// the [`crate::xz`] encoder. -#[derive(Debug, Clone, Copy, Default)] -pub struct Encoder; +/// Emits the raw LZMA2 chunk stream consumed by [`Decoder`] — a sequence of +/// full-reset chunks terminated by a single `0x00` end marker. There is **no** +/// `.xz` container (no stream magic, block header, index, or CRC); for that, +/// use [`crate::xz`]. Match distances are bounded by a fixed 4 MiB dictionary +/// that the decoder must be told about out of band (see the +/// [module docs](self#dictionary-size-contract)). 
+///
+/// Each chunk is independently decodable: the encoder always full-resets
+/// (dict + props + state) at the chunk boundary, emitting a compressed chunk
+/// (control `0xE0`) when that shrinks the data and an uncompressed chunk
+/// (control `0x01`) otherwise.
+#[derive(Clone)]
+pub struct Encoder {
+    phase: EncPhase,
+    /// Staged bytes for the current chunk (or end marker), drained to the
+    /// caller from `pending[pending_idx..]`.
+    pending: Vec<u8>,
+    pending_idx: usize,
+    /// Input accumulated for the next chunk; flushed at `ENC_CHUNK_MAX` or on
+    /// `finish`.
+    in_buf: Vec<u8>,
+    /// Level-derived match-finder tuning; preserved across `reset`.
+    params: EncoderParams,
+}
+
+// Hand-written so `Debug` does not require `EncoderParams: Debug`, and so the
+// staged/buffered bytes are summarized by length instead of being dumped.
+impl core::fmt::Debug for Encoder {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("Encoder")
+            .field("phase", &self.phase)
+            .field("pending_len", &self.pending.len())
+            .field("pending_idx", &self.pending_idx)
+            .field("in_buf_len", &self.in_buf.len())
+            .finish_non_exhaustive()
+    }
+}
+
+impl Default for Encoder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Encoder {
+    /// Build an encoder at the default compression level (6).
+    pub fn new() -> Self {
+        Self {
+            phase: EncPhase::Body,
+            pending: Vec::new(),
+            pending_idx: 0,
+            in_buf: Vec::new(),
+            params: EncoderParams::from_level(ENC_DEFAULT_LEVEL),
+        }
+    }
+
+    /// Push staged bytes from `pending[pending_idx..]` into `output`,
+    /// starting at `output[*written]` and advancing `*written` by the number
+    /// of bytes copied. Returns true once the staging buffer is fully drained
+    /// (it is then cleared and `pending_idx` rewound to 0), false if `output`
+    /// filled up first.
+    fn drain_pending(&mut self, output: &mut [u8], written: &mut usize) -> bool {
+        let staged = self.pending.get(self.pending_idx..).unwrap_or(&[]);
+        let room = output.len().saturating_sub(*written);
+        let n = staged.len().min(room);
+        // `n > 0` implies `*written < output.len()`, so the range is in bounds.
+        if n > 0 {
+            output[*written..*written + n].copy_from_slice(&staged[..n]);
+            *written += n;
+            self.pending_idx += n;
+        }
+        if self.pending_idx >= self.pending.len() {
+            self.pending.clear();
+            self.pending_idx = 0;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Stage one LZMA2 chunk for `data` (`1..=ENC_CHUNK_MAX` bytes), choosing
+    /// a compressed chunk when it shrinks the data and an uncompressed
+    /// fallback otherwise.
+    fn stage_chunk(&mut self, data: &[u8]) {
+        // Framing overhead of each chunk flavour as written by the two
+        // `stage_*` helpers: control + 2-byte uncompressed size + 2-byte
+        // compressed size + props byte, versus control + 2-byte size.
+        const COMPRESSED_HEADER_LEN: usize = 6;
+        const UNCOMPRESSED_HEADER_LEN: usize = 3;
+
+        debug_assert!(!data.is_empty() && data.len() <= ENC_CHUNK_MAX);
+        let compressed = encode_lzma_chunk(data, ENC_DICT_SIZE, self.params);
+        // A compressed chunk is only worth emitting when the *whole chunk*
+        // (header + range-coded body) is smaller than the uncompressed chunk
+        // (header + raw bytes) and the body fits the 16-bit (+1) comp-size
+        // field. Comparing bodies alone would pick a compressed chunk that is
+        // up to two bytes larger than the fallback, because its header is
+        // three bytes longer.
+        let use_compressed = !compressed.is_empty()
+            && compressed.len() <= 65_536
+            && COMPRESSED_HEADER_LEN + compressed.len() < UNCOMPRESSED_HEADER_LEN + data.len();
+        if use_compressed {
+            self.stage_compressed_chunk(data, &compressed);
+        } else {
+            self.stage_uncompressed_chunk(data);
+        }
+    }
+
+    /// Stage a full-reset compressed chunk: control `0xE0` (compressed, with
+    /// dict, props, and state all reset; top 5 bits of `uncomp_size-1`), a
+    /// 2-byte `uncomp_size-1` BE remainder, a 2-byte `comp_size-1` BE, the
+    /// 1-byte LZMA props (present because we full-reset), then the
+    /// range-coded body.
+    fn stage_compressed_chunk(&mut self, data: &[u8], compressed: &[u8]) {
+        debug_assert!(!data.is_empty() && data.len() <= ENC_CHUNK_MAX);
+        debug_assert!(!compressed.is_empty() && compressed.len() <= 65_536);
+
+        let uncomp_m1 = (data.len() - 1) as u32; // 0..=65535 with our cap
+        // Top 5 bits of (uncomp_size - 1) live in the control byte; with a
+        // 65_536 cap they are always zero, yielding exactly 0xE0.
+        let control: u8 = 0xE0 | ((uncomp_m1 >> 16) & 0x1F) as u8;
+        let comp_m1 = (compressed.len() - 1) as u16;
+
+        self.pending.reserve(6 + compressed.len());
+        self.pending.push(control);
+        self.pending.push(((uncomp_m1 >> 8) & 0xFF) as u8);
+        self.pending.push((uncomp_m1 & 0xFF) as u8);
+        self.pending.push((comp_m1 >> 8) as u8);
+        self.pending.push((comp_m1 & 0xFF) as u8);
+        self.pending.push(LZMA2_PROPS_BYTE);
+        self.pending.extend_from_slice(compressed);
+        self.pending_idx = 0;
+    }
+
+    /// Stage an uncompressed chunk: control `0x01` (dict reset), 2-byte
+    /// `size-1` BE, then the raw bytes.
+    fn stage_uncompressed_chunk(&mut self, data: &[u8]) {
+        debug_assert!(!data.is_empty() && data.len() <= ENC_CHUNK_MAX);
+        let size_m1 = (data.len() - 1) as u16;
+        self.pending.reserve(3 + data.len());
+        self.pending.push(0x01);
+        self.pending.push((size_m1 >> 8) as u8);
+        self.pending.push((size_m1 & 0xFF) as u8);
+        self.pending.extend_from_slice(data);
+        self.pending_idx = 0;
+    }
+}
 
 impl RawEncoder for Encoder {
-    fn raw_encode(&mut self, _input: &[u8], _output: &mut [u8]) -> Result<RawProgress> {
-        Err(Error::Unsupported)
+    /// Buffer `input` into `ENC_CHUNK_MAX`-sized chunks, staging and draining
+    /// each full chunk into `output`. Returns how many input bytes were
+    /// consumed and output bytes written; `done` is always `false` because
+    /// only `raw_finish` terminates the stream.
+    fn raw_encode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress> {
+        let mut consumed = 0usize;
+        let mut written = 0usize;
+
+        loop {
+            match self.phase {
+                EncPhase::Body => {
+                    // Top up the chunk buffer with as much input as fits.
+                    let room = ENC_CHUNK_MAX - self.in_buf.len();
+                    let take = room.min(input.len() - consumed);
+                    self.in_buf
+                        .extend_from_slice(&input[consumed..consumed + take]);
+                    consumed += take;
+                    if self.in_buf.len() < ENC_CHUNK_MAX {
+                        // Input ran out before the chunk filled; wait for more.
+                        break;
+                    }
+                    let mut data = core::mem::take(&mut self.in_buf);
+                    self.stage_chunk(&data);
+                    // Hand the (emptied) allocation back for the next chunk.
+                    data.clear();
+                    self.in_buf = data;
+                    self.phase = EncPhase::DrainPending;
+                }
+                EncPhase::DrainPending => {
+                    if !self.drain_pending(output, &mut written) {
+                        // `output` is full; the rest drains on the next call.
+                        break;
+                    }
+                    self.phase = EncPhase::Body;
+                }
+                // `encode` never advances into the finish-only phases.
+                EncPhase::Finishing | EncPhase::DrainEnd | EncPhase::Done => break,
+            }
+        }
+
+        Ok(RawProgress {
+            consumed,
+            written,
+            done: false,
+        })
     }
-    fn raw_finish(&mut self, _output: &mut [u8]) -> Result<RawProgress> {
-        Err(Error::Unsupported)
+
+    /// Flush any staged or partially buffered chunk, then emit the single
+    /// `0x00` end marker. Call repeatedly until `done` is `true`; `consumed`
+    /// is always 0.
+    fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress> {
+        let mut written = 0usize;
+
+        // `encode` leaves the encoder in `Body`/`DrainPending`; the first
+        // `finish` call drives it through `Finishing` → `DrainEnd` → `Done`.
+        // A `DrainPending` left over from `encode` still has chunk bytes
+        // staged; `Finishing` drains those before flushing the tail.
+        if matches!(self.phase, EncPhase::Body | EncPhase::DrainPending) {
+            self.phase = EncPhase::Finishing;
+        }
+
+        let done = loop {
+            match self.phase {
+                EncPhase::Finishing => {
+                    // Drain whatever chunk is currently staged first.
+                    if !self.pending.is_empty() && !self.drain_pending(output, &mut written) {
+                        break false;
+                    }
+                    if self.in_buf.is_empty() {
+                        // Buffer empty and any staged chunk drained: emit the
+                        // single 0x00 end marker.
+                        self.pending.push(0x00);
+                        self.pending_idx = 0;
+                        self.phase = EncPhase::DrainEnd;
+                    } else {
+                        // Stay in `Finishing`; the loop drains this chunk then
+                        // re-checks the (now empty) buffer.
+                        let mut data = core::mem::take(&mut self.in_buf);
+                        self.stage_chunk(&data);
+                        data.clear();
+                        self.in_buf = data;
+                    }
+                }
+                EncPhase::DrainEnd => {
+                    let drained = self.drain_pending(output, &mut written);
+                    if drained {
+                        self.phase = EncPhase::Done;
+                    }
+                    break drained;
+                }
+                EncPhase::Done => break true,
+                // Unreachable: normalized to `Finishing` above.
+                EncPhase::Body | EncPhase::DrainPending => {
+                    self.phase = EncPhase::Finishing;
+                }
+            }
+        };
+
+        Ok(RawProgress {
+            consumed: 0,
+            written,
+            done,
+        })
+    }
+
+    /// Return to the initial `Body` phase with empty buffers. `params` is left
+    /// untouched, so the compression level survives the reset.
+    fn raw_reset(&mut self) {
+        self.phase = EncPhase::Body;
+        self.pending.clear();
+        self.pending_idx = 0;
+        self.in_buf.clear();
     }
-    fn raw_reset(&mut self) {}
 }
 
 // ─── decoder ───────────────────────────────────────────────────────────────
@@ -796,4 +1076,134 @@ mod tests {
         assert_eq!(st2, Status::StreamEnd);
         assert_eq!(&out[..p2.written], &data[..]);
     }
+
+    // ── encoder tests ─────────────────────────────────────────────────────
+
+    use crate::traits::Encoder as _;
+
+    /// Encode `data` with the raw LZMA2 [`Encoder`], driving the streaming
+    /// API with the given output-buffer size to stress phase boundaries.
+    fn encode_all(data: &[u8], out_chunk: usize) -> Vec<u8> {
+        let mut enc = Lzma2::encoder_with(());
+        let mut stream = Vec::new();
+        let mut obuf = vec![0u8; out_chunk];
+        let mut consumed = 0;
+        loop {
+            let (p, st) = enc.encode(&data[consumed..], &mut obuf).unwrap();
+            stream.extend_from_slice(&obuf[..p.written]);
+            consumed += p.consumed;
+            match st {
+                Status::InputEmpty => break,
+                Status::OutputFull => {}
+                Status::StreamEnd => unreachable!("encode never ends the stream"),
+            }
+        }
+        loop {
+            let (p, st) = enc.finish(&mut obuf).unwrap();
+            stream.extend_from_slice(&obuf[..p.written]);
+            if st == Status::StreamEnd {
+                break;
+            }
+        }
+        stream
+    }
+
+    /// Encode then decode `data`, asserting a byte-identical round-trip both
+    /// in bulk and one byte at a time.
+    fn enc_roundtrip(data: &[u8]) {
+        for out_chunk in [4usize, 64, 4096, 1 << 17] {
+            let stream = encode_all(data, out_chunk);
+            // Last byte of a valid stream is always the 0x00 end marker.
+            assert_eq!(stream.last().copied(), Some(0u8), "missing end marker");
+            let got = decode_all(&stream, data.len()).expect("decode_all");
+            assert_eq!(got, data, "bulk decode mismatch (out_chunk={out_chunk})");
+        }
+        // Stable framing → byte-streaming decode through every phase boundary.
+        let stream = encode_all(data, 1 << 17);
+        decode_byte_streaming(&stream, data);
+    }
+
+    #[test]
+    fn enc_empty() {
+        let stream = encode_all(&[], 16);
+        assert_eq!(stream, vec![0x00]);
+        assert!(decode_all(&stream, 0).unwrap().is_empty());
+    }
+
+    #[test]
+    fn enc_one_byte() {
+        enc_roundtrip(b"Z");
+    }
+
+    #[test]
+    fn enc_small_text() {
+        enc_roundtrip(b"hello hello hello world the quick brown fox hello hello");
+    }
+
+    #[test]
+    fn enc_highly_compressible() {
+        // Zeros: forces the compressed-chunk path; ratio must be large.
+        let data = vec![0u8; 200 * 1024];
+        let stream = encode_all(&data, 1 << 17);
+        assert!(
+            stream.len() < data.len() / 4,
+            "zeros should compress hard, got {} from {}",
+            stream.len(),
+            data.len()
+        );
+        enc_roundtrip(&data);
+    }
+
+    #[test]
+    fn enc_multi_chunk() {
+        // > one 64 KiB chunk: several chunks plus the end marker.
+        let data: Vec<u8> = (0u32..200_000)
+            .map(|i| (i.wrapping_mul(31) >> 3) as u8)
+            .collect();
+        enc_roundtrip(&data);
+    }
+
+    #[test]
+    fn enc_incompressible_falls_back() {
+        // A pseudo-random, incompressible buffer forces uncompressed-chunk
+        // fallback (control 0x01). Verify at least one such chunk appears.
+ let mut data = vec![0u8; 4096]; + let mut x = 0x1234_5678u32; + for b in data.iter_mut() { + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + *b = (x >> 24) as u8; + } + let stream = encode_all(&data, 1 << 17); + assert_eq!(stream[0], 0x01, "expected uncompressed fallback chunk"); + enc_roundtrip(&data); + } + + #[test] + fn enc_reset_reuses_encoder() { + let data = b"reusable encoder content content content".to_vec(); + let s1 = encode_all(&data, 1 << 17); + let mut enc = Lzma2::encoder_with(()); + let mut obuf = vec![0u8; 1 << 17]; + let mut produce = |enc: &mut Encoder| { + let mut out = Vec::new(); + let (p, _) = enc.encode(&data, &mut obuf).unwrap(); + out.extend_from_slice(&obuf[..p.written]); + loop { + let (p, st) = enc.finish(&mut obuf).unwrap(); + out.extend_from_slice(&obuf[..p.written]); + if st == Status::StreamEnd { + break; + } + } + out + }; + let a = produce(&mut enc); + enc.reset(); + let b = produce(&mut enc); + assert_eq!(a, b); + assert_eq!(a, s1, "reset output diverged from a fresh encoder"); + assert_eq!(decode_all(&a, data.len()).unwrap(), data); + } } diff --git a/src/lzma2_internal/mod.rs b/src/lzma2_internal/mod.rs index f291ea4..fe253a6 100644 --- a/src/lzma2_internal/mod.rs +++ b/src/lzma2_internal/mod.rs @@ -8,8 +8,9 @@ pub(crate) mod lzma2_decoder; -// The LZMA payload *encoder* is only needed by the `.xz` container encoder -// and by round-trip tests; a raw `lzma2`-only build (decode-only) would -// otherwise carry it as dead code. -#[cfg(any(feature = "xz", test))] +// The LZMA payload *encoder* backs both the `.xz` container encoder and the +// raw LZMA2 encoder ([`crate::lzma2::Encoder`]); it is also exercised by +// round-trip tests. A build with neither `xz` nor `lzma2` would otherwise +// carry it as dead code. 
+#[cfg(any(feature = "xz", feature = "lzma2", test))] pub(crate) mod lzma2_encoder; diff --git a/tests/lzfse.rs b/tests/lzfse.rs index d5228e4..6e56b7a 100644 --- a/tests/lzfse.rs +++ b/tests/lzfse.rs @@ -342,25 +342,30 @@ fn lzvn_one_byte_at_a_time() { assert_eq!(out, HELLO_WORLD); } -// ─── bvx2 (LZFSE v2) is documented Unsupported in this build ───────────── +// ─── bvx2 (LZFSE v2) is now decoded ────────────────────────────────────── +// +// The bvx2 decoder itself is validated by round-trip against this crate's +// own spec-conformant v2 encoder in `src/lzfse/lzfse_v2.rs` (in-crate unit +// tests, which can reach the `#[cfg(test)]` encoder helper). From the public +// integration surface we only assert that a *malformed* bvx2 header is +// rejected cleanly (no panic, no Unsupported) — proving the v2 arm is wired +// in and reaches real header parsing rather than the old Unsupported stub. #[test] -fn bvx2_block_returns_unsupported() { - // Construct a stream that starts with bvx2 magic. The decoder should - // read the magic, peek at the v2 header (need 28 bytes after magic - // for the fixed-size portion), and then return Unsupported. +fn bvx2_malformed_header_rejected_without_panic() { + // A bvx2 magic followed by 32 zero header bytes is not a valid v2 block + // (the frequency tables would not sum to the FSE state counts). The + // decoder must reject it as Corrupt rather than returning Unsupported or + // panicking. let mut stream = b"bvx2".to_vec(); - // 28 bytes of arbitrary header bytes — content doesn't matter because - // we return Unsupported before interpreting them. stream.extend_from_slice(&[0u8; 32]); let mut dec = Decoder::new(); let mut buf = [0u8; 256]; - // Feed all input. Expect Err(Unsupported) at some point. 
let r = dec.decode(&stream, &mut buf); assert!( - matches!(r, Err(Error::Unsupported)), - "expected Unsupported on bvx2 block, got {:?}", + matches!(r, Err(Error::Corrupt) | Err(Error::UnexpectedEnd)), + "expected Corrupt/UnexpectedEnd on malformed bvx2 block, got {:?}", r ); } diff --git a/tests/lzma2.rs b/tests/lzma2.rs index 78f3e49..fa60d47 100644 --- a/tests/lzma2.rs +++ b/tests/lzma2.rs @@ -1,11 +1,12 @@ -//! Public-API tests for the raw LZMA2 decoder (7-Zip coder id 21). +//! Public-API tests for the raw LZMA2 codec (7-Zip coder id 21). //! -//! The crate-private LZMA payload encoder is exercised by the in-module -//! unit tests (`src/lzma2/mod.rs`), which cover compressed multi-chunk -//! round-trips, dict resets, and 1-byte streaming. Here we validate the -//! public surface: decoding hand-framed *uncompressed* LZMA2 chunks (which -//! need no encoder), self-termination on the `0x00` control byte, the -//! factory wiring, and DoS hygiene on crafted input. +//! The in-module unit tests (`src/lzma2/mod.rs`) cover encoder/decoder +//! round-trips, dict resets, fallback, and 1-byte streaming. Here we +//! validate the public surface: encoder→decoder round-trips through the +//! `Lzma2` public types, decoding hand-framed *uncompressed* LZMA2 chunks, +//! self-termination on the `0x00` control byte, cross-validation against the +//! shared `xz` chunk codec, the factory wiring, and DoS hygiene on crafted +//! input. #![cfg(feature = "lzma2")] @@ -127,8 +128,205 @@ fn factory_wiring() { assert!(compcol::factory::names().contains(&"lzma2")); assert_eq!(compcol::factory::extension("lzma2"), Some("lzma2")); assert!(compcol::factory::decoder_by_name("lzma2").is_some()); - // Encoder resolves but is an Unsupported stub. + // The encoder is now a real working encoder: a factory-built encoder + // round-trips through a factory-built decoder. 
+    let data = b"factory-routed lzma2 round-trip round-trip round-trip";
     let mut enc = compcol::factory::encoder_by_name("lzma2").expect("encoder present");
-    let mut out = [0u8; 16];
-    assert_eq!(enc.encode(b"x", &mut out), Err(Error::Unsupported));
+    let mut stream = Vec::new();
+    let mut obuf = [0u8; 256];
+    let (p, _) = enc.encode(data, &mut obuf).unwrap();
+    stream.extend_from_slice(&obuf[..p.written]);
+    loop {
+        let (p, st) = enc.finish(&mut obuf).unwrap();
+        stream.extend_from_slice(&obuf[..p.written]);
+        if st == Status::StreamEnd {
+            break;
+        }
+    }
+    let got = decode_all(&stream, DecoderConfig::default(), data.len()).unwrap();
+    assert_eq!(got, data);
+}
+
+/// Encode `data` with the public raw LZMA2 [`Lzma2`] encoder, draining
+/// `output` in `out_chunk`-sized slices to exercise the streaming API.
+fn encode_all(data: &[u8], out_chunk: usize) -> Vec<u8> {
+    let mut enc = Lzma2::encoder_with(());
+    let mut stream = Vec::new();
+    let mut obuf = vec![0u8; out_chunk];
+    let mut consumed = 0;
+    loop {
+        let (p, st) = enc.encode(&data[consumed..], &mut obuf).unwrap();
+        stream.extend_from_slice(&obuf[..p.written]);
+        consumed += p.consumed;
+        match st {
+            Status::InputEmpty => break,
+            Status::OutputFull => {}
+            Status::StreamEnd => unreachable!(),
+        }
+    }
+    loop {
+        let (p, st) = enc.finish(&mut obuf).unwrap();
+        stream.extend_from_slice(&obuf[..p.written]);
+        if st == Status::StreamEnd {
+            break;
+        }
+    }
+    stream
+}
+
+#[test]
+fn encoder_decoder_roundtrip_public() {
+    // Cover the required spread of input shapes through the public API.
+    let zeros = vec![0u8; 130 * 1024];
+    let big: Vec<u8> = (0u32..150_000)
+        .map(|i| (i.wrapping_mul(2654435761) >> 19) as u8)
+        .collect();
+    let mut rnd = vec![0u8; 8192];
+    let mut x = 0x9e37_79b9u32;
+    for b in rnd.iter_mut() {
+        x ^= x << 13;
+        x ^= x >> 17;
+        x ^= x << 5;
+        *b = (x >> 24) as u8;
+    }
+    let cases: Vec<Vec<u8>> = vec![
+        Vec::new(),
+        b"q".to_vec(),
+        b"the quick brown fox jumps over the lazy dog".to_vec(),
+        zeros,
+        big,
+        rnd,
+    ];
+    for data in &cases {
+        for out_chunk in [7usize, 1 << 16] {
+            let stream = encode_all(data, out_chunk);
+            let got = decode_all(&stream, DecoderConfig::default(), data.len()).unwrap();
+            assert_eq!(&got, data, "len={} out_chunk={out_chunk}", data.len());
+        }
+    }
+}
+
+/// Cross-validate framing against the shared `xz` chunk codec: wrap the raw
+/// LZMA2 stream this encoder emits inside a minimal `.xz` container and decode
+/// it with the public `xz` decoder. Because the `xz` and `lzma2` paths share
+/// `lzma2_decoder`, a successful decode proves our chunk framing is exactly
+/// what `xz` consumes. We build the container around our own payload rather
+/// than re-encoding with `xz`, so this exercises *our* bytes.
+#[test]
+#[cfg(feature = "xz")]
+fn xz_cross_validates_framing() {
+    use compcol::xz::Xz;
+
+    fn crc32(data: &[u8]) -> u32 {
+        let mut s = 0xFFFF_FFFFu32;
+        for &b in data {
+            s ^= b as u32;
+            for _ in 0..8 {
+                s = if s & 1 != 0 {
+                    0xEDB8_8320 ^ (s >> 1)
+                } else {
+                    s >> 1
+                };
+            }
+        }
+        s ^ 0xFFFF_FFFF
+    }
+    fn varint(mut v: u64, out: &mut Vec<u8>) {
+        while v >= 0x80 {
+            out.push((v as u8 & 0x7F) | 0x80);
+            v >>= 7;
+        }
+        out.push(v as u8);
+    }
+
+    // Data with both compressible and incompressible regions so the payload
+    // contains compressed (0xE0) and uncompressed (0x01) chunks.
+ let mut data = vec![0u8; 100 * 1024]; + let mut x = 0x1234_5678u32; + for (i, b) in data.iter_mut().enumerate() { + if i % 3 == 0 { + *b = 0; // compressible runs + } else { + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + *b = (x >> 24) as u8; + } + } + + // Our raw LZMA2 payload (chunks + 0x00 end marker), unchanged. + let payload = encode_all(&data, 1 << 16); + + // ── Stream Header: magic | flags(00,01=CRC32) | CRC32(flags) ── + let mut xz = vec![0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00, 0x00, 0x01]; + xz.extend_from_slice(&crc32(&[0x00, 0x01]).to_le_bytes()); + + // ── Block Header: size byte | flags | filter id | props size | dict + // flag (0x14 = 4 MiB) | pad to mult-of-4-minus-CRC | CRC32 ── + let mut bh = vec![0x02u8, 0x00, 0x21, 0x01, 0x14, 0x00, 0x00, 0x00]; + let bh_crc = crc32(&bh).to_le_bytes(); + bh.extend_from_slice(&bh_crc); + let block_header_len = bh.len() as u64; + xz.extend_from_slice(&bh); + + // ── Block payload + padding + Check(CRC32 of uncompressed data) ── + xz.extend_from_slice(&payload); + let compressed_size = payload.len() as u64; + let unpadded_no_pad = block_header_len + compressed_size + 4; + let pad = ((4 - (unpadded_no_pad % 4)) % 4) as usize; + xz.extend(core::iter::repeat_n(0u8, pad)); + xz.extend_from_slice(&crc32(&data).to_le_bytes()); + + // ── Index: 00 | numrec | (unpadded, uncompressed) | pad | CRC32 ── + let unpadded_size = block_header_len + compressed_size + 4; + let mut idx = vec![0x00u8]; + varint(1, &mut idx); + varint(unpadded_size, &mut idx); + varint(data.len() as u64, &mut idx); + while idx.len() % 4 != 0 { + idx.push(0x00); + } + let idx_crc = crc32(&idx).to_le_bytes(); + idx.extend_from_slice(&idx_crc); + let index_size = idx.len() as u32; + xz.extend_from_slice(&idx); + + // ── Stream Footer: CRC32(body) | backward_size | flags | magic ── + let mut footer_body = ((index_size / 4) - 1).to_le_bytes().to_vec(); + footer_body.push(0x00); + footer_body.push(0x01); + let f_crc = crc32(&footer_body).to_le_bytes(); + 
xz.extend_from_slice(&f_crc); + xz.extend_from_slice(&footer_body); + xz.extend_from_slice(&[0x59, 0x5A]); + + // Decode the whole thing through the public xz decoder. + let mut dec = Xz::decoder_with(()); + let mut out = vec![0u8; data.len() + 64]; + let mut consumed = 0; + let mut written = 0; + loop { + let (p, st) = dec.decode(&xz[consumed..], &mut out[written..]).unwrap(); + consumed += p.consumed; + written += p.written; + match st { + Status::StreamEnd => break, + Status::InputEmpty if consumed >= xz.len() => { + // Whole container consumed; finish surfaces the trailer end. + let (p, fst) = dec.finish(&mut out[written..]).unwrap(); + written += p.written; + assert_eq!(fst, Status::StreamEnd, "xz trailer not terminated"); + break; + } + _ => assert!( + !(p.consumed == 0 && p.written == 0), + "xz decoder stalled — framing mismatch" + ), + } + } + out.truncate(written); + assert_eq!( + out, data, + "xz cross-decode of our raw LZMA2 framing mismatched" + ); }