KarpelesLab · MagicalTux · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+
+- *(cli)* `compcol -d` no longer truncates highly-compressible large inputs.
+  The streaming decode loop stopped once the compressed input was consumed,
+  leaving behind output that a block-buffering decoder (notably bzip2) still held
+  internally — `finish` does not flush it, so `compcol -t bzip2 -d` cut output
+  at 64 KiB. A drain loop now pulls the decoder's buffered output before
+  finishing. (Library decoders were already correct; this was CLI-only.)
+
+### Added
+
+- *(brotli enc)* iterative, statistics-driven optimal LZ77 parse
+  (zopfli-style forward DP) at quality 9–11. The cost model is rebuilt from
+  the previous pass's command/literal/distance histograms each round;
+  candidate matches are precomputed once and shared across passes. Improves
+  the max-quality ratio on the 2.9 MB corpus from 707558 to 669632 bytes
+  (1.473 → 1.394 vs `brotli -q 11`) and q9 from 709198 to 680156, with
+  reference cross-decode verified.
+
 ### Performance
 
 - **Round 2 of encoder ratio + codec speed work** (encoder-only for ratio;

diff --git a/src/bin/compcol.rs b/src/bin/compcol.rs
@@ -260,6 +260,21 @@ fn stream_decode(
         }
     }
 
+    // Drain any decoded output the decoder still holds internally before
+    // declaring end-of-input. Some codecs (notably bzip2) decode a whole block
+    // into an internal buffer that can exceed the caller's output buffer; once
+    // the compressed input is fully consumed, the decode loop above stops
+    // (its `consumed < n` guard is false) with that buffer still pending. Pull
+    // the rest out with empty-input `decode` calls until nothing more is
+    // produced — mirroring the streaming contract the library exercises.
+    loop {
+        let (p, _status) = dec.decode(&[], &mut out_buf).map_err(codec_err)?;
+        writer.write_all(&out_buf[..p.written])?;
+        if p.written == 0 {
+            break;
+        }
+    }
+
     loop {
         let (p, status) = dec.finish(&mut out_buf).map_err(codec_err)?;
         writer.write_all(&out_buf[..p.written])?;

diff --git a/src/brotli/encoder_lz77.rs b/src/brotli/encoder_lz77.rs
@@ -93,6 +93,112 @@ impl MatchFinder {
         self.head[idx] = pos as u32;
     }
 
+    /// Collect distinct match candidates at `pos` for the optimal parse.
+    ///
+    /// Walks the hash chain for at most `params.max_chain` steps and writes
+    /// a `(length, distance)` pair into `out` for every candidate *strictly
+    /// longer* than all earlier ones, so lengths in `out` strictly increase
+    /// in chain order and each is at least `MIN_MATCH`. The walk stops early
+    /// when `out` is full or a match reaches `params.nice_match` (clamped to
+    /// the longest match possible at `pos`). Returns the number of pairs
+    /// written; 0 if `out` is empty or under `MIN_MATCH` bytes remain.
+    ///
+    /// Unlike [`Self::find_match`] this keeps the chain entries that a
+    /// longest-only search would discard — exactly the closer, cheaper
+    /// distances the DP wants to price against the cost model.
+    pub(crate) fn find_matches(
+        &self,
+        buffer: &[u8],
+        pos: usize,
+        params: FinderParams,
+        out: &mut [(u32, u32)],
+    ) -> usize {
+        if out.is_empty() {
+            return 0;
+        }
+        let buf_len = buffer.len();
+        if pos + MIN_MATCH > buf_len {
+            return 0;
+        }
+        let h = hash4_at(buffer, pos);
+        let idx = (h as usize) & (HASH_SIZE - 1);
+        let max_dist = WINDOW_SIZE.min(pos);
+        let max_len = MAX_MATCH.min(buf_len - pos);
+        if max_len < MIN_MATCH {
+            return 0;
+        }
+        let nice = params.nice_match.min(max_len);
+        let chain_cap = params.max_chain;
+        let target = &buffer[pos..pos + max_len];
+
+        let mut best_len: usize = MIN_MATCH - 1; // first accepted match is thus >= MIN_MATCH
+        let mut count = 0usize;
+
+        let prev = &self.prev[..];
+        let head = &self.head[..];
+        let mut cur = head[idx];
+        let mut steps = 0usize;
+        while cur != NIL && steps < chain_cap {
+            let cur_pos = cur as usize;
+            if cur_pos >= pos { // not a prior position: skip, but count the step
+                cur = prev[cur_pos];
+                steps += 1;
+                continue;
+            }
+            let dist = pos - cur_pos;
+            if dist > max_dist { // farther back than `max_dist`: stop walking the chain
+                break;
+            }
+            // Cheap reject: a longer match must also agree at offset `best_len`.
+            if buffer[cur_pos + best_len] == target[best_len] {
+                let cand = &buffer[cur_pos..cur_pos + max_len];
+                let mut len = 0usize;
+                while len + 8 <= max_len { // compare 8 bytes at a time
+                    let a = u64::from_le_bytes([
+                        cand[len],
+                        cand[len + 1],
+                        cand[len + 2],
+                        cand[len + 3],
+                        cand[len + 4],
+                        cand[len + 5],
+                        cand[len + 6],
+                        cand[len + 7],
+                    ]);
+                    let b = u64::from_le_bytes([
+                        target[len],
+                        target[len + 1],
+                        target[len + 2],
+                        target[len + 3],
+                        target[len + 4],
+                        target[len + 5],
+                        target[len + 6],
+                        target[len + 7],
+                    ]);
+                    let diff = a ^ b;
+                    if diff != 0 {
+                        len += (diff.trailing_zeros() / 8) as usize; // matching leading bytes
+                        break;
+                    }
+                    len += 8;
+                }
+                while len < max_len && cand[len] == target[len] { // finish byte by byte
+                    len += 1;
+                }
+                if len > best_len {
+                    best_len = len;
+                    out[count] = (len as u32, dist as u32);
+                    count += 1;
+                    if count == out.len() || len >= nice { // `out` full or long enough
+                        break;
+                    }
+                }
+            }
+            cur = prev[cur_pos];
+            steps += 1;
+        }
+        count
+    }
+
     /// Find the longest prior occurrence of the bytes starting at `pos`.
     /// Returns Some((length, distance)) with length ≥ MIN_MATCH, or None.
     pub(crate) fn find_match(