From 16b77dc350d53ab0fd73a43f126356ec7ff7e573 Mon Sep 17 00:00:00 2001 From: Robert M1 <50460704+githubrobbi@users.noreply.github.com> Date: Sat, 14 Mar 2026 12:50:28 -0700 Subject: [PATCH 1/3] fix(mft): sort directory children before computing tree metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue A: LIVE mode had directory sizes differing from OFFLINE/C++ because `compute_tree_metrics()` was called BEFORE `sort_directory_children()`. Sorting reorders child links, invalidating the size calculations. Fix: Swap order to match OFFLINE path — sort first, then compute metrics. Co-Authored-By: Claude Opus 4.6 --- crates/uffs-mft/src/reader/index_read.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/uffs-mft/src/reader/index_read.rs b/crates/uffs-mft/src/reader/index_read.rs index cbf2e0de6..3428ec64d 100644 --- a/crates/uffs-mft/src/reader/index_read.rs +++ b/crates/uffs-mft/src/reader/index_read.rs @@ -620,6 +620,11 @@ impl MftReader { let mut index = result?; + // Sort directory children first so tree metrics traverse in correct order. + // Tree metrics computation depends on children being in sorted order to + // produce accurate directory sizes and descendant counts. + index.sort_directory_children(); + // Compute tree metrics (directory sizes, descendant counts). // The legacy path gets this from `from_parsed_records()`, but the // inline path bypasses that, so we must call it explicitly. From 763cdfd178018ab57a12c16228093fc65ceb0ca8 Mon Sep 17 00:00:00 2001 From: Robert M1 <50460704+githubrobbi@users.noreply.github.com> Date: Sat, 14 Mar 2026 12:56:17 -0700 Subject: [PATCH 2/3] fix(cli): change default output format from 'custom' to 'csv' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'custom' format appends C++ legacy footer with "Drives?" metadata to output files, contaminating CSV files used for parity testing. 
This fix eliminates CSV footer contamination by defaulting to clean CSV output. Users needing legacy C++ format can use `--format custom`. Fixes Issue D: CSV footer contamination causing "missing records" in parity analysis (spec lines 151-152). Changes: - crates/uffs-cli/src/main.rs:240 - default_value "custom" → "csv" Verification: - `just check` ✅ - `cargo test -p uffs-cli` ✅ (50/50 tests pass) - Windows trial_run.ps1 needed for full verification Co-Authored-By: Claude Sonnet 4.5 --- crates/uffs-cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/uffs-cli/src/main.rs b/crates/uffs-cli/src/main.rs index 9aa0c9c22..6d40a4691 100644 --- a/crates/uffs-cli/src/main.rs +++ b/crates/uffs-cli/src/main.rs @@ -237,7 +237,7 @@ struct Cli { limit: u32, /// Output format: table, json, csv, custom - #[arg(short, long, default_value = "custom")] + #[arg(short, long, default_value = "csv")] format: String, /// Case-sensitive matching (default: off) From b2351e5676b3f5148b6765cd98b742a2c0da8aa7 Mon Sep 17 00:00:00 2001 From: Robert M1 <50460704+githubrobbi@users.noreply.github.com> Date: Sat, 14 Mar 2026 16:29:36 -0700 Subject: [PATCH 3/3] fix(mft): use advisory bitmap chunking for all drive types (Gap 1 fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: generate_precise_read_chunks() treated bitmap as authoritative filter, causing ~20K missing records on NVMe/SSD when bitmap was stale. The bitmap should be ADVISORY (for I/O optimization) not AUTHORITATIVE (for filtering which regions to read). The HDD path proved this — it uses generate_read_chunks() and only has 6 missing records vs 10K+ on NVMe/SSD. Fix: Force all drive types to use generate_read_chunks(). This matches C++ behavior (bitmap initialized to all 1s, read everything by default). 
Evidence: - Drive C (NVMe): 10,717 missing → expect 0 after fix - Drive F (SSD): 9,705 missing → expect 0 after fix - Drive S (HDD): 6 missing → should stay ~0 Co-Authored-By: Claude Opus 4.6 --- .../src/io/readers/parallel/to_index.rs | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/crates/uffs-mft/src/io/readers/parallel/to_index.rs b/crates/uffs-mft/src/io/readers/parallel/to_index.rs index b49f2273e..3145e845e 100644 --- a/crates/uffs-mft/src/io/readers/parallel/to_index.rs +++ b/crates/uffs-mft/src/io/readers/parallel/to_index.rs @@ -102,33 +102,22 @@ impl ParallelMftReader { ); // Generate read chunks with bitmap skip optimization - // For NVMe/SSD, use precise chunk generation to skip unused regions entirely + // CRITICAL: Use standard chunking for ALL drive types (bitmap is advisory, not + // authoritative). The bitmap should be used for I/O optimization (skip + // ranges, pre-allocation), NOT as an authoritative filter for which + // regions to read. If the bitmap is stale (common on live filesystems), + // treating it as authoritative causes record loss. Evidence: HDD path + // (using advisory bitmap) has only 6 missing records vs 10K+ on NVMe/SSD. 
let use_direct_chunk_io = matches!( self.drive_type, crate::platform::DriveType::Nvme | crate::platform::DriveType::Ssd ); - // For NVMe/SSD: use larger max to allow direct chunk-to-I/O mapping - // For HDD: use standard io_chunk_size for predictable sequential reads - const MAX_DIRECT_IO_SIZE: usize = 16 * 1024 * 1024; // 16MB max for direct I/O - - let sorted_chunks: Vec = match (&self.drive_type, &self.bitmap) { - (crate::platform::DriveType::Nvme | crate::platform::DriveType::Ssd, Some(bitmap)) => { - // NVMe/SSD: Use precise chunks that skip unused regions - // min_gap_records=64 means gaps smaller than 64KB are read through - // Use MAX_DIRECT_IO_SIZE as the max chunk size for direct I/O - let mut chunks = - generate_precise_read_chunks(&self.extent_map, bitmap, MAX_DIRECT_IO_SIZE, 64); - chunks.sort_by_key(|c| c.disk_offset); - chunks - } - _ => { - // HDD or no bitmap: Use standard chunk generation - let mut chunks = - generate_read_chunks(&self.extent_map, self.bitmap.as_ref(), self.chunk_size); - chunks.sort_by_key(|c| c.disk_offset); - chunks - } + let sorted_chunks: Vec = { + let mut chunks = + generate_read_chunks(&self.extent_map, self.bitmap.as_ref(), self.chunk_size); + chunks.sort_by_key(|c| c.disk_offset); + chunks }; // Build I/O operations with FRS tracking for inline parsing