githubrobbi · githubrobbi · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/crates/uffs-mft/src/commands/load.rs b/crates/uffs-mft/src/commands/load.rs
@@ -244,8 +244,15 @@ pub fn cmd_load(
         println!("🔨 BUILDING MFTINDEX...");
 
         let build_start = Instant::now();
-        let index = MftReader::load_raw_to_index_with_options(input, &data_load_options)
-            .with_context(|| format!("Failed to build index from {}", input.display()))?;
+        // Direct-to-index by default; legacy if UFFS_LEGACY_PARSE is set at all
+        let index = if std::env::var("UFFS_LEGACY_PARSE").is_ok() {
+            println!("  Using legacy multi-pass parser (UFFS_LEGACY_PARSE=1)");
+            MftReader::load_raw_to_index_with_options(input, &data_load_options)
+                .with_context(|| format!("Failed to build index from {}", input.display()))?
+        } else {
+            MftReader::load_raw_to_index_direct(input, &data_load_options)
+                .with_context(|| format!("Failed to build index from {}", input.display()))?
+        };
         let build_time = build_start.elapsed();
 
         println!();
@@ -436,8 +443,15 @@ pub fn cmd_load(
 
     // Build MftIndex (includes tree metrics computation)
     let build_start = Instant::now();
-    let index = MftReader::load_raw_to_index_with_options(input, &data_load_options)
-        .with_context(|| format!("Failed to build index from {}", input.display()))?;
+    // Direct-to-index by default; legacy if UFFS_LEGACY_PARSE is set at all
+    let index = if std::env::var("UFFS_LEGACY_PARSE").is_ok() {
+        println!("  Using legacy multi-pass parser (UFFS_LEGACY_PARSE=1)");
+        MftReader::load_raw_to_index_with_options(input, &data_load_options)
+            .with_context(|| format!("Failed to build index from {}", input.display()))?
+    } else {
+        MftReader::load_raw_to_index_direct(input, &data_load_options)
+            .with_context(|| format!("Failed to build index from {}", input.display()))?
+    };
     let build_time = build_start.elapsed();
 
     println!(

diff --git a/crates/uffs-mft/src/index/base.rs b/crates/uffs-mft/src/index/base.rs
@@ -35,6 +35,58 @@ impl MftIndex {
         }
     }
 
+    /// Construct an empty index with every buffer pre-sized up front.
+    ///
+    /// Capacities are derived from the MFT bitmap popcount, with the intent
+    /// that the parse loop should not need to resize them. The ratios mirror
+    /// the C++ implementation (`ntfs_index_accessors.hpp`, lines 525-544).
+    ///
+    /// # Arguments
+    ///
+    /// * `volume` - Volume letter (e.g., 'C')
+    /// * `estimated_records` - Valid record count taken from the bitmap
+    ///   popcount
+    /// * `max_frs` - Highest FRS number in the bitmap; sizes `frs_to_idx`
+    ///
+    /// # Pre-allocation Strategy
+    ///
+    /// - `records`: `estimated_records * 1.05` (5% headroom for placeholders)
+    /// - `frs_to_idx`: `max_frs + 1` (sparse array indexed by FRS); falls
+    ///   back to `estimated_records` if that does not fit in `usize`
+    /// - `names`: `estimated_records * 23` (~23 chars avg per name)
+    /// - `links`: `estimated_records / 16` (~6% have hardlinks)
+    /// - `streams`: `estimated_records / 4` (~25% have additional streams)
+    /// - `internal_streams`: `estimated_records / 20` (~5% have internal
+    ///   streams)
+    /// - `children`: `estimated_records * 3 / 2` (directories have multiple
+    ///   children)
+    #[must_use]
+    pub fn with_capacity_optimized(volume: char, estimated_records: usize, max_frs: u64) -> Self {
+        // 5% headroom for placeholder records created by path resolution.
+        let record_slots = estimated_records + estimated_records / 20;
+
+        // Slot per FRS up to `max_frs`; fall back to the estimate on overflow.
+        let frs_slots = match usize::try_from(max_frs) {
+            Ok(highest) => highest.checked_add(1).unwrap_or(estimated_records),
+            Err(_) => estimated_records,
+        };
+
+        Self {
+            volume,
+            records: Vec::with_capacity(record_slots),
+            frs_to_idx: Vec::with_capacity(frs_slots),
+            names: String::with_capacity(estimated_records * 23),
+            links: Vec::with_capacity(estimated_records / 16),
+            streams: Vec::with_capacity(estimated_records / 4),
+            internal_streams: Vec::with_capacity(estimated_records / 20),
+            children: Vec::with_capacity(estimated_records * 3 / 2),
+            stats: MftStats::new(),
+            extensions: ExtensionTable::new(),
+            extension_index: None,
+            forensic_mode: false,
+        }
+    }
+
     /// Recompute stats from the current index data.
     ///
     /// This is useful after deserializing an index from disk,

diff --git a/crates/uffs-mft/src/index/builder.rs b/crates/uffs-mft/src/index/builder.rs
@@ -12,8 +12,19 @@ use super::{
 impl MftIndex {
     /// Build an `MftIndex` from a vector of parsed records.
     ///
-    /// This is the fast path - directly builds the lean index without
-    /// going through Polars `DataFrame`.
+    /// **LEGACY MULTI-PASS PIPELINE:** This function is the final stage of the
+    /// old `parse_record_full → MftRecordMerger → from_parsed_records`
+    /// pipeline. The hot path (`SlidingIocpInline`) now uses direct-to-index
+    /// parsers that build the index incrementally during I/O, skipping this
+    /// separate build phase. This function is still used by:
+    /// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`,
+    ///   `SlidingIocp`)
+    /// - File-based readers (`load_raw_to_index_with_options`)
+    /// - Tests and diagnostic tools
+    /// - `UFFS_LEGACY_PARSE=1` escape hatch
+    ///
+    /// This directly builds the lean index without going through Polars
+    /// `DataFrame`.
     ///
     /// Works on all platforms - uses cross-platform `ParsedRecord` from parse
     /// module.