Skip to content
Merged
22 changes: 18 additions & 4 deletions crates/uffs-mft/src/commands/load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,15 @@ pub fn cmd_load(
println!("🔨 BUILDING MFTINDEX...");

let build_start = Instant::now();
let index = MftReader::load_raw_to_index_with_options(input, &data_load_options)
.with_context(|| format!("Failed to build index from {}", input.display()))?;
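// Use the new direct-to-index parser by default. The legacy multi-pass parser
// is selected whenever the `UFFS_LEGACY_PARSE` environment variable is set to
// any valid Unicode value (the value itself is not inspected, so `0` also
// enables it).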
let index = if std::env::var("UFFS_LEGACY_PARSE").is_ok() {
println!(" Using legacy multi-pass parser (UFFS_LEGACY_PARSE=1)");
MftReader::load_raw_to_index_with_options(input, &data_load_options)
.with_context(|| format!("Failed to build index from {}", input.display()))?
} else {
MftReader::load_raw_to_index_direct(input, &data_load_options)
.with_context(|| format!("Failed to build index from {}", input.display()))?
};
let build_time = build_start.elapsed();

println!();
Expand Down Expand Up @@ -436,8 +443,15 @@ pub fn cmd_load(

// Build MftIndex (includes tree metrics computation)
let build_start = Instant::now();
let index = MftReader::load_raw_to_index_with_options(input, &data_load_options)
.with_context(|| format!("Failed to build index from {}", input.display()))?;
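// Use the new direct-to-index parser by default. The legacy multi-pass parser
// is selected whenever the `UFFS_LEGACY_PARSE` environment variable is set to
// any valid Unicode value (the value itself is not inspected, so `0` also
// enables it).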
let index = if std::env::var("UFFS_LEGACY_PARSE").is_ok() {
println!(" Using legacy multi-pass parser (UFFS_LEGACY_PARSE=1)");
MftReader::load_raw_to_index_with_options(input, &data_load_options)
.with_context(|| format!("Failed to build index from {}", input.display()))?
} else {
MftReader::load_raw_to_index_direct(input, &data_load_options)
.with_context(|| format!("Failed to build index from {}", input.display()))?
};
let build_time = build_start.elapsed();

println!(
Expand Down
52 changes: 52 additions & 0 deletions crates/uffs-mft/src/index/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,58 @@ impl MftIndex {
}
}

/// Build an empty index whose buffers are pre-sized from MFT bitmap figures.
///
/// All backing collections are reserved up front, based on the MFT bitmap
/// popcount, with the aim of avoiding `Vec` growth while the parse loop is
/// running. The sizing ratios mirror the C++ implementation in
/// `ntfs_index_accessors.hpp` lines 525-544.
///
/// # Arguments
///
/// * `volume` - Volume letter (e.g., 'C')
/// * `estimated_records` - Count of valid records taken from the bitmap
///   popcount
/// * `max_frs` - Highest FRS number found in the bitmap; drives the
///   `frs_to_idx` reservation
///
/// # Reserved Capacities
///
/// - `records`: `estimated_records + estimated_records / 20` (integer 5%
///   headroom for placeholders)
/// - `frs_to_idx`: `max_frs + 1` (sparse array indexed by FRS); falls back
///   to `estimated_records` when that count does not fit in `usize`
/// - `names`: `estimated_records * 23` (~23 chars avg per name)
/// - `links`: `estimated_records / 16` (~6% have hardlinks)
/// - `streams`: `estimated_records / 4` (~25% have additional streams)
/// - `internal_streams`: `estimated_records / 20` (~5% have internal
///   streams)
/// - `children`: `estimated_records * 3 / 2` (directories have multiple
///   children)
///
/// The remaining fields start out empty: fresh `MftStats` and
/// `ExtensionTable`, no `extension_index`, and `forensic_mode` disabled.
#[must_use]
pub fn with_capacity_optimized(volume: char, estimated_records: usize, max_frs: u64) -> Self {
    // One extra twentieth leaves room for the placeholder records that path
    // resolution may append later.
    let record_slots = estimated_records + (estimated_records / 20);

    // `frs_to_idx` is addressed directly by FRS, so it wants `max_frs + 1`
    // slots. When that count is not representable as `usize`, use the record
    // estimate instead.
    let frs_slots = match usize::try_from(max_frs) {
        Ok(highest_frs) => highest_frs.checked_add(1).unwrap_or(estimated_records),
        Err(_) => estimated_records,
    };

    // Reserve each buffer in struct-field order.
    let records = Vec::with_capacity(record_slots);
    let frs_to_idx = Vec::with_capacity(frs_slots);
    let names = String::with_capacity(estimated_records * 23);
    let links = Vec::with_capacity(estimated_records / 16);
    let streams = Vec::with_capacity(estimated_records / 4);
    let internal_streams = Vec::with_capacity(estimated_records / 20);
    let children = Vec::with_capacity(estimated_records * 3 / 2);

    Self {
        volume,
        records,
        frs_to_idx,
        names,
        links,
        streams,
        internal_streams,
        children,
        stats: MftStats::new(),
        extensions: ExtensionTable::new(),
        extension_index: None,
        forensic_mode: false,
    }
}

/// Recompute stats from the current index data.
///
/// This is useful after deserializing an index from disk,
Expand Down
15 changes: 13 additions & 2 deletions crates/uffs-mft/src/index/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,19 @@ use super::{
impl MftIndex {
/// Build an `MftIndex` from a vector of parsed records.
///
/// This is the fast path - directly builds the lean index without
/// going through Polars `DataFrame`.
/// **LEGACY MULTI-PASS PIPELINE:** This function is the final stage of the
/// old `parse_record_full → MftRecordMerger → from_parsed_records`
/// pipeline. The hot path (`SlidingIocpInline`) now uses direct-to-index
/// parsers that build the index incrementally during I/O, skipping this
/// separate build phase. This function is still used by:
/// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`,
/// `SlidingIocp`)
/// - File-based readers (`load_raw_to_index_with_options`)
/// - Tests and diagnostic tools
/// - `UFFS_LEGACY_PARSE=1` escape hatch
///
/// This directly builds the lean index without going through Polars
/// `DataFrame`.
///
/// Works on all platforms - uses cross-platform `ParsedRecord` from parse
/// module.
Expand Down
Loading
Loading