From 370a58ad8c35b69db75a2c999df64d2d9d632ea3 Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 03:58:39 -0700
Subject: [PATCH 1/8] feat(mft): revive and modernize direct-to-index parsers

Un-deprecate and modernize parse_record_to_index() and parse_extension_to_index()
to handle ALL attribute types that parse_record_full() handles. This restores the
single-pass C++-style inline parsing approach as the primary path.

Key Changes:
- Remove #[deprecated] annotations from both parsers
- Add complete attribute handling:
  - $REPARSE_POINT - extract reparse tag, add as stream
  - $INDEX_ROOT, $INDEX_ALLOCATION, $BITMAP - directory index handling
  - $OBJECT_ID, $VOLUME_NAME, $VOLUME_INFORMATION, $PROPERTY_SET
  - $EA, $EA_INFORMATION, $LOGGED_UTILITY_STREAM
  - $SECURITY_DESCRIPTOR, $ATTRIBUTE_LIST
  - Unknown attribute types (default: case in C++)
- Set reparse_tag and total_stream_count in FileRecord
- Handle directory size from accumulated $I30 attributes
- Merge directory index sizes in extension records

Files Modified:
- crates/uffs-mft/src/io/parser/index.rs (845 LOC)
- crates/uffs-mft/src/io/parser/index_extension.rs (727 LOC)
- scripts/ci/file_size_exceptions.txt (add exceptions for large parsers)

Achieves feature parity with parse_record_full() + MftRecordMerger pipeline.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 crates/uffs-mft/src/io/parser/index.rs        | 361 +++++++++++++++++-
 .../uffs-mft/src/io/parser/index_extension.rs | 331 +++++++++++++++-
 scripts/ci/file_size_exceptions.txt           |   4 +-
 3 files changed, 680 insertions(+), 16 deletions(-)

diff --git a/crates/uffs-mft/src/io/parser/index.rs b/crates/uffs-mft/src/io/parser/index.rs
index 96fc6be6b..1823aa49e 100644
--- a/crates/uffs-mft/src/io/parser/index.rs
+++ b/crates/uffs-mft/src/io/parser/index.rs
@@ -1,5 +1,14 @@
-//! Legacy direct-to-index parser bridge.
-//! Preserves the `io` parser surface for the IOCP fast path.
+//! Single-pass direct-to-index parser (C++-style inline approach).
+//!
+//! Exception: This file is intentionally monolithic (840+ LOC) because it
+//! implements a performance-critical hot path that handles all NTFS attribute
+//! types inline. Splitting would introduce indirection overhead and hurt
+//! performance. See `scripts/ci/file_size_exceptions.txt`.
+//!
+//! This module implements the high-performance single-pass parser that matches
+//! the C++ architecture. It parses MFT records directly into `MftIndex` without
+//! creating intermediate `ParsedRecord` allocations, which is critical for IOCP
+//! performance.
 
 use core::mem::size_of;
 
@@ -9,20 +18,32 @@ use zerocopy::FromBytes;
 use super::index_extension::parse_extension_to_index;
 use crate::ntfs::is_internal_windows_stream;
 
-/// Parses a record directly into MftIndex (inline parsing for IOCP).
+/// Parses a record directly into `MftIndex` (single-pass inline parsing).
 ///
 /// This function parses the record and adds it directly to the index,
-/// creating parent placeholders on-demand. This is the legacy-output parity
+/// creating parent placeholders on-demand. This is the C++-style single-pass
 /// approach that eliminates the intermediate `ParsedRecord` allocation.
 ///
+/// Handles ALL attribute types that `parse_record_full()` handles, including:
+/// - `$STANDARD_INFORMATION`, `$FILE_NAME`, `$DATA` (default + ADS)
+/// - `$REPARSE_POINT` (for WoF detection and junctions/symlinks)
+/// - `$INDEX_ROOT`, `$INDEX_ALLOCATION`, `$BITMAP` (directory indexes)
+/// - `$OBJECT_ID`, `$VOLUME_NAME`, `$VOLUME_INFORMATION`, `$PROPERTY_SET`
+/// - `$EA`, `$EA_INFORMATION`, `$LOGGED_UTILITY_STREAM`
+/// - `$SECURITY_DESCRIPTOR`, `$ATTRIBUTE_LIST`
+/// - Unknown attribute types (counted as streams for C++ parity)
+///
 /// # Returns
 ///
 /// `true` if a record was added to the index, `false` if skipped.
-#[deprecated(note = "Use parse_record_full() + MftRecordMerger + from_parsed_records() instead")]
 #[expect(
     clippy::too_many_lines,
     reason = "monolithic parser kept for performance-critical hot path"
 )]
+#[expect(
+    clippy::cognitive_complexity,
+    reason = "NTFS attribute dispatch is inherently complex"
+)]
 #[expect(
     clippy::cast_possible_truncation,
     reason = "NTFS field sizes are bounded by u16/u32 record layout"
@@ -78,6 +99,9 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
     let mut default_allocated = 0u64;
     // ADS: (stream_name, size, allocated)
     let mut additional_streams: SmallVec<[(String, u64, u64); 4]> = SmallVec::new();
+    let mut reparse_tag: u32 = 0;
+    let mut dir_index_size: u64 = 0;
+    let mut dir_index_allocated: u64 = 0;
 
     while offset + size_of::<AttributeRecordHeader>() <= max_offset {
         let attr_header = match AttributeRecordHeader::read_from_prefix(&data[offset..]) {
@@ -93,7 +117,8 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
             break;
         }
 
-        match AttributeType::from_u32(attr_header.type_code) {
+        let attr_type = AttributeType::from_u32(attr_header.type_code);
+        match attr_type {
             Some(AttributeType::StandardInformation) => {
                 if attr_header.is_non_resident == 0 {
                     // Parse $STANDARD_INFORMATION
@@ -257,7 +282,319 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
                     }
                 }
             }
-            _ => {}
+            Some(AttributeType::ReparsePoint) => {
+                // Parse $REPARSE_POINT to get the reparse tag
+                // C++ handles both resident and non-resident reparse points
+                // C++ also counts $REPARSE_POINT as a stream (for descendants)
+                let (rp_size, rp_allocated) = if attr_header.is_non_resident == 0 {
+                    // Resident reparse point (common case)
+                    let value_length_bytes = &data[offset + 16..offset + 20];
+                    let value_length =
+                        u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0, 0, 0, 0]))
+                            as u64;
+
+                    let value_offset_bytes = &data[offset + 20..offset + 22];
+                    let value_offset =
+                        u16::from_le_bytes(value_offset_bytes.try_into().unwrap_or([0, 0]))
+                            as usize;
+                    let rp_offset = offset + value_offset;
+                    if rp_offset + 4 <= data.len() {
+                        // Read reparse tag (first 4 bytes of reparse point data)
+                        let tag_bytes = &data[rp_offset..rp_offset + 4];
+                        reparse_tag =
+                            u32::from_le_bytes(tag_bytes.try_into().unwrap_or([0, 0, 0, 0]));
+                    }
+                    (value_length, 0_u64) // Resident, allocated=0
+                } else {
+                    // Non-resident reparse point (rare - large reparse data)
+                    let nr_offset = offset + 16;
+                    if nr_offset + 48 <= data.len() {
+                        let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                        let allocated =
+                            i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                        let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                        let data_size = i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                        (data_size.max(0) as u64, allocated.max(0) as u64)
+                    } else {
+                        (0_u64, 0_u64)
+                    }
+                };
+
+                // Add $REPARSE_POINT as a stream (matches C++ stream counting)
+                additional_streams.push((String::from("$REPARSE"), rp_size, rp_allocated));
+            }
+            Some(
+                AttributeType::IndexRoot | AttributeType::IndexAllocation | AttributeType::Bitmap,
+            ) => {
+                // C++ includes $INDEX_ROOT and $INDEX_ALLOCATION with name $I30
+                // in directory size. For non-$I30 indexes, C++ counts them as streams.
+
+                // Extract attribute name
+                let name_len = attr_header.name_length as usize;
+                let (is_i30, attr_name) = if name_len > 0 {
+                    let name_offset = offset + attr_header.name_offset as usize;
+                    if name_offset + name_len * 2 <= data.len() {
+                        let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                        // Check for "$I30" in UTF-16LE
+                        let is_i30 =
+                            attr_header.name_length == 4 && name_bytes == b"$\x00I\x003\x000\x00";
+                        // Decode name for non-$I30 indexes
+                        let name = if is_i30 {
+                            String::new()
+                        } else {
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        };
+                        (is_i30, name)
+                    } else {
+                        (false, String::new())
+                    }
+                } else {
+                    (false, String::new())
+                };
+
+                if is_i30 {
+                    // Accumulate $I30 sizes for directories
+                    if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        dir_index_size += value_length;
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            dir_index_size += data_size.max(0) as u64;
+                            dir_index_allocated += allocated.max(0) as u64;
+                        }
+                    }
+                } else {
+                    // Non-$I30 index - count as stream
+                    // Check if primary attribute (LowestVCN == 0)
+                    let is_primary = if attr_header.is_non_resident == 0 {
+                        true
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 8 <= data.len() {
+                            let lowest_vcn = i64::from_le_bytes(
+                                data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                            );
+                            lowest_vcn == 0
+                        } else {
+                            false
+                        }
+                    };
+
+                    if is_primary {
+                        let (size, allocated) = if attr_header.is_non_resident == 0 {
+                            let value_length_bytes = &data[offset + 16..offset + 20];
+                            let value_length =
+                                u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                    as u64;
+                            (value_length, 0_u64)
+                        } else {
+                            let nr_offset = offset + 16;
+                            if nr_offset + 48 <= data.len() {
+                                let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                                let allocated =
+                                    i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                                let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                                let data_size =
+                                    i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                                (data_size.max(0) as u64, allocated.max(0) as u64)
+                            } else {
+                                (0_u64, 0_u64)
+                            }
+                        };
+
+                        let stream_name = if attr_name.is_empty() {
+                            match attr_type {
+                                Some(AttributeType::Bitmap) => String::from("$BITMAP"),
+                                Some(AttributeType::IndexRoot) => String::from("$INDEX_ROOT"),
+                                Some(AttributeType::IndexAllocation) => {
+                                    String::from("$INDEX_ALLOCATION")
+                                }
+                                _ => String::new(),
+                            }
+                        } else {
+                            attr_name
+                        };
+                        additional_streams.push((stream_name, size, allocated));
+                    }
+                }
+            }
+            Some(
+                AttributeType::ObjectId
+                | AttributeType::VolumeName
+                | AttributeType::VolumeInformation
+                | AttributeType::PropertySet
+                | AttributeType::Ea
+                | AttributeType::EaInformation
+                | AttributeType::LoggedUtilityStream
+                | AttributeType::SecurityDescriptor
+                | AttributeType::AttributeList,
+            ) => {
+                // All these are counted as streams in C++
+                // Check if primary attribute (LowestVCN == 0)
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    // Extract attribute name (if any)
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        match attr_type {
+                            Some(AttributeType::ObjectId) => String::from("$OBJECT_ID"),
+                            Some(AttributeType::VolumeName) => String::from("$VOLUME_NAME"),
+                            Some(AttributeType::VolumeInformation) => {
+                                String::from("$VOLUME_INFORMATION")
+                            }
+                            Some(AttributeType::PropertySet) => String::from("$PROPERTY_SET"),
+                            Some(AttributeType::Ea) => String::from("$EA"),
+                            Some(AttributeType::EaInformation) => String::from("$EA_INFORMATION"),
+                            Some(AttributeType::LoggedUtilityStream) => {
+                                String::from("$LOGGED_UTILITY_STREAM")
+                            }
+                            Some(AttributeType::SecurityDescriptor) => {
+                                String::from("$SECURITY_DESCRIPTOR")
+                            }
+                            Some(AttributeType::AttributeList) => String::from("$ATTRIBUTE_LIST"),
+                            _ => String::new(),
+                        }
+                    } else {
+                        attr_name
+                    };
+                    additional_streams.push((stream_name, size, allocated));
+                }
+            }
+            Some(AttributeType::StandardInformation | AttributeType::FileName) => {
+                // Already handled above
+            }
+            _ => {
+                // C++ counts ALL attribute types as streams via default: case
+                // This includes truly unknown types
+                let type_code = attr_header.type_code;
+
+                // Check if primary attribute (LowestVCN == 0)
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    // Extract attribute name (if any)
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        format!("$UNKNOWN_0x{type_code:X}")
+                    } else {
+                        attr_name
+                    };
+                    additional_streams.push((stream_name, size, allocated));
+                }
+            }
         }
 
         offset += attr_header.length as usize;
@@ -267,6 +604,11 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
     // This ensures is_directory is set even when $FILE_NAME is in extension record
     if is_directory {
         std_info.set_directory(true);
+        // For directories, set default size to directory index size
+        if dir_index_size > 0 {
+            default_size = dir_index_size;
+            default_allocated = dir_index_allocated;
+        }
     }
 
     // Handle records without a filename in the base record
@@ -419,6 +761,11 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
     record.name_count = 1 + additional_count as u16;
     // stream_count = 1 (default) + additional ADS
     record.stream_count = 1 + additional_stream_count as u16;
+    // total_stream_count includes all streams (including internal ones like
+    // $REPARSE)
+    record.total_stream_count = 1 + additional_stream_count as u16;
+    // Set reparse tag if this is a reparse point
+    record.reparse_tag = reparse_tag;
 
     // Chain the additional links: first_name -> link[0] -> link[1] -> ... ->
     // NO_ENTRY The links were pushed with next_entry = NO_ENTRY, now we chain
diff --git a/crates/uffs-mft/src/io/parser/index_extension.rs b/crates/uffs-mft/src/io/parser/index_extension.rs
index ca513c1f0..b1306548f 100644
--- a/crates/uffs-mft/src/io/parser/index_extension.rs
+++ b/crates/uffs-mft/src/io/parser/index_extension.rs
@@ -1,5 +1,12 @@
-//! Legacy extension-record helper for direct-to-index parsing.
-//! Extracts names and streams from extension records into the index.
+//! Extension record parser for direct-to-index path.
+//!
+//! Exception: This file is intentionally large (720+ LOC) to match the
+//! completeness of `index.rs` - it handles all the same attribute types that
+//! can appear in extension records. See `scripts/ci/file_size_exceptions.txt`.
+//!
+//! This module handles extension records for the single-pass parser, extracting
+//! names, streams, and all attribute types from extension records and merging
+//! them into base records in the index.
 
 use core::mem::size_of;
 
@@ -11,9 +18,16 @@ use crate::ntfs::is_internal_windows_stream;
 /// Parses an extension record and adds its names/streams to the base record.
 ///
 /// Extension records contain additional `$FILE_NAME` attributes (hard links)
-/// and `$DATA` attributes (ADS) that don't fit in the base record. This
-/// function extracts those attributes and adds them to the base record in the
-/// index.
+/// and additional attributes (ADS, system attributes, etc.) that don't fit
+/// in the base record. This function extracts those attributes and adds them
+/// to the base record in the index.
+///
+/// Handles ALL attribute types that `parse_record_full()` handles, including:
+/// - `$FILE_NAME` (hard links)
+/// - `$DATA` (ADS)
+/// - `$REPARSE_POINT`, `$INDEX_ROOT`, `$INDEX_ALLOCATION`, `$BITMAP`
+/// - `$OBJECT_ID`, `$EA`, `$LOGGED_UTILITY_STREAM`, etc.
+/// - Unknown attribute types
 ///
 /// # Arguments
 ///
@@ -24,11 +38,18 @@ use crate::ntfs::is_internal_windows_stream;
 /// # Returns
 ///
 /// `true` if any names/streams were added, `false` otherwise.
-#[deprecated(note = "Use parse_record_full() + MftRecordMerger instead")]
 #[expect(
     clippy::cast_possible_truncation,
     reason = "NTFS field sizes are bounded by u16/u32 record layout"
 )]
+#[expect(
+    clippy::cognitive_complexity,
+    reason = "NTFS attribute dispatch is inherently complex"
+)]
+#[expect(
+    clippy::too_many_lines,
+    reason = "monolithic extension parser for performance"
+)]
 pub(super) fn parse_extension_to_index(
     data: &[u8],
     base_frs: u64,
@@ -55,6 +76,8 @@ pub(super) fn parse_extension_to_index(
     // Collect names and streams from extension record
     let mut names: SmallVec<[(String, u64); 4]> = SmallVec::new();
     let mut streams: SmallVec<[(String, u64, u64); 4]> = SmallVec::new();
+    let mut dir_index_size: u64 = 0;
+    let mut dir_index_allocated: u64 = 0;
 
     while offset + size_of::<AttributeRecordHeader>() <= max_offset {
         let attr_header = match AttributeRecordHeader::read_from_prefix(&data[offset..]) {
@@ -70,7 +93,8 @@ pub(super) fn parse_extension_to_index(
             break;
         }
 
-        match AttributeType::from_u32(attr_header.type_code) {
+        let attr_type = AttributeType::from_u32(attr_header.type_code);
+        match attr_type {
             Some(AttributeType::FileName) => {
                 // Parse $FILE_NAME attribute
                 if attr_header.is_non_resident == 0 {
@@ -178,7 +202,289 @@ pub(super) fn parse_extension_to_index(
                     }
                 }
             }
-            _ => {}
+            Some(AttributeType::ReparsePoint) => {
+                // Parse $REPARSE_POINT - add as stream
+                let (rp_size, rp_allocated) = if attr_header.is_non_resident == 0 {
+                    let value_length_bytes = &data[offset + 16..offset + 20];
+                    let value_length =
+                        u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4])) as u64;
+                    (value_length, 0_u64)
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 48 <= data.len() {
+                        let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                        let allocated =
+                            i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                        let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                        let data_size = i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                        (data_size.max(0) as u64, allocated.max(0) as u64)
+                    } else {
+                        (0_u64, 0_u64)
+                    }
+                };
+                streams.push((String::from("$REPARSE"), rp_size, rp_allocated));
+            }
+            Some(
+                AttributeType::IndexRoot | AttributeType::IndexAllocation | AttributeType::Bitmap,
+            ) => {
+                // Extract attribute name
+                let name_len = attr_header.name_length as usize;
+                let (is_i30, attr_name) = if name_len > 0 {
+                    let name_offset = offset + attr_header.name_offset as usize;
+                    if name_offset + name_len * 2 <= data.len() {
+                        let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                        let is_i30 =
+                            attr_header.name_length == 4 && name_bytes == b"$\x00I\x003\x000\x00";
+                        let name = if is_i30 {
+                            String::new()
+                        } else {
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        };
+                        (is_i30, name)
+                    } else {
+                        (false, String::new())
+                    }
+                } else {
+                    (false, String::new())
+                };
+
+                if is_i30 {
+                    // Accumulate $I30 sizes
+                    if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        dir_index_size += value_length;
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            dir_index_size += data_size.max(0) as u64;
+                            dir_index_allocated += allocated.max(0) as u64;
+                        }
+                    }
+                } else {
+                    // Non-$I30 index - count as stream
+                    let is_primary = if attr_header.is_non_resident == 0 {
+                        true
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 8 <= data.len() {
+                            let lowest_vcn = i64::from_le_bytes(
+                                data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                            );
+                            lowest_vcn == 0
+                        } else {
+                            false
+                        }
+                    };
+
+                    if is_primary {
+                        let (size, allocated) = if attr_header.is_non_resident == 0 {
+                            let value_length_bytes = &data[offset + 16..offset + 20];
+                            let value_length =
+                                u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                    as u64;
+                            (value_length, 0_u64)
+                        } else {
+                            let nr_offset = offset + 16;
+                            if nr_offset + 48 <= data.len() {
+                                let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                                let allocated =
+                                    i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                                let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                                let data_size =
+                                    i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                                (data_size.max(0) as u64, allocated.max(0) as u64)
+                            } else {
+                                (0_u64, 0_u64)
+                            }
+                        };
+
+                        let stream_name = if attr_name.is_empty() {
+                            match attr_type {
+                                Some(AttributeType::Bitmap) => String::from("$BITMAP"),
+                                Some(AttributeType::IndexRoot) => String::from("$INDEX_ROOT"),
+                                Some(AttributeType::IndexAllocation) => {
+                                    String::from("$INDEX_ALLOCATION")
+                                }
+                                _ => String::new(),
+                            }
+                        } else {
+                            attr_name
+                        };
+                        streams.push((stream_name, size, allocated));
+                    }
+                }
+            }
+            Some(
+                AttributeType::ObjectId
+                | AttributeType::VolumeName
+                | AttributeType::VolumeInformation
+                | AttributeType::PropertySet
+                | AttributeType::Ea
+                | AttributeType::EaInformation
+                | AttributeType::LoggedUtilityStream
+                | AttributeType::SecurityDescriptor
+                | AttributeType::AttributeList,
+            ) => {
+                // All counted as streams
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        match attr_type {
+                            Some(AttributeType::ObjectId) => String::from("$OBJECT_ID"),
+                            Some(AttributeType::VolumeName) => String::from("$VOLUME_NAME"),
+                            Some(AttributeType::VolumeInformation) => {
+                                String::from("$VOLUME_INFORMATION")
+                            }
+                            Some(AttributeType::PropertySet) => String::from("$PROPERTY_SET"),
+                            Some(AttributeType::Ea) => String::from("$EA"),
+                            Some(AttributeType::EaInformation) => String::from("$EA_INFORMATION"),
+                            Some(AttributeType::LoggedUtilityStream) => {
+                                String::from("$LOGGED_UTILITY_STREAM")
+                            }
+                            Some(AttributeType::SecurityDescriptor) => {
+                                String::from("$SECURITY_DESCRIPTOR")
+                            }
+                            Some(AttributeType::AttributeList) => String::from("$ATTRIBUTE_LIST"),
+                            _ => String::new(),
+                        }
+                    } else {
+                        attr_name
+                    };
+                    streams.push((stream_name, size, allocated));
+                }
+            }
+            Some(AttributeType::StandardInformation) => {
+                // Skip - not expected in extension records
+            }
+            _ => {
+                // Unknown attribute types - count as streams (C++ default: case)
+                let type_code = attr_header.type_code;
+
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        format!("$UNKNOWN_0x{type_code:X}")
+                    } else {
+                        attr_name
+                    };
+                    streams.push((stream_name, size, allocated));
+                }
+            }
         }
 
         offset += attr_header.length as usize;
@@ -353,6 +659,15 @@ pub(super) fn parse_extension_to_index(
             record.total_stream_count += stream_indices.len() as u16;
         }
 
+        // Merge directory index sizes from extension records
+        if dir_index_size > 0 || dir_index_allocated > 0 {
+            let record = &mut index.records[record_idx as usize];
+            // Add to the first_stream size (which represents the default stream for
+            // directories)
+            record.first_stream.size.length += dir_index_size;
+            record.first_stream.size.allocated += dir_index_allocated;
+        }
+
         // Build parent-child relationship for names added from extension records
         // This is critical for compute_tree_metrics() to work correctly.
         // Get the current name_count to determine the name_index for each new name
diff --git a/scripts/ci/file_size_exceptions.txt b/scripts/ci/file_size_exceptions.txt
index de225657b..e03762a3c 100644
--- a/scripts/ci/file_size_exceptions.txt
+++ b/scripts/ci/file_size_exceptions.txt
@@ -3,4 +3,6 @@
 # path|reason
 crates/uffs-diag/src/bin/compare_scan_parity.rs|Diagnostic parity pipeline remains consolidated because the end-to-end workflow is reviewed as one unit.
 crates/uffs-cli/src/commands/output.rs|Output formatting module with comprehensive test suite for DataFrame/native output parity and footer formatting.
-crates/uffs-cli/src/commands/raw_io.rs|I/O coordination module consolidating MFT reading, query filtering, and multi-drive orchestration logic.
\ No newline at end of file
+crates/uffs-cli/src/commands/raw_io.rs|I/O coordination module consolidating MFT reading, query filtering, and multi-drive orchestration logic.
+crates/uffs-mft/src/io/parser/index.rs|Single-pass direct-to-index parser (C++-style inline approach). Monolithic by design for IOCP hot path - handles all NTFS attribute types inline.
+crates/uffs-mft/src/io/parser/index_extension.rs|Extension record parser for direct-to-index path. Handles all attribute types from extension records - matches index.rs completeness.
\ No newline at end of file

From fb853e79d43b940c5d9ed1e98ce1cf408480d18c Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 04:22:48 -0700
Subject: [PATCH 2/8] feat(mft): add cross-platform direct-to-index file reader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements new file-based reader path using direct-to-index parser from
Wave 1. This provides a single-pass parsing path that works on both
Windows and macOS, bypassing the old multi-pass pipeline.

Key changes:
- Added load_raw_to_index_direct() in reader/persistence.rs
- Copied direct-to-index parsers to cross-platform parse/ module
- Added env var switch in commands/load.rs (UFFS_LEGACY_PARSE=1 for old path)
- Both parser paths available for validation and comparison

New path: raw MFT → fixup → parse_record_to_index() → MftIndex
Old path: raw MFT → parse_record_full() → MftRecordMerger → from_parsed_records()

The new direct parser eliminates intermediate ParsedRecord allocations
and matches the C++ single-pass architecture.

All tests pass (105/105). F-drive parity verification requires Windows.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 crates/uffs-mft/src/commands/load.rs          |  22 +-
 crates/uffs-mft/src/io/parser/index.rs        |   3 -
 crates/uffs-mft/src/parse.rs                  |   3 +
 crates/uffs-mft/src/parse/direct_index.rs     | 891 ++++++++++++++++++
 .../src/parse/direct_index_extension.rs       | 763 +++++++++++++++
 crates/uffs-mft/src/reader/persistence.rs     |  76 ++
 scripts/ci/file_size_exceptions.txt           |   2 +-
 7 files changed, 1752 insertions(+), 8 deletions(-)
 create mode 100644 crates/uffs-mft/src/parse/direct_index.rs
 create mode 100644 crates/uffs-mft/src/parse/direct_index_extension.rs

diff --git a/crates/uffs-mft/src/commands/load.rs b/crates/uffs-mft/src/commands/load.rs
index 73c3f1321..29e9003e9 100644
--- a/crates/uffs-mft/src/commands/load.rs
+++ b/crates/uffs-mft/src/commands/load.rs
@@ -244,8 +244,15 @@ pub fn cmd_load(
         println!("🔨 BUILDING MFTINDEX...");
 
         let build_start = Instant::now();
-        let index = MftReader::load_raw_to_index_with_options(input, &data_load_options)
-            .with_context(|| format!("Failed to build index from {}", input.display()))?;
+        // Use new direct-to-index parser by default, legacy multi-pass with env var
+        let index = if std::env::var("UFFS_LEGACY_PARSE").is_ok() {
+            println!("  Using legacy multi-pass parser (UFFS_LEGACY_PARSE=1)");
+            MftReader::load_raw_to_index_with_options(input, &data_load_options)
+                .with_context(|| format!("Failed to build index from {}", input.display()))?
+        } else {
+            MftReader::load_raw_to_index_direct(input, &data_load_options)
+                .with_context(|| format!("Failed to build index from {}", input.display()))?
+        };
         let build_time = build_start.elapsed();
 
         println!();
@@ -436,8 +443,15 @@ pub fn cmd_load(
 
     // Build MftIndex (includes tree metrics computation)
     let build_start = Instant::now();
-    let index = MftReader::load_raw_to_index_with_options(input, &data_load_options)
-        .with_context(|| format!("Failed to build index from {}", input.display()))?;
+    // Use new direct-to-index parser by default, legacy multi-pass with env var
+    let index = if std::env::var("UFFS_LEGACY_PARSE").is_ok() {
+        println!("  Using legacy multi-pass parser (UFFS_LEGACY_PARSE=1)");
+        MftReader::load_raw_to_index_with_options(input, &data_load_options)
+            .with_context(|| format!("Failed to build index from {}", input.display()))?
+    } else {
+        MftReader::load_raw_to_index_direct(input, &data_load_options)
+            .with_context(|| format!("Failed to build index from {}", input.display()))?
+    };
     let build_time = build_start.elapsed();
 
     println!(
diff --git a/crates/uffs-mft/src/io/parser/index.rs b/crates/uffs-mft/src/io/parser/index.rs
index 1823aa49e..8e58c6d50 100644
--- a/crates/uffs-mft/src/io/parser/index.rs
+++ b/crates/uffs-mft/src/io/parser/index.rs
@@ -524,9 +524,6 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
                     additional_streams.push((stream_name, size, allocated));
                 }
             }
-            Some(AttributeType::StandardInformation | AttributeType::FileName) => {
-                // Already handled above
-            }
             _ => {
                 // C++ counts ALL attribute types as streams via default: case
                 // This includes truly unknown types
diff --git a/crates/uffs-mft/src/parse.rs b/crates/uffs-mft/src/parse.rs
index fa6b532ef..63277a5f1 100644
--- a/crates/uffs-mft/src/parse.rs
+++ b/crates/uffs-mft/src/parse.rs
@@ -44,6 +44,8 @@
 
 mod attribute_helpers;
 mod columns;
+mod direct_index;
+mod direct_index_extension;
 mod fixup;
 mod forensic;
 mod full;
@@ -59,6 +61,7 @@ use attribute_helpers::{
     parse_data_attribute_full, parse_file_name_full, parse_standard_info_full,
 };
 pub use columns::ParsedColumns;
+pub use direct_index::parse_record_to_index;
 pub use fixup::apply_fixup;
 pub use forensic::parse_record_forensic;
 pub use full::{parse_record, parse_record_full};
diff --git a/crates/uffs-mft/src/parse/direct_index.rs b/crates/uffs-mft/src/parse/direct_index.rs
new file mode 100644
index 000000000..cf86d5a6f
--- /dev/null
+++ b/crates/uffs-mft/src/parse/direct_index.rs
@@ -0,0 +1,891 @@
+//! Single-pass direct-to-index parser (C++-style inline approach).
+//!
+//! Exception: This file is intentionally monolithic (840+ LOC) because it
+//! implements a performance-critical hot path that handles all NTFS attribute
+//! types inline. Splitting would introduce indirection overhead and hurt
+//! performance. See `scripts/ci/file_size_exceptions.txt`.
+//!
+//! This module implements the high-performance single-pass parser that matches
+//! the C++ architecture. It parses MFT records directly into `MftIndex` without
+//! creating intermediate `ParsedRecord` allocations.
+//!
+//! This is a cross-platform parser used for both Windows IOCP and file-based
+//! loading.
+
+// Performance-critical hot-path parser — lint suppressions match the style of
+// other NTFS parser modules in this crate.
+#![expect(
+    clippy::unseparated_literal_suffix,
+    reason = "literal suffixes like 0u32 are common in NTFS struct parsing"
+)]
+#![expect(
+    clippy::doc_markdown,
+    reason = "NTFS terminology like MftIndex does not need backticks in internal docs"
+)]
+#![expect(
+    clippy::manual_let_else,
+    reason = "explicit match is clearer in NTFS attribute dispatch"
+)]
+#![expect(
+    clippy::missing_asserts_for_indexing,
+    reason = "bounds are verified by size checks before all index access"
+)]
+#![expect(
+    clippy::single_match_else,
+    reason = "explicit match arms are clearer for attribute type dispatch"
+)]
+#![expect(
+    clippy::shadow_unrelated,
+    reason = "reusing common names like 'record' in nested scopes is idiomatic here"
+)]
+#![expect(
+    clippy::single_call_fn,
+    reason = "parse_extension_to_index is a separate function for code organization"
+)]
+#![expect(
+    clippy::let_underscore_untyped,
+    reason = "let _ = expr is used for intentionally ignoring results"
+)]
+#![expect(
+    clippy::if_not_else,
+    reason = "!condition checks are clearer for NTFS flag testing"
+)]
+#![expect(
+    clippy::explicit_iter_loop,
+    reason = ".iter() is explicit and intentional"
+)]
+#![expect(
+    clippy::if_then_some_else_none,
+    reason = "explicit if/else is clearer than bool::then in complex NTFS logic"
+)]
+
+use core::mem::size_of;
+
+use smallvec::SmallVec;
+use zerocopy::FromBytes;
+
+use super::direct_index_extension::parse_extension_to_index;
+use crate::ntfs::is_internal_windows_stream;
+
+/// Parses a record directly into `MftIndex` (single-pass inline parsing).
+///
+/// This function parses the record and adds it directly to the index,
+/// creating parent placeholders on-demand. This is the C++-style single-pass
+/// approach that eliminates the intermediate `ParsedRecord` allocation.
+///
+/// Handles ALL attribute types that `parse_record_full()` handles, including:
+/// - `$STANDARD_INFORMATION`, `$FILE_NAME`, `$DATA` (default + ADS)
+/// - `$REPARSE_POINT` (for WoF detection and junctions/symlinks)
+/// - `$INDEX_ROOT`, `$INDEX_ALLOCATION`, `$BITMAP` (directory indexes)
+/// - `$OBJECT_ID`, `$VOLUME_NAME`, `$VOLUME_INFORMATION`, `$PROPERTY_SET`
+/// - `$EA`, `$EA_INFORMATION`, `$LOGGED_UTILITY_STREAM`
+/// - `$SECURITY_DESCRIPTOR`, `$ATTRIBUTE_LIST`
+/// - Unknown attribute types (counted as streams for C++ parity)
+///
+/// # Returns
+///
+/// `true` if a record was added to the index, `false` if skipped.
+#[expect(
+    clippy::too_many_lines,
+    reason = "monolithic parser kept for performance-critical hot path"
+)]
+#[expect(
+    clippy::cognitive_complexity,
+    reason = "NTFS attribute dispatch is inherently complex"
+)]
+#[expect(
+    clippy::cast_possible_truncation,
+    reason = "NTFS field sizes are bounded by u16/u32 record layout"
+)]
+pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::MftIndex) -> bool {
+    use crate::index::{
+        ChildInfo, IndexNameRef, IndexStreamInfo, LinkInfo, NO_ENTRY, SizeInfo, StandardInfo,
+    };
+    use crate::ntfs::{
+        AttributeRecordHeader, AttributeType, FileNameAttribute, FileRecordSegmentHeader,
+        StandardInformation, file_reference_to_frs, filetime_to_unix_micros,
+    };
+
+    if data.len() < size_of::<FileRecordSegmentHeader>() {
+        return false;
+    }
+
+    let header = match FileRecordSegmentHeader::read_from_prefix(data) {
+        Ok((header, _)) => header,
+        Err(_) => return false,
+    };
+
+    // Check if record is in use
+    if !header.is_in_use() {
+        return false;
+    }
+
+    // Check magic
+    let multi_sector_header = header.multi_sector_header;
+    if !multi_sector_header.is_file_record() {
+        return false;
+    }
+
+    // Handle extension records: add their names/streams to the base record
+    // C++ does this inline during parsing (see ntfs_index.hpp lines 521-583)
+    if !header.is_base_record() {
+        let base_frs = file_reference_to_frs(header.base_file_record_segment);
+        return parse_extension_to_index(data, base_frs, index);
+    }
+
+    let is_directory = header.is_directory();
+
+    // Parse attributes
+    let mut offset = header.first_attribute_offset as usize;
+    let max_offset = core::cmp::min(header.bytes_in_use as usize, data.len());
+
+    // Temporary storage for parsed data
+    let mut std_info = StandardInfo::default();
+    let mut primary_name: Option<(String, u64, u8, u16)> = None; // (name, parent_frs, namespace, parse_index)
+    let mut additional_names: SmallVec<[(String, u64, u16); 4]> = SmallVec::new();
+    let mut name_parse_counter: u16 = 0;
+    let mut default_size = 0u64;
+    let mut default_allocated = 0u64;
+    // ADS: (stream_name, size, allocated)
+    let mut additional_streams: SmallVec<[(String, u64, u64); 4]> = SmallVec::new();
+    let mut reparse_tag: u32 = 0;
+    let mut dir_index_size: u64 = 0;
+    let mut dir_index_allocated: u64 = 0;
+
+    while offset + size_of::<AttributeRecordHeader>() <= max_offset {
+        let attr_header = match AttributeRecordHeader::read_from_prefix(&data[offset..]) {
+            Ok((attr_header, _)) => attr_header,
+            Err(_) => break,
+        };
+
+        if attr_header.type_code == AttributeType::End as u32 {
+            break;
+        }
+
+        if attr_header.length == 0 || offset + attr_header.length as usize > max_offset {
+            break;
+        }
+
+        let attr_type = AttributeType::from_u32(attr_header.type_code);
+        match attr_type {
+            Some(AttributeType::StandardInformation) => {
+                if attr_header.is_non_resident == 0 {
+                    // Parse $STANDARD_INFORMATION
+                    let value_offset_bytes = &data[offset + 20..offset + 22];
+                    let value_offset =
+                        u16::from_le_bytes(value_offset_bytes.try_into().unwrap_or([0, 0]))
+                            as usize;
+                    let si_offset = offset + value_offset;
+                    if si_offset + size_of::<StandardInformation>() <= data.len() {
+                        let si = match StandardInformation::read_from_prefix(&data[si_offset..]) {
+                            Ok((si, _)) => si,
+                            Err(_) => break,
+                        };
+                        // Build StandardInfo with proper flags
+                        let mut info = StandardInfo::from_attributes(si.file_attributes);
+                        info.created = filetime_to_unix_micros(si.creation_time);
+                        info.modified = filetime_to_unix_micros(si.modification_time);
+                        info.accessed = filetime_to_unix_micros(si.access_time);
+                        info.mft_changed = filetime_to_unix_micros(si.mft_change_time);
+                        std_info = info;
+                    }
+                }
+            }
+            Some(AttributeType::FileName) => {
+                if attr_header.is_non_resident == 0 {
+                    // Parse $FILE_NAME
+                    let value_offset_bytes = &data[offset + 20..offset + 22];
+                    let value_offset =
+                        u16::from_le_bytes(value_offset_bytes.try_into().unwrap_or([0, 0]))
+                            as usize;
+                    let fn_offset = offset + value_offset;
+                    if fn_offset + size_of::<FileNameAttribute>() <= data.len() {
+                        let fn_attr = match FileNameAttribute::read_from_prefix(&data[fn_offset..])
+                        {
+                            Ok((fn_attr, _)) => fn_attr,
+                            Err(_) => break,
+                        };
+                        let name_len = fn_attr.file_name_length as usize;
+                        let name_bytes_offset = fn_offset + size_of::<FileNameAttribute>();
+                        if name_bytes_offset + name_len * 2 <= data.len() {
+                            let name_bytes =
+                                &data[name_bytes_offset..name_bytes_offset + name_len * 2];
+                            let name_u16: Vec<u16> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            let name = String::from_utf16_lossy(&name_u16);
+                            let parent_frs = file_reference_to_frs(fn_attr.parent_directory);
+                            let namespace = fn_attr.file_name_namespace;
+
+                            // Skip DOS-only names (namespace 2)
+                            if namespace != 2 {
+                                let parse_idx = name_parse_counter;
+                                name_parse_counter += 1;
+                                let is_better = match namespace {
+                                    1 | 3 => true,               // Win32 or Win32+DOS
+                                    0 => primary_name.is_none(), // POSIX only if no name yet
+                                    _ => false,
+                                };
+                                if is_better || primary_name.is_none() {
+                                    // Move old primary to additional if exists
+                                    if let Some((old_name, old_parent, _, old_parse_idx)) =
+                                        primary_name.take()
+                                    {
+                                        additional_names.push((
+                                            old_name,
+                                            old_parent,
+                                            old_parse_idx,
+                                        ));
+                                    }
+                                    primary_name = Some((name, parent_frs, namespace, parse_idx));
+                                } else {
+                                    additional_names.push((name, parent_frs, parse_idx));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            Some(AttributeType::Data) => {
+                // legacy-output parity: Only primary attributes (LowestVCN == 0) count as
+                // streams. Continuation extents (LowestVCN > 0) are skipped.
+                // See ntfs_index_load.hpp:358
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true // Resident attributes are always primary
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false // Can't verify, skip to be safe
+                    }
+                };
+
+                if !is_primary {
+                    // Skip continuation extents - they don't count as new streams
+                    offset += attr_header.length as usize;
+                    continue;
+                }
+
+                // Parse $DATA - track both default stream and ADS
+                let name_len = attr_header.name_length as usize;
+                let (size, allocated) = if attr_header.is_non_resident != 0 {
+                    // Non-resident: size at offset 48, allocated at offset 40
+                    let alloc_offset = offset + 40;
+                    let size_offset = offset + 48;
+                    if size_offset + 8 <= data.len() {
+                        let allocated = u64::from_le_bytes(
+                            data[alloc_offset..alloc_offset + 8]
+                                .try_into()
+                                .unwrap_or([0; 8]),
+                        );
+                        let size = u64::from_le_bytes(
+                            data[size_offset..size_offset + 8]
+                                .try_into()
+                                .unwrap_or([0; 8]),
+                        );
+                        (size, allocated)
+                    } else {
+                        (0, 0)
+                    }
+                } else {
+                    // Resident: value_length at offset 16
+                    // Resident files have no clusters allocated - data is stored in MFT record
+                    // C++ correctly shows allocated_size=0 for resident files
+                    let len_offset = offset + 16;
+                    if len_offset + 4 <= data.len() {
+                        let len = u32::from_le_bytes(
+                            data[len_offset..len_offset + 4]
+                                .try_into()
+                                .unwrap_or([0; 4]),
+                        ) as u64;
+                        (len, 0) // allocated_size = 0 for resident files
+                    } else {
+                        (0, 0)
+                    }
+                };
+
+                if name_len == 0 {
+                    // Default stream
+                    default_size = size;
+                    default_allocated = allocated;
+                } else {
+                    // Alternate Data Stream (ADS)
+                    let name_offset = offset + attr_header.name_offset as usize;
+                    if name_offset + name_len * 2 <= data.len() {
+                        let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                        let name_u16: SmallVec<[u16; 64]> = name_bytes
+                            .chunks_exact(2)
+                            .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                            .collect();
+                        let stream_name = String::from_utf16_lossy(&name_u16);
+                        // Filter out internal Windows streams (names starting with $)
+                        // These include $DSC, $REPARSE, $EA, $EA_INFORMATION, $TXF_DATA, $OBJECT_ID
+                        if !is_internal_windows_stream(&stream_name) {
+                            additional_streams.push((stream_name, size, allocated));
+                        }
+                    }
+                }
+            }
+            Some(AttributeType::ReparsePoint) => {
+                // Parse $REPARSE_POINT to get the reparse tag
+                // C++ handles both resident and non-resident reparse points
+                // C++ also counts $REPARSE_POINT as a stream (for descendants)
+                let (rp_size, rp_allocated) = if attr_header.is_non_resident == 0 {
+                    // Resident reparse point (common case)
+                    let value_length_bytes = &data[offset + 16..offset + 20];
+                    let value_length =
+                        u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0, 0, 0, 0]))
+                            as u64;
+
+                    let value_offset_bytes = &data[offset + 20..offset + 22];
+                    let value_offset =
+                        u16::from_le_bytes(value_offset_bytes.try_into().unwrap_or([0, 0]))
+                            as usize;
+                    let rp_offset = offset + value_offset;
+                    if rp_offset + 4 <= data.len() {
+                        // Read reparse tag (first 4 bytes of reparse point data)
+                        let tag_bytes = &data[rp_offset..rp_offset + 4];
+                        reparse_tag =
+                            u32::from_le_bytes(tag_bytes.try_into().unwrap_or([0, 0, 0, 0]));
+                    }
+                    (value_length, 0_u64) // Resident, allocated=0
+                } else {
+                    // Non-resident reparse point (rare - large reparse data)
+                    let nr_offset = offset + 16;
+                    if nr_offset + 48 <= data.len() {
+                        let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                        let allocated =
+                            i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                        let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                        let data_size = i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                        (data_size.max(0) as u64, allocated.max(0) as u64)
+                    } else {
+                        (0_u64, 0_u64)
+                    }
+                };
+
+                // Add $REPARSE_POINT as a stream (matches C++ stream counting)
+                additional_streams.push((String::from("$REPARSE"), rp_size, rp_allocated));
+            }
+            Some(
+                AttributeType::IndexRoot | AttributeType::IndexAllocation | AttributeType::Bitmap,
+            ) => {
+                // C++ includes $INDEX_ROOT and $INDEX_ALLOCATION with name $I30
+                // in directory size. For non-$I30 indexes, C++ counts them as streams.
+
+                // Extract attribute name
+                let name_len = attr_header.name_length as usize;
+                let (is_i30, attr_name) = if name_len > 0 {
+                    let name_offset = offset + attr_header.name_offset as usize;
+                    if name_offset + name_len * 2 <= data.len() {
+                        let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                        // Check for "$I30" in UTF-16LE
+                        let is_i30 =
+                            attr_header.name_length == 4 && name_bytes == b"$\x00I\x003\x000\x00";
+                        // Decode name for non-$I30 indexes
+                        let name = if is_i30 {
+                            String::new()
+                        } else {
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        };
+                        (is_i30, name)
+                    } else {
+                        (false, String::new())
+                    }
+                } else {
+                    (false, String::new())
+                };
+
+                if is_i30 {
+                    // Accumulate $I30 sizes for directories
+                    if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        dir_index_size += value_length;
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            dir_index_size += data_size.max(0) as u64;
+                            dir_index_allocated += allocated.max(0) as u64;
+                        }
+                    }
+                } else {
+                    // Non-$I30 index - count as stream
+                    // Check if primary attribute (LowestVCN == 0)
+                    let is_primary = if attr_header.is_non_resident == 0 {
+                        true
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 8 <= data.len() {
+                            let lowest_vcn = i64::from_le_bytes(
+                                data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                            );
+                            lowest_vcn == 0
+                        } else {
+                            false
+                        }
+                    };
+
+                    if is_primary {
+                        let (size, allocated) = if attr_header.is_non_resident == 0 {
+                            let value_length_bytes = &data[offset + 16..offset + 20];
+                            let value_length =
+                                u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                    as u64;
+                            (value_length, 0_u64)
+                        } else {
+                            let nr_offset = offset + 16;
+                            if nr_offset + 48 <= data.len() {
+                                let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                                let allocated =
+                                    i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                                let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                                let data_size =
+                                    i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                                (data_size.max(0) as u64, allocated.max(0) as u64)
+                            } else {
+                                (0_u64, 0_u64)
+                            }
+                        };
+
+                        let stream_name = if attr_name.is_empty() {
+                            match attr_type {
+                                Some(AttributeType::Bitmap) => String::from("$BITMAP"),
+                                Some(AttributeType::IndexRoot) => String::from("$INDEX_ROOT"),
+                                Some(AttributeType::IndexAllocation) => {
+                                    String::from("$INDEX_ALLOCATION")
+                                }
+                                _ => String::new(),
+                            }
+                        } else {
+                            attr_name
+                        };
+                        additional_streams.push((stream_name, size, allocated));
+                    }
+                }
+            }
+            Some(
+                AttributeType::ObjectId
+                | AttributeType::VolumeName
+                | AttributeType::VolumeInformation
+                | AttributeType::PropertySet
+                | AttributeType::Ea
+                | AttributeType::EaInformation
+                | AttributeType::LoggedUtilityStream
+                | AttributeType::SecurityDescriptor
+                | AttributeType::AttributeList,
+            ) => {
+                // All these are counted as streams in C++
+                // Check if primary attribute (LowestVCN == 0)
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    // Extract attribute name (if any)
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        match attr_type {
+                            Some(AttributeType::ObjectId) => String::from("$OBJECT_ID"),
+                            Some(AttributeType::VolumeName) => String::from("$VOLUME_NAME"),
+                            Some(AttributeType::VolumeInformation) => {
+                                String::from("$VOLUME_INFORMATION")
+                            }
+                            Some(AttributeType::PropertySet) => String::from("$PROPERTY_SET"),
+                            Some(AttributeType::Ea) => String::from("$EA"),
+                            Some(AttributeType::EaInformation) => String::from("$EA_INFORMATION"),
+                            Some(AttributeType::LoggedUtilityStream) => {
+                                String::from("$LOGGED_UTILITY_STREAM")
+                            }
+                            Some(AttributeType::SecurityDescriptor) => {
+                                String::from("$SECURITY_DESCRIPTOR")
+                            }
+                            Some(AttributeType::AttributeList) => String::from("$ATTRIBUTE_LIST"),
+                            _ => String::new(),
+                        }
+                    } else {
+                        attr_name
+                    };
+                    additional_streams.push((stream_name, size, allocated));
+                }
+            }
+            _ => {
+                // C++ counts ALL attribute types as streams via default: case
+                // This includes truly unknown types
+                let type_code = attr_header.type_code;
+
+                // Check if primary attribute (LowestVCN == 0)
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    // Extract attribute name (if any)
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        format!("$UNKNOWN_0x{type_code:X}")
+                    } else {
+                        attr_name
+                    };
+                    additional_streams.push((stream_name, size, allocated));
+                }
+            }
+        }
+
+        offset += attr_header.length as usize;
+    }
+
+    // Set directory flag in std_info BEFORE checking for filename
+    // This ensures is_directory is set even when $FILE_NAME is in extension record
+    if is_directory {
+        std_info.set_directory(true);
+        // For directories, set default size to directory index size
+        if dir_index_size > 0 {
+            default_size = dir_index_size;
+            default_allocated = dir_index_allocated;
+        }
+    }
+
+    // Handle records without a filename in the base record
+    // The $FILE_NAME may be in an extension record - we still need to store stdinfo
+    let (name, parent_frs, _namespace, primary_parse_index) = match primary_name {
+        Some(n) => n,
+        None => {
+            // No $FILE_NAME in base record - store stdinfo anyway
+            // The extension record will add the name later
+            //
+            // IMPORTANT: We must still add ADS streams from the base record!
+            // The $FILE_NAME may be in an extension record, but the ADS are here.
+            // Without this, ADS on files/directories with extension records are lost.
+
+            // Pre-process ADS streams BEFORE creating the record
+            let additional_stream_count = additional_streams.len();
+            let mut stream_indices: Vec<u32> = Vec::with_capacity(additional_stream_count);
+            for (stream_name, stream_size, stream_allocated) in additional_streams {
+                let stream_name_offset = index.add_name(&stream_name);
+                let stream_name_len = stream_name.len();
+                let stream_is_ascii = stream_name.is_ascii();
+                let extension_id = index.intern_extension(&stream_name);
+                let stream_name_ref = IndexNameRef::new(
+                    stream_name_offset,
+                    stream_name_len as u16,
+                    stream_is_ascii,
+                    extension_id,
+                );
+
+                let stream_idx = index.streams.len() as u32;
+                index.streams.push(IndexStreamInfo {
+                    size: SizeInfo {
+                        length: stream_size,
+                        allocated: stream_allocated,
+                    },
+                    next_entry: NO_ENTRY,
+                    name: stream_name_ref,
+                    flags: 0,
+                });
+                stream_indices.push(stream_idx);
+            }
+
+            // Now create the record and set up streams
+            let record = index.get_or_create(frs);
+            record.stdinfo = std_info;
+            record.first_stream.size = SizeInfo {
+                length: default_size,
+                allocated: default_allocated,
+            };
+
+            // Chain ADS streams to first_stream
+            if !stream_indices.is_empty() {
+                // Chain the streams together
+                for i in 0..stream_indices.len().saturating_sub(1) {
+                    let current_idx = stream_indices[i] as usize;
+                    let next_idx = stream_indices[i + 1];
+                    index.streams[current_idx].next_entry = next_idx;
+                }
+                // Attach to first_stream
+                let record = index.get_or_create(frs);
+                record.first_stream.next_entry = stream_indices[0];
+                record.stream_count = 1 + additional_stream_count as u16;
+            }
+
+            // Leave first_name empty - extension record will fill it
+            return false;
+        }
+    };
+
+    // Add primary name to names buffer and get reference
+    let name_offset = index.add_name(&name);
+    let name_len = name.len();
+    let is_ascii = name.is_ascii();
+    let extension_id = index.intern_extension(&name);
+    let name_ref = IndexNameRef::new(name_offset, name_len as u16, is_ascii, extension_id);
+
+    // Pre-process additional names: add to names buffer and links list BEFORE
+    // getting record reference This avoids borrow checker issues with holding
+    // &mut record while modifying index
+    let additional_count = additional_names.len();
+    let mut link_indices: Vec<u32> = Vec::with_capacity(additional_count);
+    // Collect parent FRS values for building children array later
+    let mut additional_parent_frs: SmallVec<[(u64, u16); 4]> =
+        SmallVec::with_capacity(additional_count);
+    for (link_name, link_parent, link_parse_idx) in additional_names {
+        additional_parent_frs.push((link_parent, link_parse_idx));
+        let link_offset = index.add_name(&link_name);
+        let link_len = link_name.len();
+        let link_is_ascii = link_name.is_ascii();
+        let extension_id = index.intern_extension(&link_name);
+        let link_name_ref =
+            IndexNameRef::new(link_offset, link_len as u16, link_is_ascii, extension_id);
+
+        let link_idx = index.links.len() as u32;
+        index.links.push(LinkInfo {
+            next_entry: NO_ENTRY, // Will be patched below
+            name: link_name_ref,
+            parent_frs: link_parent,
+        });
+        link_indices.push(link_idx);
+    }
+
+    // Pre-process additional streams (ADS): add to names buffer and streams list
+    let additional_stream_count = additional_streams.len();
+    let mut stream_indices: Vec<u32> = Vec::with_capacity(additional_stream_count);
+    for (stream_name, stream_size, stream_allocated) in additional_streams {
+        let stream_name_offset = index.add_name(&stream_name);
+        let stream_name_len = stream_name.len();
+        let stream_is_ascii = stream_name.is_ascii();
+        let extension_id = index.intern_extension(&stream_name);
+        let stream_name_ref = IndexNameRef::new(
+            stream_name_offset,
+            stream_name_len as u16,
+            stream_is_ascii,
+            extension_id,
+        );
+
+        let stream_idx = index.streams.len() as u32;
+        index.streams.push(IndexStreamInfo {
+            size: SizeInfo {
+                length: stream_size,
+                allocated: stream_allocated,
+            },
+            next_entry: NO_ENTRY, // Will be patched below
+            name: stream_name_ref,
+            flags: 0,
+        });
+        stream_indices.push(stream_idx);
+    }
+
+    // Ensure parent exists (create placeholder if needed) - do this before getting
+    // our record
+    if parent_frs != frs && parent_frs != 0 {
+        let _ = index.get_or_create(parent_frs);
+    }
+
+    // Now get or create the record in the index - no more index mutations after
+    // this
+    let record = index.get_or_create(frs);
+    record.stdinfo = std_info;
+    record.first_stream.size = SizeInfo {
+        length: default_size,
+        allocated: default_allocated,
+    };
+    record.first_name = LinkInfo {
+        next_entry: NO_ENTRY,
+        name: name_ref,
+        parent_frs,
+    };
+    record.name_count = 1 + additional_count as u16;
+    // stream_count = 1 (default) + additional ADS
+    record.stream_count = 1 + additional_stream_count as u16;
+    // total_stream_count includes all streams (including internal ones like
+    // $REPARSE)
+    record.total_stream_count = 1 + additional_stream_count as u16;
+    // Set reparse tag if this is a reparse point
+    record.reparse_tag = reparse_tag;
+
+    // Chain the additional links: first_name -> link[0] -> link[1] -> ... ->
+    // NO_ENTRY The links were pushed with next_entry = NO_ENTRY, now we chain
+    // them
+    if !link_indices.is_empty() {
+        // Point first_name to the first additional link
+        record.first_name.next_entry = link_indices[0];
+    }
+
+    // Chain the additional streams: first_stream -> stream[0] -> stream[1] -> ...
+    if !stream_indices.is_empty() {
+        // Point first_stream to the first additional stream
+        record.first_stream.next_entry = stream_indices[0];
+    }
+
+    // Chain the links together
+    for i in 0..link_indices.len().saturating_sub(1) {
+        let current_idx = link_indices[i] as usize;
+        let next_idx = link_indices[i + 1];
+        index.links[current_idx].next_entry = next_idx;
+    }
+
+    // Chain the streams together
+    for i in 0..stream_indices.len().saturating_sub(1) {
+        let current_idx = stream_indices[i] as usize;
+        let next_idx = stream_indices[i + 1];
+        index.streams[current_idx].next_entry = next_idx;
+    }
+
+    // Build parent-child relationship for tree metrics computation
+    // This is critical for compute_tree_metrics() to work correctly.
+    // Each name (primary + additional) creates a child entry in its parent.
+    // name_index 0 = primary name, 1+ = additional names (hardlinks)
+
+    // Helper to add a child entry to a parent
+    let add_child_entry = |index: &mut crate::index::MftIndex, p_frs: u64, name_idx: u16| {
+        if p_frs == frs || p_frs == 0 || p_frs == u64::from(NO_ENTRY) {
+            return;
+        }
+        // Ensure parent exists
+        let parent_idx = {
+            let p_frs_usize = p_frs as usize;
+            if p_frs_usize >= index.frs_to_idx.len() {
+                index.frs_to_idx.resize(p_frs_usize + 1, NO_ENTRY);
+            }
+            if index.frs_to_idx[p_frs_usize] == NO_ENTRY {
+                // Create placeholder parent
+                let new_idx = index.records.len() as u32;
+                index.frs_to_idx[p_frs_usize] = new_idx;
+                index.records.push(crate::index::FileRecord::new(p_frs));
+            }
+            index.frs_to_idx[p_frs_usize]
+        };
+
+        // Add child entry
+        let child_idx = index.children.len() as u32;
+        let parent = &mut index.records[parent_idx as usize];
+        let old_first_child = parent.first_child;
+        parent.first_child = child_idx;
+
+        index.children.push(ChildInfo {
+            next_entry: old_first_child,
+            child_frs: frs,
+            name_index: name_idx,
+        });
+    };
+
+    // Add child entry for primary name (using C++ parse-order index)
+    add_child_entry(index, parent_frs, primary_parse_index);
+
+    // Add child entries for additional names (hardlinks)
+    for &(link_parent_frs, link_parse_idx) in additional_parent_frs.iter() {
+        add_child_entry(index, link_parent_frs, link_parse_idx);
+    }
+
+    true
+}
diff --git a/crates/uffs-mft/src/parse/direct_index_extension.rs b/crates/uffs-mft/src/parse/direct_index_extension.rs
new file mode 100644
index 000000000..aaea8e5d9
--- /dev/null
+++ b/crates/uffs-mft/src/parse/direct_index_extension.rs
@@ -0,0 +1,763 @@
+//! Extension record parser for direct-to-index path.
+//!
+//! Exception: This file is intentionally large (720+ LOC) to match the
+//! completeness of `index.rs` - it handles all the same attribute types that
+//! can appear in extension records. See `scripts/ci/file_size_exceptions.txt`.
+//!
+//! This module handles extension records for the single-pass parser, extracting
+//! names, streams, and all attribute types from extension records and merging
+//! them into base records in the index.
+
+// Performance-critical hot-path parser — lint suppressions match the style of
+// other NTFS parser modules in this crate.
+#![expect(
+    clippy::manual_let_else,
+    reason = "explicit match is clearer in NTFS attribute dispatch"
+)]
+#![expect(
+    clippy::missing_asserts_for_indexing,
+    reason = "bounds are verified by size checks before all index access"
+)]
+#![expect(
+    clippy::shadow_unrelated,
+    reason = "reusing common names like 'record' in nested scopes is idiomatic here"
+)]
+#![expect(
+    clippy::let_underscore_untyped,
+    reason = "let _ = expr is used for intentionally ignoring results"
+)]
+#![expect(
+    clippy::if_not_else,
+    reason = "!condition checks are clearer for NTFS flag testing"
+)]
+#![expect(
+    clippy::unseparated_literal_suffix,
+    reason = "literal suffixes like 0u32 are common in NTFS struct parsing"
+)]
+#![expect(
+    clippy::doc_markdown,
+    reason = "NTFS terminology like MftIndex does not need backticks in internal docs"
+)]
+#![expect(
+    clippy::if_then_some_else_none,
+    reason = "explicit if/else is clearer than bool::then in complex NTFS logic"
+)]
+#![expect(
+    clippy::explicit_iter_loop,
+    reason = ".iter() is explicit and intentional"
+)]
+
+use core::mem::size_of;
+
+use smallvec::SmallVec;
+use zerocopy::FromBytes;
+
+use crate::ntfs::is_internal_windows_stream;
+
+/// Parses an extension record and adds its names/streams to the base record.
+///
+/// Extension records contain additional `$FILE_NAME` attributes (hard links)
+/// and additional attributes (ADS, system attributes, etc.) that don't fit
+/// in the base record. This function extracts those attributes and adds them
+/// to the base record in the index.
+///
+/// Handles ALL attribute types that `parse_record_full()` handles, including:
+/// - `$FILE_NAME` (hard links)
+/// - `$DATA` (ADS)
+/// - `$REPARSE_POINT`, `$INDEX_ROOT`, `$INDEX_ALLOCATION`, `$BITMAP`
+/// - `$OBJECT_ID`, `$EA`, `$LOGGED_UTILITY_STREAM`, etc.
+/// - Unknown attribute types
+///
+/// # Arguments
+///
+/// * `data` - The raw extension record data (after fixup)
+/// * `base_frs` - The FRS of the base record this extension belongs to
+/// * `index` - The MFT index to update
+///
+/// # Returns
+///
+/// `true` if any names/streams were added, `false` otherwise.
+#[expect(
+    clippy::cast_possible_truncation,
+    reason = "NTFS field sizes are bounded by u16/u32 record layout"
+)]
+#[expect(
+    clippy::cognitive_complexity,
+    reason = "NTFS attribute dispatch is inherently complex"
+)]
+#[expect(
+    clippy::too_many_lines,
+    reason = "monolithic extension parser for performance"
+)]
+pub(super) fn parse_extension_to_index(
+    data: &[u8],
+    base_frs: u64,
+    index: &mut crate::index::MftIndex,
+) -> bool {
+    use crate::index::{ChildInfo, IndexNameRef, IndexStreamInfo, LinkInfo, NO_ENTRY, SizeInfo};
+    use crate::ntfs::{
+        AttributeRecordHeader, AttributeType, FileNameAttribute, FileRecordSegmentHeader,
+    };
+
+    if data.len() < size_of::<FileRecordSegmentHeader>() {
+        return false;
+    }
+
+    let header = match FileRecordSegmentHeader::read_from_prefix(data) {
+        Ok((header, _)) => header,
+        Err(_) => return false,
+    };
+
+    // Parse attributes to find $FILE_NAME and $DATA
+    let mut offset = header.first_attribute_offset as usize;
+    let max_offset = core::cmp::min(header.bytes_in_use as usize, data.len());
+
+    // Collect names and streams from extension record
+    let mut names: SmallVec<[(String, u64); 4]> = SmallVec::new();
+    let mut streams: SmallVec<[(String, u64, u64); 4]> = SmallVec::new();
+    let mut dir_index_size: u64 = 0;
+    let mut dir_index_allocated: u64 = 0;
+
+    while offset + size_of::<AttributeRecordHeader>() <= max_offset {
+        let attr_header = match AttributeRecordHeader::read_from_prefix(&data[offset..]) {
+            Ok((attr_header, _)) => attr_header,
+            Err(_) => break,
+        };
+
+        if attr_header.type_code == AttributeType::End as u32 {
+            break;
+        }
+
+        if attr_header.length == 0 || offset + attr_header.length as usize > max_offset {
+            break;
+        }
+
+        let attr_type = AttributeType::from_u32(attr_header.type_code);
+        match attr_type {
+            Some(AttributeType::FileName) => {
+                // Parse $FILE_NAME attribute
+                if attr_header.is_non_resident == 0 {
+                    let value_offset_bytes = &data[offset + 20..offset + 22];
+                    let value_offset =
+                        u16::from_le_bytes(value_offset_bytes.try_into().unwrap_or([0, 0]))
+                            as usize;
+                    let fn_offset = offset + value_offset;
+                    if fn_offset + size_of::<FileNameAttribute>() <= data.len() {
+                        let fn_attr = match FileNameAttribute::read_from_prefix(&data[fn_offset..])
+                        {
+                            Ok((fn_attr, _)) => fn_attr,
+                            Err(_) => break,
+                        };
+
+                        // Skip DOS-only names (namespace 2)
+                        if fn_attr.file_name_namespace != 2 {
+                            let name_len = fn_attr.file_name_length as usize;
+                            let name_start = fn_offset + size_of::<FileNameAttribute>();
+                            if name_start + name_len * 2 <= data.len() {
+                                let name_bytes = &data[name_start..name_start + name_len * 2];
+                                let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                    .chunks_exact(2)
+                                    .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                    .collect();
+                                let name = String::from_utf16_lossy(&name_u16);
+                                let parent_frs = fn_attr.parent_directory & 0x0000_FFFF_FFFF_FFFF;
+                                names.push((name, parent_frs));
+                            }
+                        }
+                    }
+                }
+            }
+            Some(AttributeType::Data) => {
+                // legacy-output parity: Only primary attributes (LowestVCN == 0) count as
+                // streams. Continuation extents (LowestVCN > 0) are skipped.
+                // See ntfs_index_load.hpp:358
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true // Resident attributes are always primary
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false // Can't verify, skip to be safe
+                    }
+                };
+
+                if !is_primary {
+                    // Skip continuation extents - they don't count as new streams
+                    offset += attr_header.length as usize;
+                    continue;
+                }
+
+                // Parse $DATA attribute (ADS only - named streams)
+                let name_len = attr_header.name_length as usize;
+                if name_len > 0 {
+                    // This is an ADS (named stream)
+                    let (size, allocated) = if attr_header.is_non_resident != 0 {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let allocated = i64::from_le_bytes(
+                                data[nr_offset + 24..nr_offset + 32]
+                                    .try_into()
+                                    .unwrap_or([0; 8]),
+                            );
+                            let size = i64::from_le_bytes(
+                                data[nr_offset + 32..nr_offset + 40]
+                                    .try_into()
+                                    .unwrap_or([0; 8]),
+                            );
+                            (size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0, 0)
+                        }
+                    } else {
+                        let len_offset = offset + 16;
+                        if len_offset + 4 <= data.len() {
+                            let len = u32::from_le_bytes(
+                                data[len_offset..len_offset + 4]
+                                    .try_into()
+                                    .unwrap_or([0; 4]),
+                            ) as u64;
+                            (len, 0)
+                        } else {
+                            (0, 0)
+                        }
+                    };
+
+                    let name_offset = offset + attr_header.name_offset as usize;
+                    if name_offset + name_len * 2 <= data.len() {
+                        let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                        let name_u16: SmallVec<[u16; 64]> = name_bytes
+                            .chunks_exact(2)
+                            .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                            .collect();
+                        let stream_name = String::from_utf16_lossy(&name_u16);
+                        // Filter out internal Windows streams (names starting with $)
+                        if !is_internal_windows_stream(&stream_name) {
+                            streams.push((stream_name, size, allocated));
+                        }
+                    }
+                }
+            }
+            Some(AttributeType::ReparsePoint) => {
+                // Parse $REPARSE_POINT - add as stream
+                let (rp_size, rp_allocated) = if attr_header.is_non_resident == 0 {
+                    let value_length_bytes = &data[offset + 16..offset + 20];
+                    let value_length =
+                        u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4])) as u64;
+                    (value_length, 0_u64)
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 48 <= data.len() {
+                        let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                        let allocated =
+                            i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                        let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                        let data_size = i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                        (data_size.max(0) as u64, allocated.max(0) as u64)
+                    } else {
+                        (0_u64, 0_u64)
+                    }
+                };
+                streams.push((String::from("$REPARSE"), rp_size, rp_allocated));
+            }
+            Some(
+                AttributeType::IndexRoot | AttributeType::IndexAllocation | AttributeType::Bitmap,
+            ) => {
+                // Extract attribute name
+                let name_len = attr_header.name_length as usize;
+                let (is_i30, attr_name) = if name_len > 0 {
+                    let name_offset = offset + attr_header.name_offset as usize;
+                    if name_offset + name_len * 2 <= data.len() {
+                        let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                        let is_i30 =
+                            attr_header.name_length == 4 && name_bytes == b"$\x00I\x003\x000\x00";
+                        let name = if is_i30 {
+                            String::new()
+                        } else {
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        };
+                        (is_i30, name)
+                    } else {
+                        (false, String::new())
+                    }
+                } else {
+                    (false, String::new())
+                };
+
+                if is_i30 {
+                    // Accumulate $I30 sizes
+                    if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        dir_index_size += value_length;
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            dir_index_size += data_size.max(0) as u64;
+                            dir_index_allocated += allocated.max(0) as u64;
+                        }
+                    }
+                } else {
+                    // Non-$I30 index - count as stream
+                    let is_primary = if attr_header.is_non_resident == 0 {
+                        true
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 8 <= data.len() {
+                            let lowest_vcn = i64::from_le_bytes(
+                                data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                            );
+                            lowest_vcn == 0
+                        } else {
+                            false
+                        }
+                    };
+
+                    if is_primary {
+                        let (size, allocated) = if attr_header.is_non_resident == 0 {
+                            let value_length_bytes = &data[offset + 16..offset + 20];
+                            let value_length =
+                                u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                    as u64;
+                            (value_length, 0_u64)
+                        } else {
+                            let nr_offset = offset + 16;
+                            if nr_offset + 48 <= data.len() {
+                                let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                                let allocated =
+                                    i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                                let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                                let data_size =
+                                    i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                                (data_size.max(0) as u64, allocated.max(0) as u64)
+                            } else {
+                                (0_u64, 0_u64)
+                            }
+                        };
+
+                        let stream_name = if attr_name.is_empty() {
+                            match attr_type {
+                                Some(AttributeType::Bitmap) => String::from("$BITMAP"),
+                                Some(AttributeType::IndexRoot) => String::from("$INDEX_ROOT"),
+                                Some(AttributeType::IndexAllocation) => {
+                                    String::from("$INDEX_ALLOCATION")
+                                }
+                                _ => String::new(),
+                            }
+                        } else {
+                            attr_name
+                        };
+                        streams.push((stream_name, size, allocated));
+                    }
+                }
+            }
+            Some(
+                AttributeType::ObjectId
+                | AttributeType::VolumeName
+                | AttributeType::VolumeInformation
+                | AttributeType::PropertySet
+                | AttributeType::Ea
+                | AttributeType::EaInformation
+                | AttributeType::LoggedUtilityStream
+                | AttributeType::SecurityDescriptor
+                | AttributeType::AttributeList,
+            ) => {
+                // All counted as streams
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        match attr_type {
+                            Some(AttributeType::ObjectId) => String::from("$OBJECT_ID"),
+                            Some(AttributeType::VolumeName) => String::from("$VOLUME_NAME"),
+                            Some(AttributeType::VolumeInformation) => {
+                                String::from("$VOLUME_INFORMATION")
+                            }
+                            Some(AttributeType::PropertySet) => String::from("$PROPERTY_SET"),
+                            Some(AttributeType::Ea) => String::from("$EA"),
+                            Some(AttributeType::EaInformation) => String::from("$EA_INFORMATION"),
+                            Some(AttributeType::LoggedUtilityStream) => {
+                                String::from("$LOGGED_UTILITY_STREAM")
+                            }
+                            Some(AttributeType::SecurityDescriptor) => {
+                                String::from("$SECURITY_DESCRIPTOR")
+                            }
+                            Some(AttributeType::AttributeList) => String::from("$ATTRIBUTE_LIST"),
+                            _ => String::new(),
+                        }
+                    } else {
+                        attr_name
+                    };
+                    streams.push((stream_name, size, allocated));
+                }
+            }
+            Some(AttributeType::StandardInformation) => {
+                // Skip - not expected in extension records
+            }
+            _ => {
+                // Unknown attribute types - count as streams (C++ default: case)
+                let type_code = attr_header.type_code;
+
+                let is_primary = if attr_header.is_non_resident == 0 {
+                    true
+                } else {
+                    let nr_offset = offset + 16;
+                    if nr_offset + 8 <= data.len() {
+                        let lowest_vcn = i64::from_le_bytes(
+                            data[nr_offset..nr_offset + 8].try_into().unwrap_or([0; 8]),
+                        );
+                        lowest_vcn == 0
+                    } else {
+                        false
+                    }
+                };
+
+                if is_primary {
+                    let attr_name = if attr_header.name_length > 0 {
+                        let name_offset = offset + attr_header.name_offset as usize;
+                        let name_len = attr_header.name_length as usize;
+                        if name_offset + name_len * 2 <= data.len() {
+                            let name_bytes = &data[name_offset..name_offset + name_len * 2];
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
+                                .chunks_exact(2)
+                                .map(|c| u16::from_le_bytes([c[0], c[1]]))
+                                .collect();
+                            String::from_utf16_lossy(&name_u16)
+                        } else {
+                            String::new()
+                        }
+                    } else {
+                        String::new()
+                    };
+
+                    let (size, allocated) = if attr_header.is_non_resident == 0 {
+                        let value_length_bytes = &data[offset + 16..offset + 20];
+                        let value_length =
+                            u32::from_le_bytes(value_length_bytes.try_into().unwrap_or([0; 4]))
+                                as u64;
+                        (value_length, 0_u64)
+                    } else {
+                        let nr_offset = offset + 16;
+                        if nr_offset + 48 <= data.len() {
+                            let alloc_bytes = &data[nr_offset + 24..nr_offset + 32];
+                            let allocated =
+                                i64::from_le_bytes(alloc_bytes.try_into().unwrap_or([0; 8]));
+                            let size_bytes = &data[nr_offset + 32..nr_offset + 40];
+                            let data_size =
+                                i64::from_le_bytes(size_bytes.try_into().unwrap_or([0; 8]));
+                            (data_size.max(0) as u64, allocated.max(0) as u64)
+                        } else {
+                            (0_u64, 0_u64)
+                        }
+                    };
+
+                    let stream_name = if attr_name.is_empty() {
+                        format!("$UNKNOWN_0x{type_code:X}")
+                    } else {
+                        attr_name
+                    };
+                    streams.push((stream_name, size, allocated));
+                }
+            }
+        }
+
+        offset += attr_header.length as usize;
+    }
+
+    // If no names or streams found, nothing to do
+    if names.is_empty() && streams.is_empty() {
+        return false;
+    }
+
+    // Add names to the base record
+    // First, add all names to the names buffer and create LinkInfo entries
+    let mut link_indices: Vec<u32> = Vec::with_capacity(names.len());
+    for (name, parent_frs) in &names {
+        let name_offset = index.add_name(name);
+        let name_len = name.len();
+        let is_ascii = name.is_ascii();
+        let extension_id = index.intern_extension(name);
+        let name_ref = IndexNameRef::new(name_offset, name_len as u16, is_ascii, extension_id);
+
+        let link_idx = index.links.len() as u32;
+        index.links.push(LinkInfo {
+            next_entry: NO_ENTRY,
+            name: name_ref,
+            parent_frs: *parent_frs,
+        });
+        link_indices.push(link_idx);
+    }
+
+    // Add streams to the streams buffer
+    let mut stream_indices: Vec<u32> = Vec::with_capacity(streams.len());
+    for (stream_name, size, allocated) in &streams {
+        let name_offset = index.add_name(stream_name);
+        let name_len = stream_name.len();
+        let is_ascii = stream_name.is_ascii();
+        let extension_id = index.intern_extension(stream_name);
+        let name_ref = IndexNameRef::new(name_offset, name_len as u16, is_ascii, extension_id);
+
+        let stream_idx = index.streams.len() as u32;
+        index.streams.push(IndexStreamInfo {
+            size: SizeInfo {
+                length: *size,
+                allocated: *allocated,
+            },
+            next_entry: NO_ENTRY,
+            name: name_ref,
+            flags: 0,
+        });
+        stream_indices.push(stream_idx);
+    }
+
+    // Ensure parent directories exist for the new names
+    for (_, parent_frs) in &names {
+        if *parent_frs != base_frs && *parent_frs != 0 {
+            let _ = index.get_or_create(*parent_frs);
+        }
+    }
+
+    // Get the base record and add the names/streams to it
+    let base_frs_usize = base_frs as usize;
+    if base_frs_usize >= index.frs_to_idx.len() {
+        // Base record doesn't exist yet - create a placeholder
+        let _ = index.get_or_create(base_frs);
+    }
+
+    let record_idx = index.frs_to_idx[base_frs_usize];
+    if record_idx == NO_ENTRY {
+        // Base record doesn't exist - create it
+        let _ = index.get_or_create(base_frs);
+    }
+
+    // Now get the record and chain the new links/streams
+    let record_idx = index.frs_to_idx[base_frs_usize];
+    if record_idx != NO_ENTRY {
+        let record = &mut index.records[record_idx as usize];
+
+        // Add new links to the record
+        if !link_indices.is_empty() {
+            // Check if base record has no name (first_name is empty)
+            // This happens when the $FILE_NAME attribute is ONLY in extension records
+            if !record.first_name.name.is_valid() {
+                // Copy the first extension name directly into first_name
+                // This matches established behavior (ntfs_index.hpp lines 559-567)
+                let first_link = &index.links[link_indices[0] as usize];
+                record.first_name.name = first_link.name;
+                record.first_name.parent_frs = first_link.parent_frs;
+                // Don't increment name_count for the first name (it's already counted as 1)
+
+                // Chain remaining links (if any) to first_name.next_entry
+                if link_indices.len() > 1 {
+                    // Chain the remaining links together
+                    for i in 1..link_indices.len().saturating_sub(1) {
+                        let current_idx = link_indices[i] as usize;
+                        let next_idx = link_indices[i + 1];
+                        index.links[current_idx].next_entry = next_idx;
+                    }
+                    // Attach remaining links to first_name
+                    let record = &mut index.records[record_idx as usize];
+                    record.first_name.next_entry = link_indices[1];
+                    // Update name count for additional links only
+                    record.name_count += (link_indices.len() - 1) as u16;
+                }
+            } else {
+                // Base record already has a name - chain extension names as additional hard
+                // links Find the end of the current link chain
+                let last_link_idx = if record.first_name.next_entry != NO_ENTRY {
+                    let mut idx = record.first_name.next_entry;
+                    while index.links[idx as usize].next_entry != NO_ENTRY {
+                        idx = index.links[idx as usize].next_entry;
+                    }
+                    Some(idx)
+                } else {
+                    None
+                };
+
+                // Chain the new links together
+                for i in 0..link_indices.len().saturating_sub(1) {
+                    let current_idx = link_indices[i] as usize;
+                    let next_idx = link_indices[i + 1];
+                    index.links[current_idx].next_entry = next_idx;
+                }
+
+                // Attach to the chain
+                if let Some(last_idx) = last_link_idx {
+                    index.links[last_idx as usize].next_entry = link_indices[0];
+                } else {
+                    // first_name has no next_entry, attach directly
+                    let record = &mut index.records[record_idx as usize];
+                    record.first_name.next_entry = link_indices[0];
+                }
+
+                // Update name count
+                let record = &mut index.records[record_idx as usize];
+                record.name_count += link_indices.len() as u16;
+            }
+        }
+
+        // Chain new streams to the end of the existing stream chain
+        if !stream_indices.is_empty() {
+            let record = &mut index.records[record_idx as usize];
+
+            // Find the end of the current stream chain
+            let last_stream_idx = if record.first_stream.next_entry != NO_ENTRY {
+                let mut idx = record.first_stream.next_entry;
+                while index.streams[idx as usize].next_entry != NO_ENTRY {
+                    idx = index.streams[idx as usize].next_entry;
+                }
+                Some(idx)
+            } else {
+                None
+            };
+
+            // Chain the new streams together
+            for i in 0..stream_indices.len().saturating_sub(1) {
+                let current_idx = stream_indices[i] as usize;
+                let next_idx = stream_indices[i + 1];
+                index.streams[current_idx].next_entry = next_idx;
+            }
+
+            // Attach to the chain
+            if let Some(last_idx) = last_stream_idx {
+                index.streams[last_idx as usize].next_entry = stream_indices[0];
+            } else {
+                // first_stream has no next_entry, attach directly
+                let record = &mut index.records[record_idx as usize];
+                record.first_stream.next_entry = stream_indices[0];
+            }
+
+            // Update stream count
+            let record = &mut index.records[record_idx as usize];
+            record.stream_count += stream_indices.len() as u16;
+            record.total_stream_count += stream_indices.len() as u16;
+        }
+
+        // Merge directory index sizes from extension records
+        if dir_index_size > 0 || dir_index_allocated > 0 {
+            let record = &mut index.records[record_idx as usize];
+            // Add to the first_stream size (which represents the default stream for
+            // directories)
+            record.first_stream.size.length += dir_index_size;
+            record.first_stream.size.allocated += dir_index_allocated;
+        }
+
+        // Build parent-child relationship for names added from extension records
+        // This is critical for compute_tree_metrics() to work correctly.
+        // Get the current name_count to determine the name_index for each new name
+        let record = &index.records[record_idx as usize];
+        let existing_name_count = record.name_count;
+
+        for (name_idx, (_, parent_frs)) in names.iter().enumerate() {
+            let p_frs = *parent_frs;
+            if p_frs == base_frs || p_frs == 0 || p_frs == u64::from(NO_ENTRY) {
+                continue;
+            }
+
+            // Ensure parent exists
+            let parent_idx = {
+                let p_frs_usize = p_frs as usize;
+                if p_frs_usize >= index.frs_to_idx.len() {
+                    index.frs_to_idx.resize(p_frs_usize + 1, NO_ENTRY);
+                }
+                if index.frs_to_idx[p_frs_usize] == NO_ENTRY {
+                    // Create placeholder parent
+                    let new_idx = index.records.len() as u32;
+                    index.frs_to_idx[p_frs_usize] = new_idx;
+                    index.records.push(crate::index::FileRecord::new(p_frs));
+                }
+                index.frs_to_idx[p_frs_usize]
+            };
+
+            // Add child entry
+            // name_index is the position in the combined name list (existing + new)
+            // For extension records, the first name might replace first_name (if empty),
+            // so we need to account for that
+            let effective_name_idx = if existing_name_count == 0 {
+                // First extension name became first_name, so name_index starts at 0
+                name_idx as u16
+            } else {
+                // Extension names are appended after existing names
+                existing_name_count - 1 + name_idx as u16
+            };
+
+            let child_idx = index.children.len() as u32;
+            let parent = &mut index.records[parent_idx as usize];
+            let old_first_child = parent.first_child;
+            parent.first_child = child_idx;
+
+            index.children.push(ChildInfo {
+                next_entry: old_first_child,
+                child_frs: base_frs,
+                name_index: effective_name_idx,
+            });
+        }
+    }
+
+    !names.is_empty() || !streams.is_empty()
+}
diff --git a/crates/uffs-mft/src/reader/persistence.rs b/crates/uffs-mft/src/reader/persistence.rs
index 38a5c0215..dcad90c1c 100644
--- a/crates/uffs-mft/src/reader/persistence.rs
+++ b/crates/uffs-mft/src/reader/persistence.rs
@@ -553,4 +553,80 @@ impl MftReader {
             }
         }
     }
+
+    /// Load raw MFT from file and build `MftIndex` using direct-to-index
+    /// parser.
+    ///
+    /// This is a single-pass implementation that parses records directly into
+    /// the index without creating intermediate `ParsedRecord` allocations. It
+    /// uses the modernized `parse_record_to_index()` from Wave 1.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the raw file cannot be loaded or if record parsing
+    /// or index construction fails.
+    pub fn load_raw_to_index_direct<P: AsRef<Path>>(
+        path: P,
+        options: &crate::raw::LoadRawOptions,
+    ) -> Result<crate::index::MftIndex> {
+        use std::time::Instant;
+
+        use tracing::info;
+
+        use crate::index::MftIndex;
+        use crate::parse::{apply_fixup, parse_record_to_index};
+
+        let parse_start = Instant::now();
+
+        // Load raw MFT data
+        let mut raw = crate::raw::load_raw_mft(path, options)?;
+        let capacity = usize::try_from(raw.header.record_count).unwrap_or(0);
+        let total_records_in_file = capacity;
+        let record_size = raw.header.record_size as usize;
+
+        // Create index with pre-allocated capacity
+        let mut index = MftIndex::with_capacity(raw.header.volume_letter, capacity);
+
+        // Parse records directly into index
+        let mut fixup_success: u64 = 0;
+        let mut fixup_failed: u64 = 0;
+        let mut records_added: u64 = 0;
+
+        let buffer_slice = raw.data.as_mut_slice();
+        for (frs, chunk) in buffer_slice.chunks_exact_mut(record_size).enumerate() {
+            // Apply fixup in place
+            if !apply_fixup(chunk) {
+                fixup_failed += 1;
+                continue;
+            }
+            fixup_success += 1;
+
+            // Parse record directly into index
+            // parse_record_to_index handles both base and extension records internally
+            let added = parse_record_to_index(chunk, frs as u64, &mut index);
+            if added {
+                records_added += 1;
+            }
+        }
+
+        // Compute tree metrics
+        index.compute_tree_metrics();
+
+        // Sort directory children
+        index.sort_directory_children();
+
+        let parse_time = parse_start.elapsed();
+
+        info!(
+            total_records_in_file,
+            parse_ms = parse_time.as_millis(),
+            fixup_success,
+            fixup_failed,
+            records_added,
+            final_index_size = index.len(),
+            "Direct-to-index parse complete"
+        );
+
+        Ok(index)
+    }
 }
diff --git a/scripts/ci/file_size_exceptions.txt b/scripts/ci/file_size_exceptions.txt
index e03762a3c..164d87d59 100644
--- a/scripts/ci/file_size_exceptions.txt
+++ b/scripts/ci/file_size_exceptions.txt
@@ -5,4 +5,4 @@ crates/uffs-diag/src/bin/compare_scan_parity.rs|Diagnostic parity pipeline remai
 crates/uffs-cli/src/commands/output.rs|Output formatting module with comprehensive test suite for DataFrame/native output parity and footer formatting.
 crates/uffs-cli/src/commands/raw_io.rs|I/O coordination module consolidating MFT reading, query filtering, and multi-drive orchestration logic.
 crates/uffs-mft/src/io/parser/index.rs|Single-pass direct-to-index parser (C++-style inline approach). Monolithic by design for IOCP hot path - handles all NTFS attribute types inline.
-crates/uffs-mft/src/io/parser/index_extension.rs|Extension record parser for direct-to-index path. Handles all attribute types from extension records - matches index.rs completeness.
\ No newline at end of file
+crates/uffs-mft/src/parse/direct_index.rs|Cross-platform single-pass direct-to-index parser. Monolithic by design for hot path - handles all NTFS attribute types inline.
\ No newline at end of file

From 56e2b17905e48783d640fbf77f405c3e8a8a1715 Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 04:27:51 -0700
Subject: [PATCH 3/8] fix(lint): correct module-level lint expectations in
 direct-to-index parsers

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/uffs-mft/src/parse/direct_index.rs           | 12 ------------
 crates/uffs-mft/src/parse/direct_index_extension.rs | 12 ++----------
 2 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/crates/uffs-mft/src/parse/direct_index.rs b/crates/uffs-mft/src/parse/direct_index.rs
index cf86d5a6f..973bd8a84 100644
--- a/crates/uffs-mft/src/parse/direct_index.rs
+++ b/crates/uffs-mft/src/parse/direct_index.rs
@@ -38,26 +38,14 @@
     clippy::shadow_unrelated,
     reason = "reusing common names like 'record' in nested scopes is idiomatic here"
 )]
-#![expect(
-    clippy::single_call_fn,
-    reason = "parse_extension_to_index is a separate function for code organization"
-)]
 #![expect(
     clippy::let_underscore_untyped,
     reason = "let _ = expr is used for intentionally ignoring results"
 )]
-#![expect(
-    clippy::if_not_else,
-    reason = "!condition checks are clearer for NTFS flag testing"
-)]
 #![expect(
     clippy::explicit_iter_loop,
     reason = ".iter() is explicit and intentional"
 )]
-#![expect(
-    clippy::if_then_some_else_none,
-    reason = "explicit if/else is clearer than bool::then in complex NTFS logic"
-)]
 
 use core::mem::size_of;
 
diff --git a/crates/uffs-mft/src/parse/direct_index_extension.rs b/crates/uffs-mft/src/parse/direct_index_extension.rs
index aaea8e5d9..b4da286db 100644
--- a/crates/uffs-mft/src/parse/direct_index_extension.rs
+++ b/crates/uffs-mft/src/parse/direct_index_extension.rs
@@ -30,21 +30,13 @@
     clippy::if_not_else,
     reason = "!condition checks are clearer for NTFS flag testing"
 )]
-#![expect(
-    clippy::unseparated_literal_suffix,
-    reason = "literal suffixes like 0u32 are common in NTFS struct parsing"
-)]
-#![expect(
-    clippy::doc_markdown,
-    reason = "NTFS terminology like MftIndex does not need backticks in internal docs"
-)]
 #![expect(
     clippy::if_then_some_else_none,
     reason = "explicit if/else is clearer than bool::then in complex NTFS logic"
 )]
 #![expect(
-    clippy::explicit_iter_loop,
-    reason = ".iter() is explicit and intentional"
+    clippy::single_call_fn,
+    reason = "parse_extension_to_index is a separate function for code organization"
 )]
 
 use core::mem::size_of;

From 61f031e2ba2c380b065ad74879e0166d7de60024 Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 04:53:36 -0700
Subject: [PATCH 4/8] feat(mft): wire IOCP LIVE reader to use direct-to-index
 parser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the multi-pass pipeline (parse_record_full → MftRecordMerger →
from_parsed_records) with single-pass direct-to-index parsing in the
SlidingIocpInline path.

Changes:
- Pre-allocate MftIndex upfront instead of MftRecordMerger
- Call parse_record_to_index() directly during I/O completions
- Eliminate intermediate ParsedRecord allocation and merge phase
- Simplify logging (no separate io_ms/merge_ms split)

This completes Wave 3 — the IOCP path now uses the same zero-copy
parser as the file-based reader (Wave 2), eliminating redundant
allocations and improving parity across code paths.

Tests: 105/105 pass, just check clean, just lint-prod clean

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .../src/io/readers/parallel/to_index.rs       | 32 +++++--------------
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/crates/uffs-mft/src/io/readers/parallel/to_index.rs b/crates/uffs-mft/src/io/readers/parallel/to_index.rs
index 78885139c..f4e42c9a9 100644
--- a/crates/uffs-mft/src/io/readers/parallel/to_index.rs
+++ b/crates/uffs-mft/src/io/readers/parallel/to_index.rs
@@ -177,8 +177,8 @@ impl ParallelMftReader {
             "📊 Generated I/O operations for inline parsing"
         );
 
-        // Create merger to accumulate parsed records (unified pipeline)
-        let mut merger = MftRecordMerger::with_capacity(total_records);
+        // Pre-allocate MftIndex and build it incrementally during I/O
+        let mut index = MftIndex::with_capacity(volume, estimated_records);
 
         // Create IOCP
         let read_start = std::time::Instant::now();
@@ -338,7 +338,7 @@ impl ParallelMftReader {
                     // only project a mutable reference without moving the allocation.
                     let op_mut = unsafe { completed_op.as_mut().get_unchecked_mut() };
 
-                    // UNIFIED PIPELINE: parse_record_full() → MftRecordMerger
+                    // DIRECT-TO-INDEX: parse records directly into MftIndex
                     let buffer_slice =
                         &mut op_mut.buffer.as_mut_slice()[..bytes_transferred as usize];
                     let records_in_buffer = bytes_transferred as usize / record_size;
@@ -361,12 +361,10 @@ impl ParallelMftReader {
                             continue;
                         }
 
-                        // Parse using unified pipeline and accumulate in merger
-                        let result = parse_record_full(record_slice, frs);
-                        if !matches!(result, ParseResult::Skip) {
+                        // Parse directly into index (single-pass, no intermediates)
+                        if parse_record_to_index(record_slice, frs, &mut index) {
                             records_parsed += 1;
                         }
-                        merger.add_result(result);
                     }
 
                     bytes_read_total += bytes_transferred as u64;
@@ -432,27 +430,13 @@ impl ParallelMftReader {
             }
         }
 
-        let io_ms = read_start.elapsed().as_millis();
-        info!(
-            io_ms,
-            bytes_mb = bytes_read_total / (1024 * 1024),
-            records_parsed,
-            base_records = merger.base_count(),
-            extensions = merger.extension_count(),
-            "✅ Sliding window IOCP I/O + parse complete, merging..."
-        );
-
-        // Merge extensions and build index using unified pipeline
-        let parsed_records = merger.merge();
-        let index = MftIndex::from_parsed_records(volume, parsed_records);
-
         let total_ms = read_start.elapsed().as_millis();
         info!(
             total_ms,
-            io_ms,
-            merge_ms = total_ms - io_ms,
+            bytes_mb = bytes_read_total / (1024 * 1024),
+            records_parsed,
             index_entries = index.records.len(),
-            "✅ Sliding window IOCP with unified pipeline complete"
+            "✅ Sliding window IOCP with direct-to-index parsing complete"
         );
 
         Ok(index)

From 3be0666ee29d796b3e52e25bd987cf13bb4887da Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 05:05:31 -0700
Subject: [PATCH 5/8] feat(mft): add UFFS_LEGACY_PARSE escape hatch and
 document legacy pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wave 4 cleanup: Remove legacy multi-pass pipeline from hot path.

- Added UFFS_LEGACY_PARSE=1 environment variable to force legacy pipeline
  - When set, forces SlidingIocp mode instead of SlidingIocpInline
  - Allows debugging/comparison with old parse_record_full path

- Documented legacy pipeline components with clear markers:
  - parse_record_full(): part of old parse → merger → from_parsed_records
  - MftRecordMerger: merges extension records in legacy path
  - from_parsed_records(): final stage of legacy multi-pass pipeline
  - read_all_parallel_with_progress(): uses legacy pipeline

- Legacy pipeline still used by:
  - Legacy read modes (Parallel, Pipelined, PipelinedParallel, SlidingIocp)
  - File-based readers (load_raw_to_index_with_options)
  - Tests and diagnostic tools
  - UFFS_LEGACY_PARSE=1 escape hatch

- Hot path (SlidingIocpInline) bypasses legacy pipeline entirely:
  - Uses direct-to-index parsers (parse_record_to_index)
  - Builds index incrementally during I/O
  - Creates parent placeholders on-demand
  - No intermediate Vec<ParsedRecord> allocation

Verified: just check, just lint-prod, cargo test -p uffs-mft all pass

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 crates/uffs-mft/src/index/builder.rs           | 15 +++++++++++++--
 crates/uffs-mft/src/io/readers/parallel/mod.rs |  6 ++++++
 crates/uffs-mft/src/parse/full.rs              |  9 +++++++++
 crates/uffs-mft/src/parse/merger.rs            | 10 ++++++++++
 crates/uffs-mft/src/reader/index_read.rs       | 17 ++++++++++++++++-
 5 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/crates/uffs-mft/src/index/builder.rs b/crates/uffs-mft/src/index/builder.rs
index a495cd55e..49b6e91cb 100644
--- a/crates/uffs-mft/src/index/builder.rs
+++ b/crates/uffs-mft/src/index/builder.rs
@@ -12,8 +12,19 @@ use super::{
 impl MftIndex {
     /// Build an `MftIndex` from a vector of parsed records.
     ///
-    /// This is the fast path - directly builds the lean index without
-    /// going through Polars `DataFrame`.
+    /// **LEGACY MULTI-PASS PIPELINE:** This function is the final stage of the
+    /// old `parse_record_full → MftRecordMerger → from_parsed_records`
+    /// pipeline. The hot path (`SlidingIocpInline`) now uses direct-to-index
+    /// parsers that build the index incrementally during I/O, skipping this
+    /// separate build phase. This function is still used by:
+    /// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`,
+    ///   `SlidingIocp`)
+    /// - File-based readers (`load_raw_to_index_with_options`)
+    /// - Tests and diagnostic tools
+    /// - `UFFS_LEGACY_PARSE=1` escape hatch
+    ///
+    /// This directly builds the lean index without going through Polars
+    /// `DataFrame`.
     ///
     /// Works on all platforms - uses cross-platform `ParsedRecord` from parse
     /// module.
diff --git a/crates/uffs-mft/src/io/readers/parallel/mod.rs b/crates/uffs-mft/src/io/readers/parallel/mod.rs
index 4dfa39ba9..95535a32e 100644
--- a/crates/uffs-mft/src/io/readers/parallel/mod.rs
+++ b/crates/uffs-mft/src/io/readers/parallel/mod.rs
@@ -218,6 +218,12 @@ impl ParallelMftReader {
 
     /// Reads and parses all MFT records in parallel with progress callback.
     ///
+    /// **LEGACY MULTI-PASS PIPELINE:** This function uses
+    /// `parse_record_full → MftRecordMerger → Vec<ParsedRecord>`.
+    /// The hot path (`SlidingIocpInline`) uses direct-to-index parsing instead.
+    /// This function is used by legacy read modes (`Parallel`, `Auto` when not
+    /// inline).
+    ///
     /// This function handles extension records by merging their attributes
     /// into the base records, matching the legacy implementation behavior.
     /// The progress callback is called during the I/O phase with (bytes_read,
diff --git a/crates/uffs-mft/src/parse/full.rs b/crates/uffs-mft/src/parse/full.rs
index c875c6f25..7a5e9c0bf 100644
--- a/crates/uffs-mft/src/parse/full.rs
+++ b/crates/uffs-mft/src/parse/full.rs
@@ -11,6 +11,15 @@ use crate::ntfs::{ExtendedStandardInfo, NameInfo, ReparsePointHeader, StreamInfo
 
 /// Parses an MFT record and extracts relevant information.
 ///
+/// **LEGACY MULTI-PASS PIPELINE:** This function is part of the old
+/// `parse_record_full → MftRecordMerger → from_parsed_records` pipeline.
+/// The hot path (`SlidingIocpInline`) now uses direct-to-index parsers that
+/// skip this intermediate allocation. This function is still used by:
+/// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`, `SlidingIocp`)
+/// - File-based readers (`load_raw_to_index_with_options`)
+/// - Tests and diagnostic tools
+/// - `UFFS_LEGACY_PARSE=1` escape hatch
+///
 /// This function handles both base records and extension records.
 /// Extension records return `ParseResult::Extension` which must be
 /// merged into the base record later.
diff --git a/crates/uffs-mft/src/parse/merger.rs b/crates/uffs-mft/src/parse/merger.rs
index cff5a5416..3a839b2e7 100644
--- a/crates/uffs-mft/src/parse/merger.rs
+++ b/crates/uffs-mft/src/parse/merger.rs
@@ -5,6 +5,16 @@ use crate::ntfs::StreamInfo;
 
 /// Merges extension record attributes into base records.
 ///
+/// **LEGACY MULTI-PASS PIPELINE:** This type is part of the old
+/// `parse_record_full → MftRecordMerger → from_parsed_records` pipeline.
+/// The hot path (`SlidingIocpInline`) now uses direct-to-index parsers that
+/// create parent placeholders on-demand without this intermediate allocation.
+/// This merger is still used by:
+/// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`, `SlidingIocp`)
+/// - File-based readers (`load_raw_to_index_with_options`)
+/// - Tests and diagnostic tools
+/// - `UFFS_LEGACY_PARSE=1` escape hatch
+///
 /// This implements the C++ behavior where attributes from extension
 /// records are merged into their base records.
 ///
diff --git a/crates/uffs-mft/src/reader/index_read.rs b/crates/uffs-mft/src/reader/index_read.rs
index 516fa718a..a78d5ab40 100644
--- a/crates/uffs-mft/src/reader/index_read.rs
+++ b/crates/uffs-mft/src/reader/index_read.rs
@@ -262,6 +262,15 @@ impl MftReader {
         use crate::platform::detect_drive_type;
 
         tracing::debug!(volume = %self.volume, "[TRIP] reader::read_mft_index_internal ENTER");
+
+        // Check for legacy parse mode escape hatch
+        // UFFS_LEGACY_PARSE=1 forces the old multi-pass pipeline for
+        // debugging/comparison
+        let use_legacy_parse = std::env::var("UFFS_LEGACY_PARSE").is_ok();
+        if use_legacy_parse {
+            warn!(volume = %self.volume, "⚠️  UFFS_LEGACY_PARSE=1 detected - using legacy multi-pass pipeline");
+        }
+
         info!(volume = %self.volume, "Starting MFT read (lean index)");
 
         let start_time = Instant::now();
@@ -328,7 +337,13 @@ impl MftReader {
         // For lean index (MftIndex), use SlidingIocpInline for NVMe/SSD - this uses
         // IOCP with multiple reads in flight and inline parsing, matching C++
         // performance.
-        let effective_mode = index_effective_mode(self.mode, drive_type);
+        let mut effective_mode = index_effective_mode(self.mode, drive_type);
+
+        // Apply legacy parse mode override if escape hatch is set
+        if use_legacy_parse && effective_mode == MftReadMode::SlidingIocpInline {
+            warn!("🔄 Forcing SlidingIocp mode (legacy pipeline) due to UFFS_LEGACY_PARSE=1");
+            effective_mode = MftReadMode::SlidingIocp;
+        }
 
         info!(mode = %effective_mode, "🚀 Using read mode (lean index)");
 

From 5bcb9dec8c2eab17201ed60332bc90e78ae611b9 Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 05:14:49 -0700
Subject: [PATCH 6/8] feat(mft): add I/O overlap timing instrumentation to IOCP
 reader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add wait_ms/parse_ms/overlap_pct metrics to the sliding window IOCP
reader for measuring I/O overlap effectiveness on Windows. The IOCP
sliding window already provides optimal overlap (matching C++ design),
so no structural changes needed — just observability.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../src/io/readers/parallel/to_index.rs       | 57 ++++++++++++++++++-
 crates/uffs-mft/src/parse/full.rs             |  3 +-
 crates/uffs-mft/src/parse/merger.rs           |  3 +-
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/crates/uffs-mft/src/io/readers/parallel/to_index.rs b/crates/uffs-mft/src/io/readers/parallel/to_index.rs
index f4e42c9a9..89d6629c4 100644
--- a/crates/uffs-mft/src/io/readers/parallel/to_index.rs
+++ b/crates/uffs-mft/src/io/readers/parallel/to_index.rs
@@ -13,6 +13,33 @@ impl ParallelMftReader {
     /// This eliminates the separate parse and index build phases, saving ~7s
     /// on large MFTs by overlapping CPU work with I/O.
     ///
+    /// # I/O Overlap Architecture
+    ///
+    /// This function achieves true I/O-compute overlap using IOCP's sliding
+    /// window:
+    ///
+    /// 1. **Multiple I/O in flight**: Maintains 2-8 concurrent I/O operations
+    ///    (adaptive based on drive type). While one operation completes, others
+    ///    are still reading from disk.
+    ///
+    /// 2. **Inline parsing**: When `GetQueuedCompletionStatus` returns, the
+    ///    completion handler immediately applies fixup and parses records
+    ///    directly into the index. Critically, this parse happens while other
+    ///    I/O operations remain in flight.
+    ///
+    /// 3. **Immediate requeue**: After parsing completes, the buffer is
+    ///    recycled and the next I/O operation is queued immediately,
+    ///    maintaining the sliding window.
+    ///
+    /// Parse time per chunk is typically <1ms, so parsing on the IOCP
+    /// completion thread is optimal—it avoids thread synchronization
+    /// overhead and maintains cache locality. The overlap comes from having
+    /// multiple chunks in flight, not from multi-threaded parsing.
+    ///
+    /// Timing instrumentation (added for profiling) logs `wait_ms`, `parse_ms`,
+    /// and `overlap_pct` to quantify how much parse work was hidden behind
+    /// I/O latency.
+    ///
     /// # Arguments
     ///
     /// * `overlapped_handle` - IOCP handle for async I/O
@@ -263,6 +290,10 @@ impl ParallelMftReader {
         let bitmap_ref = self.bitmap.as_ref();
         let mut last_completion_at = Instant::now();
 
+        // Timing instrumentation for I/O overlap analysis
+        let mut total_wait_time_ns = 0u64;
+        let mut total_parse_time_ns = 0u64;
+
         const WAIT_OPERATION: &str = "read_all_sliding_window_iocp_to_index";
 
         while completed_count < total_io_ops {
@@ -271,6 +302,9 @@ impl ParallelMftReader {
             let mut overlapped_ptr: *mut windows::Win32::System::IO::OVERLAPPED =
                 std::ptr::null_mut();
 
+            // Time I/O wait (GetQueuedCompletionStatus)
+            let wait_start = Instant::now();
+
             // SAFETY: `iocp.raw_handle()` is a live completion port and all out-pointers
             // reference writable stack storage for the duration of the wait.
             let result = unsafe {
@@ -283,6 +317,8 @@ impl ParallelMftReader {
                 )
             };
 
+            total_wait_time_ns += wait_start.elapsed().as_nanos() as u64;
+
             if result.is_err() {
                 let last_error = unsafe { GetLastError() };
                 if last_error.0 == WAIT_TIMEOUT_ERROR_CODE {
@@ -338,6 +374,9 @@ impl ParallelMftReader {
                     // only project a mutable reference without moving the allocation.
                     let op_mut = unsafe { completed_op.as_mut().get_unchecked_mut() };
 
+                    // Time parse phase (fixup + parse_record_to_index)
+                    let parse_start = Instant::now();
+
                     // DIRECT-TO-INDEX: parse records directly into MftIndex
                     let buffer_slice =
                         &mut op_mut.buffer.as_mut_slice()[..bytes_transferred as usize];
@@ -367,6 +406,8 @@ impl ParallelMftReader {
                         }
                     }
 
+                    total_parse_time_ns += parse_start.elapsed().as_nanos() as u64;
+
                     bytes_read_total += bytes_transferred as u64;
                     completed_count += 1;
 
@@ -431,12 +472,26 @@ impl ParallelMftReader {
         }
 
         let total_ms = read_start.elapsed().as_millis();
+        let wait_ms = total_wait_time_ns / 1_000_000;
+        let parse_ms = total_parse_time_ns / 1_000_000;
+
+        // Calculate overlap efficiency: if wait_ms + parse_ms > total_ms,
+        // then we had effective overlap (parse happened while other I/O was in flight)
+        let overlap_pct = if total_ms > 0 {
+            ((wait_ms + parse_ms).saturating_sub(total_ms) as f64 / total_ms as f64) * 100.0
+        } else {
+            0.0
+        };
+
         info!(
             total_ms,
+            wait_ms,
+            parse_ms,
+            overlap_pct = format!("{:.1}%", overlap_pct),
             bytes_mb = bytes_read_total / (1024 * 1024),
             records_parsed,
             index_entries = index.records.len(),
-            "✅ Sliding window IOCP with direct-to-index parsing complete"
+            "✅ Sliding window IOCP with direct-to-index parsing complete (I/O overlap analysis)"
         );
 
         Ok(index)
diff --git a/crates/uffs-mft/src/parse/full.rs b/crates/uffs-mft/src/parse/full.rs
index 7a5e9c0bf..07bc8874b 100644
--- a/crates/uffs-mft/src/parse/full.rs
+++ b/crates/uffs-mft/src/parse/full.rs
@@ -15,7 +15,8 @@ use crate::ntfs::{ExtendedStandardInfo, NameInfo, ReparsePointHeader, StreamInfo
 /// `parse_record_full → MftRecordMerger → from_parsed_records` pipeline.
 /// The hot path (`SlidingIocpInline`) now uses direct-to-index parsers that
 /// skip this intermediate allocation. This function is still used by:
-/// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`, `SlidingIocp`)
+/// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`,
+///   `SlidingIocp`)
 /// - File-based readers (`load_raw_to_index_with_options`)
 /// - Tests and diagnostic tools
 /// - `UFFS_LEGACY_PARSE=1` escape hatch
diff --git a/crates/uffs-mft/src/parse/merger.rs b/crates/uffs-mft/src/parse/merger.rs
index 3a839b2e7..04a58fe0a 100644
--- a/crates/uffs-mft/src/parse/merger.rs
+++ b/crates/uffs-mft/src/parse/merger.rs
@@ -10,7 +10,8 @@ use crate::ntfs::StreamInfo;
 /// The hot path (`SlidingIocpInline`) now uses direct-to-index parsers that
 /// create parent placeholders on-demand without this intermediate allocation.
 /// This merger is still used by:
-/// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`, `SlidingIocp`)
+/// - Legacy read modes (`Parallel`, `Pipelined`, `PipelinedParallel`,
+///   `SlidingIocp`)
 /// - File-based readers (`load_raw_to_index_with_options`)
 /// - Tests and diagnostic tools
 /// - `UFFS_LEGACY_PARSE=1` escape hatch

From 8ede65b5b8eff3a0979b096c875271a27fb9907e Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 05:20:32 -0700
Subject: [PATCH 7/8] feat(mft): add bitmap-based pre-allocation matching C++
 strategy

Add `MftIndex::with_capacity_optimized()` that pre-allocates ALL vectors
based on MFT bitmap statistics to eliminate Vec resizing during the hot
parse loop. This matches the C++ pre-allocation strategy.

Pre-allocation ratios (matching C++ ntfs_index_accessors.hpp lines 525-544):
- records: estimated_records + 5% safety margin
- frs_to_idx: max_frs + 1 (sparse lookup array)
- names: estimated_records * 23 (~23 chars avg)
- links: estimated_records / 16 (6% have hardlinks)
- streams: estimated_records / 4 (25% have ADS)
- internal_streams: estimated_records / 20 (5% internal)
- children: estimated_records * 3/2 (dirs have multiple children)

Added MftBitmap::max_frs_in_use() to scan backwards and find the highest
in-use FRS number, used for frs_to_idx sizing.

IOCP direct-to-index reader now uses optimized pre-allocation.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 crates/uffs-mft/src/index/base.rs             | 52 +++++++++++++++++++
 .../src/io/readers/parallel/to_index.rs       | 13 +++--
 crates/uffs-mft/src/platform/bitmap.rs        | 17 ++++++
 3 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/crates/uffs-mft/src/index/base.rs b/crates/uffs-mft/src/index/base.rs
index 21ccd888f..b94934732 100644
--- a/crates/uffs-mft/src/index/base.rs
+++ b/crates/uffs-mft/src/index/base.rs
@@ -35,6 +35,58 @@ impl MftIndex {
         }
     }
 
+    /// Create with optimized pre-allocation matching C++ ratios.
+    ///
+    /// This method pre-allocates all vectors based on the MFT bitmap popcount
+    /// to eliminate Vec resizing during the parse loop. The sizing ratios match
+    /// the C++ implementation in `ntfs_index_accessors.hpp` lines 525-544.
+    ///
+    /// # Arguments
+    ///
+    /// * `volume` - Volume letter (e.g., 'C')
+    /// * `estimated_records` - Number of valid records from bitmap popcount
+    /// * `max_frs` - Highest FRS number from bitmap (used for `frs_to_idx`
+    ///   sizing)
+    ///
+    /// # Pre-allocation Strategy
+    ///
+    /// - `records`: `estimated_records * 1.05` (5% safety margin for
+    ///   placeholders)
+    /// - `frs_to_idx`: `max_frs + 1` (sparse array indexed by FRS)
+    /// - `names`: `estimated_records * 23` (~23 chars avg per name)
+    /// - `links`: `estimated_records / 16` (~6% have hardlinks)
+    /// - `streams`: `estimated_records / 4` (~25% have additional streams)
+    /// - `internal_streams`: `estimated_records / 20` (~5% have internal
+    ///   streams)
+    /// - `children`: `estimated_records * 3 / 2` (directories have multiple
+    ///   children)
+    #[must_use]
+    pub fn with_capacity_optimized(volume: char, estimated_records: usize, max_frs: u64) -> Self {
+        // Safety margin for placeholder records added during path resolution
+        let records_capacity = estimated_records + (estimated_records / 20);
+
+        // frs_to_idx is a sparse lookup array indexed by FRS
+        let frs_to_idx_capacity = usize::try_from(max_frs)
+            .ok()
+            .and_then(|max_frs_usize| max_frs_usize.checked_add(1))
+            .unwrap_or(estimated_records);
+
+        Self {
+            volume,
+            records: Vec::with_capacity(records_capacity),
+            frs_to_idx: Vec::with_capacity(frs_to_idx_capacity),
+            names: String::with_capacity(estimated_records * 23),
+            links: Vec::with_capacity(estimated_records / 16),
+            streams: Vec::with_capacity(estimated_records / 4),
+            internal_streams: Vec::with_capacity(estimated_records / 20),
+            children: Vec::with_capacity(estimated_records * 3 / 2),
+            stats: MftStats::new(),
+            extensions: ExtensionTable::new(),
+            extension_index: None,
+            forensic_mode: false,
+        }
+    }
+
     /// Recompute stats from the current index data.
     ///
     /// This is useful after deserializing an index from disk,
diff --git a/crates/uffs-mft/src/io/readers/parallel/to_index.rs b/crates/uffs-mft/src/io/readers/parallel/to_index.rs
index 89d6629c4..d8b488a97 100644
--- a/crates/uffs-mft/src/io/readers/parallel/to_index.rs
+++ b/crates/uffs-mft/src/io/readers/parallel/to_index.rs
@@ -181,10 +181,11 @@ impl ParallelMftReader {
         }
 
         let total_io_ops = io_ops.len();
-        let estimated_records = if let Some(ref bm) = self.bitmap {
-            bm.count_in_use()
+        let (estimated_records, max_frs) = if let Some(ref bm) = self.bitmap {
+            (bm.count_in_use(), bm.max_frs_in_use())
         } else {
-            total_records
+            // No bitmap: use total records as both count and max FRS
+            (total_records, total_records.saturating_sub(1) as u64)
         };
 
         // Calculate total bytes to read and max I/O size for buffer allocation
@@ -198,14 +199,16 @@ impl ParallelMftReader {
         info!(
             io_ops = total_io_ops,
             estimated_records,
+            max_frs,
             bytes_to_read_mb = total_bytes_to_read / (1024 * 1024),
             max_io_size_kb = max_io_size / 1024,
             direct_io = use_direct_chunk_io,
             "📊 Generated I/O operations for inline parsing"
         );
 
-        // Pre-allocate MftIndex and build it incrementally during I/O
-        let mut index = MftIndex::with_capacity(volume, estimated_records);
+        // Pre-allocate MftIndex with C++-matching ratios to eliminate resizing during
+        // parse
+        let mut index = MftIndex::with_capacity_optimized(volume, estimated_records, max_frs);
 
         // Create IOCP
         let read_start = std::time::Instant::now();
diff --git a/crates/uffs-mft/src/platform/bitmap.rs b/crates/uffs-mft/src/platform/bitmap.rs
index efe19239a..caa2899df 100644
--- a/crates/uffs-mft/src/platform/bitmap.rs
+++ b/crates/uffs-mft/src/platform/bitmap.rs
@@ -59,6 +59,23 @@ impl MftBitmap {
             .sum()
     }
 
+    /// Returns the highest FRS number that is marked as in use.
+    ///
+    /// This scans the bitmap backwards to find the last set bit.
+    /// Returns 0 if no records are in use.
+    #[must_use]
+    pub fn max_frs_in_use(&self) -> u64 {
+        // Scan backwards through bytes to find the last non-zero byte
+        for (byte_idx, &byte) in self.data.iter().enumerate().rev() {
+            if byte != 0 {
+                // Found a non-zero byte, find the highest bit set
+                let bit_idx = 7 - byte.leading_zeros() as usize;
+                return (byte_idx * 8 + bit_idx) as u64;
+            }
+        }
+        0
+    }
+
     /// Returns the total number of records this bitmap covers.
     #[must_use]
     pub fn record_count(&self) -> usize {

From a105fb9e918823ef995c787ca4cdb923b070e468 Mon Sep 17 00:00:00 2001
From: Robert M1 <50460704+githubrobbi@users.noreply.github.com>
Date: Sat, 14 Mar 2026 05:21:56 -0700
Subject: [PATCH 8/8] perf(mft): use SmallVec for UTF-16 filename decode in hot
 path

Replace Vec<u16> with SmallVec<[u16; 64]> for UTF-16 filename decoding
in both direct-to-index parsers. This avoids heap allocation for typical
filenames (<= 64 chars), reducing per-record overhead in the hot parse loop.

Matches the optimization already present in the full parser (parse/full.rs).

Files modified:
- crates/uffs-mft/src/io/parser/index.rs (IOCP hot path)
- crates/uffs-mft/src/parse/direct_index.rs (file-based hot path)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 crates/uffs-mft/src/io/parser/index.rs    | 3 ++-
 crates/uffs-mft/src/parse/direct_index.rs | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/crates/uffs-mft/src/io/parser/index.rs b/crates/uffs-mft/src/io/parser/index.rs
index 8e58c6d50..f341a0230 100644
--- a/crates/uffs-mft/src/io/parser/index.rs
+++ b/crates/uffs-mft/src/io/parser/index.rs
@@ -161,7 +161,8 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
                         if name_bytes_offset + name_len * 2 <= data.len() {
                             let name_bytes =
                                 &data[name_bytes_offset..name_bytes_offset + name_len * 2];
-                            let name_u16: Vec<u16> = name_bytes
+                            // SmallVec avoids heap allocation for typical filenames (<= 64 chars)
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
                                 .chunks_exact(2)
                                 .map(|c| u16::from_le_bytes([c[0], c[1]]))
                                 .collect();
diff --git a/crates/uffs-mft/src/parse/direct_index.rs b/crates/uffs-mft/src/parse/direct_index.rs
index 973bd8a84..8845ca017 100644
--- a/crates/uffs-mft/src/parse/direct_index.rs
+++ b/crates/uffs-mft/src/parse/direct_index.rs
@@ -198,7 +198,8 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
                         if name_bytes_offset + name_len * 2 <= data.len() {
                             let name_bytes =
                                 &data[name_bytes_offset..name_bytes_offset + name_len * 2];
-                            let name_u16: Vec<u16> = name_bytes
+                            // SmallVec avoids heap allocation for typical filenames (<= 64 chars)
+                            let name_u16: SmallVec<[u16; 64]> = name_bytes
                                 .chunks_exact(2)
                                 .map(|c| u16::from_le_bytes([c[0], c[1]]))
                                 .collect();