From e90aade0d62229001268947800510de281fda6a0 Mon Sep 17 00:00:00 2001 From: Robert M1 <50460704+githubrobbi@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:41:12 -0700 Subject: [PATCH] fix(mft): preserve extension first_stream.size and dir_index in LIVE parser Two root causes behind 16,517 directory size differences (Bug C): 1. IOCP out-of-order I/O: when an extension record is parsed before its base record, the base overwrites first_stream.size with 0 (losing the extension's $DATA size). Now snapshot and restore first_stream.size alongside the other extension-data snapshots. 2. Extension records with only $I30 attributes were silently skipped by the early-return guard (which didn't check dir_index_size). Now include dir_index_size/dir_index_allocated in both the guard and the return value. Co-Authored-By: Claude Opus 4.6 --- crates/uffs-mft/src/io/parser/index.rs | 21 +++++++++++++++++++ .../uffs-mft/src/io/parser/index_extension.rs | 8 +++++-- .../src/parse/direct_index_extension.rs | 16 +++++++++++--- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/crates/uffs-mft/src/io/parser/index.rs b/crates/uffs-mft/src/io/parser/index.rs index b82138af9..6d20f18d7 100644 --- a/crates/uffs-mft/src/io/parser/index.rs +++ b/crates/uffs-mft/src/io/parser/index.rs @@ -723,12 +723,20 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf let ext_internal_head = record.first_internal_stream; let ext_internal_size = record.internal_streams_size; let ext_internal_alloc = record.internal_streams_allocated; + // Snapshot first_stream.size from extension records (IOCP ordering). + let ext_first_stream_len = record.first_stream.size.length; + let ext_first_stream_alloc = record.first_stream.size.allocated; record.stdinfo = std_info; record.first_stream.size = SizeInfo { length: default_size, allocated: default_allocated, }; + // Restore extension's default-stream size if base has no $DATA/$I30. + if default_size == 0 && default_allocated == 0 && (ext_first_stream_len > 0 || ext_first_stream_alloc > 0) { + record.first_stream.size.length = ext_first_stream_len; + record.first_stream.size.allocated = ext_first_stream_alloc; + } // Set type_name_id for first_stream: 0 for directories ($I30), 8 for files // ($DATA) record.first_stream.flags = if record.stdinfo.is_directory() { @@ -919,12 +927,25 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf let ext_internal_head = record.first_internal_stream; let ext_internal_size = record.internal_streams_size; let ext_internal_alloc = record.internal_streams_allocated; + // Snapshot first_stream.size — extension records processed before the base + // (due to IOCP out-of-order I/O) may have already set the default $DATA or + // $I30 size. We must preserve it if the base record has no $DATA. + let ext_first_stream_len = record.first_stream.size.length; + let ext_first_stream_alloc = record.first_stream.size.allocated; record.stdinfo = std_info; record.first_stream.size = SizeInfo { length: default_size, allocated: default_allocated, }; + // If the base record has no $DATA (default_size == 0 and default_allocated + // == 0) but an extension record already populated first_stream.size, restore + // the extension's values. This handles files/dirs whose primary $DATA/$I30 + // attribute resides entirely in an extension record. + if default_size == 0 && default_allocated == 0 && (ext_first_stream_len > 0 || ext_first_stream_alloc > 0) { + record.first_stream.size.length = ext_first_stream_len; + record.first_stream.size.allocated = ext_first_stream_alloc; + } // Set type_name_id for first_stream: 0 for directories ($I30), 8 for files // ($DATA) record.first_stream.flags = if record.stdinfo.is_directory() { diff --git a/crates/uffs-mft/src/io/parser/index_extension.rs b/crates/uffs-mft/src/io/parser/index_extension.rs index 62dfd15b9..8e861303f 100644 --- a/crates/uffs-mft/src/io/parser/index_extension.rs +++ b/crates/uffs-mft/src/io/parser/index_extension.rs @@ -458,12 +458,14 @@ pub(super) fn parse_extension_to_index( offset += attr_header.length as usize; } - // If no names, user-visible streams, internal streams, or default data found, - // nothing to do + // If no names, user-visible streams, internal streams, default data, or + // directory index sizes found, nothing to do if names.is_empty() && streams.is_empty() && ext_internal_streams.is_empty() && !found_default_data + && dir_index_size == 0 + && dir_index_allocated == 0 { return false; } @@ -784,4 +786,6 @@ pub(super) fn parse_extension_to_index( || !streams.is_empty() || !ext_internal_streams.is_empty() || found_default_data + || dir_index_size > 0 + || dir_index_allocated > 0 } diff --git a/crates/uffs-mft/src/parse/direct_index_extension.rs b/crates/uffs-mft/src/parse/direct_index_extension.rs index f140c335c..3918ec43b 100644 --- a/crates/uffs-mft/src/parse/direct_index_extension.rs +++ b/crates/uffs-mft/src/parse/direct_index_extension.rs @@ -530,8 +530,14 @@ pub(super) fn parse_extension_to_index( offset += attr_header.length as usize; } - // If no names, streams, or default data found, nothing to do - if names.is_empty() && streams.is_empty() && !found_default_data { + // If no names, streams, default data, or directory index sizes found, + // nothing to do + if names.is_empty() + && streams.is_empty() + && !found_default_data + && dir_index_size == 0 + && dir_index_allocated == 0 + { return false; } @@ -788,5 +794,9 @@ pub(super) fn parse_extension_to_index( } } - !names.is_empty() || !streams.is_empty() || found_default_data + !names.is_empty() + || !streams.is_empty() + || found_default_data + || dir_index_size > 0 + || dir_index_allocated > 0 }