Skip to content

Commit 2908bc5

Browse files
committed
chore: development v0.3.41 - comprehensive testing complete [auto-commit]
1 parent 7b68ae4 commit 2908bc5

25 files changed

+1623 / -81 lines changed

.intent/config.json

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
{
2+
"scripts": [
3+
{
4+
"name": "build",
5+
"command": "cargo build",
6+
"mode": "command",
7+
"category": "build"
8+
},
9+
{
10+
"name": "test",
11+
"command": "cargo test",
12+
"mode": "command",
13+
"category": "test"
14+
},
15+
{
16+
"name": "check",
17+
"command": "cargo check",
18+
"mode": "command",
19+
"category": "typecheck"
20+
}
21+
]
22+
}

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ exclude = [
3636
# Workspace Package Metadata (inherited by all crates)
3737
# ─────────────────────────────────────────────────────────────────────────────
3838
[workspace.package]
39-
version = "0.3.40"
39+
version = "0.3.41"
4040
edition = "2024"
4141
rust-version = "1.85"
4242
license = "MPL-2.0 OR LicenseRef-UFFS-Commercial"

LOG/Output

Lines changed: 217 additions & 0 deletions
Large diffs are not rendered by default.

crates/uffs-mft/src/index/tree.rs

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,27 @@ impl MftIndex {
8888
);
8989
}
9090

91+
// C++ parity: fix total_stream_count for non-directory records that
92+
// have NO unnamed $DATA attribute but DO have other streams. In C++,
93+
// the first non-$STD_INFO/non-$FILE_NAME attribute becomes first_stream
94+
// directly — there is no phantom empty default. Rust always counts 1
95+
// for the default slot. The `has_default_data()` bit (set during
96+
// parsing in both base and extension handlers) lets us distinguish
97+
// "has empty $DATA" (keep the 1) from "has no $DATA" (subtract 1).
98+
{
99+
let no_entry = super::NO_ENTRY;
100+
for rec in &mut self.records {
101+
if !rec.stdinfo.is_directory()
102+
&& !rec.has_default_data()
103+
&& rec.total_stream_count > 1
104+
&& (rec.first_stream.next_entry != no_entry
105+
|| rec.first_internal_stream != no_entry)
106+
{
107+
rec.total_stream_count -= 1;
108+
}
109+
}
110+
}
111+
91112
// First pass: compute tree metrics
92113
crate::tree_metrics::compute_tree_metrics(self, debug, skip_orphans);
93114
tracing::debug!("[TRIP] MftIndex::compute_tree_metrics_impl -> first pass done");

crates/uffs-mft/src/index/types.rs

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -632,7 +632,7 @@ pub struct FileRecord {
632632
/// Primary filename namespace (0=POSIX, 1=Win32, 2=DOS, 3=Win32+DOS)
633633
pub namespace: u8,
634634
/// Forensic flags (bit-packed): bit 0 = `is_deleted`, bit 1 = `is_corrupt`,
635-
/// bit 2 = `is_extension`
635+
/// bit 2 = `is_extension`, bit 3 = `has_default_data` (unnamed $DATA found)
636636
pub forensic_flags: u8,
637637
/// Log File Sequence Number - correlates with `$LogFile` journal (forensic)
638638
pub lsn: u64,
@@ -780,11 +780,28 @@ impl FileRecord {
780780
/// Sets the forensic flags from parsed record fields.
781781
#[inline]
782782
pub fn set_forensic_flags(&mut self, is_deleted: bool, is_corrupt: bool, is_extension: bool) {
783-
self.forensic_flags = u8::from(is_deleted)
783+
// Preserve bit 3 (has_default_data) when setting forensic bits
784+
self.forensic_flags = (self.forensic_flags & 0b1000)
785+
| u8::from(is_deleted)
784786
| (u8::from(is_corrupt) << 1_u8)
785787
| (u8::from(is_extension) << 2_u8);
786788
}
787789

790+
/// Returns true if an unnamed `$DATA` attribute was found during parsing.
791+
/// Used by tree metrics to distinguish "has empty $DATA" from "has no
792+
/// $DATA".
793+
#[inline]
794+
#[must_use]
795+
pub const fn has_default_data(&self) -> bool {
796+
self.forensic_flags & 0b1000 != 0
797+
}
798+
799+
/// Marks that an unnamed `$DATA` attribute was found during parsing.
800+
#[inline]
801+
pub const fn set_has_default_data(&mut self) {
802+
self.forensic_flags |= 0b1000;
803+
}
804+
788805
/// Returns the tree metrics tuple (descendants, treesize,
789806
/// `tree_allocated`).
790807
///

crates/uffs-mft/src/io.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ pub use parser::parse_record_to_fragment;
3939
pub use parser::{
4040
ExtensionAttributes, ParseResult, ParsedColumns, ParsedRecord,
4141
add_missing_parent_placeholders_to_vec, create_placeholder_record, parse_record,
42-
parse_record_full, parse_record_to_index, parse_record_zero_alloc,
42+
parse_record_full, parse_record_to_index, parse_record_zero_alloc, process_record,
4343
};
4444
// Export Windows-specific readers (require HANDLE)
4545
#[cfg(windows)]

crates/uffs-mft/src/io/parser/fragment.rs

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ use zerocopy::FromBytes;
1515
reason = "internal use in deprecated parse_record_to_fragment"
1616
)]
1717
use super::fragment_extension::parse_extension_to_fragment;
18-
use crate::ntfs::is_internal_windows_stream;
1918

2019
/// Parses a record directly into an `MftIndexFragment` (for parallel parsing).
2120
///
@@ -259,10 +258,10 @@ pub fn parse_record_to_fragment(
259258
.map(|c| u16::from_le_bytes([c[0], c[1]]))
260259
.collect();
261260
let stream_name = String::from_utf16_lossy(&name_u16);
262-
// Filter out internal Windows streams (names starting with $)
263-
if !is_internal_windows_stream(&stream_name) {
264-
additional_streams.push((stream_name, size, allocated));
265-
}
261+
// C++ parity: ALL named $DATA streams create regular
262+
// stream entries. Internal ones are filtered from
263+
// output by is_internal_windows_stream in the output layer.
264+
additional_streams.push((stream_name, size, allocated));
266265
}
267266
}
268267
}
@@ -546,7 +545,7 @@ pub fn parse_record_to_fragment(
546545
// Helper to add a child entry to a parent in the fragment
547546
let add_child_entry =
548547
|fragment: &mut crate::index::MftIndexFragment, p_frs: u64, name_idx: u16| {
549-
if p_frs == frs || p_frs == 0 || p_frs == u64::from(NO_ENTRY) {
548+
if p_frs == frs || p_frs == u64::from(NO_ENTRY) {
550549
return;
551550
}
552551
// Ensure parent exists in fragment

crates/uffs-mft/src/io/parser/fragment_extension.rs

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@ use core::mem::size_of;
1010
use smallvec::SmallVec;
1111
use zerocopy::FromBytes;
1212

13-
use crate::ntfs::is_internal_windows_stream;
14-
1513
/// Parses an extension record and adds its names/streams to the base record in
1614
/// a fragment.
1715
///
@@ -169,10 +167,10 @@ pub(super) fn parse_extension_to_fragment(
169167
.map(|c| u16::from_le_bytes([c[0], c[1]]))
170168
.collect();
171169
let stream_name = String::from_utf16_lossy(&name_u16);
172-
// Filter out internal Windows streams (names starting with $)
173-
if !is_internal_windows_stream(&stream_name) {
174-
streams.push((stream_name, size, allocated));
175-
}
170+
// C++ parity: ALL named $DATA streams create regular
171+
// stream entries. Internal ones are filtered from
172+
// output by is_internal_windows_stream in the output layer.
173+
streams.push((stream_name, size, allocated));
176174
}
177175
}
178176
}
@@ -340,7 +338,7 @@ pub(super) fn parse_extension_to_fragment(
340338

341339
for (name_idx, (_, parent_frs)) in names.iter().enumerate() {
342340
let p_frs = *parent_frs;
343-
if p_frs == base_frs || p_frs == 0 || p_frs == u64::from(NO_ENTRY) {
341+
if p_frs == base_frs || p_frs == u64::from(NO_ENTRY) {
344342
continue;
345343
}
346344

crates/uffs-mft/src/io/parser/index.rs

Lines changed: 39 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,6 @@ use smallvec::SmallVec;
3737
use zerocopy::FromBytes;
3838

3939
use super::index_extension::parse_extension_to_index;
40-
use crate::ntfs::is_internal_windows_stream;
4140
use crate::parse::index_helpers::{
4241
ExtensionSnapshot, InternalStreamChain, add_child_entry, add_link_to_index,
4342
add_stream_to_index, build_internal_stream_chain, chain_links, chain_streams,
@@ -338,7 +337,10 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
338337
};
339338

340339
if name_len == 0 {
341-
// Default stream
340+
// Default stream — mark that unnamed $DATA exists
341+
// (C++ parity: distinguishes "empty $DATA" from "no $DATA")
342+
let rec = index.get_or_create(frs);
343+
rec.set_has_default_data();
342344
default_size = size;
343345
default_allocated = allocated;
344346
} else {
@@ -351,11 +353,37 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
351353
.map(|c| u16::from_le_bytes([c[0], c[1]]))
352354
.collect();
353355
let stream_name = String::from_utf16_lossy(&name_u16);
354-
// Filter out internal Windows streams (names starting with $)
355-
// These include $DSC, $REPARSE, $EA, $EA_INFORMATION, $TXF_DATA, $OBJECT_ID
356-
if !is_internal_windows_stream(&stream_name) {
357-
additional_streams.push((stream_name, size, allocated));
358-
}
356+
357+
// C++ parity: $BadClus:$Bad (FRS 8) uses InitializedSize
358+
// instead of DataSize/AllocatedSize to avoid counting the
359+
// entire volume size (ntfs_index_load.hpp lines 431-452).
360+
let (size, allocated) = if frs == 8
361+
&& attr_header.name_length == 4
362+
&& stream_name == "$Bad"
363+
&& attr_header.is_non_resident != 0
364+
{
365+
let init_size_offset = offset + 56;
366+
if init_size_offset + 8 <= data.len() {
367+
let init_size = u64::from_le_bytes(
368+
data[init_size_offset..init_size_offset + 8]
369+
.try_into()
370+
.unwrap_or([0; 8]),
371+
);
372+
(init_size, init_size)
373+
} else {
374+
(0, 0)
375+
}
376+
} else {
377+
(size, allocated)
378+
};
379+
380+
// C++ parity: ALL named $DATA streams create regular
381+
// stream entries (counted in stream_count). Internal
382+
// ones (names starting with $) are filtered from
383+
// *output* by is_internal_windows_stream checks in the
384+
// output layer, but must be counted here to match C++
385+
// stream_count semantics for descendants.
386+
additional_streams.push((stream_name, size, allocated));
359387
}
360388
}
361389
}
@@ -656,16 +684,8 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
656684

657685
record.stdinfo = std_info;
658686
record.first_stream.size = SizeInfo {
659-
length: if default_size == 0 && ext.first_stream_len > 0 {
660-
ext.first_stream_len
661-
} else {
662-
default_size
663-
},
664-
allocated: if default_allocated == 0 && ext.first_stream_alloc > 0 {
665-
ext.first_stream_alloc
666-
} else {
667-
default_allocated
668-
},
687+
length: default_size.saturating_add(ext.first_stream_len),
688+
allocated: default_allocated.saturating_add(ext.first_stream_alloc),
669689
};
670690
record.first_stream.flags = if record.stdinfo.is_directory() {
671691
0
@@ -764,16 +784,8 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
764784

765785
record.stdinfo = std_info;
766786
record.first_stream.size = SizeInfo {
767-
length: if default_size == 0 && ext.first_stream_len > 0 {
768-
ext.first_stream_len
769-
} else {
770-
default_size
771-
},
772-
allocated: if default_allocated == 0 && ext.first_stream_alloc > 0 {
773-
ext.first_stream_alloc
774-
} else {
775-
default_allocated
776-
},
787+
length: default_size.saturating_add(ext.first_stream_len),
788+
allocated: default_allocated.saturating_add(ext.first_stream_alloc),
777789
};
778790
record.first_stream.flags = if record.stdinfo.is_directory() {
779791
0

crates/uffs-mft/src/io/parser/index_extension.rs

Lines changed: 29 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@ use core::mem::size_of;
1717
use smallvec::SmallVec;
1818
use zerocopy::FromBytes;
1919

20-
use crate::ntfs::is_internal_windows_stream;
21-
2220
/// Parses an extension record and adds its names/streams to the base record.
2321
///
2422
/// Extension records contain additional `$FILE_NAME` attributes (hard links)
@@ -227,6 +225,17 @@ pub(super) fn parse_extension_to_index(
227225

228226
if name_len == 0 {
229227
// Default $DATA stream — update base record size
228+
// Mark that unnamed $DATA exists on the base record
229+
// (C++ parity: distinguishes "empty $DATA" from "no $DATA")
230+
{
231+
let bf = base_frs as usize;
232+
if bf < index.frs_to_idx.len() {
233+
let base_idx = index.frs_to_idx[bf];
234+
if base_idx != NO_ENTRY {
235+
index.records[base_idx as usize].set_has_default_data();
236+
}
237+
}
238+
}
230239
default_data_size = size;
231240
default_data_allocated = allocated;
232241
found_default_data = true;
@@ -240,10 +249,10 @@ pub(super) fn parse_extension_to_index(
240249
.map(|c| u16::from_le_bytes([c[0], c[1]]))
241250
.collect();
242251
let stream_name = String::from_utf16_lossy(&name_u16);
243-
// Filter out internal Windows streams (names starting with $)
244-
if !is_internal_windows_stream(&stream_name) {
245-
streams.push((stream_name, size, allocated));
246-
}
252+
// C++ parity: ALL named $DATA streams create regular
253+
// stream entries. Internal ones are filtered from
254+
// output by is_internal_windows_stream in the output layer.
255+
streams.push((stream_name, size, allocated));
247256
}
248257
}
249258
}
@@ -704,6 +713,9 @@ pub(super) fn parse_extension_to_index(
704713
// record (e.g., large files with extensive run lists).
705714
if found_default_data {
706715
let record = &mut index.records[record_idx as usize];
716+
// Ensure has_default_data bit is set (may not have been set
717+
// earlier if the base record didn't exist at attribute-parse time)
718+
record.set_has_default_data();
707719

708720
// If base record has no $DATA (both fields are 0), use extension's $DATA.
709721
// Otherwise, accumulate extension $DATA to base $DATA.
@@ -742,7 +754,7 @@ pub(super) fn parse_extension_to_index(
742754

743755
for (name_idx, (_, parent_frs)) in names.iter().enumerate() {
744756
let p_frs = *parent_frs;
745-
if p_frs == base_frs || p_frs == 0 || p_frs == u64::from(NO_ENTRY) {
757+
if p_frs == base_frs || p_frs == u64::from(NO_ENTRY) {
746758
continue;
747759
}
748760

@@ -765,12 +777,21 @@ pub(super) fn parse_extension_to_index(
765777
// name_index is the position in the combined name list (existing + new)
766778
// For extension records, the first name might replace first_name (if empty),
767779
// so we need to account for that
780+
//
781+
// FIX: The off-by-one bug was here. Extension names are appended AFTER
782+
// existing names, so the index should be existing_name_count + name_idx,
783+
// not existing_name_count - 1 + name_idx.
784+
//
785+
// Example: base has 1 name (index 0), extension adds 1 name
786+
// - existing_name_count = 1
787+
// - name_idx = 0 (first extension name)
788+
// - effective_name_idx should be 1 (the second name overall)
768789
let effective_name_idx = if existing_name_count == 0 {
769790
// First extension name became first_name, so name_index starts at 0
770791
name_idx as u16
771792
} else {
772793
// Extension names are appended after existing names
773-
existing_name_count - 1 + name_idx as u16
794+
existing_name_count + name_idx as u16
774795
};
775796

776797
let child_idx = index.children.len() as u32;

0 commit comments

Comments
 (0)