@@ -97,8 +97,12 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
9797 let mut name_parse_counter: u16 = 0 ;
9898 let mut default_size = 0u64 ;
9999 let mut default_allocated = 0u64 ;
100- // ADS: (stream_name, size, allocated)
100+ // User-visible ADS: (stream_name, size, allocated)
101101 let mut additional_streams: SmallVec < [ ( String , u64 , u64 ) ; 4 ] > = SmallVec :: new ( ) ;
102+ // Internal NTFS streams (e.g. $REPARSE, $EA, $OBJECT_ID) — not emitted as
103+ // output rows but still tracked for tree-metrics accounting.
104+ // (size, allocated)
105+ let mut internal_streams: SmallVec < [ ( u64 , u64 ) ; 4 ] > = SmallVec :: new ( ) ;
102106 let mut reparse_tag: u32 = 0 ;
103107 let mut dir_index_size: u64 = 0 ;
104108 let mut dir_index_allocated: u64 = 0 ;
@@ -321,8 +325,9 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
321325 }
322326 } ;
323327
324- // Add $REPARSE_POINT as a stream (matches C++ stream counting)
325- additional_streams. push ( ( String :: from ( "$REPARSE" ) , rp_size, rp_allocated) ) ;
328+ // $REPARSE_POINT is an internal stream — tracked for tree metrics
329+ // but not emitted as a user-visible output row
330+ internal_streams. push ( ( rp_size, rp_allocated) ) ;
326331 }
327332 Some (
328333 AttributeType :: IndexRoot | AttributeType :: IndexAllocation | AttributeType :: Bitmap ,
@@ -417,19 +422,8 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
417422 }
418423 } ;
419424
420- let stream_name = if attr_name. is_empty ( ) {
421- match attr_type {
422- Some ( AttributeType :: Bitmap ) => String :: from ( "$BITMAP" ) ,
423- Some ( AttributeType :: IndexRoot ) => String :: from ( "$INDEX_ROOT" ) ,
424- Some ( AttributeType :: IndexAllocation ) => {
425- String :: from ( "$INDEX_ALLOCATION" )
426- }
427- _ => String :: new ( ) ,
428- }
429- } else {
430- attr_name
431- } ;
432- additional_streams. push ( ( stream_name, size, allocated) ) ;
425+ // Non-$I30 index attributes are internal streams
426+ internal_streams. push ( ( size, allocated) ) ;
433427 }
434428 }
435429 }
@@ -444,7 +438,8 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
444438 | AttributeType :: SecurityDescriptor
445439 | AttributeType :: AttributeList ,
446440 ) => {
447- // All these are counted as streams in C++
441+ // All these are internal streams — tracked for tree metrics but
442+ // not emitted as user-visible output rows.
448443 // Check if primary attribute (LowestVCN == 0)
449444 let is_primary = if attr_header. is_non_resident == 0 {
450445 true
@@ -461,24 +456,6 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
461456 } ;
462457
463458 if is_primary {
464- // Extract attribute name (if any)
465- let attr_name = if attr_header. name_length > 0 {
466- let name_offset = offset + attr_header. name_offset as usize ;
467- let name_len = attr_header. name_length as usize ;
468- if name_offset + name_len * 2 <= data. len ( ) {
469- let name_bytes = & data[ name_offset..name_offset + name_len * 2 ] ;
470- let name_u16: SmallVec < [ u16 ; 64 ] > = name_bytes
471- . chunks_exact ( 2 )
472- . map ( |c| u16:: from_le_bytes ( [ c[ 0 ] , c[ 1 ] ] ) )
473- . collect ( ) ;
474- String :: from_utf16_lossy ( & name_u16)
475- } else {
476- String :: new ( )
477- }
478- } else {
479- String :: new ( )
480- } ;
481-
482459 let ( size, allocated) = if attr_header. is_non_resident == 0 {
483460 let value_length_bytes = & data[ offset + 16 ..offset + 20 ] ;
484461 let value_length =
@@ -500,36 +477,12 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
500477 }
501478 } ;
502479
503- let stream_name = if attr_name. is_empty ( ) {
504- match attr_type {
505- Some ( AttributeType :: ObjectId ) => String :: from ( "$OBJECT_ID" ) ,
506- Some ( AttributeType :: VolumeName ) => String :: from ( "$VOLUME_NAME" ) ,
507- Some ( AttributeType :: VolumeInformation ) => {
508- String :: from ( "$VOLUME_INFORMATION" )
509- }
510- Some ( AttributeType :: PropertySet ) => String :: from ( "$PROPERTY_SET" ) ,
511- Some ( AttributeType :: Ea ) => String :: from ( "$EA" ) ,
512- Some ( AttributeType :: EaInformation ) => String :: from ( "$EA_INFORMATION" ) ,
513- Some ( AttributeType :: LoggedUtilityStream ) => {
514- String :: from ( "$LOGGED_UTILITY_STREAM" )
515- }
516- Some ( AttributeType :: SecurityDescriptor ) => {
517- String :: from ( "$SECURITY_DESCRIPTOR" )
518- }
519- Some ( AttributeType :: AttributeList ) => String :: from ( "$ATTRIBUTE_LIST" ) ,
520- _ => String :: new ( ) ,
521- }
522- } else {
523- attr_name
524- } ;
525- additional_streams. push ( ( stream_name, size, allocated) ) ;
480+ internal_streams. push ( ( size, allocated) ) ;
526481 }
527482 }
528483 _ => {
529- // C++ counts ALL attribute types as streams via default: case
530- // This includes truly unknown types
531- let type_code = attr_header. type_code ;
532-
484+ // Unknown attribute types are internal streams — tracked for
485+ // tree metrics but not emitted as user-visible output rows.
533486 // Check if primary attribute (LowestVCN == 0)
534487 let is_primary = if attr_header. is_non_resident == 0 {
535488 true
@@ -546,24 +499,6 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
546499 } ;
547500
548501 if is_primary {
549- // Extract attribute name (if any)
550- let attr_name = if attr_header. name_length > 0 {
551- let name_offset = offset + attr_header. name_offset as usize ;
552- let name_len = attr_header. name_length as usize ;
553- if name_offset + name_len * 2 <= data. len ( ) {
554- let name_bytes = & data[ name_offset..name_offset + name_len * 2 ] ;
555- let name_u16: SmallVec < [ u16 ; 64 ] > = name_bytes
556- . chunks_exact ( 2 )
557- . map ( |c| u16:: from_le_bytes ( [ c[ 0 ] , c[ 1 ] ] ) )
558- . collect ( ) ;
559- String :: from_utf16_lossy ( & name_u16)
560- } else {
561- String :: new ( )
562- }
563- } else {
564- String :: new ( )
565- } ;
566-
567502 let ( size, allocated) = if attr_header. is_non_resident == 0 {
568503 let value_length_bytes = & data[ offset + 16 ..offset + 20 ] ;
569504 let value_length =
@@ -585,12 +520,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
585520 }
586521 } ;
587522
588- let stream_name = if attr_name. is_empty ( ) {
589- format ! ( "$UNKNOWN_0x{type_code:X}" )
590- } else {
591- attr_name
592- } ;
593- additional_streams. push ( ( stream_name, size, allocated) ) ;
523+ internal_streams. push ( ( size, allocated) ) ;
594524 }
595525 }
596526 }
@@ -621,7 +551,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
621551 // The $FILE_NAME may be in an extension record, but the ADS are here.
622552 // Without this, ADS on files/directories with extension records are lost.
623553
624- // Pre-process ADS streams BEFORE creating the record
554+ // Pre-process user-visible ADS streams BEFORE creating the record
625555 let additional_stream_count = additional_streams. len ( ) ;
626556 let mut stream_indices: Vec < u32 > = Vec :: with_capacity ( additional_stream_count) ;
627557 for ( stream_name, stream_size, stream_allocated) in additional_streams {
@@ -644,20 +574,59 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
644574 } ,
645575 next_entry : NO_ENTRY ,
646576 name : stream_name_ref,
647- flags : 0 ,
577+ // type_name_id=8 for $DATA (0x80 >> 4), stored in bits 2-7
578+ flags : 8 << 2 ,
648579 } ) ;
649580 stream_indices. push ( stream_idx) ;
650581 }
651582
583+ // Build internal stream chain for tree-metrics accounting
584+ let internal_stream_count = internal_streams. len ( ) ;
585+ let mut internal_size_total = 0_u64 ;
586+ let mut internal_alloc_total = 0_u64 ;
587+ let mut first_internal = NO_ENTRY ;
588+ let mut last_internal = NO_ENTRY ;
589+ for ( ist_size, ist_allocated) in internal_streams {
590+ internal_size_total = internal_size_total. saturating_add ( ist_size) ;
591+ internal_alloc_total = internal_alloc_total. saturating_add ( ist_allocated) ;
592+ let new_idx = index. internal_streams . len ( ) as u32 ;
593+ index
594+ . internal_streams
595+ . push ( crate :: index:: InternalStreamInfo {
596+ size : SizeInfo {
597+ length : ist_size,
598+ allocated : ist_allocated,
599+ } ,
600+ next_entry : NO_ENTRY ,
601+ flags : 0 ,
602+ } ) ;
603+ if last_internal == NO_ENTRY {
604+ first_internal = new_idx;
605+ } else {
606+ index. internal_streams [ last_internal as usize ] . next_entry = new_idx;
607+ }
608+ last_internal = new_idx;
609+ }
610+
652611 // Now create the record and set up streams
653612 let record = index. get_or_create ( frs) ;
654613 record. stdinfo = std_info;
655614 record. first_stream . size = SizeInfo {
656615 length : default_size,
657616 allocated : default_allocated,
658617 } ;
618+ // Set type_name_id for first_stream: 0 for directories ($I30), 8 for files
619+ // ($DATA)
620+ record. first_stream . flags = if record. stdinfo . is_directory ( ) {
621+ 0 // type_name_id=0 for directory index ($I30)
622+ } else {
623+ 8 << 2 // type_name_id=8 for $DATA (0x80 >> 4), stored in bits 2-7
624+ } ;
625+ record. internal_streams_size = internal_size_total;
626+ record. internal_streams_allocated = internal_alloc_total;
627+ record. first_internal_stream = first_internal;
659628
660- // Chain ADS streams to first_stream
629+ // Chain user-visible ADS streams to first_stream
661630 if !stream_indices. is_empty ( ) {
662631 // Chain the streams together
663632 for i in 0 ..stream_indices. len ( ) . saturating_sub ( 1 ) {
@@ -668,9 +637,15 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
668637 // Attach to first_stream
669638 let record = index. get_or_create ( frs) ;
670639 record. first_stream . next_entry = stream_indices[ 0 ] ;
671- record. stream_count = 1 + additional_stream_count as u16 ;
672640 }
673641
642+ // stream_count = user-visible streams only (1 default + ADS)
643+ let record = index. get_or_create ( frs) ;
644+ record. stream_count = 1 + additional_stream_count as u16 ;
645+ // total_stream_count = ALL streams including internal
646+ record. total_stream_count =
647+ 1 + additional_stream_count as u16 + internal_stream_count as u16 ;
648+
674649 // Leave first_name empty - extension record will fill it
675650 return false ;
676651 }
@@ -709,7 +684,7 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
709684 link_indices. push ( link_idx) ;
710685 }
711686
712- // Pre-process additional streams ( ADS) : add to names buffer and streams list
687+ // Pre-process user-visible ADS streams : add to names buffer and streams list
713688 let additional_stream_count = additional_streams. len ( ) ;
714689 let mut stream_indices: Vec < u32 > = Vec :: with_capacity ( additional_stream_count) ;
715690 for ( stream_name, stream_size, stream_allocated) in additional_streams {
@@ -732,11 +707,40 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
732707 } ,
733708 next_entry : NO_ENTRY , // Will be patched below
734709 name : stream_name_ref,
735- flags : 0 ,
710+ // type_name_id=8 for $DATA (0x80 >> 4), stored in bits 2-7
711+ flags : 8 << 2 ,
736712 } ) ;
737713 stream_indices. push ( stream_idx) ;
738714 }
739715
716+ // Build internal stream chain for tree-metrics accounting
717+ let internal_stream_count = internal_streams. len ( ) ;
718+ let mut internal_size_total = 0_u64 ;
719+ let mut internal_alloc_total = 0_u64 ;
720+ let mut first_internal = NO_ENTRY ;
721+ let mut last_internal = NO_ENTRY ;
722+ for ( ist_size, ist_allocated) in internal_streams {
723+ internal_size_total = internal_size_total. saturating_add ( ist_size) ;
724+ internal_alloc_total = internal_alloc_total. saturating_add ( ist_allocated) ;
725+ let new_idx = index. internal_streams . len ( ) as u32 ;
726+ index
727+ . internal_streams
728+ . push ( crate :: index:: InternalStreamInfo {
729+ size : SizeInfo {
730+ length : ist_size,
731+ allocated : ist_allocated,
732+ } ,
733+ next_entry : NO_ENTRY ,
734+ flags : 0 ,
735+ } ) ;
736+ if last_internal == NO_ENTRY {
737+ first_internal = new_idx;
738+ } else {
739+ index. internal_streams [ last_internal as usize ] . next_entry = new_idx;
740+ }
741+ last_internal = new_idx;
742+ }
743+
740744 // Ensure parent exists (create placeholder if needed) - do this before getting
741745 // our record
742746 if parent_frs != frs && parent_frs != 0 {
@@ -751,17 +755,27 @@ pub fn parse_record_to_index(data: &[u8], frs: u64, index: &mut crate::index::Mf
751755 length : default_size,
752756 allocated : default_allocated,
753757 } ;
758+ // Set type_name_id for first_stream: 0 for directories ($I30), 8 for files
759+ // ($DATA)
760+ record. first_stream . flags = if record. stdinfo . is_directory ( ) {
761+ 0 // type_name_id=0 for directory index ($I30)
762+ } else {
763+ 8 << 2 // type_name_id=8 for $DATA (0x80 >> 4), stored in bits 2-7
764+ } ;
754765 record. first_name = LinkInfo {
755766 next_entry : NO_ENTRY ,
756767 name : name_ref,
757768 parent_frs,
758769 } ;
759770 record. name_count = 1 + additional_count as u16 ;
760- // stream_count = 1 ( default) + additional ADS
771+ // stream_count = user-visible streams only (1 default + ADS)
761772 record. stream_count = 1 + additional_stream_count as u16 ;
762- // total_stream_count includes all streams (including internal ones like
763- // $REPARSE)
764- record. total_stream_count = 1 + additional_stream_count as u16 ;
773+ // total_stream_count = ALL streams including internal (for tree metrics)
774+ record. total_stream_count = 1 + additional_stream_count as u16 + internal_stream_count as u16 ;
775+ // Internal stream metadata for tree metrics
776+ record. internal_streams_size = internal_size_total;
777+ record. internal_streams_allocated = internal_alloc_total;
778+ record. first_internal_stream = first_internal;
765779 // Set reparse tag if this is a reparse point
766780 record. reparse_tag = reparse_tag;
767781
0 commit comments