Skip to content

Commit 542e8a3

Browse files
committed
Merge branch 'optimize-offline-mft'
2 parents 7f4e66d + 018407c commit 542e8a3

File tree

9 files changed

+1031
-119
lines changed

9 files changed

+1031
-119
lines changed

crates/uffs-cli/src/commands/output.rs

Lines changed: 641 additions & 18 deletions
Large diffs are not rendered by default.

crates/uffs-cli/src/commands/raw_io.rs

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@ use uffs_mft::MftReader;
1818

1919
use super::output::results_to_dataframe;
2020

21+
/// Native offline query results for direct `--mft-file` output.
22+
pub(super) struct NativeOfflineQueryResults {
23+
/// Loaded offline index used for record metadata lookups during output.
24+
pub(super) index: uffs_mft::MftIndex,
25+
/// Native search results collected from `IndexQuery`.
26+
pub(super) results: Vec<uffs_core::SearchResult>,
27+
/// Raw MFT load duration in milliseconds.
28+
pub(super) load_ms: u128,
29+
/// Query/filter duration in milliseconds.
30+
pub(super) query_ms: u128,
31+
}
32+
2133
/// Query filter options for the search command.
2234
pub(super) struct QueryFilters<'a> {
2335
/// Parsed search pattern (glob, regex, or literal).
@@ -146,6 +158,42 @@ pub(super) fn load_and_filter_from_mft_file(
146158
profile: bool,
147159
debug_tree: bool,
148160
) -> Result<uffs_mft::DataFrame> {
161+
let native = load_and_filter_native_from_mft_file(
162+
mft_path,
163+
drive_letter,
164+
filters,
165+
needs_paths,
166+
debug_tree,
167+
)?;
168+
let matches = native.results.len();
169+
let records = native.index.len();
170+
let df = results_to_dataframe(&native.index, native.results, needs_paths)?;
171+
172+
if profile {
173+
let total_ms = native.load_ms + native.query_ms;
174+
eprintln!("=== RAW MFT FILE TIMING ===");
175+
eprintln!(
176+
" Load from file: {:>6} ms ({} records)",
177+
native.load_ms, records
178+
);
179+
eprintln!(
180+
" Query/filter: {:>6} ms ({} matches)",
181+
native.query_ms, matches
182+
);
183+
eprintln!(" TOTAL: {total_ms:>6} ms");
184+
}
185+
186+
Ok(df)
187+
}
188+
189+
/// Load, query, and return native results from a raw offline MFT file.
190+
pub(super) fn load_and_filter_native_from_mft_file(
191+
mft_path: &Path,
192+
drive_letter: Option<char>,
193+
filters: &QueryFilters<'_>,
194+
needs_paths: bool,
195+
debug_tree: bool,
196+
) -> Result<NativeOfflineQueryResults> {
149197
use uffs_mft::LoadRawOptions;
150198

151199
let volume = drive_letter.unwrap_or('X');
@@ -166,24 +214,15 @@ pub(super) fn load_and_filter_from_mft_file(
166214
let load_ms = t_load.elapsed().as_millis();
167215

168216
let t_query = std::time::Instant::now();
169-
let results = execute_index_query(&index, filters, needs_paths)?;
217+
let results = execute_index_query_native(&index, filters, needs_paths)?;
170218
let query_ms = t_query.elapsed().as_millis();
171219

172-
if profile {
173-
let total_ms = load_ms + query_ms;
174-
eprintln!("=== RAW MFT FILE TIMING ===");
175-
eprintln!(
176-
" Load from file: {load_ms:>6} ms ({} records)",
177-
index.len()
178-
);
179-
eprintln!(
180-
" Query/filter: {query_ms:>6} ms ({} matches)",
181-
results.height()
182-
);
183-
eprintln!(" TOTAL: {total_ms:>6} ms");
184-
}
185-
186-
Ok(results)
220+
Ok(NativeOfflineQueryResults {
221+
index,
222+
results,
223+
load_ms,
224+
query_ms,
225+
})
187226
}
188227

189228
/// Load raw MFT with debug output for tree metrics.
@@ -744,13 +783,12 @@ fn execute_query(
744783

745784
/// Execute query using fast `IndexQuery` path (no `DataFrame` conversion).
746785
///
747-
/// This is the fast path for simple queries. Returns results as a `DataFrame`
748-
/// for compatibility with the output pipeline.
749-
fn execute_index_query(
786+
/// This is the fast path for simple queries.
787+
fn execute_index_query_native(
750788
index: &uffs_mft::MftIndex,
751789
filters: &QueryFilters<'_>,
752790
resolve_paths: bool,
753-
) -> Result<uffs_mft::DataFrame> {
791+
) -> Result<Vec<uffs_core::SearchResult>> {
754792
use uffs_core::{IndexQuery, TypeFilter, compile_parsed_pattern};
755793

756794
let mut query = IndexQuery::new(index);
@@ -789,6 +827,5 @@ fn execute_index_query(
789827
query = query.case_sensitive(filters.parsed.is_case_sensitive());
790828
query = query.with_resolve_paths(resolve_paths);
791829

792-
let results = query.collect();
793-
results_to_dataframe(index, &results, resolve_paths)
830+
Ok(query.collect())
794831
}

crates/uffs-cli/src/commands/search/mod.rs

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@ use uffs_core::output::OutputConfig;
99
use uffs_core::pattern::ParsedPattern;
1010
use uffs_core::tree::add_tree_columns;
1111

12-
use super::output::write_results;
13-
use super::raw_io::{QueryFilters, load_and_filter_data, load_and_filter_from_mft_file};
12+
use super::output::{can_write_native_results, write_native_results, write_results};
13+
use super::raw_io::{
14+
QueryFilters, load_and_filter_data, load_and_filter_from_mft_file,
15+
load_and_filter_native_from_mft_file,
16+
};
1417
#[cfg(windows)]
1518
use super::raw_io::{load_and_filter_data_index, load_and_filter_data_index_multi};
1619

@@ -200,6 +203,64 @@ pub async fn search(
200203
.or_else(|| filters.parsed.drive().map(|drive| vec![drive]))
201204
.unwrap_or_default();
202205

206+
if let Some(mft_path) = mft_file.as_ref()
207+
&& !benchmark
208+
&& can_write_native_results(format, &output_config)
209+
{
210+
info!(
211+
path = %mft_path.display(),
212+
format,
213+
"📂 Loading raw MFT file via native direct-output path"
214+
);
215+
216+
let native = load_and_filter_native_from_mft_file(
217+
mft_path,
218+
single_drive,
219+
&filters,
220+
needs_paths,
221+
debug_tree,
222+
)?;
223+
224+
let t_output = std::time::Instant::now();
225+
write_native_results(
226+
&native.index,
227+
&native.results,
228+
format,
229+
out,
230+
&output_config,
231+
&output_targets,
232+
)?;
233+
let output_ms = t_output.elapsed().as_millis();
234+
let elapsed = start_time.elapsed();
235+
236+
if profile {
237+
let raw_total_ms = native.load_ms + native.query_ms;
238+
eprintln!("=== RAW MFT FILE TIMING ===");
239+
eprintln!(
240+
" Load from file: {:>6} ms ({} records)",
241+
native.load_ms,
242+
native.index.len()
243+
);
244+
eprintln!(
245+
" Query/filter: {:>6} ms ({} matches)",
246+
native.query_ms,
247+
native.results.len()
248+
);
249+
eprintln!(" TOTAL: {raw_total_ms:>6} ms");
250+
eprintln!("=== PROFILE: Output ===");
251+
eprintln!(" Tree columns: {:>6} ms", 0_u128);
252+
eprintln!(
253+
" Output/write: {:>6} ms ({} rows)",
254+
output_ms,
255+
native.results.len()
256+
);
257+
eprintln!("=== TOTAL: {} ms ===", elapsed.as_millis());
258+
}
259+
260+
info!(count = native.results.len(), "Search complete");
261+
return Ok(());
262+
}
263+
203264
let mut results = if let Some(mft_path) = mft_file.as_ref() {
204265
info!(path = %mft_path.display(), "📂 Loading from raw MFT file");
205266
load_and_filter_from_mft_file(

crates/uffs-core/src/index_search/query/execution.rs

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use rayon::prelude::*;
55
use super::IndexQuery;
66
use super::expansion::RecordExpander;
77
use super::filtering::RecordFilter;
8-
use super::planning::CollectPlan;
8+
use super::planning::{CollectPlan, ScanPlan};
99
use crate::index_search::SearchResult;
1010

1111
impl IndexQuery<'_> {
@@ -31,21 +31,42 @@ impl IndexQuery<'_> {
3131
self.min_size,
3232
self.max_size,
3333
);
34-
let plan = CollectPlan::build(index, self.pattern.as_ref(), include_system_metafiles);
34+
let CollectPlan {
35+
path_cache,
36+
scan_plan,
37+
} = CollectPlan::build(index, self.pattern.as_ref(), include_system_metafiles);
38+
let path_resolver = path_cache.resolver();
3539
let expander = RecordExpander::new(
3640
index,
37-
&plan.path_cache,
41+
&path_cache,
3842
expand_names,
3943
expand_streams,
4044
resolve_paths,
4145
);
46+
let scan_limit = limit.unwrap_or(usize::MAX);
4247

43-
plan.records_to_scan
44-
.par_iter()
45-
.filter(|record| plan.path_cache.is_valid(record.frs) && filters.matches(record))
46-
.take_any(limit.unwrap_or(usize::MAX))
47-
.flat_map_iter(|record| expander.collect_results(record))
48-
.collect()
48+
match scan_plan {
49+
ScanPlan::Full(records) => records
50+
.par_iter()
51+
.enumerate()
52+
.filter(|(record_idx, record)| {
53+
path_resolver.is_valid_idx(*record_idx) && filters.matches(record)
54+
})
55+
.take_any(scan_limit)
56+
.flat_map_iter(|(record_idx, record)| expander.collect_results(record_idx, record))
57+
.collect(),
58+
ScanPlan::Filtered { records, indices } => indices
59+
.par_iter()
60+
.filter_map(|&record_idx_u32| {
61+
let record_idx = usize::try_from(record_idx_u32).ok()?;
62+
let record = records.get(record_idx)?;
63+
(path_resolver.is_valid_idx(record_idx) && filters.matches(record))
64+
.then_some((record_idx, record))
65+
})
66+
.take_any(scan_limit)
67+
.flat_map_iter(|(record_idx, record)| expander.collect_results(record_idx, record))
68+
.collect(),
69+
}
4970
}
5071

5172
/// Count matching records without collecting results.

crates/uffs-core/src/index_search/query/expansion.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@ impl<'a> RecordExpander<'a> {
4343

4444
/// Collect all output search results for the record.
4545
#[must_use]
46-
pub(super) fn collect_results(&self, record: &FileRecord) -> Vec<SearchResult> {
46+
pub(super) fn collect_results(
47+
&self,
48+
record_idx: usize,
49+
record: &FileRecord,
50+
) -> Vec<SearchResult> {
4751
let name_count = if self.expand_names {
4852
record.name_count.max(1)
4953
} else {
@@ -54,11 +58,15 @@ impl<'a> RecordExpander<'a> {
5458
} else {
5559
1
5660
};
57-
let cached_path = if self.resolve_paths {
58-
self.path_cache.get(record.frs)
59-
} else {
60-
None
61-
};
61+
let path_index = self.path_cache.index();
62+
let path_resolver = self.path_cache.resolver();
63+
let cached_path = self.resolve_paths.then(|| {
64+
debug_assert!(
65+
path_resolver.is_valid_idx(record_idx),
66+
"collect_results only resolves paths for valid record indices"
67+
);
68+
path_resolver.materialize_path(path_index, record_idx)
69+
});
6270

6371
let mut results = Vec::with_capacity(usize::from(name_count) * usize::from(stream_count));
6472
for name_idx in 0..name_count {
@@ -76,6 +84,7 @@ impl<'a> RecordExpander<'a> {
7684
self.resolve_result_path(
7785
expanded_result,
7886
record,
87+
record_idx,
7988
name_idx,
8089
stream_idx,
8190
cached_path.clone(),
@@ -98,19 +107,22 @@ impl<'a> RecordExpander<'a> {
98107
&self,
99108
result: SearchResult,
100109
record: &FileRecord,
110+
record_idx: usize,
101111
name_idx: u16,
102112
stream_idx: u16,
103113
cached_path: Option<String>,
104114
) -> SearchResult {
115+
let path_index = self.path_cache.index();
116+
let path_resolver = self.path_cache.resolver();
105117
let stream = self
106118
.index
107119
.get_stream_at(record, stream_idx)
108120
.unwrap_or(&record.first_stream);
109121

110122
let mut base_path = if name_idx == 0 {
111-
cached_path.unwrap_or_else(|| self.index.build_path(record.frs))
123+
cached_path.unwrap_or_else(|| path_resolver.materialize_path(path_index, record_idx))
112124
} else {
113-
self.index.build_path_for_name(record, name_idx)
125+
path_resolver.materialize_path_for_name(path_index, record_idx, name_idx)
114126
};
115127

116128
let stream_name = self.index.stream_name(stream);

crates/uffs-core/src/index_search/query/planning.rs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,25 @@ use uffs_mft::index::{FileRecord, MftIndex, PathCache};
44

55
use crate::index_search::IndexPattern;
66

7+
/// Candidate-record scan plan for `IndexQuery::collect()`.
8+
pub(super) enum ScanPlan<'a> {
9+
/// Scan the full record slice directly.
10+
Full(&'a [FileRecord]),
11+
/// Scan a narrowed set of record indices from the extension index.
12+
Filtered {
13+
/// Backing slice used for index lookup.
14+
records: &'a [FileRecord],
15+
/// Candidate indices from the extension index.
16+
indices: Vec<u32>,
17+
},
18+
}
19+
720
/// Precomputed inputs for `IndexQuery::collect()`.
821
pub(super) struct CollectPlan<'a> {
922
/// Shared path cache used for path validity checks and materialization.
1023
pub(super) path_cache: PathCache<'a>,
1124
/// How to scan candidate records: the full slice or extension-index candidates.
12-
pub(super) records_to_scan: Vec<&'a FileRecord>,
25+
pub(super) scan_plan: ScanPlan<'a>,
1326
}
1427

1528
impl<'a> CollectPlan<'a> {
@@ -25,21 +38,15 @@ impl<'a> CollectPlan<'a> {
2538
include_system_metafiles: bool,
2639
) -> Self {
2740
let path_cache = PathCache::build(index, include_system_metafiles);
28-
let extension_filter_indices = Self::build_extension_filter_indices(pattern, index);
2941
let records = index.records();
30-
let records_to_scan: Vec<&FileRecord> = extension_filter_indices.as_ref().map_or_else(
31-
|| records.iter().collect(),
32-
|indices| {
33-
indices
34-
.iter()
35-
.filter_map(|&idx| records.get(idx as usize))
36-
.collect()
37-
},
42+
let scan_plan = Self::build_extension_filter_indices(pattern, index).map_or(
43+
ScanPlan::Full(records),
44+
|indices| ScanPlan::Filtered { records, indices },
3845
);
3946

4047
Self {
4148
path_cache,
42-
records_to_scan,
49+
scan_plan,
4350
}
4451
}
4552

0 commit comments

Comments
 (0)