Skip to content

Commit d90c090

Browse files
committed
chore: development v0.2.124 - comprehensive testing complete [auto-commit]
1 parent 0595052 commit d90c090

File tree

15 files changed

+267
-41
lines changed

15 files changed

+267
-41
lines changed

CHANGELOG.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2323
- Cleaned up all TTAPI references from justfile and build scripts
2424
- Updated justfile header and recipes for UFFS
2525

26-
## [0.2.123] - 2026-01-27
26+
## [0.2.124] - 2026-01-27
2727

2828
### Added
2929
- Baseline CI validation for modernization effort
@@ -46,7 +46,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4646
### Fixed
4747
- Various MFT parsing edge cases
4848

49-
[Unreleased]: https://github.com/githubrobbi/UltraFastFileSearch/compare/v0.2.123...HEAD
50-
[0.2.123]: https://github.com/githubrobbi/UltraFastFileSearch/compare/v0.2.114...v0.2.123
49+
[Unreleased]: https://github.com/githubrobbi/UltraFastFileSearch/compare/v0.2.124...HEAD
50+
[0.2.124]: https://github.com/githubrobbi/UltraFastFileSearch/compare/v0.2.114...v0.2.124
5151
[0.2.114]: https://github.com/githubrobbi/UltraFastFileSearch/releases/tag/v0.2.114
5252

Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ exclude = [
3939
# Workspace Package Metadata (inherited by all crates)
4040
# ─────────────────────────────────────────────────────────────────────────────
4141
[workspace.package]
42-
version = "0.2.123"
42+
version = "0.2.124"
4343
edition = "2024"
4444
rust-version = "1.85"
4545
license = "MPL-2.0 OR LicenseRef-UFFS-Commercial"
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# CHANGELOG_HEALING - C++ vs Rust UFFS Parity Fixes
2+
3+
**Date:** 2026-01-27 18:00
4+
**Session:** C++ vs Rust Output Parity
5+
**Version:** v0.2.123 → v0.2.124
6+
7+
## Summary
8+
9+
Fixing identified differences between C++ and Rust UFFS outputs to achieve feature parity.
10+
11+
## Issues Identified
12+
13+
| # | Issue | Severity | Root Cause |
14+
|---|-------|----------|------------|
15+
| 1 | Size on Disk = Size (wrong) | 🔴 HIGH | Output uses `size` instead of `allocated_size` |
16+
| 2 | Directory Size not aggregated | 🟡 MEDIUM | treesize not applied in all output paths |
17+
| 3 | ADS Name missing stream name | 🟡 MEDIUM | Stream name not appended to Name column |
18+
| 4 | Descendant count off by 28 | 🟡 MEDIUM | ADS/hardlink counting differences |
19+
| 5 | "Descendents" typo | 🟢 LOW | Fixed in v0.2.123 |
20+
21+
---
22+
23+
## Fix 1: Size on Disk Calculation ✅ FIXED
24+
25+
### What Failed
26+
- Rust output shows `Size on Disk = Size` for 100% of entries
27+
- C++ correctly shows `Size on Disk = 0` for resident files (data stored in MFT)
28+
- C++ correctly shows `Size on Disk = allocated_size` (cluster-aligned) for non-resident files
29+
30+
### Why It Failed
31+
- The raw MFT parsing is CORRECT (`allocated_size = 0` for resident files)
32+
- `SearchResult` struct lacked `allocated_size` field
33+
- `results_to_dataframe()` used `result.size` for `allocated_sizes` vector
34+
35+
### How Fixed
36+
1. Added `allocated_size: u64` field to `SearchResult` struct in `crates/uffs-core/src/index_search.rs`
37+
2. Updated `SearchResult::from_record()` to populate from `record.first_stream.size.allocated`
38+
3. Updated `SearchResult::from_expanded()` to populate from `stream_info.size.allocated`
39+
4. Fixed `results_to_dataframe()` in `crates/uffs-cli/src/commands.rs` to use `result.allocated_size`
40+
41+
---
42+
43+
## Fix 2: Directory Size Aggregation ✅ FIXED
44+
45+
### What Failed
46+
- C++ shows directory Size = sum of all descendant sizes (treesize)
47+
- Rust shows directory Size = directory's own size (not aggregated)
48+
49+
### Why It Failed
50+
- Code exists in `commands.rs` to apply treesize transformation
51+
- But it was only applied in `results_to_dataframe()`, not in streaming paths
52+
- Streaming paths (`search_multi_drive_filtered`, `search_multi_drive_streaming`) bypassed the transformation
53+
54+
### How Fixed
55+
1. Created `apply_directory_treesize()` helper function in `crates/uffs-core/src/tree.rs`
56+
2. Exported function from `crates/uffs-core/src/lib.rs`
57+
3. Refactored `results_to_dataframe()` to use the helper function
58+
4. Applied transformation in `search_multi_drive_filtered()` streaming path
59+
5. Applied transformation in `search_multi_drive_streaming()` streaming path
60+
61+
---
62+
63+
## Fix 3: ADS Name Column ✅ FIXED
64+
65+
### What Failed
66+
- C++ Name column: `readme.txt:Zone.Identifier` (includes stream name)
67+
- Rust Name column: `readme.txt` (base filename only)
68+
69+
### Why It Failed
70+
- `SearchResult::from_expanded()` only stored base filename in `name` field
71+
- Stream name was stored separately in `stream_name` field but not combined
72+
73+
### How Fixed
74+
1. Updated `SearchResult::from_expanded()` in `crates/uffs-core/src/index_search.rs`
75+
2. For ADS entries (non-empty stream_name), format name as `{base_name}:{stream_name}`
76+
3. This matches C++ behavior where ADS entries show full name with stream suffix
77+
78+
---
79+
80+
## Fix 4: Descendant Count Difference ⏳ DEFERRED
81+
82+
### What Failed
83+
- C++ G:\: 15,115 descendants
84+
- Rust G:\: 15,087 descendants (28 fewer)
85+
86+
### Why It Failed
87+
- Likely due to ADS handling differences
88+
- C++ may count ADS as separate entries in descendant count
89+
- Rust tree metrics are computed per-FRS, not per-stream
90+
91+
### Status
92+
- Deferred for now - requires Windows testing to verify
93+
- The difference is small (0.18%) and may be intentional design difference
94+
- ADS are not separate file records, so not counting them in descendants may be correct
95+
96+
---
97+
98+
## Commits
99+
100+
| Commit | Description |
101+
|--------|-------------|
102+
| v0.2.124 | fix: C++ parity - Size on Disk, Directory Size, ADS Name |
103+

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
2121

2222
**UFFS reads the MFT directly** - once - and queries it in memory using Polars DataFrames. This is like reading the entire phonebook once instead of looking up each name individually.
2323

24-
### Benchmark Results (v0.2.123)
24+
### Benchmark Results (v0.2.124)
2525

2626
| Drive Type | Records | Time | Throughput |
2727
|------------|---------|------|------------|
@@ -33,7 +33,7 @@ Traditional file search tools (including `os.walk`, `FindFirstFile`, etc.) work
3333

3434
| Comparison | Records | Time | Notes |
3535
|------------|---------|------|-------|
36-
| **UFFS v0.2.123** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
36+
| **UFFS v0.2.124** | **18.7 Million** | **~142 seconds** | All disks, fast mode |
3737
| UFFS v0.1.30 | 18.7 Million | ~315 seconds | Baseline |
3838
| Everything | 19 Million | 178 seconds | All disks |
3939
| WizFile | 6.5 Million | 299 seconds | Single HDD |

crates/uffs-cli/src/commands.rs

Lines changed: 81 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,7 +1173,9 @@ fn results_to_dataframe(
11731173

11741174
if let Some(rec) = record {
11751175
// Populate from record's StandardInfo
1176-
allocated_sizes.push(result.size); // TODO: Get actual allocated size from stream
1176+
// Use allocated_size from SearchResult (populated from stream's
1177+
// SizeInfo.allocated)
1178+
allocated_sizes.push(result.allocated_size);
11771179
created_times.push(rec.stdinfo.created);
11781180
modified_times.push(rec.stdinfo.modified);
11791181
accessed_times.push(rec.stdinfo.accessed);
@@ -1288,23 +1290,8 @@ fn results_to_dataframe(
12881290
// Replace size and allocated_size columns with tree metrics for directories
12891291
// (C++ parity) For directories: size = treesize, allocated_size =
12901292
// tree_allocated For files: keep original size and allocated_size
1291-
use uffs_polars::{IntoLazy, col, when};
1292-
df = df
1293-
.lazy()
1294-
.with_column(
1295-
when(col("is_directory"))
1296-
.then(col("treesize"))
1297-
.otherwise(col("size"))
1298-
.alias("size"),
1299-
)
1300-
.with_column(
1301-
when(col("is_directory"))
1302-
.then(col("tree_allocated"))
1303-
.otherwise(col("allocated_size"))
1304-
.alias("allocated_size"),
1305-
)
1306-
.collect()
1307-
.map_err(|err| anyhow::anyhow!("Failed to merge tree metrics: {err}"))?;
1293+
df = uffs_core::apply_directory_treesize(&df)
1294+
.map_err(|err| anyhow::anyhow!("Failed to apply directory treesize: {err}"))?;
13081295

13091296
// Add path_only column (directory portion of path)
13101297
df = uffs_core::add_path_only_column(&df)
@@ -1598,7 +1585,27 @@ async fn search_multi_drive_filtered(
15981585
Ok(df) => {
15991586
// Add path_only column (directory portion of path)
16001587
match uffs_core::add_path_only_column(&df) {
1601-
Ok(df_with_path_only) => df_with_path_only,
1588+
Ok(df_with_path_only) => {
1589+
// Apply treesize transformation for directories (C++ parity)
1590+
match uffs_core::apply_directory_treesize(&df_with_path_only) {
1591+
Ok(df_with_treesize) => df_with_treesize,
1592+
Err(e) => {
1593+
let _ = tx
1594+
.send(DriveResult {
1595+
drive: drive_char,
1596+
df: None,
1597+
records_read,
1598+
matches,
1599+
error: Some(format!(
1600+
"Failed to apply treesize: {e}"
1601+
)),
1602+
paths_resolved: false,
1603+
})
1604+
.await;
1605+
return;
1606+
}
1607+
}
1608+
}
16021609
Err(e) => {
16031610
let _ = tx
16041611
.send(DriveResult {
@@ -1629,7 +1636,23 @@ async fn search_multi_drive_filtered(
16291636
}
16301637
}
16311638
} else {
1632-
filtered
1639+
// No path resolver - still apply treesize transformation
1640+
match uffs_core::apply_directory_treesize(&filtered) {
1641+
Ok(df) => df,
1642+
Err(e) => {
1643+
let _ = tx
1644+
.send(DriveResult {
1645+
drive: drive_char,
1646+
df: None,
1647+
records_read,
1648+
matches,
1649+
error: Some(format!("Failed to apply treesize: {e}")),
1650+
paths_resolved: false,
1651+
})
1652+
.await;
1653+
return;
1654+
}
1655+
}
16331656
};
16341657

16351658
// Add drive column
@@ -1877,7 +1900,27 @@ async fn search_multi_drive_streaming<W: Write + Send + 'static>(
18771900
Ok(df) => {
18781901
// Add path_only column (directory portion of path)
18791902
match uffs_core::add_path_only_column(&df) {
1880-
Ok(df_with_path_only) => df_with_path_only,
1903+
Ok(df_with_path_only) => {
1904+
// Apply treesize transformation for directories (C++ parity)
1905+
match uffs_core::apply_directory_treesize(&df_with_path_only) {
1906+
Ok(df_with_treesize) => df_with_treesize,
1907+
Err(e) => {
1908+
let _ = tx
1909+
.send(DriveResult {
1910+
drive: drive_char,
1911+
df: None,
1912+
records_read,
1913+
matches,
1914+
error: Some(format!(
1915+
"Failed to apply treesize: {e}"
1916+
)),
1917+
paths_resolved: false,
1918+
})
1919+
.await;
1920+
return;
1921+
}
1922+
}
1923+
}
18811924
Err(e) => {
18821925
let _ = tx
18831926
.send(DriveResult {
@@ -1908,7 +1951,23 @@ async fn search_multi_drive_streaming<W: Write + Send + 'static>(
19081951
}
19091952
}
19101953
} else {
1911-
filtered
1954+
// No path resolver - still apply treesize transformation
1955+
match uffs_core::apply_directory_treesize(&filtered) {
1956+
Ok(df) => df,
1957+
Err(e) => {
1958+
let _ = tx
1959+
.send(DriveResult {
1960+
drive: drive_char,
1961+
df: None,
1962+
records_read,
1963+
matches,
1964+
error: Some(format!("Failed to apply treesize: {e}")),
1965+
paths_resolved: false,
1966+
})
1967+
.await;
1968+
return;
1969+
}
1970+
}
19121971
};
19131972

19141973
// Add drive column

0 commit comments

Comments
 (0)