//! Compare two raw MFT files record-by-record.
//!
//! This tool streams through two UFFS-MFT format files and compares them
//! record-by-record without loading the entire files into memory.
//!
//! # Usage
//!
//! ```text
//! compare_raw_mft <file_a> <file_b>
//! ```

| 12 | +use std::env; |
| 13 | +use std::fs::File; |
| 14 | +use std::io::{BufReader, Read}; |
| 15 | +use std::path::Path; |
| 16 | +use std::time::Instant; |
| 17 | + |
| 18 | +use anyhow::{bail, Context, Result}; |
| 19 | + |
/// Length in bytes of the fixed header at the start of a raw MFT file
/// (matches uffs-mft::raw).
const HEADER_SIZE: usize = 64;

/// Eight-byte signature expected at offset 0 of a raw MFT file.
const MAGIC: &[u8; 8] = b"UFFS-MFT";

/// Bit in the header `flags` word marking the data as zstd compressed.
const FLAG_COMPRESSED: u32 = 1 << 0;
| 28 | + |
/// Header fields decoded from the first bytes of a raw MFT file.
///
/// All fields are stored little-endian in the file; the byte ranges below are
/// the ones `RawMftHeader::from_bytes` reads.
#[derive(Debug)]
struct RawMftHeader {
    /// Format version (bytes 8..12).
    version: u32,
    /// Bit flags (bytes 12..16); `FLAG_COMPRESSED` marks compressed data.
    flags: u32,
    /// Size of one record in bytes (bytes 16..20).
    record_size: u32,
    /// Number of records that follow the header (bytes 20..28).
    record_count: u64,
    /// Bytes 28..36. Presumably the uncompressed payload size; this tool
    /// only prints it — TODO confirm against the writer.
    original_size: u64,
    /// Bytes 36..44. Presumably the compressed payload size — TODO confirm.
    // Decoded for completeness but not read by the code in this file, hence
    // the lint exemption.
    #[allow(dead_code)]
    compressed_size: u64,
}
| 40 | + |
| 41 | +impl RawMftHeader { |
| 42 | + fn from_bytes(buf: &[u8; HEADER_SIZE]) -> Result<Self> { |
| 43 | + if &buf[0..8] != MAGIC { |
| 44 | + bail!("Invalid magic bytes"); |
| 45 | + } |
| 46 | + let version = u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]); |
| 47 | + let flags = u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]); |
| 48 | + let record_size = u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]); |
| 49 | + let record_count = u64::from_le_bytes([ |
| 50 | + buf[20], buf[21], buf[22], buf[23], buf[24], buf[25], buf[26], buf[27], |
| 51 | + ]); |
| 52 | + let original_size = u64::from_le_bytes([ |
| 53 | + buf[28], buf[29], buf[30], buf[31], buf[32], buf[33], buf[34], buf[35], |
| 54 | + ]); |
| 55 | + let compressed_size = u64::from_le_bytes([ |
| 56 | + buf[36], buf[37], buf[38], buf[39], buf[40], buf[41], buf[42], buf[43], |
| 57 | + ]); |
| 58 | + Ok(Self { |
| 59 | + version, |
| 60 | + flags, |
| 61 | + record_size, |
| 62 | + record_count, |
| 63 | + original_size, |
| 64 | + compressed_size, |
| 65 | + }) |
| 66 | + } |
| 67 | + |
| 68 | + fn is_compressed(&self) -> bool { |
| 69 | + self.flags & FLAG_COMPRESSED != 0 |
| 70 | + } |
| 71 | +} |
| 72 | + |
| 73 | +fn read_header<P: AsRef<Path>>(path: P) -> Result<(RawMftHeader, BufReader<File>)> { |
| 74 | + let file = File::open(path.as_ref()) |
| 75 | + .with_context(|| format!("Failed to open {}", path.as_ref().display()))?; |
| 76 | + let mut reader = BufReader::with_capacity(1024 * 1024, file); // 1MB buffer |
| 77 | + let mut header_buf = [0u8; HEADER_SIZE]; |
| 78 | + reader.read_exact(&mut header_buf)?; |
| 79 | + let header = RawMftHeader::from_bytes(&header_buf)?; |
| 80 | + Ok((header, reader)) |
| 81 | +} |
| 82 | + |
| 83 | +fn main() -> Result<()> { |
| 84 | + let args: Vec<String> = env::args().collect(); |
| 85 | + if args.len() != 3 { |
| 86 | + eprintln!("Usage: compare_raw_mft <file_a> <file_b>"); |
| 87 | + std::process::exit(1); |
| 88 | + } |
| 89 | + |
| 90 | + let path_a = &args[1]; |
| 91 | + let path_b = &args[2]; |
| 92 | + |
| 93 | + println!("=== Raw MFT Comparison ==="); |
| 94 | + println!("File A: {path_a}"); |
| 95 | + println!("File B: {path_b}"); |
| 96 | + println!(); |
| 97 | + |
| 98 | + // Read headers |
| 99 | + let (header_a, mut reader_a) = read_header(path_a)?; |
| 100 | + let (header_b, mut reader_b) = read_header(path_b)?; |
| 101 | + |
| 102 | + println!("Header A: version={}, flags={}, record_size={}, record_count={}, original_size={}", |
| 103 | + header_a.version, header_a.flags, header_a.record_size, header_a.record_count, header_a.original_size); |
| 104 | + println!("Header B: version={}, flags={}, record_size={}, record_count={}, original_size={}", |
| 105 | + header_b.version, header_b.flags, header_b.record_size, header_b.record_count, header_b.original_size); |
| 106 | + println!(); |
| 107 | + |
| 108 | + // Validate geometry matches |
| 109 | + if header_a.record_size != header_b.record_size { |
| 110 | + bail!("Record size mismatch: {} vs {}", header_a.record_size, header_b.record_size); |
| 111 | + } |
| 112 | + if header_a.record_count != header_b.record_count { |
| 113 | + bail!("Record count mismatch: {} vs {}", header_a.record_count, header_b.record_count); |
| 114 | + } |
| 115 | + |
| 116 | + // Check for compression (not supported in streaming mode) |
| 117 | + if header_a.is_compressed() || header_b.is_compressed() { |
| 118 | + bail!("Compressed files not supported - decompress first"); |
| 119 | + } |
| 120 | + |
| 121 | + let record_size = header_a.record_size as usize; |
| 122 | + let record_count = header_a.record_count; |
| 123 | + let total_bytes = record_count * record_size as u64; |
| 124 | + |
| 125 | + println!("Comparing {} records of {} bytes each ({:.2} GiB)...", |
| 126 | + record_count, record_size, total_bytes as f64 / 1024.0 / 1024.0 / 1024.0); |
| 127 | + println!(); |
| 128 | + |
| 129 | + // Allocate buffers for one record each |
| 130 | + let mut buf_a = vec![0u8; record_size]; |
| 131 | + let mut buf_b = vec![0u8; record_size]; |
| 132 | + |
| 133 | + let mut same_records: u64 = 0; |
| 134 | + let mut diff_records: u64 = 0; |
| 135 | + let mut diff_bytes_total: u64 = 0; |
| 136 | + let mut sample_diffs: Vec<(u64, usize)> = Vec::new(); // (frs, diff_byte_count) |
| 137 | + const MAX_SAMPLES: usize = 20; |
| 138 | + |
| 139 | + let start = Instant::now(); |
| 140 | + let progress_interval = 1_000_000_u64; // Report every 1M records |
| 141 | + |
| 142 | + for frs in 0..record_count { |
| 143 | + // Progress reporting |
| 144 | + if frs > 0 && frs % progress_interval == 0 { |
| 145 | + let elapsed = start.elapsed().as_secs_f64(); |
| 146 | + let rate = frs as f64 / elapsed; |
| 147 | + let eta = (record_count - frs) as f64 / rate; |
| 148 | + println!(" Progress: {} / {} records ({:.1}%), {:.0} rec/s, ETA {:.0}s", |
| 149 | + frs, record_count, frs as f64 / record_count as f64 * 100.0, rate, eta); |
| 150 | + } |
| 151 | + |
| 152 | + // Read records |
| 153 | + reader_a.read_exact(&mut buf_a).with_context(|| format!("EOF reading record {frs} from A"))?; |
| 154 | + reader_b.read_exact(&mut buf_b).with_context(|| format!("EOF reading record {frs} from B"))?; |
| 155 | + |
| 156 | + if buf_a == buf_b { |
| 157 | + same_records += 1; |
| 158 | + } else { |
| 159 | + diff_records += 1; |
| 160 | + // Count differing bytes |
| 161 | + let diff_bytes: usize = buf_a.iter().zip(buf_b.iter()).filter(|(a, b)| a != b).count(); |
| 162 | + diff_bytes_total += diff_bytes as u64; |
| 163 | + if sample_diffs.len() < MAX_SAMPLES { |
| 164 | + sample_diffs.push((frs, diff_bytes)); |
| 165 | + } |
| 166 | + } |
| 167 | + } |
| 168 | + |
| 169 | + let elapsed = start.elapsed(); |
| 170 | + println!(); |
| 171 | + println!("=== Comparison Complete ==="); |
| 172 | + println!("Time: {:.2}s", elapsed.as_secs_f64()); |
| 173 | + println!(); |
| 174 | + println!("Total records: {}", record_count); |
| 175 | + println!("Same records: {}", same_records); |
| 176 | + println!("Diff records: {} ({:.6}%)", diff_records, diff_records as f64 / record_count as f64 * 100.0); |
| 177 | + println!("Total differing bytes: {}", diff_bytes_total); |
| 178 | + if total_bytes > 0 { |
| 179 | + println!("Fraction of differing bytes: {:.9}", diff_bytes_total as f64 / total_bytes as f64); |
| 180 | + } |
| 181 | + println!(); |
| 182 | + |
| 183 | + if !sample_diffs.is_empty() { |
| 184 | + println!("First {} differing records (FRS, differing_bytes_in_record):", sample_diffs.len()); |
| 185 | + for (frs, diff_bytes) in &sample_diffs { |
| 186 | + println!(" FRS {}: {} bytes differ", frs, diff_bytes); |
| 187 | + } |
| 188 | + } else { |
| 189 | + println!("Files are IDENTICAL!"); |
| 190 | + } |
| 191 | + |
| 192 | + Ok(()) |
| 193 | +} |
0 commit comments