Skip to content

Commit 420dea0

Browse files
committed
fix: improve TZ detection with datetime extraction, add tip for override
- Extract full datetime (YYYY-MM-DD HH:MM), not just the date
- Add TIP when a mismatch is detected to try the other TZ offset
- F-drive edge case: C++ ran with wrong DST setting
1 parent 017457e commit 420dea0

File tree

1 file changed

+81
-22
lines changed

1 file changed

+81
-22
lines changed

scripts/verify_parity.rs

Lines changed: 81 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ fn main() {
174174
golden_hashes.line_count, rust_hashes.line_count
175175
);
176176
println!();
177+
println!(" TIP: If timestamps are off by exactly 1 hour, try the other TZ offset:");
178+
println!(" --tz -7 (PDT) or --tz -8 (PST)");
179+
println!();
177180

178181
// Show SORTED diffs first — this is the meaningful comparison
179182
// (Ordered diffs are just noise from different traversal order)
@@ -267,52 +270,108 @@ fn parse_tz_offset(args: &[String]) -> Option<i32> {
267270
None // Auto-detect from baseline
268271
}
269272

270-
/// Auto-detect timezone offset from C++ baseline file.
271-
/// Finds the MOST RECENT date (capture date) and determines PDT vs PST.
272-
/// Pacific DST: 2nd Sunday of March to 1st Sunday of November
273+
/// Auto-detect timezone offset from the most recent datetime in the baseline.
274+
/// Scans the first 20 lines, keeps the latest date found, and applies the
275+
/// calendar-based Pacific DST rules (PDT = -7, PST = -8) to that date.
276+
/// The extracted hour is only shown in the log line; it does not change the offset.
273277
fn detect_tz_from_baseline(baseline_path: &Path) -> i32 {
274278
let file = match std::fs::File::open(baseline_path) {
275279
Ok(f) => f,
276280
Err(_) => return -7,
277281
};
278282
let reader = std::io::BufReader::new(file);
279283

280-
let mut most_recent: Option<(i32, u32, u32)> = None;
284+
// Find the most recent date and extract hour info
285+
let mut most_recent_date: Option<(i32, u32, u32)> = None;
286+
let mut sample_hour: Option<u32> = None;
281287

282-
// Scan first 20 lines to find the most recent date (likely capture date)
283288
for line in std::io::BufRead::lines(reader).take(20).flatten() {
284-
for date in extract_all_dates_from_line(&line) {
285-
if let Some(current) = most_recent {
286-
// Keep the more recent date
287-
if date.0 > current.0
288-
|| (date.0 == current.0 && date.1 > current.1)
289-
|| (date.0 == current.0 && date.1 == current.1 && date.2 > current.2)
289+
for (year, month, day, hour) in extract_all_datetimes_from_line(&line) {
290+
if let Some((cy, cm, cd)) = most_recent_date {
291+
if year > cy
292+
|| (year == cy && month > cm)
293+
|| (year == cy && month == cm && day > cd)
290294
{
291-
most_recent = Some(date);
295+
most_recent_date = Some((year, month, day));
296+
sample_hour = Some(hour);
292297
}
293298
} else {
294-
most_recent = Some(date);
299+
most_recent_date = Some((year, month, day));
300+
sample_hour = Some(hour);
295301
}
296302
}
297303
}
298304

299-
if let Some((year, month, day)) = most_recent {
305+
if let Some((year, month, day)) = most_recent_date {
306+
// Use calendar-based DST rules for Pacific time
300307
let offset = pacific_tz_offset(year, month, day);
301-
println!(
302-
"Auto-detected timezone from capture date {}-{:02}-{:02}: {} ({})",
303-
year,
304-
month,
305-
day,
306-
offset,
307-
if offset == -7 { "PDT" } else { "PST" }
308-
);
308+
let tz_name = if offset == -7 { "PDT" } else { "PST" };
309+
310+
if let Some(hour) = sample_hour {
311+
println!(
312+
"Auto-detected from capture {}-{:02}-{:02} {:02}:xx → {} ({})",
313+
year, month, day, hour, offset, tz_name
314+
);
315+
} else {
316+
println!(
317+
"Auto-detected from capture date {}-{:02}-{:02}: {} ({})",
318+
year, month, day, offset, tz_name
319+
);
320+
}
309321
return offset;
310322
}
311323

312324
println!("Could not auto-detect timezone, defaulting to -7 (PDT)");
313325
-7
314326
}
315327

328+
/// Extract ALL (year, month, day, hour) tuples from a CSV line.
///
/// Scans `line` left to right for ASCII timestamps that begin with
/// `YYYY-MM-DD HH:`. Only that 14-byte prefix is inspected, so both
/// `YYYY-MM-DD HH:MM` and `YYYY-MM-DD HH:MM:SS` are recognised, including a
/// timestamp that is the last thing on the line.
///
/// Matches are returned in order of appearance. A candidate is kept only when
/// the year is 2000..=2100, the month 1..=12, the day 1..=31 and the hour
/// 0..=23; the day is not checked against the length of the month.
fn extract_all_datetimes_from_line(line: &str) -> Vec<(i32, u32, u32, u32)> {
    /// Number of bytes actually inspected: `YYYY-MM-DD HH:`.
    const PREFIX_LEN: usize = 14;

    /// Parse a run of ASCII digits into a number.
    fn field<T: std::str::FromStr>(digits: &[u8]) -> Option<T> {
        std::str::from_utf8(digits).ok()?.parse().ok()
    }

    let bytes = line.as_bytes();
    let mut results = Vec::new();
    let mut i = 0;

    // `get` returns None once fewer than PREFIX_LEN bytes remain, ending the scan
    // without demanding trailing `MM:SS` bytes that are never examined.
    while let Some(window) = bytes.get(i..i + PREFIX_LEN) {
        let shaped = window.iter().enumerate().all(|(pos, &b)| match pos {
            4 | 7 => b == b'-',
            10 => b == b' ',
            13 => b == b':',
            _ => b.is_ascii_digit(),
        });
        if !shaped {
            i += 1;
            continue;
        }

        if let (Some(year), Some(month), Some(day), Some(hour)) = (
            field::<i32>(&window[0..4]),
            field::<u32>(&window[5..7]),
            field::<u32>(&window[8..10]),
            field::<u32>(&window[11..13]),
        ) {
            if (2000..=2100).contains(&year)
                && (1..=12).contains(&month)
                && (1..=31).contains(&day)
                && hour <= 23
            {
                results.push((year, month, day, hour));
            }
        }

        // Advance only past what was matched so a timestamp that follows a
        // short (minute-precision) one is not jumped over.
        i += PREFIX_LEN;
    }

    results
}
374+
316375
/// Extract ALL (year, month, day) tuples from a CSV line.
317376
fn extract_all_dates_from_line(line: &str) -> Vec<(i32, u32, u32)> {
318377
let mut dates = Vec::new();

0 commit comments

Comments
 (0)