Skip to content

Commit 33f22a9

Browse files
dtsongclaude
andauthored
fix: replace fragile datetime string slicing with robust parser (#20)
* fix: replace fragile datetime string slicing with robust parser (#6) Replace hardcoded `res[:23]` truncation with `_parse_datetime()` helper that preserves microsecond precision and handles variable-length fractional seconds, timezone suffixes, and trailing whitespace. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address review feedback for datetime parsing - Normalize Z suffix to +00:00 for Python 3.10 compatibility - Add error logging at call site before ValueError propagates - Fix incorrect skipif guard on timezone test (+HH:MM works on 3.7+) - Add tests: Z suffix, 7-digit boundary, leading whitespace, nanoseconds with timezone, empty string Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: remove incorrect skipif and add missing edge case tests - Remove skipif on test_nanosecond_precision_with_timezone — after truncation the string is valid on Python 3.10 - Add test_nanosecond_precision_with_z_suffix (combined Z + truncation) - Add test_trailing_dot_raises (malformed fractional seconds) - Remove unused sys import Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address remaining review suggestions - Expand docstring to document all normalizations (whitespace, Z suffix, truncation) - Add defensive guards for empty/missing datetime results, matching the existing int path - Enrich error log with database name and SQL context - Add tests: all-nines truncation boundary, negative timezone offset Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 13963a4 commit 33f22a9

2 files changed

Lines changed: 110 additions & 2 deletions

File tree

data_diff/databases/base.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,27 @@
8181
logger = logging.getLogger("database")
8282

8383

84+
def _parse_datetime(s: str) -> datetime:
85+
"""Parse an ISO 8601 datetime string with the following normalizations:
86+
87+
- Strips leading/trailing whitespace
88+
- Converts 'Z' timezone suffix to '+00:00' for Python 3.10 compatibility
89+
- Truncates sub-microsecond precision (>6 fractional digits) to microseconds
90+
"""
91+
s = s.strip()
92+
if s.endswith("Z"):
93+
s = s[:-1] + "+00:00"
94+
dot = s.rfind(".")
95+
if dot != -1:
96+
frac_end = dot + 1
97+
while frac_end < len(s) and s[frac_end].isdigit():
98+
frac_end += 1
99+
frac_digits = frac_end - dot - 1
100+
if frac_digits > 6:
101+
s = s[: dot + 7] + s[frac_end:]
102+
return datetime.fromisoformat(s)
103+
104+
84105
class CompileError(Exception):
85106
pass
86107

@@ -985,9 +1006,23 @@ def query(self, sql_ast: Expr | Generator, res_type: type = None, log_message: s
9851006
return None
9861007
return int(res)
9871008
elif res_type is datetime:
988-
res = _one(_one(res))
1009+
if not res:
1010+
raise ValueError("Datetime query returned 0 rows, expected 1")
1011+
row = _one(res)
1012+
if not row:
1013+
raise ValueError("Datetime query row is empty, expected 1 column")
1014+
res = _one(row)
9891015
if isinstance(res, str):
990-
res = datetime.fromisoformat(res[:23]) # TODO use a better parsing method
1016+
try:
1017+
res = _parse_datetime(res)
1018+
except ValueError:
1019+
logger.error(
1020+
"Failed to parse datetime string returned by database %s: %r (sql: %s)",
1021+
self.name,
1022+
res,
1023+
sql_code,
1024+
)
1025+
raise
9911026
return res
9921027
elif res_type is tuple:
9931028
if len(res) != 1:

tests/test_datetime_parsing.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from datetime import datetime, timedelta, timezone
2+
3+
import pytest
4+
5+
from data_diff.databases.base import _parse_datetime
6+
7+
8+
class TestParseDatetime:
9+
def test_standard_microsecond_format(self):
10+
result = _parse_datetime("2022-06-03 12:24:35.123456")
11+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
12+
13+
def test_millisecond_precision(self):
14+
result = _parse_datetime("2022-06-03 12:24:35.123")
15+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123000)
16+
17+
def test_no_fractional_seconds(self):
18+
result = _parse_datetime("2022-06-03 12:24:35")
19+
assert result == datetime(2022, 6, 3, 12, 24, 35)
20+
21+
def test_nanosecond_precision_truncated(self):
22+
result = _parse_datetime("2022-06-03 12:24:35.123456789")
23+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
24+
25+
def test_seven_fractional_digits_truncated(self):
26+
result = _parse_datetime("2022-06-03 12:24:35.1234567")
27+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
28+
29+
def test_trailing_whitespace(self):
30+
result = _parse_datetime("2022-06-03 12:24:35.123456 ")
31+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
32+
33+
def test_leading_whitespace(self):
34+
result = _parse_datetime(" 2022-06-03 12:24:35.123456")
35+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
36+
37+
def test_timezone_offset(self):
38+
result = _parse_datetime("2022-06-03T12:24:35+00:00")
39+
assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc)
40+
41+
def test_z_suffix_utc(self):
42+
result = _parse_datetime("2022-06-03T12:24:35Z")
43+
assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc)
44+
45+
def test_nanosecond_precision_with_timezone(self):
46+
result = _parse_datetime("2022-06-03T12:24:35.123456789+05:30")
47+
expected_tz = timezone(timedelta(hours=5, minutes=30))
48+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=expected_tz)
49+
50+
def test_nanosecond_precision_with_z_suffix(self):
51+
result = _parse_datetime("2022-06-03T12:24:35.123456789Z")
52+
assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=timezone.utc)
53+
54+
def test_all_nines_nanoseconds_truncates_not_rounds(self):
55+
result = _parse_datetime("2022-06-03 23:59:59.999999999")
56+
assert result == datetime(2022, 6, 3, 23, 59, 59, 999999)
57+
58+
def test_negative_timezone_offset(self):
59+
result = _parse_datetime("2022-06-03T12:24:35-05:00")
60+
expected_tz = timezone(timedelta(hours=-5))
61+
assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=expected_tz)
62+
63+
def test_trailing_dot_raises(self):
64+
with pytest.raises(ValueError):
65+
_parse_datetime("2022-06-03T12:24:35.")
66+
67+
def test_invalid_string_raises(self):
68+
with pytest.raises(ValueError):
69+
_parse_datetime("not-a-date")
70+
71+
def test_empty_string_raises(self):
72+
with pytest.raises(ValueError):
73+
_parse_datetime("")

0 commit comments

Comments
 (0)