Skip to content

Commit 115f4e6

Browse files
authored
Fix(clickhouse): remove fractional seconds when time column is datetime/timestamp type (#3261)
1 parent 2585336 commit 115f4e6

File tree

3 files changed

+81
-8
lines changed

3 files changed

+81
-8
lines changed

sqlmesh/core/model/definition.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,15 @@ def convert_to_time_column(
625625

626626
time_column_type = columns_to_types[self.time_column.column.name]
627627

628-
return to_time_column(time, time_column_type, self.time_column.format)
628+
return to_time_column(
629+
time,
630+
time_column_type,
631+
self.time_column.format,
632+
include_microseconds=not (
633+
self.dialect in TIME_COLUMN_NO_MICROSECONDS_TYPES
634+
and time_column_type.is_type(*TIME_COLUMN_NO_MICROSECONDS_TYPES[self.dialect])
635+
),
636+
)
629637
return exp.convert(time)
630638

631639
def set_mapping_schema(self, schema: t.Dict) -> None:
@@ -2440,3 +2448,8 @@ def _meta_renderer(
24402448
def get_model_name(path: Path) -> str:
24412449
path_parts = list(path.parts[path.parts.index("models") + 1 : -1]) + [path.stem]
24422450
return ".".join(path_parts[-3:])
2451+
2452+
2453+
# Maps a dialect name to the time column data types for which
# `convert_to_time_column` asks `to_time_column` to drop fractional seconds
# (include_microseconds=False) when rendering the time literal.
TIME_COLUMN_NO_MICROSECONDS_TYPES: t.Dict[str, t.Tuple[exp.DataType.Type, ...]] = {
    "clickhouse": (
        exp.DataType.Type.DATETIME,
        exp.DataType.Type.TIMESTAMP,
    ),
}

sqlmesh/utils/date.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -256,14 +256,20 @@ def to_ds(obj: TimeLike) -> str:
256256
return to_ts(obj)[0:10]
257257

258258

259-
def to_ts(obj: TimeLike, include_microseconds: bool = True) -> str:
    """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS formatted string.

    The timezone information is always stripped from the result.

    Args:
        obj: The TimeLike object to convert.
        include_microseconds: When False, the microsecond component is zeroed
            before formatting.

    Returns:
        The timezone-naive string produced by `isoformat(sep=" ")`.
    """
    # Collect all field overrides and apply them with a single `replace` call.
    overrides: t.Dict[str, t.Any] = {"tzinfo": None}
    if not include_microseconds:
        overrides["microsecond"] = 0
    return to_datetime(obj).replace(**overrides).isoformat(sep=" ")
262265

263266

264-
def to_tstz(obj: TimeLike, include_microseconds: bool = True) -> str:
    """Converts a TimeLike object into YYYY-MM-DD HH:MM:SS+00:00 formatted string.

    Unlike `to_ts`, the timezone information returned by `to_datetime` is kept.

    Args:
        obj: The TimeLike object to convert.
        include_microseconds: When False, the microsecond component is zeroed
            before formatting.

    Returns:
        The string produced by `isoformat(sep=" ")`.
    """
    converted = to_datetime(obj)
    truncated = converted if include_microseconds else converted.replace(microsecond=0)
    return truncated.isoformat(sep=" ")
267273

268274

269275
def is_date(obj: TimeLike) -> bool:
@@ -289,7 +295,7 @@ def make_inclusive(start: TimeLike, end: TimeLike) -> Interval:
289295
In the ds ('2020-01-01') case, because start_ds and end_ds are categorical, between works even if
290296
start_ds and end_ds are equivalent. However, when we move to ts ('2022-01-01 12:00:00'), because timestamps
291297
are numeric, using simple equality doesn't make sense. When the end is not a categorical date, then it is
292-
treated as an exclusive range and converted to inclusive by subtracting 1 millisecond.
298+
treated as an exclusive range and converted to inclusive by subtracting 1 microsecond.
293299
294300
Args:
295301
start: Start timelike object.
@@ -347,16 +353,22 @@ def to_time_column(
347353
time_column: t.Union[TimeLike, exp.Null],
348354
time_column_type: exp.DataType,
349355
time_column_format: t.Optional[str] = None,
356+
include_microseconds: bool = True,
350357
) -> exp.Expression:
351358
"""Convert a TimeLike object to the same time format and type as the model's time column."""
352359
if isinstance(time_column, exp.Null):
353360
return exp.cast(time_column, to=time_column_type)
354361
if time_column_type.is_type(exp.DataType.Type.DATE):
355362
return exp.cast(exp.Literal.string(to_ds(time_column)), to="date")
356363
if time_column_type.is_type(*TEMPORAL_TZ_TYPES):
357-
return exp.cast(exp.Literal.string(to_tstz(time_column)), to=time_column_type)
364+
return exp.cast(
365+
exp.Literal.string(to_tstz(time_column, include_microseconds)),
366+
to=time_column_type,
367+
)
358368
if time_column_type.is_type(*exp.DataType.TEMPORAL_TYPES):
359-
return exp.cast(exp.Literal.string(to_ts(time_column)), to=time_column_type)
369+
return exp.cast(
370+
exp.Literal.string(to_ts(time_column, include_microseconds)), to=time_column_type
371+
)
360372

361373
if time_column_format:
362374
time_column = to_datetime(time_column).strftime(time_column_format)

tests/core/engine_adapter/test_clickhouse.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from sqlmesh.core.schema_diff import SchemaDiffer
1111
from datetime import datetime
1212
from pytest_mock.plugin import MockerFixture
13+
from sqlmesh.core import dialect as d
14+
from sqlmesh.utils.date import to_datetime
1315

1416
pytestmark = [pytest.mark.clickhouse, pytest.mark.engine]
1517

@@ -808,3 +810,49 @@ def test_scd_type_2_by_column(
808810
""",
809811
dialect=adapter.dialect,
810812
).sql(adapter.dialect)
813+
814+
815+
def test_to_time_column():
    """Checks fractional-second handling when converting to a ClickHouse time column."""

    def _load(sql: str):
        # Parse the model definition and build the model under test.
        return load_sql_based_model(d.parse(sql))

    ts_with_micros = "2022-01-01 00:00:00.000001"

    # datetime/timestamp data type should remove fractional seconds
    truncating_model = _load(
        """
        MODEL (
            name db.table,
            kind INCREMENTAL_BY_TIME_RANGE(
                time_column (ds)
            ),
            dialect clickhouse
        );

        SELECT ds::datetime
    """
    )
    expected_truncated = d.parse_one("'2022-01-01 00:00:00'")
    # Exercise both the string and the datetime input forms, in that order.
    for value in (ts_with_micros, to_datetime(ts_with_micros)):
        assert truncating_model.convert_to_time_column(value).this == expected_truncated

    # DateTime64 data type should retain fractional seconds
    preserving_model = _load(
        """
        MODEL (
            name db.table,
            kind INCREMENTAL_BY_TIME_RANGE(
                time_column (ds)
            ),
            dialect clickhouse
        );

        SELECT ds::DateTime64
    """
    )
    expected_preserved = d.parse_one("'2022-01-01 00:00:00.000001'")
    for value in (ts_with_micros, to_datetime(ts_with_micros)):
        assert preserving_model.convert_to_time_column(value).this == expected_preserved

0 commit comments

Comments
 (0)