Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 680b1bc

Browse files
committed
add both logical & raw type to schema
1 parent b81be7d commit 680b1bc

File tree

2 files changed

+79
-13
lines changed

2 files changed

+79
-13
lines changed

data_diff/dbt.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,13 +308,27 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
308308
)
309309
return
310310

311+
dataset1_columns = [
312+
(name,
313+
type_,
314+
table1.database.dialect.parse_type(table1.table_path, name, type_, *other))
315+
for (name, type_, *other)
316+
in table1_columns.values()
317+
]
318+
dataset2_columns = [
319+
(name,
320+
type_,
321+
table2.database.dialect.parse_type(table2.table_path, name, type_, *other))
322+
for (name, type_, *other)
323+
in table2_columns.values()
324+
]
311325
print(
312326
json.dumps(
313327
jsonify(
314328
diff,
315329
dbt_model=diff_vars.dbt_model,
316-
dataset1_columns=table1_columns,
317-
dataset2_columns=table2_columns,
330+
dataset1_columns=dataset1_columns,
331+
dataset2_columns=dataset2_columns,
318332
with_summary=True,
319333
columns_diff={
320334
"added": columns_added,

data_diff/format.py

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,23 @@
11
import collections
2-
from typing import Any, Optional, List, Dict, Tuple
2+
from enum import Enum
3+
from typing import Any, Optional, List, Dict, Tuple, Type
34

45
from runtype import dataclass
56
from data_diff.diff_tables import DiffResultWrapper
7+
from data_diff.sqeleton.abcs.database_types import (
8+
JSON,
9+
Boolean,
10+
ColType,
11+
Array,
12+
ColType_UUID,
13+
Date,
14+
FractionalType,
15+
NumericType,
16+
Struct,
17+
TemporalType,
18+
ColType_Alphanum,
19+
String_Alphanum
20+
)
621

722

823
def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: str) -> "FailedDiff":
@@ -14,12 +29,13 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
1429
error=error,
1530
).json()
1631

32+
# One entry per column of a dataset: (column name, raw type string, parsed column type).
# NOTE(review): _map_kind() tests the third element with isinstance(), which
# suggests it is a ColType *instance* rather than a class -- confirm and, if so,
# tighten this alias accordingly.
Columns = List[Tuple[str, str, Type[ColType]]]
1733

1834
def jsonify(
1935
diff: DiffResultWrapper,
2036
dbt_model: str,
21-
dataset1_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
22-
dataset2_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
37+
dataset1_columns: Columns,
38+
dataset2_columns: Columns,
2339
columns_diff: Dict[str, List[str]],
2440
with_summary: bool = False,
2541
) -> "JsonDiff":
@@ -136,10 +152,36 @@ class ExclusiveColumns:
136152
dataset1: List[str]
137153
dataset2: List[str]
138154

155+
class ColumnKind(Enum):
    """Coarse, database-independent column category reported in the JSON output.

    The member *value* is the string that is serialized, so the values must
    stay stable even if members are renamed.
    """

    # Numeric kinds.
    INTEGER = "integer"
    FLOAT = "float"

    # Textual kind.
    STRING = "string"

    # Temporal kinds.
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"

    # Everything else.
    BOOL = "boolean"
    UNSUPPORTED = "unsupported"
164+
165+
# Ordered lookup table from parsed column types to the kind exposed in JSON.
#
# Order is significant: _map_kind() walks this list top to bottom and returns
# the kind of the FIRST entry that matches, so an entry listed earlier wins
# over any later entry that would also match. Keep narrower types above the
# broader ones they overlap with (presumably Date/TemporalType and
# FractionalType/NumericType are related that way -- confirm in
# database_types), and keep ColType last as the catch-all.
KIND_MAPPING: List[Tuple[Type[ColType], ColumnKind]] = [
    # Boolean and temporal types.
    (Boolean, ColumnKind.BOOL),
    (Date, ColumnKind.DATE),
    (TemporalType, ColumnKind.DATETIME),
    # Numeric types.
    (FractionalType, ColumnKind.FLOAT),
    (NumericType, ColumnKind.INTEGER),
    # Types that are reported as plain strings.
    (ColType_UUID, ColumnKind.STRING),
    (ColType_Alphanum, ColumnKind.STRING),
    (String_Alphanum, ColumnKind.STRING),
    (JSON, ColumnKind.STRING),
    (Array, ColumnKind.STRING),
    (Struct, ColumnKind.STRING),
    # Fallback for anything not matched above.
    (ColType, ColumnKind.UNSUPPORTED),
]
179+
139180
@dataclass
class Column:
    """A single column as it appears in the JSON columns summary."""

    # Column name.
    name: str
    # Raw type string for the column, passed through unchanged.
    type: str
    # Serialized ColumnKind value (e.g. the result of _map_kind(...).value).
    kind: str
143185

144186
@dataclass
145187
class JsonColumnsSummary:
@@ -267,19 +309,23 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
267309
)
268310

269311

270-
def _jsonify_columns_diff(dataset1_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
271-
dataset2_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
312+
def _jsonify_columns_diff(dataset1_columns: Columns,
313+
dataset2_columns: Columns,
272314
columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
273315
return JsonColumnsSummary(
274316
dataset1=[
275-
Column(name=name, type=type_)
276-
for (name, type_, *_)
277-
in dataset1_columns.values()
317+
Column(name=name,
318+
type=type_,
319+
kind=_map_kind(kind).value)
320+
for (name, type_, kind)
321+
in dataset1_columns
278322
],
279323
dataset2=[
280-
Column(name=name, type=type_)
281-
for (name, type_, *_)
282-
in dataset2_columns.values()
324+
Column(name=name,
325+
type=type_,
326+
kind=_map_kind(kind).value)
327+
for (name, type_, kind)
328+
in dataset2_columns
283329
],
284330
primaryKey=key_columns,
285331
exclusive=ExclusiveColumns(
@@ -288,3 +334,9 @@ def _jsonify_columns_diff(dataset1_columns: Dict[str, Tuple[str, str, Any, Any,
288334
),
289335
typeChanged=list(columns_diff.get("changed", [])),
290336
)
337+
338+
def _map_kind(kind: Type[ColType]) -> ColumnKind:
    """Translate a parsed column type into the ColumnKind used in JSON output.

    KIND_MAPPING is scanned in order and the kind of the first matching entry
    is returned, so the ordering of that table decides ties.

    Args:
        kind: The parsed column type. Either a column-type class (as the
            annotation states) or an instance of one is accepted.

    Returns:
        The matching ColumnKind, or ColumnKind.UNSUPPORTED when nothing in
        KIND_MAPPING matches.
    """
    # The annotation promises a class, but a bare isinstance() check only ever
    # matches instances: a class passed in would silently fall through to
    # UNSUPPORTED. Pick the right predicate so both forms are classified.
    matches = issubclass if isinstance(kind, type) else isinstance

    for raw_kind, json_kind in KIND_MAPPING:
        if matches(kind, raw_kind):
            return json_kind
    return ColumnKind.UNSUPPORTED

0 commit comments

Comments
 (0)