11import collections
2- from typing import Any , Optional , List , Dict , Tuple
2+ from enum import Enum
3+ from typing import Any , Optional , List , Dict , Tuple , Type
34
45from runtype import dataclass
56from data_diff .diff_tables import DiffResultWrapper
7+ from data_diff .sqeleton .abcs .database_types import (
8+ JSON ,
9+ Boolean ,
10+ ColType ,
11+ Array ,
12+ ColType_UUID ,
13+ Date ,
14+ FractionalType ,
15+ NumericType ,
16+ Struct ,
17+ TemporalType ,
18+ ColType_Alphanum ,
19+ String_Alphanum
20+ )
621
722
823def jsonify_error (table1 : List [str ], table2 : List [str ], dbt_model : str , error : str ) -> "FailedDiff" :
@@ -14,12 +29,13 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
1429 error = error ,
1530 ).json ()
1631
# One entry per column: the (name, type, kind) triple that
# _jsonify_columns_diff unpacks when building the JSON column summary.
_ColumnEntry = Tuple[str, str, Type[ColType]]
Columns = List[_ColumnEntry]
1733
1834def jsonify (
1935 diff : DiffResultWrapper ,
2036 dbt_model : str ,
21- dataset1_columns : Dict [ str , Tuple [ str , str , Any , Any , Any ]] ,
22- dataset2_columns : Dict [ str , Tuple [ str , str , Any , Any , Any ]] ,
37+ dataset1_columns : Columns ,
38+ dataset2_columns : Columns ,
2339 columns_diff : Dict [str , List [str ]],
2440 with_summary : bool = False ,
2541) -> "JsonDiff" :
@@ -136,10 +152,36 @@ class ExclusiveColumns:
136152 dataset1 : List [str ]
137153 dataset2 : List [str ]
138154
class ColumnKind(Enum):
    """Coarse category of a column, as reported in the JSON output.

    The string value of each member is what ends up in the serialized
    ``kind`` field of a column.
    """

    INTEGER = "integer"
    FLOAT = "float"
    STRING = "string"
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"
    BOOL = "boolean"
    # Fallback for column types that have no dedicated category.
    UNSUPPORTED = "unsupported"
164+
# Lookup table from database column types to the JSON-facing ColumnKind.
#
# _map_kind scans this list top to bottom and returns the first match, so
# ORDER MATTERS: a more specific type must appear before any broader type it
# derives from (presumably Date before TemporalType and FractionalType before
# NumericType -- confirm against the class hierarchy in database_types).
# ColType comes last and acts as the catch-all.
#
# NOTE(review): no entry maps to ColumnKind.TIME.
KIND_MAPPING: List[Tuple[Type[ColType], ColumnKind]] = [
    (Boolean, ColumnKind.BOOL),
    (Date, ColumnKind.DATE),
    (TemporalType, ColumnKind.DATETIME),
    (FractionalType, ColumnKind.FLOAT),
    (NumericType, ColumnKind.INTEGER),
    # Identifier-like and composite types are all reported as strings.
    (ColType_UUID, ColumnKind.STRING),
    (ColType_Alphanum, ColumnKind.STRING),
    (String_Alphanum, ColumnKind.STRING),
    (JSON, ColumnKind.STRING),
    (Array, ColumnKind.STRING),
    (Struct, ColumnKind.STRING),
    (ColType, ColumnKind.UNSUPPORTED),
]
179+
@dataclass
class Column:
    """Description of a single column in the JSON columns summary."""

    # Column name.
    name: str
    # Column type, kept as a string.
    type: str
    # String value of a ColumnKind member (e.g. "integer", "string").
    kind: str
143185
144186@dataclass
145187class JsonColumnsSummary :
@@ -267,19 +309,23 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
267309 )
268310
269311
270- def _jsonify_columns_diff (dataset1_columns : Dict [ str , Tuple [ str , str , Any , Any , Any ]] ,
271- dataset2_columns : Dict [ str , Tuple [ str , str , Any , Any , Any ]] ,
312+ def _jsonify_columns_diff (dataset1_columns : Columns ,
313+ dataset2_columns : Columns ,
272314 columns_diff : Dict [str , List [str ]], key_columns : List [str ]) -> JsonColumnsSummary :
273315 return JsonColumnsSummary (
274316 dataset1 = [
275- Column (name = name , type = type_ )
276- for (name , type_ , * _ )
277- in dataset1_columns .values ()
317+ Column (name = name ,
318+ type = type_ ,
319+ kind = _map_kind (kind ).value )
320+ for (name , type_ , kind )
321+ in dataset1_columns
278322 ],
279323 dataset2 = [
280- Column (name = name , type = type_ )
281- for (name , type_ , * _ )
282- in dataset2_columns .values ()
324+ Column (name = name ,
325+ type = type_ ,
326+ kind = _map_kind (kind ).value )
327+ for (name , type_ , kind )
328+ in dataset2_columns
283329 ],
284330 primaryKey = key_columns ,
285331 exclusive = ExclusiveColumns (
@@ -288,3 +334,9 @@ def _jsonify_columns_diff(dataset1_columns: Dict[str, Tuple[str, str, Any, Any,
288334 ),
289335 typeChanged = list (columns_diff .get ("changed" , [])),
290336 )
337+
def _map_kind(kind: Type[ColType]) -> ColumnKind:
    """Translate a database column type into its JSON-facing ColumnKind.

    KIND_MAPPING is scanned in order and the first matching entry wins.

    Args:
        kind: The column type to classify. Both a column-type class (as the
            annotation states) and an instance of such a class are accepted.

    Returns:
        The ColumnKind of the first matching KIND_MAPPING entry, or
        ColumnKind.UNSUPPORTED when nothing matches.
    """
    # isinstance() never matches when `kind` is itself a class, which would
    # silently classify every column as UNSUPPORTED. Choose the check that
    # fits what was actually passed in.
    matches = issubclass if isinstance(kind, type) else isinstance
    for raw_kind, json_kind in KIND_MAPPING:
        if matches(kind, raw_kind):
            return json_kind
    return ColumnKind.UNSUPPORTED
0 commit comments