Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit b81be7d

Browse files
committed
add dataset schemas to --json output
1 parent 1094999 commit b81be7d

File tree

3 files changed

+97
-15
lines changed

3 files changed

+97
-15
lines changed

data_diff/dbt.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -313,8 +313,10 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
313313
jsonify(
314314
diff,
315315
dbt_model=diff_vars.dbt_model,
316+
dataset1_columns=table1_columns,
317+
dataset2_columns=table2_columns,
316318
with_summary=True,
317-
with_columns={
319+
columns_diff={
318320
"added": columns_added,
319321
"removed": columns_removed,
320322
"changed": columns_type_changed,

data_diff/format.py

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,10 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
1818
def jsonify(
1919
diff: DiffResultWrapper,
2020
dbt_model: str,
21+
dataset1_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
22+
dataset2_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
23+
columns_diff: Dict[str, List[str]],
2124
with_summary: bool = False,
22-
with_columns: Optional[Dict[str, List[str]]] = None,
2325
) -> "JsonDiff":
2426
"""
2527
Converts the diff result into a JSON-serializable format.
@@ -53,16 +55,13 @@ def jsonify(
5355
if with_summary:
5456
summary = _jsonify_diff_summary(diff.get_stats_dict(is_dbt=True))
5557

56-
columns = None
57-
if with_columns:
58-
columns = _jsonify_columns_diff(with_columns, list(key_columns))
58+
columns = _jsonify_columns_diff(dataset1_columns, dataset2_columns, columns_diff, list(key_columns))
5959

6060
is_different = bool(
6161
t1_exclusive_rows
6262
or t2_exclusive_rows
6363
or diff_rows
64-
or with_columns
65-
and (with_columns["added"] or with_columns["removed"] or with_columns["changed"])
64+
or (columns_diff["added"] or columns_diff["removed"] or columns_diff["changed"])
6665
)
6766
return JsonDiff(
6867
status="success",
@@ -137,9 +136,15 @@ class ExclusiveColumns:
137136
dataset1: List[str]
138137
dataset2: List[str]
139138

139+
@dataclass
140+
class Column:
141+
name: str
142+
type: str
140143

141144
@dataclass
142145
class JsonColumnsSummary:
146+
dataset1: List[Column]
147+
dataset2: List[Column]
143148
primaryKey: List[str]
144149
exclusive: ExclusiveColumns
145150
typeChanged: List[str]
@@ -179,7 +184,7 @@ class JsonDiff:
179184
summary: Optional[JsonDiffSummary]
180185
columns: Optional[JsonColumnsSummary]
181186

182-
version: str = "1.0.0"
187+
version: str = "1.1.0"
183188

184189

185190
def _group_rows(
@@ -262,8 +267,20 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
262267
)
263268

264269

265-
def _jsonify_columns_diff(columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
270+
def _jsonify_columns_diff(dataset1_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
271+
dataset2_columns: Dict[str, Tuple[str, str, Any, Any, Any]],
272+
columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
266273
return JsonColumnsSummary(
274+
dataset1=[
275+
Column(name=name, type=type_)
276+
for (name, type_, *_)
277+
in dataset1_columns.values()
278+
],
279+
dataset2=[
280+
Column(name=name, type=type_)
281+
for (name, type_, *_)
282+
in dataset2_columns.values()
283+
],
267284
primaryKey=key_columns,
268285
exclusive=ExclusiveColumns(
269286
dataset2=list(columns_diff.get("added", [])),

tests/test_format.py

Lines changed: 69 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -35,11 +35,28 @@ def test_jsonify_diff(self):
3535
diff=[],
3636
stats={},
3737
)
38-
json_diff = jsonify(diff, dbt_model="my_model")
38+
json_diff = jsonify(
39+
diff,
40+
dbt_model="my_model",
41+
dataset1_columns={
42+
"id": ('id', 'integer', None, None, None),
43+
"value": ('value', 'integer', None, None, None),
44+
},
45+
dataset2_columns={
46+
"id": ('id', 'integer', None, None, None),
47+
"value": ('value', 'integer', None, None, None),
48+
},
49+
columns_diff={
50+
"added": [],
51+
"removed": [],
52+
"typeChanged": [],
53+
}
54+
)
55+
3956
self.assertEqual(
4057
json_diff,
4158
{
42-
"version": "1.0.0",
59+
"version": "1.1.0",
4360
"status": "success",
4461
"result": "different",
4562
"model": "my_model",
@@ -57,8 +74,23 @@ def test_jsonify_diff(self):
5774
},
5875
],
5976
},
77+
"columns": {
78+
"dataset1": [
79+
{"name": "id", "type": "integer"},
80+
{"name": "value", "type": "integer"}
81+
],
82+
"dataset2": [
83+
{"name": "id", "type": "integer"},
84+
{"name": "value", "type": "integer"}
85+
],
86+
"primaryKey": ["id"],
87+
"exclusive": {
88+
"dataset1": [],
89+
"dataset2": [],
90+
},
91+
"typeChanged": [],
92+
},
6093
"summary": None,
61-
"columns": None,
6294
},
6395
)
6496

@@ -86,11 +118,27 @@ def test_jsonify_diff_no_difeference(self):
86118
diff=[],
87119
stats={},
88120
)
89-
json_diff = jsonify(diff, dbt_model="model")
121+
json_diff = jsonify(
122+
diff,
123+
dbt_model="model",
124+
dataset1_columns={
125+
"id": ('id', 'integer', None, None, None),
126+
"value": ('value', 'integer', None, None, None),
127+
},
128+
dataset2_columns={
129+
"id": ('id', 'integer', None, None, None),
130+
"value": ('value', 'integer', None, None, None),
131+
},
132+
columns_diff={
133+
"added": [],
134+
"removed": [],
135+
"changed": [],
136+
}
137+
)
90138
self.assertEqual(
91139
json_diff,
92140
{
93-
"version": "1.0.0",
141+
"version": "1.1.0",
94142
"status": "success",
95143
"result": "identical",
96144
"model": "model",
@@ -100,8 +148,23 @@ def test_jsonify_diff_no_difeference(self):
100148
"exclusive": {"dataset1": [], "dataset2": []},
101149
"diff": [],
102150
},
151+
"columns": {
152+
"primaryKey": ["id"],
153+
"dataset1": [
154+
{"name": "id", "type": "integer"},
155+
{"name": "value", "type": "integer"}
156+
],
157+
"dataset2": [
158+
{"name": "id", "type": "integer"},
159+
{"name": "value", "type": "integer"}
160+
],
161+
"exclusive": {
162+
"dataset1": [],
163+
"dataset2": [],
164+
},
165+
"typeChanged": [],
166+
},
103167
"summary": None,
104-
"columns": None,
105168
},
106169
)
107170

0 commit comments

Comments (0)