Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 46cbc1c

Browse files
committed
CLI: Automatically choose joindiff if dbs are the same (don't rely just on syntax)
1 parent 78b725d commit 46cbc1c

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

data_diff/__main__.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,20 +198,14 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
198198
metavar="NAME",
199199
)
200200
def main(conf, run, **kw):
201-
indb_syntax = False
202201
if kw["table2"] is None and kw["database2"]:
203202
# Use the "database table table" form
204203
kw["table2"] = kw["database2"]
205204
kw["database2"] = kw["database1"]
206-
indb_syntax = True
207205

208206
if conf:
209207
kw = apply_config_from_file(conf, run, kw)
210208

211-
kw["algorithm"] = Algorithm(kw["algorithm"])
212-
if kw["algorithm"] == Algorithm.AUTO:
213-
kw["algorithm"] = Algorithm.JOINDIFF if indb_syntax else Algorithm.HASHDIFF
214-
215209
try:
216210
return _main(**kw)
217211
except Exception as e:
@@ -336,6 +330,10 @@ def _main(
336330
for db in dbs:
337331
db.enable_interactive()
338332

333+
algorithm = Algorithm(algorithm)
334+
if algorithm == Algorithm.AUTO:
335+
algorithm = Algorithm.JOINDIFF if db1 == db2 else Algorithm.HASHDIFF
336+
339337
if algorithm == Algorithm.JOINDIFF:
340338
differ = JoinDiffer(
341339
threaded=threaded,
@@ -344,7 +342,8 @@ def _main(
344342
sample_exclusive_rows=sample_exclusive_rows,
345343
materialize_all_rows=materialize_all_rows,
346344
table_write_limit=table_write_limit,
347-
materialize_to_table=materialize_to_table and db1.parse_table_name(eval_name_template(materialize_to_table)),
345+
materialize_to_table=materialize_to_table
346+
and db1.parse_table_name(eval_name_template(materialize_to_table)),
348347
)
349348
else:
350349
assert algorithm == Algorithm.HASHDIFF
@@ -381,7 +380,7 @@ def _main(
381380

382381
columns = tuple(expanded_columns - {*key_columns, update_column})
383382

384-
if db1 is db2:
383+
if db1 == db2:
385384
diff_schemas(
386385
table_names[0],
387386
table_names[1],
@@ -394,7 +393,8 @@ def _main(
394393
),
395394
)
396395

397-
logging.info(f"Diffing using columns: key={key_columns} update={update_column} extra={columns}")
396+
logging.info(f"Diffing using columns: key={key_columns} update={update_column} extra={columns}.")
397+
logging.info(f"Using algorithm '{algorithm.name.lower()}'.")
398398

399399
segments = [
400400
TableSegment(db, table_path, key_columns, update_column, columns, **options)._with_raw_schema(raw_schema)

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def test_api_get_stats_dict(self):
7474
diff = diff_tables(t1, t2)
7575

7676
output = diff.get_stats_dict()
77-
output.pop('stats')
77+
output.pop("stats")
7878
self.assertEqual(expected_dict, output)
7979
self.assertIsNotNone(diff)
8080
assert len(list(diff)) == 1

0 commit comments

Comments
 (0)