Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit a0a9238

Browse files
authored
Merge pull request #328 from datafold/dec1
CLI: Automatically choose joindiff if dbs are the same (don't rely just on syntax)
2 parents 92c6274 + 46cbc1c commit a0a9238

File tree

2 files changed

+10
-10
lines changed

2 files changed

+10
-10
lines changed

data_diff/__main__.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -198,20 +198,14 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
198198
metavar="NAME",
199199
)
200200
def main(conf, run, **kw):
201-
indb_syntax = False
202201
if kw["table2"] is None and kw["database2"]:
203202
# Use the "database table table" form
204203
kw["table2"] = kw["database2"]
205204
kw["database2"] = kw["database1"]
206-
indb_syntax = True
207205

208206
if conf:
209207
kw = apply_config_from_file(conf, run, kw)
210208

211-
kw["algorithm"] = Algorithm(kw["algorithm"])
212-
if kw["algorithm"] == Algorithm.AUTO:
213-
kw["algorithm"] = Algorithm.JOINDIFF if indb_syntax else Algorithm.HASHDIFF
214-
215209
try:
216210
return _main(**kw)
217211
except Exception as e:
@@ -332,6 +326,10 @@ def _main(
332326
for db in dbs:
333327
db.enable_interactive()
334328

329+
algorithm = Algorithm(algorithm)
330+
if algorithm == Algorithm.AUTO:
331+
algorithm = Algorithm.JOINDIFF if db1 == db2 else Algorithm.HASHDIFF
332+
335333
if algorithm == Algorithm.JOINDIFF:
336334
differ = JoinDiffer(
337335
threaded=threaded,
@@ -340,7 +338,8 @@ def _main(
340338
sample_exclusive_rows=sample_exclusive_rows,
341339
materialize_all_rows=materialize_all_rows,
342340
table_write_limit=table_write_limit,
343-
materialize_to_table=materialize_to_table and db1.parse_table_name(eval_name_template(materialize_to_table)),
341+
materialize_to_table=materialize_to_table
342+
and db1.parse_table_name(eval_name_template(materialize_to_table)),
344343
)
345344
else:
346345
assert algorithm == Algorithm.HASHDIFF
@@ -377,7 +376,7 @@ def _main(
377376

378377
columns = tuple(expanded_columns - {*key_columns, update_column})
379378

380-
if db1 is db2:
379+
if db1 == db2:
381380
diff_schemas(
382381
table_names[0],
383382
table_names[1],
@@ -390,7 +389,8 @@ def _main(
390389
),
391390
)
392391

393-
logging.info(f"Diffing using columns: key={key_columns} update={update_column} extra={columns}")
392+
logging.info(f"Diffing using columns: key={key_columns} update={update_column} extra={columns}.")
393+
logging.info(f"Using algorithm '{algorithm.name.lower()}'.")
394394

395395
segments = [
396396
TableSegment(db, table_path, key_columns, update_column, columns, **options)._with_raw_schema(raw_schema)

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ def test_api_get_stats_dict(self):
7474
diff = diff_tables(t1, t2)
7575

7676
output = diff.get_stats_dict()
77-
output.pop('stats')
77+
output.pop("stats")
7878
self.assertEqual(expected_dict, output)
7979
self.assertIsNotNone(diff)
8080
assert len(list(diff)) == 1

0 commit comments

Comments
 (0)