Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 9fcef78

Browse files
authored
Merge branch 'master' into string_key_column
2 parents 94b7b1a + 30f5c23 commit 9fcef78

File tree

6 files changed

+38
-29
lines changed

6 files changed

+38
-29
lines changed

README.md

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -110,22 +110,22 @@ $ data-diff \
110110

111111
## Supported Databases
112112

113-
| Database | Connection string | Status |
114-
|---------------|-----------------------------------------------------------------------------------------|--------|
115-
| PostgreSQL | `postgresql://user:password@hostname:5432/database` | 💚 |
116-
| MySQL | `mysql://user:password@hostname:5432/database` | 💚 |
117-
| Snowflake | `snowflake://user:password@account/database/SCHEMA?warehouse=WAREHOUSE&role=role` | 💚 |
118-
| Oracle | `oracle://username:password@hostname/database` | 💛 |
119-
| BigQuery | `bigquery://project/dataset` | 💛 |
120-
| Redshift | `redshift://username:password@hostname:5439/database` | 💛 |
121-
| Presto | `presto://username:password@hostname:8080/database` | 💛 |
122-
| ElasticSearch | | 📝 |
123-
| Databricks | | 📝 |
124-
| Planetscale | | 📝 |
125-
| Clickhouse | | 📝 |
126-
| Pinot | | 📝 |
127-
| Druid | | 📝 |
128-
| Kafka | | 📝 |
113+
| Database | Connection string | Status |
114+
|---------------|--------------------------------------------------------------------------------------------------|--------|
115+
| PostgreSQL | `postgresql://<user>:<password>@<hostname>:5432/<database>` | 💚 |
116+
| MySQL | `mysql://<user>:<password>@<hostname>:3306/<database>` | 💚 |
117+
| Snowflake | `"snowflake://<user>:<password>@<account>/<database>/<SCHEMA>?warehouse=<WAREHOUSE>&role=<role>"`| 💚 |
118+
| Oracle | `oracle://<username>:<password>@<hostname>/<database>` | 💛 |
119+
| BigQuery | `bigquery://<project>/<dataset>` | 💛 |
120+
| Redshift | `redshift://<username>:<password>@<hostname>:5439/<database>` | 💛 |
121+
| Presto | `presto://<username>:<password>@<hostname>:8080/<database>` | 💛 |
122+
| ElasticSearch | | 📝 |
123+
| Databricks | | 📝 |
124+
| Planetscale | | 📝 |
125+
| Clickhouse | | 📝 |
126+
| Pinot | | 📝 |
127+
| Druid | | 📝 |
128+
| Kafka | | 📝 |
129129

130130
* 💚: Implemented and thoroughly tested.
131131
* 💛: Implemented, but not thoroughly tested yet.
@@ -171,7 +171,10 @@ Users can also install several drivers at once:
171171
Usage: `data-diff DB1_URI TABLE1_NAME DB2_URI TABLE2_NAME [OPTIONS]`
172172

173173
See the [example command](#example-command-and-output) and the [sample
174-
connection strings](#supported-databases).
174+
connection strings](#supported-databases).
175+
176+
Note that for some databases, the arguments that you enter in the command line
177+
may be case-sensitive. This is the case for the Snowflake schema and table names.
175178

176179
Options:
177180

data_diff/databases/mysql.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class MySQL(ThreadedDatabase):
2020
"float": Float,
2121
"decimal": Decimal,
2222
"int": Integer,
23+
"bigint": Integer,
2324
# Text
2425
"varchar": Text,
2526
"char": Text,

data_diff/databases/oracle.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ class Oracle(ThreadedDatabase):
2727
def __init__(self, host, port, user, password, *, database, thread_count, **kw):
2828
assert not port
2929
self.kwargs = dict(user=user, password=password, dsn="%s/%s" % (host, database), **kw)
30+
31+
self.default_schema = user
32+
3033
super().__init__(thread_count=thread_count)
3134

3235
def create_connection(self):
@@ -54,13 +57,11 @@ def to_string(self, s: str):
5457
return f"cast({s} as varchar(1024))"
5558

5659
def select_table_schema(self, path: DbPath) -> str:
57-
if len(path) > 1:
58-
raise ValueError("Unexpected table path for oracle")
59-
(table,) = path
60+
schema, table = self._normalize_table_path(path)
6061

6162
return (
6263
f"SELECT column_name, data_type, 6 as datetime_precision, data_precision as numeric_precision, data_scale as numeric_scale"
63-
f" FROM USER_TAB_COLUMNS WHERE table_name = '{table.upper()}'"
64+
f" FROM ALL_TAB_COLUMNS WHERE table_name = '{table.upper()}' AND owner = '{schema.upper()}'"
6465
)
6566

6667
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/sql.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,12 @@ class Checksum(Sql):
120120
exprs: Sequence[SqlOrStr]
121121

122122
def compile(self, c: Compiler):
123-
compiled_exprs = ", ".join(map(c.compile, self.exprs))
124-
expr = f"concat({compiled_exprs})"
123+
if len(self.exprs) > 1:
124+
compiled_exprs = ", ".join(map(c.compile, self.exprs))
125+
expr = f"concat({compiled_exprs})"
126+
else:
127+
expr ,= self.exprs
128+
expr = c.compile(expr)
125129
md5 = c.database.md5_to_int(expr)
126130
return f"sum({md5})"
127131

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "data-diff"
33
version = "0.2.1"
44
description = "Command-line tool and Python library to efficiently diff rows across two different databases."
5-
authors = ["Erez Shinnan <erezshin@gmail.com>", "Simon Eskildsen <simon@sirupsen.com>"]
5+
authors = ["Datafold <data-diff@datafold.com>"]
66
license = "MIT"
77
readme = "README.md"
88
repository = "https://github.com/datafold/data-diff"

tests/test_database_types.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def __iter__(self):
183183
"int": [
184184
# "smallint", # 2 bytes
185185
"int", # 4 bytes
186-
# "bigint", # 8 bytes
186+
"bigint", # 8 bytes
187187
],
188188
# https://www.postgresql.org/docs/current/datatype-datetime.html
189189
"datetime_no_timezone": [
@@ -211,7 +211,7 @@ def __iter__(self):
211211
# "smallint", # 2 bytes
212212
# "mediumint", # 3 bytes
213213
"int", # 4 bytes
214-
# "bigint", # 8 bytes
214+
"bigint", # 8 bytes
215215
],
216216
# https://dev.mysql.com/doc/refman/8.0/en/datetime.html
217217
"datetime_no_timezone": [
@@ -253,8 +253,8 @@ def __iter__(self):
253253
"int": [
254254
# all 38 digits with 0 precision, don't need to test all
255255
"int",
256-
# "integer",
257-
# "bigint",
256+
"integer",
257+
"bigint",
258258
# "smallint",
259259
# "tinyint",
260260
# "byteint"
@@ -323,7 +323,7 @@ def __iter__(self):
323323
# "smallint", # 2 bytes
324324
# "mediumint", # 3 bytes
325325
"int", # 4 bytes
326-
# "bigint", # 8 bytes
326+
"bigint", # 8 bytes
327327
],
328328
"datetime_no_timezone": [
329329
"timestamp",

0 commit comments

Comments
 (0)