Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 4a3c259

Browse files
committed
Split Integer from Decimal to reduce casts in SQL. Added FractionalType.
1 parent 8bbe841 commit 4a3c259

File tree

9 files changed, with 21 additions and 19 deletions.

data_diff/databases/bigquery.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,7 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
8787
f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
8888
)
8989

90-
def normalize_number(self, value: str, coltype: NumericType) -> str:
91-
if isinstance(coltype, Integer):
92-
return self.to_string(value)
90+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
9391
return f"format('%.{coltype.precision}f', {value})"
9492

9593
def parse_table_name(self, name: str) -> DbPath:

data_diff/databases/database_types.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,19 @@ class NumericType(ColType):
4040
# 'precision' signifies how many fractional digits (after the dot) we want to compare
4141
precision: int
4242

43+
class FractionalType(NumericType):
44+
pass
4345

44-
class Float(NumericType):
46+
class Float(FractionalType):
4547
pass
4648

4749

48-
class Decimal(NumericType):
50+
class Decimal(FractionalType):
4951
pass
5052

5153

5254
@dataclass
53-
class Integer(Decimal):
55+
class Integer(NumericType):
5456
def __post_init__(self):
5557
assert self.precision == 0
5658

@@ -114,7 +116,7 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
114116
...
115117

116118
@abstractmethod
117-
def normalize_number(self, value: str, coltype: NumericType) -> str:
119+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
118120
"""Creates an SQL expression, that converts 'value' to a normalized number.
119121
120122
The returned expression must accept any SQL int/numeric/float, and return a string.
@@ -139,18 +141,20 @@ def normalize_value_by_type(self, value: str, coltype: ColType) -> str:
139141
140142
The returned expression must accept any SQL value, and return a string.
141143
142-
The default implementation dispatches to a method according to ``coltype``:
144+
The default implementation dispatches to a method according to `coltype`:
143145
144146
TemporalType -> normalize_timestamp()
145-
NumericType -> normalize_number()
146-
-else- -> to_string()
147+
FractionalType -> normalize_number()
148+
*else* -> to_string()
149+
150+
(`Integer` falls in the *else* category)
147151
148152
"""
149153
if isinstance(coltype, TemporalType):
150154
return self.normalize_timestamp(value, coltype)
151-
elif isinstance(coltype, NumericType):
155+
elif isinstance(coltype, FractionalType):
152156
return self.normalize_number(value, coltype)
153-
return self.to_string(f"{value}")
157+
return self.to_string(value)
154158

155159
def _normalize_table_path(self, path: DbPath) -> DbPath:
156160
...

data_diff/databases/mysql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,5 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
6060
s = self.to_string(f"cast({value} as datetime(6))")
6161
return f"RPAD(RPAD({s}, {TIMESTAMP_PRECISION_POS+coltype.precision}, '.'), {TIMESTAMP_PRECISION_POS+6}, '0')"
6262

63-
def normalize_number(self, value: str, coltype: NumericType) -> str:
63+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
6464
return self.to_string(f"cast({value} as decimal(38, {coltype.precision}))")

data_diff/databases/oracle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def select_table_schema(self, path: DbPath) -> str:
5757
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
5858
return f"to_char(cast({value} as timestamp({coltype.precision})), 'YYYY-MM-DD HH24:MI:SS.FF6')"
5959

60-
def normalize_number(self, value: str, coltype: NumericType) -> str:
60+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
6161
# FM999.9990
6262
format_str = "FM" + "9" * (38 - coltype.precision)
6363
if coltype.precision:

data_diff/databases/postgresql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,5 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
6767
f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
6868
)
6969

70-
def normalize_number(self, value: str, coltype: NumericType) -> str:
70+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
7171
return self.to_string(f"{value}::decimal(38, {coltype.precision})")

data_diff/databases/presto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
6464

6565
return f"RPAD(RPAD({s}, {TIMESTAMP_PRECISION_POS+coltype.precision}, '.'), {TIMESTAMP_PRECISION_POS+6}, '0')"
6666

67-
def normalize_number(self, value: str, coltype: NumericType) -> str:
67+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
6868
return self.to_string(f"cast({value} as decimal(38,{coltype.precision}))")
6969

7070
def select_table_schema(self, path: DbPath) -> str:

data_diff/databases/redshift.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
3232
f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
3333
)
3434

35-
def normalize_number(self, value: str, coltype: NumericType) -> str:
35+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
3636
return self.to_string(f"{value}::decimal(38,{coltype.precision})")
3737

3838
def select_table_schema(self, path: DbPath) -> str:

data_diff/databases/snowflake.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,5 +86,5 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
8686

8787
return f"to_char({timestamp}, 'YYYY-MM-DD HH24:MI:SS.FF6')"
8888

89-
def normalize_number(self, value: str, coltype: NumericType) -> str:
89+
def normalize_number(self, value: str, coltype: FractionalType) -> str:
9090
return self.to_string(f"cast({value} as decimal(38, {coltype.precision}))")

data_diff/diff_tables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def _update_column(self):
9696

9797
def _quote_column(self, c):
9898
if self._schema:
99-
c = self._schema.get_key(c)
99+
c = self._schema.get_key(c) # Get the actual name. Might be case-insensitive.
100100
return self.database.quote(c)
101101

102102
def with_schema(self) -> "TableSegment":

0 commit comments

Comments
 (0)