1+ from uuid import UUID
12import math
23import sys
34import logging
4- from typing import Dict , Tuple , Optional , Sequence , Type
5+ from typing import Dict , Tuple , Optional , Sequence , Type , List
56from functools import lru_cache , wraps
67from concurrent .futures import ThreadPoolExecutor
78import threading
89from abc import abstractmethod
910
1011from .database_types import (
12+ ColType_UUID ,
1113 AbstractDatabase ,
1214 ColType ,
1315 Integer ,
1618 PrecisionType ,
1719 TemporalType ,
1820 UnknownColType ,
21+ Text ,
1922)
20- from data_diff .sql import DbPath , SqlOrStr , Compiler , Explain , Select
23+ from data_diff .sql import DbPath , SqlOrStr , Compiler , Explain , Select , TableName
2124
2225logger = logging .getLogger ("database" )
2326
@@ -26,6 +29,14 @@ def parse_table_name(t):
2629 return tuple (t .split ("." ))
2730
2831
def is_uuid(u):
    """Return True if *u* is, or can be parsed as, a UUID; False otherwise.

    Values that cannot be a UUID at all (``None``, numbers, bytes, ...) are
    reported as non-UUIDs instead of raising, so this is safe to use as a
    filter predicate over raw values fetched from a database column.
    """
    # An existing UUID object is trivially a UUID; UUID(UUID(...)) would raise.
    if isinstance(u, UUID):
        return True
    try:
        UUID(u)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed string.
        # TypeError: None (no argument given) or bytes passed as the hex form.
        # AttributeError: non-string objects lacking str methods (e.g. int).
        return False
    return True
38+
39+
2940def import_helper (package : str = None , text = "" ):
3041 def dec (f ):
3142 @wraps (f )
@@ -102,7 +113,7 @@ def query(self, sql_ast: SqlOrStr, res_type: type):
102113 assert len (res ) == 1 , (sql_code , res )
103114 return res [0 ]
104115 elif getattr (res_type , "__origin__" , None ) is list and len (res_type .__args__ ) == 1 :
105- if res_type .__args__ == (int ,):
116+ if res_type .__args__ == (int ,) or res_type . __args__ == ( str ,) :
106117 return [_one (row ) for row in res ]
107118 elif res_type .__args__ == (Tuple ,):
108119 return [tuple (row ) for row in res ]
@@ -123,6 +134,7 @@ def _parse_type_repr(self, type_repr: str) -> Optional[Type[ColType]]:
123134
124135 def _parse_type (
125136 self ,
137+ table_path : DbPath ,
126138 col_name : str ,
127139 type_repr : str ,
128140 datetime_precision : int = None ,
@@ -147,7 +159,7 @@ def _parse_type(
147159 elif issubclass (cls , Decimal ):
148160 if numeric_scale is None :
149161 raise ValueError (
150- f"{ self .name } : Unexpected numeric_scale is NULL, for column { col_name } of type { type_repr } ."
162+ f"{ self .name } : Unexpected numeric_scale is NULL, for column { '.' . join ( table_path ) } . { col_name } of type { type_repr } ."
151163 )
152164 return cls (precision = numeric_scale )
153165
@@ -159,6 +171,20 @@ def _parse_type(
159171 )
160172 )
161173
174+ elif issubclass (cls , Text ):
175+ samples = self .query (Select ([col_name ], TableName (table_path ), limit = 16 ), List [str ])
176+ uuid_samples = list (filter (is_uuid , samples ))
177+
178+ if uuid_samples :
179+ if len (uuid_samples ) != len (samples ):
180+ logger .warning (
181+ f"Mixed UUID/Non-UUID values detected in column { '.' .join (table_path )} .{ col_name } , disabling UUID support."
182+ )
183+ else :
184+ return ColType_UUID ()
185+
186+ return Text ()
187+
162188 raise TypeError (f"Parsing { type_repr } returned an unknown type '{ cls } '." )
163189
164190 def select_table_schema (self , path : DbPath ) -> str :
@@ -179,7 +205,7 @@ def query_table_schema(self, path: DbPath, filter_columns: Optional[Sequence[str
179205 rows = [r for r in rows if r [0 ].lower () in accept ]
180206
181207 # Return a dict of form {name: type} after normalization
182- return {row [0 ]: self ._parse_type (* row ) for row in rows }
208+ return {row [0 ]: self ._parse_type (path , * row ) for row in rows }
183209
184210 # @lru_cache()
185211 # def get_table_schema(self, path: DbPath) -> Dict[str, ColType]:
@@ -233,6 +259,12 @@ def create_connection(self):
def close(self):
    """Shut down the object held in ``self._queue``."""
    # NOTE(review): ``_queue`` is presumably the ThreadPoolExecutor imported
    # at the top of the file -- confirm against the constructor.
    queue = self._queue
    queue.shutdown()
235261
def offset_limit(self, offset: Optional[int] = None, limit: Optional[int] = None):
    """Build the row-limiting clause of a query.

    Args:
        offset: Number of rows to skip. Only a falsy value (``None`` or ``0``)
            is accepted here.
        limit: Maximum number of rows to return, or ``None`` for no limit.

    Returns:
        ``"LIMIT <limit>"``, or an empty string when *limit* is ``None``.

    Raises:
        NotImplementedError: If a non-zero *offset* is requested.
    """
    if offset:
        raise NotImplementedError("No support for OFFSET in query")

    # Without this guard the default arguments would render "LIMIT None",
    # which is not valid SQL.
    if limit is None:
        return ""

    return f"LIMIT {limit}"
267+
236268
237269CHECKSUM_HEXDIGITS = 15 # Must be 15 or lower
238270MD5_HEXDIGITS = 32
0 commit comments