22
33import logging
44import typing as t
5- from collections import defaultdict
65from enum import Enum , auto
6+ from collections import defaultdict
7+ from pydantic import Field
78from sqlglot import exp
89from sqlglot .helper import ensure_list , seq_get
910
@@ -305,6 +306,11 @@ class SchemaDiffer(PydanticModel):
305306 columns of nested STRUCTs.
306307 compatible_types: Types that are compatible and automatically coerced in actions like UNION ALL. Dict key is data
307308 type, and value is the set of types that are compatible with it.
309+ coerceable_types: The mapping from a current type to all types that can be safely coerced to the current one without
310+ altering the column type. NOTE: callers should usually not specify this attribute manually; set the
311+ `support_coercing_compatible_types` flag instead. Some engines are inconsistent about their type coercion rules.
312+ For example, in BigQuery a BIGNUMERIC column can't be altered to be FLOAT64, while BIGNUMERIC values can be inserted
313+ into a FLOAT64 column just fine.
308314 support_coercing_compatible_types: Whether or not the engine for which the diff is being computed supports direct
309315 coercion of compatible types.
310316 parameterized_type_defaults: Default values for parameterized data types. Dict key is a sqlglot exp.DataType.Type,
@@ -326,6 +332,9 @@ class SchemaDiffer(PydanticModel):
326332 support_nested_drop : bool = False
327333 array_element_selector : str = ""
328334 compatible_types : t .Dict [exp .DataType , t .Set [exp .DataType ]] = {}
335+ coerceable_types_ : t .Dict [exp .DataType , t .Set [exp .DataType ]] = Field (
336+ default_factory = dict , alias = "coerceable_types"
337+ )
329338 support_coercing_compatible_types : bool = False
330339 parameterized_type_defaults : t .Dict [
331340 exp .DataType .Type , t .List [t .Tuple [t .Union [int , float ], ...]]
@@ -339,8 +348,9 @@ class SchemaDiffer(PydanticModel):
339348 def coerceable_types (self ) -> t .Dict [exp .DataType , t .Set [exp .DataType ]]:
340349 if not self ._coerceable_types :
341350 if not self .support_coercing_compatible_types or not self .compatible_types :
342- return {}
343- coerceable_types = defaultdict (set )
351+ return self .coerceable_types_
352+ coerceable_types : t .Dict [exp .DataType , t .Set [exp .DataType ]] = defaultdict (set )
353+ coerceable_types .update (self .coerceable_types_ )
344354 for source_type , target_types in self .compatible_types .items ():
345355 for target_type in target_types :
346356 coerceable_types [target_type ].add (source_type )
@@ -361,8 +371,6 @@ def _is_compatible_type(self, current_type: exp.DataType, new_type: exp.DataType
361371 return False
362372
363373 def _is_coerceable_type (self , current_type : exp .DataType , new_type : exp .DataType ) -> bool :
364- if not self .support_coercing_compatible_types :
365- return False
366374 if current_type in self .coerceable_types :
367375 is_coerceable = new_type in self .coerceable_types [current_type ]
368376 if is_coerceable :
0 commit comments