@@ -277,6 +277,29 @@ def get_spreadsheet_metadata(file_path):
277277
278278# ----------------------------------------------------------------------------------
279279
def is_numeric_type(value):
    """
    Report whether a value is a numeric scalar, either a built-in
    Python number or a NumPy numeric scalar.

    Parameters:
    ----------
    value: The value to check.

    Returns:
    -------
    bool: True if the value is an instance of int, float or complex,
        or if its type is a sub-dtype of np.number; False otherwise.
        Python bool values count as numeric because bool subclasses int.
    """
    # The built-in check runs first, so np.issubdtype is only consulted
    # for values that are not plain Python numbers.
    return (
        isinstance(value, (int, float, complex))
        or bool(np.issubdtype(type(value), np.number))
    )
300+
301+ # ----------------------------------------------------------------------------------
302+
280303def downcast_ints (value ):
281304 """
282305 Downcast a numeric value to an integer if it is equal to
@@ -1214,7 +1237,7 @@ def series_hasNull(series,
12141237
12151238#----------------------------------------------------------------------------------
12161239
1217- def get_numeric_range (series ,
1240+ def get_numeric_range (pd_series ,
12181241 attribute ,
12191242 na_val = None
12201243 ):
@@ -1223,7 +1246,7 @@ def get_numeric_range(series,
12231246 numerical and non-numerical cases.
12241247
12251248 Parameters:
1226- series (pd.Series):
1249+ pd_series (pd.Series):
12271250 The Pandas Series to process.
12281251 attribute (str):
12291252 The desired statistical attribute, either 'min' or 'max'.
@@ -1238,12 +1261,17 @@ def get_numeric_range(series,
12381261 value as an integer if possible; otherwise, returns it as a float. If the
12391262 Series is empty or non-numeric, returns (na_val).
12401263 """
1241- _s = series .dropna ()
1264+ # Check for integers or float
1265+ _s = pd_series .replace (r'^\s+$' , pd .NA , regex = True )
1266+ _s .fillna (pd .NA )
12421267 try :
12431268 _s = pd .to_numeric (_s )
1269+ _s .fillna (pd .NA )
12441270 except :
12451271 pass
1246-
1272+
1273+ _s = _s .dropna ()
1274+
12471275 if not pd .api .types .is_numeric_dtype (_s ):
12481276 return na_val # Return `na_val` for non-numeric Series
12491277
@@ -1817,9 +1845,9 @@ def schema_validate_range(attribute,
18171845 """
18181846
18191847 # Check if the expected range is a numeric value
1820- if isinstance (p_errors [attribute ]['expected' ], ( int , float ) ):
1848+ if is_numeric_type (p_errors [attribute ]['expected' ]):
18211849 # Check if the observed value is also a numeric value
1822- if isinstance (p_errors [attribute ]['observed' ], ( int , float ) ):
1850+ if is_numeric_type (p_errors [attribute ]['observed' ]):
18231851 exp_val = p_errors [attribute ]['expected' ]
18241852 obs_val = p_errors [attribute ]['observed' ]
18251853
@@ -2290,9 +2318,11 @@ def value_errors_out_of_range(df,
22902318 results = []
22912319
22922320 # Check for integers or float
2293- numeric_column = df [column_name ].notna ()
2321+ numeric_column = df [column_name ].replace (r'^\s+$' , pd .NA , regex = True )
2322+ numeric_column .fillna (pd .NA )
22942323 try :
22952324 numeric_column = pd .to_numeric (numeric_column )
2325+ numeric_column .fillna (pd .NA )
22962326 except :
22972327 pass
22982328
@@ -2463,20 +2493,20 @@ def get_value_errors(dataset_path,
24632493 )
24642494 if 'range_max' in flagged_errs \
24652495 and 'range_max' not in ignore_errors :
2466- max_len = errors ['range_max' ]['expected' ]
2496+ rng_max = errors ['range_max' ]['expected' ]
24672497 sheet_v_errors .append (
24682498 value_errors_out_of_range (df , col ,
24692499 test_type = 'max' ,
2470- value = max_len ,
2500+ value = rng_max ,
24712501 unique_column = unique_column )
24722502 )
24732503 if 'range_min' in flagged_errs \
24742504 and 'range_min' not in ignore_errors :
2475- min_len = errors ['range_min' ]['expected' ]
2505+ rng_min = errors ['range_min' ]['expected' ]
24762506 sheet_v_errors .append (
24772507 value_errors_out_of_range (df , col ,
24782508 test_type = 'min' ,
2479- value = min_len ,
2509+ value = rng_min ,
24802510 unique_column = unique_column )
24812511 )
24822512 if 'allowed_value_list' in flagged_errs \
0 commit comments