Skip to content

Commit f5982f0

Browse files
partial working dump
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 9de6c8b commit f5982f0

File tree

6 files changed

+266
-44
lines changed

6 files changed

+266
-44
lines changed

src/databricks/sql/backend/sea/result_set.py

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -359,12 +359,57 @@ def _transform_arrow_table(self, table: "pyarrow.Table") -> "pyarrow.Table":
359359

360360
for new_idx, result_column in enumerate(self._metadata_columns):
361361
old_idx = self._column_index_mapping.get(new_idx)
362+
363+
# Get the source data
362364
if old_idx is not None:
363-
new_columns.append(table.column(old_idx))
365+
column = table.column(old_idx)
366+
values = column.to_pylist()
364367
else:
365-
# Create null column for missing data
366-
null_array = pyarrow.nulls(table.num_rows)
368+
values = None
369+
370+
# Special handling for columns that need data from other columns
371+
if result_column.column_name == "DATA_TYPE" and result_column.result_set_column_name is None:
372+
# Get TYPE_NAME column value for DATA_TYPE calculation
373+
for idx, col in enumerate(self._metadata_columns):
374+
if col.column_name == "TYPE_NAME":
375+
type_idx = self._column_index_mapping.get(idx)
376+
if type_idx is not None:
377+
values = table.column(type_idx).to_pylist()
378+
break
379+
elif result_column.column_name == "NULLABLE" and result_column.result_set_column_name is None:
380+
# Get IS_NULLABLE column value for NULLABLE calculation
381+
for idx, col in enumerate(self._metadata_columns):
382+
if col.column_name == "IS_NULLABLE":
383+
nullable_idx = self._column_index_mapping.get(idx)
384+
if nullable_idx is not None:
385+
values = table.column(nullable_idx).to_pylist()
386+
break
387+
elif result_column.column_name == "BUFFER_LENGTH" and result_column.result_set_column_name is None:
388+
# Get TYPE_NAME column value for BUFFER_LENGTH calculation
389+
for idx, col in enumerate(self._metadata_columns):
390+
if col.column_name == "TYPE_NAME":
391+
type_idx = self._column_index_mapping.get(idx)
392+
if type_idx is not None:
393+
values = table.column(type_idx).to_pylist()
394+
break
395+
396+
# Apply transformation and create column
397+
if values is not None:
398+
if result_column.transform_value:
399+
transformed_values = [result_column.transform_value(v) for v in values]
400+
column = pyarrow.array(transformed_values)
401+
else:
402+
column = pyarrow.array(values)
403+
new_columns.append(column)
404+
else:
405+
# Create column with default/transformed values
406+
if result_column.transform_value:
407+
default_value = result_column.transform_value(None)
408+
null_array = pyarrow.array([default_value] * table.num_rows)
409+
else:
410+
null_array = pyarrow.nulls(table.num_rows)
367411
new_columns.append(null_array)
412+
368413
column_names.append(result_column.column_name)
369414

370415
return pyarrow.Table.from_arrays(new_columns, names=column_names)
@@ -377,11 +422,43 @@ def _transform_json_rows(self, rows: List[List[str]]) -> List[List[Any]]:
377422
transformed_rows = []
378423
for row in rows:
379424
new_row = []
380-
for new_idx in range(len(self._metadata_columns)):
425+
for new_idx, result_column in enumerate(self._metadata_columns):
381426
old_idx = self._column_index_mapping.get(new_idx)
382427
if old_idx is not None:
383-
new_row.append(row[old_idx])
428+
value = row[old_idx]
384429
else:
385-
new_row.append(None)
430+
value = None
431+
432+
# Special handling for columns that need data from other columns
433+
if result_column.column_name == "DATA_TYPE" and result_column.result_set_column_name is None:
434+
# Get TYPE_NAME column value for DATA_TYPE calculation
435+
for idx, col in enumerate(self._metadata_columns):
436+
if col.column_name == "TYPE_NAME":
437+
type_idx = self._column_index_mapping.get(idx)
438+
if type_idx is not None and type_idx < len(row):
439+
value = row[type_idx]
440+
break
441+
elif result_column.column_name == "NULLABLE" and result_column.result_set_column_name is None:
442+
# Get IS_NULLABLE column value for NULLABLE calculation
443+
for idx, col in enumerate(self._metadata_columns):
444+
if col.column_name == "IS_NULLABLE":
445+
nullable_idx = self._column_index_mapping.get(idx)
446+
if nullable_idx is not None and nullable_idx < len(row):
447+
value = row[nullable_idx]
448+
break
449+
elif result_column.column_name == "BUFFER_LENGTH" and result_column.result_set_column_name is None:
450+
# Get TYPE_NAME column value for BUFFER_LENGTH calculation
451+
for idx, col in enumerate(self._metadata_columns):
452+
if col.column_name == "TYPE_NAME":
453+
type_idx = self._column_index_mapping.get(idx)
454+
if type_idx is not None and type_idx < len(row):
455+
value = row[type_idx]
456+
break
457+
458+
# Apply transformation if defined
459+
if result_column.transform_value:
460+
value = result_column.transform_value(value)
461+
462+
new_row.append(value)
386463
transformed_rows.append(new_row)
387464
return transformed_rows

src/databricks/sql/backend/sea/utils/conversion.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ def convert_value(
150150
Returns:
151151
The converted value in the appropriate Python type
152152
"""
153+
154+
# Handle None values directly
155+
if value is None:
156+
return None
153157

154158
sql_type = sql_type.lower().strip()
155159

src/databricks/sql/backend/sea/utils/filters.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ def _filter_sea_result_set(
8686
arraysize=result_set.arraysize,
8787
)
8888

89+
# Preserve metadata columns setup from original result set
90+
if hasattr(result_set, '_metadata_columns') and result_set._metadata_columns:
91+
filtered_result_set._metadata_columns = result_set._metadata_columns
92+
filtered_result_set._column_index_mapping = result_set._column_index_mapping
93+
# Update the description to match the original prepared description
94+
filtered_result_set.description = result_set.description
95+
8996
return filtered_result_set
9097

9198
@staticmethod
Lines changed: 71 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,89 @@
11
from databricks.sql.backend.sea.utils.result_column import ResultColumn
2+
from databricks.sql.backend.sea.utils.metadata_transformations import (
3+
transform_table_type,
4+
transform_is_nullable,
5+
transform_nullable_to_int,
6+
transform_remarks_default,
7+
transform_numeric_default_zero,
8+
transform_ordinal_position_offset,
9+
calculate_data_type,
10+
calculate_buffer_length,
11+
always_null,
12+
always_null_int,
13+
always_null_smallint,
14+
identity
15+
)
216

317

418
class MetadataColumnMappings:
519
"""Column mappings for metadata queries following JDBC specification."""
620

721
# Common columns used across multiple metadata queries
8-
CATALOG_COLUMN = ResultColumn("TABLE_CAT", "catalog", "string")
9-
CATALOG_COLUMN_FOR_TABLES = ResultColumn("TABLE_CAT", "catalogName", "string")
10-
SCHEMA_COLUMN = ResultColumn("TABLE_SCHEM", "namespace", "string")
11-
SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn("TABLE_SCHEM", "databaseName", "string")
12-
TABLE_NAME_COLUMN = ResultColumn("TABLE_NAME", "tableName", "string")
13-
TABLE_TYPE_COLUMN = ResultColumn("TABLE_TYPE", "tableType", "string")
14-
REMARKS_COLUMN = ResultColumn("REMARKS", "remarks", "string")
22+
# FIX 1: Catalog columns - swap the mappings
23+
CATALOG_COLUMN = ResultColumn("TABLE_CAT", "catalogName", "string", transform_value=identity)
24+
CATALOG_COLUMN_FOR_GET_CATALOGS = ResultColumn("TABLE_CAT", "catalog", "string", transform_value=identity)
25+
# Remove CATALOG_COLUMN_FOR_TABLES - will use CATALOG_COLUMN instead
26+
27+
SCHEMA_COLUMN = ResultColumn("TABLE_SCHEM", "namespace", "string", transform_value=identity)
28+
SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn("TABLE_SCHEM", "databaseName", "string", transform_value=identity)
29+
TABLE_NAME_COLUMN = ResultColumn("TABLE_NAME", "tableName", "string", transform_value=identity)
30+
TABLE_TYPE_COLUMN = ResultColumn("TABLE_TYPE", "tableType", "string", transform_value=transform_table_type)
31+
REMARKS_COLUMN = ResultColumn("REMARKS", "remarks", "string", transform_value=transform_remarks_default)
1532

1633
# Columns specific to getColumns()
17-
COLUMN_NAME_COLUMN = ResultColumn("COLUMN_NAME", "col_name", "string")
34+
COLUMN_NAME_COLUMN = ResultColumn("COLUMN_NAME", "col_name", "string", transform_value=identity)
1835
DATA_TYPE_COLUMN = ResultColumn(
19-
"DATA_TYPE", None, "int"
20-
) # SEA doesn't provide this
21-
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", "columnType", "string")
22-
COLUMN_SIZE_COLUMN = ResultColumn("COLUMN_SIZE", None, "int")
23-
DECIMAL_DIGITS_COLUMN = ResultColumn("DECIMAL_DIGITS", None, "int")
24-
NUM_PREC_RADIX_COLUMN = ResultColumn("NUM_PREC_RADIX", None, "int")
25-
NULLABLE_COLUMN = ResultColumn("NULLABLE", None, "int")
36+
"DATA_TYPE", None, "int", transform_value=calculate_data_type
37+
) # Calculated from columnType
38+
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", "columnType", "string", transform_value=identity)
39+
40+
# FIX 5: SEA actually provides these columns
41+
COLUMN_SIZE_COLUMN = ResultColumn("COLUMN_SIZE", "columnSize", "int", transform_value=identity)
42+
DECIMAL_DIGITS_COLUMN = ResultColumn("DECIMAL_DIGITS", "decimalDigits", "int", transform_value=transform_numeric_default_zero)
43+
NUM_PREC_RADIX_COLUMN = ResultColumn("NUM_PREC_RADIX", "radix", "int", transform_value=transform_numeric_default_zero)
44+
ORDINAL_POSITION_COLUMN = ResultColumn("ORDINAL_POSITION", "ordinalPosition", "int", transform_value=transform_ordinal_position_offset)
45+
46+
NULLABLE_COLUMN = ResultColumn("NULLABLE", None, "int", transform_value=transform_nullable_to_int) # Calculated from isNullable
2647
COLUMN_DEF_COLUMN = ResultColumn(
27-
"COLUMN_DEF", "columnType", "string"
48+
"COLUMN_DEF", "columnType", "string", transform_value=identity
2849
) # Note: duplicate mapping
29-
SQL_DATA_TYPE_COLUMN = ResultColumn("SQL_DATA_TYPE", None, "int")
30-
SQL_DATETIME_SUB_COLUMN = ResultColumn("SQL_DATETIME_SUB", None, "int")
31-
CHAR_OCTET_LENGTH_COLUMN = ResultColumn("CHAR_OCTET_LENGTH", None, "int")
32-
ORDINAL_POSITION_COLUMN = ResultColumn("ORDINAL_POSITION", None, "int")
33-
IS_NULLABLE_COLUMN = ResultColumn("IS_NULLABLE", "isNullable", "string")
50+
SQL_DATA_TYPE_COLUMN = ResultColumn("SQL_DATA_TYPE", None, "int", transform_value=always_null_int)
51+
SQL_DATETIME_SUB_COLUMN = ResultColumn("SQL_DATETIME_SUB", None, "int", transform_value=always_null_int)
52+
CHAR_OCTET_LENGTH_COLUMN = ResultColumn("CHAR_OCTET_LENGTH", None, "int", transform_value=always_null_int)
53+
IS_NULLABLE_COLUMN = ResultColumn("IS_NULLABLE", "isNullable", "string", transform_value=transform_is_nullable)
3454

3555
# Columns for getTables() that don't exist in SEA
36-
TYPE_CAT_COLUMN = ResultColumn("TYPE_CAT", None, "string")
37-
TYPE_SCHEM_COLUMN = ResultColumn("TYPE_SCHEM", None, "string")
38-
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", None, "string")
56+
TYPE_CAT_COLUMN = ResultColumn("TYPE_CAT", None, "string", transform_value=always_null)
57+
TYPE_SCHEM_COLUMN = ResultColumn("TYPE_SCHEM", None, "string", transform_value=always_null)
58+
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", None, "string", transform_value=always_null)
3959
SELF_REFERENCING_COL_NAME_COLUMN = ResultColumn(
40-
"SELF_REFERENCING_COL_NAME", None, "string"
60+
"SELF_REFERENCING_COL_NAME", None, "string", transform_value=always_null
4161
)
42-
REF_GENERATION_COLUMN = ResultColumn("REF_GENERATION", None, "string")
62+
REF_GENERATION_COLUMN = ResultColumn("REF_GENERATION", None, "string", transform_value=always_null)
63+
64+
# FIX 8: Scope columns (always null per JDBC)
65+
SCOPE_CATALOG_COLUMN = ResultColumn("SCOPE_CATALOG", None, "string", transform_value=always_null)
66+
SCOPE_SCHEMA_COLUMN = ResultColumn("SCOPE_SCHEMA", None, "string", transform_value=always_null)
67+
SCOPE_TABLE_COLUMN = ResultColumn("SCOPE_TABLE", None, "string", transform_value=always_null)
68+
SOURCE_DATA_TYPE_COLUMN = ResultColumn("SOURCE_DATA_TYPE", None, "smallint", transform_value=always_null_smallint)
69+
70+
# FIX 9 & 10: Auto increment and generated columns
71+
IS_AUTO_INCREMENT_COLUMN = ResultColumn("IS_AUTOINCREMENT", "isAutoIncrement", "string", transform_value=identity) # No underscore!
72+
IS_GENERATED_COLUMN = ResultColumn("IS_GENERATEDCOLUMN", "isGenerated", "string", transform_value=identity) # SEA provides this
73+
74+
# FIX 11: Buffer length column
75+
BUFFER_LENGTH_COLUMN = ResultColumn("BUFFER_LENGTH", None, "int", transform_value=always_null_int) # Always null per JDBC
4376

4477
# Column lists for each metadata operation
45-
CATALOG_COLUMNS = [CATALOG_COLUMN]
78+
CATALOG_COLUMNS = [CATALOG_COLUMN_FOR_GET_CATALOGS] # Use specific catalog column
4679

4780
SCHEMA_COLUMNS = [
4881
SCHEMA_COLUMN_FOR_GET_SCHEMA,
49-
ResultColumn("TABLE_CATALOG", None, "string"), # SEA doesn't return this
82+
ResultColumn("TABLE_CATALOG", None, "string", transform_value=always_null), # Will need special population logic
5083
]
5184

5285
TABLE_COLUMNS = [
53-
CATALOG_COLUMN_FOR_TABLES,
86+
CATALOG_COLUMN, # Use general catalog column (catalogName)
5487
SCHEMA_COLUMN,
5588
TABLE_NAME_COLUMN,
5689
TABLE_TYPE_COLUMN,
@@ -62,15 +95,16 @@ class MetadataColumnMappings:
6295
REF_GENERATION_COLUMN,
6396
]
6497

98+
# FIX 13: Remove IS_GENERATEDCOLUMN from list (should be 23 columns, not 24)
6599
COLUMN_COLUMNS = [
66-
CATALOG_COLUMN_FOR_TABLES,
100+
CATALOG_COLUMN, # Use general catalog column (catalogName)
67101
SCHEMA_COLUMN,
68102
TABLE_NAME_COLUMN,
69103
COLUMN_NAME_COLUMN,
70104
DATA_TYPE_COLUMN,
71105
TYPE_NAME_COLUMN,
72106
COLUMN_SIZE_COLUMN,
73-
ResultColumn("BUFFER_LENGTH", None, "int"),
107+
BUFFER_LENGTH_COLUMN,
74108
DECIMAL_DIGITS_COLUMN,
75109
NUM_PREC_RADIX_COLUMN,
76110
NULLABLE_COLUMN,
@@ -81,10 +115,10 @@ class MetadataColumnMappings:
81115
CHAR_OCTET_LENGTH_COLUMN,
82116
ORDINAL_POSITION_COLUMN,
83117
IS_NULLABLE_COLUMN,
84-
ResultColumn("SCOPE_CATALOG", None, "string"),
85-
ResultColumn("SCOPE_SCHEMA", None, "string"),
86-
ResultColumn("SCOPE_TABLE", None, "string"),
87-
ResultColumn("SOURCE_DATA_TYPE", None, "smallint"),
88-
ResultColumn("IS_AUTO_INCREMENT", None, "string"),
89-
ResultColumn("IS_GENERATEDCOLUMN", None, "string"),
118+
SCOPE_CATALOG_COLUMN,
119+
SCOPE_SCHEMA_COLUMN,
120+
SCOPE_TABLE_COLUMN,
121+
SOURCE_DATA_TYPE_COLUMN,
122+
IS_AUTO_INCREMENT_COLUMN,
123+
# DO NOT INCLUDE IS_GENERATED_COLUMN - Thrift returns 23 columns
90124
]
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from typing import Any, Optional
2+
3+
# Table transformations
def transform_table_type(value: Any) -> str:
    """Normalize a table-type value, defaulting blanks to "TABLE".

    A getTables() row must always carry a concrete TABLE_TYPE, so a
    missing (None) or empty-string value is reported as "TABLE".
    """
    return "TABLE" if value is None or value == "" else str(value)
9+
10+
# Nullable transformations
def transform_is_nullable(value: Any) -> str:
    """Map a nullability flag to the JDBC IS_NULLABLE string ("YES"/"NO").

    Accepts the string "true", the boolean True, or None (unknown
    nullability defaults to nullable) as "YES"; anything else is "NO".
    """
    nullable = value is None or value is True or value == "true"
    return "YES" if nullable else "NO"
16+
17+
def transform_nullable_to_int(value: Any) -> int:
    """Map a nullability flag to the JDBC NULLABLE integer code.

    Returns 1 (nullable) for None, the string "true", or the boolean
    True; returns 0 (not nullable) for everything else.
    """
    return 1 if value is None or value is True or value == "true" else 0
22+
23+
# Default value transformations
def transform_remarks_default(value: Any) -> str:
    """Render a REMARKS value as a string, substituting "" when absent."""
    return "" if value is None else str(value)
29+
30+
def transform_numeric_default_zero(value: Any) -> int:
    """Coerce a numeric metadata value to int, falling back to 0.

    None, and any value int() rejects (e.g. "n/a" or a float-formatted
    string like "3.5"), all collapse to 0.
    """
    if value is None:
        return 0
    try:
        result = int(value)
    except (TypeError, ValueError):
        result = 0
    return result
38+
39+
# Calculated transformations

# java.sql.Types codes keyed by Databricks type name.  Hoisted to module
# level so the table is built once instead of on every call.
# NOTE(review): 2002 is java.sql.Types.STRUCT; MAP has no dedicated JDBC
# code — confirm mapping MAP -> 2002 is intended.
_JDBC_TYPE_CODES = {
    "STRING": 12, "VARCHAR": 12,            # VARCHAR
    "INT": 4, "INTEGER": 4,                 # INTEGER
    "DOUBLE": 8,                            # DOUBLE
    "FLOAT": 6,                             # FLOAT
    "BOOLEAN": 16,                          # BOOLEAN
    "DATE": 91,                             # DATE
    "TIMESTAMP": 93, "TIMESTAMP_NTZ": 93,   # TIMESTAMP
    "DECIMAL": 3,                           # DECIMAL
    "NUMERIC": 2,                           # NUMERIC
    "BINARY": -2,                           # BINARY
    "ARRAY": 2003,                          # ARRAY
    "MAP": 2002, "STRUCT": 2002,            # STRUCT
    "TINYINT": -6,                          # TINYINT
    "SMALLINT": 5,                          # SMALLINT
    "BIGINT": -5, "LONG": -5,               # BIGINT
}

# Fallback for unknown/missing types.  This is java.sql.Types.OTHER
# (1111), NOT SQL NULL (which is 0) — the original comment was wrong.
_JDBC_TYPE_OTHER = 1111


def calculate_data_type(value: Any) -> int:
    """Calculate the JDBC DATA_TYPE code from a Databricks type name.

    The lookup is case-insensitive and strips any precision suffix,
    e.g. "decimal(10,2)" resolves via "DECIMAL".  Unknown type names
    and None map to java.sql.Types.OTHER (1111).
    """
    if value is None:
        return _JDBC_TYPE_OTHER
    type_name = str(value).upper().split("(")[0]
    return _JDBC_TYPE_CODES.get(type_name, _JDBC_TYPE_OTHER)
59+
60+
def calculate_buffer_length(value: Any) -> Optional[int]:
    """Derive a BUFFER_LENGTH value from a type name.

    Type names containing ARRAY or MAP get a fixed buffer length of
    255; every other type — and a missing type name — yields None,
    which surfaces as a null BUFFER_LENGTH in the result set.
    """
    if value is None:
        return None
    upper_name = str(value).upper()
    return 255 if any(tag in upper_name for tag in ("ARRAY", "MAP")) else None
71+
72+
def transform_ordinal_position_offset(value: Any) -> int:
    """Shift an ordinal position down by one (1-based -> 0-based).

    NOTE(review): assumes SEA reports 1-based positions while the
    Thrift-compatible result expects 0-based — confirm against callers.
    None and non-numeric input yield 0.
    """
    if value is None:
        return 0
    try:
        position = int(value)
    except (TypeError, ValueError):
        return 0
    return position - 1
81+
82+
# Null column transformations
def always_null(value: Any) -> None:
    """Discard the input; string columns fixed to null per JDBC spec."""
    return None
86+
87+
def always_null_int(value: Any) -> None:
    """Discard the input; integer columns fixed to null per JDBC spec."""
    return None
90+
91+
def always_null_smallint(value: Any) -> None:
    """Discard the input; smallint columns fixed to null per JDBC spec."""
    return None
94+
95+
# Identity transformations (for columns that need no change)
def identity(value: Any) -> Any:
    """Pass the value through untouched."""
    return value

0 commit comments

Comments
 (0)