Skip to content

Commit c5c9859

Browse files
remove callback methods for transformations
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent d97d875 commit c5c9859

File tree

4 files changed

+49
-279
lines changed

4 files changed

+49
-279
lines changed

src/databricks/sql/backend/sea/result_set.py

Lines changed: 18 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import Any, List, Optional, TYPE_CHECKING, Dict
3+
from typing import Any, List, Optional, TYPE_CHECKING, Dict, Union
44

55
import logging
66

@@ -93,8 +93,8 @@ def __init__(
9393
)
9494

9595
# Initialize metadata columns for post-fetch transformation
96-
self._metadata_columns = None
97-
self._column_index_mapping = None
96+
self._metadata_columns: Optional[List[ResultColumn]] = None
97+
self._column_index_mapping: Optional[Dict[int, Union[int, None]]] = None
9898

9999
def _convert_json_types(self, row: List[str]) -> List[Any]:
100100
"""
@@ -301,40 +301,6 @@ def prepare_metadata_columns(self, metadata_columns: List[ResultColumn]) -> None
301301
self._metadata_columns = metadata_columns
302302
self._prepare_column_mapping()
303303

304-
def _populate_columns_from_others(
305-
self, result_column: ResultColumn, row_data: Any
306-
) -> Any:
307-
"""
308-
Helper function to populate column data from other columns based on COLUMN_DATA_MAPPING.
309-
310-
Args:
311-
result_column: The result column that needs data
312-
row_data: Row data (list for JSON, PyArrow table for Arrow)
313-
314-
Returns:
315-
The value to use for this column, or None if not found
316-
"""
317-
target_column = result_column.column_name
318-
if target_column not in COLUMN_DATA_MAPPING:
319-
return None
320-
321-
source_column = COLUMN_DATA_MAPPING[target_column]
322-
323-
# Find the source column index
324-
for idx, col in enumerate(self._metadata_columns):
325-
if col.column_name == source_column:
326-
source_idx = self._column_index_mapping.get(idx)
327-
if source_idx is not None:
328-
# Handle Arrow table format
329-
if hasattr(row_data, "column"): # PyArrow table
330-
return row_data.column(source_idx).to_pylist()
331-
# Handle JSON row format
332-
else:
333-
return row_data[source_idx]
334-
break
335-
336-
return None
337-
338304
def _prepare_column_mapping(self) -> None:
339305
"""
340306
Prepare column index mapping for metadata queries.
@@ -353,7 +319,7 @@ def _prepare_column_mapping(self) -> None:
353319
new_description = []
354320
self._column_index_mapping = {} # Maps new index -> old index
355321

356-
for new_idx, result_column in enumerate(self._metadata_columns):
322+
for new_idx, result_column in enumerate(self._metadata_columns or []):
357323
# Find the corresponding SEA column
358324
if (
359325
result_column.result_set_column_name
@@ -400,8 +366,12 @@ def _transform_arrow_table(self, table: "pyarrow.Table") -> "pyarrow.Table":
400366
new_columns = []
401367
column_names = []
402368

403-
for new_idx, result_column in enumerate(self._metadata_columns):
404-
old_idx = self._column_index_mapping.get(new_idx)
369+
for new_idx, result_column in enumerate(self._metadata_columns or []):
370+
old_idx = (
371+
self._column_index_mapping.get(new_idx)
372+
if self._column_index_mapping
373+
else None
374+
)
405375

406376
# Get the source data
407377
if old_idx is not None:
@@ -410,27 +380,13 @@ def _transform_arrow_table(self, table: "pyarrow.Table") -> "pyarrow.Table":
410380
else:
411381
values = None
412382

413-
# Special handling for columns that need data from other columns
414-
if result_column.result_set_column_name is None:
415-
values = self._populate_columns_from_others(result_column, table)
416-
417383
# Create the column directly from the source values
418384
if values is not None:
419-
if result_column.transform_value:
420-
transformed_values = [
421-
result_column.transform_value(v) for v in values
422-
]
423-
column = pyarrow.array(transformed_values)
424-
else:
425-
column = pyarrow.array(values)
385+
column = pyarrow.array(values)
426386
new_columns.append(column)
427387
else:
428388
# No source values available: fill the column with nulls
429-
if result_column.transform_value:
430-
default_value = result_column.transform_value(None)
431-
null_array = pyarrow.array([default_value] * table.num_rows)
432-
else:
433-
null_array = pyarrow.nulls(table.num_rows)
389+
null_array = pyarrow.nulls(table.num_rows)
434390
new_columns.append(null_array)
435391

436392
column_names.append(result_column.column_name)
@@ -445,21 +401,17 @@ def _transform_json_rows(self, rows: List[List[str]]) -> List[List[Any]]:
445401
transformed_rows = []
446402
for row in rows:
447403
new_row = []
448-
for new_idx, result_column in enumerate(self._metadata_columns):
449-
old_idx = self._column_index_mapping.get(new_idx)
404+
for new_idx, result_column in enumerate(self._metadata_columns or []):
405+
old_idx = (
406+
self._column_index_mapping.get(new_idx)
407+
if self._column_index_mapping
408+
else None
409+
)
450410
if old_idx is not None:
451411
value = row[old_idx]
452412
else:
453413
value = None
454414

455-
# Special handling for columns that need data from other columns
456-
if result_column.result_set_column_name is None:
457-
value = self._populate_columns_from_others(result_column, row)
458-
459-
# Apply transformation if defined
460-
if result_column.transform_value:
461-
value = result_column.transform_value(value)
462-
463415
new_row.append(value)
464416
transformed_rows.append(new_row)
465417
return transformed_rows

src/databricks/sql/backend/sea/utils/metadata_mappings.py

Lines changed: 31 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,141 +1,79 @@
11
from databricks.sql.backend.sea.utils.result_column import ResultColumn
2-
from databricks.sql.backend.sea.utils.metadata_transformations import (
3-
transform_table_type,
4-
transform_is_nullable,
5-
transform_nullable_to_int,
6-
transform_remarks_default,
7-
transform_numeric_default_zero,
8-
transform_ordinal_position_offset,
9-
calculate_data_type,
10-
calculate_buffer_length,
11-
always_null,
12-
always_null_int,
13-
always_null_smallint,
14-
identity,
15-
)
162

173

184
class MetadataColumnMappings:
195
"""Column mappings for metadata queries following JDBC specification."""
206

217
# Common columns used across multiple metadata queries
228
# FIX 1: Catalog columns - swap the mappings
23-
CATALOG_COLUMN = ResultColumn(
24-
"TABLE_CAT", "catalogName", "string", transform_value=identity
25-
)
26-
CATALOG_COLUMN_FOR_GET_CATALOGS = ResultColumn(
27-
"TABLE_CAT", "catalog", "string", transform_value=identity
28-
)
9+
CATALOG_COLUMN = ResultColumn("TABLE_CAT", "catalogName", "string")
10+
CATALOG_COLUMN_FOR_GET_CATALOGS = ResultColumn("TABLE_CAT", "catalog", "string")
2911
# Remove CATALOG_COLUMN_FOR_TABLES - will use CATALOG_COLUMN instead
3012

31-
SCHEMA_COLUMN = ResultColumn(
32-
"TABLE_SCHEM", "namespace", "string", transform_value=identity
33-
)
34-
SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn(
35-
"TABLE_SCHEM", "databaseName", "string", transform_value=identity
36-
)
37-
TABLE_NAME_COLUMN = ResultColumn(
38-
"TABLE_NAME", "tableName", "string", transform_value=identity
39-
)
40-
TABLE_TYPE_COLUMN = ResultColumn(
41-
"TABLE_TYPE", "tableType", "string", transform_value=transform_table_type
42-
)
43-
REMARKS_COLUMN = ResultColumn(
44-
"REMARKS", "remarks", "string", transform_value=transform_remarks_default
45-
)
13+
SCHEMA_COLUMN = ResultColumn("TABLE_SCHEM", "namespace", "string")
14+
SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn("TABLE_SCHEM", "databaseName", "string")
15+
TABLE_NAME_COLUMN = ResultColumn("TABLE_NAME", "tableName", "string")
16+
TABLE_TYPE_COLUMN = ResultColumn("TABLE_TYPE", "tableType", "string")
17+
REMARKS_COLUMN = ResultColumn("REMARKS", "remarks", "string")
4618

4719
# Columns specific to getColumns()
48-
COLUMN_NAME_COLUMN = ResultColumn(
49-
"COLUMN_NAME", "col_name", "string", transform_value=identity
50-
)
20+
COLUMN_NAME_COLUMN = ResultColumn("COLUMN_NAME", "col_name", "string")
5121
DATA_TYPE_COLUMN = ResultColumn(
52-
"DATA_TYPE", None, "int", transform_value=calculate_data_type
22+
"DATA_TYPE", None, "int"
5323
) # Calculated from columnType
54-
TYPE_NAME_COLUMN = ResultColumn(
55-
"TYPE_NAME", "columnType", "string", transform_value=identity
56-
)
24+
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", "columnType", "string")
5725

5826
# FIX 5: SEA actually provides these columns
59-
COLUMN_SIZE_COLUMN = ResultColumn(
60-
"COLUMN_SIZE", "columnSize", "int", transform_value=identity
61-
)
27+
COLUMN_SIZE_COLUMN = ResultColumn("COLUMN_SIZE", "columnSize", "int")
6228
DECIMAL_DIGITS_COLUMN = ResultColumn(
6329
"DECIMAL_DIGITS",
6430
"decimalDigits",
6531
"int",
66-
transform_value=transform_numeric_default_zero,
67-
)
68-
NUM_PREC_RADIX_COLUMN = ResultColumn(
69-
"NUM_PREC_RADIX", "radix", "int", transform_value=transform_numeric_default_zero
7032
)
33+
NUM_PREC_RADIX_COLUMN = ResultColumn("NUM_PREC_RADIX", "radix", "int")
7134
ORDINAL_POSITION_COLUMN = ResultColumn(
7235
"ORDINAL_POSITION",
7336
"ordinalPosition",
7437
"int",
75-
transform_value=transform_ordinal_position_offset,
7638
)
7739

7840
NULLABLE_COLUMN = ResultColumn(
79-
"NULLABLE", None, "int", transform_value=transform_nullable_to_int
41+
"NULLABLE", None, "int"
8042
) # Calculated from isNullable
8143
COLUMN_DEF_COLUMN = ResultColumn(
82-
"COLUMN_DEF", "columnType", "string", transform_value=identity
44+
"COLUMN_DEF", "columnType", "string"
8345
) # Note: duplicate mapping
84-
SQL_DATA_TYPE_COLUMN = ResultColumn(
85-
"SQL_DATA_TYPE", None, "int", transform_value=always_null_int
86-
)
87-
SQL_DATETIME_SUB_COLUMN = ResultColumn(
88-
"SQL_DATETIME_SUB", None, "int", transform_value=always_null_int
89-
)
90-
CHAR_OCTET_LENGTH_COLUMN = ResultColumn(
91-
"CHAR_OCTET_LENGTH", None, "int", transform_value=always_null_int
92-
)
93-
IS_NULLABLE_COLUMN = ResultColumn(
94-
"IS_NULLABLE", "isNullable", "string", transform_value=transform_is_nullable
95-
)
46+
SQL_DATA_TYPE_COLUMN = ResultColumn("SQL_DATA_TYPE", None, "int")
47+
SQL_DATETIME_SUB_COLUMN = ResultColumn("SQL_DATETIME_SUB", None, "int")
48+
CHAR_OCTET_LENGTH_COLUMN = ResultColumn("CHAR_OCTET_LENGTH", None, "int")
49+
IS_NULLABLE_COLUMN = ResultColumn("IS_NULLABLE", "isNullable", "string")
9650

9751
# Columns for getTables() that don't exist in SEA
98-
TYPE_CAT_COLUMN = ResultColumn(
99-
"TYPE_CAT", None, "string", transform_value=always_null
100-
)
101-
TYPE_SCHEM_COLUMN = ResultColumn(
102-
"TYPE_SCHEM", None, "string", transform_value=always_null
103-
)
104-
TYPE_NAME_COLUMN = ResultColumn(
105-
"TYPE_NAME", None, "string", transform_value=always_null
106-
)
52+
TYPE_CAT_COLUMN = ResultColumn("TYPE_CAT", None, "string")
53+
TYPE_SCHEM_COLUMN = ResultColumn("TYPE_SCHEM", None, "string")
54+
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", None, "string")
10755
SELF_REFERENCING_COL_NAME_COLUMN = ResultColumn(
108-
"SELF_REFERENCING_COL_NAME", None, "string", transform_value=always_null
109-
)
110-
REF_GENERATION_COLUMN = ResultColumn(
111-
"REF_GENERATION", None, "string", transform_value=always_null
56+
"SELF_REFERENCING_COL_NAME", None, "string"
11257
)
58+
REF_GENERATION_COLUMN = ResultColumn("REF_GENERATION", None, "string")
11359

11460
# FIX 8: Scope columns (always null per JDBC)
115-
SCOPE_CATALOG_COLUMN = ResultColumn(
116-
"SCOPE_CATALOG", None, "string", transform_value=always_null
117-
)
118-
SCOPE_SCHEMA_COLUMN = ResultColumn(
119-
"SCOPE_SCHEMA", None, "string", transform_value=always_null
120-
)
121-
SCOPE_TABLE_COLUMN = ResultColumn(
122-
"SCOPE_TABLE", None, "string", transform_value=always_null
123-
)
124-
SOURCE_DATA_TYPE_COLUMN = ResultColumn(
125-
"SOURCE_DATA_TYPE", None, "smallint", transform_value=always_null_smallint
126-
)
61+
SCOPE_CATALOG_COLUMN = ResultColumn("SCOPE_CATALOG", None, "string")
62+
SCOPE_SCHEMA_COLUMN = ResultColumn("SCOPE_SCHEMA", None, "string")
63+
SCOPE_TABLE_COLUMN = ResultColumn("SCOPE_TABLE", None, "string")
64+
SOURCE_DATA_TYPE_COLUMN = ResultColumn("SOURCE_DATA_TYPE", None, "smallint")
12765

12866
# FIX 9 & 10: Auto increment and generated columns
12967
IS_AUTO_INCREMENT_COLUMN = ResultColumn(
130-
"IS_AUTOINCREMENT", "isAutoIncrement", "string", transform_value=identity
68+
"IS_AUTOINCREMENT", "isAutoIncrement", "string"
13169
) # No underscore!
13270
IS_GENERATED_COLUMN = ResultColumn(
133-
"IS_GENERATEDCOLUMN", "isGenerated", "string", transform_value=identity
71+
"IS_GENERATEDCOLUMN", "isGenerated", "string"
13472
) # SEA provides this
13573

13674
# FIX 11: Buffer length column
13775
BUFFER_LENGTH_COLUMN = ResultColumn(
138-
"BUFFER_LENGTH", None, "int", transform_value=always_null_int
76+
"BUFFER_LENGTH", None, "int"
13977
) # Always null per JDBC
14078

14179
# Column lists for each metadata operation
@@ -144,7 +82,7 @@ class MetadataColumnMappings:
14482
SCHEMA_COLUMNS = [
14583
SCHEMA_COLUMN_FOR_GET_SCHEMA,
14684
ResultColumn(
147-
"TABLE_CATALOG", None, "string", transform_value=always_null
85+
"TABLE_CATALOG", None, "string"
14886
), # Will need special population logic
14987
]
15088

0 commit comments

Comments
 (0)