Skip to content

Commit f5982f0

Browse files
partial working dump
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 9de6c8b commit f5982f0

File tree

6 files changed

+266
-44
lines changed

6 files changed

+266
-44
lines changed

src/databricks/sql/backend/sea/result_set.py

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -359,12 +359,57 @@ def _transform_arrow_table(self, table: "pyarrow.Table") -> "pyarrow.Table":
359359

360360
for new_idx, result_column in enumerate(self._metadata_columns):
361361
old_idx = self._column_index_mapping.get(new_idx)
362+
363+
# Get the source data
362364
if old_idx is not None:
363-
new_columns.append(table.column(old_idx))
365+
column = table.column(old_idx)
366+
values = column.to_pylist()
364367
else:
365-
# Create null column for missing data
366-
null_array = pyarrow.nulls(table.num_rows)
368+
values = None
369+
370+
# Special handling for columns that need data from other columns
371+
if result_column.column_name == "DATA_TYPE" and result_column.result_set_column_name is None:
372+
# Get TYPE_NAME column value for DATA_TYPE calculation
373+
for idx, col in enumerate(self._metadata_columns):
374+
if col.column_name == "TYPE_NAME":
375+
type_idx = self._column_index_mapping.get(idx)
376+
if type_idx is not None:
377+
values = table.column(type_idx).to_pylist()
378+
break
379+
elif result_column.column_name == "NULLABLE" and result_column.result_set_column_name is None:
380+
# Get IS_NULLABLE column value for NULLABLE calculation
381+
for idx, col in enumerate(self._metadata_columns):
382+
if col.column_name == "IS_NULLABLE":
383+
nullable_idx = self._column_index_mapping.get(idx)
384+
if nullable_idx is not None:
385+
values = table.column(nullable_idx).to_pylist()
386+
break
387+
elif result_column.column_name == "BUFFER_LENGTH" and result_column.result_set_column_name is None:
388+
# Get TYPE_NAME column value for BUFFER_LENGTH calculation
389+
for idx, col in enumerate(self._metadata_columns):
390+
if col.column_name == "TYPE_NAME":
391+
type_idx = self._column_index_mapping.get(idx)
392+
if type_idx is not None:
393+
values = table.column(type_idx).to_pylist()
394+
break
395+
396+
# Apply transformation and create column
397+
if values is not None:
398+
if result_column.transform_value:
399+
transformed_values = [result_column.transform_value(v) for v in values]
400+
column = pyarrow.array(transformed_values)
401+
else:
402+
column = pyarrow.array(values)
403+
new_columns.append(column)
404+
else:
405+
# Create column with default/transformed values
406+
if result_column.transform_value:
407+
default_value = result_column.transform_value(None)
408+
null_array = pyarrow.array([default_value] * table.num_rows)
409+
else:
410+
null_array = pyarrow.nulls(table.num_rows)
367411
new_columns.append(null_array)
412+
368413
column_names.append(result_column.column_name)
369414

370415
return pyarrow.Table.from_arrays(new_columns, names=column_names)
@@ -377,11 +422,43 @@ def _transform_json_rows(self, rows: List[List[str]]) -> List[List[Any]]:
377422
transformed_rows = []
378423
for row in rows:
379424
new_row = []
380-
for new_idx in range(len(self._metadata_columns)):
425+
for new_idx, result_column in enumerate(self._metadata_columns):
381426
old_idx = self._column_index_mapping.get(new_idx)
382427
if old_idx is not None:
383-
new_row.append(row[old_idx])
428+
value = row[old_idx]
384429
else:
385-
new_row.append(None)
430+
value = None
431+
432+
# Special handling for columns that need data from other columns
433+
if result_column.column_name == "DATA_TYPE" and result_column.result_set_column_name is None:
434+
# Get TYPE_NAME column value for DATA_TYPE calculation
435+
for idx, col in enumerate(self._metadata_columns):
436+
if col.column_name == "TYPE_NAME":
437+
type_idx = self._column_index_mapping.get(idx)
438+
if type_idx is not None and type_idx < len(row):
439+
value = row[type_idx]
440+
break
441+
elif result_column.column_name == "NULLABLE" and result_column.result_set_column_name is None:
442+
# Get IS_NULLABLE column value for NULLABLE calculation
443+
for idx, col in enumerate(self._metadata_columns):
444+
if col.column_name == "IS_NULLABLE":
445+
nullable_idx = self._column_index_mapping.get(idx)
446+
if nullable_idx is not None and nullable_idx < len(row):
447+
value = row[nullable_idx]
448+
break
449+
elif result_column.column_name == "BUFFER_LENGTH" and result_column.result_set_column_name is None:
450+
# Get TYPE_NAME column value for BUFFER_LENGTH calculation
451+
for idx, col in enumerate(self._metadata_columns):
452+
if col.column_name == "TYPE_NAME":
453+
type_idx = self._column_index_mapping.get(idx)
454+
if type_idx is not None and type_idx < len(row):
455+
value = row[type_idx]
456+
break
457+
458+
# Apply transformation if defined
459+
if result_column.transform_value:
460+
value = result_column.transform_value(value)
461+
462+
new_row.append(value)
386463
transformed_rows.append(new_row)
387464
return transformed_rows

src/databricks/sql/backend/sea/utils/conversion.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ def convert_value(
150150
Returns:
151151
The converted value in the appropriate Python type
152152
"""
153+
154+
# Handle None values directly
155+
if value is None:
156+
return None
153157

154158
sql_type = sql_type.lower().strip()
155159

src/databricks/sql/backend/sea/utils/filters.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ def _filter_sea_result_set(
8686
arraysize=result_set.arraysize,
8787
)
8888

89+
# Preserve metadata columns setup from original result set
90+
if hasattr(result_set, '_metadata_columns') and result_set._metadata_columns:
91+
filtered_result_set._metadata_columns = result_set._metadata_columns
92+
filtered_result_set._column_index_mapping = result_set._column_index_mapping
93+
# Update the description to match the original prepared description
94+
filtered_result_set.description = result_set.description
95+
8996
return filtered_result_set
9097

9198
@staticmethod
Lines changed: 71 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,89 @@
11
from databricks.sql.backend.sea.utils.result_column import ResultColumn
2+
from databricks.sql.backend.sea.utils.metadata_transformations import (
3+
transform_table_type,
4+
transform_is_nullable,
5+
transform_nullable_to_int,
6+
transform_remarks_default,
7+
transform_numeric_default_zero,
8+
transform_ordinal_position_offset,
9+
calculate_data_type,
10+
calculate_buffer_length,
11+
always_null,
12+
always_null_int,
13+
always_null_smallint,
14+
identity
15+
)
216

317

418
class MetadataColumnMappings:
519
"""Column mappings for metadata queries following JDBC specification."""
620

721
# Common columns used across multiple metadata queries
8-
CATALOG_COLUMN = ResultColumn("TABLE_CAT", "catalog", "string")
9-
CATALOG_COLUMN_FOR_TABLES = ResultColumn("TABLE_CAT", "catalogName", "string")
10-
SCHEMA_COLUMN = ResultColumn("TABLE_SCHEM", "namespace", "string")
11-
SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn("TABLE_SCHEM", "databaseName", "string")
12-
TABLE_NAME_COLUMN = ResultColumn("TABLE_NAME", "tableName", "string")
13-
TABLE_TYPE_COLUMN = ResultColumn("TABLE_TYPE", "tableType", "string")
14-
REMARKS_COLUMN = ResultColumn("REMARKS", "remarks", "string")
22+
# FIX 1: Catalog columns - swap the mappings
23+
CATALOG_COLUMN = ResultColumn("TABLE_CAT", "catalogName", "string", transform_value=identity)
24+
CATALOG_COLUMN_FOR_GET_CATALOGS = ResultColumn("TABLE_CAT", "catalog", "string", transform_value=identity)
25+
# Remove CATALOG_COLUMN_FOR_TABLES - will use CATALOG_COLUMN instead
26+
27+
SCHEMA_COLUMN = ResultColumn("TABLE_SCHEM", "namespace", "string", transform_value=identity)
28+
SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn("TABLE_SCHEM", "databaseName", "string", transform_value=identity)
29+
TABLE_NAME_COLUMN = ResultColumn("TABLE_NAME", "tableName", "string", transform_value=identity)
30+
TABLE_TYPE_COLUMN = ResultColumn("TABLE_TYPE", "tableType", "string", transform_value=transform_table_type)
31+
REMARKS_COLUMN = ResultColumn("REMARKS", "remarks", "string", transform_value=transform_remarks_default)
1532

1633
# Columns specific to getColumns()
17-
COLUMN_NAME_COLUMN = ResultColumn("COLUMN_NAME", "col_name", "string")
34+
COLUMN_NAME_COLUMN = ResultColumn("COLUMN_NAME", "col_name", "string", transform_value=identity)
1835
DATA_TYPE_COLUMN = ResultColumn(
19-
"DATA_TYPE", None, "int"
20-
) # SEA doesn't provide this
21-
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", "columnType", "string")
22-
COLUMN_SIZE_COLUMN = ResultColumn("COLUMN_SIZE", None, "int")
23-
DECIMAL_DIGITS_COLUMN = ResultColumn("DECIMAL_DIGITS", None, "int")
24-
NUM_PREC_RADIX_COLUMN = ResultColumn("NUM_PREC_RADIX", None, "int")
25-
NULLABLE_COLUMN = ResultColumn("NULLABLE", None, "int")
36+
"DATA_TYPE", None, "int", transform_value=calculate_data_type
37+
) # Calculated from columnType
38+
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", "columnType", "string", transform_value=identity)
39+
40+
# FIX 5: SEA actually provides these columns
41+
COLUMN_SIZE_COLUMN = ResultColumn("COLUMN_SIZE", "columnSize", "int", transform_value=identity)
42+
DECIMAL_DIGITS_COLUMN = ResultColumn("DECIMAL_DIGITS", "decimalDigits", "int", transform_value=transform_numeric_default_zero)
43+
NUM_PREC_RADIX_COLUMN = ResultColumn("NUM_PREC_RADIX", "radix", "int", transform_value=transform_numeric_default_zero)
44+
ORDINAL_POSITION_COLUMN = ResultColumn("ORDINAL_POSITION", "ordinalPosition", "int", transform_value=transform_ordinal_position_offset)
45+
46+
NULLABLE_COLUMN = ResultColumn("NULLABLE", None, "int", transform_value=transform_nullable_to_int) # Calculated from isNullable
2647
COLUMN_DEF_COLUMN = ResultColumn(
27-
"COLUMN_DEF", "columnType", "string"
48+
"COLUMN_DEF", "columnType", "string", transform_value=identity
2849
) # Note: duplicate mapping
29-
SQL_DATA_TYPE_COLUMN = ResultColumn("SQL_DATA_TYPE", None, "int")
30-
SQL_DATETIME_SUB_COLUMN = ResultColumn("SQL_DATETIME_SUB", None, "int")
31-
CHAR_OCTET_LENGTH_COLUMN = ResultColumn("CHAR_OCTET_LENGTH", None, "int")
32-
ORDINAL_POSITION_COLUMN = ResultColumn("ORDINAL_POSITION", None, "int")
33-
IS_NULLABLE_COLUMN = ResultColumn("IS_NULLABLE", "isNullable", "string")
50+
SQL_DATA_TYPE_COLUMN = ResultColumn("SQL_DATA_TYPE", None, "int", transform_value=always_null_int)
51+
SQL_DATETIME_SUB_COLUMN = ResultColumn("SQL_DATETIME_SUB", None, "int", transform_value=always_null_int)
52+
CHAR_OCTET_LENGTH_COLUMN = ResultColumn("CHAR_OCTET_LENGTH", None, "int", transform_value=always_null_int)
53+
IS_NULLABLE_COLUMN = ResultColumn("IS_NULLABLE", "isNullable", "string", transform_value=transform_is_nullable)
3454

3555
# Columns for getTables() that don't exist in SEA
36-
TYPE_CAT_COLUMN = ResultColumn("TYPE_CAT", None, "string")
37-
TYPE_SCHEM_COLUMN = ResultColumn("TYPE_SCHEM", None, "string")
38-
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", None, "string")
56+
TYPE_CAT_COLUMN = ResultColumn("TYPE_CAT", None, "string", transform_value=always_null)
57+
TYPE_SCHEM_COLUMN = ResultColumn("TYPE_SCHEM", None, "string", transform_value=always_null)
58+
TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", None, "string", transform_value=always_null)
3959
SELF_REFERENCING_COL_NAME_COLUMN = ResultColumn(
40-
"SELF_REFERENCING_COL_NAME", None, "string"
60+
"SELF_REFERENCING_COL_NAME", None, "string", transform_value=always_null
4161
)
42-
REF_GENERATION_COLUMN = ResultColumn("REF_GENERATION", None, "string")
62+
REF_GENERATION_COLUMN = ResultColumn("REF_GENERATION", None, "string", transform_value=always_null)
63+
64+
# FIX 8: Scope columns (always null per JDBC)
65+
SCOPE_CATALOG_COLUMN = ResultColumn("SCOPE_CATALOG", None, "string", transform_value=always_null)
66+
SCOPE_SCHEMA_COLUMN = ResultColumn("SCOPE_SCHEMA", None, "string", transform_value=always_null)
67+
SCOPE_TABLE_COLUMN = ResultColumn("SCOPE_TABLE", None, "string", transform_value=always_null)
68+
SOURCE_DATA_TYPE_COLUMN = ResultColumn("SOURCE_DATA_TYPE", None, "smallint", transform_value=always_null_smallint)
69+
70+
# FIX 9 & 10: Auto increment and generated columns
71+
IS_AUTO_INCREMENT_COLUMN = ResultColumn("IS_AUTOINCREMENT", "isAutoIncrement", "string", transform_value=identity) # No underscore!
72+
IS_GENERATED_COLUMN = ResultColumn("IS_GENERATEDCOLUMN", "isGenerated", "string", transform_value=identity) # SEA provides this
73+
74+
# FIX 11: Buffer length column
75+
BUFFER_LENGTH_COLUMN = ResultColumn("BUFFER_LENGTH", None, "int", transform_value=always_null_int) # Always null per JDBC
4376

4477
# Column lists for each metadata operation
45-
CATALOG_COLUMNS = [CATALOG_COLUMN]
78+
CATALOG_COLUMNS = [CATALOG_COLUMN_FOR_GET_CATALOGS] # Use specific catalog column
4679

4780
SCHEMA_COLUMNS = [
4881
SCHEMA_COLUMN_FOR_GET_SCHEMA,
49-
ResultColumn("TABLE_CATALOG", None, "string"), # SEA doesn't return this
82+
ResultColumn("TABLE_CATALOG", None, "string", transform_value=always_null), # Will need special population logic
5083
]
5184

5285
TABLE_COLUMNS = [
53-
CATALOG_COLUMN_FOR_TABLES,
86+
CATALOG_COLUMN, # Use general catalog column (catalogName)
5487
SCHEMA_COLUMN,
5588
TABLE_NAME_COLUMN,
5689
TABLE_TYPE_COLUMN,
@@ -62,15 +95,16 @@ class MetadataColumnMappings:
6295
REF_GENERATION_COLUMN,
6396
]
6497

98+
# FIX 13: Remove IS_GENERATEDCOLUMN from list (should be 23 columns, not 24)
6599
COLUMN_COLUMNS = [
66-
CATALOG_COLUMN_FOR_TABLES,
100+
CATALOG_COLUMN, # Use general catalog column (catalogName)
67101
SCHEMA_COLUMN,
68102
TABLE_NAME_COLUMN,
69103
COLUMN_NAME_COLUMN,
70104
DATA_TYPE_COLUMN,
71105
TYPE_NAME_COLUMN,
72106
COLUMN_SIZE_COLUMN,
73-
ResultColumn("BUFFER_LENGTH", None, "int"),
107+
BUFFER_LENGTH_COLUMN,
74108
DECIMAL_DIGITS_COLUMN,
75109
NUM_PREC_RADIX_COLUMN,
76110
NULLABLE_COLUMN,
@@ -81,10 +115,10 @@ class MetadataColumnMappings:
81115
CHAR_OCTET_LENGTH_COLUMN,
82116
ORDINAL_POSITION_COLUMN,
83117
IS_NULLABLE_COLUMN,
84-
ResultColumn("SCOPE_CATALOG", None, "string"),
85-
ResultColumn("SCOPE_SCHEMA", None, "string"),
86-
ResultColumn("SCOPE_TABLE", None, "string"),
87-
ResultColumn("SOURCE_DATA_TYPE", None, "smallint"),
88-
ResultColumn("IS_AUTO_INCREMENT", None, "string"),
89-
ResultColumn("IS_GENERATEDCOLUMN", None, "string"),
118+
SCOPE_CATALOG_COLUMN,
119+
SCOPE_SCHEMA_COLUMN,
120+
SCOPE_TABLE_COLUMN,
121+
SOURCE_DATA_TYPE_COLUMN,
122+
IS_AUTO_INCREMENT_COLUMN,
123+
# DO NOT INCLUDE IS_GENERATED_COLUMN - Thrift returns 23 columns
90124
]
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from typing import Any, Optional
2+
3+
# Table transformations
def transform_table_type(value: Any) -> str:
    """Normalize a table-type value, defaulting blanks to "TABLE".

    A getTables() row must always carry a concrete TABLE_TYPE, so a
    missing (None) or empty-string value is reported as "TABLE".
    """
    return "TABLE" if value is None or value == "" else str(value)
9+
10+
# Nullable transformations
def transform_is_nullable(value: Any) -> str:
    """Map a nullability flag to the JDBC IS_NULLABLE string ("YES"/"NO").

    Accepts the string "true", the boolean True, or None (unknown
    nullability defaults to nullable) as "YES"; anything else is "NO".
    """
    nullable = value is None or value is True or value == "true"
    return "YES" if nullable else "NO"
16+
17+
def transform_nullable_to_int(value: Any) -> int:
    """Map a nullability flag to the JDBC NULLABLE integer code.

    Returns 1 (nullable) for None, the string "true", or the boolean
    True; returns 0 (not nullable) for everything else.
    """
    return 1 if value is None or value is True or value == "true" else 0
22+
23+
# Default value transformations
def transform_remarks_default(value: Any) -> str:
    """Render a REMARKS value as a string, substituting "" when absent."""
    return "" if value is None else str(value)
29+
30+
def transform_numeric_default_zero(value: Any) -> int:
    """Coerce a numeric metadata value to int, falling back to 0.

    None, and any value int() rejects (e.g. "n/a" or a float-formatted
    string like "3.5"), all collapse to 0.
    """
    if value is None:
        return 0
    try:
        result = int(value)
    except (TypeError, ValueError):
        result = 0
    return result
38+
39+
# Calculated transformations

# java.sql.Types codes keyed by Databricks type name.  Hoisted to module
# level so the table is built once instead of on every call.
# NOTE(review): 2002 is java.sql.Types.STRUCT; MAP has no dedicated JDBC
# code — confirm mapping MAP -> 2002 is intended.
_JDBC_TYPE_CODES = {
    "STRING": 12, "VARCHAR": 12,            # VARCHAR
    "INT": 4, "INTEGER": 4,                 # INTEGER
    "DOUBLE": 8,                            # DOUBLE
    "FLOAT": 6,                             # FLOAT
    "BOOLEAN": 16,                          # BOOLEAN
    "DATE": 91,                             # DATE
    "TIMESTAMP": 93, "TIMESTAMP_NTZ": 93,   # TIMESTAMP
    "DECIMAL": 3,                           # DECIMAL
    "NUMERIC": 2,                           # NUMERIC
    "BINARY": -2,                           # BINARY
    "ARRAY": 2003,                          # ARRAY
    "MAP": 2002, "STRUCT": 2002,            # STRUCT
    "TINYINT": -6,                          # TINYINT
    "SMALLINT": 5,                          # SMALLINT
    "BIGINT": -5, "LONG": -5,               # BIGINT
}

# Fallback for unknown/missing types.  This is java.sql.Types.OTHER
# (1111), NOT SQL NULL (which is 0) — the original comment was wrong.
_JDBC_TYPE_OTHER = 1111


def calculate_data_type(value: Any) -> int:
    """Calculate the JDBC DATA_TYPE code from a Databricks type name.

    The lookup is case-insensitive and strips any precision suffix,
    e.g. "decimal(10,2)" resolves via "DECIMAL".  Unknown type names
    and None map to java.sql.Types.OTHER (1111).
    """
    if value is None:
        return _JDBC_TYPE_OTHER
    type_name = str(value).upper().split("(")[0]
    return _JDBC_TYPE_CODES.get(type_name, _JDBC_TYPE_OTHER)
59+
60+
def calculate_buffer_length(value: Any) -> Optional[int]:
    """Derive a BUFFER_LENGTH value from a type name.

    Type names containing ARRAY or MAP get a fixed buffer length of
    255; every other type — and a missing type name — yields None,
    which surfaces as a null BUFFER_LENGTH in the result set.
    """
    if value is None:
        return None
    upper_name = str(value).upper()
    return 255 if any(tag in upper_name for tag in ("ARRAY", "MAP")) else None
71+
72+
def transform_ordinal_position_offset(value: Any) -> int:
    """Shift an ordinal position down by one (1-based -> 0-based).

    NOTE(review): assumes SEA reports 1-based positions while the
    Thrift-compatible result expects 0-based — confirm against callers.
    None and non-numeric input yield 0.
    """
    if value is None:
        return 0
    try:
        position = int(value)
    except (TypeError, ValueError):
        return 0
    return position - 1
81+
82+
# Null column transformations
def always_null(value: Any) -> None:
    """Discard the input; string columns fixed to null per JDBC spec."""
    return None
86+
87+
def always_null_int(value: Any) -> None:
    """Discard the input; integer columns fixed to null per JDBC spec."""
    return None
90+
91+
def always_null_smallint(value: Any) -> None:
    """Discard the input; smallint columns fixed to null per JDBC spec."""
    return None
94+
95+
# Identity transformations (for columns that need no change)
def identity(value: Any) -> Any:
    """Pass the value through untouched."""
    return value

0 commit comments

Comments
 (0)