Commit 9de6c8b

init col norm
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent fe8cd57 commit 9de6c8b

7 files changed (+287, -31 lines)

src/databricks/sql/backend/sea/backend.py

Lines changed: 17 additions & 4 deletions
@@ -19,6 +19,7 @@
     WaitTimeout,
     MetadataCommands,
 )
+from databricks.sql.backend.sea.utils.metadata_mappings import MetadataColumnMappings
 from databricks.sql.backend.sea.utils.normalize import normalize_sea_type_to_thrift
 from databricks.sql.thrift_api.TCLIService import ttypes
 
@@ -699,7 +700,10 @@ def get_catalogs(
             async_op=False,
             enforce_embedded_schema_correctness=False,
         )
-        assert result is not None, "execute_command returned None in synchronous mode"
+        assert isinstance(
+            result, SeaResultSet
+        ), "Expected SeaResultSet from SEA backend"
+        result.prepare_metadata_columns(MetadataColumnMappings.CATALOG_COLUMNS)
         return result
 
     def get_schemas(
@@ -732,7 +736,10 @@ def get_schemas(
             async_op=False,
             enforce_embedded_schema_correctness=False,
         )
-        assert result is not None, "execute_command returned None in synchronous mode"
+        assert isinstance(
+            result, SeaResultSet
+        ), "Expected SeaResultSet from SEA backend"
+        result.prepare_metadata_columns(MetadataColumnMappings.SCHEMA_COLUMNS)
         return result
 
     def get_tables(
@@ -773,7 +780,10 @@ def get_tables(
             async_op=False,
             enforce_embedded_schema_correctness=False,
         )
-        assert result is not None, "execute_command returned None in synchronous mode"
+        assert isinstance(
+            result, SeaResultSet
+        ), "Expected SeaResultSet from SEA backend"
+        result.prepare_metadata_columns(MetadataColumnMappings.TABLE_COLUMNS)
 
         # Apply client-side filtering by table_types
         from databricks.sql.backend.sea.utils.filters import ResultSetFilter
@@ -820,5 +830,8 @@ def get_columns(
             async_op=False,
             enforce_embedded_schema_correctness=False,
         )
-        assert result is not None, "execute_command returned None in synchronous mode"
+        assert isinstance(
+            result, SeaResultSet
+        ), "Expected SeaResultSet from SEA backend"
+        result.prepare_metadata_columns(MetadataColumnMappings.COLUMN_COLUMNS)
         return result
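
For orientation, a hedged sketch of what this normalization means for a caller of the metadata methods. The connection values are placeholders, and the claim in the comment assumes the TABLE_COLUMNS mapping introduced in this commit:

    from databricks import sql

    with sql.connect(
        server_hostname="example.cloud.databricks.com",  # placeholder
        http_path="/sql/1.0/warehouses/abc123",          # placeholder
        access_token="dapi-example",                     # placeholder
    ) as connection:
        with connection.cursor() as cursor:
            cursor.tables(catalog_name="main")
            # With this commit, a SEA metadata result set describes itself with
            # JDBC names (TABLE_CAT, TABLE_SCHEM, TABLE_NAME, TABLE_TYPE, ...)
            # regardless of the raw SEA column names.
            print([col[0] for col in cursor.description])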

src/databricks/sql/backend/sea/result_set.py

Lines changed: 125 additions & 3 deletions
@@ -1,11 +1,12 @@
 from __future__ import annotations
 
-from typing import Any, List, Optional, TYPE_CHECKING
+from typing import Any, List, Optional, TYPE_CHECKING, Dict
 
 import logging
 
 from databricks.sql.backend.sea.models.base import ResultData, ResultManifest
 from databricks.sql.backend.sea.utils.conversion import SqlTypeConverter
+from databricks.sql.backend.sea.utils.result_column import ResultColumn
 
 try:
     import pyarrow
@@ -82,6 +83,10 @@ def __init__(
             arrow_schema_bytes=execute_response.arrow_schema_bytes,
         )
 
+        # Initialize metadata columns for post-fetch transformation
+        self._metadata_columns = None
+        self._column_index_mapping = None
+
     def _convert_json_types(self, row: List[str]) -> List[Any]:
         """
         Convert string values in the row to appropriate Python types based on column metadata.
@@ -160,6 +165,7 @@ def fetchmany_json(self, size: int) -> List[List[str]]:
             raise ValueError(f"size argument for fetchmany is {size} but must be >= 0")
 
         results = self.results.next_n_rows(size)
+        results = self._transform_json_rows(results)
         self._next_row_index += len(results)
 
         return results
@@ -173,6 +179,7 @@ def fetchall_json(self) -> List[List[str]]:
         """
 
         results = self.results.remaining_rows()
+        results = self._transform_json_rows(results)
         self._next_row_index += len(results)
 
         return results
@@ -197,7 +204,12 @@ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
 
         results = self.results.next_n_rows(size)
         if isinstance(self.results, JsonQueue):
-            results = self._convert_json_to_arrow_table(results)
+            # Transform JSON first, then convert to Arrow
+            transformed_json = self._transform_json_rows(results)
+            results = self._convert_json_to_arrow_table(transformed_json)
+        else:
+            # Transform Arrow table directly
+            results = self._transform_arrow_table(results)
 
         self._next_row_index += results.num_rows
 
@@ -210,7 +222,12 @@ def fetchall_arrow(self) -> "pyarrow.Table":
 
         results = self.results.remaining_rows()
         if isinstance(self.results, JsonQueue):
-            results = self._convert_json_to_arrow_table(results)
+            # Transform JSON first, then convert to Arrow
+            transformed_json = self._transform_json_rows(results)
+            results = self._convert_json_to_arrow_table(transformed_json)
+        else:
+            # Transform Arrow table directly
+            results = self._transform_arrow_table(results)
 
         self._next_row_index += results.num_rows
 
@@ -263,3 +280,108 @@ def fetchall(self) -> List[Row]:
             return self._create_json_table(self.fetchall_json())
         else:
             return self._convert_arrow_table(self.fetchall_arrow())
+
+    def prepare_metadata_columns(self, metadata_columns: List[ResultColumn]) -> None:
+        """
+        Prepare result set for metadata column normalization.
+
+        Args:
+            metadata_columns: List of ResultColumn objects defining the expected columns
+                and their mappings from SEA column names
+        """
+        self._metadata_columns = metadata_columns
+        self._prepare_column_mapping()
+
+    def _prepare_column_mapping(self) -> None:
+        """
+        Prepare column index mapping for metadata queries.
+        Updates description to use JDBC column names.
+        """
+        # Ensure description is available
+        if not self.description:
+            raise ValueError("Cannot prepare column mapping without result description")
+
+        # Build mapping from SEA column names to their indices
+        sea_column_indices = {}
+        for idx, col in enumerate(self.description):
+            sea_column_indices[col[0]] = idx
+
+        # Create new description and index mapping
+        new_description = []
+        self._column_index_mapping = {}  # Maps new index -> old index
+
+        for new_idx, result_column in enumerate(self._metadata_columns):
+            # Find the corresponding SEA column
+            if (
+                result_column.result_set_column_name
+                and result_column.result_set_column_name in sea_column_indices
+            ):
+                old_idx = sea_column_indices[result_column.result_set_column_name]
+                self._column_index_mapping[new_idx] = old_idx
+                # Use the original column metadata but with JDBC name
+                old_col = self.description[old_idx]
+                new_description.append(
+                    (
+                        result_column.column_name,  # JDBC name
+                        result_column.column_type,  # Expected type
+                        old_col[2],  # display_size
+                        old_col[3],  # internal_size
+                        old_col[4],  # precision
+                        old_col[5],  # scale
+                        old_col[6],  # null_ok
+                    )
+                )
+            else:
+                # Column doesn't exist in SEA - add with None values
+                new_description.append(
+                    (
+                        result_column.column_name,
+                        result_column.column_type,
+                        None,
+                        None,
+                        None,
+                        None,
+                        True,
+                    )
+                )
+                self._column_index_mapping[new_idx] = None
+
+        self.description = new_description
+
+    def _transform_arrow_table(self, table: "pyarrow.Table") -> "pyarrow.Table":
+        """Transform arrow table columns for metadata normalization."""
+        if not self._metadata_columns:
+            return table
+
+        # Reorder columns and add missing ones
+        new_columns = []
+        column_names = []
+
+        for new_idx, result_column in enumerate(self._metadata_columns):
+            old_idx = self._column_index_mapping.get(new_idx)
+            if old_idx is not None:
+                new_columns.append(table.column(old_idx))
+            else:
+                # Create null column for missing data
+                null_array = pyarrow.nulls(table.num_rows)
+                new_columns.append(null_array)
+            column_names.append(result_column.column_name)
+
+        return pyarrow.Table.from_arrays(new_columns, names=column_names)
+
+    def _transform_json_rows(self, rows: List[List[str]]) -> List[List[Any]]:
+        """Transform JSON rows for metadata normalization."""
+        if not self._metadata_columns:
+            return rows
+
+        transformed_rows = []
+        for row in rows:
+            new_row = []
+            for new_idx in range(len(self._metadata_columns)):
+                old_idx = self._column_index_mapping.get(new_idx)
+                if old_idx is not None:
+                    new_row.append(row[old_idx])
+                else:
+                    new_row.append(None)
+            transformed_rows.append(new_row)
+        return transformed_rows
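
To see the remapping in isolation from the result-set machinery, here is a minimal self-contained sketch of the same technique on toy data. MetaCol, build_index_mapping, and transform_rows are simplifications invented for this illustration, not names from the library:

    from typing import Any, Dict, List, Optional, Tuple

    # Simplified stand-in for ResultColumn: (jdbc_name, sea_name_or_None).
    MetaCol = Tuple[str, Optional[str]]

    def build_index_mapping(
        description: List[Tuple[str, str]], metadata_columns: List[MetaCol]
    ) -> Dict[int, Optional[int]]:
        """Map each expected (JDBC) column index to a SEA column index, or None."""
        sea_indices = {name: i for i, (name, _typ) in enumerate(description)}
        return {
            new_idx: sea_indices.get(sea_name) if sea_name else None
            for new_idx, (_jdbc, sea_name) in enumerate(metadata_columns)
        }

    def transform_rows(
        rows: List[List[Any]], mapping: Dict[int, Optional[int]]
    ) -> List[List[Any]]:
        """Reorder each row into the JDBC layout, filling missing columns with None."""
        return [
            [row[old] if old is not None else None for _new, old in sorted(mapping.items())]
            for row in rows
        ]

    # A toy two-column slice of a SEA getTables() result; JDBC callers expect
    # TABLE_CAT, TABLE_NAME, and a REF_GENERATION column SEA never sends.
    description = [("catalogName", "string"), ("tableName", "string")]
    table_columns: List[MetaCol] = [
        ("TABLE_CAT", "catalogName"),
        ("TABLE_NAME", "tableName"),
        ("REF_GENERATION", None),
    ]
    mapping = build_index_mapping(description, table_columns)
    print(transform_rows([["main", "t1"]], mapping))  # [['main', 't1', None]]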
src/databricks/sql/backend/sea/utils/metadata_mappings.py

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
+from databricks.sql.backend.sea.utils.result_column import ResultColumn
+
+
+class MetadataColumnMappings:
+    """Column mappings for metadata queries following JDBC specification."""
+
+    # Common columns used across multiple metadata queries
+    CATALOG_COLUMN = ResultColumn("TABLE_CAT", "catalog", "string")
+    CATALOG_COLUMN_FOR_TABLES = ResultColumn("TABLE_CAT", "catalogName", "string")
+    SCHEMA_COLUMN = ResultColumn("TABLE_SCHEM", "namespace", "string")
+    SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn("TABLE_SCHEM", "databaseName", "string")
+    TABLE_NAME_COLUMN = ResultColumn("TABLE_NAME", "tableName", "string")
+    TABLE_TYPE_COLUMN = ResultColumn("TABLE_TYPE", "tableType", "string")
+    REMARKS_COLUMN = ResultColumn("REMARKS", "remarks", "string")
+
+    # Columns specific to getColumns()
+    COLUMN_NAME_COLUMN = ResultColumn("COLUMN_NAME", "col_name", "string")
+    DATA_TYPE_COLUMN = ResultColumn(
+        "DATA_TYPE", None, "int"
+    )  # SEA doesn't provide this
+    TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", "columnType", "string")
+    COLUMN_SIZE_COLUMN = ResultColumn("COLUMN_SIZE", None, "int")
+    DECIMAL_DIGITS_COLUMN = ResultColumn("DECIMAL_DIGITS", None, "int")
+    NUM_PREC_RADIX_COLUMN = ResultColumn("NUM_PREC_RADIX", None, "int")
+    NULLABLE_COLUMN = ResultColumn("NULLABLE", None, "int")
+    COLUMN_DEF_COLUMN = ResultColumn(
+        "COLUMN_DEF", "columnType", "string"
+    )  # Note: duplicate mapping
+    SQL_DATA_TYPE_COLUMN = ResultColumn("SQL_DATA_TYPE", None, "int")
+    SQL_DATETIME_SUB_COLUMN = ResultColumn("SQL_DATETIME_SUB", None, "int")
+    CHAR_OCTET_LENGTH_COLUMN = ResultColumn("CHAR_OCTET_LENGTH", None, "int")
+    ORDINAL_POSITION_COLUMN = ResultColumn("ORDINAL_POSITION", None, "int")
+    IS_NULLABLE_COLUMN = ResultColumn("IS_NULLABLE", "isNullable", "string")
+
+    # Columns for getTables() that don't exist in SEA
+    TYPE_CAT_COLUMN = ResultColumn("TYPE_CAT", None, "string")
+    TYPE_SCHEM_COLUMN = ResultColumn("TYPE_SCHEM", None, "string")
+    TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", None, "string")
+    SELF_REFERENCING_COL_NAME_COLUMN = ResultColumn(
+        "SELF_REFERENCING_COL_NAME", None, "string"
+    )
+    REF_GENERATION_COLUMN = ResultColumn("REF_GENERATION", None, "string")
+
+    # Column lists for each metadata operation
+    CATALOG_COLUMNS = [CATALOG_COLUMN]
+
+    SCHEMA_COLUMNS = [
+        SCHEMA_COLUMN_FOR_GET_SCHEMA,
+        ResultColumn("TABLE_CATALOG", None, "string"),  # SEA doesn't return this
+    ]
+
+    TABLE_COLUMNS = [
+        CATALOG_COLUMN_FOR_TABLES,
+        SCHEMA_COLUMN,
+        TABLE_NAME_COLUMN,
+        TABLE_TYPE_COLUMN,
+        REMARKS_COLUMN,
+        TYPE_CAT_COLUMN,
+        TYPE_SCHEM_COLUMN,
+        TYPE_NAME_COLUMN,
+        SELF_REFERENCING_COL_NAME_COLUMN,
+        REF_GENERATION_COLUMN,
+    ]
+
+    COLUMN_COLUMNS = [
+        CATALOG_COLUMN_FOR_TABLES,
+        SCHEMA_COLUMN,
+        TABLE_NAME_COLUMN,
+        COLUMN_NAME_COLUMN,
+        DATA_TYPE_COLUMN,
+        TYPE_NAME_COLUMN,
+        COLUMN_SIZE_COLUMN,
+        ResultColumn("BUFFER_LENGTH", None, "int"),
+        DECIMAL_DIGITS_COLUMN,
+        NUM_PREC_RADIX_COLUMN,
+        NULLABLE_COLUMN,
+        REMARKS_COLUMN,
+        COLUMN_DEF_COLUMN,
+        SQL_DATA_TYPE_COLUMN,
+        SQL_DATETIME_SUB_COLUMN,
+        CHAR_OCTET_LENGTH_COLUMN,
+        ORDINAL_POSITION_COLUMN,
+        IS_NULLABLE_COLUMN,
+        ResultColumn("SCOPE_CATALOG", None, "string"),
+        ResultColumn("SCOPE_SCHEMA", None, "string"),
+        ResultColumn("SCOPE_TABLE", None, "string"),
+        ResultColumn("SOURCE_DATA_TYPE", None, "smallint"),
+        ResultColumn("IS_AUTO_INCREMENT", None, "string"),
+        ResultColumn("IS_GENERATEDCOLUMN", None, "string"),
+    ]
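
Two reading notes on this file. First, a class body executes top to bottom, so the second TYPE_NAME_COLUMN assignment (the None-mapped getTables() variant) rebinds the name before COLUMN_COLUMNS is built, which means getColumns() will surface TYPE_NAME as all nulls rather than columnType; whether that is intended is not clear from the diff alone. Second, the None-mapped entries are exactly what _transform_arrow_table materializes as null columns via pyarrow.nulls; a tiny standalone check of that mechanism, assuming only that pyarrow is installed:

    import pyarrow

    # A toy getTables() result that, like SEA, lacks REF_GENERATION entirely.
    catalog = pyarrow.array(["main"])
    table_name = pyarrow.array(["t1"])
    normalized = pyarrow.Table.from_arrays(
        [catalog, table_name, pyarrow.nulls(len(catalog))],
        names=["TABLE_CAT", "TABLE_NAME", "REF_GENERATION"],
    )
    print(normalized.column("REF_GENERATION"))  # one null value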
src/databricks/sql/backend/sea/utils/result_column.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class ResultColumn:
+    """
+    Represents a mapping between JDBC specification column names and actual result set column names.
+
+    Attributes:
+        column_name: JDBC specification column name (e.g., "TABLE_CAT")
+        result_set_column_name: Server result column name from SEA (e.g., "catalog")
+        column_type: SQL type code from databricks.sql.types
+    """
+
+    column_name: str
+    result_set_column_name: Optional[str]  # None if SEA doesn't return this column
+    column_type: str
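
A quick illustration of the dataclass in use; because it is frozen, instances are immutable and hashable, which makes them safe to share as the class-level constants in MetadataColumnMappings (the import path is taken from the diff and assumes this branch is installed):

    from databricks.sql.backend.sea.utils.result_column import ResultColumn

    col = ResultColumn("TABLE_CAT", "catalog", "string")
    missing = ResultColumn("REF_GENERATION", None, "string")  # SEA never returns this
    print(col.column_name, "<-", col.result_set_column_name)  # TABLE_CAT <- catalog
    # Frozen dataclass: attempting `col.column_name = "X"` raises FrozenInstanceError.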
