
Commit c90875f

some comparator stuff
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 08717be

3 files changed: +268 −37 lines changed


src/databricks/sql/backend/sea/backend.py

Lines changed: 30 additions & 9 deletions
@@ -736,12 +736,19 @@ def get_schemas(
         )
         assert result is not None, "execute_command returned None in synchronous mode"
 
-        # Normalize column names to match JDBC/thrift backend
-        from .metadata_constants import SCHEMA_COLUMNS, SCHEMA_TYPE_CODES, normalize_metadata_description
-
+        # Normalize column names and transform data to match JDBC/thrift backend
+        from .metadata_constants import SCHEMA_COLUMNS, SCHEMA_TYPE_CODES, normalize_metadata_description, transform_schemas_data_rows
+
+        # Store original description before normalization for data transformation
+        original_description = result.description[:]
+
+        # Normalize the description (column names and types)
         result.description = normalize_metadata_description(
             result.description, SCHEMA_COLUMNS, SCHEMA_TYPE_CODES
         )
+
+        # Transform the actual data rows to match the new column order and format
+        transform_schemas_data_rows(result, catalog_name, original_description)
 
         return result

@@ -785,12 +792,19 @@ def get_tables(
         )
         assert result is not None, "execute_command returned None in synchronous mode"
 
-        # Normalize column names to match JDBC/thrift backend
-        from .metadata_constants import TABLE_COLUMNS, TABLE_TYPE_CODES, normalize_metadata_description
-
+        # Normalize column names and transform data to match JDBC/thrift backend
+        from .metadata_constants import TABLE_COLUMNS, TABLE_TYPE_CODES, normalize_metadata_description, transform_tables_data_rows
+
+        # Store original description before normalization for data transformation
+        original_description = result.description[:]
+
+        # Normalize the description (column names and types)
         result.description = normalize_metadata_description(
             result.description, TABLE_COLUMNS, TABLE_TYPE_CODES
         )
+
+        # Transform the actual data rows to match the new column order and format
+        transform_tables_data_rows(result, catalog_name, original_description)
 
         # Apply client-side filtering by table_types
         from databricks.sql.backend.sea.utils.filters import ResultSetFilter
@@ -839,9 +853,16 @@ def get_columns(
         )
         assert result is not None, "execute_command returned None in synchronous mode"
 
-        # Normalize column names to match JDBC/thrift backend
-        from .metadata_constants import normalize_columns_metadata_description
-
+        # Normalize column names and transform data to match JDBC/thrift backend
+        from .metadata_constants import normalize_columns_metadata_description, transform_columns_data_rows
+
+        # Store original description before normalization for data transformation
+        original_description = result.description[:]
+
+        # Normalize the description (column names and types)
         result.description = normalize_columns_metadata_description(result.description)
+
+        # Transform the actual data rows to match the new column order and format
+        transform_columns_data_rows(result, original_description)
 
         return result
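
Taken together, the three hunks above follow one pattern: snapshot the pre-normalization description, normalize it, then rewrite the rows against the snapshot. Below is a minimal sketch of that flow for get_schemas, using a hypothetical SimpleNamespace stub in place of the real SEA result set; only the imported names come from this commit.

    from types import SimpleNamespace

    from databricks.sql.backend.sea.metadata_constants import (
        SCHEMA_COLUMNS,
        SCHEMA_TYPE_CODES,
        normalize_metadata_description,
        transform_schemas_data_rows,
    )

    # Stand-in for the object returned by execute_command; the real result
    # set also carries description and rows attributes.
    result = SimpleNamespace(
        description=[("databaseName", "string", None, None, None, None, None)],
        rows=[("my_schema",)],
    )

    # Snapshot the description first: the transform locates values by the
    # original SEA column names, which normalization renames.
    original_description = result.description[:]
    result.description = normalize_metadata_description(
        result.description, SCHEMA_COLUMNS, SCHEMA_TYPE_CODES
    )
    transform_schemas_data_rows(result, "my_catalog", original_description)

    # Rows are now (TABLE_SCHEM, TABLE_CATALOG) pairs:
    assert result.rows == [("my_schema", "my_catalog")]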

src/databricks/sql/backend/sea/metadata_constants.py

Lines changed: 235 additions & 28 deletions
@@ -39,32 +39,34 @@
     ),  # REF_GENERATION_COLUMN (likely None in data)
 ]
 
-# Columns for columns() - matching JDBC COLUMN_COLUMNS exactly
+# Columns for columns() - mapping JDBC columns to actual SEA SHOW COLUMNS output
+# Based on actual SEA output: col_name, catalogName, namespace, tableName, columnType,
+# columnSize, decimalDigits, radix, isNullable, remarks, ordinalPosition, isAutoIncrement, isGenerated
 COLUMN_COLUMNS: List[Tuple[str, str]] = [
-    ("TABLE_CAT", "catalogName"),  # CATALOG_COLUMN
-    ("TABLE_SCHEM", "namespace"),  # SCHEMA_COLUMN
-    ("TABLE_NAME", "tableName"),  # TABLE_NAME_COLUMN
-    ("COLUMN_NAME", "col_name"),  # COL_NAME_COLUMN
-    ("DATA_TYPE", "dataType"),  # DATA_TYPE_COLUMN
-    ("TYPE_NAME", "columnType"),  # COLUMN_TYPE_COLUMN
-    ("COLUMN_SIZE", "columnSize"),  # COLUMN_SIZE_COLUMN
-    ("BUFFER_LENGTH", "bufferLength"),  # BUFFER_LENGTH_COLUMN
-    ("DECIMAL_DIGITS", "decimalDigits"),  # DECIMAL_DIGITS_COLUMN
-    ("NUM_PREC_RADIX", "radix"),  # NUM_PREC_RADIX_COLUMN
-    ("NULLABLE", "Nullable"),  # NULLABLE_COLUMN
-    ("REMARKS", "remarks"),  # REMARKS_COLUMN
-    ("COLUMN_DEF", "columnType"),  # COLUMN_DEF_COLUMN (same source as TYPE_NAME)
-    ("SQL_DATA_TYPE", "SQLDataType"),  # SQL_DATA_TYPE_COLUMN
-    ("SQL_DATETIME_SUB", "SQLDateTimeSub"),  # SQL_DATETIME_SUB_COLUMN
-    ("CHAR_OCTET_LENGTH", "CharOctetLength"),  # CHAR_OCTET_LENGTH_COLUMN
-    ("ORDINAL_POSITION", "ordinalPosition"),  # ORDINAL_POSITION_COLUMN
-    ("IS_NULLABLE", "isNullable"),  # IS_NULLABLE_COLUMN
-    ("SCOPE_CATALOG", "ScopeCatalog"),  # SCOPE_CATALOG_COLUMN
-    ("SCOPE_SCHEMA", "ScopeSchema"),  # SCOPE_SCHEMA_COLUMN
-    ("SCOPE_TABLE", "ScopeTable"),  # SCOPE_TABLE_COLUMN
-    ("SOURCE_DATA_TYPE", "SourceDataType"),  # SOURCE_DATA_TYPE_COLUMN
-    ("IS_AUTOINCREMENT", "isAutoIncrement"),  # IS_AUTO_INCREMENT_COLUMN
-    ("IS_GENERATEDCOLUMN", "isGenerated"),  # IS_GENERATED_COLUMN
+    ("TABLE_CAT", "catalogName"),  # Maps to existing SEA column
+    ("TABLE_SCHEM", "namespace"),  # Maps to existing SEA column
+    ("TABLE_NAME", "tableName"),  # Maps to existing SEA column
+    ("COLUMN_NAME", "col_name"),  # Maps to existing SEA column
+    ("DATA_TYPE", None),  # Calculated from columnType
+    ("TYPE_NAME", "columnType"),  # Maps to existing SEA column
+    ("COLUMN_SIZE", "columnSize"),  # Maps to existing SEA column
+    ("BUFFER_LENGTH", None),  # Not available in SEA - default to None
+    ("DECIMAL_DIGITS", "decimalDigits"),  # Maps to existing SEA column
+    ("NUM_PREC_RADIX", "radix"),  # Maps to existing SEA column
+    ("NULLABLE", None),  # Calculated from isNullable
+    ("REMARKS", "remarks"),  # Maps to existing SEA column
+    ("COLUMN_DEF", None),  # Not available in SEA - default to None
+    ("SQL_DATA_TYPE", None),  # Not available in SEA - default to None
+    ("SQL_DATETIME_SUB", None),  # Not available in SEA - default to None
+    ("CHAR_OCTET_LENGTH", None),  # Not available in SEA - default to None
+    ("ORDINAL_POSITION", "ordinalPosition"),  # Maps to existing SEA column
+    ("IS_NULLABLE", "isNullable"),  # Maps to existing SEA column
+    ("SCOPE_CATALOG", None),  # Not available in SEA - default to None
+    ("SCOPE_SCHEMA", None),  # Not available in SEA - default to None
+    ("SCOPE_TABLE", None),  # Not available in SEA - default to None
+    ("SOURCE_DATA_TYPE", None),  # Not available in SEA - default to None
+    ("IS_AUTO_INCREMENT", "isAutoIncrement"),  # Maps to existing SEA column (renamed from IS_AUTOINCREMENT)
+    # Note: Removing IS_GENERATEDCOLUMN to match Thrift's 23 columns exactly
 ]
 
 # Note: COLUMN_DEF and TYPE_NAME both map to "columnType" - no special handling needed
@@ -111,8 +113,7 @@
     "SCOPE_CATALOG": "string",
     "SCOPE_SCHEMA": "string",
     "SCOPE_TABLE": "string",
-    "IS_AUTOINCREMENT": "string",
-    "IS_GENERATEDCOLUMN": "string",
+    "IS_AUTO_INCREMENT": "string",
 }
 

@@ -203,7 +204,213 @@ def normalize_columns_metadata_description(
     Returns:
         Normalized description matching JDBC COLUMN_COLUMNS with correct type codes
     """
-    # COLUMN_DEF and TYPE_NAME both map to "columnType" so no special handling needed
     return normalize_metadata_description(
         original_description, COLUMN_COLUMNS, COLUMN_TYPE_CODES
     )
+
+
+def transform_schemas_data_rows(result_set, catalog_name: str, original_description: List[Tuple]) -> None:
+    """
+    Transform SEA schemas() data rows to match JDBC format.
+
+    Args:
+        result_set: The SEA result set to modify
+        catalog_name: The catalog name to add as TABLE_CATALOG
+        original_description: Original column descriptions before normalization
+    """
+    if not hasattr(result_set, 'rows') or not result_set.rows:
+        return
+
+    # Build mapping from original column names to their indices
+    original_col_to_idx = {}
+    for idx, col_desc in enumerate(original_description):
+        original_col_to_idx[col_desc[0]] = idx
+
+    # Transform each row to JDBC format: (TABLE_SCHEM, TABLE_CATALOG)
+    new_rows = []
+    for row in result_set.rows:
+        # Convert row to list for easier manipulation
+        if hasattr(row, '_asdict'):
+            row_dict = row._asdict()
+            row_data = [row_dict.get(col_desc[0]) for col_desc in original_description]
+        else:
+            row_data = list(row)
+
+        # Extract schema name from databaseName field
+        schema_name = None
+        if 'databaseName' in original_col_to_idx:
+            idx = original_col_to_idx['databaseName']
+            schema_name = row_data[idx] if idx < len(row_data) else None
+            # Remove quotes if present
+            if schema_name and schema_name.startswith("'") and schema_name.endswith("'"):
+                schema_name = schema_name[1:-1]
+
+        # Create new row: (TABLE_SCHEM, TABLE_CATALOG)
+        new_row_data = (schema_name, catalog_name)
+        new_rows.append(new_row_data)
+
+    # Replace the rows in the result set
+    result_set.rows = new_rows
+
+
+def transform_tables_data_rows(result_set, catalog_name: str, original_description: List[Tuple]) -> None:
+    """
+    Transform SEA tables() data rows to match JDBC format.
+
+    Args:
+        result_set: The SEA result set to modify
+        catalog_name: The catalog name to add as TABLE_CAT
+        original_description: Original column descriptions before normalization
+    """
+    if not hasattr(result_set, 'rows') or not result_set.rows:
+        return
+
+    # Build mapping from original column names to their indices
+    original_col_to_idx = {}
+    for idx, col_desc in enumerate(original_description):
+        original_col_to_idx[col_desc[0]] = idx
+
+    # Transform each row to JDBC format
+    new_rows = []
+    for row in result_set.rows:
+        # Convert row to list for easier manipulation
+        if hasattr(row, '_asdict'):
+            row_dict = row._asdict()
+            row_data = [row_dict.get(col_desc[0]) for col_desc in original_description]
+        else:
+            row_data = list(row)
+
+        # Extract values from original SHOW TABLES output
+        table_schema = None
+        table_name = None
+        is_temporary = None
+
+        if 'database' in original_col_to_idx:
+            idx = original_col_to_idx['database']
+            table_schema = row_data[idx] if idx < len(row_data) else None
+
+        if 'tableName' in original_col_to_idx:
+            idx = original_col_to_idx['tableName']
+            table_name = row_data[idx] if idx < len(row_data) else None
+
+        if 'isTemporary' in original_col_to_idx:
+            idx = original_col_to_idx['isTemporary']
+            is_temporary = row_data[idx] if idx < len(row_data) else None
+
+        # Determine table type based on isTemporary flag
+        table_type = "TEMPORARY TABLE" if is_temporary else "TABLE"
+
+        # Create new row with JDBC format:
+        # (TABLE_CAT, TABLE_SCHEM, TABLE_NAME, TABLE_TYPE, REMARKS, TYPE_CAT, TYPE_SCHEM, TYPE_NAME, SELF_REFERENCING_COL_NAME, REF_GENERATION)
+        new_row_data = (
+            catalog_name,  # TABLE_CAT
+            table_schema,  # TABLE_SCHEM
+            table_name,  # TABLE_NAME
+            table_type,  # TABLE_TYPE
+            "",  # REMARKS (empty string)
+            None,  # TYPE_CAT
+            None,  # TYPE_SCHEM
+            None,  # TYPE_NAME
+            None,  # SELF_REFERENCING_COL_NAME
+            None,  # REF_GENERATION
+        )
+        new_rows.append(new_row_data)
+
+    # Replace the rows in the result set
+    result_set.rows = new_rows
+
+
+def transform_columns_data_rows(result_set, original_description: List[Tuple]) -> None:
+    """
+    Transform SEA columns() data rows to match JDBC format and column order.
+
+    This function modifies the result_set.rows in place to:
+    1. Reorder columns to match JDBC standard
+    2. Transform data types (e.g., string to int for DATA_TYPE)
+    3. Add missing columns with appropriate defaults
+    4. Remove extra columns not in JDBC standard
+
+    Args:
+        result_set: The SEA result set to modify
+        original_description: Original column descriptions before normalization
+    """
+    if not hasattr(result_set, 'rows') or not result_set.rows:
+        return
+
+    # Build mapping from original column names to their indices
+    original_col_to_idx = {}
+    for idx, col_desc in enumerate(original_description):
+        original_col_to_idx[col_desc[0]] = idx
+
+    # SQL type code mapping for DATA_TYPE field
+    TYPE_CODE_MAP = {
+        'INT': 4, 'INTEGER': 4,
+        'BIGINT': -5,
+        'SMALLINT': 5,
+        'TINYINT': -6,
+        'FLOAT': 6,
+        'DOUBLE': 8,
+        'DECIMAL': 3, 'NUMERIC': 3,
+        'STRING': 12, 'VARCHAR': 12,
+        'BOOLEAN': 16,
+        'DATE': 91,
+        'TIMESTAMP': 93,
+        'BINARY': -2,
+        'ARRAY': 2003,
+        'STRUCT': 2002,
+        'MAP': 2003,
+    }
+
+    # Special handling for DECIMAL types with precision/scale
+    def parse_decimal_type(type_str):
+        """Parse DECIMAL(precision,scale) to extract base type."""
+        if type_str and type_str.upper().startswith('DECIMAL'):
+            return 'DECIMAL'
+        return type_str
+
+    # Transform each row
+    new_rows = []
+    for row in result_set.rows:
+        # Convert row to list for easier manipulation
+        if hasattr(row, '_asdict'):
+            row_dict = row._asdict()
+            row_data = [row_dict.get(col_desc[0]) for col_desc in original_description]
+        else:
+            row_data = list(row)
+
+        # Build new row according to JDBC column order
+        new_row_data = []
+
+        for jdbc_col, sea_col in COLUMN_COLUMNS:
+            if sea_col and sea_col in original_col_to_idx:
+                # Column exists in original data
+                original_idx = original_col_to_idx[sea_col]
+                value = row_data[original_idx] if original_idx < len(row_data) else None
+
+                # Special transformations
+                if jdbc_col == "DATA_TYPE" and value:
+                    # Convert type name to SQL type code
+                    base_type = parse_decimal_type(str(value))
+                    value = TYPE_CODE_MAP.get(str(base_type).upper(), 12)  # Default to VARCHAR
+                elif jdbc_col == "NULLABLE" and sea_col == "isNullable":
+                    # Convert boolean string to int (1=nullable, 0=not nullable)
+                    value = 1 if str(value).lower() == 'true' else 0
+
+                new_row_data.append(value)
+            else:
+                # Column doesn't exist in SEA, use appropriate default
+                if jdbc_col == "DATA_TYPE":
+                    new_row_data.append(12)  # Default to VARCHAR
+                elif jdbc_col == "NULLABLE":
+                    new_row_data.append(1)  # Default to nullable
+                elif jdbc_col in ["BUFFER_LENGTH", "SQL_DATA_TYPE", "SQL_DATETIME_SUB",
+                                  "CHAR_OCTET_LENGTH", "COLUMN_DEF", "SCOPE_CATALOG",
+                                  "SCOPE_SCHEMA", "SCOPE_TABLE", "SOURCE_DATA_TYPE"]:
+                    new_row_data.append(None)
+                else:
+                    new_row_data.append(None)
+
+        new_rows.append(tuple(new_row_data))
+
+    # Replace the rows in the result set
+    result_set.rows = new_rows
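
Worth tracing transform_columns_data_rows as it stands in this commit: because COLUMN_COLUMNS maps DATA_TYPE and NULLABLE to None, those two columns take the else-branch defaults (12 and 1), and the in-branch conversions from columnType/isNullable are bypassed. A sketch of that behavior with a hypothetical SimpleNamespace stub (the column names follow the SEA SHOW COLUMNS output described above; the literal values are illustrative):

    from types import SimpleNamespace

    from databricks.sql.backend.sea.metadata_constants import transform_columns_data_rows

    # Three of the SEA SHOW COLUMNS fields, in their original order.
    original_description = [
        ("col_name", "string", None, None, None, None, None),
        ("columnType", "string", None, None, None, None, None),
        ("isNullable", "string", None, None, None, None, None),
    ]
    result = SimpleNamespace(
        description=original_description[:],
        rows=[("id", "DECIMAL(10,2)", "true")],
    )

    transform_columns_data_rows(result, original_description)

    row = result.rows[0]
    assert len(row) == 23             # matches Thrift's 23-column layout
    assert row[3] == "id"             # COLUMN_NAME, carried over from col_name
    assert row[4] == 12               # DATA_TYPE: sea_col is None, so the default applies
    assert row[5] == "DECIMAL(10,2)"  # TYPE_NAME passes the raw string through
    assert row[10] == 1               # NULLABLE: sea_col is None, so the default applies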

tests/unit/test_sea_backend.py

Lines changed: 3 additions & 0 deletions
@@ -683,6 +683,7 @@ def test_get_schemas(self, sea_client, sea_session_id, mock_cursor):
             ("databaseName", "string", None, None, None, None, None),
             ("catalogName", "string", None, None, None, None, None),
         ]
+        mock_result_set.rows = []  # Add empty rows for the transformation function
         with patch.object(
             sea_client, "execute_command", return_value=mock_result_set
         ) as mock_execute:
@@ -754,6 +755,7 @@ def test_get_tables(self, sea_client, sea_session_id, mock_cursor):
             ("tableType", "string", None, None, None, None, None),
             ("remarks", "string", None, None, None, None, None),
         ]
+        mock_result_set.rows = []  # Add empty rows for the transformation function
 
         with patch.object(
             sea_client, "execute_command", return_value=mock_result_set
@@ -847,6 +849,7 @@ def test_get_columns(self, sea_client, sea_session_id, mock_cursor):
             ("dataType", "int", None, None, None, None, None),
             ("columnType", "string", None, None, None, None, None),
         ]
+        mock_result_set.rows = []  # Add empty rows for the transformation function
         with patch.object(
             sea_client, "execute_command", return_value=mock_result_set
         ) as mock_execute:
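
For context on why each test sets mock_result_set.rows = []: the transform helpers guard on rows and return early when the list is empty, so the mocks only need the attribute to exist. A quick illustration with a hypothetical stub:

    from types import SimpleNamespace

    from databricks.sql.backend.sea.metadata_constants import transform_tables_data_rows

    stub = SimpleNamespace(rows=[])
    transform_tables_data_rows(stub, "some_catalog", original_description=[])
    assert stub.rows == []  # untouched: the empty-rows guard returned early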
