Skip to content

Commit 2ba2730

Browse files
sineline (Guillem G.)
authored and committed
feat: Implement special DDL generation for Snowflake Iceberg tables with PARTITION BY to correctly handle property ordering and CTAS limitations.
1 parent 8263170 commit 2ba2730

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed

sqlmesh/core/engine_adapter/snowflake.py

Lines changed: 129 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,135 @@ def _create_table(
207207
elif table_kind == self.MANAGED_TABLE_KIND:
208208
table_kind = f"DYNAMIC {table_format} TABLE"
209209

210+
partitioned_by = kwargs.get("partitioned_by")
211+
212+
# For Iceberg tables with PARTITION BY, we must handle the DDL specially because:
213+
# 1. sqlglot reorders PartitionedByProperty to appear before other properties,
214+
# but Snowflake requires: CATALOG → EXTERNAL_VOLUME → BASE_LOCATION → PARTITION BY → PATH_LAYOUT
215+
# 2. Snowflake's CTAS variant does not support PARTITION BY at all.
216+
#
217+
# Solution: Build a CREATE expression WITHOUT PartitionedByProperty, render it to SQL,
218+
# then inject "PARTITION BY (...)" at the correct position (before PATH_LAYOUT).
219+
# For CTAS, we also split into CREATE + INSERT.
220+
if (
221+
partitioned_by
222+
and table_kind
223+
and "ICEBERG" in table_kind.upper()
224+
and target_columns_to_types
225+
):
226+
table = (
227+
table_name_or_schema
228+
if isinstance(table_name_or_schema, exp.Schema)
229+
else exp.to_table(table_name_or_schema)
230+
)
231+
232+
# Ensure schema with column definitions
233+
if not isinstance(table, exp.Schema):
234+
columns_to_types_all_known = all(
235+
dt.this != exp.DataType.Type.UNKNOWN for dt in target_columns_to_types.values()
236+
)
237+
if columns_to_types_all_known:
238+
table = exp.Schema(
239+
this=table,
240+
expressions=[
241+
exp.ColumnDef(this=exp.to_identifier(col), kind=dtype)
242+
for col, dtype in target_columns_to_types.items()
243+
],
244+
)
245+
246+
# Build properties WITHOUT PartitionedByProperty (sqlglot would reorder it)
247+
properties = self._build_table_properties_exp(
248+
**kwargs,
249+
target_columns_to_types=target_columns_to_types,
250+
table_description=(
251+
table_description
252+
if self.COMMENT_CREATION_TABLE.supports_schema_def and self.comments_enabled
253+
else None
254+
),
255+
table_kind=table_kind,
256+
)
257+
258+
# Create the DDL expression (no AS SELECT, even if we have a CTAS expression)
259+
create_exp = exp.Create(
260+
this=table,
261+
kind=table_kind or "TABLE",
262+
replace=replace,
263+
exists=False if replace else exists,
264+
properties=properties,
265+
)
266+
# Use identify=True to quote all identifiers, matching how SQLMesh
267+
# quotes identifiers in INSERT/DELETE statements. Without this,
268+
# unquoted identifiers get uppercased by Snowflake, while SQLMesh's
269+
# INSERT uses quoted lowercase — pointing to different objects.
270+
ddl_sql = create_exp.sql(dialect=self.dialect, identify=True)
271+
272+
# Build the PARTITION BY clause string
273+
partition_cols = ", ".join(
274+
col.sql(dialect=self.dialect, identify=True) for col in partitioned_by
275+
)
276+
partition_clause = f" PARTITION BY ({partition_cols})"
277+
278+
# Inject PARTITION BY right after the column definitions closing paren.
279+
# Snowflake requires: CREATE ICEBERG TABLE (...cols...) PARTITION BY (...) COMMENT=... CATALOG=... etc.
280+
# We track parenthesis depth to find the end of the column list, handling
281+
# nested types like DECIMAL(38, 0) correctly.
282+
paren_depth = 0
283+
col_end_pos = -1
284+
for i, c in enumerate(ddl_sql):
285+
if c == '(':
286+
paren_depth += 1
287+
elif c == ')':
288+
paren_depth -= 1
289+
if paren_depth == 0:
290+
col_end_pos = i + 1
291+
break
292+
293+
if col_end_pos > 0:
294+
ddl_sql = ddl_sql[:col_end_pos] + partition_clause + ddl_sql[col_end_pos:]
295+
else:
296+
# Fallback: append at end
297+
ddl_sql += partition_clause
298+
299+
# Ensure the schema exists before creating the Iceberg table.
300+
# SQLMesh uses staging schemas (e.g. sqlmesh__MIQ_ICEBERG) that may not
301+
# exist yet when this custom DDL path runs.
302+
# Use quoted identifiers to preserve case, matching the CREATE TABLE DDL.
303+
target_table = (
304+
table_name_or_schema.this
305+
if isinstance(table_name_or_schema, exp.Schema)
306+
else exp.to_table(table_name_or_schema)
307+
)
308+
schema_parts = []
309+
if target_table.catalog:
310+
schema_parts.append(f'"{target_table.catalog}"')
311+
if target_table.db:
312+
schema_parts.append(f'"{target_table.db}"')
313+
if schema_parts:
314+
schema_fqn = ".".join(schema_parts)
315+
self.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_fqn}")
316+
317+
self.execute(ddl_sql)
318+
self._clear_data_object_cache(
319+
table_name_or_schema.this
320+
if isinstance(table_name_or_schema, exp.Schema)
321+
else table_name_or_schema
322+
)
323+
324+
# If we had a CTAS expression, insert the data separately
325+
if expression is not None:
326+
table_name = (
327+
table_name_or_schema.this
328+
if isinstance(table_name_or_schema, exp.Schema)
329+
else table_name_or_schema
330+
)
331+
self._insert_append_query(
332+
table_name,
333+
expression,
334+
target_columns_to_types,
335+
track_rows_processed=False,
336+
)
337+
return
338+
210339
super()._create_table(
211340
table_name_or_schema=table_name_or_schema,
212341
expression=expression,

0 commit comments

Comments (0)