SQLMesh
diff --git a/‎setup.py‎
Lines changed: 2 additions & 1 deletion b/‎setup.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎sqlmesh/core/engine_adapter/athena.py‎
Lines changed: 75 additions & 36 deletions b/‎sqlmesh/core/engine_adapter/athena.py‎
Lines changed: 75 additions & 36 deletions
diff --git a/‎sqlmesh/dbt/target.py‎
Lines changed: 87 additions & 0 deletions b/‎sqlmesh/dbt/target.py‎
Lines changed: 87 additions & 0 deletions
@@ -86,7 +86,7 @@
             "pytz",
             "snowflake-connector-python[pandas,secure-local-storage]>=3.0.2",
             "sqlalchemy-stubs",
-            "tenacity==8.1.0",
+            "tenacity",
             "types-croniter",
             "types-dateparser",
             "types-python-dateutil",
@@ -100,6 +100,7 @@
             "dbt-redshift",
             "dbt-sqlserver>=1.7.0",
             "dbt-trino",
+            "dbt-athena-community",
         ],
         "dbt": [
             "dbt-core<2",
 
@@ -22,6 +22,9 @@
 
 if t.TYPE_CHECKING:
     from sqlmesh.core._typing import SchemaName, TableName
+    from sqlmesh.core.engine_adapter._typing import QueryOrDF
+
+    TableType = t.Union[t.Literal["hive"], t.Literal["iceberg"]]
 
 logger = logging.getLogger(__name__)
 
@@ -30,8 +33,10 @@ class AthenaEngineAdapter(PandasNativeFetchDFSupportMixin):
     DIALECT = "athena"
     SUPPORTS_TRANSACTIONS = False
     SUPPORTS_REPLACE_TABLE = False
-    # Athena has the concept of catalogs but no notion of current_catalog or setting the current catalog
-    CATALOG_SUPPORT = CatalogSupport.UNSUPPORTED
+    # Athena has the concept of catalogs, but the current catalog is set in the connection parameters and there is no way to query or change it after that.
+    # It also can't create new catalogs; you have to configure them in AWS. Typically, catalogs that are not "awsdatacatalog"
+    # are pointers to the "awsdatacatalog" of other AWS accounts
+    CATALOG_SUPPORT = CatalogSupport.SINGLE_CATALOG_ONLY
     # Athena's support for table and column comments is too patchy to consider "supported"
     # Hive tables: Table + Column comments are supported
     # Iceberg tables: Column comments only
@@ -48,6 +53,8 @@ def __init__(
         super().__init__(*args, s3_warehouse_location=s3_warehouse_location, **kwargs)
         self.s3_warehouse_location = s3_warehouse_location
 
+        self._default_catalog = self._default_catalog or "awsdatacatalog"
+
     @property
     def s3_warehouse_location(self) -> t.Optional[str]:
         return self._s3_warehouse_location
@@ -90,14 +97,7 @@ def _get_data_objects(
         schema = schema_name.db
         query = (
             exp.select(
-                exp.case()
-                .when(
-                    # calling code expects data objects in the default catalog to have their catalog set to None
-                    exp.column("table_catalog", table="t").eq("awsdatacatalog"),
-                    exp.Null(),
-                )
-                .else_(exp.column("table_catalog"))
-                .as_("catalog"),
+                exp.column("table_catalog").as_("catalog"),
                 exp.column("table_schema", table="t").as_("schema"),
                 exp.column("table_name", table="t").as_("name"),
                 exp.case()
@@ -130,6 +130,7 @@ def columns(
         self, table_name: TableName, include_pseudo_columns: bool = False
     ) -> t.Dict[str, exp.DataType]:
         table = exp.to_table(table_name)
+        # note: the data_type column contains the full parameterized type, eg 'varchar(10)'
         query = (
             exp.select("column_name", "data_type")
             .from_("information_schema.columns")
@@ -305,24 +306,29 @@ def _build_table_properties_exp(
 
         return None
 
+    def drop_table(self, table_name: TableName, exists: bool = True) -> None:
+        """
+        Drop a table, truncating it first if it is a Hive table.
+
+        Args:
+            table_name: The table to drop. Normalized with exp.to_table() before use.
+            exists: Forwarded unchanged to the base class drop_table().
+        """
+        table = exp.to_table(table_name)
+
+        # _query_table_type() returns None for a table that does not exist, so the truncate step
+        # only runs for existing Hive tables. _truncate_table() is what clears the Hive table's data.
+        if self._query_table_type(table) == "hive":
+            self._truncate_table(table)
+
+        return super().drop_table(table_name=table, exists=exists)
+
     def _truncate_table(self, table_name: TableName) -> None:
+        """
+        Remove all data from a table, choosing the approach based on the table type.
+
+        Iceberg tables are emptied via delete_from(); Hive tables have their partition data
+        or their S3 location cleared instead.
+        """
-        if isinstance(table_name, str):
-            table_name = exp.to_table(table_name)
+        table = exp.to_table(table_name)
 
         # Truncating an Iceberg table is just DELETE FROM <table>
-        if self._query_table_type(table_name) == "iceberg":
-            return self.delete_from(table_name, exp.true())
+        if self._query_table_type(table) == "iceberg":
+            return self.delete_from(table, exp.true())
 
         # Truncating a partitioned Hive table is dropping all partitions and deleting the data from S3
-        if self._is_hive_partitioned_table(table_name):
-            self._clear_partition_data(table_name, exp.true())
-        elif s3_location := self._query_table_s3_location(table_name):
+        if self._is_hive_partitioned_table(table):
+            self._clear_partition_data(table, exp.true())
+        elif s3_location := self._query_table_s3_location(table):
             # Truncating a non-partitioned Hive table is clearing out all data in its Location
             self._clear_s3_location(s3_location)
 
-    def _table_type(
-        self, table_format: t.Optional[str] = None
-    ) -> t.Union[t.Literal["hive"], t.Literal["iceberg"]]:
+    def _table_type(self, table_format: t.Optional[str] = None) -> TableType:
         """
         Interpret the "table_format" property to check if this is a Hive or an Iceberg table
         """
@@ -332,12 +338,19 @@ def _table_type(
         # if we cant detect any indication of Iceberg, this is a Hive table
         return "hive"
 
+    def _query_table_type(self, table: exp.Table) -> t.Optional[TableType]:
+        """
+        Return the type of the given table ("hive" or "iceberg"), or None if the table does not exist.
+        """
+        # The existence check runs on every call; only the lookup for an existing table is
+        # delegated to _query_table_type_or_raise(), which is decorated with @lru_cache()
+        if self.table_exists(table):
+            return self._query_table_type_or_raise(table)
+        return None
+
     @lru_cache()
-    def _query_table_type(
-        self, table: exp.Table
-    ) -> t.Union[t.Literal["hive"], t.Literal["iceberg"]]:
+    def _query_table_type_or_raise(self, table: exp.Table) -> TableType:
         """
-        Hit the DB to check if this is a Hive or an Iceberg table
+        Hit the DB to check if this is a Hive or an Iceberg table.
+
+        Note that in order to @lru_cache() this method, we have the following assumptions:
+         - The table must exist (otherwise we would cache None if this method was called before table creation and always return None after creation)
+         - The table type will not change within the same SQLMesh session
         """
         # Note: SHOW TBLPROPERTIES gets parsed by SQLGlot as an exp.Command anyway so we just use a string here
         # This also means we need to use dialect="hive" instead of dialect="athena" so that the identifiers get the correct quoting (backticks)
@@ -404,6 +417,29 @@ def _find_matching_columns(
                 matches.append((key, match_dtype))
         return matches
 
+    def replace_query(
+        self,
+        table_name: TableName,
+        query_or_df: QueryOrDF,
+        columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None,
+        table_description: t.Optional[str] = None,
+        column_descriptions: t.Optional[t.Dict[str, str]] = None,
+        **kwargs: t.Any,
+    ) -> None:
+        """
+        Replace a table with the given query or DataFrame, dropping an existing Hive table first.
+
+        Args:
+            table_name: The table to replace. Normalized with exp.to_table() before use.
+            query_or_df: Forwarded unchanged to the base class replace_query().
+            columns_to_types: Forwarded unchanged to the base class replace_query().
+            table_description: Forwarded unchanged to the base class replace_query().
+            column_descriptions: Forwarded unchanged to the base class replace_query().
+            kwargs: Any extra keyword arguments, forwarded unchanged to the base class replace_query().
+        """
+        table = exp.to_table(table_name)
+
+        # Only an existing Hive table is dropped up front; _query_table_type() returns None for a
+        # missing table and "iceberg" tables go straight to the base implementation
+        if self._query_table_type(table=table) == "hive":
+            self.drop_table(table)
+
+        return super().replace_query(
+            table_name=table,
+            query_or_df=query_or_df,
+            columns_to_types=columns_to_types,
+            table_description=table_description,
+            column_descriptions=column_descriptions,
+            **kwargs,
+        )
+
     def _insert_overwrite_by_time_partition(
         self,
         table_name: TableName,
@@ -412,23 +448,22 @@ def _insert_overwrite_by_time_partition(
         where: exp.Condition,
         **kwargs: t.Any,
     ) -> None:
-        if isinstance(table_name, str):
-            table_name = exp.to_table(table_name)
+        table = exp.to_table(table_name)
 
-        table_type = self._query_table_type(table_name)
+        table_type = self._query_table_type(table)
 
         if table_type == "iceberg":
             # Iceberg tables work as expected, we can use the default behaviour
             return super()._insert_overwrite_by_time_partition(
-                table_name, source_queries, columns_to_types, where, **kwargs
+                table, source_queries, columns_to_types, where, **kwargs
             )
 
         # For Hive tables, we need to drop all the partitions covered by the query and delete the data from S3
-        self._clear_partition_data(table_name, where)
+        self._clear_partition_data(table, where)
 
         # Now the data is physically gone, we can continue with inserting a new partition
         return super()._insert_overwrite_by_time_partition(
-            table_name,
+            table,
             source_queries,
             columns_to_types,
             where,
@@ -500,21 +535,20 @@ def _drop_partitions_from_metastore(
         )
 
     def delete_from(self, table_name: TableName, where: t.Union[str, exp.Expression]) -> None:
-        if isinstance(table_name, str):
-            table_name = exp.to_table(table_name)
+        table = exp.to_table(table_name)
 
-        table_type = self._query_table_type(table_name)
+        table_type = self._query_table_type(table)
 
         # If Iceberg, DELETE operations work as expected
         if table_type == "iceberg":
-            return super().delete_from(table_name, where)
+            return super().delete_from(table, where)
 
         # If Hive, DELETE is an error
         if table_type == "hive":
             # However, if there are no actual records to delete, we can make DELETE a no-op
             # This simplifies a bunch of calling code that just assumes DELETE works (which to be fair is a reasonable assumption since it does for every other engine)
             empty_check = (
-                exp.select("*").from_(table_name).where(where).limit(1)
+                exp.select("*").from_(table).where(where).limit(1)
             )  # deliberately not count(*) because we want the engine to stop as soon as it finds a record
             if len(self.fetchall(empty_check)) > 0:
                 raise SQLMeshError("Cannot delete individual records from a Hive table")
@@ -536,7 +570,9 @@ def _clear_s3_location(self, s3_uri: str) -> None:
             Bucket=bucket, Prefix=key, Delimiter="/"
         ):
             # list_objects_v2() returns 1000 keys per page so that lines up nicely with delete_objects() being able to delete 1000 keys at a time
-            keys_to_delete.append([item["Key"] for item in page.get("Contents", [])])
+            keys = [item["Key"] for item in page.get("Contents", [])]
+            if keys:
+                keys_to_delete.append(keys)
 
         for chunk in keys_to_delete:
             s3.delete_objects(Bucket=bucket, Delete={"Objects": [{"Key": k} for k in chunk]})
@@ -558,3 +594,6 @@ def _boto3_client(self, name: str) -> t.Any:
             config=conn.config,
             **conn._client_kwargs,
         )  # type: ignore
+
+    def get_current_catalog(self) -> t.Optional[str]:
+        """Return the catalog name stored on the connection object (no query is issued here)."""
+        return self.connection.catalog_name
@@ -22,6 +22,7 @@
     SnowflakeConnectionConfig,
     TrinoAuthenticationMethod,
     TrinoConnectionConfig,
+    AthenaConnectionConfig,
 )
 from sqlmesh.core.model import (
     IncrementalByTimeRangeKind,
@@ -109,6 +110,8 @@ def load(cls, data: t.Dict[str, t.Any]) -> TargetConfig:
             return MSSQLConfig(**data)
         elif db_type == "trino":
             return TrinoConfig(**data)
+        elif db_type == "athena":
+            return AthenaConfig(**data)
 
         raise ConfigError(f"{db_type} not supported.")
 
@@ -849,6 +852,89 @@ def to_sqlmesh(self, **kwargs: t.Any) -> ConnectionConfig:
         )
 
 
+class AthenaConfig(TargetConfig):
+    """
+    Project connection and operational configuration for the Athena target.
+
+    Args:
+        s3_staging_dir: S3 location to store Athena query results and metadata
+        s3_data_dir: Prefix for storing tables, if different from the connection's s3_staging_dir
+        s3_data_naming: How to generate table paths in s3_data_dir
+        s3_tmp_table_dir: Prefix for storing temporary tables, if different from the connection's s3_data_dir
+        region_name: AWS region of your Athena instance
+        schema: Specify the schema (Athena database) to build models into (lowercase only)
+        database: Specify the database (Data catalog) to build models into (lowercase only)
+        poll_interval: Interval in seconds to use for polling the status of query results in Athena
+        debug_query_state: Flag if debug message with Athena query state is needed
+        aws_access_key_id: Access key ID of the user performing requests
+        aws_secret_access_key: Secret access key of the user performing requests
+        aws_profile_name: Profile to use from your AWS shared credentials file
+        work_group: Identifier of Athena workgroup
+        skip_workgroup_check: Indicates if the WorkGroup check (additional AWS call) can be skipped
+        num_retries: Number of times to retry a failing query
+        num_boto3_retries: Number of times to retry boto3 requests (e.g. deleting S3 files for materialized tables)
+        num_iceberg_retries: Number of times to retry iceberg commit queries to fix ICEBERG_COMMIT_ERROR
+        spark_work_group: Identifier of Athena Spark workgroup for running Python models
+        seed_s3_upload_args: Dictionary containing boto3 ExtraArgs when uploading to S3
+        lf_tags_database: Default LF tags for new database if it's created by dbt
+    """
+
+    type: Literal["athena"] = "athena"
+    threads: int = 4
+
+    s3_staging_dir: t.Optional[str] = None
+    s3_data_dir: t.Optional[str] = None
+    s3_data_naming: t.Optional[str] = None
+    s3_tmp_table_dir: t.Optional[str] = None
+    poll_interval: t.Optional[int] = None
+    debug_query_state: bool = False
+    work_group: t.Optional[str] = None
+    skip_workgroup_check: t.Optional[bool] = None
+    spark_work_group: t.Optional[str] = None
+
+    aws_access_key_id: t.Optional[str] = None
+    aws_secret_access_key: t.Optional[str] = None
+    aws_profile_name: t.Optional[str] = None
+    region_name: t.Optional[str] = None
+
+    num_retries: t.Optional[int] = None
+    num_boto3_retries: t.Optional[int] = None
+    num_iceberg_retries: t.Optional[int] = None
+
+    # NOTE(review): mutable `{}` defaults -- presumably safe because TargetConfig is a pydantic-style model
+    # that copies defaults per instance; confirm against the TargetConfig base class
+    seed_s3_upload_args: t.Dict[str, str] = {}
+    lf_tags_database: t.Dict[str, str] = {}
+
+    @classproperty
+    def relation_class(cls) -> t.Type[BaseRelation]:
+        """Return the AthenaRelation class, imported from dbt.adapters.athena only when accessed."""
+        from dbt.adapters.athena.relation import AthenaRelation
+
+        return AthenaRelation
+
+    @classproperty
+    def column_class(cls) -> t.Type[Column]:
+        """Return the AthenaColumn class, imported from dbt.adapters.athena only when accessed."""
+        from dbt.adapters.athena.column import AthenaColumn
+
+        return AthenaColumn
+
+    def default_incremental_strategy(self, kind: IncrementalKind) -> str:
+        """Return "insert_overwrite" for every incremental kind; the `kind` argument is not consulted."""
+        return "insert_overwrite"
+
+    def to_sqlmesh(self, **kwargs: t.Any) -> ConnectionConfig:
+        """
+        Build an AthenaConnectionConfig from this target config.
+
+        Field mapping: s3_data_dir -> s3_warehouse_location, schema_ -> schema_name,
+        database -> catalog_name, threads -> concurrent_tasks. The AWS credentials, region_name,
+        work_group and s3_staging_dir are passed under the same names.
+
+        Fields such as aws_profile_name, poll_interval and the num_*_retries settings are not
+        explicitly forwarded here; they only reach the connection config if supplied via kwargs.
+
+        Args:
+            kwargs: Extra keyword arguments passed straight through to AthenaConnectionConfig.
+        """
+        return AthenaConnectionConfig(
+            type="athena",
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+            region_name=self.region_name,
+            work_group=self.work_group,
+            s3_staging_dir=self.s3_staging_dir,
+            s3_warehouse_location=self.s3_data_dir,
+            schema_name=self.schema_,
+            catalog_name=self.database,
+            concurrent_tasks=self.threads,
+            **kwargs,
+        )
+
+
 TARGET_TYPE_TO_CONFIG_CLASS: t.Dict[str, t.Type[TargetConfig]] = {
     "databricks": DatabricksConfig,
     "duckdb": DuckDbConfig,
@@ -859,4 +945,5 @@ def to_sqlmesh(self, **kwargs: t.Any) -> ConnectionConfig:
     "sqlserver": MSSQLConfig,
     "tsql": MSSQLConfig,
     "trino": TrinoConfig,
+    "athena": AthenaConfig,
 }