Skip to content

Commit 66c610a

Browse files
authored
Fix: Improve Databricks Catalog setting (#1240)
* improve databricks set catalog
* still support non-unity
* update comment
1 parent 545baae commit 66c610a

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

setup.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ ignore_missing_imports = True
8484
[mypy-slack_sdk.*]
8585
ignore_missing_imports = True
8686

87+
[mypy-py4j.*]
88+
ignore_missing_imports = True
89+
8790
[autoflake]
8891
in-place = True
8992
expand-star-imports = True

sqlmesh/core/engine_adapter/databricks.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,16 @@ def spark(self) -> PySparkSession:
103103
).getOrCreate()
104104
catalog = self._extra_config.get("catalog")
105105
if catalog:
106+
from py4j.protocol import Py4JError
107+
106108
# Note: Spark 3.4+ Only API
107-
self._spark.catalog.setCurrentCatalog(catalog)
109+
try:
110+
self.spark.catalog.setCurrentCatalog(catalog)
111+
# `setCurrentCatalog` should work for both non-unity and Unity single user
112+
# clusters. If it fails then we try `USE CATALOG` which is Unity only but works
113+
# across all clusters
114+
except Py4JError:
115+
self.spark.sql(f"USE CATALOG {catalog}")
108116
self._spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
109117
return self._spark
110118

sqlmesh/engines/spark/db_api/spark_session.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,14 @@ def cursor(self) -> SparkSessionCursor:
7373
pass
7474
if self.catalog:
7575
# Note: Spark 3.4+ Only API
76-
self.spark.catalog.setCurrentCatalog(self.catalog)
76+
from py4j.protocol import Py4JError
77+
78+
try:
79+
self.spark.catalog.setCurrentCatalog(self.catalog)
80+
# Databricks does not support `setCurrentCatalog` with Unity catalog
81+
# and shared clusters so we use the Databricks Unity only SQL command instead
82+
except Py4JError:
83+
self.spark.sql(f"USE CATALOG {self.catalog}")
7784
self.spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
7885
self.spark.conf.set("hive.exec.dynamic.partition", "true")
7986
self.spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict")

0 commit comments

Comments (0)