diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1031aca48..1d50bec96 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ### Features
 
+- Declare `CATALOG_V2_CONFIGS` on `DatabricksAdapter` with `UnityDatabricksConfig` and `HiveMetastoreDatabricksConfig` to support parse-time validation of catalogs.yml v2 (requires `use_catalogs_v2` behavior flag in dbt-core)
 - Add `invocation_id` to the default query comment ([#1377](https://github.com/databricks/dbt-databricks/issues/1377))
 
 ### Fixes
diff --git a/dbt/adapters/databricks/catalogs/__init__.py b/dbt/adapters/databricks/catalogs/__init__.py
index a6f1bed39..e06a6877c 100644
--- a/dbt/adapters/databricks/catalogs/__init__.py
+++ b/dbt/adapters/databricks/catalogs/__init__.py
@@ -1,9 +1,15 @@
 from dbt.adapters.databricks.catalogs._hive_metastore import HiveMetastoreCatalogIntegration
 from dbt.adapters.databricks.catalogs._relation import DatabricksCatalogRelation
 from dbt.adapters.databricks.catalogs._unity import UnityCatalogIntegration
+from dbt.adapters.databricks.catalogs._v2 import (
+    HiveMetastoreDatabricksConfig,
+    UnityDatabricksConfig,
+)
 
 __all__ = [
     "DatabricksCatalogRelation",
     "HiveMetastoreCatalogIntegration",
+    "HiveMetastoreDatabricksConfig",
     "UnityCatalogIntegration",
+    "UnityDatabricksConfig",
 ]
diff --git a/dbt/adapters/databricks/catalogs/_v2.py b/dbt/adapters/databricks/catalogs/_v2.py
new file mode 100644
index 000000000..a2456ef72
--- /dev/null
+++ b/dbt/adapters/databricks/catalogs/_v2.py
@@ -0,0 +1,48 @@
+"""Parse-time validation of catalogs.yml v2 configs for Databricks catalogs."""
+
+from dataclasses import dataclass
+from typing import Optional
+
+from dbt_common.dataclass_schema import dbtClassMixin
+from dbt_common.exceptions import DbtValidationError
+
+# Single source of truth for the formats HiveMetastoreDatabricksConfig accepts.
+_HIVE_FILE_FORMATS = frozenset({"delta", "parquet", "hudi"})
+
+
+@dataclass
+class UnityDatabricksConfig(dbtClassMixin):
+    """Parse-time config for a Unity Catalog entry in catalogs.yml v2.
+
+    Raises DbtValidationError from __post_init__ when field values are
+    missing, blank, or mutually inconsistent.
+    """
+
+    file_format: str
+    location_root: Optional[str] = None
+    use_uniform: Optional[bool] = None
+
+    def __post_init__(self) -> None:
+        if not self.file_format.strip():
+            raise DbtValidationError("'file_format' must be non-empty")
+        if self.use_uniform:
+            if self.file_format.lower() != "delta":
+                raise DbtValidationError("file_format must be 'delta' when 'use_uniform' is true")
+        elif self.file_format.lower() != "parquet":
+            raise DbtValidationError(
+                "file_format must be 'parquet' when 'use_uniform' is false or unset"
+            )
+        if self.location_root is not None and not self.location_root.strip():
+            raise DbtValidationError("'location_root' cannot be blank")
+
+
+@dataclass
+class HiveMetastoreDatabricksConfig(dbtClassMixin):
+    """Parse-time config for a Hive metastore entry in catalogs.yml v2."""
+
+    file_format: str
+
+    def __post_init__(self) -> None:
+        if self.file_format.lower() not in _HIVE_FILE_FORMATS:
+            # sorted() keeps the message stable regardless of set iteration order.
+            raise DbtValidationError(f"file_format must be one of: {sorted(_HIVE_FILE_FORMATS)}")
diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py
index 05b832f78..345885c19 100644
--- a/dbt/adapters/databricks/impl.py
+++ b/dbt/adapters/databricks/impl.py
@@ -43,7 +43,9 @@
 from dbt.adapters.databricks.catalogs import (
     DatabricksCatalogRelation,
     HiveMetastoreCatalogIntegration,
+    HiveMetastoreDatabricksConfig,
     UnityCatalogIntegration,
+    UnityDatabricksConfig,
 )
 from dbt.adapters.databricks.column import DatabricksColumn
 from dbt.adapters.databricks.connections import (
@@ -233,6 +235,10 @@ class DatabricksAdapter(SparkAdapter):
         HiveMetastoreCatalogIntegration,
         UnityCatalogIntegration,
     ]
+    CATALOG_V2_CONFIGS = {
+        "unity": UnityDatabricksConfig,
+        "hive_metastore": HiveMetastoreDatabricksConfig,
+    }
     CONSTRAINT_SUPPORT = constraints.CONSTRAINT_SUPPORT
 
     get_column_behavior: GetColumnsBehavior
diff --git a/tests/unit/test_v2_catalog_configs.py b/tests/unit/test_v2_catalog_configs.py
new file mode 100644
index 000000000..814edf85d
--- /dev/null
+++ b/tests/unit/test_v2_catalog_configs.py
@@ -0,0 +1,93 @@
+import pytest
+
+from dbt.adapters.databricks.catalogs._v2 import (
+    HiveMetastoreDatabricksConfig,
+    UnityDatabricksConfig,
+)
+from dbt.adapters.databricks.impl import DatabricksAdapter
+from dbt_common.exceptions import DbtValidationError
+
+
+# ===== CATALOG_V2_CONFIGS class attribute =====
+
+
+def test_unity_registered():
+    assert DatabricksAdapter.CATALOG_V2_CONFIGS["unity"] is UnityDatabricksConfig
+
+
+def test_hive_metastore_registered():
+    assert (
+        DatabricksAdapter.CATALOG_V2_CONFIGS["hive_metastore"] is HiveMetastoreDatabricksConfig
+    )
+
+
+# ===== UnityDatabricksConfig =====
+
+
+def test_unity_parquet_without_uniform():
+    cfg = UnityDatabricksConfig(file_format="parquet")
+    assert cfg.file_format == "parquet"
+
+
+def test_unity_delta_with_uniform():
+    cfg = UnityDatabricksConfig(file_format="delta", use_uniform=True)
+    assert cfg.file_format == "delta"
+    assert cfg.use_uniform is True
+
+
+def test_unity_with_location_root():
+    cfg = UnityDatabricksConfig(file_format="parquet", location_root="/mnt/data")
+    assert cfg.location_root == "/mnt/data"
+
+
+def test_unity_delta_without_uniform_raises():
+    with pytest.raises(DbtValidationError, match="file_format must be 'parquet'"):
+        UnityDatabricksConfig(file_format="delta")
+
+
+def test_unity_parquet_with_uniform_raises():
+    with pytest.raises(DbtValidationError, match="file_format must be 'delta'"):
+        UnityDatabricksConfig(file_format="parquet", use_uniform=True)
+
+
+def test_unity_blank_file_format_raises():
+    with pytest.raises(DbtValidationError, match="file_format.*non-empty"):
+        UnityDatabricksConfig(file_format=" ")
+
+
+def test_unity_blank_location_root_raises():
+    with pytest.raises(DbtValidationError, match="location_root.*blank"):
+        UnityDatabricksConfig(file_format="parquet", location_root=" ")
+
+
+def test_unity_rejects_unknown_keys():
+    with pytest.raises(Exception, match="Additional properties"):
+        UnityDatabricksConfig.validate({"file_format": "parquet", "bogus": True})
+
+
+# ===== HiveMetastoreDatabricksConfig =====
+
+
+def test_hive_delta_valid():
+    cfg = HiveMetastoreDatabricksConfig(file_format="delta")
+    assert cfg.file_format == "delta"
+
+
+def test_hive_parquet_valid():
+    cfg = HiveMetastoreDatabricksConfig(file_format="parquet")
+    assert cfg.file_format == "parquet"
+
+
+def test_hive_hudi_valid():
+    cfg = HiveMetastoreDatabricksConfig(file_format="hudi")
+    assert cfg.file_format == "hudi"
+
+
+def test_hive_invalid_file_format_raises():
+    with pytest.raises(DbtValidationError, match="file_format must be one of"):
+        HiveMetastoreDatabricksConfig(file_format="avro")
+
+
+def test_hive_rejects_unknown_keys():
+    with pytest.raises(Exception, match="Additional properties"):
+        HiveMetastoreDatabricksConfig.validate({"file_format": "delta", "extra": "bad"})