From e6926f6b0b59232d6824e2929dc3bcdb4540cc48 Mon Sep 17 00:00:00 2001
From: Aahel Guha
Date: Mon, 4 May 2026 16:47:14 +0530
Subject: [PATCH 1/3] feat: declare CATALOG_V2_CONFIGS for catalogs.yml v2 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds UnityDatabricksConfig and HiveMetastoreDatabricksConfig dataclasses
and declares them on DatabricksAdapter.CATALOG_V2_CONFIGS. This enables
parse-time validation of catalogs.yml v2 when the use_catalogs_v2
behavior flag is set in dbt-core.

UnityDatabricksConfig enforces file_format='parquet' when use_uniform is
false/unset, and 'delta' when use_uniform=true.
HiveMetastoreDatabricksConfig accepts delta, parquet, or hudi.

Both schemas are dbtClassMixin dataclasses — structural validation
(unknown keys, required fields) via jsonschema; semantic constraints in
__post_init__.
---
 CHANGELOG.md                                 |  1 +
 dbt/adapters/databricks/catalogs/__init__.py |  6 ++
 dbt/adapters/databricks/catalogs/_v2.py      | 38 ++++++++
 dbt/adapters/databricks/impl.py              |  6 ++
 tests/unit/test_v2_catalog_configs.py        | 93 ++++++++++++++++++++
 5 files changed, 144 insertions(+)
 create mode 100644 dbt/adapters/databricks/catalogs/_v2.py
 create mode 100644 tests/unit/test_v2_catalog_configs.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 87659b60a..41d15d69e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ### Features
 
+- Declare `CATALOG_V2_CONFIGS` on `DatabricksAdapter` with `UnityDatabricksConfig` and `HiveMetastoreDatabricksConfig` to support parse-time validation of catalogs.yml v2 (requires `use_catalogs_v2` behavior flag in dbt-core)
 - Add `invocation_id` to the default query comment ([#1377](https://github.com/databricks/dbt-databricks/issues/1377))
 
 ### Fixes
diff --git a/dbt/adapters/databricks/catalogs/__init__.py b/dbt/adapters/databricks/catalogs/__init__.py
index a6f1bed39..e06a6877c 100644
--- a/dbt/adapters/databricks/catalogs/__init__.py
+++ b/dbt/adapters/databricks/catalogs/__init__.py
@@ -1,9 +1,15 @@
 from dbt.adapters.databricks.catalogs._hive_metastore import HiveMetastoreCatalogIntegration
 from dbt.adapters.databricks.catalogs._relation import DatabricksCatalogRelation
 from dbt.adapters.databricks.catalogs._unity import UnityCatalogIntegration
+from dbt.adapters.databricks.catalogs._v2 import (
+    HiveMetastoreDatabricksConfig,
+    UnityDatabricksConfig,
+)
 
 __all__ = [
     "DatabricksCatalogRelation",
     "HiveMetastoreCatalogIntegration",
+    "HiveMetastoreDatabricksConfig",
     "UnityCatalogIntegration",
+    "UnityDatabricksConfig",
 ]
diff --git a/dbt/adapters/databricks/catalogs/_v2.py b/dbt/adapters/databricks/catalogs/_v2.py
new file mode 100644
index 000000000..74f722f4f
--- /dev/null
+++ b/dbt/adapters/databricks/catalogs/_v2.py
@@ -0,0 +1,38 @@
+from dataclasses import dataclass
+from typing import Optional
+
+from dbt_common.dataclass_schema import dbtClassMixin
+from dbt_common.exceptions import DbtValidationError
+
+
+@dataclass
+class UnityDatabricksConfig(dbtClassMixin):
+    file_format: str
+    location_root: Optional[str] = None
+    use_uniform: Optional[bool] = None
+
+    def __post_init__(self) -> None:
+        if not self.file_format.strip():
+            raise DbtValidationError("'file_format' must be non-empty")
+        # file_format depends on use_uniform — see dbt-adapters issue #9648
+        if self.use_uniform:
+            if self.file_format.lower() != "delta":
+                raise DbtValidationError("file_format must be 'delta' when 'use_uniform' is true")
+        else:
+            if self.file_format.lower() != "parquet":
+                raise DbtValidationError(
+                    "file_format must be 'parquet' when 'use_uniform' is false or unset"
+                )
+        if self.location_root is not None and not self.location_root.strip():
+            raise DbtValidationError("'location_root' cannot be blank")
+
+
+@dataclass
+class HiveMetastoreDatabricksConfig(dbtClassMixin):
+    file_format: str
+
+    def __post_init__(self) -> None:
+        if self.file_format.lower() not in {"delta", "parquet", "hudi"}:
+            raise DbtValidationError(
+                f"file_format must be one of: {sorted(f.upper() for f in {'delta', 'parquet', 'hudi'})}"
+            )
\ No newline at end of file
diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py
index c9f39a203..ced7159ac 100644
--- a/dbt/adapters/databricks/impl.py
+++ b/dbt/adapters/databricks/impl.py
@@ -41,6 +41,8 @@
     GetColumnsByDescribe,
 )
 from dbt.adapters.databricks.catalogs import (
+    HiveMetastoreDatabricksConfig,
+    UnityDatabricksConfig,
     DatabricksCatalogRelation,
     HiveMetastoreCatalogIntegration,
     UnityCatalogIntegration,
@@ -233,6 +235,10 @@ class DatabricksAdapter(SparkAdapter):
         HiveMetastoreCatalogIntegration,
         UnityCatalogIntegration,
     ]
+    CATALOG_V2_CONFIGS = {
+        "unity": UnityDatabricksConfig,
+        "hive_metastore": HiveMetastoreDatabricksConfig,
+    }
     CONSTRAINT_SUPPORT = constraints.CONSTRAINT_SUPPORT
 
     get_column_behavior: GetColumnsBehavior
diff --git a/tests/unit/test_v2_catalog_configs.py b/tests/unit/test_v2_catalog_configs.py
new file mode 100644
index 000000000..c5f8d7d9a
--- /dev/null
+++ b/tests/unit/test_v2_catalog_configs.py
@@ -0,0 +1,93 @@
+import pytest
+
+from dbt.adapters.databricks.catalogs._v2 import (
+    HiveMetastoreDatabricksConfig,
+    UnityDatabricksConfig,
+)
+from dbt.adapters.databricks.impl import DatabricksAdapter
+from dbt_common.exceptions import DbtValidationError
+
+
+# ===== CATALOG_V2_CONFIGS class attribute =====
+
+
+def test_unity_registered():
+    assert DatabricksAdapter.CATALOG_V2_CONFIGS["unity"] is UnityDatabricksConfig
+
+
+def test_hive_metastore_registered():
+    assert (
+        DatabricksAdapter.CATALOG_V2_CONFIGS["hive_metastore"] is HiveMetastoreDatabricksConfig
+    )
+
+
+# ===== UnityDatabricksConfig =====
+
+
+def test_unity_parquet_without_uniform():
+    cfg = UnityDatabricksConfig(file_format="parquet")
+    assert cfg.file_format == "parquet"
+
+
+def test_unity_delta_with_uniform():
+    cfg = UnityDatabricksConfig(file_format="delta", use_uniform=True)
+    assert cfg.file_format == "delta"
+    assert cfg.use_uniform is True
+
+
+def test_unity_with_location_root():
+    cfg = UnityDatabricksConfig(file_format="parquet", location_root="/mnt/data")
+    assert cfg.location_root == "/mnt/data"
+
+
+def test_unity_delta_without_uniform_raises():
+    with pytest.raises(DbtValidationError, match="file_format must be 'parquet'"):
+        UnityDatabricksConfig(file_format="delta")
+
+
+def test_unity_parquet_with_uniform_raises():
+    with pytest.raises(DbtValidationError, match="file_format must be 'delta'"):
+        UnityDatabricksConfig(file_format="parquet", use_uniform=True)
+
+
+def test_unity_blank_file_format_raises():
+    with pytest.raises(DbtValidationError, match="file_format.*non-empty"):
+        UnityDatabricksConfig(file_format=" ")
+
+
+def test_unity_blank_location_root_raises():
+    with pytest.raises(DbtValidationError, match="location_root.*blank"):
+        UnityDatabricksConfig(file_format="parquet", location_root=" ")
+
+
+def test_unity_rejects_unknown_keys():
+    with pytest.raises(Exception, match="Additional properties"):
+        UnityDatabricksConfig.validate({"file_format": "parquet", "bogus": True})
+
+
+# ===== HiveMetastoreDatabricksConfig =====
+
+
+def test_hive_delta_valid():
+    cfg = HiveMetastoreDatabricksConfig(file_format="delta")
+    assert cfg.file_format == "delta"
+
+
+def test_hive_parquet_valid():
+    cfg = HiveMetastoreDatabricksConfig(file_format="parquet")
+    assert cfg.file_format == "parquet"
+
+
+def test_hive_hudi_valid():
+    cfg = HiveMetastoreDatabricksConfig(file_format="hudi")
+    assert cfg.file_format == "hudi"
+
+
+def test_hive_invalid_file_format_raises():
+    with pytest.raises(DbtValidationError, match="file_format must be one of"):
+        HiveMetastoreDatabricksConfig(file_format="avro")
+
+
+def test_hive_rejects_unknown_keys():
+    with pytest.raises(Exception, match="Additional properties"):
+        HiveMetastoreDatabricksConfig.validate({"file_format": "delta", "extra": "bad"})
\ No newline at end of file

From e2d06887e0a93c404279162f6b0da9a5e0ad74c3 Mon Sep 17 00:00:00 2001
From: Aahel Guha
Date: Tue, 5 May 2026 16:30:02 +0530
Subject: [PATCH 2/3] Remove stale comment from UnityDatabricksConfig

---
 dbt/adapters/databricks/catalogs/_v2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dbt/adapters/databricks/catalogs/_v2.py b/dbt/adapters/databricks/catalogs/_v2.py
index 74f722f4f..e225d8d75 100644
--- a/dbt/adapters/databricks/catalogs/_v2.py
+++ b/dbt/adapters/databricks/catalogs/_v2.py
@@ -14,7 +14,6 @@ class UnityDatabricksConfig(dbtClassMixin):
     def __post_init__(self) -> None:
         if not self.file_format.strip():
             raise DbtValidationError("'file_format' must be non-empty")
-        # file_format depends on use_uniform — see dbt-adapters issue #9648
         if self.use_uniform:
             if self.file_format.lower() != "delta":
                 raise DbtValidationError("file_format must be 'delta' when 'use_uniform' is true")

From 0e119f60ef4333af34f2761b2d8f6b58fef299a8 Mon Sep 17 00:00:00 2001
From: Aahel Guha
Date: Tue, 5 May 2026 16:59:23 +0530
Subject: [PATCH 3/3] minor fixes

---
 dbt/adapters/databricks/catalogs/_v2.py | 4 ++--
 dbt/adapters/databricks/impl.py         | 4 ++--
 tests/unit/test_v2_catalog_configs.py   | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/dbt/adapters/databricks/catalogs/_v2.py b/dbt/adapters/databricks/catalogs/_v2.py
index e225d8d75..a2456ef72 100644
--- a/dbt/adapters/databricks/catalogs/_v2.py
+++ b/dbt/adapters/databricks/catalogs/_v2.py
@@ -33,5 +33,5 @@ class HiveMetastoreDatabricksConfig(dbtClassMixin):
     def __post_init__(self) -> None:
         if self.file_format.lower() not in {"delta", "parquet", "hudi"}:
             raise DbtValidationError(
-                f"file_format must be one of: {sorted(f.upper() for f in {'delta', 'parquet', 'hudi'})}"
-            )
\ No newline at end of file
+                f"file_format must be one of: {sorted({'delta', 'parquet', 'hudi'})}"
+            )
diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py
index ced7159ac..6d78ca46a 100644
--- a/dbt/adapters/databricks/impl.py
+++ b/dbt/adapters/databricks/impl.py
@@ -41,11 +41,11 @@
     GetColumnsByDescribe,
 )
 from dbt.adapters.databricks.catalogs import (
-    HiveMetastoreDatabricksConfig,
-    UnityDatabricksConfig,
     DatabricksCatalogRelation,
     HiveMetastoreCatalogIntegration,
+    HiveMetastoreDatabricksConfig,
     UnityCatalogIntegration,
+    UnityDatabricksConfig,
 )
 from dbt.adapters.databricks.column import DatabricksColumn
 from dbt.adapters.databricks.connections import (
diff --git a/tests/unit/test_v2_catalog_configs.py b/tests/unit/test_v2_catalog_configs.py
index c5f8d7d9a..814edf85d 100644
--- a/tests/unit/test_v2_catalog_configs.py
+++ b/tests/unit/test_v2_catalog_configs.py
@@ -90,4 +90,4 @@ def test_hive_invalid_file_format_raises():
 
 def test_hive_rejects_unknown_keys():
     with pytest.raises(Exception, match="Additional properties"):
-        HiveMetastoreDatabricksConfig.validate({"file_format": "delta", "extra": "bad"})
\ No newline at end of file
+        HiveMetastoreDatabricksConfig.validate({"file_format": "delta", "extra": "bad"})