diff --git a/CHANGELOG.md b/CHANGELOG.md index 11f4aa35c..e28f3a384 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ - Warn when `contract.enforced: true` is set on a `materialized_view` model ([#1279](https://github.com/databricks/dbt-databricks/issues/1279)) - Fix `materialized_view` models with `databricks_tags` silently going stale on `dbt run`. `MaterializedViewAPI._describe_relation` was not fetching `information_schema.tags`, so existing tags always parsed as empty, producing a spurious tag diff that routed the materialization to `ALTER ... SET TAGS` instead of `REFRESH MATERIALIZED VIEW` ([#1419](https://github.com/databricks/dbt-databricks/issues/1419)) - Fix `dbt docs generate` failing with `RuntimeError: Tables contain columns with the same names ... but different types` during catalog merge across schemas ([#1392](https://github.com/databricks/dbt-databricks/issues/1392)) +- Fix column comments being permanently dropped from views when `view_update_via_alter` issues `ALTER VIEW AS`; reapply persisted column comments after the query update ([#1357](https://github.com/databricks/dbt-databricks/issues/1357)) + +### Under the Hood + +- Views with enabled config `view_update_via_alter` now update the view query independent of diffs in the query definition. This ensures that changes in underlying tables are also reflected when using star select semantics. ([#1356](https://github.com/databricks/dbt-databricks/issues/1356)) ## dbt-databricks 1.11.7 (Apr 17, 2026) diff --git a/dbt/adapters/databricks/relation_configs/query.py b/dbt/adapters/databricks/relation_configs/query.py index 12d1e3ce1..dc13d8ef2 100644 --- a/dbt/adapters/databricks/relation_configs/query.py +++ b/dbt/adapters/databricks/relation_configs/query.py @@ -22,6 +22,13 @@ def get_diff(self, other: "QueryConfig") -> Optional["QueryConfig"]: return None +class ViewQueryConfig(QueryConfig): + def get_diff(self, other: "ViewQueryConfig") -> "ViewQueryConfig": + # Return self so the view query always appears changed in the diff. + # View query diffing is unreliable (star selects, upstream schema changes etc.) + return self + + class QueryProcessor(DatabricksComponentProcessor[QueryConfig]): name: ClassVar[str] = "query" @@ -50,3 +57,15 @@ def from_relation_results(cls, result: RelationResults) -> QueryConfig: table = result["describe_extended"] row = next(x for x in table if x[0] == "View Text") return QueryConfig(query=SqlUtils.clean_sql(row[1])) + + +class ViewQueryProcessor(QueryProcessor): + @classmethod + def from_relation_results(cls, result: RelationResults) -> ViewQueryConfig: + base = super().from_relation_results(result) + return ViewQueryConfig(query=base.query) + + @classmethod + def from_relation_config(cls, relation_config: RelationConfig) -> ViewQueryConfig: + base = super().from_relation_config(relation_config) + return ViewQueryConfig(query=base.query) diff --git a/dbt/adapters/databricks/relation_configs/view.py b/dbt/adapters/databricks/relation_configs/view.py index 6fa555760..89c1e7d42 100644 --- a/dbt/adapters/databricks/relation_configs/view.py +++ b/dbt/adapters/databricks/relation_configs/view.py @@ -1,5 +1,3 @@ -from typing import Optional - from typing_extensions import Self from dbt.adapters.databricks.logging import logger @@ -7,10 +5,9 @@ DatabricksRelationChangeSet, DatabricksRelationConfigBase, ) -from dbt.adapters.databricks.relation_configs.column_comments import ColumnCommentsProcessor from dbt.adapters.databricks.relation_configs.column_tags import ColumnTagsProcessor from dbt.adapters.databricks.relation_configs.comment import CommentProcessor -from dbt.adapters.databricks.relation_configs.query import QueryProcessor +from dbt.adapters.databricks.relation_configs.query import ViewQueryProcessor from dbt.adapters.databricks.relation_configs.tags import TagsProcessor from dbt.adapters.databricks.relation_configs.tblproperties import TblPropertiesProcessor @@ -19,15 +16,17 @@ class ViewConfig(DatabricksRelationConfigBase): config_components = [ TagsProcessor, TblPropertiesProcessor, - QueryProcessor, + ViewQueryProcessor, CommentProcessor, - ColumnCommentsProcessor, ColumnTagsProcessor, ] - def get_changeset(self, existing: Self) -> Optional[DatabricksRelationChangeSet]: + def get_changeset(self, existing: Self) -> DatabricksRelationChangeSet: changeset = super().get_changeset(existing) - if changeset and "comment" in changeset.changes: + if changeset is None: + # ViewQueryProcessor always returns a diff, so this should be unreachable + raise RuntimeError("Expected a non-empty changeset for a view relation") + if "comment" in changeset.changes: logger.debug( "View description changed, requiring replace, as there is" " no API yet to update comments." diff --git a/dbt/include/databricks/macros/materializations/view.sql b/dbt/include/databricks/macros/materializations/view.sql index a2b443d94..1f0d169f4 100644 --- a/dbt/include/databricks/macros/materializations/view.sql +++ b/dbt/include/databricks/macros/materializations/view.sql @@ -11,17 +11,13 @@ {% if existing_relation %} {% if relation_should_be_altered(existing_relation) %} {% set configuration_changes = get_configuration_changes(existing_relation) %} - {% if configuration_changes and configuration_changes.changes %} - {% if configuration_changes.requires_full_refresh %} - {{ log('Using replace_with_view') }} - {{ replace_with_view(existing_relation, target_relation) }} - {% else %} - {{ log('Using alter_view') }} - {{ log(configuration_changes.changes) }} - {{ alter_view(target_relation, configuration_changes.changes) }} - {% endif %} + {% if configuration_changes.requires_full_refresh %} + {{ log('Using replace_with_view') }} + {{ replace_with_view(existing_relation, target_relation) }} {% else %} - {{ execute_no_op(target_relation) }} + {{ log('Using alter_view') }} + {{ log(configuration_changes.changes) }} + {{ alter_view(target_relation, configuration_changes.changes) }} {% endif %} {% else %} {{ replace_with_view(existing_relation, target_relation) }} diff --git a/dbt/include/databricks/macros/relations/view/alter.sql b/dbt/include/databricks/macros/relations/view/alter.sql index 43c03acba..7048313ae 100644 --- a/dbt/include/databricks/macros/relations/view/alter.sql +++ b/dbt/include/databricks/macros/relations/view/alter.sql @@ -16,8 +16,11 @@ {% endif %} {% if query %} {{ alter_query(target_relation, query.query) }} - {% endif %} - {% if column_comments %} - {{ alter_column_comments(target_relation, column_comments.comments) }} + {% if config.persist_column_docs() and model.columns %} + {#-- ALTER VIEW AS wipes all column comments, so reapply them here. --#} + {%- set existing_columns = adapter.get_columns_in_relation(target_relation) -%} + {%- set columns_to_persist = adapter.get_persist_doc_columns(existing_columns, model.columns) -%} + {{ alter_column_comment(target_relation, columns_to_persist) }} + {% endif %} {% endif %} {% endmacro %} diff --git a/tests/functional/adapter/views/fixtures.py b/tests/functional/adapter/views/fixtures.py index d9c86dc38..89f3aafec 100644 --- a/tests/functional/adapter/views/fixtures.py +++ b/tests/functional/adapter/views/fixtures.py @@ -58,3 +58,10 @@ {{ config(materialized='view') }} select id from {{ ref('seed') }}; """ + +seed_with_extra_csv = """id,msg,extra +1,hello,a +2,goodbye,b +2,yo,c +3,anyway,d +""" diff --git a/tests/functional/adapter/views/test_views.py b/tests/functional/adapter/views/test_views.py index 20baaed5f..20674c742 100644 --- a/tests/functional/adapter/views/test_views.py +++ b/tests/functional/adapter/views/test_views.py @@ -89,6 +89,39 @@ def test_view_update_with_column_comments(self, project): assert results[0][2] == "This is an id column" +class BaseUpdateQueryPreservesColumnComments(BaseUpdateView): + """Regression for #1357: ALTER VIEW AS wipes column comments; they must be reapplied.""" + + def test_view_update_query_preserves_column_comments(self, project): + util.run_dbt(["build"]) + util.write_file(fixtures.altered_view_sql, "models", "initial_view.sql") + util.run_dbt(["run"]) + + results = project.run_sql( + "describe extended {database}.{schema}.initial_view", + fetch="all", + ) + assert results[0][2] == "This is the id column" + + +class BaseUpdateUpstreamSchema(BaseUpdateView): + """Test that a star-select view is re-applied when the upstream table gains a new column.""" + + def test_view_update_with_upstream_schema_change(self, project): + util.run_dbt(["build"]) + util.write_file(fixtures.seed_with_extra_csv, "seeds", "seed.csv") + util.run_dbt(["seed", "--full-refresh"]) + util.run_dbt(["run"]) + + results = project.run_sql( + "describe {database}.{schema}.initial_view", + fetch="all", + ) + + # check that `extra` column was added, even though the view's sql definition hasn't changed + assert any([col.col_name == "extra" for col in results]) + + class BaseRemoveTags(BaseUpdateView): def test_view_update_remove_tags(self, project): util.run_dbt(["build"]) @@ -191,6 +224,38 @@ def project_config_update(self): } +@pytest.mark.skip_profile("databricks_cluster") +class TestUpdateViewViaAlterQueryPreservesColumnComments(BaseUpdateQueryPreservesColumnComments): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": {"use_materialization_v2": True}, + "models": { + "+view_update_via_alter": True, + "+persist_docs": { + "relation": True, + "columns": True, + }, + }, + } + + +@pytest.mark.skip_profile("databricks_cluster") +class TestUpdateViewViaAlterUpstreamSchema(BaseUpdateUpstreamSchema): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": {"use_materialization_v2": True}, + "models": { + "+view_update_via_alter": True, + "+persist_docs": { + "relation": True, + "columns": True, + }, + }, + } + + @pytest.mark.skip_profile("databricks_cluster") class TestUpdateViewViaAlterRemoveTags(BaseRemoveTags): @pytest.fixture(scope="class") diff --git a/tests/unit/macros/relations/test_view_macros.py b/tests/unit/macros/relations/test_view_macros.py index c2c3fd9ae..6f37905e0 100644 --- a/tests/unit/macros/relations/test_view_macros.py +++ b/tests/unit/macros/relations/test_view_macros.py @@ -51,6 +51,7 @@ def mocks(self, context): context["apply_tags"] = Mock() context["apply_tblproperties"] = Mock() context["alter_query"] = Mock() + context["alter_column_comment"] = Mock() def render_alter_view(self, template_bundle, changes): return self.run_macro( @@ -83,3 +84,12 @@ def test_macros__alter_view_with_query(self, context, template_bundle): context["apply_tags"].assert_not_called() context["apply_tblproperties"].assert_not_called() context["alter_query"].assert_called_once() + + def test_macros__alter_view_with_query_reapplies_column_comments( + self, context, template_bundle + ): + context["config"].persist_column_docs = Mock(return_value=True) + context["model"].columns = {"id": Mock()} + self.render_alter_view(template_bundle, {"query": Mock()}) + context["alter_query"].assert_called_once() + context["alter_column_comment"].assert_called_once() diff --git a/tests/unit/relation_configs/test_query.py b/tests/unit/relation_configs/test_query.py index dd58e2c5d..cd4e7baf9 100644 --- a/tests/unit/relation_configs/test_query.py +++ b/tests/unit/relation_configs/test_query.py @@ -4,7 +4,12 @@ from agate import Row from dbt.exceptions import DbtRuntimeError -from dbt.adapters.databricks.relation_configs.query import QueryConfig, QueryProcessor +from dbt.adapters.databricks.relation_configs.query import ( + QueryConfig, + QueryProcessor, + ViewQueryConfig, + ViewQueryProcessor, +) sql = "select * from foo" @@ -50,3 +55,46 @@ def test_get_diff__different_query(self): } other = QueryProcessor.from_relation_results(results) assert model.get_diff(other) is model + + +class TestViewQueryProcessor: + def test_from_results(self): + results = {"information_schema.views": Row([sql, "other"], ["view_definition", "comment"])} + spec = ViewQueryProcessor.from_relation_results(results) + assert spec == ViewQueryConfig(query=sql) + + def test_from_model_node__with_query(self): + model = Mock() + model.compiled_code = sql + spec = ViewQueryProcessor.from_relation_config(model) + assert spec == ViewQueryConfig(query=sql) + + def test_from_model_node__without_query(self): + model = Mock() + model.compiled_code = None + model.identifier = "1" + with pytest.raises( + DbtRuntimeError, + match="Cannot compile model 1 with no SQL query", + ): + _ = ViewQueryProcessor.from_relation_config(model) + + def test_get_diff__always_returns_self_for_identical_query(self): + model = ViewQueryConfig(query="select * from foo") + results = { + "information_schema.views": Row( + ["(\nselect * from foo\n)", "other"], ["view_definition", "comment"] + ) + } + other = ViewQueryProcessor.from_relation_results(results) + assert model.get_diff(other) is model + + def test_get_diff__always_returns_self_for_different_query(self): + model = ViewQueryConfig(query="select * from foo") + results = { + "information_schema.views": Row( + ["(\nselect * from bar\n)", "other"], ["view_definition", "comment"] + ) + } + other = ViewQueryProcessor.from_relation_results(results) + assert model.get_diff(other) is model