diff --git a/CHANGELOG.md b/CHANGELOG.md index 274a2d776..4a9b710c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Fixes - Fix foreign-key on an incremental table to a primary key on a non-incremental table being lost after incremental run +- Track tblproperties which are managed by dbt, rather than relying on ignore list ### Under the Hood diff --git a/dbt/adapters/databricks/relation_configs/tblproperties.py b/dbt/adapters/databricks/relation_configs/tblproperties.py index 995266c18..e8df9f305 100644 --- a/dbt/adapters/databricks/relation_configs/tblproperties.py +++ b/dbt/adapters/databricks/relation_configs/tblproperties.py @@ -19,47 +19,11 @@ class TblPropertiesConfig(DatabricksComponentConfig): tblproperties: dict[str, str] pipeline_id: Optional[str] = None - # List of tblproperties that should be ignored when comparing configs. These are generally - # set by Databricks and are not user-configurable. - ignore_list: ClassVar[list[str]] = [ - "pipelines.pipelineId", - "delta.enableChangeDataFeed", - "delta.minReaderVersion", - "delta.minWriterVersion", - "pipeline_internal.catalogType", - "pipelines.metastore.tableName", - "pipeline_internal.enzymeMode", - "clusterByAuto", - "clusteringColumns", - "delta.enableRowTracking", - "delta.feature.appendOnly", - "delta.feature.changeDataFeed", - "delta.feature.checkConstraints", - "delta.feature.domainMetadata", - "delta.feature.generatedColumns", - "delta.feature.invariants", - "delta.feature.rowTracking", - "delta.rowTracking.materializedRowCommitVersionColumnName", - "delta.rowTracking.materializedRowIdColumnName", - "spark.internal.pipelines.top_level_entry.user_specified_name", - "delta.columnMapping.maxColumnId", - "spark.sql.internal.pipelines.parentTableId", - "delta.enableDeletionVectors", - "delta.feature.deletionVectors", - ] - def __eq__(self, __value: Any) -> bool: - """Override equality check to ignore certain tblproperties.""" - if not isinstance(__value, TblPropertiesConfig): return False - def _without_ignore_list(d: dict[str, str]) -> dict[str, str]: - return {k: v for k, v in d.items() if k not in self.ignore_list} - - return _without_ignore_list(self.tblproperties) == _without_ignore_list( - __value.tblproperties - ) + return self.tblproperties == __value.tblproperties class TblPropertiesProcessor(DatabricksComponentProcessor[TblPropertiesConfig]): @@ -72,11 +36,19 @@ def from_relation_results(cls, results: RelationResults) -> TblPropertiesConfig: pipeline_id = None if table: - for row in table.rows: - if str(row[0]) == "pipelines.pipelineId": - pipeline_id = str(row[1]) - elif str(row[0]) not in TblPropertiesConfig.ignore_list: - tblproperties[str(row[0])] = str(row[1]) + # only load the properties that are managed by dbt + managed_keys = {"dbt.tblproperties.managedKeys"} + managed_keys_prop = next( + (v for k, v in table.rows if k == "dbt.tblproperties.managedKeys"), None + ) + if managed_keys_prop: + managed_keys.update(managed_keys_prop.split(",")) + + for k, v in table.rows: + if str(k) == "pipelines.pipelineId": + pipeline_id = str(v) + elif str(k) in managed_keys: + tblproperties[str(k)] = str(v) return TblPropertiesConfig(tblproperties=tblproperties, pipeline_id=pipeline_id) @@ -101,4 +73,10 @@ def from_relation_config(cls, relation_config: RelationConfig) -> TblPropertiesC ) tblproperties = {str(k): str(v) for k, v in tblproperties.items()} + + if tblproperties: + # track the keys that are managed by dbt + managed_keys = sorted([k for k in tblproperties.keys()]) + tblproperties["dbt.tblproperties.managedKeys"] = ",".join(managed_keys) + return TblPropertiesConfig(tblproperties=tblproperties) diff --git a/tests/functional/adapter/materialized_view_tests/test_changes.py b/tests/functional/adapter/materialized_view_tests/test_changes.py index 85f4b08cf..d21c535ef 100644 --- a/tests/functional/adapter/materialized_view_tests/test_changes.py +++ b/tests/functional/adapter/materialized_view_tests/test_changes.py @@ -19,7 +19,9 @@ def _check_tblproperties(tblproperties: TblPropertiesConfig, expected: dict): final_tblproperties = { - k: v for k, v in tblproperties.tblproperties.items() if k not in tblproperties.ignore_list + k: v + for k, v in tblproperties.tblproperties.items() + if k not in ["dbt.tblproperties.managedKeys"] } assert final_tblproperties == expected @@ -58,7 +60,7 @@ def check_state_alter_change_is_applied(project, materialized_view): def change_config_via_replace(project, materialized_view): initial_model = util.get_model_file(project, materialized_view) new_model = ( - initial_model.replace("partition_by='id',", "") + initial_model.replace("partition_by='id',", "partition_by='value',") .replace("select *", "select id, value") .replace("'key': 'value'", "'other': 'other'") ) @@ -69,7 +71,7 @@ def check_state_replace_change_is_applied(project, materialized_view): with util.get_connection(project.adapter): results = project.adapter.get_relation_config(materialized_view) assert isinstance(results, MaterializedViewConfig) - assert results.config["partition_by"].partition_by == [] + assert results.config["partition_by"].partition_by == ["value"] assert results.config["query"].query.startswith("select id, value") _check_tblproperties(results.config["tblproperties"], {"other": "other"}) diff --git a/tests/functional/adapter/streaming_tables/test_st_changes.py b/tests/functional/adapter/streaming_tables/test_st_changes.py index e8189a260..11040f041 100644 --- a/tests/functional/adapter/streaming_tables/test_st_changes.py +++ b/tests/functional/adapter/streaming_tables/test_st_changes.py @@ -18,7 +18,9 @@ def _check_tblproperties(tblproperties: TblPropertiesConfig, expected: dict): final_tblproperties = { - k: v for k, v in tblproperties.tblproperties.items() if k not in tblproperties.ignore_list + k: v + for k, v in tblproperties.tblproperties.items() + if k not in ["dbt.tblproperties.managedKeys"] } assert final_tblproperties == expected diff --git a/tests/functional/adapter/views/test_views.py b/tests/functional/adapter/views/test_views.py index 20baaed5f..2ae623ce4 100644 --- a/tests/functional/adapter/views/test_views.py +++ b/tests/functional/adapter/views/test_views.py @@ -71,7 +71,8 @@ def test_view_update_tblproperties(self, project): "show tblproperties {database}.{schema}.initial_view", fetch="all", ) - assert results[0][1] == "value2" + row = [r for r in results if r[0] == "key"][0] + assert row[1] == "value2" class BaseUpdateColumnComments(BaseUpdateView): diff --git a/tests/unit/relation_configs/test_incremental_config.py b/tests/unit/relation_configs/test_incremental_config.py index 5b99fec74..a609a6ac5 100644 --- a/tests/unit/relation_configs/test_incremental_config.py +++ b/tests/unit/relation_configs/test_incremental_config.py @@ -39,6 +39,7 @@ def test_from_results(self): ["clusterByAuto", "true"], ["clusteringColumns", '[["col1"],[""a""]]'], ["delta.constraints.check_name_length", "LENGTH (name) >= 1"], + ["dbt.tblproperties.managedKeys", "prop,delta.constraints.check_name_length"], ], column_names=["key", "value"], ), @@ -99,6 +100,7 @@ def test_from_results(self): tblproperties={ "prop": "f1", "delta.constraints.check_name_length": "LENGTH (name) >= 1", + "dbt.tblproperties.managedKeys": "prop,delta.constraints.check_name_length", } ), "liquid_clustering": LiquidClusteringConfig( diff --git a/tests/unit/relation_configs/test_materialized_view_config.py b/tests/unit/relation_configs/test_materialized_view_config.py index 14177a8e2..baee07191 100644 --- a/tests/unit/relation_configs/test_materialized_view_config.py +++ b/tests/unit/relation_configs/test_materialized_view_config.py @@ -37,7 +37,12 @@ def test_from_results(self): ["select * from foo", "other"], ["view_definition", "comment"] ), "show_tblproperties": Table( - rows=[["prop", "1"], ["other", "other"]], column_names=["key", "value"] + rows=[ + ["prop", "1"], + ["other", "other"], + ["dbt.tblproperties.managedKeys", "other,prop"], + ], + column_names=["key", "value"], ), "information_schema.tags": Table( rows=[["a", "b"], ["c", "d"]], column_names=["tag_name", "tag_value"] @@ -51,7 +56,13 @@ def test_from_results(self): "partition_by": PartitionedByConfig(partition_by=["col_a", "col_b"]), "liquid_clustering": LiquidClusteringConfig(), "comment": CommentConfig(comment="This is the table comment"), - "tblproperties": TblPropertiesConfig(tblproperties={"prop": "1", "other": "other"}), + "tblproperties": TblPropertiesConfig( + tblproperties={ + "prop": "1", + "other": "other", + "dbt.tblproperties.managedKeys": "other,prop", + } + ), "refresh": RefreshConfig(), "query": QueryConfig(query="select * from foo"), "tags": TagsConfig(set_tags={"a": "b", "c": "d"}), @@ -79,7 +90,13 @@ def test_from_model_node(self): "partition_by": PartitionedByConfig(partition_by=["col_a", "col_b"]), "liquid_clustering": LiquidClusteringConfig(), "comment": CommentConfig(comment="This is the table comment", persist=True), - "tblproperties": TblPropertiesConfig(tblproperties={"prop": "1", "other": "other"}), + "tblproperties": TblPropertiesConfig( + tblproperties={ + "prop": "1", + "other": "other", + "dbt.tblproperties.managedKeys": "other,prop", + } + ), "refresh": RefreshConfig(), "query": QueryConfig(query="select * from foo"), "tags": TagsConfig(set_tags={"a": "b", "c": "d"}), diff --git a/tests/unit/relation_configs/test_streaming_table_config.py b/tests/unit/relation_configs/test_streaming_table_config.py index 41f7b6290..5ecd62fed 100644 --- a/tests/unit/relation_configs/test_streaming_table_config.py +++ b/tests/unit/relation_configs/test_streaming_table_config.py @@ -30,7 +30,9 @@ def test_from_results(self): ["View Text", "select * from foo", None], ], ), - "show_tblproperties": fixtures.gen_tblproperties([["prop", "1"], ["other", "other"]]), + "show_tblproperties": fixtures.gen_tblproperties( + [["prop", "1"], ["other", "other"], ["dbt.tblproperties.managedKeys", "other,prop"]] + ), "information_schema.tags": Table( rows=[["a", "b"], ["c", "d"]], column_names=["tag_name", "tag_value"] ), @@ -43,7 +45,13 @@ def test_from_results(self): "partition_by": PartitionedByConfig(partition_by=["col_a", "col_b"]), "liquid_clustering": LiquidClusteringConfig(), "comment": CommentConfig(comment="This is the table comment"), - "tblproperties": TblPropertiesConfig(tblproperties={"prop": "1", "other": "other"}), + "tblproperties": TblPropertiesConfig( + tblproperties={ + "prop": "1", + "other": "other", + "dbt.tblproperties.managedKeys": "other,prop", + } + ), "refresh": RefreshConfig(), "tags": TagsConfig(set_tags={"a": "b", "c": "d"}), "query": QueryConfig(query="select * from foo"), @@ -71,7 +79,13 @@ def test_from_model_node(self): "partition_by": PartitionedByConfig(partition_by=["col_a", "col_b"]), "liquid_clustering": LiquidClusteringConfig(), "comment": CommentConfig(comment="This is the table comment", persist=False), - "tblproperties": TblPropertiesConfig(tblproperties={"prop": "1", "other": "other"}), + "tblproperties": TblPropertiesConfig( + tblproperties={ + "prop": "1", + "other": "other", + "dbt.tblproperties.managedKeys": "other,prop", + } + ), "refresh": RefreshConfig(), "tags": TagsConfig(set_tags={"a": "b", "c": "d"}), "query": QueryConfig(query="select * from foo"), diff --git a/tests/unit/relation_configs/test_tblproperties.py b/tests/unit/relation_configs/test_tblproperties.py index d8eeccc8d..53f38d7e4 100644 --- a/tests/unit/relation_configs/test_tblproperties.py +++ b/tests/unit/relation_configs/test_tblproperties.py @@ -19,16 +19,30 @@ def test_from_results__none(self): assert spec == TblPropertiesConfig(tblproperties={}) def test_from_results__single(self): - results = {"show_tblproperties": fixtures.gen_tblproperties([["prop", "f1"]])} + results = { + "show_tblproperties": fixtures.gen_tblproperties( + [["prop", "f1"], ["dbt.tblproperties.managedKeys", "prop"]] + ) + } spec = TblPropertiesProcessor.from_relation_results(results) - assert spec == TblPropertiesConfig(tblproperties={"prop": "f1"}) + assert spec == TblPropertiesConfig( + tblproperties={"prop": "f1", "dbt.tblproperties.managedKeys": "prop"} + ) def test_from_results__multiple(self): results = { - "show_tblproperties": fixtures.gen_tblproperties([["prop", "1"], ["other", "other"]]) + "show_tblproperties": fixtures.gen_tblproperties( + [["prop", "1"], ["other", "other"], ["dbt.tblproperties.managedKeys", "other,prop"]] + ) } spec = TblPropertiesProcessor.from_relation_results(results) - assert spec == TblPropertiesConfig(tblproperties={"prop": "1", "other": "other"}) + assert spec == TblPropertiesConfig( + tblproperties={ + "prop": "1", + "other": "other", + "dbt.tblproperties.managedKeys": "other,prop", + } + ) def test_from_model_node__without_tblproperties(self): model = Mock() @@ -42,7 +56,9 @@ def test_from_model_node__with_tblproperties(self): "tblproperties": {"prop": 1}, } spec = TblPropertiesProcessor.from_relation_config(model) - assert spec == TblPropertiesConfig(tblproperties={"prop": "1"}) + assert spec == TblPropertiesConfig( + tblproperties={"prop": "1", "dbt.tblproperties.managedKeys": "prop"} + ) def test_from_model_node__with_empty_tblproperties(self): model = Mock() @@ -73,6 +89,13 @@ def test_from_model_node__with_uniform_iceberg_adds_properties(self): "custom_prop": "value", "delta.enableIcebergCompatV2": "true", "delta.universalFormat.enabledFormats": constants.ICEBERG_TABLE_FORMAT, + "dbt.tblproperties.managedKeys": ",".join( + [ + "custom_prop", + "delta.enableIcebergCompatV2", + "delta.universalFormat.enabledFormats", + ] + ), } ) @@ -85,7 +108,9 @@ def test_from_model_node__with_managed_iceberg_no_uniform_properties(self): } spec = TblPropertiesProcessor.from_relation_config(model) # Should only have the custom property, NOT the UniForm properties - assert spec == TblPropertiesConfig(tblproperties={"custom_prop": "value"}) + assert spec == TblPropertiesConfig( + tblproperties={"custom_prop": "value", "dbt.tblproperties.managedKeys": "custom_prop"} + ) def test_from_model_node__with_iceberg_no_flag_no_properties(self): GlobalState.set_use_managed_iceberg(None)