-
Notifications
You must be signed in to change notification settings - Fork 197
perf: skip unnecessary metadata fetch calls for tags when not configured #1387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
17754e0
9eca1c4
815ff55
7839417
91e0dea
a84e0f0
51a495e
0ed403b
f4399cd
e6adab7
ba26eb1
f985dc8
616239f
12e008a
adee055
c727e76
8b57e75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -80,6 +80,9 @@ | |
| StreamingTableConfig, | ||
| ) | ||
| from dbt.adapters.databricks.relation_configs.table_format import TableFormat | ||
| from dbt.adapters.databricks.relation_configs.tags import ( | ||
| TagsProcessor, | ||
| ) | ||
| from dbt.adapters.databricks.relation_configs.tblproperties import TblPropertiesConfig | ||
| from dbt.adapters.databricks.relation_configs.view import ViewConfig | ||
| from dbt.adapters.databricks.utils import ( | ||
|
|
@@ -964,15 +967,19 @@ def parse_columns_and_constraints( | |
| return enriched_columns, parsed_constraints | ||
|
|
||
| @available.parse(lambda *a, **k: {}) | ||
| def get_relation_config(self, relation: DatabricksRelation) -> DatabricksRelationConfigBase: | ||
| def get_relation_config( | ||
| self, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> DatabricksRelationConfigBase: | ||
| if relation.type == DatabricksRelationType.MaterializedView: | ||
| return MaterializedViewAPI.get_from_relation(self, relation) | ||
| return MaterializedViewAPI.get_from_relation(self, relation, model_config) | ||
| elif relation.type == DatabricksRelationType.StreamingTable: | ||
| return StreamingTableAPI.get_from_relation(self, relation) | ||
| return StreamingTableAPI.get_from_relation(self, relation, model_config) | ||
| elif relation.type == DatabricksRelationType.Table: | ||
| return IncrementalTableAPI.get_from_relation(self, relation) | ||
| return IncrementalTableAPI.get_from_relation(self, relation, model_config) | ||
| elif relation.type == DatabricksRelationType.View: | ||
| return ViewAPI.get_from_relation(self, relation) | ||
| return ViewAPI.get_from_relation(self, relation, model_config) | ||
| else: | ||
| raise NotImplementedError(f"Relation type {relation.type} is not supported.") | ||
|
|
||
|
|
@@ -1053,12 +1060,15 @@ def config_type(cls) -> type[DatabricksRelationConfig]: | |
|
|
||
| @classmethod | ||
| def get_from_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> DatabricksRelationConfig: | ||
| """Get the relation config from the relation.""" | ||
|
|
||
| assert relation.type == cls.relation_type | ||
| results = cls._describe_relation(adapter, relation) | ||
| results = cls._describe_relation(adapter, relation, model_config) | ||
| return cls.config_type().from_results(results) | ||
|
|
||
| @classmethod | ||
|
|
@@ -1070,7 +1080,10 @@ def get_from_relation_config(cls, relation_config: RelationConfig) -> Databricks | |
| @classmethod | ||
| @abstractmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| """Describe the relation and return the results.""" | ||
|
|
||
|
|
@@ -1080,11 +1093,14 @@ def _describe_relation( | |
| class DeltaLiveTableAPIBase(RelationAPIBase[DatabricksRelationConfig]): | ||
| @classmethod | ||
| def get_from_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> DatabricksRelationConfig: | ||
| """Get the relation config from the relation.""" | ||
|
|
||
| relation_config = super().get_from_relation(adapter, relation) | ||
|
sd-db marked this conversation as resolved.
|
||
| relation_config = super().get_from_relation(adapter, relation, model_config) | ||
|
|
||
| # Ensure any current refreshes are completed before returning the relation config | ||
| tblproperties = cast(TblPropertiesConfig, relation_config.config["tblproperties"]) | ||
|
|
@@ -1104,7 +1120,10 @@ def config_type(cls) -> type[MaterializedViewConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| kwargs = {"table_name": relation} | ||
| results: RelationResults = dict() | ||
|
|
@@ -1116,8 +1135,15 @@ def _describe_relation( | |
| results["information_schema.views"] = get_first_row( | ||
| adapter.execute_macro("get_view_description", kwargs=kwargs) | ||
| ) | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs) | ||
|
|
||
| # To be backward compatible model_config can be None. In that case, tags should be fetched | ||
| # to maintain backward compatibility. | ||
| table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rename to |
||
| if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff(): | ||
|
sd-db marked this conversation as resolved.
|
||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| else: | ||
| results["information_schema.tags"] = None | ||
| return results | ||
|
|
||
|
|
||
|
|
@@ -1130,7 +1156,10 @@ def config_type(cls) -> type[StreamingTableConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| kwargs = {"table_name": relation} | ||
| results: RelationResults = dict() | ||
|
|
@@ -1153,16 +1182,37 @@ def config_type(cls) -> type[IncrementalTableConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| results = {} | ||
| kwargs = {"relation": relation} | ||
|
|
||
| if not relation.is_hive_metastore(): | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| results["information_schema.column_tags"] = adapter.execute_macro( | ||
| "fetch_column_tags", kwargs=kwargs | ||
| # To be backward compatible model_config can be None. In that case, tags should be fetched | ||
| # to maintain backward compatibility. | ||
| table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None | ||
|
sd-db marked this conversation as resolved.
|
||
| if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff(): | ||
|
sd-db marked this conversation as resolved.
|
||
| results["information_schema.tags"] = adapter.execute_macro( | ||
| "fetch_tags", kwargs=kwargs | ||
| ) | ||
| else: | ||
| results["information_schema.tags"] = None | ||
|
|
||
| # To be backward compatible model_config can be None. In that case, tags should be fetched | ||
| # to maintain backward compatibility. | ||
| column_tag_config = ( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comments on naming + checks on
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suffix
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Variable naming should still be intentional and descriptive, we should update it to |
||
| model_config.config.get(ColumnTagsProcessor.name) if model_config else None | ||
| ) | ||
| if column_tag_config is None or column_tag_config.requires_server_metadata_for_diff(): | ||
| results["information_schema.column_tags"] = adapter.execute_macro( | ||
| "fetch_column_tags", kwargs=kwargs | ||
| ) | ||
| else: | ||
| results["information_schema.column_tags"] = None | ||
|
|
||
| results["non_null_constraint_columns"] = adapter.execute_macro( | ||
| "fetch_non_null_constraint_columns", kwargs=kwargs | ||
| ) | ||
|
|
@@ -1191,15 +1241,26 @@ def config_type(cls) -> type[ViewConfig]: | |
|
|
||
| @classmethod | ||
| def _describe_relation( | ||
| cls, adapter: DatabricksAdapter, relation: DatabricksRelation | ||
| cls, | ||
|
sd-db marked this conversation as resolved.
|
||
| adapter: DatabricksAdapter, | ||
| relation: DatabricksRelation, | ||
| model_config: Optional[DatabricksRelationConfigBase] = None, | ||
| ) -> RelationResults: | ||
| results = {} | ||
| kwargs = {"relation": relation} | ||
|
|
||
| results["information_schema.views"] = get_first_row( | ||
| adapter.execute_macro("get_view_description", kwargs=kwargs) | ||
| ) | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
|
|
||
| # To be backward compatible model_config can be None. In that case, tags should be fetched | ||
| # to maintain backward compatibility. | ||
| table_tag_config = model_config.config.get(TagsProcessor.name) if model_config else None | ||
| if table_tag_config is None or table_tag_config.requires_server_metadata_for_diff(): | ||
| results["information_schema.tags"] = adapter.execute_macro("fetch_tags", kwargs=kwargs) | ||
| else: | ||
| results["information_schema.tags"] = None | ||
|
|
||
| results["show_tblproperties"] = adapter.execute_macro("fetch_tbl_properties", kwargs=kwargs) | ||
|
|
||
| kwargs = {"table_name": relation} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| {%- macro get_configuration_changes(existing_relation) -%} | ||
| {%- set existing_config = adapter.get_relation_config(existing_relation) -%} | ||
| {%- set model_config = adapter.get_config_from_model(config.model) -%} | ||
| {%- set existing_config = adapter.get_relation_config(existing_relation, model_config) -%} | ||
| {%- set configuration_changes = model_config.get_changeset(existing_config) -%} | ||
| {% do return(configuration_changes) %} | ||
| {%- endmacro -%} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| from dbt.adapters.base.relation import BaseRelation | ||
| from dbt.tests.util import get_manifest | ||
|
|
||
| FAIL_IF_TAG_FETCH_CALLED_MACROS = """ | ||
| {% macro fetch_tags(relation) %} | ||
| {{ exceptions.raise_compiler_error("fetch_tags should not be called") }} | ||
| {% endmacro %} | ||
| """ | ||
|
|
||
| FAIL_IF_TAG_AND_COLUMN_TAG_FETCH_CALLED_MACROS = """ | ||
| {% macro fetch_tags(relation) %} | ||
| {{ exceptions.raise_compiler_error("fetch_tags should not be called") }} | ||
| {% endmacro %} | ||
|
|
||
| {% macro fetch_column_tags(relation) %} | ||
| {{ exceptions.raise_compiler_error("fetch_column_tags should not be called") }} | ||
| {% endmacro %} | ||
| """ | ||
|
|
||
|
|
||
| def get_model_config(project, relation: BaseRelation): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this ? Since
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently this is adding extra complexity while mostly being no-op |
||
| """Return the parsed dbt model config for the given relation fixture.""" | ||
| manifest = get_manifest(project.project_root) | ||
| model_nodes = [ | ||
| node | ||
| for node in manifest.nodes.values() | ||
| if getattr(node, "resource_type", None) == "model" | ||
| and getattr(node, "alias", None) == relation.identifier | ||
| ] | ||
| assert len(model_nodes) == 1, ( | ||
| f"Expected exactly one model node for relation {relation.identifier}, " | ||
| f"found {len(model_nodes)}" | ||
| ) | ||
| model_node = model_nodes[0] | ||
|
|
||
| # The partial-parse manifest only stores `raw_code`; `compiled_code` is set | ||
| # later during compile/run. `get_config_from_model` requires it for MV/ST, | ||
| # so backfill from `raw_code` (only used for equality checks here). If a | ||
| # caller needs the actually-compiled SQL, run `run_dbt(["compile"])` first | ||
| # and read `target/manifest.json` instead. | ||
| if model_node.compiled_code is None: | ||
| model_node.compiled_code = model_node.raw_code | ||
|
|
||
| return project.adapter.get_config_from_model(model_node) | ||
Uh oh!
There was an error while loading. Please reload this page.