Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 71522ee

Browse files
working code to allow --select
1 parent 7a5769d commit 71522ee

File tree

5 files changed

+123
-63
lines changed

5 files changed

+123
-63
lines changed

data_diff/__main__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,12 @@ def write_usage(self, prog: str, args: str = "", prefix: Optional[str] = None) -
228228
metavar="PATH",
229229
help="Which directory to look in for the dbt_project.yml file. Default is the current working directory and its parents.",
230230
)
231+
@click.option(
232+
"--select",
233+
default=None,
234+
metavar="PATH",
235+
help="select dbt resources to compar using dbt selection syntax",
236+
)
231237
def main(conf, run, **kw):
232238
if kw["table2"] is None and kw["database2"]:
233239
# Use the "database table table" form
@@ -264,6 +270,7 @@ def main(conf, run, **kw):
264270
profiles_dir_override=kw["dbt_profiles_dir"],
265271
project_dir_override=kw["dbt_project_dir"],
266272
is_cloud=kw["cloud"],
273+
dbt_selection=kw["select"],
267274
)
268275
else:
269276
return _data_diff(**kw)

data_diff/dbt.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
logger = getLogger(__name__)
1919

2020

21-
def import_dbt():
21+
def import_dbt_parsers():
2222
try:
2323
from dbt_artifacts_parser.parser import parse_run_results, parse_manifest
2424
from dbt.config.renderer import ProfileRenderer
@@ -47,7 +47,7 @@ def import_dbt():
4747
PROJECT_FILE = "dbt_project.yml"
4848
PROFILES_FILE = "profiles.yml"
4949
LOWER_DBT_V = "1.0.0"
50-
UPPER_DBT_V = "1.4.6"
50+
UPPER_DBT_V = "1.6.0"
5151

5252

5353
# https://github.com/dbt-labs/dbt-core/blob/c952d44ec5c2506995fbad75320acbae49125d3d/core/dbt/cli/resolvers.py#L6
@@ -76,12 +76,12 @@ class DiffVars:
7676

7777

7878
def dbt_diff(
79-
profiles_dir_override: Optional[str] = None, project_dir_override: Optional[str] = None, is_cloud: bool = False
79+
profiles_dir_override: Optional[str] = None, project_dir_override: Optional[str] = None, is_cloud: bool = False, dbt_selection: Optional[str] = None
8080
) -> None:
8181
diff_threads = []
8282
set_entrypoint_name("CLI-dbt")
8383
dbt_parser = DbtParser(profiles_dir_override, project_dir_override)
84-
models = dbt_parser.get_models()
84+
models = dbt_parser.get_models(dbt_selection)
8585
datadiff_variables = dbt_parser.get_datadiff_variables()
8686
config_prod_database = datadiff_variables.get("prod_database")
8787
config_prod_schema = datadiff_variables.get("prod_schema")

data_diff/dbt_parser.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from collections import defaultdict
22
import json
3+
import os
34
from pathlib import Path
4-
from typing import List, Dict, Tuple, Set
5+
from typing import List, Dict, Tuple, Set, Optional
56

67
from packaging.version import parse as parse_version
78

@@ -12,7 +13,7 @@
1213
logger = getLogger(__name__)
1314

1415

15-
def import_dbt():
16+
def import_dbt_parsers():
1617
try:
1718
from dbt_artifacts_parser.parser import parse_run_results, parse_manifest
1819
from dbt.config.renderer import ProfileRenderer
@@ -23,12 +24,23 @@ def import_dbt():
2324
return parse_run_results, parse_manifest, ProfileRenderer, yaml
2425

2526

27+
# dbt 1.5+ specific stuff to power selection of models
28+
try:
29+
from dbt.cli.main import dbtRunner
30+
except ImportError:
31+
dbtRunner = None
32+
33+
if dbtRunner is not None:
34+
dbt_runner = dbtRunner()
35+
else:
36+
dbt_runner = None
37+
2638
RUN_RESULTS_PATH = "target/run_results.json"
2739
MANIFEST_PATH = "target/manifest.json"
2840
PROJECT_FILE = "dbt_project.yml"
2941
PROFILES_FILE = "profiles.yml"
3042
LOWER_DBT_V = "1.0.0"
31-
UPPER_DBT_V = "1.4.6"
43+
UPPER_DBT_V = "1.6.0"
3244

3345

3446
# https://github.com/dbt-labs/dbt-core/blob/c952d44ec5c2506995fbad75320acbae49125d3d/core/dbt/cli/resolvers.py#L6
@@ -49,7 +61,7 @@ def legacy_profiles_dir() -> Path:
4961

5062
class DbtParser:
5163
def __init__(self, profiles_dir_override: str, project_dir_override: str) -> None:
52-
self.parse_run_results, self.parse_manifest, self.ProfileRenderer, self.yaml = import_dbt()
64+
self.parse_run_results, self.parse_manifest, self.ProfileRenderer, self.yaml = import_dbt_parsers()
5365
self.profiles_dir = Path(profiles_dir_override or default_profiles_dir())
5466
self.project_dir = Path(project_dir_override or default_project_dir())
5567
self.connection = None
@@ -66,7 +78,44 @@ def get_datadiff_variables(self) -> dict:
6678
vars = get_from_dict_with_raise(self.project_dict, "vars", f"No vars: found in dbt_project.yml.")
6779
return get_from_dict_with_raise(vars, "data_diff", f"data_diff: section not found in dbt_project.yml vars:.")
6880

69-
def get_models(self):
81+
def get_models(self, dbt_selection: Optional[str] = None):
82+
dbt_version = parse_version(self.dbt_version)
83+
if dbt_selection:
84+
if dbt_version.major == 1 and dbt_version.minor >= 5:
85+
return self.get_dbt_selection_models(dbt_selection)
86+
else:
87+
raise Exception(
88+
f"Use of the `--select` feature requires dbt >= 1.5. Found dbt: v{dbt_version}"
89+
)
90+
else:
91+
return self.get_run_results_models()
92+
93+
def get_dbt_selection_models(self, dbt_selection: str) -> List[str]:
94+
start_dir = os.getcwd()
95+
os.chdir(self.project_dir)
96+
# log level and format settings needed to prevent dbt from printing to stdout
97+
# ls command is used to get the list of model unique_ids
98+
results = dbt_runner.invoke([
99+
"--log-format",
100+
"json",
101+
"--log-level",
102+
"none",
103+
"ls",
104+
"--select",
105+
dbt_selection,
106+
"--resource-type",
107+
"model",
108+
"--output",
109+
"json",
110+
'--output-keys',
111+
"unique_id"])
112+
os.chdir(start_dir)
113+
if results.success:
114+
model_list = [json.loads(model)["unique_id"] for model in results.result]
115+
models = [self.manifest_obj.nodes.get(x) for x in model_list]
116+
return models
117+
118+
def get_run_results_models(self):
70119
with open(self.project_dir / RUN_RESULTS_PATH) as run_results:
71120
run_results_dict = json.load(run_results)
72121
run_results_obj = self.parse_run_results(run_results=run_results_dict)

poetry.lock

Lines changed: 56 additions & 52 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)