1313logger = getLogger (__name__ )
1414
1515
def import_dbt_dependencies():
    """Lazily import the optional dbt-related dependencies and return them.

    The imports live inside this function so that the module itself can be
    imported when the optional packages are not installed.

    Returns:
        A 5-tuple ``(parse_run_results, parse_manifest, ProfileRenderer, yaml,
        dbt_runner)``. ``dbt_runner`` is a ``dbtRunner`` instance when
        ``dbt.cli.main`` can be imported, otherwise ``None``.

    Raises:
        RuntimeError: If any of the required packages cannot be imported. The
            original ``ImportError`` is attached as the cause.
    """
    try:
        from dbt_artifacts_parser.parser import parse_run_results, parse_manifest
        from dbt.config.renderer import ProfileRenderer
        import yaml
    except ImportError as err:
        # Chain explicitly so the traceback shows which import actually failed.
        raise RuntimeError(
            "Could not import 'dbt' package. You can install it using: pip install 'data-diff[dbt]'."
        ) from err

    # dbt 1.5+ specific stuff to power selection of models
    try:
        from dbt.cli.main import dbtRunner
    except ImportError:
        # The programmatic runner is unavailable; callers must handle ``None``.
        dbt_runner = None
    else:
        dbt_runner = dbtRunner()

    return parse_run_results, parse_manifest, ProfileRenderer, yaml, dbt_runner
3737
3838RUN_RESULTS_PATH = "target/run_results.json"
3939MANIFEST_PATH = "target/manifest.json"
@@ -61,7 +61,13 @@ def legacy_profiles_dir() -> Path:
6161
6262class DbtParser :
6363 def __init__ (self , profiles_dir_override : str , project_dir_override : str ) -> None :
64- self .parse_run_results , self .parse_manifest , self .ProfileRenderer , self .yaml = import_dbt_parsers ()
64+ (
65+ self .parse_run_results ,
66+ self .parse_manifest ,
67+ self .ProfileRenderer ,
68+ self .yaml ,
69+ self .dbt_runner ,
70+ ) = import_dbt_dependencies ()
6571 self .profiles_dir = Path (profiles_dir_override or default_profiles_dir ())
6672 self .project_dir = Path (project_dir_override or default_project_dir ())
6773 self .connection = None
@@ -84,38 +90,50 @@ def get_models(self, dbt_selection: Optional[str] = None):
8490 dbt_version = parse_version (self .dbt_version )
8591 if dbt_selection :
8692 if dbt_version .major == 1 and dbt_version .minor >= 5 :
87- return self .get_dbt_selection_models (dbt_selection )
93+ if self .dbt_runner :
94+ return self .get_dbt_selection_models (dbt_selection )
95+ # edge case if running data-diff from a separate env than dbt (likely local development)
96+ else :
97+ raise Exception (
98+ "data-diff is using a dbt-core version < 1.5, update the environment's dbt-core version via pip install 'dbt-core>=1.5' in order to use `--select`"
99+ )
88100 else :
89101 raise Exception (
90- f"Use of the `--select` feature requires dbt >= 1.5. Found dbt: v{ dbt_version } "
102+ f"Use of the `--select` feature requires dbt >= 1.5. Found dbt manifest : v{ dbt_version } "
91103 )
92104 else :
93105 return self .get_run_results_models ()
94106
def get_dbt_selection_models(self, dbt_selection: str) -> List[str]:
    """Resolve a dbt ``--select`` expression to the matching manifest nodes.

    Invokes ``ls`` through ``self.dbt_runner`` to obtain the ``unique_id`` of
    every selected model, then looks each id up in ``self.manifest_obj.nodes``.

    Args:
        dbt_selection: The selection string forwarded to ``--select``.

    Returns:
        One entry per selected model, taken from ``self.manifest_obj.nodes``.
        NOTE(review): the annotation says ``List[str]`` but the values come
        from the manifest's node mapping -- confirm the intended type.

    Raises:
        Exception: The exception recorded on the runner result when the
            invocation failed; a generic ``Exception`` when it failed without
            recording one; or an ``Exception`` stating that no models matched.
    """
    # log level and format settings needed to prevent dbt from printing to stdout
    # ls command is used to get the list of model unique_ids
    cli_args = [
        "--log-format",
        "json",
        "--log-level",
        "none",
        "ls",
        "--select",
        dbt_selection,
        "--resource-type",
        "model",
        "--output",
        "json",
        "--output-keys",
        "unique_id",
        "--project-dir",
        # The argument list is a CLI argv, so pass the path as a plain string.
        str(self.project_dir),
    ]
    results = self.dbt_runner.invoke(cli_args)

    # Report a failed invocation first, so the real error is never hidden
    # behind the "no models found" message.
    if not results.success:
        if results.exception is not None:
            raise results.exception
        raise Exception(f"dbt failed to list models for `--select {dbt_selection}`")

    if not results.result:
        raise Exception(f"No dbt models found for `--select {dbt_selection}`")

    unique_ids = [json.loads(line)["unique_id"] for line in results.result]
    return [self.manifest_obj.nodes.get(unique_id) for unique_id in unique_ids]
119137
120138 def get_run_results_models (self ):
121139 with open (self .project_dir / RUN_RESULTS_PATH ) as run_results :
@@ -129,11 +147,11 @@ def get_run_results_models(self):
129147 self .profiles_dir = legacy_profiles_dir ()
130148
131149 if dbt_version < parse_version (LOWER_DBT_V ):
132- raise Exception (
133- f"Found dbt: v{ dbt_version } Expected the dbt project's version to be >= { LOWER_DBT_V } "
134- )
150+ raise Exception (f"Found dbt: v{ dbt_version } Expected the dbt project's version to be >= { LOWER_DBT_V } " )
135151 elif dbt_version >= parse_version (UPPER_DBT_V ):
136- logger .warning (f"{ dbt_version } is a recent version of dbt and may not be fully tested with data-diff! \n Please report any issues to https://github.com/datafold/data-diff/issues" )
152+ logger .warning (
153+ f"{ dbt_version } is a recent version of dbt and may not be fully tested with data-diff! \n Please report any issues to https://github.com/datafold/data-diff/issues"
154+ )
137155
138156 success_models = [x .unique_id for x in run_results_obj .results if x .status .name == "success" ]
139157 models = [self .manifest_obj .nodes .get (x ) for x in success_models ]
0 commit comments