Skip to content

Commit 45a2db6

Browse files
authored
Fix: disable query validations for dbt models (#5305)
1 parent 101e73b commit 45a2db6

File tree

9 files changed

+164
-64
lines changed

9 files changed

+164
-64
lines changed

sqlmesh/cli/project_init.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def _gen_config(
114114
rules:
115115
- ambiguousorinvalidcolumn
116116
- invalidselectstarexpansion
117+
- noambiguousprojections
117118
""",
118119
ProjectTemplate.DBT: f"""# --- Virtual Data Environment Mode ---
119120
# Enable Virtual Data Environments (VDE) for *development* environments.

sqlmesh/core/context.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3130,7 +3130,9 @@ def lint_models(
31303130
found_error = False
31313131

31323132
model_list = (
3133-
list(self.get_model(model) for model in models) if models else self.models.values()
3133+
list(self.get_model(model, raise_if_missing=True) for model in models)
3134+
if models
3135+
else self.models.values()
31343136
)
31353137
all_violations = []
31363138
for model in model_list:

sqlmesh/core/linter/definition.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
from __future__ import annotations
2-
import typing as t
3-
from sqlmesh.core.config.linter import LinterConfig
4-
from sqlmesh.core.model import Model
5-
from sqlmesh.utils.errors import raise_config_error
6-
from sqlmesh.core.console import LinterConsole, get_console
2+
73
import operator as op
4+
import typing as t
85
from collections.abc import Iterator, Iterable, Set, Mapping, Callable
96
from functools import reduce
10-
from sqlmesh.core.model import Model
11-
from sqlmesh.core.linter.rule import Rule, RuleViolation, Range, Fix
7+
8+
from sqlmesh.core.config.linter import LinterConfig
129
from sqlmesh.core.console import LinterConsole, get_console
10+
from sqlmesh.core.linter.rule import Rule, RuleViolation, Range, Fix
11+
from sqlmesh.core.model import Model
12+
from sqlmesh.utils.errors import raise_config_error
1313

1414
if t.TYPE_CHECKING:
1515
from sqlmesh.core.context import GenericContext
@@ -38,6 +38,12 @@ def __init__(
3838
self.rules = rules
3939
self.warn_rules = warn_rules
4040

41+
if overlapping := rules.intersection(warn_rules):
42+
overlapping_rules = ", ".join(rule for rule in overlapping)
43+
raise_config_error(
44+
f"Rules cannot simultaneously warn and raise an error: [{overlapping_rules}]"
45+
)
46+
4147
@classmethod
4248
def from_rules(cls, all_rules: RuleSet, config: LinterConfig) -> Linter:
4349
ignored_rules = select_rules(all_rules, config.ignored_rules)
@@ -46,12 +52,6 @@ def from_rules(cls, all_rules: RuleSet, config: LinterConfig) -> Linter:
4652
rules = select_rules(included_rules, config.rules)
4753
warn_rules = select_rules(included_rules, config.warn_rules)
4854

49-
if overlapping := rules.intersection(warn_rules):
50-
overlapping_rules = ", ".join(rule for rule in overlapping)
51-
raise_config_error(
52-
f"Rules cannot simultaneously warn and raise an error: [{overlapping_rules}]"
53-
)
54-
5555
return Linter(config.enabled, all_rules, rules, warn_rules)
5656

5757
def lint_model(

sqlmesh/core/linter/rules/builtin.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,4 +274,33 @@ def create_fix(self, model_name: str) -> t.Optional[Fix]:
274274
)
275275

276276

277+
class NoAmbiguousProjections(Rule):
    """All projections in a model must have unique & inferrable names or explicit aliases."""

    def check_model(self, model: Model) -> t.Optional[RuleViolation]:
        """Check the outer projections of the model's rendered query.

        Returns a violation for the first projection whose output name is
        empty; otherwise a violation for the first output name that occurs
        more than once; otherwise None. Star projections are skipped, and a
        model whose query renders to None produces no violation.
        """
        rendered = model.render_query()
        if rendered is None:
            return None

        # First pass: gather output names in projection order, bailing out on
        # the first projection that has no usable name.
        output_names: t.List[str] = []
        for projection in rendered.selects:
            output_name = projection.output_name
            if output_name == "*":
                continue

            if not output_name:
                projection_sql = projection.sql(dialect=model.dialect)
                return self.violation(
                    f"Outer projection '{projection_sql}' must have inferrable names or explicit aliases."
                )

            output_names.append(output_name)

        # Second pass: only reached when every projection is named, so a
        # missing name always takes precedence over a duplicate one.
        seen: t.Set[str] = set()
        repeated: t.Set[str] = set()
        for output_name in output_names:
            if output_name in seen:
                repeated.add(output_name)
            else:
                seen.add(output_name)

        # Report the duplicate whose first occurrence comes earliest.
        first_duplicate = next((name for name in output_names if name in repeated), None)
        if first_duplicate is None:
            return None

        return self.violation(f"Found duplicate outer select name '{first_duplicate}'")
304+
305+
277306
BUILTIN_RULES = RuleSet(subclasses(__name__, Rule, (Rule,)))

sqlmesh/core/model/definition.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,12 +1417,20 @@ def columns_to_types(self) -> t.Optional[t.Dict[str, exp.DataType]]:
14171417

14181418
unknown = exp.DataType.build("unknown")
14191419

1420-
self._columns_to_types = {
1420+
columns_to_types = {}
1421+
for select in query.selects:
1422+
output_name = select.output_name
1423+
1424+
# If model validation is disabled, we cannot assume that projections
1425+
# will have inferrable output names or even that they will be unique
1426+
if not output_name or output_name in columns_to_types:
1427+
return None
1428+
14211429
# copy data type because it is used in the engine to build CTAS and other queries
14221430
# this can change the parent which will mess up the diffing algo
1423-
select.output_name: (select.type or unknown).copy()
1424-
for select in query.selects
1425-
}
1431+
columns_to_types[output_name] = (select.type or unknown).copy()
1432+
1433+
self._columns_to_types = columns_to_types
14261434

14271435
if "*" in self._columns_to_types:
14281436
return None
@@ -1473,22 +1481,6 @@ def validate_definition(self) -> None:
14731481
if not projection_list:
14741482
raise_config_error("Query missing select statements", self._path)
14751483

1476-
name_counts: t.Dict[str, int] = {}
1477-
for expression in projection_list:
1478-
alias = expression.output_name
1479-
if alias == "*":
1480-
continue
1481-
if not alias:
1482-
raise_config_error(
1483-
f"Outer projection '{expression.sql(dialect=self.dialect)}' must have inferrable names or explicit aliases.",
1484-
self._path,
1485-
)
1486-
name_counts[alias] = name_counts.get(alias, 0) + 1
1487-
1488-
for name, count in name_counts.items():
1489-
if count > 1:
1490-
raise_config_error(f"Found duplicate outer select name '{name}'", self._path)
1491-
14921484
if self.depends_on_self and not self.annotated:
14931485
raise_config_error(
14941486
"Self-referencing models require inferrable column types. There are three options available to mitigate this issue: add explicit types to all projections in the outermost SELECT statement, leverage external models (https://sqlmesh.readthedocs.io/en/stable/concepts/models/external_models/), or use the `columns` model attribute (https://sqlmesh.readthedocs.io/en/stable/concepts/models/overview/#columns).",
@@ -1846,8 +1838,9 @@ def validate_definition(self) -> None:
18461838
super().validate_definition()
18471839

18481840
if self.kind and not self.kind.supports_python_models:
1849-
raise SQLMeshError(
1850-
f"Cannot create Python model '{self.name}' as the '{self.kind.name}' kind doesn't support Python models"
1841+
raise_config_error(
1842+
f"Cannot create Python model '{self.name}' as the '{self.kind.name}' kind doesn't support Python models",
1843+
self._path,
18511844
)
18521845

18531846
def render(

tests/cli/test_cli.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,7 @@ def test_dlt_filesystem_pipeline(tmp_path):
982982
" rules:\n"
983983
" - ambiguousorinvalidcolumn\n"
984984
" - invalidselectstarexpansion\n"
985+
" - noambiguousprojections\n"
985986
)
986987

987988
with open(config_path) as file:
@@ -1048,6 +1049,7 @@ def test_dlt_pipeline(runner, tmp_path):
10481049
rules:
10491050
- ambiguousorinvalidcolumn
10501051
- invalidselectstarexpansion
1052+
- noambiguousprojections
10511053
"""
10521054

10531055
with open(tmp_path / "config.yaml") as file:
@@ -1990,6 +1992,7 @@ def test_init_project_engine_configs(tmp_path):
19901992
rules:
19911993
- ambiguousorinvalidcolumn
19921994
- invalidselectstarexpansion
1995+
- noambiguousprojections
19931996
"""
19941997

19951998
with open(tmp_path / "config.yaml") as file:

tests/core/test_context.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,6 +1554,38 @@ def test_raw_code_handling(sushi_test_dbt_context: Context):
15541554
)
15551555

15561556

1557+
@pytest.mark.slow
def test_dbt_models_are_not_validated(sushi_test_dbt_context: Context):
    """A dbt model with duplicate projection names loads and can be queried,
    while an invalid incremental dbt model still fails when the project loads."""
    ctx = sushi_test_dbt_context

    # The duplicate output name `c` is still present in the rendered query.
    rendered_sql = (
        ctx.models['"memory"."sushi"."non_validated_model"']
        .render_query_or_raise()
        .sql(comments=False)
    )
    assert rendered_sql == 'SELECT 1 AS "c", 2 AS "c"'

    # When fetched, the second `c` column comes back under the name `c_1`.
    fetched = ctx.fetchdf('SELECT * FROM "memory"."sushi"."non_validated_model"').to_dict()
    assert fetched == {"c": {0: 1}, "c_1": {0: 2}}

    # Write a new incremental model file that should fail validation
    invalid_model_sql = """{{
config(
materialized='incremental',
incremental_strategy='delete+insert',
)
}}

SELECT
1 AS c"""
    invalid_model_file = ctx.path / "models" / "invalid_incremental.sql"
    invalid_model_file.write_text(invalid_model_sql)

    # Reload the context - this should raise a validation error for the incremental model
    expected_error = "Unmanaged incremental models with insert / overwrite enabled must specify the partitioned_by field"
    with pytest.raises(ConfigError, match=expected_error):
        Context(paths=ctx.path, config="test_config")
1587+
1588+
15571589
def test_catalog_name_needs_to_be_quoted():
15581590
config = Config(
15591591
model_defaults=ModelDefaultsConfig(dialect="duckdb"),
@@ -3085,3 +3117,20 @@ def test_plan_no_start_configured():
30853117
match=r"Model '.*xvg.*': Start date / time .* can't be greater than end date / time .*\.\nSet the `start` attribute in your project config model defaults to avoid this issue",
30863118
):
30873119
context.plan("dev", execution_time="1999-01-05")
3120+
3121+
3122+
def test_lint_model_projections(tmp_path: Path):
    """Planning raises a LinterError for a model with duplicate projection names.

    NOTE(review): this relies on the config generated by `init_example_project`
    enabling the `noambiguousprojections` rule - confirm against the project
    template if this test starts passing the plan unexpectedly.
    """
    init_example_project(tmp_path, engine_type="duckdb", dialect="duckdb")

    context = Context(paths=tmp_path)
    # Both projections use the output name `x`, which is the case under test.
    context.upsert_model(
        load_sql_based_model(
            parse("""MODEL(name sqlmesh_example.m); SELECT 1 AS x, 2 AS x"""),
            default_catalog="db",
        )
    )

    config_err = "Linter detected errors in the code. Please fix them before proceeding."

    # The plan is expected to raise, so its return value is not bound to a name.
    with pytest.raises(LinterError, match=config_err):
        context.plan(no_prompts=True, auto_apply=True)

tests/core/test_model.py

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@
3131
ModelDefaultsConfig,
3232
LinterConfig,
3333
)
34+
from sqlmesh.core import constants as c
3435
from sqlmesh.core.context import Context, ExecutionContext
3536
from sqlmesh.core.dialect import parse
3637
from sqlmesh.core.engine_adapter.base import MERGE_SOURCE_ALIAS, MERGE_TARGET_ALIAS
3738
from sqlmesh.core.engine_adapter.duckdb import DuckDBEngineAdapter
3839
from sqlmesh.core.macros import MacroEvaluator, macro
39-
from sqlmesh.core import constants as c
4040
from sqlmesh.core.model import (
4141
CustomKind,
4242
PythonModel,
@@ -198,29 +198,64 @@ def test_model_multiple_select_statements():
198198
load_sql_based_model(expressions)
199199

200200

201-
@pytest.mark.parametrize(
202-
"query, error",
203-
[
204-
("y::int, x::int AS y", "duplicate"),
205-
("* FROM db.table", "require inferrable column types"),
206-
],
207-
)
208-
def test_model_validation(query, error):
201+
def test_model_validation(tmp_path):
209202
expressions = d.parse(
210203
f"""
211204
MODEL (
212205
name db.table,
213206
kind FULL,
214207
);
215208
216-
SELECT {query}
209+
SELECT
210+
y::int,
211+
x::int AS y
212+
FROM db.ext
213+
"""
214+
)
215+
216+
ctx = Context(
217+
config=Config(linter=LinterConfig(enabled=True, rules=["noambiguousprojections"])),
218+
paths=tmp_path,
219+
)
220+
ctx.upsert_model(load_sql_based_model(expressions, default_catalog="memory"))
221+
222+
errors = ctx.lint_models(["db.table"], raise_on_error=False)
223+
assert errors, "Expected NoAmbiguousProjections violation"
224+
assert errors[0].violation_msg == "Found duplicate outer select name 'y'"
225+
226+
expressions = d.parse(
227+
"""
228+
MODEL (
229+
name db.table,
230+
kind FULL,
231+
);
232+
233+
SELECT a, a UNION SELECT c, c
234+
"""
235+
)
236+
237+
ctx.upsert_model(load_sql_based_model(expressions, default_catalog="memory"))
238+
239+
errors = ctx.lint_models(["db.table"], raise_on_error=False)
240+
assert errors, "Expected NoAmbiguousProjections violation"
241+
assert errors[0].violation_msg == "Found duplicate outer select name 'a'"
242+
243+
expressions = d.parse(
244+
f"""
245+
MODEL (
246+
name db.table,
247+
kind FULL,
248+
);
249+
250+
SELECT * FROM db.table
217251
"""
218252
)
219253

220254
model = load_sql_based_model(expressions)
221255
with pytest.raises(ConfigError) as ex:
222256
model.validate_definition()
223-
assert error in str(ex.value)
257+
258+
assert "require inferrable column types" in str(ex.value)
224259

225260

226261
def test_model_union_query(sushi_context, assert_exp_eq):
@@ -405,23 +440,6 @@ def get_date(evaluator):
405440
)
406441

407442

408-
def test_model_validation_union_query():
409-
expressions = d.parse(
410-
"""
411-
MODEL (
412-
name db.table,
413-
kind FULL,
414-
);
415-
416-
SELECT a, a UNION SELECT c, c
417-
"""
418-
)
419-
420-
model = load_sql_based_model(expressions)
421-
with pytest.raises(ConfigError, match=r"Found duplicate outer select name 'a'"):
422-
model.validate_definition()
423-
424-
425443
@use_terminal_console
426444
def test_model_qualification(tmp_path: Path):
427445
with patch.object(get_console(), "log_warning") as mock_logger:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{{ config(materialized='table') }}
2+
3+
SELECT
4+
1 AS c,
5+
2 AS c,

0 commit comments

Comments
 (0)