Skip to content

Commit 8ce63a7

Browse files
authored
Feat!: Add 'validate_query' model attribute for compile-time checks (#3614)
1 parent f4ea686 commit 8ce63a7

10 files changed

Lines changed: 176 additions & 33 deletions

File tree

docs/concepts/models/overview.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,9 @@ to `false` causes SQLMesh to disable query canonicalization & simplification. Th
443443
!!! warning
444444
Turning off the optimizer may prevent column-level lineage from working for the affected model and its descendants, unless all columns in the model's query are qualified and it contains no star projections (e.g. `SELECT *`).
445445

446+
### validate_query
447+
: Whether the model's query will be validated at compile time. This attribute is `false` by default. Setting it to `true` causes SQLMesh to raise an error instead of emitting warnings. This will display invalid columns in your SQL statements along with models containing `SELECT *` that cannot be automatically expanded to list out all columns. This ensures SQL is verified locally before time and money are spent running the SQL in your data warehouse.
448+
446449
## Incremental Model Properties
447450

448451
These properties can be specified in an incremental model's `kind` definition.

docs/reference/model_configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ Configuration options for SQLMesh model properties. Supported by all model kinds
3939
| `enabled` | Whether the model is enabled. This attribute is `true` by default. Setting it to `false` causes SQLMesh to ignore this model when loading the project. | bool | N |
4040
| `gateway` | Specifies the gateway to use for the execution of this model. When not specified, the default gateway is used. | str | N |
4141
| `optimize_query` | Whether the model's query should be optimized. This attribute is `true` by default. Setting it to `false` causes SQLMesh to disable query canonicalization & simplification. This should be turned off only if the optimized query leads to errors such as surpassing text limit. | bool | N |
42-
42+
| `validate_query` | Whether the model's query will be strictly validated at compile time. This attribute is `false` by default. Setting it to `true` causes SQLMesh to raise an error instead of emitting warnings. This will display invalid columns in your SQL statements along with models containing `SELECT *` that cannot be automatically expanded to list out all columns. | bool | N |
4343
### Model defaults
4444

4545
The SQLMesh project-level configuration must contain the `model_defaults` key and must specify a value for its `dialect` key. Other values are set automatically unless explicitly overridden in the model definition. Learn more about project-level configuration in the [configuration guide](../guides/configuration.md).

sqlmesh/core/config/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class ModelDefaultsConfig(BaseConfig):
4747
session_properties: t.Optional[t.Dict[str, t.Any]] = None
4848
audits: t.Optional[t.List[FunctionCall]] = None
4949
optimize_query: t.Optional[bool] = None
50+
validate_query: t.Optional[bool] = None
5051

5152
_model_kind_validator = model_kind_validator
5253
_on_destructive_change_validator = on_destructive_change_validator

sqlmesh/core/model/cache.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ def _entry_name(model: SqlModel) -> str:
135135
hash_data.append(str([gen(d) for d in model.macro_definitions]))
136136
hash_data.append(str([(k, v) for k, v in model.sorted_python_env]))
137137
hash_data.extend(model.jinja_macros.data_hash_values)
138+
# Append as a single entry: list.extend() on a str would add one element per character.
hash_data.append(str(model.validate_query))
138139
return f"{model.name}_{crc32(hash_data)}"
139140

140141

sqlmesh/core/model/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ def depends_on(cls: t.Type, v: t.Any, values: t.Dict[str, t.Any]) -> t.Optional[
303303
"allow_partials",
304304
"enabled",
305305
"optimize_query",
306+
"validate_query",
306307
mode="before",
307308
check_fields=False,
308309
)(parse_bool)

sqlmesh/core/model/definition.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ def render_definition(
221221
"enabled",
222222
"inline_audits",
223223
"optimize_query",
224+
"validate_query",
224225
):
225226
expressions.append(
226227
exp.Property(
@@ -912,11 +913,19 @@ def validate_definition(self) -> None:
912913
self._path,
913914
)
914915

915-
if not self.is_sql and self.optimize_query is not None:
916-
raise_config_error(
917-
"SQLMesh query optimizer can only be enabled/disabled for SQL models",
918-
self._path,
919-
)
916+
# The following attributes should be set only for SQL models
917+
if not self.is_sql:
918+
if self.optimize_query:
919+
raise_config_error(
920+
"SQLMesh query optimizer can only be enabled for SQL models",
921+
self._path,
922+
)
923+
924+
if self.validate_query:
925+
raise_config_error(
926+
"Query validation can only be enabled for SQL models",
927+
self._path,
928+
)
920929

921930
def is_breaking_change(self, previous: Model) -> t.Optional[bool]:
922931
"""Determines whether this model is a breaking change in relation to the `previous` model.
@@ -1013,6 +1022,7 @@ def metadata_hash(self) -> str:
10131022
self.project,
10141023
str(self.allow_partials),
10151024
gen(self.session_properties_) if self.session_properties_ else None,
1025+
str(self.validate_query) if self.validate_query is not None else None,
10161026
]
10171027

10181028
for audit_name, audit_args in sorted(self.audits, key=lambda a: a[0]):
@@ -1354,6 +1364,7 @@ def _query_renderer(self) -> QueryRenderer:
13541364
default_catalog=self.default_catalog,
13551365
quote_identifiers=not no_quote_identifiers,
13561366
optimize_query=self.optimize_query,
1367+
validate_query=self.validate_query,
13571368
)
13581369

13591370
@property

sqlmesh/core/model/meta.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ class ModelMeta(_Node):
7979
physical_version: t.Optional[str] = None
8080
gateway: t.Optional[str] = None
8181
optimize_query: t.Optional[bool] = None
82+
validate_query: t.Optional[bool] = None
8283

8384
_bool_validator = bool_validator
8485
_model_kind_validator = model_kind_validator

sqlmesh/core/renderer.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def __init__(
5252
model_fqn: t.Optional[str] = None,
5353
normalize_identifiers: bool = True,
5454
optimize_query: t.Optional[bool] = True,
55+
validate_query: t.Optional[bool] = False,
5556
):
5657
self._expression = expression
5758
self._dialect = dialect
@@ -67,6 +68,7 @@ def __init__(
6768
self._cache: t.List[t.Optional[exp.Expression]] = []
6869
self._model_fqn = model_fqn
6970
self._optimize_query_flag = optimize_query is not False
71+
self._validate_query = validate_query
7072

7173
def update_schema(self, schema: t.Dict[str, t.Any]) -> None:
7274
self.schema = d.normalize_mapping_schema(schema, dialect=self._dialect)
@@ -526,12 +528,17 @@ def _optimize_query(self, query: exp.Query, all_deps: t.Set[str]) -> exp.Query:
526528
if self._model_fqn and not should_optimize and any(s.is_star for s in query.selects):
527529
deps = ", ".join(f"'{dep}'" for dep in sorted(missing_deps))
528530

529-
logger.warning(
531+
warning = (
530532
f"SELECT * cannot be expanded due to missing schema(s) for model(s): {deps}. "
531533
"Run `sqlmesh create_external_models` and / or make sure that the model "
532-
f"'{self._model_fqn}' can be rendered at parse time.",
534+
f"'{self._model_fqn}' can be rendered at parse time."
533535
)
534536

537+
if self._validate_query:
538+
raise_config_error(warning, self._path)
539+
540+
logger.warning(warning)
541+
535542
try:
536543
if should_optimize:
537544
query = query.copy()
@@ -549,11 +556,16 @@ def _optimize_query(self, query: exp.Query, all_deps: t.Set[str]) -> exp.Query:
549556
)
550557
)
551558
except SqlglotError as ex:
559+
warning = (
560+
f"{ex} for model '{self._model_fqn}', the column may not exist or is ambiguous"
561+
)
562+
563+
if self._validate_query:
564+
raise_config_error(warning, self._path)
565+
552566
query = original
553567

554-
logger.warning(
555-
"%s for model '%s', the column may not exist or is ambiguous", ex, self._model_fqn
556-
)
568+
logger.warning(warning)
557569
except Exception as ex:
558570
raise_config_error(
559571
f"Failed to optimize query, please file an issue at https://github.com/TobikoData/sqlmesh/issues/new. {ex}",

tests/core/test_model.py

Lines changed: 132 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def test_model_qualification():
291291
model.render_query(needs_optimization=True)
292292
assert (
293293
mock_logger.call_args[0][0]
294-
== "%s for model '%s', the column may not exist or is ambiguous"
294+
== """Column '"a"' could not be resolved for model '"db"."table"', the column may not exist or is ambiguous"""
295295
)
296296

297297

@@ -1001,10 +1001,10 @@ def test_seed_on_virtual_update_statements():
10011001
CREATE TABLE x{{ 1 + 1 }};
10021002
JINJA_END;
10031003
1004-
ON_VIRTUAL_UPDATE_BEGIN;
1005-
JINJA_STATEMENT_BEGIN;
1004+
ON_VIRTUAL_UPDATE_BEGIN;
1005+
JINJA_STATEMENT_BEGIN;
10061006
GRANT SELECT ON VIEW {{ this_model }} TO ROLE dev_role;
1007-
JINJA_END;
1007+
JINJA_END;
10081008
DROP TABLE x2;
10091009
ON_VIRTUAL_UPDATE_END;
10101010
@@ -6595,24 +6595,29 @@ def test_model_optimize(tmp_path: Path, assert_exp_eq):
65956595
context.plan(auto_apply=True, no_prompts=True)
65966596

65976597
# Ensure non-SQL models raise if optimize_query is enabled (True); explicitly disabling it (False) is allowed
6598+
seed_path = tmp_path / "seed.csv"
6599+
model_kind = SeedKind(path=str(seed_path.absolute()))
6600+
with open(seed_path, "w", encoding="utf-8") as fd:
6601+
fd.write(
6602+
"""
6603+
col_a,col_b,col_c
6604+
1,text_a,1.0
6605+
2,text_b,2.0"""
6606+
)
6607+
model = create_seed_model("test_db.test_seed_model", model_kind, optimize_query=True)
6608+
context = Context(config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")))
6609+
65986610
with pytest.raises(
65996611
ConfigError,
6600-
match=r"SQLMesh query optimizer can only be enabled/disabled for SQL models",
6612+
match=r"SQLMesh query optimizer can only be enabled for SQL models",
66016613
):
6602-
seed_path = tmp_path / "seed.csv"
6603-
model_kind = SeedKind(path=str(seed_path.absolute()))
6604-
with open(seed_path, "w", encoding="utf-8") as fd:
6605-
fd.write(
6606-
"""
6607-
col_a,col_b,col_c
6608-
1,text_a,1.0
6609-
2,text_b,2.0"""
6610-
)
6611-
model = create_seed_model("test_db.test_seed_model", model_kind, optimize_query=True)
6612-
context = Context(config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")))
66136614
context.upsert_model(model)
66146615
context.plan(auto_apply=True, no_prompts=True)
66156616

6617+
model = create_seed_model("test_db.test_seed_model", model_kind, optimize_query=False)
6618+
context.upsert_model(model)
6619+
context.plan(auto_apply=True, no_prompts=True)
6620+
66166621

66176622
def test_column_description_metadata_change():
66186623
context = Context(config=Config())
@@ -6762,15 +6767,15 @@ def test_model_on_virtual_update(make_snapshot: t.Callable):
67626767
def resolve_parent_name(evaluator, name):
67636768
return evaluator.resolve_table(name.name)
67646769

6765-
virtual_update_statements = """
6770+
virtual_update_statements = """
67666771
CREATE OR REPLACE VIEW test_view FROM demo_db.table;
67676772
GRANT SELECT ON VIEW @this_model TO ROLE owner_name;
67686773
JINJA_STATEMENT_BEGIN;
67696774
GRANT SELECT ON VIEW {{this_model}} TO ROLE admin;
67706775
JINJA_END;
67716776
GRANT REFERENCES, SELECT ON FUTURE VIEWS IN DATABASE demo_db TO ROLE owner_name;
67726777
@resolve_parent_name('parent');
6773-
GRANT SELECT ON VIEW demo_db.table /* sqlglot.meta replace=false */ TO ROLE admin;
6778+
GRANT SELECT ON VIEW demo_db.table /* sqlglot.meta replace=false */ TO ROLE admin;
67746779
"""
67756780

67766781
expressions = d.parse(
@@ -6784,7 +6789,7 @@ def resolve_parent_name(evaluator, name):
67846789
67856790
on_virtual_update_begin;
67866791
6787-
{virtual_update_statements}
6792+
{virtual_update_statements}
67886793
67896794
on_virtual_update_end;
67906795
@@ -6918,3 +6923,111 @@ def model_with_virtual_statements(context, **kwargs):
69186923
rendered_statements[2].sql()
69196924
== "GRANT REFERENCES, SELECT ON FUTURE VIEWS IN DATABASE db TO ROLE dev_role"
69206925
)
6926+
6927+
6928+
def test_compile_time_checks(tmp_path: Path, assert_exp_eq):
6929+
# Strict SELECT * expansion
6930+
strict_query = d.parse(
6931+
"""
6932+
MODEL (
6933+
name test,
6934+
validate_query True,
6935+
);
6936+
6937+
SELECT * FROM tbl
6938+
"""
6939+
)
6940+
6941+
with pytest.raises(
6942+
ConfigError,
6943+
match=r".*cannot be expanded due to missing schema.*",
6944+
):
6945+
load_sql_based_model(strict_query).render_query()
6946+
6947+
# Strict column resolution
6948+
strict_query = d.parse(
6949+
"""
6950+
MODEL (
6951+
name test,
6952+
validate_query True,
6953+
);
6954+
6955+
SELECT foo
6956+
"""
6957+
)
6958+
6959+
with pytest.raises(
6960+
ConfigError,
6961+
match=r"""Column '"foo"' could not be resolved for model.*""",
6962+
):
6963+
load_sql_based_model(strict_query).render_query()
6964+
6965+
# Non-strict model with strict defaults raises error, otherwise can still render
6966+
strict_default = ModelDefaultsConfig(validate_query=True).dict()
6967+
query = d.parse(
6968+
"""
6969+
MODEL (
6970+
name test,
6971+
);
6972+
6973+
SELECT * FROM tbl
6974+
"""
6975+
)
6976+
6977+
with pytest.raises(
6978+
ConfigError,
6979+
match=r".*cannot be expanded due to missing schema.*",
6980+
):
6981+
load_sql_based_model(query, defaults=strict_default).render_query()
6982+
6983+
assert_exp_eq(load_sql_based_model(query).render_query(), 'SELECT * FROM "tbl" AS "tbl"')
6984+
6985+
# Ensure plan works for valid queries & cache is invalidated if strict changes
6986+
context = Context(config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")))
6987+
6988+
query = d.parse(
6989+
"""
6990+
MODEL (
6991+
name db.test,
6992+
validate_query True,
6993+
);
6994+
6995+
SELECT 1 AS col
6996+
"""
6997+
)
6998+
6999+
context.upsert_model(load_sql_based_model(query, default_catalog=context.default_catalog))
7000+
context.plan(auto_apply=True, no_prompts=True)
7001+
7002+
context.upsert_model("db.test", validate_query=False)
7003+
plan = context.plan(no_prompts=True, auto_apply=True)
7004+
7005+
snapshots = list(plan.snapshots.values())
7006+
assert len(snapshots) == 1
7007+
7008+
snapshot = snapshots[0]
7009+
assert len(snapshot.previous_versions) == 1
7010+
assert snapshot.change_category == SnapshotChangeCategory.METADATA
7011+
7012+
# Ensure non-SQLModels raise if strict mode is set to True
7013+
seed_path = tmp_path / "seed.csv"
7014+
model_kind = SeedKind(path=str(seed_path.absolute()))
7015+
with open(seed_path, "w", encoding="utf-8") as fd:
7016+
fd.write(
7017+
"""
7018+
col_a,col_b,col_c
7019+
1,text_a,1.0"""
7020+
)
7021+
model = create_seed_model("test_db.test_seed_model", model_kind, validate_query=True)
7022+
context = Context(config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")))
7023+
7024+
with pytest.raises(
7025+
ConfigError,
7026+
match=r"Query validation can only be enabled for SQL models at",
7027+
):
7028+
context.upsert_model(model)
7029+
context.plan(auto_apply=True, no_prompts=True)
7030+
7031+
model = create_seed_model("test_db.test_seed_model", model_kind, validate_query=False)
7032+
context.upsert_model(model)
7033+
context.plan(auto_apply=True, no_prompts=True)

tests/core/test_snapshot.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,7 @@ def test_fingerprint(model: Model, parent_model: Model):
760760

761761
original_fingerprint = SnapshotFingerprint(
762762
data_hash="1312415267",
763-
metadata_hash="2573378960",
763+
metadata_hash="2476734280",
764764
)
765765

766766
assert fingerprint == original_fingerprint
@@ -819,7 +819,7 @@ def test_fingerprint_seed_model():
819819

820820
expected_fingerprint = SnapshotFingerprint(
821821
data_hash="1909791099",
822-
metadata_hash="3403817841",
822+
metadata_hash="1153541408",
823823
)
824824

825825
model = load_sql_based_model(expressions, path=Path("./examples/sushi/models/test_model.sql"))
@@ -858,7 +858,7 @@ def test_fingerprint_jinja_macros(model: Model):
858858
)
859859
original_fingerprint = SnapshotFingerprint(
860860
data_hash="923305614",
861-
metadata_hash="2573378960",
861+
metadata_hash="2476734280",
862862
)
863863

864864
fingerprint = fingerprint_from_node(model, nodes={})

0 commit comments

Comments
 (0)