From 776a6793bc20122fb4a4d7eaa25e8ab623609a04 Mon Sep 17 00:00:00 2001 From: Cortland Goffena Date: Wed, 3 Sep 2025 19:44:44 -0600 Subject: [PATCH 01/10] switching out yaml --- sqlmesh/core/linter/rules/builtin.py | 50 ++++++++++++++++++++++++---- tests/core/linter/test_builtin.py | 45 +++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 7 deletions(-) diff --git a/sqlmesh/core/linter/rules/builtin.py b/sqlmesh/core/linter/rules/builtin.py index f6bef4b4ef..8e143efd85 100644 --- a/sqlmesh/core/linter/rules/builtin.py +++ b/sqlmesh/core/linter/rules/builtin.py @@ -3,29 +3,31 @@ from __future__ import annotations import typing as t +from pathlib import Path +from ruamel.yaml import YAML from sqlglot.expressions import Star from sqlglot.helper import subclasses from sqlmesh.core.constants import EXTERNAL_MODELS_YAML from sqlmesh.core.dialect import normalize_model_name +from sqlmesh.core.linter.definition import RuleSet from sqlmesh.core.linter.helpers import ( TokenPositionDetails, get_range_of_model_block, read_range_from_string, ) from sqlmesh.core.linter.rule import ( + CreateFile, + Fix, + Position, + Range, Rule, RuleViolation, - Range, - Fix, TextEdit, - Position, - CreateFile, ) -from sqlmesh.core.linter.definition import RuleSet -from sqlmesh.core.model import Model, SqlModel, ExternalModel -from sqlmesh.utils.lineage import extract_references_from_query, ExternalModelReference +from sqlmesh.core.model import ExternalModel, Model, SqlModel +from sqlmesh.utils.lineage import ExternalModelReference, extract_references_from_query class NoSelectStar(Rule): @@ -129,6 +131,40 @@ def check_model(self, model: Model) -> t.Optional[RuleViolation]: return self.violation() +class NoMissingUnitTest(Rule): + """All models must have a unit test found in the test/ directory yaml files""" + + def check_model(self, model: Model) -> t.Optional[RuleViolation]: + # External models cannot have unit tests + if isinstance(model, ExternalModel): + return None + + test_dir = Path("tests") + found_test = False + + yaml_parser = YAML(typ="safe") + for test_file in test_dir.rglob("*.yaml"): + try: + test_data = yaml_parser.load(test_file) or {} + except Exception: + # Skip files with Jinja templating or other parse errors + continue + + for _, test_config in test_data.items(): + print(f"Test_Config: {test_config}") + if test_config.get("model") == model.name: + found_test = True + break + if found_test: + break + + if not found_test: + return self.violation( + violation_msg=f"Model {model.name} is missing unit test(s). Please add in the tests/ directory." + ) + return None + + class NoMissingExternalModels(Rule): """All external models must be registered in the external_models.yaml file""" diff --git a/tests/core/linter/test_builtin.py b/tests/core/linter/test_builtin.py index 1a19d036b5..4b7b710b56 100644 --- a/tests/core/linter/test_builtin.py +++ b/tests/core/linter/test_builtin.py @@ -172,3 +172,48 @@ def test_no_missing_external_models_with_existing_file_not_ending_in_newline( ) fix_path = sushi_path / "external_models.yaml" assert edit.path == fix_path + + +def test_no_missing_unit_tests(tmp_path, copy_to_temp_path): + """ + Tests that the NoMissingUnitTest linter rule correctly identifies models + without corresponding unit tests in the tests/ directory + + This test checks the sushi example project, enables the linter, + and verifies that the linter raises a rule violation for the models + that do not have a unit test + """ + sushi_paths = copy_to_temp_path("examples/sushi") + sushi_path = sushi_paths[0] + + # Override the config.py to turn on lint + with open(sushi_path / "config.py", "r") as f: + read_file = f.read() + + before = """ linter=LinterConfig( + enabled=False, + rules=[ + "ambiguousorinvalidcolumn", + "invalidselectstarexpansion", + "noselectstar", + "nomissingaudits", + "nomissingowner", + "nomissingexternalmodels", + ], + ),""" + after = """linter=LinterConfig(enabled=True, rules=["nomissingunittest"]),""" + read_file = read_file.replace(before, after) + assert after in read_file + with open(sushi_path / "config.py", "w") as f: + f.writelines(read_file) + + # Load the context with the temporary sushi path + context = Context(paths=[sushi_path]) + + # Lint the models + lints = context.lint_models(raise_on_error=False) + assert 1 == 1 + # assert len(lints) >= 1 + # lint = lints[0] + # assert lint.violation_range is None + # print(lints) From 346ad720c7456b468c401c1b44ea5526d59d2c4b Mon Sep 17 00:00:00 2001 From: Cortland Goffena Date: Sun, 14 Sep 2025 18:51:14 -0600 Subject: [PATCH 02/10] figured out context to complete test --- sqlmesh/core/linter/rules/builtin.py | 4 +--- tests/core/linter/test_builtin.py | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/sqlmesh/core/linter/rules/builtin.py b/sqlmesh/core/linter/rules/builtin.py index 8e143efd85..9f453ea25d 100644 --- a/sqlmesh/core/linter/rules/builtin.py +++ b/sqlmesh/core/linter/rules/builtin.py @@ -3,7 +3,6 @@ from __future__ import annotations import typing as t -from pathlib import Path from ruamel.yaml import YAML from sqlglot.expressions import Star @@ -139,7 +138,7 @@ def check_model(self, model: Model) -> t.Optional[RuleViolation]: if isinstance(model, ExternalModel): return None - test_dir = Path("tests") + test_dir = self.context.path / "tests" found_test = False yaml_parser = YAML(typ="safe") @@ -151,7 +150,6 @@ def check_model(self, model: Model) -> t.Optional[RuleViolation]: continue for _, test_config in test_data.items(): - print(f"Test_Config: {test_config}") if test_config.get("model") == model.name: found_test = True break diff --git a/tests/core/linter/test_builtin.py b/tests/core/linter/test_builtin.py index 4b7b710b56..0ff91470ff 100644 --- a/tests/core/linter/test_builtin.py +++ b/tests/core/linter/test_builtin.py @@ -212,8 +212,23 @@ def test_no_missing_unit_tests(tmp_path, copy_to_temp_path): # Lint the models lints = context.lint_models(raise_on_error=False) - assert 1 == 1 - # assert len(lints) >= 1 - # lint = lints[0] - # assert lint.violation_range is None - # print(lints) + + # Should have violations for models without tests (most models except customers) + assert len(lints) >= 1 + + # Check that we get violations for models without tests + violation_messages = [lint.violation_msg for lint in lints] + assert any("is missing unit test(s)" in msg for msg in violation_messages) + + # Check that models with existing tests don't have violations + models_with_tests = ["customer_revenue_by_day", "customer_revenue_lifetime", "order_items"] + + for model_name in models_with_tests: + model_violations = [ + lint + for lint in lints + if model_name in lint.violation_msg and "is missing unit test(s)" in lint.violation_msg + ] + assert len(model_violations) == 0, ( + f"Model {model_name} should not have a violation since it has a test" + ) From 2aabde8278ca930515f97b5a3189d179eed47e30 Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Sun, 12 Oct 2025 17:55:53 -0600 Subject: [PATCH 03/10] making it key for model --- sqlmesh/core/context.py | 33 +++++++++++++++++----------- sqlmesh/core/linter/rules/builtin.py | 21 +----------------- sqlmesh/core/loader.py | 26 ++++++++++++++++------ sqlmesh/core/test/discovery.py | 4 ++++ 4 files changed, 44 insertions(+), 40 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index cfe84fb700..76eb2ff60a 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -40,13 +40,13 @@ import time import traceback import typing as t +from datetime import datetime from functools import cached_property from io import StringIO from itertools import chain from pathlib import Path from shutil import rmtree from types import MappingProxyType -from datetime import datetime from sqlglot import Dialect, exp from sqlglot.helper import first @@ -63,6 +63,7 @@ ) from sqlmesh.core.config.connection import ConnectionConfig from sqlmesh.core.config.loader import C +from sqlmesh.core.config.model import ModelDefaultsConfig from sqlmesh.core.config.root import RegexKeyDict from sqlmesh.core.console import get_console from sqlmesh.core.context_diff import ContextDiff @@ -76,24 +77,23 @@ ) from sqlmesh.core.engine_adapter import EngineAdapter from sqlmesh.core.environment import Environment, EnvironmentNamingInfo, EnvironmentStatements -from sqlmesh.core.loader import Loader from sqlmesh.core.linter.definition import AnnotatedRuleViolation, Linter from sqlmesh.core.linter.rules import BUILTIN_RULES +from sqlmesh.core.loader import Loader from sqlmesh.core.macros import ExecutableOrMacro, macro from sqlmesh.core.metric import Metric, rewrite from sqlmesh.core.model import Model, update_model_schemas -from sqlmesh.core.config.model import ModelDefaultsConfig from sqlmesh.core.notification_target import ( NotificationEvent, NotificationTarget, NotificationTargetManager, ) -from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals, PlanExplainer +from sqlmesh.core.plan import Plan, PlanBuilder, PlanExplainer, SnapshotIntervals from sqlmesh.core.plan.definition import UserProvidedFlags from sqlmesh.core.reference import ReferenceGraph -from sqlmesh.core.scheduler import Scheduler, CompletionStatus +from sqlmesh.core.scheduler import CompletionStatus, Scheduler from sqlmesh.core.schema_loader import create_external_models_file -from sqlmesh.core.selector import Selector, NativeSelector +from sqlmesh.core.selector import NativeSelector, Selector from sqlmesh.core.snapshot import ( DeployabilityIndex, Snapshot, @@ -111,33 +111,33 @@ from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots from sqlmesh.core.table_diff import TableDiff from sqlmesh.core.test import ( - ModelTextTestResult, ModelTestMetadata, + ModelTextTestResult, generate_test, run_tests, ) from sqlmesh.core.user import User from sqlmesh.utils import UniqueKeyDict, Verbosity from sqlmesh.utils.concurrency import concurrent_apply_to_values +from sqlmesh.utils.config import print_config from sqlmesh.utils.dag import DAG from sqlmesh.utils.date import ( TimeLike, - to_timestamp, format_tz_datetime, - now_timestamp, + make_exclusive, now, + now_timestamp, to_datetime, - make_exclusive, + to_timestamp, ) from sqlmesh.utils.errors import ( CircuitBreakerError, ConfigError, + LinterError, PlanError, SQLMeshError, UncategorizedPlanError, - LinterError, ) -from sqlmesh.utils.config import print_config from sqlmesh.utils.jinja import JinjaMacroRegistry from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path @@ -146,8 +146,8 @@ from typing_extensions import Literal from sqlmesh.core.engine_adapter._typing import ( - BigframeSession, DF, + BigframeSession, PySparkDataFrame, PySparkSession, SnowparkSession, @@ -398,6 +398,7 @@ def __init__( self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict( "standaloneaudits" ) + self._models_with_tests: t.Set[str] = set() self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") self._jinja_macros = JinjaMacroRegistry() @@ -647,6 +648,7 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: self._requirements.update(project.requirements) self._excluded_requirements.update(project.excluded_requirements) self._environment_statements.extend(project.environment_statements) + self._models_with_tests.update(project.models_with_tests) config = loader.config self._linters[config.project] = Linter.from_rules( @@ -1049,6 +1051,11 @@ def standalone_audits(self) -> MappingProxyType[str, StandaloneAudit]: """Returns all registered standalone audits in this context.""" return MappingProxyType(self._standalone_audits) + @property + def models_with_tests(self) -> t.Set[str]: + """Returns all models with tests in this context.""" + return self._models_with_tests + @property def snapshots(self) -> t.Dict[str, Snapshot]: """Generates and returns snapshots based on models registered in this context. diff --git a/sqlmesh/core/linter/rules/builtin.py b/sqlmesh/core/linter/rules/builtin.py index 9f453ea25d..28b810fca0 100644 --- a/sqlmesh/core/linter/rules/builtin.py +++ b/sqlmesh/core/linter/rules/builtin.py @@ -4,7 +4,6 @@ import typing as t -from ruamel.yaml import YAML from sqlglot.expressions import Star from sqlglot.helper import subclasses @@ -138,25 +137,7 @@ def check_model(self, model: Model) -> t.Optional[RuleViolation]: if isinstance(model, ExternalModel): return None - test_dir = self.context.path / "tests" - found_test = False - - yaml_parser = YAML(typ="safe") - for test_file in test_dir.rglob("*.yaml"): - try: - test_data = yaml_parser.load(test_file) or {} - except Exception: - # Skip files with Jinja templating or other parse errors - continue - - for _, test_config in test_data.items(): - if test_config.get("model") == model.name: - found_test = True - break - if found_test: - break - - if not found_test: + if model.name not in self.context.models_with_tests: return self.violation( violation_msg=f"Model {model.name} is missing unit test(s). Please add in the tests/ directory." ) diff --git a/sqlmesh/core/loader.py b/sqlmesh/core/loader.py index 6647a2edba..4145351f9e 100644 --- a/sqlmesh/core/loader.py +++ b/sqlmesh/core/loader.py @@ -1,6 +1,7 @@ from __future__ import annotations import abc +import concurrent.futures import glob import itertools import linecache @@ -10,11 +11,10 @@ from collections import Counter, defaultdict from dataclasses import dataclass from pathlib import Path -from pydantic import ValidationError -import concurrent.futures -from sqlglot.errors import SqlglotError +from pydantic import ValidationError from sqlglot import exp +from sqlglot.errors import SqlglotError from sqlglot.helper import subclasses from sqlmesh.core import constants as c @@ -22,8 +22,8 @@ from sqlmesh.core.console import Console from sqlmesh.core.dialect import parse from sqlmesh.core.environment import EnvironmentStatements -from sqlmesh.core.linter.rule import Rule from sqlmesh.core.linter.definition import RuleSet +from sqlmesh.core.linter.rule import Rule from sqlmesh.core.macros import MacroRegistry, macro from sqlmesh.core.metric import Metric, MetricMeta, expand_metrics, load_metric_ddl from sqlmesh.core.model import ( @@ -40,10 +40,10 @@ from sqlmesh.utils.errors import ConfigError from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroExtractor from sqlmesh.utils.metaprogramming import import_python_file -from sqlmesh.utils.pydantic import validation_error_message from sqlmesh.utils.process import create_process_pool_executor -from sqlmesh.utils.yaml import YAML, load as yaml_load - +from sqlmesh.utils.pydantic import validation_error_message +from sqlmesh.utils.yaml import YAML +from sqlmesh.utils.yaml import load as yaml_load if t.TYPE_CHECKING: from sqlmesh.core.context import GenericContext @@ -64,6 +64,8 @@ class LoadedProject: excluded_requirements: t.Set[str] environment_statements: t.List[EnvironmentStatements] user_rules: RuleSet + model_test_metadata: t.List[ModelTestMetadata] + models_with_tests: t.Set[str] class CacheBase(abc.ABC): @@ -243,6 +245,14 @@ def load(self) -> LoadedProject: user_rules = self._load_linting_rules() + model_test_metadata = self.load_model_tests() + + models_with_tests = { + model_test_metadata.model_name for model_test_metadata in model_test_metadata + } + + self._models_with_tests = models_with_tests + project = LoadedProject( macros=macros, jinja_macros=jinja_macros, @@ -254,6 +264,8 @@ def load(self) -> LoadedProject: excluded_requirements=excluded_requirements, environment_statements=environment_statements, user_rules=user_rules, + model_test_metadata=model_test_metadata, + models_with_tests=models_with_tests, ) return project diff --git a/sqlmesh/core/test/discovery.py b/sqlmesh/core/test/discovery.py index 0f60fe6fa9..5e2452b570 100644 --- a/sqlmesh/core/test/discovery.py +++ b/sqlmesh/core/test/discovery.py @@ -20,6 +20,10 @@ class ModelTestMetadata(PydanticModel): def fully_qualified_test_name(self) -> str: return f"{self.path}::{self.test_name}" + @property + def model_name(self) -> str: + return self.body["model"] + def __hash__(self) -> int: return self.fully_qualified_test_name.__hash__() From 04a82c2a5264c37610629e019d9063c42b473d56 Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Sun, 12 Oct 2025 18:25:11 -0600 Subject: [PATCH 04/10] moving to use eager loaded tests for testing --- sqlmesh/core/context.py | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 76eb2ff60a..11bf082b30 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -77,6 +77,7 @@ ) from sqlmesh.core.engine_adapter import EngineAdapter from sqlmesh.core.environment import Environment, EnvironmentNamingInfo, EnvironmentStatements +from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots from sqlmesh.core.linter.definition import AnnotatedRuleViolation, Linter from sqlmesh.core.linter.rules import BUILTIN_RULES from sqlmesh.core.loader import Loader @@ -108,11 +109,11 @@ StateReader, StateSync, ) -from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots from sqlmesh.core.table_diff import TableDiff from sqlmesh.core.test import ( ModelTestMetadata, ModelTextTestResult, + filter_tests_by_patterns, generate_test, run_tests, ) @@ -399,6 +400,7 @@ def __init__( "standaloneaudits" ) self._models_with_tests: t.Set[str] = set() + self._model_test_metadata: t.List[ModelTestMetadata] = [] self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") self._jinja_macros = JinjaMacroRegistry() @@ -649,6 +651,7 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: self._excluded_requirements.update(project.excluded_requirements) self._environment_statements.extend(project.environment_statements) self._models_with_tests.update(project.models_with_tests) + self._model_test_metadata.extend(project.model_test_metadata) config = loader.config self._linters[config.project] = Linter.from_rules( @@ -2227,7 +2230,10 @@ def test( pd.set_option("display.max_columns", None) - test_meta = self.load_model_tests(tests=tests, patterns=match_patterns) + loaded_test_meta = self._model_test_metadata + test_meta = self._filter_preloaded_tests( + test_meta=loaded_test_meta, tests=tests, patterns=match_patterns + ) result = run_tests( model_test_metadata=test_meta, @@ -2789,6 +2795,35 @@ def _get_engine_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") return self.engine_adapter + def _filter_preloaded_tests( + self, + test_meta: t.List[ModelTestMetadata], + tests: t.Optional[t.List[str]] = None, + patterns: t.Optional[t.List[str]] = None, + ) -> t.List[ModelTestMetadata]: + """Filter pre-loaded test metadata based on tests and patterns.""" + + if tests: + filtered_tests = [] + for test in tests: + if "::" in test: + filename, test_name = test.split("::", maxsplit=1) + filtered_tests.extend( + [ + t + for t in test_meta + if str(t.path) == filename and t.test_name == test_name + ] + ) + else: + filtered_tests.extend([t for t in test_meta if str(t.path) == test]) + test_meta = filtered_tests + + if patterns: + test_meta = filter_tests_by_patterns(test_meta, patterns) + + return test_meta + def _snapshots( self, models_override: t.Optional[UniqueKeyDict[str, Model]] = None ) -> t.Dict[str, Snapshot]: From c1d20dc9ee33502761f29b0fe20b0682a4fd64ca Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Sun, 12 Oct 2025 18:40:18 -0600 Subject: [PATCH 05/10] fixing formatting issues --- sqlmesh/core/context.py | 28 ++++++++++++++-------------- sqlmesh/core/linter/rules/builtin.py | 14 +++++++------- sqlmesh/core/loader.py | 14 +++++++------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 11bf082b30..3f157cc3d0 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -40,13 +40,13 @@ import time import traceback import typing as t -from datetime import datetime from functools import cached_property from io import StringIO from itertools import chain from pathlib import Path from shutil import rmtree from types import MappingProxyType +from datetime import datetime from sqlglot import Dialect, exp from sqlglot.helper import first @@ -63,7 +63,6 @@ ) from sqlmesh.core.config.connection import ConnectionConfig from sqlmesh.core.config.loader import C -from sqlmesh.core.config.model import ModelDefaultsConfig from sqlmesh.core.config.root import RegexKeyDict from sqlmesh.core.console import get_console from sqlmesh.core.context_diff import ContextDiff @@ -77,24 +76,24 @@ ) from sqlmesh.core.engine_adapter import EngineAdapter from sqlmesh.core.environment import Environment, EnvironmentNamingInfo, EnvironmentStatements -from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots +from sqlmesh.core.loader import Loader from sqlmesh.core.linter.definition import AnnotatedRuleViolation, Linter from sqlmesh.core.linter.rules import BUILTIN_RULES -from sqlmesh.core.loader import Loader from sqlmesh.core.macros import ExecutableOrMacro, macro from sqlmesh.core.metric import Metric, rewrite from sqlmesh.core.model import Model, update_model_schemas +from sqlmesh.core.config.model import ModelDefaultsConfig from sqlmesh.core.notification_target import ( NotificationEvent, NotificationTarget, NotificationTargetManager, ) -from sqlmesh.core.plan import Plan, PlanBuilder, PlanExplainer, SnapshotIntervals +from sqlmesh.core.plan import Plan, PlanBuilder, SnapshotIntervals, PlanExplainer from sqlmesh.core.plan.definition import UserProvidedFlags from sqlmesh.core.reference import ReferenceGraph -from sqlmesh.core.scheduler import CompletionStatus, Scheduler +from sqlmesh.core.scheduler import Scheduler, CompletionStatus from sqlmesh.core.schema_loader import create_external_models_file -from sqlmesh.core.selector import NativeSelector, Selector +from sqlmesh.core.selector import Selector, NativeSelector from sqlmesh.core.snapshot import ( DeployabilityIndex, Snapshot, @@ -109,36 +108,37 @@ StateReader, StateSync, ) +from sqlmesh.core.janitor import cleanup_expired_views, delete_expired_snapshots from sqlmesh.core.table_diff import TableDiff from sqlmesh.core.test import ( - ModelTestMetadata, ModelTextTestResult, - filter_tests_by_patterns, + ModelTestMetadata, generate_test, run_tests, + filter_tests_by_patterns, ) from sqlmesh.core.user import User from sqlmesh.utils import UniqueKeyDict, Verbosity from sqlmesh.utils.concurrency import concurrent_apply_to_values -from sqlmesh.utils.config import print_config from sqlmesh.utils.dag import DAG from sqlmesh.utils.date import ( TimeLike, + to_timestamp, format_tz_datetime, - make_exclusive, - now, now_timestamp, + now, to_datetime, - to_timestamp, + make_exclusive, ) from sqlmesh.utils.errors import ( CircuitBreakerError, ConfigError, - LinterError, PlanError, SQLMeshError, UncategorizedPlanError, + LinterError, ) +from sqlmesh.utils.config import print_config from sqlmesh.utils.jinja import JinjaMacroRegistry from sqlmesh.utils.windows import IS_WINDOWS, fix_windows_path diff --git a/sqlmesh/core/linter/rules/builtin.py b/sqlmesh/core/linter/rules/builtin.py index 28b810fca0..5058f3a58a 100644 --- a/sqlmesh/core/linter/rules/builtin.py +++ b/sqlmesh/core/linter/rules/builtin.py @@ -9,23 +9,23 @@ from sqlmesh.core.constants import EXTERNAL_MODELS_YAML from sqlmesh.core.dialect import normalize_model_name -from sqlmesh.core.linter.definition import RuleSet from sqlmesh.core.linter.helpers import ( TokenPositionDetails, get_range_of_model_block, read_range_from_string, ) from sqlmesh.core.linter.rule import ( - CreateFile, - Fix, - Position, - Range, Rule, RuleViolation, + Range, + Fix, TextEdit, + Position, + CreateFile, ) -from sqlmesh.core.model import ExternalModel, Model, SqlModel -from sqlmesh.utils.lineage import ExternalModelReference, extract_references_from_query +from sqlmesh.core.linter.definition import RuleSet +from sqlmesh.core.model import Model, SqlModel, ExternalModel +from sqlmesh.utils.lineage import extract_references_from_query, ExternalModelReference class NoSelectStar(Rule): diff --git a/sqlmesh/core/loader.py b/sqlmesh/core/loader.py index 4145351f9e..dc93e7be8d 100644 --- a/sqlmesh/core/loader.py +++ b/sqlmesh/core/loader.py @@ -1,7 +1,6 @@ from __future__ import annotations import abc -import concurrent.futures import glob import itertools import linecache @@ -11,10 +10,11 @@ from collections import Counter, defaultdict from dataclasses import dataclass from pathlib import Path - from pydantic import ValidationError -from sqlglot import exp +import concurrent.futures + from sqlglot.errors import SqlglotError +from sqlglot import exp from sqlglot.helper import subclasses from sqlmesh.core import constants as c @@ -22,8 +22,8 @@ from sqlmesh.core.console import Console from sqlmesh.core.dialect import parse from sqlmesh.core.environment import EnvironmentStatements -from sqlmesh.core.linter.definition import RuleSet from sqlmesh.core.linter.rule import Rule +from sqlmesh.core.linter.definition import RuleSet from sqlmesh.core.macros import MacroRegistry, macro from sqlmesh.core.metric import Metric, MetricMeta, expand_metrics, load_metric_ddl from sqlmesh.core.model import ( @@ -40,10 +40,10 @@ from sqlmesh.utils.errors import ConfigError from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroExtractor from sqlmesh.utils.metaprogramming import import_python_file -from sqlmesh.utils.process import create_process_pool_executor from sqlmesh.utils.pydantic import validation_error_message -from sqlmesh.utils.yaml import YAML -from sqlmesh.utils.yaml import load as yaml_load +from sqlmesh.utils.process import create_process_pool_executor +from sqlmesh.utils.yaml import YAML, load as yaml_load + if t.TYPE_CHECKING: from sqlmesh.core.context import GenericContext From c09d2a8a5322b422fe99c6d19dfdfc79c4716a6c Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Sun, 12 Oct 2025 18:55:09 -0600 Subject: [PATCH 06/10] removing redundant declare --- sqlmesh/core/context.py | 3 +-- sqlmesh/core/loader.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 3f157cc3d0..68bec8705b 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -2230,9 +2230,8 @@ def test( pd.set_option("display.max_columns", None) - loaded_test_meta = self._model_test_metadata test_meta = self._filter_preloaded_tests( - test_meta=loaded_test_meta, tests=tests, patterns=match_patterns + test_meta=self._model_test_metadata, tests=tests, patterns=match_patterns ) result = run_tests( diff --git a/sqlmesh/core/loader.py b/sqlmesh/core/loader.py index dc93e7be8d..32c13de215 100644 --- a/sqlmesh/core/loader.py +++ b/sqlmesh/core/loader.py @@ -251,8 +251,6 @@ def load(self) -> LoadedProject: model_test_metadata.model_name for model_test_metadata in model_test_metadata } - self._models_with_tests = models_with_tests - project = LoadedProject( macros=macros, jinja_macros=jinja_macros, From 795ac64e6f03ad008f6d8282a941bf7838fb481b Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Fri, 14 Nov 2025 20:22:46 -0700 Subject: [PATCH 07/10] formatting --- tests/core/test_test.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/core/test_test.py b/tests/core/test_test.py index c5df73d810..5c301fd816 100644 --- a/tests/core/test_test.py +++ b/tests/core/test_test.py @@ -1539,6 +1539,9 @@ def test_gateway(copy_to_temp_path: t.Callable, mocker: MockerFixture) -> None: with open(test_path, "w", encoding="utf-8") as file: dump_yaml(test_dict, file) + # Re-initialize context to pick up the modified test file + context = Context(paths=path, config=config) + spy_execute = mocker.spy(EngineAdapter, "_execute") mocker.patch("sqlmesh.core.test.definition.random_id", return_value="jzngz56a") @@ -2448,6 +2451,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: copy_test_file(original_test_file, tmp_path / "tests" / f"test_success_{i}.yaml", i) copy_test_file(new_test_file, tmp_path / "tests" / f"test_failure_{i}.yaml", i) + # Re-initialize context to pick up the new test files + context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() @@ -2549,6 +2555,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: """ ) + # Re-initialize context to pick up the modified test file + context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() @@ -3472,6 +3481,9 @@ def test_cte_failure(tmp_path: Path) -> None: """ ) + # Re-initialize context to pick up the new test file + context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() @@ -3498,6 +3510,9 @@ def test_cte_failure(tmp_path: Path) -> None: """ ) + # Re-initialize context to pick up the modified test file + context = Context(paths=tmp_path, config=config) + with capture_output() as captured_output: context.test() From 0c2787578bd4a52f2e4002727138be7115619df0 Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Fri, 14 Nov 2025 20:34:59 -0700 Subject: [PATCH 08/10] missed clears and adjusting tests --- sqlmesh/core/context.py | 2 ++ tests/core/test_test.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 68bec8705b..85444d5083 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -639,6 +639,8 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: self._excluded_requirements.clear() self._linters.clear() self._environment_statements = [] + self._models_with_tests.clear() + self._model_test_metadata.clear() for loader, project in zip(self._loaders, loaded_projects): self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) diff --git a/tests/core/test_test.py b/tests/core/test_test.py index 5c301fd816..43d0f333c3 100644 --- a/tests/core/test_test.py +++ b/tests/core/test_test.py @@ -2469,13 +2469,12 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: "SELECT 1 AS col_1, 2 AS col_2, 3 AS col_3, 4 AS col_4, 5 AS col_5, 6 AS col_6, 7 AS col_7" ) - context.upsert_model( - _create_model( - meta="MODEL(name test.test_wide_model)", - query=wide_model_query, - default_catalog=context.default_catalog, - ) + wide_model = _create_model( + meta="MODEL(name test.test_wide_model)", + query=wide_model_query, + default_catalog=context.default_catalog, ) + context.upsert_model(wide_model) tests_dir = tmp_path / "tests" tests_dir.mkdir() @@ -2499,6 +2498,9 @@ def copy_test_file(test_file: Path, new_test_file: Path, index: int) -> None: wide_test_file.write_text(wide_test_file_content) + context.load() + context.upsert_model(wide_model) + with capture_output() as captured_output: context.test() From f18f7e4c06f6b63e152249a17a0acb8265b90cfb Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Fri, 14 Nov 2025 21:13:11 -0700 Subject: [PATCH 09/10] switching to path to try and solve windows issue --- sqlmesh/core/context.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 85444d5083..99d76f30c8 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -2809,15 +2809,13 @@ def _filter_preloaded_tests( for test in tests: if "::" in test: filename, test_name = test.split("::", maxsplit=1) + test_path = Path(filename) filtered_tests.extend( - [ - t - for t in test_meta - if str(t.path) == filename and t.test_name == test_name - ] + [t for t in test_meta if t.path == test_path and t.test_name == test_name] ) else: - filtered_tests.extend([t for t in test_meta if str(t.path) == test]) + test_path = Path(test) + filtered_tests.extend([t for t in test_meta if t.path == test_path]) test_meta = filtered_tests if patterns: From 638999a8a9580d635293ab3cbaf7a358c6b95bb0 Mon Sep 17 00:00:00 2001 From: Cortland Goffena <30168413+cmgoffena13@users.noreply.github.com> Date: Thu, 20 Nov 2025 16:47:18 -0700 Subject: [PATCH 10/10] optimizing lookups --- sqlmesh/core/context.py | 33 ++++++++++++++++++++++----------- sqlmesh/core/loader.py | 6 ------ sqlmesh/core/test/discovery.py | 2 +- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 99d76f30c8..92ffdef7d2 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -399,8 +399,10 @@ def __init__( self._standalone_audits: UniqueKeyDict[str, StandaloneAudit] = UniqueKeyDict( "standaloneaudits" ) - self._models_with_tests: t.Set[str] = set() self._model_test_metadata: t.List[ModelTestMetadata] = [] + self._model_test_metadata_path_index: t.Dict[Path, t.List[ModelTestMetadata]] = {} + self._model_test_metadata_fully_qualified_name_index: t.Dict[str, ModelTestMetadata] = {} + self._models_with_tests: t.Set[str] = set() self._macros: UniqueKeyDict[str, ExecutableOrMacro] = UniqueKeyDict("macros") self._metrics: UniqueKeyDict[str, Metric] = UniqueKeyDict("metrics") self._jinja_macros = JinjaMacroRegistry() @@ -639,8 +641,10 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: self._excluded_requirements.clear() self._linters.clear() self._environment_statements = [] - self._models_with_tests.clear() self._model_test_metadata.clear() + self._model_test_metadata_path_index.clear() + self._model_test_metadata_fully_qualified_name_index.clear() + self._models_with_tests.clear() for loader, project in zip(self._loaders, loaded_projects): self._jinja_macros = self._jinja_macros.merge(project.jinja_macros) @@ -652,8 +656,15 @@ def load(self, update_schemas: bool = True) -> GenericContext[C]: self._requirements.update(project.requirements) self._excluded_requirements.update(project.excluded_requirements) self._environment_statements.extend(project.environment_statements) - self._models_with_tests.update(project.models_with_tests) self._model_test_metadata.extend(project.model_test_metadata) + for metadata in project.model_test_metadata: + if metadata.path not in self._model_test_metadata_path_index: + self._model_test_metadata_path_index[metadata.path] = [] + self._model_test_metadata_path_index[metadata.path].append(metadata) + self._model_test_metadata_fully_qualified_name_index[ + metadata.fully_qualified_test_name + ] = metadata + self._models_with_tests.add(metadata.model_name) config = loader.config self._linters[config.project] = Linter.from_rules( @@ -2232,7 +2243,7 @@ def test( pd.set_option("display.max_columns", None) - test_meta = self._filter_preloaded_tests( + test_meta = self._select_tests( test_meta=self._model_test_metadata, tests=tests, patterns=match_patterns ) @@ -2796,7 +2807,7 @@ def _get_engine_adapter(self, gateway: t.Optional[str] = None) -> EngineAdapter: raise SQLMeshError(f"Gateway '{gateway}' not found in the available engine adapters.") return self.engine_adapter - def _filter_preloaded_tests( + def _select_tests( self, test_meta: t.List[ModelTestMetadata], tests: t.Optional[t.List[str]] = None, @@ -2808,14 +2819,14 @@ def _filter_preloaded_tests( filtered_tests = [] for test in tests: if "::" in test: - filename, test_name = test.split("::", maxsplit=1) - test_path = Path(filename) - filtered_tests.extend( - [t for t in test_meta if t.path == test_path and t.test_name == test_name] - ) + if test in self._model_test_metadata_fully_qualified_name_index: + filtered_tests.append( + self._model_test_metadata_fully_qualified_name_index[test] + ) else: test_path = Path(test) - filtered_tests.extend([t for t in test_meta if t.path == test_path]) + if test_path in self._model_test_metadata_path_index: + filtered_tests.extend(self._model_test_metadata_path_index[test_path]) test_meta = filtered_tests if patterns: diff --git a/sqlmesh/core/loader.py b/sqlmesh/core/loader.py index 32c13de215..fda35ca75c 100644 --- a/sqlmesh/core/loader.py +++ b/sqlmesh/core/loader.py @@ -65,7 +65,6 @@ class LoadedProject: environment_statements: t.List[EnvironmentStatements] user_rules: RuleSet model_test_metadata: t.List[ModelTestMetadata] - models_with_tests: t.Set[str] class CacheBase(abc.ABC): @@ -247,10 +246,6 @@ def load(self) -> LoadedProject: model_test_metadata = self.load_model_tests() - models_with_tests = { - model_test_metadata.model_name for model_test_metadata in model_test_metadata - } - project = LoadedProject( macros=macros, jinja_macros=jinja_macros, @@ -263,7 +258,6 @@ def load(self) -> LoadedProject: environment_statements=environment_statements, user_rules=user_rules, model_test_metadata=model_test_metadata, - models_with_tests=models_with_tests, ) return project diff --git a/sqlmesh/core/test/discovery.py b/sqlmesh/core/test/discovery.py index 5e2452b570..9afe3dd7fc 100644 --- a/sqlmesh/core/test/discovery.py +++ b/sqlmesh/core/test/discovery.py @@ -22,7 +22,7 @@ def fully_qualified_test_name(self) -> str: @property def model_name(self) -> str: - return self.body["model"] + return self.body.get("model", "") def __hash__(self) -> int: return self.fully_qualified_test_name.__hash__()