Skip to content

Commit 9326a3c

Browse files
authored
Feat: make automatic python dependency inference opt-out (#4575)
1 parent 989ecb2 commit 9326a3c

6 files changed

Lines changed: 57 additions & 21 deletions

File tree

docs/reference/configuration.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Configuration options for SQLMesh environment creation and promotion.
3636
| `physical_schema_mapping` | A mapping from regular expressions to names of schemas in which physical tables for the corresponding models [will be placed](../guides/configuration.md#physical-table-schemas). (Default physical schema name: `sqlmesh__[model schema]`) | dict[string, string] | N |
3737
| `environment_suffix_target` | Whether SQLMesh views should append their environment name to the `schema` or `table` - [additional details](../guides/configuration.md#view-schema-override). (Default: `schema`) | string | N |
3838
| `gateway_managed_virtual_layer` | Whether SQLMesh views of the virtual layer will be created by the default gateway or model specified gateways - [additional details](../guides/multi_engine.md#gateway-managed-virtual-layer). (Default: False) | boolean | N |
39+
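| `infer_python_dependencies` | Whether SQLMesh will statically analyze Python code to automatically infer Python package requirements. When set to `False`, only the explicitly provided requirements (e.g. those sourced from the lock file, minus any excluded ones) are used. (Default: True) | boolean | N |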
3940
| `environment_catalog_mapping` | A mapping from regular expressions to catalog names. The catalog name is used to determine the target catalog for a given environment. | dict[string, string] | N |
4041
| `log_limit` | The default number of logs to keep (Default: `20`) | int | N |
4142

sqlmesh/core/config/root.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class Config(BaseConfig):
107107
physical_schema_mapping: A mapping from regular expressions to names of schemas in which physical tables for corresponding models will be placed.
108108
environment_suffix_target: Indicates whether to append the environment name to the schema or table name.
109109
gateway_managed_virtual_layer: Whether the models' views in the virtual layer are created by the model-specific gateway rather than the default gateway.
110+
infer_python_dependencies: Whether to statically analyze Python code to automatically infer Python package requirements.
110111
environment_catalog_mapping: A mapping from regular expressions to catalog names. The catalog name is used to determine the target catalog for a given environment.
111112
default_target_environment: The name of the environment that will be the default target for the `sqlmesh plan` and `sqlmesh run` commands.
112113
log_limit: The default number of logs to keep.
@@ -146,6 +147,7 @@ class Config(BaseConfig):
146147
default=EnvironmentSuffixTarget.default
147148
)
148149
gateway_managed_virtual_layer: bool = False
150+
infer_python_dependencies: bool = True
149151
environment_catalog_mapping: RegexKeyDict = {}
150152
default_target_environment: str = c.PROD
151153
log_limit: int = c.DEFAULT_LOG_LIMIT

sqlmesh/core/context.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,6 @@ def __init__(
388388
self.pinned_environments = Environment.sanitize_names(self.config.pinned_environments)
389389
self.auto_categorize_changes = self.config.plan.auto_categorize_changes
390390
self.selected_gateway = gateway or self.config.default_gateway_name
391-
self.gateway_managed_virtual_layer = self.config.gateway_managed_virtual_layer
392391

393392
gw_model_defaults = self.config.gateways[self.selected_gateway].model_defaults
394393
if gw_model_defaults:
@@ -2617,7 +2616,8 @@ def _context_diff(
26172616
ensure_finalized_snapshots=ensure_finalized_snapshots,
26182617
diff_rendered=diff_rendered,
26192618
environment_statements=self._environment_statements,
2620-
gateway_managed_virtual_layer=self.gateway_managed_virtual_layer,
2619+
gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer,
2620+
infer_python_dependencies=self.config.infer_python_dependencies,
26212621
)
26222622

26232623
def _destroy(self) -> None:

sqlmesh/core/context_diff.py

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def create(
102102
diff_rendered: bool = False,
103103
environment_statements: t.Optional[t.List[EnvironmentStatements]] = [],
104104
gateway_managed_virtual_layer: bool = False,
105+
infer_python_dependencies: bool = True,
105106
) -> ContextDiff:
106107
"""Create a ContextDiff object.
107108
@@ -116,6 +117,12 @@ def create(
116117
the environment is not finalized.
117118
provided_requirements: Python dependencies sourced from the lock file.
118119
excluded_requirements: Python dependencies to exclude.
120+
diff_rendered: Whether to compute the diff of the rendered version of the compared expressions.
121+
environment_statements: A list of `before_all` or `after_all` statements associated with the environment.
122+
gateway_managed_virtual_layer: Whether the models' views in the virtual layer are created by the
123+
model-specific gateway rather than the default gateway.
124+
infer_python_dependencies: Whether to statically analyze Python code to automatically infer Python
125+
package requirements.
119126
120127
Returns:
121128
The ContextDiff object.
@@ -208,6 +215,7 @@ def create(
208215
provided_requirements or {},
209216
excluded_requirements or set(),
210217
snapshots.values(),
218+
infer_python_dependencies=infer_python_dependencies,
211219
)
212220

213221
previous_environment_statements = state_reader.get_environment_statements(environment)
@@ -475,29 +483,41 @@ def _build_requirements(
475483
provided_requirements: t.Dict[str, str],
476484
excluded_requirements: t.Set[str],
477485
snapshots: t.Collection[Snapshot],
486+
infer_python_dependencies: bool = True,
478487
) -> t.Dict[str, str]:
479488
requirements = {
480489
k: v for k, v in provided_requirements.items() if k not in excluded_requirements
481490
}
491+
492+
if not infer_python_dependencies:
493+
return requirements
494+
482495
distributions = metadata.packages_distributions()
483496

484497
for snapshot in snapshots:
485-
if snapshot.is_model:
486-
for executable in snapshot.model.python_env.values():
487-
if executable.kind == "import":
488-
try:
489-
start = "from " if executable.payload.startswith("from ") else "import "
490-
lib = executable.payload.split(start)[1].split()[0].split(".")[0]
491-
if lib in distributions:
492-
for dist in distributions[lib]:
493-
if (
494-
dist not in requirements
495-
and dist not in IGNORED_PACKAGES
496-
and dist not in excluded_requirements
497-
):
498-
requirements[dist] = metadata.version(dist)
499-
except metadata.PackageNotFoundError:
500-
from sqlmesh.core.console import get_console
501-
502-
get_console().log_warning(f"Failed to find package for {lib}.")
498+
if not snapshot.is_model:
499+
continue
500+
501+
for executable in snapshot.model.python_env.values():
502+
if executable.kind != "import":
503+
continue
504+
505+
try:
506+
start = "from " if executable.payload.startswith("from ") else "import "
507+
lib = executable.payload.split(start)[1].split()[0].split(".")[0]
508+
if lib not in distributions:
509+
continue
510+
511+
for dist in distributions[lib]:
512+
if (
513+
dist not in requirements
514+
and dist not in IGNORED_PACKAGES
515+
and dist not in excluded_requirements
516+
):
517+
requirements[dist] = metadata.version(dist)
518+
except metadata.PackageNotFoundError:
519+
from sqlmesh.core.console import get_console
520+
521+
get_console().log_warning(f"Failed to find package for {lib}.")
522+
503523
return requirements

tests/core/test_context.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,19 @@ def test_requirements(copy_to_temp_path: t.Callable):
12401240
assert set(diff.requirements) == {"numpy", "pandas"}
12411241

12421242

1243+
def test_deactivate_automatic_requirement_inference(copy_to_temp_path: t.Callable):
1244+
context_path = copy_to_temp_path("examples/sushi")[0]
1245+
config = next(iter(load_configs("config", Config, paths=context_path).values()))
1246+
1247+
config.infer_python_dependencies = False
1248+
context = Context(paths=context_path, config=config)
1249+
environment = context.plan(
1250+
"dev", no_prompts=True, skip_tests=True, skip_backfill=True, auto_apply=True
1251+
).environment
1252+
1253+
assert environment.requirements == {"pandas": "2.2.2"}
1254+
1255+
12431256
@pytest.mark.slow
12441257
def test_rendered_diff():
12451258
ctx = Context(config=Config())

tests/core/test_integration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5050,7 +5050,7 @@ def test_multi_virtual_layer(copy_to_temp_path):
50505050
assert len(prod_environment.snapshots_) == 3
50515051

50525052
# Changing the flag should show a diff
5053-
context.gateway_managed_virtual_layer = False
5053+
context.config.gateway_managed_virtual_layer = False
50545054
plan = context.plan_builder().build()
50555055
assert not plan.requires_backfill
50565056
assert (

0 commit comments

Comments
 (0)