Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sqlmesh/core/plan/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ def should_force_rebuild(old: Snapshot, new: Snapshot) -> bool:
if new.is_view and new.is_indirect_non_breaking and not new.is_forward_only:
# View models always need to be rebuilt to reflect updated upstream dependencies
return True
if new.is_seed:
if new.is_seed and not new.is_metadata:
# Seed models always need to be rebuilt to reflect changes in the seed file
# Unless only their metadata has been updated (eg description added) and the seed file has not been touched
return True
return is_breaking_kind_change(old, new)

Expand Down
118 changes: 118 additions & 0 deletions tests/core/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -10662,3 +10662,121 @@ def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue):
assert len(model_a.intervals)

set_console(orig_console)


def test_seed_model_metadata_update_does_not_trigger_backfill(tmp_path: Path):
"""
Scenario:
- Create a seed model; perform initial population
- Modify the model with a metadata-only change and trigger a plan

Outcome:
- The seed model is modified (metadata-only) but this should NOT trigger backfill
- There should be no missing_intervals on the plan to backfill
"""

models_path = tmp_path / "models"
seeds_path = tmp_path / "seeds"
models_path.mkdir()
seeds_path.mkdir()

seed_model_path = models_path / "seed.sql"
seed_path = seeds_path / "seed_data.csv"

seed_path.write_text("\n".join(["id,name", "1,test"]))

seed_model_path.write_text("""
MODEL (
name test.source_data,
kind SEED (
path '../seeds/seed_data.csv'
)
);
""")

config = Config(
gateways={"": GatewayConfig(connection=DuckDBConnectionConfig())},
model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"),
)
ctx = Context(paths=tmp_path, config=config)

plan = ctx.plan(auto_apply=True)

original_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"']
assert plan.directly_modified == {original_seed_snapshot.snapshot_id}
assert plan.metadata_updated == set()
assert plan.missing_intervals

# prove data loaded
assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [
(1, "test")
]

# prove no diff
ctx.load()
plan = ctx.plan(auto_apply=True)
assert not plan.has_changes
assert not plan.missing_intervals

# make metadata-only change
seed_model_path.write_text("""
MODEL (
name test.source_data,
kind SEED (
path '../seeds/seed_data.csv'
),
description 'updated by test'
);
""")

ctx.load()
plan = ctx.plan(auto_apply=True)
assert plan.has_changes

new_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"']
assert (
new_seed_snapshot.version == original_seed_snapshot.version
) # should be using the same physical table
assert (
new_seed_snapshot.snapshot_id != original_seed_snapshot.snapshot_id
) # but still be different due to the metadata change
assert plan.directly_modified == set()
assert plan.metadata_updated == {new_seed_snapshot.snapshot_id}

# there should be no missing intervals to backfill since all we did is update a description
assert not plan.missing_intervals

# there should still be no diff or missing intervals in 3 days time
assert new_seed_snapshot.model.interval_unit.is_day
with time_machine.travel(timedelta(days=3)):
ctx.clear_caches()
ctx.load()
plan = ctx.plan(auto_apply=True)
assert not plan.has_changes
assert not plan.missing_intervals

# change seed data
seed_path.write_text("\n".join(["id,name", "1,test", "2,updated"]))

# new plan - NOW we should backfill because data changed
ctx.load()
plan = ctx.plan(auto_apply=True)
assert plan.has_changes

updated_seed_snapshot = ctx.snapshots['"memory"."test"."source_data"']

assert (
updated_seed_snapshot.snapshot_id
!= new_seed_snapshot.snapshot_id
!= original_seed_snapshot.snapshot_id
)
assert not updated_seed_snapshot.forward_only
assert plan.directly_modified == {updated_seed_snapshot.snapshot_id}
assert plan.metadata_updated == set()
assert plan.missing_intervals

# prove backfilled data loaded
assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [
(1, "test"),
(2, "updated"),
]
59 changes: 59 additions & 0 deletions tests/core/test_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,65 @@ def test_forward_only_plan_seed_models(make_snapshot, mocker: MockerFixture):
assert not snapshot_a_updated.is_forward_only


def test_seed_model_metadata_change_no_missing_intervals(
make_snapshot: t.Callable[..., Snapshot],
):
snapshot_a = make_snapshot(
SeedModel(
name="a",
kind=SeedKind(path="./path/to/seed"),
seed=Seed(content="content"),
column_hashes={"col": "hash1"},
depends_on=set(),
)
)
snapshot_a.categorize_as(SnapshotChangeCategory.BREAKING)
snapshot_a.add_interval("2022-01-01", now())

snapshot_a_metadata_updated = make_snapshot(
SeedModel(
name="a",
kind=SeedKind(path="./path/to/seed"),
seed=Seed(content="content"),
column_hashes={"col": "hash1"},
depends_on=set(),
description="foo",
)
)
assert snapshot_a_metadata_updated.version is None
assert snapshot_a_metadata_updated.change_category is None

context_diff = ContextDiff(
environment="prod",
is_new_environment=True,
is_unfinalized_environment=False,
normalize_environment_name=True,
create_from="prod",
create_from_env_exists=True,
added=set(),
removed_snapshots={},
modified_snapshots={
snapshot_a_metadata_updated.name: (snapshot_a_metadata_updated, snapshot_a)
},
snapshots={snapshot_a_metadata_updated.snapshot_id: snapshot_a_metadata_updated},
new_snapshots={snapshot_a_metadata_updated.snapshot_id: snapshot_a},
previous_plan_id=None,
previously_promoted_snapshot_ids=set(),
previous_finalized_snapshots=None,
previous_gateway_managed_virtual_layer=False,
gateway_managed_virtual_layer=False,
environment_statements=[],
)

plan = PlanBuilder(context_diff).build()
assert snapshot_a_metadata_updated.change_category == SnapshotChangeCategory.METADATA
assert not snapshot_a_metadata_updated.is_forward_only
assert not plan.missing_intervals # plan should have no missing intervals
assert (
snapshot_a_metadata_updated.intervals == snapshot_a.intervals
) # intervals should have been copied


def test_start_inference(make_snapshot, mocker: MockerFixture):
snapshot_a = make_snapshot(
SqlModel(name="a", query=parse_one("select 1, ds"), start="2022-01-01")
Expand Down