@@ -10662,3 +10662,125 @@ def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue):
1066210662 assert len (model_a .intervals )
1066310663
1066410664 set_console (orig_console )
10665+
10666+
def test_seed_model_metadata_update_sets_forward_only(tmp_path: Path):
    """
    Scenario:
        - Create a seed model; perform initial population
        - Modify the model with a metadata-only change and trigger a plan
        - Modify the seed data itself and trigger another plan

    Outcome:
        - The seed model is modified (metadata-only) but this should NOT trigger backfill
        - To prevent backfill, the categorizer needs to set forward_only on the seed
        - Changing the seed data is a direct modification: it is NOT forward-only and
          it DOES produce missing intervals, after which the new rows are queryable
    """

    # Key under which the seed model's snapshot is looked up in `ctx.snapshots`,
    # and the query used to prove what data is physically loaded.
    seed_snapshot_key = '"memory"."test"."source_data"'
    select_seed_rows = "select id, name from memory.test.source_data"

    models_path = tmp_path / "models"
    seeds_path = tmp_path / "seeds"
    models_path.mkdir()
    seeds_path.mkdir()

    seed_model_path = models_path / "seed.sql"
    seed_path = seeds_path / "seed_data.csv"

    seed_path.write_text("\n ".join(["id,name", "1,test"]))

    seed_model_path.write_text("""
    MODEL (
      name test.source_data,
      kind SEED (
        path '../seeds/seed_data.csv'
      )
    );
    """)

    config = Config(
        gateways={"": GatewayConfig(connection=DuckDBConnectionConfig())},
        model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"),
    )
    ctx = Context(paths=tmp_path, config=config)

    plan = ctx.plan(auto_apply=True)

    original_seed_snapshot = ctx.snapshots[seed_snapshot_key]
    assert not original_seed_snapshot.forward_only
    assert plan.directly_modified == {original_seed_snapshot.snapshot_id}
    assert plan.metadata_updated == set()
    assert plan.missing_intervals

    # prove data loaded
    assert ctx.engine_adapter.fetchall(select_seed_rows) == [(1, "test")]

    # prove no diff
    ctx.load()
    plan = ctx.plan(auto_apply=True)
    assert not plan.has_changes
    assert not plan.missing_intervals

    # make metadata-only change
    seed_model_path.write_text("""
    MODEL (
      name test.source_data,
      kind SEED (
        path '../seeds/seed_data.csv'
      ),
      description 'updated by test'
    );
    """)

    ctx.load()
    plan = ctx.plan(auto_apply=True)
    assert plan.has_changes

    new_seed_snapshot = ctx.snapshots[seed_snapshot_key]

    # change needs to be applied as forward-only to prevent backfill
    assert new_seed_snapshot.forward_only
    # should be using the same physical table
    assert new_seed_snapshot.version == original_seed_snapshot.version
    # but still be different due to the metadata change
    assert new_seed_snapshot.snapshot_id != original_seed_snapshot.snapshot_id
    assert plan.directly_modified == set()
    assert plan.metadata_updated == {new_seed_snapshot.snapshot_id}

    # there should be no missing intervals to backfill since all we did is update a description
    assert not plan.missing_intervals

    # there should still be no diff or missing intervals in 3 days time
    assert new_seed_snapshot.model.interval_unit.is_day
    with time_machine.travel(timedelta(days=3)):
        ctx.clear_caches()
        ctx.load()
        plan = ctx.plan(auto_apply=True)
        assert not plan.has_changes
        assert not plan.missing_intervals

    # change seed data
    seed_path.write_text("\n ".join(["id,name", "1,test", "2,updated"]))

    # new plan - NOW we should backfill because data changed
    ctx.load()
    plan = ctx.plan(auto_apply=True)
    assert plan.has_changes

    updated_seed_snapshot = ctx.snapshots[seed_snapshot_key]

    # All three snapshot ids must be pairwise distinct. A chained comparison
    # (`a != b != c`) only checks adjacent pairs and would never compare the
    # updated id against the original one, so count the unique ids instead.
    distinct_snapshot_ids = {
        original_seed_snapshot.snapshot_id,
        new_seed_snapshot.snapshot_id,
        updated_seed_snapshot.snapshot_id,
    }
    assert len(distinct_snapshot_ids) == 3

    assert not updated_seed_snapshot.forward_only
    assert plan.directly_modified == {updated_seed_snapshot.snapshot_id}
    assert plan.metadata_updated == set()
    assert plan.missing_intervals

    # prove backfilled data loaded
    assert ctx.engine_adapter.fetchall(select_seed_rows) == [
        (1, "test"),
        (2, "updated"),
    ]
0 commit comments