2424import sqlmesh .core .dialect as d
2525from sqlmesh .core .dialect import select_from_values
2626from sqlmesh .core .model import Model , load_sql_based_model
27- from sqlmesh .core .engine_adapter import EngineAdapter
2827from sqlmesh .core .engine_adapter .shared import DataObject , DataObjectType
2928from sqlmesh .core .engine_adapter .mixins import RowDiffMixin , LogicalMergeMixin
3029from sqlmesh .core .model .definition import create_sql_model
@@ -2816,19 +2815,28 @@ def test_janitor_drops_downstream_unexpired_hard_dependencies(
28162815 - In dev, we modify A - triggers new version of A and a dev preview of B that both expire in 7 days
28172816 - We advance time by 3 days
28182817 - In dev, we modify B - triggers a new version of B that depends on A but expires 3 days after A
2819- - We advance time by 5 days so that A has reached its expiry but B has not
2818+ - In dev, we create B(view) <- C(view) and B(view) <- D(table)
2819+ - We advance time by 5 days so that A has reached its expiry but B, C and D have not
28202820 - We expire dev so that none of these snapshots are promoted and are thus targets for cleanup
28212821 - We run the janitor
28222822
28232823 Expected outcome:
28242824 - All the dev versions of A and B should be dropped
2825+ - C should be dropped as well because it's a view that depends on B which was dropped
2826+ - D should not be dropped because while it depends on B which was dropped, it's a table so is still valid after B is dropped
28252827 - We should not get a 'ERROR: cannot drop table x because other objects depend on it' on engines that do schema binding
28262828 """
28272829
2828- def _state_sync_engine_adapter (context : Context ) -> EngineAdapter :
2830+ def _all_snapshot_ids (context : Context ) -> t . List [ SnapshotId ] :
28292831 assert isinstance (context .state_sync , CachingStateSync )
28302832 assert isinstance (context .state_sync .state_sync , EngineAdapterStateSync )
2831- return context .state_sync .state_sync .engine_adapter
2833+
2834+ return [
2835+ SnapshotId (name = name , identifier = identifier )
2836+ for name , identifier in context .state_sync .state_sync .engine_adapter .fetchall (
2837+ "select name, identifier from sqlmesh._snapshots"
2838+ )
2839+ ]
28322840
28332841 models_dir = tmp_path / "models"
28342842 models_dir .mkdir ()
@@ -2898,10 +2906,10 @@ def _mutate_config(gateway: str, config: Config):
28982906
28992907 # should now have 4 snapshots in state - 2x model a and 2x model b
29002908 # the new model b is a dev preview because its upstream model changed
2901- assert (
2902- len (_state_sync_engine_adapter ( sqlmesh ). fetchall ( f"select * from sqlmesh._snapshots" ))
2903- == 4
2904- )
2909+ all_snapshot_ids = _all_snapshot_ids ( sqlmesh )
2910+ assert len (all_snapshot_ids ) == 4
2911+ assert len ([ s for s in all_snapshot_ids if "model_a" in s . name ]) == 2
2912+ assert len ([ s for s in all_snapshot_ids if "model_b" in s . name ]) == 2
29052913
29062914 # context just has the two latest
29072915 assert len (sqlmesh .snapshots ) == 2
@@ -2930,19 +2938,39 @@ def _mutate_config(gateway: str, config: Config):
29302938 SELECT a, 'b' as b from { schema } .model_a;
29312939 """ )
29322940
2941+ (models_dir / "model_c.sql" ).write_text (f"""
2942+ MODEL (
2943+ name { schema } .model_c,
2944+ kind VIEW
2945+ );
2946+
2947+ SELECT a, 'c' as c from { schema } .model_b;
2948+ """ )
2949+
2950+ (models_dir / "model_d.sql" ).write_text (f"""
2951+ MODEL (
2952+ name { schema } .model_d,
2953+ kind FULL
2954+ );
2955+
2956+ SELECT a, 'd' as d from { schema } .model_b;
2957+ """ )
2958+
29332959 sqlmesh = ctx .create_context (
29342960 path = tmp_path , config_mutator = _mutate_config , ephemeral_state_connection = False
29352961 )
29362962 sqlmesh .plan (environment = "dev" , auto_apply = True )
29372963
2938- # should now have 5 snapshots in state - 2x model a and 3x model b
2939- assert (
2940- len (_state_sync_engine_adapter (sqlmesh ).fetchall (f"select * from sqlmesh._snapshots" ))
2941- == 5
2942- )
2964+ # should now have 7 snapshots in state - 2x model a, 3x model b, 1x model c and 1x model d
2965+ all_snapshot_ids = _all_snapshot_ids (sqlmesh )
2966+ assert len (all_snapshot_ids ) == 7
2967+ assert len ([s for s in all_snapshot_ids if "model_a" in s .name ]) == 2
2968+ assert len ([s for s in all_snapshot_ids if "model_b" in s .name ]) == 3
2969+ assert len ([s for s in all_snapshot_ids if "model_c" in s .name ]) == 1
2970+ assert len ([s for s in all_snapshot_ids if "model_d" in s .name ]) == 1
29432971
2944- # context just has the two latest
2945- assert len (sqlmesh .snapshots ) == 2
2972+ # context just has the 4 latest
2973+ assert len (sqlmesh .snapshots ) == 4
29462974
29472975 # model a expiry should not have changed
29482976 model_a_snapshot = next (s for n , s in sqlmesh .snapshots .items () if "model_a" in n )
@@ -2956,26 +2984,41 @@ def _mutate_config(gateway: str, config: Config):
29562984 assert to_ds (model_b_snapshot .updated_ts ) == "2020-01-05"
29572985 assert to_ds (model_b_snapshot .expiration_ts ) == "2020-01-12"
29582986
2987+ # model c should expire at the same time as model b
2988+ model_c_snapshot = next (s for n , s in sqlmesh .snapshots .items () if "model_c" in n )
2989+ assert to_ds (model_c_snapshot .updated_ts ) == to_ds (model_b_snapshot .updated_ts )
2990+ assert to_ds (model_c_snapshot .expiration_ts ) == to_ds (model_b_snapshot .expiration_ts )
2991+
2992+ # model d should expire at the same time as model b
2993+ model_d_snapshot = next (s for n , s in sqlmesh .snapshots .items () if "model_d" in n )
2994+ assert to_ds (model_d_snapshot .updated_ts ) == to_ds (model_b_snapshot .updated_ts )
2995+ assert to_ds (model_d_snapshot .expiration_ts ) == to_ds (model_b_snapshot .expiration_ts )
2996+
29592997 # move forward to a date after model a has expired but before model b has expired
29602998 # invalidate dev to trigger cleanups
2961- # run janitor. model a is expired so will be cleaned up and this will cascade to model b.
2999+ # run janitor
3000+ # - table model a is expired so will be cleaned up and this will cascade to view model b
3001+ # - view model b is not expired, but because it got cascaded to, this will cascade again to view model c
3002+ # - table model d is not a view, so even though its parent view model b got dropped, it doesn't need to be dropped
29623003 with time_machine .travel ("2020-01-10 00:00:00" ):
29633004 sqlmesh = ctx .create_context (
29643005 path = tmp_path , config_mutator = _mutate_config , ephemeral_state_connection = False
29653006 )
29663007
2967- before_snapshots = _state_sync_engine_adapter (sqlmesh ).fetchall (
2968- f"select name, identifier from sqlmesh._snapshots"
2969- )
3008+ before_snapshot_ids = _all_snapshot_ids (sqlmesh )
3009+
29703010 sqlmesh .invalidate_environment ("dev" )
29713011 sqlmesh .run_janitor (ignore_ttl = False )
2972- after_snapshots = _state_sync_engine_adapter (sqlmesh ).fetchall (
2973- f"select name, identifier from sqlmesh._snapshots"
2974- )
29753012
2976- assert len (before_snapshots ) != len (after_snapshots )
3013+ after_snapshot_ids = _all_snapshot_ids (sqlmesh )
3014+
3015+ assert len (before_snapshot_ids ) != len (after_snapshot_ids )
29773016
2978- # all that's left should be the two snapshots that were in prod
2979- assert set (
2980- [SnapshotId (name = name , identifier = identifier ) for name , identifier in after_snapshots ]
2981- ) == set ([model_a_prod_snapshot .snapshot_id , model_b_prod_snapshot .snapshot_id ])
3017+ # all that's left should be the two original snapshots that were in prod and model d
3018+ assert set (after_snapshot_ids ) == set (
3019+ [
3020+ model_a_prod_snapshot .snapshot_id ,
3021+ model_b_prod_snapshot .snapshot_id ,
3022+ model_d_snapshot .snapshot_id ,
3023+ ]
3024+ )
0 commit comments