diff --git a/datajunction-clients/python/datajunction/deployment.py b/datajunction-clients/python/datajunction/deployment.py
index c34b5c0db..fe35b2936 100644
--- a/datajunction-clients/python/datajunction/deployment.py
+++ b/datajunction-clients/python/datajunction/deployment.py
@@ -98,8 +98,31 @@ def pull(
         )
 
         with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
+            new_yaml_names = {n for n in zf.namelist() if n.endswith(".yaml")}
             zf.extractall(base_path)
 
+        # Remove local YAML files for nodes that no longer exist on the server.
+        # The server only emits YAML for active nodes, so any local *.yaml not
+        # present in the new export is an orphan (e.g. a node was deactivated
+        # after a previous pull). Non-YAML files are left alone.
+        #
+        # The candidates are collected up front so the tree is not modified
+        # while rglob() is still walking it.
+        for yaml_file in list(base_path.rglob("*.yaml")):
+            # Zip member names always use "/" separators, so compare against
+            # the POSIX form of the relative path (str() would use "\" on
+            # Windows and flag every nested file as an orphan).
+            rel = yaml_file.relative_to(base_path).as_posix()
+            if rel not in new_yaml_names:
+                yaml_file.unlink()
+                # Clean up directories that became empty as a result.
+                parent = yaml_file.parent
+                while parent != base_path and parent.is_dir() and not any(
+                    parent.iterdir()
+                ):
+                    parent.rmdir()
+                    parent = parent.parent
+
     def push(
         self,
         source_path: str | Path,
diff --git a/datajunction-clients/python/tests/test_deploy.py b/datajunction-clients/python/tests/test_deploy.py
index 22fa2d213..47b3bcfd4 100644
--- a/datajunction-clients/python/tests/test_deploy.py
+++ b/datajunction-clients/python/tests/test_deploy.py
@@ -45,6 +45,36 @@ def test_pull_extracts_zip_from_server(tmp_path):
     assert (tmp_path / "foo" / "bar" / "baz.yaml").exists()
 
 
+def test_pull_removes_orphan_local_yaml(tmp_path):
+    """A local YAML for a node that no longer exists on the server is removed."""
+    (tmp_path / "kept.yaml").write_text("name: ns.kept\n")
+    (tmp_path / "subdir").mkdir()
+    (tmp_path / "subdir" / "gone.yaml").write_text("name: ns.gone\n")
+    (tmp_path / "README.md").write_text("# unrelated\n")
+
+    new_zip = _make_zip(
+        {
+            "dj.yaml": "namespace: ns\n",
+            "kept.yaml": "name: ns.kept\nquery: SELECT 1\n",
+            "nested/kept.yaml": "name: ns.nested.kept\n",
+        },
+    )
+    client = MagicMock()
+    client._export_namespace_yaml_zip.return_value = new_zip
+    svc = DeploymentService(client)
+
+    svc.pull("ns", tmp_path)
+
+    assert (tmp_path / "kept.yaml").exists()
+    # nested files from the export must survive on every platform
+    assert (tmp_path / "nested" / "kept.yaml").exists()
+    assert not (tmp_path / "subdir" / "gone.yaml").exists()
+    # empty directory should have been cleaned up
+    assert not (tmp_path / "subdir").exists()
+    # non-YAML files are left alone
+    assert (tmp_path / "README.md").exists()
+
+
 def test_pull_uploads_existing_yaml_files(tmp_path):
     (tmp_path / "old.yaml").write_text("name: ns.old\n")
     updated_zip = _make_zip({"old.yaml": "name: ns.old\nquery: SELECT 2\n"})