Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/guides/model_selection.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,9 @@ Models:
#### Select with git changes

The git-based selector allows you to select models whose files have changed compared to a target branch (default: main). This includes:

- Untracked files (new files not in git)
- Uncommitted changes in working directory
- Uncommitted changes in working directory (both staged and unstaged)
- Committed changes that differ from the target branch

For example:
Expand Down
4 changes: 3 additions & 1 deletion sqlmesh/utils/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def list_untracked_files(self) -> t.List[Path]:
)

def list_uncommitted_changed_files(self) -> t.List[Path]:
    """List tracked files with uncommitted changes, staged or unstaged.

    The diff is taken against ``HEAD`` rather than against the index, so a file
    that has already been staged with ``git add`` is still reported. A plain
    ``git diff`` compares the working tree with the index only and would drop
    such files. Deleted files are excluded via ``--diff-filter=d``.

    Returns:
        The result of ``self._execute_list_output`` for the diff command,
        evaluated relative to ``self._git_root``.
    """
    return self._execute_list_output(
        ["diff", "--name-only", "--diff-filter=d", "HEAD"], self._git_root
    )

def list_committed_changed_files(self, target_branch: str = "main") -> t.List[Path]:
return self._execute_list_output(
Expand Down
79 changes: 79 additions & 0 deletions tests/core/test_selector_native.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import pytest
from pytest_mock.plugin import MockerFixture
import subprocess

from sqlmesh.core import dialect as d
from sqlmesh.core.audit import StandaloneAudit
Expand All @@ -16,6 +17,7 @@
from sqlmesh.core.snapshot import SnapshotChangeCategory
from sqlmesh.utils import UniqueKeyDict
from sqlmesh.utils.date import now_timestamp
from sqlmesh.utils.git import GitClient


@pytest.mark.parametrize(
Expand Down Expand Up @@ -634,6 +636,83 @@ def test_expand_git_selection(
git_client_mock.list_untracked_files.assert_called_once()


def test_expand_git_selection_integration(tmp_path: Path, mocker: MockerFixture):
    """Run the ``git:`` selector against a real repository created on disk.

    Walks one repository through a sequence of states (clean, unstaged edit,
    staged edit, mixed staged/unstaged, committed on a new branch) and checks
    which models the selector reports at each step.
    """
    repo_path = tmp_path / "test_repo"
    repo_path.mkdir()

    def git(*args, **run_kwargs):
        # All git commands run inside the scratch repository and must succeed.
        return subprocess.run(
            ["git", *args], cwd=repo_path, check=True, capture_output=True, **run_kwargs
        )

    git("init")

    models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")

    def add_model(file_name, model_name, sql):
        # Write the SQL file and register a model that points at it.
        path = repo_path / file_name
        path.write_text(sql)
        model = SqlModel(name=model_name, query=d.parse_one(sql))
        model._path = path
        models[model.fqn] = model
        return path

    model_a_path = add_model("model_a.sql", "test_model_a", "SELECT 1 AS a")
    model_b_path = add_model("model_b.sql", "test_model_b", "SELECT 2 AS b")

    git("add", ".")
    git("commit", "-m", "Initial commit")

    # The default branch name depends on the local git configuration, so ask git.
    default_branch = git("branch", "--show-current", text=True).stdout.strip()
    selection = f"git:{default_branch}"

    selector = NativeSelector(mocker.Mock(), models)
    selector._git_client = GitClient(repo_path)

    only_a = {'"test_model_a"'}
    a_and_b = {'"test_model_a"', '"test_model_b"'}

    # A clean working tree selects nothing.
    assert selector.expand_model_selections([selection]) == set()

    # An unstaged edit to A selects A alone.
    model_a_path.write_text("SELECT 10 AS a")
    assert selector.expand_model_selections([selection]) == only_a

    # Staging A must not hide it from the selector (the regression under test).
    git("add", "model_a.sql")
    assert selector.expand_model_selections([selection]) == only_a

    # Adding an unstaged edit to B on top of the staged A selects both.
    model_b_path.write_text("SELECT 20 AS b")
    assert selector.expand_model_selections([selection]) == a_and_b

    # Commit the staged change to A on a fresh branch; B stays unstaged.
    git("checkout", "-b", "dev")
    git("commit", "-m", "Update model_a")

    # A now differs via a commit and B via the working tree; both are selected.
    assert selector.expand_model_selections([selection]) == a_and_b


def test_select_models_with_external_parent(mocker: MockerFixture):
default_catalog = "test_catalog"
added_model = SqlModel(
Expand Down
137 changes: 137 additions & 0 deletions tests/utils/test_git_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import subprocess
from pathlib import Path
import pytest
from sqlmesh.utils.git import GitClient


@pytest.fixture
def git_repo(tmp_path: Path) -> Path:
    """Create an empty git repository under ``tmp_path`` and return its directory."""
    repo_dir = tmp_path / "test_repo"
    repo_dir.mkdir()
    # check=True makes a failing ``git init`` abort the fixture immediately.
    subprocess.run(["git", "init"], check=True, capture_output=True, cwd=repo_dir)
    return repo_dir


def test_git_uncommitted_changes(git_repo: Path):
    """A modified tracked file is reported both before and after it is staged."""

    def git(*args):
        # Run a git command inside the fixture repository; it must succeed.
        subprocess.run(["git", *args], cwd=git_repo, check=True, capture_output=True)

    client = GitClient(git_repo)

    sql_file = git_repo / "model.sql"
    sql_file.write_text("SELECT 1 AS a")
    git("add", "model.sql")
    git("commit", "-m", "onitial commit")
    assert client.list_uncommitted_changed_files() == []

    # An unstaged modification is listed.
    sql_file.write_text("SELECT 2 AS a")
    assert [p.name for p in client.list_uncommitted_changed_files()] == ["model.sql"]

    # Staging the modification must not remove it from the listing.
    git("add", "model.sql")
    assert [p.name for p in client.list_uncommitted_changed_files()] == ["model.sql"]


def test_git_both_staged_and_unstaged_changes(git_repo: Path):
    """One staged file and one unstaged file are both reported as uncommitted."""

    def git(*args):
        # Run a git command inside the fixture repository; it must succeed.
        subprocess.run(["git", *args], cwd=git_repo, check=True, capture_output=True)

    client = GitClient(git_repo)

    staged_file = git_repo / "model1.sql"
    unstaged_file = git_repo / "model2.sql"
    staged_file.write_text("SELECT 1")
    unstaged_file.write_text("SELECT 2")
    git("add", ".")
    git("commit", "-m", "onitial commit")

    # Modify and stage the first file.
    staged_file.write_text("SELECT 10")
    git("add", "model1.sql")

    # Modify the second file but leave it unstaged.
    unstaged_file.write_text("SELECT 20")

    # Both files must show up in the listing.
    changed = client.list_uncommitted_changed_files()
    assert len(changed) == 2
    assert {p.name for p in changed} == {"model1.sql", "model2.sql"}


def test_git_untracked_files(git_repo: Path):
    """A new, never-added file is reported as untracked, not as an uncommitted change."""

    def git(*args):
        # Run a git command inside the fixture repository; it must succeed.
        subprocess.run(["git", *args], cwd=git_repo, check=True, capture_output=True)

    client = GitClient(git_repo)

    # Seed the repository with one committed file.
    (git_repo / "initial.sql").write_text("SELECT 0")
    git("add", "initial.sql")
    git("commit", "-m", "onitial commit")

    # Create a file that git has never been told about.
    (git_repo / "new_model.sql").write_text("SELECT 1")

    # It is not part of the uncommitted-changes listing...
    assert client.list_uncommitted_changed_files() == []

    # ...but it is the single entry in the untracked listing.
    assert [p.name for p in client.list_untracked_files()] == ["new_model.sql"]


def test_git_committed_changes(git_repo: Path):
    """A file changed by a commit on a feature branch is reported relative to the base branch."""

    def git(*args, **run_kwargs):
        # Run a git command inside the fixture repository; it must succeed.
        return subprocess.run(
            ["git", *args], cwd=git_repo, check=True, capture_output=True, **run_kwargs
        )

    client = GitClient(git_repo)

    sql_file = git_repo / "model.sql"
    sql_file.write_text("SELECT 1")
    git("add", "model.sql")
    git("commit", "-m", "onitial commit")

    # The default branch name depends on the local git configuration, so ask git.
    default_branch = git("branch", "--show-current", text=True).stdout.strip()

    git("checkout", "-b", "feature")

    # Commit a change to the file on the feature branch.
    sql_file.write_text("SELECT 2")
    git("add", "model.sql")
    git("commit", "-m", "an update on feature bramch")

    committed = client.list_committed_changed_files(target_branch=default_branch)
    assert [p.name for p in committed] == ["model.sql"]

    # Everything is committed, so nothing remains in the uncommitted listing.
    assert client.list_uncommitted_changed_files() == []
Loading