Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions recce/models/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def merge(self, other) -> bool:
"added",
"removed",
"modified",
"renamed",
]
ChangeCategory = Literal[
"breaking",
Expand All @@ -148,6 +149,7 @@ def merge(self, other) -> bool:
class NodeChange(BaseModel):
    """Result of a change analysis: an overall category plus optional column-level detail."""

    # Overall classification of the change (one of the ChangeCategory literals).
    category: ChangeCategory
    # Per-column change status keyed by column name; None when no column detail is supplied.
    columns: Optional[dict[str, ChangeStatus]] = None
    # Detected column renames; None when no rename was detected or supplied.
    rename_map: Optional[dict[str, str]] = None  # new_name → old_name


class NodeDiff(BaseModel):
Expand Down
30 changes: 29 additions & 1 deletion recce/util/breaking.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,34 @@ def _has_star(expr: exp.Expression) -> bool:
change_category = "partial_breaking"
changed_columns[column_name] = "modified"

# Detect renames: a removed column and an added column whose un-aliased expressions render to identical SQL
def _unalias(projection: exp.Expression) -> str:
    """Render a projection as SQL text, dropping the alias wrapper when there is one."""
    if isinstance(projection, exp.Alias):
        # Only the aliased expression matters; the alias name itself is ignored.
        return projection.this.sql()
    return projection.sql()

# Pair each added column with a removed column whose un-aliased SQL text is
# identical; such a pair is reported as one rename instead of remove + add.
rename_map: dict[str, str] = {}  # new_name → old_name
removed_names = [
    name
    for name, status in changed_columns.items()
    if status == "removed" and name in old_column_map
]
added_names = [
    name
    for name, status in changed_columns.items()
    if status == "added" and name in new_column_map
]
if removed_names and added_names:
    # Render every removed projection once and group the names by SQL text,
    # so matching an added column is a dictionary lookup rather than a scan
    # that re-renders both sides for every (added, removed) pair.
    removed_by_sql: dict[str, list[str]] = {}
    for rm_name in removed_names:
        removed_by_sql.setdefault(_unalias(old_column_map[rm_name]), []).append(rm_name)
    for add_name in added_names:
        candidates = removed_by_sql.get(_unalias(new_column_map[add_name]))
        if candidates:
            # Greedy first match: take the earliest still-unpaired removed
            # column, so each old name is consumed by at most one rename.
            rename_map[add_name] = candidates.pop(0)
for new_name, old_name in rename_map.items():
    del changed_columns[old_name]  # Remove the "removed" entry
    changed_columns[new_name] = "renamed"  # Replace "added" with "renamed"

def selected_column_change_status(ref_column: exp.Column) -> Optional[ChangeStatus]:
    """Look up the recorded change status for the column named by ``ref_column``.

    Returns None when that name has no entry in ``changed_columns``.
    """
    return changed_columns.get(ref_column.name)
Expand Down Expand Up @@ -230,7 +258,7 @@ def selected_column_change_status(ref_column: exp.Column) -> Optional[ChangeStat
elif selected_column_change_status(ref_column) is not None:
change_category = "breaking"

return NodeChange(category=change_category, columns=changed_columns)
return NodeChange(category=change_category, columns=changed_columns, rename_map=rename_map or None)


def _diff_union_scope(old_scope: Scope, new_scope: Scope, scope_changes_map: dict[Scope, NodeChange]) -> NodeChange:
Expand Down
133 changes: 122 additions & 11 deletions tests/util/test_breaking.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,16 +190,14 @@ def test_rename_column(self):
a as a1
from Customers
"""
assert is_partial_breaking_change(
original_sql,
modified_sql,
{
"a": "removed",
"a1": "added",
},
)

# by cte
# Simple rename: same expression, different alias → detected as rename
result = _parse_change_catgory(original_sql, modified_sql)
assert result.category == "partial_breaking"
assert result.columns == {"a1": "renamed"}
assert result.rename_map == {"a1": "a"}

# by cte: the outer select references different CTE column names,
# so expressions differ (cte.a vs cte.a1) — NOT a rename at root scope
original_sql = """
with cte as (
select
Expand Down Expand Up @@ -656,7 +654,7 @@ def test_where_change_with_column_changes(self):
from Customers
where a > 100
"""
assert is_breaking_change(no_where, with_where, {"a": "modified", "b": "removed", "b2": "added"})
assert is_breaking_change(no_where, with_where, {"a": "modified", "b2": "renamed"})

def test_where_source_column_change(self):
original_sql = """
Expand Down Expand Up @@ -1425,3 +1423,116 @@ def test_pr44(self):
select * from renamed
"""
assert is_non_breaking_change(original_sql, modified_sql, {"is_promotion": "added"})


class RenameDetectionTest(unittest.TestCase):
    """Tests for rename detection in breaking change analysis.

    Each test diffs two single-select statements and checks the resulting
    change category, the per-column statuses and the new_name → old_name map.
    """

    def test_simple_rename(self):
        """Column alias changes but expression is identical → renamed."""
        original = "select a as col1 from Customers"
        modified = "select a as col2 from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.category == "partial_breaking"
        assert result.columns == {"col2": "renamed"}
        assert result.rename_map == {"col2": "col1"}

    def test_rename_bare_column_to_alias(self):
        """Bare column → aliased with same expression → renamed."""
        original = "select a from Customers"
        modified = "select a as alpha from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.category == "partial_breaking"
        assert result.columns == {"alpha": "renamed"}
        assert result.rename_map == {"alpha": "a"}

    def test_multiple_renames(self):
        """Multiple columns renamed simultaneously."""
        original = "select a as x, b as y from Customers"
        modified = "select a as x2, b as y2 from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.category == "partial_breaking"
        assert result.columns == {"x2": "renamed", "y2": "renamed"}
        assert result.rename_map == {"x2": "x", "y2": "y"}

    def test_rename_with_expression_change_is_not_rename(self):
        """Different expression + different name → removed + added, NOT rename."""
        original = "select a as col1 from Customers"
        modified = "select a + 1 as col2 from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.category == "partial_breaking"
        assert result.columns == {"col1": "removed", "col2": "added"}
        assert result.rename_map is None

    def test_rename_plus_added_column(self):
        """Rename one column and add another → mixed changes."""
        original = "select a as x from Customers"
        modified = "select a as x2, b from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.columns.get("x2") == "renamed"
        assert result.columns.get("b") == "added"
        assert "x" not in result.columns  # old name should be gone
        assert result.rename_map == {"x2": "x"}

    def test_rename_plus_removed_column(self):
        """Rename one column and remove another."""
        original = "select a as x, b as y from Customers"
        modified = "select a as x2 from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.columns.get("x2") == "renamed"
        assert result.columns.get("y") == "removed"
        assert "x" not in result.columns
        assert result.rename_map == {"x2": "x"}

    def test_rename_plus_modified_column(self):
        """Rename one column and modify another."""
        original = "select a as x, b as y from Customers"
        modified = "select a as x2, b + 1 as y from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.columns.get("x2") == "renamed"
        assert result.columns.get("y") == "modified"
        assert "x" not in result.columns
        assert result.rename_map == {"x2": "x"}

    def test_no_false_positive_different_expressions(self):
        """Removed + added with DIFFERENT expressions → no rename."""
        original = "select a as x from Customers"
        modified = "select b as y from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.columns == {"x": "removed", "y": "added"}
        assert result.rename_map is None

    def test_no_rename_when_only_added(self):
        """Pure add → no rename detection triggered."""
        original = "select a from Customers"
        modified = "select a, b from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.columns == {"b": "added"}
        assert result.rename_map is None

    def test_no_rename_when_only_removed(self):
        """Pure remove → no rename detection triggered."""
        original = "select a, b from Customers"
        modified = "select a from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.columns == {"b": "removed"}
        assert result.rename_map is None

    def test_rename_with_derived_expression(self):
        """Derived expression renamed → detected as rename."""
        original = "select a + b as total from Customers"
        modified = "select a + b as sum_ab from Customers"
        result = _parse_change_catgory(original, modified)
        assert result.category == "partial_breaking"
        assert result.columns == {"sum_ab": "renamed"}
        assert result.rename_map == {"sum_ab": "total"}

    def test_ambiguous_rename_greedy_match(self):
        """Two removed and two added with same expr → greedy first-match."""
        original = "select a as x, a as y from Customers"
        modified = "select a as p, a as q from Customers"
        result = _parse_change_catgory(original, modified)
        # Both pairs have same expression (Customers.a), greedy match pairs them
        assert result.columns == {"p": "renamed", "q": "renamed"}
        assert len(result.rename_map) == 2
        # The pairing must be one-to-one: every new name is mapped and every
        # old name is consumed exactly once. Which old name each new name
        # receives depends on iteration order, so it is not pinned here.
        assert set(result.rename_map) == {"p", "q"}
        assert set(result.rename_map.values()) == {"x", "y"}
Loading