From 946af511a8c729cbd2bcd04547978b87e649f6ee Mon Sep 17 00:00:00 2001 From: even-wei Date: Wed, 8 Apr 2026 14:41:29 +0800 Subject: [PATCH 1/2] feat(breaking): add rename detection to schema diff analysis Extend ChangeStatus with "renamed" and add rename_map to NodeChange. After column comparison in _diff_select_scope(), detect removed+added column pairs with identical underlying expressions (stripped of alias) and reclassify them as renames. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: even-wei --- recce/models/types.py | 2 ++ recce/util/breaking.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/recce/models/types.py b/recce/models/types.py index 6c6f67695..206dcd155 100644 --- a/recce/models/types.py +++ b/recce/models/types.py @@ -136,6 +136,7 @@ def merge(self, other) -> bool: "added", "removed", "modified", + "renamed", ] ChangeCategory = Literal[ "breaking", @@ -148,6 +149,7 @@ def merge(self, other) -> bool: class NodeChange(BaseModel): category: ChangeCategory columns: Optional[dict[str, ChangeStatus]] = None + rename_map: Optional[dict[str, str]] = None # new_name → old_name class NodeDiff(BaseModel): diff --git a/recce/util/breaking.py b/recce/util/breaking.py index f2690d31e..2e69dc732 100644 --- a/recce/util/breaking.py +++ b/recce/util/breaking.py @@ -181,6 +181,34 @@ def _has_star(expr: exp.Expression) -> bool: change_category = "partial_breaking" changed_columns[column_name] = "modified" + # Detect renames: removed + added columns with identical AST expression + def _unalias(projection: exp.Expression) -> str: + """Extract the underlying expression SQL, stripping the alias wrapper.""" + inner = projection.this if isinstance(projection, exp.Alias) else projection + return inner.sql() + + rename_map: dict[str, str] = {} # new_name → old_name + removed_cols = { + name: old_column_map[name] + for name, status in changed_columns.items() + if status == "removed" and name in old_column_map + } + added_cols = { + name: new_column_map[name] + for name, status in changed_columns.items() + if status == "added" and name in new_column_map + } + for add_name, add_expr in added_cols.items(): + for rm_name, rm_expr in removed_cols.items(): + if rm_name in rename_map.values(): + continue + if _unalias(add_expr) == _unalias(rm_expr): + rename_map[add_name] = rm_name + break + for new_name, old_name in rename_map.items(): + del changed_columns[old_name] # Remove the "removed" entry + changed_columns[new_name] = "renamed" # Replace "added" with "renamed" + def selected_column_change_status(ref_column: exp.Column) -> Optional[ChangeStatus]: column_name = ref_column.name return changed_columns.get(column_name) @@ -230,7 +258,7 @@ def selected_column_change_status(ref_column: exp.Column) -> Optional[ChangeStat elif selected_column_change_status(ref_column) is not None: change_category = "breaking" - return NodeChange(category=change_category, columns=changed_columns) + return NodeChange(category=change_category, columns=changed_columns, rename_map=rename_map or None) def _diff_union_scope(old_scope: Scope, new_scope: Scope, scope_changes_map: dict[Scope, NodeChange]) -> NodeChange: From e615b72379a5afdeb49e4234a6edabd78078b44e Mon Sep 17 00:00:00 2001 From: even-wei Date: Wed, 8 Apr 2026 14:41:36 +0800 Subject: [PATCH 2/2] test(breaking): add rename detection tests and update expectations Add RenameDetectionTest class with 12 test cases covering: simple rename, bare-to-alias, multiple renames, expression change (not rename), rename + add/remove/modify, no false positives, derived expressions, and ambiguous greedy matching. Update existing test that now correctly detects a rename instead of removed+added. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: even-wei --- tests/util/test_breaking.py | 133 +++++++++++++++++++++++++++++++++--- 1 file changed, 122 insertions(+), 11 deletions(-) diff --git a/tests/util/test_breaking.py b/tests/util/test_breaking.py index b5d3b5f9c..e8db061ac 100644 --- a/tests/util/test_breaking.py +++ b/tests/util/test_breaking.py @@ -190,16 +190,14 @@ def test_rename_column(self): a as a1 from Customers """ - assert is_partial_breaking_change( - original_sql, - modified_sql, - { - "a": "removed", - "a1": "added", - }, - ) - - # by cte + # Simple rename: same expression, different alias → detected as rename + result = _parse_change_catgory(original_sql, modified_sql) + assert result.category == "partial_breaking" + assert result.columns == {"a1": "renamed"} + assert result.rename_map == {"a1": "a"} + + # by cte: the outer select references different CTE column names, + # so expressions differ (cte.a vs cte.a1) — NOT a rename at root scope original_sql = """ with cte as ( select @@ -656,7 +654,7 @@ def test_where_change_with_column_changes(self): from Customers where a > 100 """ - assert is_breaking_change(no_where, with_where, {"a": "modified", "b": "removed", "b2": "added"}) + assert is_breaking_change(no_where, with_where, {"a": "modified", "b2": "renamed"}) def test_where_source_column_change(self): original_sql = """ @@ -1425,3 +1423,116 @@ def test_pr44(self): select * from renamed """ assert is_non_breaking_change(original_sql, modified_sql, {"is_promotion": "added"}) + + +class RenameDetectionTest(unittest.TestCase): + """Tests for rename detection in breaking change analysis.""" + + def test_simple_rename(self): + """Column alias changes but expression is identical → renamed.""" + original = "select a as col1 from Customers" + modified = "select a as col2 from Customers" + result = _parse_change_catgory(original, modified) + assert result.category == "partial_breaking" + assert result.columns == {"col2": "renamed"} + assert result.rename_map == {"col2": "col1"} + + def test_rename_bare_column_to_alias(self): + """Bare column → aliased with same expression → renamed.""" + original = "select a from Customers" + modified = "select a as alpha from Customers" + result = _parse_change_catgory(original, modified) + assert result.category == "partial_breaking" + assert result.columns == {"alpha": "renamed"} + assert result.rename_map == {"alpha": "a"} + + def test_multiple_renames(self): + """Multiple columns renamed simultaneously.""" + original = "select a as x, b as y from Customers" + modified = "select a as x2, b as y2 from Customers" + result = _parse_change_catgory(original, modified) + assert result.category == "partial_breaking" + assert result.columns == {"x2": "renamed", "y2": "renamed"} + assert result.rename_map == {"x2": "x", "y2": "y"} + + def test_rename_with_expression_change_is_not_rename(self): + """Different expression + different name → removed + added, NOT rename.""" + original = "select a as col1 from Customers" + modified = "select a + 1 as col2 from Customers" + result = _parse_change_catgory(original, modified) + assert result.category == "partial_breaking" + assert result.columns == {"col1": "removed", "col2": "added"} + assert result.rename_map is None + + def test_rename_plus_added_column(self): + """Rename one column and add another → mixed changes.""" + original = "select a as x from Customers" + modified = "select a as x2, b from Customers" + result = _parse_change_catgory(original, modified) + assert result.columns.get("x2") == "renamed" + assert result.columns.get("b") == "added" + assert "x" not in result.columns # old name should be gone + assert result.rename_map == {"x2": "x"} + + def test_rename_plus_removed_column(self): + """Rename one column and remove another.""" + original = "select a as x, b as y from Customers" + modified = "select a as x2 from Customers" + result = _parse_change_catgory(original, modified) + assert result.columns.get("x2") == "renamed" + assert result.columns.get("y") == "removed" + assert "x" not in result.columns + assert result.rename_map == {"x2": "x"} + + def test_rename_plus_modified_column(self): + """Rename one column and modify another.""" + original = "select a as x, b as y from Customers" + modified = "select a as x2, b + 1 as y from Customers" + result = _parse_change_catgory(original, modified) + assert result.columns.get("x2") == "renamed" + assert result.columns.get("y") == "modified" + assert "x" not in result.columns + assert result.rename_map == {"x2": "x"} + + def test_no_false_positive_different_expressions(self): + """Removed + added with DIFFERENT expressions → no rename.""" + original = "select a as x from Customers" + modified = "select b as y from Customers" + result = _parse_change_catgory(original, modified) + assert result.columns == {"x": "removed", "y": "added"} + assert result.rename_map is None + + def test_no_rename_when_only_added(self): + """Pure add → no rename detection triggered.""" + original = "select a from Customers" + modified = "select a, b from Customers" + result = _parse_change_catgory(original, modified) + assert result.columns == {"b": "added"} + assert result.rename_map is None + + def test_no_rename_when_only_removed(self): + """Pure remove → no rename detection triggered.""" + original = "select a, b from Customers" + modified = "select a from Customers" + result = _parse_change_catgory(original, modified) + assert result.columns == {"b": "removed"} + assert result.rename_map is None + + def test_rename_with_derived_expression(self): + """Derived expression renamed → detected as rename.""" + original = "select a + b as total from Customers" + modified = "select a + b as sum_ab from Customers" + result = _parse_change_catgory(original, modified) + assert result.category == "partial_breaking" + assert result.columns == {"sum_ab": "renamed"} + assert result.rename_map == {"sum_ab": "total"} + + def test_ambiguous_rename_greedy_match(self): + """Two removed and two added with same expr → greedy first-match.""" + original = "select a as x, a as y from Customers" + modified = "select a as p, a as q from Customers" + result = _parse_change_catgory(original, modified) + # Both pairs have same expression (Customers.a), greedy match pairs them + assert result.columns.get("p") == "renamed" + assert result.columns.get("q") == "renamed" + assert len(result.rename_map) == 2