Skip to content

Commit b94d069

Browse files
correctly categorize aliased UDTF and positional group/order by
Signed-off-by: Alberto Suman <alberto.suman@1komma5grad.com>
1 parent b004450 commit b94d069

2 files changed

Lines changed: 115 additions & 3 deletions

File tree

sqlmesh/core/model/definition.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2907,6 +2907,18 @@ def _is_projection(expr: exp.Expr) -> bool:
29072907
return isinstance(parent, exp.Select) and expr.arg_key == "expressions"
29082908

29092909

2910+
def _has_ordinal_references(query: exp.Select) -> bool:
2911+
order = query.args.get("order")
2912+
if order and any(
2913+
isinstance(ob.this, exp.Literal) and ob.this.is_number for ob in order.expressions
2914+
):
2915+
return True
2916+
group = query.args.get("group")
2917+
return bool(
2918+
group and any(isinstance(gb, exp.Literal) and gb.is_number for gb in group.expressions)
2919+
)
2920+
2921+
29102922
def _additive_projection_change(
29112923
previous_query: exp.Query,
29122924
this_query: exp.Query,
@@ -2923,8 +2935,9 @@ def _additive_projection_change(
29232935
Returns ``False`` (non-breaking) only when the change is provably additive:
29242936
* both queries are simple ``SELECT`` statements,
29252937
* everything other than the projection list is structurally identical,
2926-
* no added projection is a (potentially cardinality-changing) ``UDTF``, and
2927-
* every previous projection is preserved, in order, within the new projection list.
2938+
* no added projection is a (potentially cardinality-changing) ``UDTF``,
2939+
* every previous projection is preserved, in order, within the new projection list, and
2940+
* no mid-list insert shifts ordinal ``ORDER BY`` / ``GROUP BY`` references.
29282941
29292942
Otherwise returns ``None`` (undetermined), preserving the conservative default.
29302943
"""
@@ -2942,7 +2955,8 @@ def _additive_projection_change(
29422955
# Adding a UDTF projection (e.g. EXPLODE / UNNEST) can change row cardinality, so such a
29432956
# change is not safely non-breaking even when it appears as an extra SELECT item.
29442957
for projection in this_projections:
2945-
if isinstance(projection, exp.UDTF) and not projection.find_ancestor(exp.Subquery):
2958+
bare = projection.this if isinstance(projection, exp.Alias) else projection
2959+
if isinstance(bare, exp.UDTF):
29462960
return None
29472961

29482962
# Everything other than the projection list must be structurally identical. Replacing each
@@ -2960,17 +2974,26 @@ def _additive_projection_change(
29602974
# parser built distinct object identities.
29612975
this_projection_sql = [p.sql(dialect=dialect, comments=False) for p in this_projections]
29622976
search_start = 0
2977+
matched_at: list[int] = []
29632978
for projection in previous_projections:
29642979
target_sql = projection.sql(dialect=dialect, comments=False)
29652980
# Continue after the previous match so added columns can appear before, between, or after
29662981
# the original projections, but existing projections cannot be reordered or rewritten.
29672982
for index in range(search_start, len(this_projection_sql)):
29682983
if this_projection_sql[index] == target_sql:
2984+
matched_at.append(index)
29692985
search_start = index + 1
29702986
break
29712987
else:
29722988
return None
29732989

2990+
# Mid-list inserts shift ordinal references in ORDER BY / GROUP BY clauses.
2991+
if _has_ordinal_references(this_query):
2992+
matched_set = set(matched_at)
2993+
last_matched = matched_at[-1]
2994+
if any(i < last_matched for i in range(len(this_projections)) if i not in matched_set):
2995+
return None
2996+
29742997
# At this point the query shape is unchanged and all prior outputs are preserved, so the only
29752998
# remaining difference is one or more additional, non-UDTF projections.
29762999
return False

tests/core/test_snapshot.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1720,6 +1720,95 @@ def test_categorize_change_sql_redundant_cast(make_snapshot):
17201720
is None
17211721
)
17221722

1723+
# Mid-list insert with ORDER BY ordinal shifts the referenced projection: undetermined.
1724+
assert (
1725+
categorize_change(
1726+
new=make_snapshot(
1727+
SqlModel(
1728+
name="a",
1729+
query=parse_one("SELECT a::DATE, x::TEXT, s::TEXT FROM t ORDER BY 2"),
1730+
)
1731+
),
1732+
old=make_snapshot(
1733+
SqlModel(name="a", query=parse_one("SELECT a::DATE, s::TEXT FROM t ORDER BY 2"))
1734+
),
1735+
config=config,
1736+
)
1737+
is None
1738+
)
1739+
1740+
# Append at end with ORDER BY ordinal leaves existing ordinal bindings unchanged.
1741+
assert (
1742+
categorize_change(
1743+
new=make_snapshot(
1744+
SqlModel(
1745+
name="a",
1746+
query=parse_one("SELECT a::DATE, s::TEXT, x::TEXT FROM t ORDER BY 2"),
1747+
)
1748+
),
1749+
old=make_snapshot(
1750+
SqlModel(name="a", query=parse_one("SELECT a::DATE, s::TEXT FROM t ORDER BY 2"))
1751+
),
1752+
config=config,
1753+
)
1754+
== SnapshotChangeCategory.NON_BREAKING
1755+
)
1756+
1757+
# Mid-list insert with GROUP BY ordinal shifts the referenced projection: undetermined.
1758+
assert (
1759+
categorize_change(
1760+
new=make_snapshot(
1761+
SqlModel(
1762+
name="a",
1763+
query=parse_one("SELECT a::DATE, x::TEXT, s::TEXT FROM t GROUP BY 2"),
1764+
)
1765+
),
1766+
old=make_snapshot(
1767+
SqlModel(name="a", query=parse_one("SELECT a::DATE, s::TEXT FROM t GROUP BY 2"))
1768+
),
1769+
config=config,
1770+
)
1771+
is None
1772+
)
1773+
1774+
# Aliased UDTF projection via fallback path: undetermined.
1775+
assert (
1776+
categorize_change(
1777+
new=make_snapshot(
1778+
SqlModel(
1779+
name="a",
1780+
query=parse_one(
1781+
"SELECT a::DATE AS a, x::TEXT AS x, EXPLODE(y) AS y, s::TEXT AS s FROM t"
1782+
),
1783+
)
1784+
),
1785+
old=make_snapshot(
1786+
SqlModel(name="a", query=parse_one("SELECT a::DATE AS a, s::TEXT AS s FROM t"))
1787+
),
1788+
config=config,
1789+
)
1790+
is None
1791+
)
1792+
1793+
# UDTF inside aliased scalar subquery remains non-breaking.
1794+
assert (
1795+
categorize_change(
1796+
new=make_snapshot(
1797+
SqlModel(
1798+
name="a",
1799+
query=parse_one(
1800+
"SELECT a::DATE AS a, (SELECT x FROM unnest(b) x) AS sub, s::TEXT AS s FROM t"
1801+
),
1802+
)
1803+
),
1804+
old=make_snapshot(
1805+
SqlModel(name="a", query=parse_one("SELECT a::DATE AS a, s::TEXT AS s FROM t"))
1806+
),
1807+
config=config,
1808+
)
1809+
== SnapshotChangeCategory.NON_BREAKING
1810+
)
1811+
17231812

17241813
def test_categorize_change_seed(make_snapshot, tmp_path):
17251814
config = CategorizerConfig(seed=AutoCategorizationMode.SEMI)

0 commit comments

Comments
 (0)