From 4fdb5c9b48c7e590e406f5a135651eba195f2eae Mon Sep 17 00:00:00 2001 From: iwanttobepowerful <745778074@qq.com> Date: Fri, 12 Jun 2026 14:21:01 +0800 Subject: [PATCH] [CALCITE-7587] RelDecorrelator fails on correlated scalar subquery with ROW_NUMBER window function due to RexOver nullability mismatch --- .../calcite/sql2rel/RelDecorrelator.java | 112 ++++++++++++++++++ core/src/test/resources/sql/sub-query.iq | 33 ++++++ 2 files changed, 145 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java index f9e7049af21..a5a81757937 100644 --- a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java +++ b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java @@ -2695,6 +2695,118 @@ private RexNode createCaseExpression( return literal; } + /** + * Decorrelates a window expression ({@link RexOver}) that may reference + * correlation variables in its {@code PARTITION BY} / {@code ORDER BY} + * keys (or the aggregate arguments). + * + *
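For each correlation field reachable from {@code over}, we rewrite it to an input ref from the outer side and append its decorrelated form to the window partition keys, so that decorrelation does not widen the window computation scope. + *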
If the scalar sub-query has been pulled above a LEFT correlate, the + * result is wrapped in a {@code CASE} on the null-indicator so that it + * stays {@code NULL} when the right side did not match. + * + *
Concrete example. For the SQL: + *
{@code
+ * SELECT e.ename,
+ * (SELECT ROW_NUMBER() OVER (PARTITION BY e.deptno
+ * ORDER BY e.empno, d.deptno)
+ * FROM dept d WHERE e.deptno = d.deptno) AS rn
+ * FROM emp e
+ * ORDER BY e.empno
+ * }
+ *
+ * BEFORE this method (window expression as seen on entry): + *
{@code
+ * ROW_NUMBER() OVER (
+ * PARTITION BY $cor2.DEPTNO
+ * ORDER BY $cor2.EMPNO, $0)
+ * partitionKeys = [$cor2.DEPTNO]
+ * orderKeys = [$cor2.EMPNO, $0]
+ * }
+ *
+ * AFTER this method (with {@code projectPulledAboveLeftCorrelator=true} + * and a null-indicator at column {@code $3}; correlation fields are + * rewritten to input refs from the outer side and {@code $cor2.EMPNO} is + * additionally appended to the partition keys): + *
{@code
+ * CASE(IS NULL($3), null:BIGINT,
+ * CAST(
+ * ROW_NUMBER() OVER (
+ * PARTITION BY CASE(IS NULL($3), null:TINYINT, CAST($2):TINYINT),
+ * CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT)
+ * ORDER BY CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT),
+ * $3)
+ * ):BIGINT)
+ * newOver.partitionKeys =
+ * [CASE(IS NULL($3), null:TINYINT, CAST($2):TINYINT),
+ * CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT)]
+ * newOver.orderKeys =
+ * [CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT), $3]
+ * }
+ *
+ * Note that {@code $cor2.EMPNO} appeared only in the original
+ * {@code ORDER BY}; without appending its decorrelated form to
+ * {@code partitionKeys}, the rewritten window would silently widen its
+ * computation scope across outer rows and produce wrong results.
+ */
+ @Override public RexNode visitOver(RexOver over) {
+ // Collect correlation fields that are referenced directly by the window
+ // expression. They need to be added to the window partition keys so that
+ // decorrelation does not widen the window computation scope.
+ final List