From 4fdb5c9b48c7e590e406f5a135651eba195f2eae Mon Sep 17 00:00:00 2001
From: iwanttobepowerful <745778074@qq.com>
Date: Fri, 12 Jun 2026 14:21:01 +0800
Subject: [PATCH] [CALCITE-7587] RelDecorrelator fails on correlated scalar subquery with ROW_NUMBER window function due to RexOver nullability mismatch

---
 .../calcite/sql2rel/RelDecorrelator.java | 112 ++++++++++++++++++
 core/src/test/resources/sql/sub-query.iq |  33 ++++++
 2 files changed, 145 insertions(+)

diff --git a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
index f9e7049af21..a5a81757937 100644
--- a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
+++ b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
@@ -2695,6 +2695,118 @@ private RexNode createCaseExpression(
       return literal;
     }
 
+    /**
+     * Decorrelates a window expression ({@link RexOver}) that may reference
+     * correlation variables in its {@code PARTITION BY} / {@code ORDER BY}
+     * keys (or the aggregate arguments).
+     *
+     * <p>For each correlation field reachable from {@code over}, we:
+     * <ol>
+     *   <li>rewrite it to an input reference of the (already decorrelated)
+     *       left-hand side of the surrounding correlate, and</li>
+     *   <li>append that reference to the window's {@code partitionKeys}
+     *       (if not already present), so that the window is evaluated
+     *       independently per outer row group, matching the original
+     *       per-correlated-row semantics.</li>
+     * </ol>
+     *
+     * <p>If the scalar sub-query has been pulled above a LEFT correlate, the
+     * result is wrapped in a {@code CASE} on the null-indicator so that it
+     * stays {@code NULL} when the right side did not match.
+     *
+     * <p>Concrete example. For the SQL:
+     * <pre>{@code
+     * SELECT e.ename,
+     *        (SELECT ROW_NUMBER() OVER (PARTITION BY e.deptno
+     *                                   ORDER BY e.empno, d.deptno)
+     *           FROM dept d WHERE e.deptno = d.deptno) AS rn
+     * FROM   emp e
+     * ORDER BY e.empno
+     * }</pre>
+     *
+     * <p>BEFORE this method (window expression as seen on entry):
+     * <pre>{@code
+     * ROW_NUMBER() OVER (
+     *   PARTITION BY $cor2.DEPTNO
+     *   ORDER BY    $cor2.EMPNO, $0)
+     *   partitionKeys = [$cor2.DEPTNO]
+     *   orderKeys     = [$cor2.EMPNO, $0]
+     * }</pre>
+     *
+     * <p>AFTER this method (with {@code projectPulledAboveLeftCorrelator=true}
+     * and a null-indicator at column {@code $3}; correlation fields are
+     * rewritten to input refs from the outer side and {@code $cor2.EMPNO} is
+     * additionally appended to the partition keys):
+     * <pre>{@code
+     * CASE(IS NULL($3), null:BIGINT,
+     *      CAST(
+     *        ROW_NUMBER() OVER (
+     *          PARTITION BY CASE(IS NULL($3), null:TINYINT,  CAST($2):TINYINT),
+     *                       CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT)
+     *          ORDER BY    CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT),
+     *                       $3)
+     *      ):BIGINT)
+     *   newOver.partitionKeys =
+     *     [CASE(IS NULL($3), null:TINYINT,  CAST($2):TINYINT),
+     *      CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT)]
+     *   newOver.orderKeys     =
+     *     [CASE(IS NULL($3), null:SMALLINT, CAST($0):SMALLINT), $3]
+     * }</pre>
+     *
+     * <p>Note that {@code $cor2.EMPNO} only appeared in the original
+     * {@code ORDER BY}; without appending its decorrelated form to
+     * {@code partitionKeys} the rewritten window would silently widen its
+     * computation scope across outer rows and produce wrong results.
+     */
+    @Override public RexNode visitOver(RexOver over) {
+      // Collect correlation fields that are referenced directly by the window
+      // expression. They need to be added to the window partition keys so that
+      // decorrelation does not widen the window computation scope.
+      final List<RexFieldAccess> correlationFields = new ArrayList<>();
+      over.accept(new RexVisitorImpl<Void>(true) {
+        @Override public Void visitFieldAccess(RexFieldAccess fieldAccess) {
+          if (cm.mapFieldAccessToCorRef.containsKey(fieldAccess)
+              && !correlationFields.contains(fieldAccess)) {
+            correlationFields.add(fieldAccess);
+          }
+          return super.visitFieldAccess(fieldAccess);
+        }
+      });
+
+      RexOver newOver = (RexOver) super.visitOver(over);
+      if (!correlationFields.isEmpty()) {
+        final List<RexNode> partitionKeys = new ArrayList<>(newOver.getWindow().partitionKeys);
+        boolean update = false;
+        for (RexFieldAccess fieldAccess : correlationFields) {
+          // Rewrite the correlation field to its decorrelated input reference,
+          // then use it as an additional partition key for the window.
+          RexNode partitionKey = visitFieldAccess(fieldAccess);
+          if (!partitionKeys.contains(partitionKey)) {
+            partitionKeys.add(partitionKey);
+            update = true;
+          }
+        }
+        if (update) {
+          newOver =
+              (RexOver) rexBuilder.makeOver(newOver.getParserPosition(),
+                  newOver.getType(),
+                  newOver.getAggOperator(), newOver.getOperands(), partitionKeys,
+                  newOver.getWindow().orderKeys,
+                  newOver.getWindow().getLowerBound(),
+                  newOver.getWindow().getUpperBound(),
+                  newOver.getWindow().getExclude(),
+                  newOver.getWindow().isRows(), true, false, newOver.isDistinct(),
+                  newOver.ignoreNulls());
+        }
+      }
+      if (projectPulledAboveLeftCorrelator && (nullIndicator != null)) {
+        // Once a scalar sub-query is pulled above a left correlate, the result
+        // must remain nullable when there is no matching row on the right side.
+        return createCaseExpression(nullIndicator, null, newOver);
+      }
+      return newOver;
+    }
+
     @Override public RexNode visitCall(final RexCall call) {
       RexNode newCall;
 
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index db69258df24..4d4398cfc4f 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -8168,6 +8168,39 @@ SELECT deptno FROM dept WHERE 1000.00 >
 
 !ok
 
+# [CALCITE-7587] RelDecorrelator fails on correlated scalar subquery with ROW_NUMBER window function
+# due to RexOver nullability mismatch
+# Correlated scalar sub-query in the SELECT list that contains a window function.
+# Decorrelation must add the correlation key to the window partition and preserve nullability
+# of the OVER expression after pulling it above the left correlate.
+SELECT e.ename,
+       (SELECT ROW_NUMBER() OVER (PARTITION BY e.deptno ORDER BY e.empno, d.deptno)
+        FROM dept d
+        WHERE e.deptno = d.deptno) AS rn
+FROM emp e
+ORDER BY e.empno;
++--------+----+
+| ENAME  | RN |
++--------+----+
+| SMITH  |  1 |
+| ALLEN  |  1 |
+| WARD   |  1 |
+| JONES  |  1 |
+| MARTIN |  1 |
+| BLAKE  |  1 |
+| CLARK  |  1 |
+| SCOTT  |  1 |
+| KING   |  1 |
+| TURNER |  1 |
+| ADAMS  |  1 |
+| JAMES  |  1 |
+| FORD   |  1 |
+| MILLER |  1 |
++--------+----+
+(14 rows)
+
+!ok
+
 # [CALCITE-7584] RelDecorrelator produces incorrect results for correlated LATERAL sub-queries with window functions
 # Correlated LATERAL sub-query with a window expression.
 # The equality predicate between the inner and outer query must remain applied