From d6f24f543580a5f504780e30537118429a7445c0 Mon Sep 17 00:00:00 2001 From: Mihailo Timotic Date: Wed, 25 Mar 2026 15:33:12 +0000 Subject: [PATCH] [SPARK-XXXXX][SQL] Strip Alias wrappers from inline table row expressions in parser When the SQL parser processes `VALUES (1 AS id, 'a' AS name)`, the parenthesized expression is parsed as a `CreateNamedStruct` via `CreateStruct.apply`, which preserves `Alias` wrappers as value expressions. These `Alias` nodes then propagate into `UnresolvedInlineTable.rows` through `struct.valExprs`. The aliases are redundant for inline tables since column names are determined separately (either from the explicit table alias identifier list or generated defaults). Their presence causes issues during single-pass analysis where the `ExpressionIdAssigner` mapping is not yet initialized when processing inline table row expressions. This patch strips `Alias` wrappers from inline table row expressions in `visitInlineTable`, extracting just `alias.child`. This is safe because the alias names are already captured in the struct's name expressions and are not used for inline table column naming. 
Co-authored-by: Isaac --- .../sql/catalyst/parser/AstBuilder.scala | 10 +++++-- .../analyzer-results/inline-table.sql.out | 21 +++++++++++++++ .../sql-tests/inputs/inline-table.sql | 9 +++++++ .../sql-tests/results/inline-table.sql.out | 26 +++++++++++++++++++ 4 files changed, 64 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 07b0801f55ebd..5e1ed4d2f5747 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2725,8 +2725,14 @@ class AstBuilder extends DataTypeAstBuilder // inline table comes in two styles: // style 1: values (1), (2), (3) -- multiple columns are supported // style 2: values 1, 2, 3 -- only a single column is supported here - case struct: CreateNamedStruct => struct.valExprs // style 1 - case child => Seq(child) // style 2 + // Strip Alias wrappers from row values — CreateStruct.apply preserves them for + // expressions like `(1 AS id, 'a' AS name)`, but they are redundant here since + // column names are determined by the table alias or generated defaults. 
+ case struct: CreateNamedStruct => struct.valExprs.map { + case a: Alias => a.child + case other => other + } // style 1 + case child => Seq(child) // style 2 } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out index 78539effe188e..4aa144b59257e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out @@ -256,3 +256,24 @@ select count(distinct ct) from values now(), now(), now() as data(ct) select count(distinct ct) from values current_timestamp(), current_timestamp() as data(ct) -- !query analysis [Analyzer test output redacted due to nondeterminism] + + +-- !query +select a from (values (1 as id, current_timestamp() as ts), (2 as id, current_timestamp() as ts)) as t(a, b) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +select * from values (1 as id, 'a' as name), (2 as id, 'b' as name) as t(a, b) +-- !query analysis +Project [a#x, b#x] ++- SubqueryAlias t + +- LocalRelation [a#x, b#x] + + +-- !query +select * from values (1 as a, 2 as b) +-- !query analysis +Project [col1#x, col2#x] ++- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql index 8f65dc77c960a..bb66e5dc6b9f8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql @@ -66,3 +66,12 @@ select count(distinct ct) from values now(), now(), now() as data(ct); -- current_timestamp() should be kept as tempResolved inline expression. 
select count(distinct ct) from values current_timestamp(), current_timestamp() as data(ct); + +-- aliased expressions in multi-column rows with current_timestamp (non-foldable) +select a from (values (1 as id, current_timestamp() as ts), (2 as id, current_timestamp() as ts)) as t(a, b); + +-- aliased expressions in multi-column rows +select * from values (1 as id, 'a' as name), (2 as id, 'b' as name) as t(a, b); + +-- aliased expressions without table alias +select * from values (1 as a, 2 as b); diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 0a2c7b0f55ed2..dee75e7338aaa 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -287,3 +287,29 @@ select count(distinct ct) from values current_timestamp(), current_timestamp() a struct<count(DISTINCT ct):bigint> -- !query output 1 + + +-- !query +select a from (values (1 as id, current_timestamp() as ts), (2 as id, current_timestamp() as ts)) as t(a, b) +-- !query schema +struct<a:int> +-- !query output +1 +2 + + +-- !query +select * from values (1 as id, 'a' as name), (2 as id, 'b' as name) as t(a, b) +-- !query schema +struct<a:int,b:string> +-- !query output +1 a +2 b + + +-- !query +select * from values (1 as a, 2 as b) +-- !query schema +struct<col1:int,col2:int> +-- !query output +1 2