From 4f173861665f01806073cd5ba2e740a92628bb49 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Wed, 3 Jun 2026 07:36:47 +0000 Subject: [PATCH 01/10] feat: support size() for MapType input (#4472) --- native/spark-expr/src/array_funcs/size.rs | 83 ++++++++----------- .../scala/org/apache/comet/serde/arrays.scala | 6 +- .../expressions/array/posexplode.sql | 4 +- .../sql-tests/expressions/array/size.sql | 2 +- .../comet/CometMapExpressionSuite.scala | 18 +--- 5 files changed, 41 insertions(+), 72 deletions(-) diff --git a/native/spark-expr/src/array_funcs/size.rs b/native/spark-expr/src/array_funcs/size.rs index 9777553341..1a4d4c2272 100644 --- a/native/spark-expr/src/array_funcs/size.rs +++ b/native/spark-expr/src/array_funcs/size.rs @@ -198,6 +198,14 @@ fn spark_size_scalar(scalar: &ScalarValue) -> Result { + if array.is_null(0) { + Ok(ScalarValue::Int32(Some(-1))) + } else { + let len = array.value_length(0) as i32; + Ok(ScalarValue::Int32(Some(len))) + } + } ScalarValue::Null => { Ok(ScalarValue::Int32(Some(-1))) // Spark behavior: return -1 for null } @@ -279,75 +287,56 @@ mod tests { // TODO: Add map array test once Arrow MapArray API constraints are resolved // Currently MapArray doesn't allow nulls in entries which makes testing complex // The core size() implementation supports maps correctly - #[ignore] #[test] fn test_spark_size_map_array() { - use arrow::array::{MapArray, StringArray}; - - // Create a simpler test with maps: - // [{"key1": "value1", "key2": "value2"}, {"key3": "value3"}, {}, null] + use arrow::array::{Int32Array, MapArray, StringArray}; - // Create keys array for all entries (no nulls) - let keys = StringArray::from(vec!["key1", "key2", "key3"]); + let keys = StringArray::from(vec![Some("key1"), Some("key2"), Some("key3")]); + let values = Int32Array::from(vec![Some(1), Some(2), Some(3)]); - // Create values array for all entries (no nulls) - let values = StringArray::from(vec!["value1", "value2", "value3"]); - - // Create entry offsets: [0, 2, 3, 3] representing: - // - Map 1: entries 0-1 (2 key-value pairs) - // - Map 2: entries 2-2 (1 key-value pair) - // - Map 3: entries 3-2 (0 key-value pairs, empty map) - // - Map 4: null (handled by null buffer) - let entry_offsets = arrow::buffer::OffsetBuffer::new(vec![0, 2, 3, 3, 3].into()); + let entry_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 3, 3, 3].into()); let key_field = Arc::new(Field::new("key", DataType::Utf8, false)); - let value_field = Arc::new(Field::new("value", DataType::Utf8, false)); // Make values non-nullable too + let value_field = Arc::new(Field::new("value", DataType::Int32, true)); - // Create the entries struct array let entries = arrow::array::StructArray::new( arrow::datatypes::Fields::from(vec![key_field, value_field]), vec![Arc::new(keys), Arc::new(values)], - None, // No nulls in the entries struct array itself + None, ); - // Create null buffer for the map array (fourth map is null) let mut null_buffer = NullBufferBuilder::new(4); - null_buffer.append(true); // Map with 2 entries - not null - null_buffer.append(true); // Map with 1 entry - not null - null_buffer.append(true); // Empty map - not null - null_buffer.append(false); // null map - - let map_data_type = DataType::Map( - Arc::new(Field::new( - "entries", - DataType::Struct(arrow::datatypes::Fields::from(vec![ - Field::new("key", DataType::Utf8, false), - Field::new("value", DataType::Utf8, false), // Make values non-nullable too - ])), - false, - )), - false, // keys are not sorted - ); - - let map_field = Arc::new(Field::new("map", map_data_type, true)); - - let map_array = MapArray::new( + null_buffer.append(true); + null_buffer.append(true); + null_buffer.append(true); + null_buffer.append(false); + + let map_field = Arc::new(Field::new( + "entries", + DataType::Struct(arrow::datatypes::Fields::from(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Int32, true), + ])), + false, + )); + + let map_array = MapArray::try_new( map_field, entry_offsets, entries, null_buffer.finish(), - false, // keys are not sorted - ); + false, + ) + .unwrap(); let array_ref: ArrayRef = Arc::new(map_array); let result = spark_size_array(&array_ref).unwrap(); let result = result.as_any().downcast_ref::().unwrap(); - // Expected: [2, 1, 0, -1] - assert_eq!(result.value(0), 2); // Map with 2 key-value pairs - assert_eq!(result.value(1), 1); // Map with 1 key-value pair - assert_eq!(result.value(2), 0); // empty map has 0 pairs - assert_eq!(result.value(3), -1); // null map returns -1 + assert_eq!(result.value(0), 2); + assert_eq!(result.value(1), 1); + assert_eq!(result.value(2), 0); + assert_eq!(result.value(3), -1); } #[test] diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 2f89b0e2e3..bcda62ee09 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -667,15 +667,11 @@ object CometArrayFilter extends CometExpressionSerde[ArrayFilter] { object CometSize extends CometExpressionSerde[Size] { - override def getUnsupportedReasons(): Seq[String] = Seq( - "Only supports `ArrayType` input; `MapType` input is not supported") - override def getSupportLevel(expr: Size): SupportLevel = { expr.child.dataType match { case _: ArrayType => Compatible() - case _: MapType => Unsupported(Some("size does not support map inputs")) + case _: MapType => Compatible() case other => - // this should be unreachable because Spark only supports map and array inputs Unsupported(Some(s"Unsupported child data type: $other")) } } diff --git a/spark/src/test/resources/sql-tests/expressions/array/posexplode.sql b/spark/src/test/resources/sql-tests/expressions/array/posexplode.sql index 20b0547ebe..b7ba70d45a 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/posexplode.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/posexplode.sql @@ -95,6 +95,6 @@ INSERT INTO test_posexplode_map VALUES (1, map('a', 1, 'b', 2)), (2, map('c', 3)) --- posexplode over a map falls back to Spark (Comet only supports array inputs) -query expect_fallback(size does not support map inputs) +-- posexplode over a map falls back to Spark (Comet only supports array inputs, not maps) +query expect_fallback(Comet only supports explode/explode_outer for arrays, not maps) SELECT id, posexplode(m) FROM test_posexplode_map diff --git a/spark/src/test/resources/sql-tests/expressions/array/size.sql b/spark/src/test/resources/sql-tests/expressions/array/size.sql index b006a4da0d..38e6830883 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/size.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/size.sql @@ -21,7 +21,7 @@ CREATE TABLE test_size(arr array, m map) USING parquet statement INSERT INTO test_size VALUES (array(1, 2, 3), map('a', 1, 'b', 2)), (array(), map()), (NULL, NULL) -query spark_answer_only +query SELECT size(arr), size(m) FROM test_size -- literal arguments diff --git a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala index f3c7d9f23e..347d924d2f 100644 --- a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala @@ -126,23 +126,7 @@ class CometMapExpressionSuite extends CometTestBase { } } - test("fallback for size with map input") { - withTempDir { dir => - withTempView("t1") { - val path = new Path(dir.toURI.toString, "test.parquet") - makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = true, 100) - spark.read.parquet(path.toString).createOrReplaceTempView("t1") - - // Use column references in maps to avoid constant folding - checkSparkAnswerAndFallbackReason( - sql("SELECT size(case when _2 < 0 then map(_8, _9) else map() end) from t1"), - "size does not support map inputs") - } - } - } - - // fails with "map is not supported" - ignore("size with map input") { + test("size with map input") { withTempDir { dir => withTempView("t1") { val path = new Path(dir.toURI.toString, "test.parquet") From 0b3c4511ca8ba030723dd54817503d78dbbea415 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Thu, 4 Jun 2026 06:31:14 +0000 Subject: [PATCH 02/10] test: add literal map, cardinality, and sizeOfNull=false SQL tests for size() --- .../sql-tests/expressions/array/size.sql | 14 +++++++- .../expressions/array/size_legacy_off.sql | 34 +++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql diff --git a/spark/src/test/resources/sql-tests/expressions/array/size.sql b/spark/src/test/resources/sql-tests/expressions/array/size.sql index 38e6830883..6bb8862a3c 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/size.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/size.sql @@ -24,6 +24,18 @@ INSERT INTO test_size VALUES (array(1, 2, 3), map('a', 1, 'b', 2)), (array(), ma query SELECT size(arr), size(m) FROM test_size --- literal arguments +-- literal array arguments query SELECT size(array(1, 2, 3)), size(array()), size(cast(NULL as array)) + +-- literal map via CreateMap (falls back: Comet has no CreateMap serde; +-- cast(NULL as map) avoids CreateMap and goes through CometLiteral instead) +query spark_answer_only +SELECT size(map('a', 1, 'b', 2)), size(map()) + +query +SELECT size(cast(NULL as map)) + +-- cardinality is a SQL alias for size +query +SELECT cardinality(arr), cardinality(m) FROM test_size diff --git a/spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql b/spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql new file mode 100644 index 0000000000..be47983e87 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql @@ -0,0 +1,34 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- Config: spark.sql.legacy.sizeOfNull=false + +statement +CREATE TABLE test_size_legacy_off(arr array, m map) USING parquet + +statement +INSERT INTO test_size_legacy_off VALUES (array(1, 2, 3), map('a', 1, 'b', 2)), (array(), map()), (NULL, NULL) + +-- With sizeOfNull=false, size(NULL) returns NULL instead of -1 +query +SELECT size(arr), size(m) FROM test_size_legacy_off + +query +SELECT size(cast(NULL as array)), size(cast(NULL as map)) + +query +SELECT cardinality(arr), cardinality(m) FROM test_size_legacy_off From ff01bbc4d7f8f6b61ceeb26d77d77d15e15df039 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Thu, 4 Jun 2026 06:31:14 +0000 Subject: [PATCH 03/10] chore: remove stale TODO and add ScalarValue::Map unit tests in size.rs --- native/spark-expr/src/array_funcs/size.rs | 72 ++++++++++++++++++++++- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/native/spark-expr/src/array_funcs/size.rs b/native/spark-expr/src/array_funcs/size.rs index 1a4d4c2272..22dfefbfc8 100644 --- a/native/spark-expr/src/array_funcs/size.rs +++ b/native/spark-expr/src/array_funcs/size.rs @@ -284,9 +284,6 @@ mod tests { assert_eq!(result, ScalarValue::Int32(Some(-1))); } - // TODO: Add map array test once Arrow MapArray API constraints are resolved - // Currently MapArray doesn't allow nulls in entries which makes testing complex - // The core size() implementation supports maps correctly #[test] fn test_spark_size_map_array() { use arrow::array::{Int32Array, MapArray, StringArray}; @@ -339,6 +336,75 @@ mod tests { assert_eq!(result.value(3), -1); } + #[test] + fn test_spark_size_scalar_map() { + use arrow::array::{Int32Array, MapArray, StringArray}; + + let keys = StringArray::from(vec![Some("a"), Some("b")]); + let values = Int32Array::from(vec![Some(1), Some(2)]); + let entry_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2].into()); + + let key_field = Arc::new(Field::new("key", DataType::Utf8, false)); + let value_field = Arc::new(Field::new("value", DataType::Int32, true)); + + let entries = arrow::array::StructArray::new( + arrow::datatypes::Fields::from(vec![key_field, value_field]), + vec![Arc::new(keys), Arc::new(values)], + None, + ); + + let map_field = Arc::new(Field::new( + "entries", + DataType::Struct(arrow::datatypes::Fields::from(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Int32, true), + ])), + false, + )); + + let map_array = MapArray::try_new(map_field, entry_offsets, entries, None, false).unwrap(); + let scalar = ScalarValue::Map(Arc::new(map_array)); + let result = spark_size_scalar(&scalar).unwrap(); + assert_eq!(result, ScalarValue::Int32(Some(2))); + } + + #[test] + fn test_spark_size_scalar_null_map() { + use arrow::array::{Int32Array, MapArray, StringArray}; + + let keys = StringArray::from(vec![Some("a")]); + let values = Int32Array::from(vec![Some(1)]); + let entry_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 1].into()); + + let key_field = Arc::new(Field::new("key", DataType::Utf8, false)); + let value_field = Arc::new(Field::new("value", DataType::Int32, true)); + + let entries = arrow::array::StructArray::new( + arrow::datatypes::Fields::from(vec![key_field, value_field]), + vec![Arc::new(keys), Arc::new(values)], + None, + ); + + let map_field = Arc::new(Field::new( + "entries", + DataType::Struct(arrow::datatypes::Fields::from(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("value", DataType::Int32, true), + ])), + false, + )); + + let mut null_buffer = NullBufferBuilder::new(1); + null_buffer.append(false); + + let map_array = + MapArray::try_new(map_field, entry_offsets, entries, null_buffer.finish(), false) + .unwrap(); + let scalar = ScalarValue::Map(Arc::new(map_array)); + let result = spark_size_scalar(&scalar).unwrap(); + assert_eq!(result, ScalarValue::Int32(Some(-1))); + } + #[test] fn test_spark_size_fixed_size_list_array() { use arrow::array::FixedSizeListArray; From 92789acc5811e2747d23ebebadbac1a4f2095441 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Thu, 4 Jun 2026 06:31:14 +0000 Subject: [PATCH 04/10] docs: remove stale MapType fallback notes for size and cardinality --- docs/source/user-guide/latest/expressions.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index b5ffd40161..2d3df5f429 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -183,13 +183,13 @@ The tables below list every Spark built-in expression with its current status. ## collection_funcs -| Function | Status | Notes | -| --- | --- | --- | -| `array_size` | ✅ | | -| `cardinality` | ✅ | MapType input falls back | -| `concat` | ✅ | Binary/array children fall back | -| `reverse` | ✅ | Binary-element arrays fall back (Incompatible) ([details](compatibility/expressions/array.md)) | -| `size` | ✅ | MapType input falls back | +| Function | Status | Notes | +| ------------- | ------ | ----------------------------------------------------------------------------------------------------------- | +| `array_size` | ✅ | | +| `cardinality` | ✅ | | +| `concat` | ✅ | Binary/array children fall back | +| `reverse` | ✅ | Binary-element arrays fall back (Incompatible) ([details](compatibility/expressions/array.md)) | +| `size` | ✅ | | --- From 206fbd04fa054b62a7009d3d1215b6ae9551a4b1 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Thu, 4 Jun 2026 21:53:34 +0800 Subject: [PATCH 05/10] docs: remove padding from collection_funcs table in expressions.md --- docs/source/user-guide/latest/expressions.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index 2d3df5f429..62434cfb32 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -183,13 +183,13 @@ The tables below list every Spark built-in expression with its current status. ## collection_funcs -| Function | Status | Notes | -| ------------- | ------ | ----------------------------------------------------------------------------------------------------------- | -| `array_size` | ✅ | | -| `cardinality` | ✅ | | -| `concat` | ✅ | Binary/array children fall back | -| `reverse` | ✅ | Binary-element arrays fall back (Incompatible) ([details](compatibility/expressions/array.md)) | -| `size` | ✅ | | +| Function | Status | Notes | +| --- | --- | --- | +| `array_size` | ✅ | | +| `cardinality` | ✅ | | +| `concat` | ✅ | Binary/array children fall back | +| `reverse` | ✅ | Binary-element arrays fall back (Incompatible) ([details](compatibility/expressions/array.md)) | +| `size` | ✅ | | --- From 22407c5bac54763bff32d9dd2ebbc457c3cd09d1 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Fri, 5 Jun 2026 01:05:38 +0000 Subject: [PATCH 06/10] chore: apply cargo fmt to size.rs --- native/spark-expr/src/array_funcs/size.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/native/spark-expr/src/array_funcs/size.rs b/native/spark-expr/src/array_funcs/size.rs index 22dfefbfc8..f6abd04114 100644 --- a/native/spark-expr/src/array_funcs/size.rs +++ b/native/spark-expr/src/array_funcs/size.rs @@ -397,9 +397,14 @@ mod tests { let mut null_buffer = NullBufferBuilder::new(1); null_buffer.append(false); - let map_array = - MapArray::try_new(map_field, entry_offsets, entries, null_buffer.finish(), false) - .unwrap(); + let map_array = MapArray::try_new( + map_field, + entry_offsets, + entries, + null_buffer.finish(), + false, + ) + .unwrap(); let scalar = ScalarValue::Map(Arc::new(map_array)); let result = spark_size_scalar(&scalar).unwrap(); assert_eq!(result, ScalarValue::Int32(Some(-1))); From 4ed725f0e2b1714df665e218760718577602a7a4 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Fri, 5 Jun 2026 08:23:09 +0000 Subject: [PATCH 07/10] fix: remove unnecessary i32 cast and fix size with map input test (#4580) --- native/spark-expr/src/array_funcs/size.rs | 2 +- .../comet/CometMapExpressionSuite.scala | 37 ++++++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/native/spark-expr/src/array_funcs/size.rs b/native/spark-expr/src/array_funcs/size.rs index f6abd04114..9adcc29800 100644 --- a/native/spark-expr/src/array_funcs/size.rs +++ b/native/spark-expr/src/array_funcs/size.rs @@ -202,7 +202,7 @@ fn spark_size_scalar(scalar: &ScalarValue) -> Result - withTempView("t1") { - val path = new Path(dir.toURI.toString, "test.parquet") - makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = true, 100) - spark.read.parquet(path.toString).createOrReplaceTempView("t1") + withTempPath { dir => + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val df = spark + .range(100) + .select( + when(col("id") > 1, map(col("id"), when(col("id") > 2, col("id")))) + .alias("map1"), + when(col("id") > 5, map(lit("a"), col("id"), lit("b"), col("id") + 1)) + .alias("map2")) + df.write.parquet(dir.toString()) + } - // Use column references in maps to avoid constant folding - checkSparkAnswerAndOperator( - sql("SELECT size(map(_8, _9, _10, _11)) from t1 where _8 is not null")) - checkSparkAnswerAndOperator( - sql("SELECT size(case when _2 < 0 then map(_8, _9) else map() end) from t1")) + Seq("", "parquet").foreach { v1List => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) { + val df = spark.read.parquet(dir.toString()) + df.createOrReplaceTempView("t1") + if (v1List.isEmpty) { + checkSparkAnswer(df.select(size(col("map1")))) + checkSparkAnswer(df.select(size(col("map2")))) + checkSparkAnswer( + sql("SELECT size(CASE WHEN id < 50 THEN map1 ELSE map2 END) FROM t1")) + } else { + checkSparkAnswerAndOperator(df.select(size(col("map1")))) + checkSparkAnswerAndOperator(df.select(size(col("map2")))) + checkSparkAnswerAndOperator( + sql("SELECT size(CASE WHEN id < 50 THEN map1 ELSE map2 END) FROM t1")) + } + } } } } From a7f7529a71f8df4647b683783e14304c9426e772 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Wed, 10 Jun 2026 01:02:27 +0000 Subject: [PATCH 08/10] test: address PR review feedback for size() map support --- .../sql-tests/expressions/array/size.sql | 2 ++ .../expressions/array/size_legacy_off.sql | 34 ------------------- .../comet/CometMapExpressionSuite.scala | 15 ++++++++ 3 files changed, 17 insertions(+), 34 deletions(-) delete mode 100644 spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql diff --git a/spark/src/test/resources/sql-tests/expressions/array/size.sql b/spark/src/test/resources/sql-tests/expressions/array/size.sql index 6bb8862a3c..fb2bedef1e 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/size.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/size.sql @@ -15,6 +15,8 @@ -- specific language governing permissions and limitations -- under the License. +-- ConfigMatrix: spark.sql.legacy.sizeOfNull=true,false + statement CREATE TABLE test_size(arr array, m map) USING parquet diff --git a/spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql b/spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql deleted file mode 100644 index be47983e87..0000000000 --- a/spark/src/test/resources/sql-tests/expressions/array/size_legacy_off.sql +++ /dev/null @@ -1,34 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. - --- Config: spark.sql.legacy.sizeOfNull=false - -statement -CREATE TABLE test_size_legacy_off(arr array, m map) USING parquet - -statement -INSERT INTO test_size_legacy_off VALUES (array(1, 2, 3), map('a', 1, 'b', 2)), (array(), map()), (NULL, NULL) - --- With sizeOfNull=false, size(NULL) returns NULL instead of -1 -query -SELECT size(arr), size(m) FROM test_size_legacy_off - -query -SELECT size(cast(NULL as array)), size(cast(NULL as map)) - -query -SELECT cardinality(arr), cardinality(m) FROM test_size_legacy_off diff --git a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala index f5753f472f..ebdce217a6 100644 --- a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala @@ -127,6 +127,21 @@ class CometMapExpressionSuite extends CometTestBase { } test("size with map input") { + withTempDir { dir => + withTempView("t1") { + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = true, 100) + spark.read.parquet(path.toString).createOrReplaceTempView("t1") + + checkSparkAnswerAndOperator( + sql("SELECT size(map(_8, _9, _10, _11)) from t1 where _8 is not null")) + checkSparkAnswerAndOperator( + sql("SELECT size(case when _2 < 0 then map(_8, _9) else map() end) from t1")) + } + } + } + + test("size with map input - v2 reader") { withTempPath { dir => withSQLConf(CometConf.COMET_ENABLED.key -> "false") { val df = spark From d5ecc472fabd68af5e8f23e63f1b63ac40f71138 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Wed, 10 Jun 2026 06:06:00 +0000 Subject: [PATCH 09/10] fix: include id column in parquet for size map v2 reader test --- .../test/scala/org/apache/comet/CometMapExpressionSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala index ebdce217a6..5a55488b5e 100644 --- a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala @@ -133,8 +133,6 @@ class CometMapExpressionSuite extends CometTestBase { makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = true, 100) spark.read.parquet(path.toString).createOrReplaceTempView("t1") - checkSparkAnswerAndOperator( - sql("SELECT size(map(_8, _9, _10, _11)) from t1 where _8 is not null")) checkSparkAnswerAndOperator( sql("SELECT size(case when _2 < 0 then map(_8, _9) else map() end) from t1")) } @@ -147,6 +145,7 @@ class CometMapExpressionSuite extends CometTestBase { val df = spark .range(100) .select( + col("id"), when(col("id") > 1, map(col("id"), when(col("id") > 2, col("id")))) .alias("map1"), when(col("id") > 5, map(lit("a"), col("id"), lit("b"), col("id") + 1)) From 678f2d5f309f6ed106fb30b7cbade7556b2f3700 Mon Sep 17 00:00:00 2001 From: marvelshan Date: Thu, 11 Jun 2026 04:46:29 +0000 Subject: [PATCH 10/10] test: fix size with map input test to expect CreateMap fallback map(_8, _9) in SQL produces a CreateMap expression which has no Comet serde, causing the entire size() expression to fall back to Spark. checkSparkAnswerAndOperator would fail because it requires Comet native execution. Switch to checkSparkAnswerAndFallbackReason to correctly assert that the fallback reason is CreateMap not being supported, and rename the test to make the fallback expectation explicit. Native execution of size() on map columns from Parquet is already tested in 'size with map input - v2 reader'. --- .../scala/org/apache/comet/CometMapExpressionSuite.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala index 5a55488b5e..86980fffe6 100644 --- a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala @@ -126,15 +126,16 @@ class CometMapExpressionSuite extends CometTestBase { } } - test("size with map input") { + test("fallback for size with map input") { withTempDir { dir => withTempView("t1") { val path = new Path(dir.toURI.toString, "test.parquet") makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = true, 100) spark.read.parquet(path.toString).createOrReplaceTempView("t1") - checkSparkAnswerAndOperator( - sql("SELECT size(case when _2 < 0 then map(_8, _9) else map() end) from t1")) + checkSparkAnswerAndFallbackReason( + sql("SELECT size(case when _2 < 0 then map(_8, _9) else map() end) from t1"), + "map is not supported") } } }