From 0a16953929aa64fc411749f98891c6bed40103d3 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Sun, 8 Feb 2026 23:53:52 -0600 Subject: [PATCH 1/7] Add nomv command + parser/AST/calcite wiring + tests + docs Signed-off-by: Srikanth Padakanti # Conflicts: # core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java # integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java # ppl/src/main/antlr/OpenSearchPPLParser.g4 # ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java --- .../org/opensearch/sql/analysis/Analyzer.java | 6 + .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/tree/NoMv.java | 73 +++++ .../sql/calcite/CalciteRelNodeVisitor.java | 6 + .../function/BuiltinFunctionName.java | 1 + docs/category.json | 1 + docs/user/ppl/cmd/nomv.md | 99 ++++++ docs/user/ppl/index.md | 5 +- .../sql/calcite/CalciteNoPushdownIT.java | 3 +- .../sql/calcite/remote/CalciteExplainIT.java | 39 +++ .../calcite/remote/CalciteNoMvCommandIT.java | 306 ++++++++++++++++++ .../sql/ppl/NewAddedCommandsIT.java | 16 + .../security/CalciteCrossClusterSearchIT.java | 17 + .../expectedOutput/calcite/explain_nomv.yaml | 10 + .../calcite_no_pushdown/explain_nomv.yaml | 10 + ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 + .../opensearch/sql/ppl/parser/AstBuilder.java | 7 + .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 9 + .../sql/ppl/calcite/CalcitePPLNoMvTest.java | 266 +++++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 5 + 21 files changed, 888 insertions(+), 3 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java create mode 100644 docs/user/ppl/cmd/nomv.md create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 58d542538d..2fdd8d6b84 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -81,6 +81,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -546,6 +547,11 @@ public LogicalPlan visitMvCombine(MvCombine node, AnalysisContext context) { throw getOnlyForCalciteException("mvcombine"); } + @Override + public LogicalPlan visitNoMv(NoMv node, AnalysisContext context) { + throw getOnlyForCalciteException("nomv"); + } + /** Build {@link ParseExpression} to context and skip to child nodes. */ @Override public LogicalPlan visitParse(Parse node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 2486b63791..9a8fac2588 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -69,6 +69,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -475,4 +476,8 @@ public T visitAddColTotals(AddColTotals node, C context) { public T visitMvCombine(MvCombine node, C context) { return visitChildren(node, context); } + + public T visitNoMv(NoMv node, C context) { + return visitChildren(node, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java b/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java new file mode 100644 index 0000000000..fac25f612e --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java @@ -0,0 +1,73 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import javax.annotation.Nullable; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Let; +import org.opensearch.sql.ast.expression.Literal; + +/** + * AST node for the NOMV command. Converts multi-value fields to single-value fields by joining + * array elements with newline delimiter. + */ +@Getter +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = false) +public class NoMv extends UnresolvedPlan { + + private final Field field; + @Nullable private UnresolvedPlan child; + + public NoMv(Field field) { + this.field = field; + } + + public NoMv attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return child == null ? ImmutableList.of() : ImmutableList.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitNoMv(this, context); + } + + /** + * Rewrites the nomv command as an eval command using mvjoin function. nomv is rewritten + * to: eval = mvjoin(, "\n") + * + * @return an Eval node representing the equivalent mvjoin operation + */ + public UnresolvedPlan rewriteAsEval() { + // Create mvjoin function call: mvjoin(field, "\n") + Function mvjoinFunc = + new Function("mvjoin", ImmutableList.of(field, new Literal("\n", DataType.STRING))); + + // Create eval expression: field = mvjoin(field, "\n") + Let letExpr = new Let(field, mvjoinFunc); + + // Create eval node and attach the child from this NoMv node + Eval eval = new Eval(ImmutableList.of(letExpr)); + if (this.child != null) { + eval.attach(this.child); + } + return eval; + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 68a700b66b..e4dfc0761f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -126,6 +126,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -3290,6 +3291,11 @@ private void restoreColumnOrderAfterArrayAgg( relBuilder.project(projections, projectionNames, /* force= */ true); } + @Override + public RelNode visitNoMv(NoMv node, CalcitePlanContext context) { + return visitEval((Eval) node.rewriteAsEval(), context); + } + @Override public RelNode visitValues(Values values, CalcitePlanContext context) { if (values.getValues() == null || values.getValues().isEmpty()) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index dce558bf7c..3902b17bdf 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -76,6 +76,7 @@ public enum BuiltinFunctionName { MAP_REMOVE(FunctionName.of("map_remove"), true), MVAPPEND(FunctionName.of("mvappend")), MVJOIN(FunctionName.of("mvjoin")), + NOMV(FunctionName.of("nomv")), MVINDEX(FunctionName.of("mvindex")), MVFIND(FunctionName.of("mvfind")), MVZIP(FunctionName.of("mvzip")), diff --git a/docs/category.json b/docs/category.json index bcf73cb1a8..f90acc0369 100644 --- a/docs/category.json +++ b/docs/category.json @@ -25,6 +25,7 @@ "user/ppl/cmd/join.md", "user/ppl/cmd/lookup.md", "user/ppl/cmd/mvcombine.md", + "user/ppl/cmd/nomv.md", "user/ppl/cmd/parse.md", "user/ppl/cmd/patterns.md", "user/ppl/cmd/rare.md", diff --git a/docs/user/ppl/cmd/nomv.md b/docs/user/ppl/cmd/nomv.md new file mode 100644 index 0000000000..ca66834abc --- /dev/null +++ b/docs/user/ppl/cmd/nomv.md @@ -0,0 +1,99 @@ +# nomv + +## Description + +The `nomv` command converts a multivalue (array) field into a single-value string field by joining all array elements with newline characters (`\n`). This operation is performed in-place, replacing the original field with its joined string representation. + +`nomv` is a transforming command: it modifies the specified field without changing the number of rows in the result set. + +### Key behaviors + +- The field must be of **ARRAY type**. For scalar fields, use the `array()` function to create an array first. +- The specified field is **replaced** with a string containing all array elements joined by newline (`\n`) characters. +- **NULL values within the array are automatically filtered out** before joining. +- If the field doesn't exist, an error is returned. +- The operation uses Calcite's ARRAY_JOIN function internally (same underlying implementation as mvjoin). + +--- + +## Syntax + +```syntax +nomv +``` + +### Arguments + +- **field** (required) + The name of the field whose multivalue content should be converted to a single-value string. + +--- + +## Example 1: Basic nomv usage + +```ppl +source=accounts +| where account_number=1 +| eval names = array(firstname, lastname) +| nomv names +| fields account_number, names +``` + +Expected output: +```text +fetched rows / total rows = 1/1 ++----------------+-------+ +| account_number | names | +|----------------+-------| +| 1 | Amber | +| | Duke | ++----------------+-------+ +``` + +## Example 2: nomv with multiple fields + +```ppl +source=accounts +| where account_number=1 +| eval location = array(city, state) +| nomv location +| fields account_number, location +``` + +Expected output: +```text +fetched rows / total rows = 1/1 ++----------------+----------+ +| account_number | location | +|----------------+----------| +| 1 | Brogan | +| | IL | ++----------------+----------+ +``` + +## Example 3: Error when field does not exist + +```ppl +source=accounts +| nomv does_not_exist +``` + +Expected output: +```text +{'reason': 'Invalid Query', 'details': 'Field [does_not_exist] not found.', 'type': 'IllegalArgumentException'} +Error: Query returned no data +``` + +--- + +## Notes + +- The `nomv` command is only available when the Calcite query engine is enabled. +- This command is particularly useful when you need to export or display multivalue fields as single strings. +- The newline delimiter (`\n`) is fixed and cannot be customized. For custom delimiters, use the `mvjoin` function directly in an eval expression. +- NULL values are automatically filtered out during the join operation, so they do not contribute empty strings to the output. + +## Related commands + +- `mvjoin()` -- Function used by nomv internally to join array elements with a custom delimiter +- [`eval`](eval.md) -- Create computed fields using the `array()` and `mvjoin()` functions \ No newline at end of file diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 12afe96eea..e322fdf599 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -78,11 +78,12 @@ source=accounts | [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | | [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | | [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | -| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | +| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | | [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | | [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. | | [mvcombine command](cmd/mvcombine.md) | 3.5 | stable (since 3.4) | Combines values of a specified field across rows identical on all other fields. | - +| [nomv command](cmd/nomv.md) | 3.6 | experimental (since 3.6) | Converts a multivalue field to a single-value string by joining elements with newlines. | + - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting * **Functions** - [Aggregation Functions](functions/aggregations.md) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index aa569629e2..c564c0b86c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -108,7 +108,8 @@ CalciteVisualizationFormatIT.class, CalciteWhereCommandIT.class, CalcitePPLTpchIT.class, - CalciteMvCombineCommandIT.class + CalciteMvCombineCommandIT.class, + CalciteNoMvCommandIT.class }) public class CalciteNoPushdownIT { private static boolean wasPushdownEnabled; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 0bbc25f1d7..7a50a1ae44 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2529,6 +2529,19 @@ public void testExplainMvCombine() throws IOException { assertYamlEqualsIgnoreId(expected, actual); } + @Test + public void testExplainNoMv() throws IOException { + String query = + "source=opensearch-sql_test_index_account " + + "| fields state, city, age " + + "| eval location = array(state, city) " + + "| nomv location"; + + String actual = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_nomv.yaml"); + assertYamlEqualsIgnoreId(expected, actual); + } + // ==================== fetch_size explain tests ==================== @Test @@ -2706,4 +2719,30 @@ public void testFilterBooleanFieldOnlyNotTrue() throws IOException { String expected = loadExpectedPlan("explain_filter_boolean_only_not_true.yaml"); assertYamlEqualsIgnoreId(expected, result); } + + @Test + public void testNoMvBasic() throws IOException { + String query = + StringUtils.format( + "source=%s | fields firstname, age | eval names = array(firstname) | nomv names |" + + " fields names", + TEST_INDEX_BANK); + var result = explainQueryYaml(query); + Assert.assertTrue( + "Expected explain to contain ARRAY_JOIN function", + result.toLowerCase().contains("array_join")); + } + + @Test + public void testNoMvWithEval() throws IOException { + String query = + StringUtils.format( + "source=%s | eval full_name = concat(firstname, ' J.') | eval name_array =" + + " array(full_name) | nomv name_array | fields name_array", + TEST_INDEX_BANK); + var result = explainQueryYaml(query); + Assert.assertTrue( + "Expected explain to contain both CONCAT and ARRAY_JOIN", + result.toLowerCase().contains("concat") && result.toLowerCase().contains("array_join")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java new file mode 100644 index 0000000000..2f6214946c --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java @@ -0,0 +1,306 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteNoMvCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK); + } + + // --------------------------- + // Sanity (precondition) + // --------------------------- + + @Test + public void testSanity_datasetIsLoaded() throws IOException { + JSONObject result = executeQuery("source=" + TEST_INDEX_BANK + " | head 5"); + int rows = result.getJSONArray("datarows").length(); + Assertions.assertTrue(rows > 0, "Expected bank dataset to have rows, got 0"); + } + + // --------------------------- + // Happy path (core nomv) + // --------------------------- + + @Test + public void testNoMv_basicUsage_withArrayLiterals() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr = array('web', 'production', 'east') | nomv arr | head 1 | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("web\nproduction\neast")); + } + + @Test + public void testNoMv_withArrayFromFields() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval names = array(firstname, lastname) | nomv names | head 1 | fields" + + " firstname, lastname, names"; + + JSONObject result = executeQuery(q); + + verifySchema( + result, + schema("firstname", null, "string"), + schema("lastname", null, "string"), + schema("names", null, "string")); + + verifyDataRows( + result, rows("Amber JOHnny", "Duke Willmington", "Amber JOHnny\nDuke Willmington")); + } + + @Test + public void testNoMv_multipleArrays_appliedInSequence() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr1 = array('a', 'b'), arr2 = array('x', 'y') | nomv arr1 | nomv arr2 |" + + " head 1 | fields arr1, arr2"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr1", null, "string"), schema("arr2", null, "string")); + + verifyDataRows(result, rows("a\nb", "x\ny")); + } + + @Test + public void testNoMv_inComplexPipeline_withWhereAndSort() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number < 20 | eval arr = array(firstname, 'test') | nomv arr |" + + " sort account_number | head 3 | fields account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + verifyDataRows( + result, rows(1, "Amber JOHnny\ntest"), rows(6, "Hattie\ntest"), rows(13, "Nanette\ntest")); + } + + @Test + public void testNoMv_fieldUsableInSubsequentOperations() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number = 6 | eval arr = array('test', 'data') | nomv arr | eval" + + " arr_len = length(arr) | fields account_number, arr, arr_len"; + + JSONObject result = executeQuery(q); + + verifySchema( + result, + schema("account_number", null, "bigint"), + schema("arr", null, "string"), + schema("arr_len", null, "int")); + + verifyDataRows(result, rows(6, "test\ndata", 9)); + } + + @Test + public void testNoMv_withStats_afterAggregation() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | stats count() as cnt by age | eval age_str = cast(age as string) | eval arr =" + + " array(age_str, 'count') | nomv arr | fields cnt, age, arr | sort cnt | head 2"; + + JSONObject result = executeQuery(q); + + verifySchema( + result, + schema("cnt", null, "bigint"), + schema("age", null, "int"), + schema("arr", null, "string")); + + Assertions.assertTrue(result.getJSONArray("datarows").length() > 0); + } + + @Test + public void testNoMv_withEval_worksOnComputedArrays() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number = 1 | eval full_name = concat(firstname, ' ', lastname) |" + + " eval arr = array(full_name, 'suffix') | nomv arr | fields full_name, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("full_name", null, "string"), schema("arr", null, "string")); + + verifyDataRows( + result, rows("Amber JOHnny Duke Willmington", "Amber JOHnny Duke Willmington\nsuffix")); + } + + @Test + public void testNoMv_preservesFieldInPlace() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr = array('a', 'b', 'c') | nomv arr | head 1 | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("a\nb\nc")); + + Assertions.assertEquals(1, result.getJSONArray("schema").length()); + } + + // --------------------------- + // Edge case / error semantics + // --------------------------- + + @Test + public void testNoMv_singleElementArray() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr = array('single') | nomv arr | head 1 | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("single")); + } + + @Test + public void testNoMv_emptyArray() throws IOException { + String q = + "source=" + TEST_INDEX_BANK + " | eval arr = array() | nomv arr | head 1 | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("")); + } + + @Test + public void testNoMv_arrayWithNullValues() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr = array('first', 'second', 'third') | nomv arr | head 1 | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("first\nsecond\nthird")); + } + + @Test + public void testNoMv_scalarFieldError() throws IOException { + ResponseException ex = + Assertions.assertThrows( + ResponseException.class, + () -> + executeQuery("source=" + TEST_INDEX_BANK + " | fields firstname | nomv firstname")); + + int status = ex.getResponse().getStatusLine().getStatusCode(); + Assertions.assertEquals(400, status, "Expected 400 for type mismatch"); + + String msg = ex.getMessage(); + + Assertions.assertTrue( + msg.contains("MVJOIN") || msg.contains("ARRAY") || msg.contains("type"), msg); + } + + @Test + public void testNoMv_arrayWithMixedTypes() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | where account_number = 1 | eval arr = array('age:', cast(age as string)) | nomv" + + " arr | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("age:\n32")); + } + + @Test + public void testNoMv_largeArray() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr = array('1', '2', '3', '4', '5', '6', '7', '8', '9', '10') | nomv arr |" + + " head 1 | fields arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("arr", null, "string")); + + verifyDataRows(result, rows("1\n2\n3\n4\n5\n6\n7\n8\n9\n10")); + } + + @Test + public void testNoMv_resultUsedInComparison() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval arr = array('test') | nomv arr | where arr = 'test' | head 1 | fields" + + " account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + Assertions.assertTrue(result.getJSONArray("datarows").length() > 0); + } + + // --------------------------- + // Edge case / error semantics + // --------------------------- + + @Test + public void testNoMv_missingField_shouldReturn4xx() throws IOException { + // Error when field does not exist + ResponseException ex = + Assertions.assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_BANK + " | nomv does_not_exist")); + + int status = ex.getResponse().getStatusLine().getStatusCode(); + + Assertions.assertEquals(400, status, "Unexpected status. ex=" + ex.getMessage()); + + String msg = ex.getMessage(); + Assertions.assertTrue( + msg.contains("does_not_exist") || msg.contains("field") || msg.contains("Field"), msg); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index c5a1d08c37..5a7f7be922 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -252,4 +252,20 @@ public void testMvCombineUnsupportedInV2() throws IOException { } verifyQuery(result); } + + @Test + public void testNoMvUnsupportedInV2() throws IOException { + JSONObject result; + try { + result = + executeQuery( + String.format( + "source=%s | fields account_number, firstname | eval names = array(firstname) |" + + " nomv names", + TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + verifyQuery(result); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 571d915517..0c101ce472 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -418,4 +418,21 @@ public void testCrossClusterFieldFormat() throws IOException { verifyDataRows( result, rows("Hattie", 36, 5686, "$5,686"), rows("Nanette", 28, 32838, "$32,838")); } + + /** CrossClusterSearchIT Test for nomv. */ + @Test + public void testCrossClusterNoMv() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' " + + "| eval names = array(firstname, lastname) | nomv names " + + "| fields firstname, names", + TEST_INDEX_BANK_REMOTE)); + + verifyColumn(result, columnName("firstname"), columnName("names")); + verifySchema(result, schema("firstname", "string"), schema("names", "string")); + + verifyDataRows(result, rows("Hattie", "Hattie\nBond")); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml new file mode 100644 index 0000000000..57731fa174 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$7], city=[$5], age=[$8], location=[ARRAY_JOIN(array($7, $5), ' + ')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[array($t0, $t1)], expr#4=[' + '], expr#5=[ARRAY_JOIN($t3, $t4)], proj#0..2=[{exprs}], location=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml new file mode 100644 index 0000000000..57731fa174 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$7], city=[$5], age=[$8], location=[ARRAY_JOIN(array($7, $5), ' + ')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[array($t0, $t1)], expr#4=[' + '], expr#5=[ARRAY_JOIN($t3, $t4)], proj#0..2=[{exprs}], location=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 9113663e47..54d69da39b 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -74,6 +74,7 @@ AGGREGATION: 'AGGREGATION'; APPENDPIPE: 'APPENDPIPE'; COLUMN_NAME: 'COLUMN_NAME'; MVCOMBINE: 'MVCOMBINE'; +NOMV: 'NOMV'; //Native JOIN KEYWORDS diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 8cc4ed932d..455fd92c3d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -91,6 +91,7 @@ commands | replaceCommand | mvcombineCommand | fieldformatCommand + | nomvCommand ; commandName @@ -136,6 +137,7 @@ commandName | APPENDPIPE | REPLACE | MVCOMBINE + | NOMV | TRANSPOSE ; @@ -555,6 +557,10 @@ mvcombineCommand : MVCOMBINE fieldExpression (DELIM EQUAL stringLiteral)? ; +nomvCommand + : NOMV fieldExpression + ; + flattenCommand : FLATTEN fieldExpression (AS aliases = identifierSeq)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 1ff9d2818d..ed432dc903 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -93,6 +93,7 @@ import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -908,6 +909,12 @@ public UnresolvedPlan visitMvcombineCommand(OpenSearchPPLParser.MvcombineCommand return new MvCombine(field, delim); } + @Override + public UnresolvedPlan visitNomvCommand(OpenSearchPPLParser.NomvCommandContext ctx) { + Field field = (Field) internalVisitExpression(ctx.fieldExpression()); + return new NoMv(field); + } + @Override public UnresolvedPlan visitGrokCommand(OpenSearchPPLParser.GrokCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 4376b5659d..a47db85c3c 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -83,6 +83,7 @@ import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.NoMv; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -473,6 +474,14 @@ public String visitMvCombine(MvCombine node, String context) { return StringUtils.format("%s | mvcombine delim=%s %s", child, MASK_LITERAL, field); } + @Override + public String visitNoMv(NoMv node, String context) { + String child = node.getChild().getFirst().accept(this, context); + String field = visitExpression(node.getField()); + + return StringUtils.format("%s | nomv %s", child, field); + } + /** Build {@link LogicalSort}. */ @Override public String visitSort(Sort node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java new file mode 100644 index 0000000000..07c62a3e0c --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -0,0 +1,266 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import static org.junit.Assert.assertThrows; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLNoMvTest extends CalcitePPLAbstractTest { + + public CalcitePPLNoMvTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testNoMvBasic() { + String ppl = + "source=EMP | eval arr = array('web', 'production', 'east') | nomv arr | head 1 | fields" + + " arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('web':VARCHAR," + + " 'production':VARCHAR, 'east':VARCHAR), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(ARRAY('web', 'production', 'east'), '\n') `arr`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithMultipleFields() { + String ppl = + "source=EMP | eval arr1 = array('a', 'b'), arr2 = array('x', 'y') | nomv arr1 | nomv arr2 |" + + " head 1 | fields arr1, arr2"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr1=[$8], arr2=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr1=[ARRAY_JOIN(array('a', 'b'), '\n')]," + + " arr2=[ARRAY_JOIN(array('x', 'y'), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(ARRAY('a', 'b'), '\n') `arr1`, ARRAY_JOIN(ARRAY('x', 'y'), '\n')" + + " `arr2`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithArrayFromFields() { + String ppl = + "source=EMP | eval tags = array(ENAME, JOB) | nomv tags | head 1 | fields EMPNO, tags"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], tags=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], tags=[ARRAY_JOIN(array($1, $2), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `tags`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvInPipeline() { + String ppl = + "source=EMP | where DEPTNO = 10 | eval names = array(ENAME, JOB) | nomv names | head 1 |" + + " fields EMPNO, names"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], names=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], names=[ARRAY_JOIN(array($1, $2), '\n')])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `names`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvNonExistentField() { + String ppl = "source=EMP | eval arr = array('a', 'b') | nomv does_not_exist | head 1"; + + Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); + + String msg = String.valueOf(ex.getMessage()); + org.junit.Assert.assertTrue( + "Expected error message to mention missing field. Actual: " + msg, + msg.toLowerCase().contains("does_not_exist") || msg.toLowerCase().contains("field")); + } + + @Test + public void testNoMvWithNestedArray() { + String ppl = + "source=EMP | eval arr = array('a', 'b', 'c') | nomv arr | eval arr_len = length(arr) |" + + " head 1 | fields EMPNO, arr, arr_len"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8], arr_len=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('a', 'b', 'c'), '\n')]," + + " arr_len=[CHAR_LENGTH(ARRAY_JOIN(array('a', 'b', 'c'), '\n'))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n') `arr`," + + " CHAR_LENGTH(ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n')) `arr_len`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithConcatInArray() { + String ppl = + "source=EMP | eval full_name = concat(ENAME, ' - ', JOB), arr = array(full_name) | nomv" + + " arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], full_name=[CONCAT($1, ' - ':VARCHAR, $2)]," + + " arr=[ARRAY_JOIN(array(CONCAT($1, ' - ':VARCHAR, $2)), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(CONCAT(`ENAME`, ' - ', `JOB`)), '\n') `arr`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvSingleElementArray() { + String ppl = "source=EMP | eval arr = array('single') | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('single':VARCHAR)," + + " '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('single'), '\n') `arr`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvEmptyArray() { + String ppl = "source=EMP | eval arr = array() | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array(), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(), '\n') `arr`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvLargeArray() { + String ppl = + "source=EMP | eval arr = array('1', '2', '3', '4', '5', '6', '7', '8', '9', '10') | nomv" + + " arr | head 1 | fields arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('1', '2', '3', '4', '5'," + + " '6', '7', '8', '9', '10':VARCHAR), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(ARRAY('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), '\n') `arr`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvChainedWithOtherOperations() { + String ppl = + "source=EMP | eval arr = array('a', 'b') | nomv arr | eval arr_upper = upper(arr) | head" + + " 1 | fields arr, arr_upper"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(arr=[$8], arr_upper=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('a', 'b'), '\n')]," + + " arr_upper=[UPPER(ARRAY_JOIN(array('a', 'b'), '\n'))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT ARRAY_JOIN(ARRAY('a', 'b'), '\n') `arr`, UPPER(ARRAY_JOIN(ARRAY('a', 'b')," + + " '\n')) `arr_upper`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1e200eb092..bdb07ea6a9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -1030,4 +1030,9 @@ public void testMvcombineCommandWithDelim() { "source=table | mvcombine delim=*** identifier", anonymize("source=t | mvcombine age delim=','")); } + + @Test + public void testNoMvCommand() { + assertEquals("source=table | nomv identifier", anonymize("source=t | nomv firstname")); + } } From 0d714acbc71b70da88c8f13111ef7ab33750168a Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Tue, 10 Feb 2026 13:53:33 -0600 Subject: [PATCH 2/7] Address coderrabbit suggestions. Signed-off-by: Srikanth Padakanti --- .../sql/calcite/CalciteRelNodeVisitor.java | 15 +++++ .../function/BuiltinFunctionName.java | 1 - docs/user/ppl/cmd/nomv.md | 4 +- .../calcite/remote/CalciteNoMvCommandIT.java | 34 +++++----- .../sql/ppl/calcite/CalcitePPLNoMvTest.java | 65 +++++++++++++++++++ 5 files changed, 99 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e4dfc0761f..2d46e972aa 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -3291,6 +3291,21 @@ private void restoreColumnOrderAfterArrayAgg( relBuilder.project(projections, projectionNames, /* force= */ true); } + /** + * Visits a NoMv (no multivalue) node by rewriting it as an Eval node. + * + *

The NoMv command converts multivalue (array) fields to single-value strings by joining array + * elements with newline delimiters. Internally, NoMv rewrites itself to an Eval node containing a + * mvjoin function call: {@code eval field = mvjoin(field, "\n")}. + * + *

The explicit cast to Eval is safe because {@link NoMv#rewriteAsEval()} always returns a + * newly constructed Eval instance and never returns null or other types. + * + * @param node the NoMv node to visit + * @param context the Calcite plan context containing schema and optimization information + * @return the RelNode resulting from visiting the rewritten Eval node + * @see NoMv#rewriteAsEval() + */ @Override public RelNode visitNoMv(NoMv node, CalcitePlanContext context) { return visitEval((Eval) node.rewriteAsEval(), context); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 3902b17bdf..dce558bf7c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -76,7 +76,6 @@ public enum BuiltinFunctionName { MAP_REMOVE(FunctionName.of("map_remove"), true), MVAPPEND(FunctionName.of("mvappend")), MVJOIN(FunctionName.of("mvjoin")), - NOMV(FunctionName.of("nomv")), MVINDEX(FunctionName.of("mvindex")), MVFIND(FunctionName.of("mvfind")), MVZIP(FunctionName.of("mvzip")), diff --git a/docs/user/ppl/cmd/nomv.md b/docs/user/ppl/cmd/nomv.md index ca66834abc..93a04f7bbe 100644 --- a/docs/user/ppl/cmd/nomv.md +++ b/docs/user/ppl/cmd/nomv.md @@ -8,7 +8,7 @@ The `nomv` command converts a multivalue (array) field into a single-value strin ### Key behaviors -- The field must be of **ARRAY type**. For scalar fields, use the `array()` function to create an array first. +- The field must be a **direct field reference** of **ARRAY type**. For scalar fields, use the `array()` function to create an array first. - The specified field is **replaced** with a string containing all array elements joined by newline (`\n`) characters. - **NULL values within the array are automatically filtered out** before joining. - If the field doesn't exist, an error is returned. @@ -50,7 +50,7 @@ fetched rows / total rows = 1/1 +----------------+-------+ ``` -## Example 2: nomv with multiple fields +## Example 2: nomv with an eval-created field ```ppl source=accounts diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java index 2f6214946c..47605b425f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java @@ -32,7 +32,7 @@ public void init() throws Exception { // --------------------------- @Test - public void testSanity_datasetIsLoaded() throws IOException { + public void testSanityDatasetIsLoaded() throws IOException { JSONObject result = executeQuery("source=" + TEST_INDEX_BANK + " | head 5"); int rows = result.getJSONArray("datarows").length(); Assertions.assertTrue(rows > 0, "Expected bank dataset to have rows, got 0"); @@ -43,7 +43,7 @@ public void testSanity_datasetIsLoaded() throws IOException { // --------------------------- @Test - public void testNoMv_basicUsage_withArrayLiterals() throws IOException { + public void testNoMvBasicUsageWithArrayLiterals() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -57,7 +57,7 @@ public void testNoMv_basicUsage_withArrayLiterals() throws IOException { } @Test - public void testNoMv_withArrayFromFields() throws IOException { + public void testNoMvWithArrayFromFields() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -77,7 +77,7 @@ public void testNoMv_withArrayFromFields() throws IOException { } @Test - public void testNoMv_multipleArrays_appliedInSequence() throws IOException { + public void testNoMvMultipleArraysAppliedInSequence() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -92,7 +92,7 @@ public void testNoMv_multipleArrays_appliedInSequence() throws IOException { } @Test - public void testNoMv_inComplexPipeline_withWhereAndSort() throws IOException { + public void testNoMvInComplexPipelineWithWhereAndSort() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -108,7 +108,7 @@ public void testNoMv_inComplexPipeline_withWhereAndSort() throws IOException { } @Test - public void testNoMv_fieldUsableInSubsequentOperations() throws IOException { + public void testNoMvFieldUsableInSubsequentOperations() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -127,7 +127,7 @@ public void testNoMv_fieldUsableInSubsequentOperations() throws IOException { } @Test - public void testNoMv_withStats_afterAggregation() throws IOException { + public void testNoMvWithStatsAfterAggregation() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -146,7 +146,7 @@ public void testNoMv_withStats_afterAggregation() throws IOException { } @Test - public void testNoMv_withEval_worksOnComputedArrays() throws IOException { + public void testNoMvWithEvalWorksOnComputedArrays() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -162,7 +162,7 @@ public void testNoMv_withEval_worksOnComputedArrays() throws IOException { } @Test - public void testNoMv_preservesFieldInPlace() throws IOException { + public void testNoMvPreservesFieldInPlace() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -182,7 +182,7 @@ public void testNoMv_preservesFieldInPlace() throws IOException { // --------------------------- @Test - public void testNoMv_singleElementArray() throws IOException { + public void testNoMvSingleElementArray() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -196,7 +196,7 @@ public void testNoMv_singleElementArray() throws IOException { } @Test - public void testNoMv_emptyArray() throws IOException { + public void testNoMvEmptyArray() throws IOException { String q = "source=" + TEST_INDEX_BANK + " | eval arr = array() | nomv arr | head 1 | fields arr"; @@ -208,7 +208,7 @@ public void testNoMv_emptyArray() throws IOException { } @Test - public void testNoMv_arrayWithNullValues() throws IOException { + public void testNoMvArrayWithNullValues() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -222,7 +222,7 @@ public void testNoMv_arrayWithNullValues() throws IOException { } @Test - public void testNoMv_scalarFieldError() throws IOException { + public void testNoMvScalarFieldError() throws IOException { ResponseException ex = Assertions.assertThrows( ResponseException.class, @@ -239,7 +239,7 @@ public void testNoMv_scalarFieldError() throws IOException { } @Test - public void testNoMv_arrayWithMixedTypes() throws IOException { + public void testNoMvArrayWithMixedTypes() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -254,7 +254,7 @@ public void testNoMv_arrayWithMixedTypes() throws IOException { } @Test - public void testNoMv_largeArray() throws IOException { + public void testNoMvLargeArray() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -269,7 +269,7 @@ public void testNoMv_largeArray() throws IOException { } @Test - public void testNoMv_resultUsedInComparison() throws IOException { + public void testNoMvResultUsedInComparison() throws IOException { String q = "source=" + TEST_INDEX_BANK @@ -288,7 +288,7 @@ public void testNoMv_resultUsedInComparison() throws IOException { // --------------------------- @Test - public void testNoMv_missingField_shouldReturn4xx() throws IOException { + public void testNoMvMissingFieldShouldReturn4xx() throws IOException { // Error when field does not exist ResponseException ex = Assertions.assertThrows( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java index 07c62a3e0c..f59b91f317 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -263,4 +263,69 @@ public void testNoMvChainedWithOtherOperations() { + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testNoMvWithNullableField() { + String ppl = + "source=EMP | eval arr = array(ENAME, COMM) | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array($1, $6), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`), '\n') `arr`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithMultipleNullableFields() { + String ppl = "source=EMP | eval arr = array(MGR, COMM) | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array($3, $6), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`MGR`, `COMM`), '\n') `arr`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNoMvWithMixedNullableAndNonNullableFields() { + String ppl = + "source=EMP | eval arr = array(ENAME, COMM, JOB) | nomv arr | head 1 | fields EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array($1, $6, $2), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`, `JOB`), '\n') `arr`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } From 836fe43026c975fe982337170632a8c382a638cc Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Tue, 10 Feb 2026 15:01:43 -0600 Subject: [PATCH 3/7] Correct the no_push_down yaml expected result Signed-off-by: Srikanth Padakanti --- .../expectedOutput/calcite_no_pushdown/explain_nomv.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml index 57731fa174..1a60deac23 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml @@ -5,6 +5,7 @@ calcite: ')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[array($t0, $t1)], expr#4=[' - '], expr#5=[ARRAY_JOIN($t3, $t4)], proj#0..2=[{exprs}], location=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[array($t7, $t5)], expr#18=[' + '], expr#19=[ARRAY_JOIN($t17, $t18)], state=[$t7], city=[$t5], age=[$t8], location=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file From 39fdbd561b4562641f17061c0ebfe01291265221 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Tue, 10 Feb 2026 16:20:04 -0600 Subject: [PATCH 4/7] Fix Windows CI: Override verifyPPLToSparkSQL to preserve ARRAY_JOIN '\n' delimiter Signed-off-by: Srikanth Padakanti --- .../sql/ppl/calcite/CalcitePPLNoMvTest.java | 85 ++++++++++++++----- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java index f59b91f317..cdf20a7799 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -8,15 +8,40 @@ import static org.junit.Assert.assertThrows; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.rel2sql.RelToSqlConverter; +import org.apache.calcite.rel.rel2sql.SqlImplementor; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; public class CalcitePPLNoMvTest extends CalcitePPLAbstractTest { + private static final String LS = System.lineSeparator(); + public CalcitePPLNoMvTest() { super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); } + /** + * Override to avoid normalizing the '\n' delimiter inside ARRAY_JOIN. The base class's + * normalization replaces ALL \n with System.lineSeparator(), which incorrectly changes the + * delimiter from '\n' to '\r\n' on Windows. The delimiter should always be '\n' regardless of + * platform - it's a data value, not a line separator. + */ + @Override + public void verifyPPLToSparkSQL(RelNode rel, String expected) { + // Don't normalize - expect strings are written with explicit System.lineSeparator() + SqlImplementor.Result result = getConverter().visitRoot(rel); + final SqlNode sqlNode = result.asStatement(); + final String sql = sqlNode.toSqlString(OpenSearchSparkSqlDialect.DEFAULT).getSql(); + org.hamcrest.MatcherAssert.assertThat(sql, org.hamcrest.CoreMatchers.is(expected)); + } + + // Helper to access converter from parent + private RelToSqlConverter getConverter() { + return new RelToSqlConverter(OpenSearchSparkSqlDialect.DEFAULT); + } + @Test public void testNoMvBasic() { String ppl = @@ -35,8 +60,10 @@ public void testNoMvBasic() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT ARRAY_JOIN(ARRAY('web', 'production', 'east'), '\n') `arr`\n" - + "FROM `scott`.`EMP`\n" + "SELECT ARRAY_JOIN(ARRAY('web', 'production', 'east'), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -60,8 +87,10 @@ public void testNoMvWithMultipleFields() { String expectedSparkSql = "SELECT ARRAY_JOIN(ARRAY('a', 'b'), '\n') `arr1`, ARRAY_JOIN(ARRAY('x', 'y'), '\n')" - + " `arr2`\n" - + "FROM `scott`.`EMP`\n" + + " `arr2`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -83,7 +112,8 @@ public void testNoMvWithArrayFromFields() { String expectedSparkSql = "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `tags`\n" - + "FROM `scott`.`EMP`\n" + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -107,7 +137,8 @@ public void testNoMvInPipeline() { String expectedSparkSql = "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `names`\n" - + "FROM `scott`.`EMP`\n" + + "FROM `scott`.`EMP`" + + LS + "WHERE `DEPTNO` = 10\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -145,7 +176,8 @@ public void testNoMvWithNestedArray() { String expectedSparkSql = "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n') `arr`," + " CHAR_LENGTH(ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n')) `arr_len`\n" - + "FROM `scott`.`EMP`\n" + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -168,8 +200,10 @@ public void testNoMvWithConcatInArray() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(CONCAT(`ENAME`, ' - ', `JOB`)), '\n') `arr`\n" - + "FROM `scott`.`EMP`\n" + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(CONCAT(`ENAME`, ' - ', `JOB`)), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -190,8 +224,10 @@ public void testNoMvSingleElementArray() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('single'), '\n') `arr`\n" - + "FROM `scott`.`EMP`\n" + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('single'), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -233,8 +269,10 @@ public void testNoMvLargeArray() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT ARRAY_JOIN(ARRAY('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), '\n') `arr`\n" - + "FROM `scott`.`EMP`\n" + "SELECT ARRAY_JOIN(ARRAY('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -259,7 +297,8 @@ public void testNoMvChainedWithOtherOperations() { String expectedSparkSql = "SELECT ARRAY_JOIN(ARRAY('a', 'b'), '\n') `arr`, UPPER(ARRAY_JOIN(ARRAY('a', 'b')," + " '\n')) `arr_upper`\n" - + "FROM `scott`.`EMP`\n" + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -280,8 +319,10 @@ public void testNoMvWithNullableField() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`), '\n') `arr`\n" - + "FROM `scott`.`EMP`\n" + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -301,8 +342,10 @@ public void testNoMvWithMultipleNullableFields() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`MGR`, `COMM`), '\n') `arr`\n" - + "FROM `scott`.`EMP`\n" + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`MGR`, `COMM`), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -323,8 +366,10 @@ public void testNoMvWithMixedNullableAndNonNullableFields() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`, `JOB`), '\n') `arr`\n" - + "FROM `scott`.`EMP`\n" + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`, `JOB`), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } From 83e6345c3fe661f5a73e7194f640891219fd90fd Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Tue, 10 Feb 2026 17:16:35 -0600 Subject: [PATCH 5/7] Fix Windows CI: Override verifyPPLToSparkSQL to preserve ARRAY_JOIN '\n' delimiter Signed-off-by: Srikanth Padakanti --- .../sql/ppl/calcite/CalcitePPLNoMvTest.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java index cdf20a7799..98ab001b77 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -111,7 +111,8 @@ public void testNoMvWithArrayFromFields() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `tags`\n" + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `tags`" + + LS + "FROM `scott`.`EMP`" + LS + "LIMIT 1"; @@ -136,10 +137,12 @@ public void testNoMvInPipeline() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `names`\n" + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `names`" + + LS + "FROM `scott`.`EMP`" + LS - + "WHERE `DEPTNO` = 10\n" + + "WHERE `DEPTNO` = 10" + + LS + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -175,7 +178,8 @@ public void testNoMvWithNestedArray() { String expectedSparkSql = "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n') `arr`," - + " CHAR_LENGTH(ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n')) `arr_len`\n" + + " CHAR_LENGTH(ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n')) `arr_len`" + + LS + "FROM `scott`.`EMP`" + LS + "LIMIT 1"; @@ -296,7 +300,8 @@ public void testNoMvChainedWithOtherOperations() { String expectedSparkSql = "SELECT ARRAY_JOIN(ARRAY('a', 'b'), '\n') `arr`, UPPER(ARRAY_JOIN(ARRAY('a', 'b')," - + " '\n')) `arr_upper`\n" + + " '\n')) `arr_upper`" + + LS + "FROM `scott`.`EMP`" + LS + "LIMIT 1"; From 8632bc03acf659483c83d048db747e0475b4d81b Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Tue, 10 Feb 2026 17:32:16 -0600 Subject: [PATCH 6/7] Fix Windows CI: Override verifyPPLToSparkSQL to preserve ARRAY_JOIN '\n' delimiter Signed-off-by: Srikanth Padakanti --- .../sql/ppl/calcite/CalcitePPLNoMvTest.java | 58 ++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java index 98ab001b77..c175b45200 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -68,6 +68,32 @@ public void testNoMvBasic() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testNoMvMultipleDocuments() { + String ppl = + "source=EMP | eval arr = array('web', 'production') | nomv arr | head 2 | fields" + + " EMPNO, arr"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], arr=[$8])\n" + + " LogicalSort(fetch=[2])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('web':VARCHAR," + + " 'production':VARCHAR), '\n')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('web', 'production'), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 2"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testNoMvWithMultipleFields() { String ppl = @@ -159,6 +185,32 @@ public void testNoMvNonExistentField() { msg.toLowerCase().contains("does_not_exist") || msg.toLowerCase().contains("field")); } + @Test + public void testNoMvScalarFieldError() { + String ppl = "source=EMP | nomv EMPNO | head 1"; + + Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); + + String msg = String.valueOf(ex.getMessage()); + org.junit.Assert.assertTrue( + "Expected error for non-array field. Actual: " + msg, + msg.toLowerCase().contains("array") || msg.toLowerCase().contains("type")); + } + + @Test + public void testNoMvNonDirectFieldReferenceError() { + String ppl = "source=EMP | eval arr = array('a', 'b') | nomv upper(arr) | head 1"; + + Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); + + String msg = String.valueOf(ex.getMessage()); + org.junit.Assert.assertTrue( + "Expected parser error for non-direct field reference. Actual: " + msg, + msg.contains("(") + || msg.toLowerCase().contains("syntax") + || msg.toLowerCase().contains("parse")); + } + @Test public void testNoMvWithNestedArray() { String ppl = @@ -251,7 +303,11 @@ public void testNoMvEmptyArray() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(), '\n') `arr`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(), '\n') `arr`" + + LS + + "FROM `scott`.`EMP`" + + LS + + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } From 5d7e87990c7383c5a7c72a601dc3748b5f9c4406 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Thu, 12 Feb 2026 21:57:09 -0600 Subject: [PATCH 7/7] Address code review comments Signed-off-by: Srikanth Padakanti --- .../org/opensearch/sql/ast/tree/NoMv.java | 22 +- .../function/BuiltinFunctionName.java | 1 + .../CollectionUDF/ArrayFunctionImpl.java | 18 +- .../expression/function/PPLFuncImpTable.java | 9 +- .../CollectionUDF/ArrayFunctionImplTest.java | 305 ++++++++++++++++++ docs/user/ppl/cmd/nomv.md | 22 +- docs/user/ppl/index.md | 2 +- .../calcite/remote/CalciteNoMvCommandIT.java | 207 +++++++----- .../expectedOutput/calcite/explain_nomv.yaml | 10 +- .../calcite_no_pushdown/explain_nomv.yaml | 10 +- .../calcite/CalcitePPLFunctionTypeTest.java | 2 +- .../sql/ppl/calcite/CalcitePPLNoMvTest.java | 124 ++++--- 12 files changed, 563 insertions(+), 169 deletions(-) create mode 100644 core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java b/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java index fac25f612e..0b24576281 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/NoMv.java @@ -50,20 +50,26 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { } /** - * Rewrites the nomv command as an eval command using mvjoin function. nomv is rewritten - * to: eval = mvjoin(, "\n") + * Rewrites the nomv command as an eval command using mvjoin function with null filtering. nomv + * is rewritten to: eval = coalesce(mvjoin(array_compact(), "\n"), "") * - * @return an Eval node representing the equivalent mvjoin operation + *

The array_compact removes null elements from the array, and coalesce ensures empty arrays + * return empty string instead of null. + * + * @return an Eval node representing the equivalent mvjoin operation with null filtering */ public UnresolvedPlan rewriteAsEval() { - // Create mvjoin function call: mvjoin(field, "\n") + Function arrayCompactFunc = new Function("array_compact", ImmutableList.of(field)); + Function mvjoinFunc = - new Function("mvjoin", ImmutableList.of(field, new Literal("\n", DataType.STRING))); + new Function( + "mvjoin", ImmutableList.of(arrayCompactFunc, new Literal("\n", DataType.STRING))); + + Function coalesceFunc = + new Function("coalesce", ImmutableList.of(mvjoinFunc, new Literal("", DataType.STRING))); - // Create eval expression: field = mvjoin(field, "\n") - Let letExpr = new Let(field, mvjoinFunc); + Let letExpr = new Let(field, coalesceFunc); - // Create eval node and attach the child from this NoMv node Eval eval = new Eval(ImmutableList.of(letExpr)); if (this.child != null) { eval.attach(this.child); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index dce558bf7c..3171a09d39 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -71,6 +71,7 @@ public enum BuiltinFunctionName { ARRAY(FunctionName.of("array")), ARRAY_LENGTH(FunctionName.of("array_length")), ARRAY_SLICE(FunctionName.of("array_slice"), true), + ARRAY_COMPACT(FunctionName.of("array_compact")), MAP_APPEND(FunctionName.of("map_append"), true), MAP_CONCAT(FunctionName.of("map_concat"), true), MAP_REMOVE(FunctionName.of("map_remove"), true), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java index 2e9e53b9ac..9a77a0d5a7 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java @@ -35,7 +35,7 @@ */ public class ArrayFunctionImpl extends ImplementorUDF { public ArrayFunctionImpl() { - super(new ArrayImplementor(), NullPolicy.ANY); + super(new ArrayImplementor(), NullPolicy.NONE); } /** @@ -81,7 +81,8 @@ public Expression implement( /** * The asList will generate the List. We need to convert internally, otherwise, the - * calcite will directly cast like DOUBLE -> INTEGER, which throw error + * calcite will directly cast like DOUBLE -> INTEGER, which throw error. Null elements are + * preserved in the array. */ public static Object internalCast(Object... args) { List originalList = (List) args[0]; @@ -93,7 +94,9 @@ public static Object internalCast(Object... args) { originalList.stream() .map( num -> { - if (num instanceof BigDecimal) { + if (num == null) { + return null; + } else if (num instanceof BigDecimal) { return (BigDecimal) num; } else { return BigDecimal.valueOf(((Number) num).doubleValue()); @@ -104,17 +107,20 @@ public static Object internalCast(Object... args) { case DOUBLE: result = originalList.stream() - .map(i -> (Object) ((Number) i).doubleValue()) + .map(i -> i == null ? null : (Object) ((Number) i).doubleValue()) .collect(Collectors.toList()); break; case FLOAT: result = originalList.stream() - .map(i -> (Object) ((Number) i).floatValue()) + .map(i -> i == null ? null : (Object) ((Number) i).floatValue()) .collect(Collectors.toList()); break; case VARCHAR, CHAR: - result = originalList.stream().map(i -> (Object) i.toString()).collect(Collectors.toList()); + result = + originalList.stream() + .map(i -> i == null ? null : (Object) i.toString()) + .collect(Collectors.toList()); break; default: result = originalList; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 205f3a0f2e..29463c95f4 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -16,6 +16,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDTIME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.AND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_COMPACT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_SLICE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASCII; @@ -990,12 +991,7 @@ void populate() { PPLTypeChecker.family(SqlTypeFamily.ANY)); // Register MVJOIN to use Calcite's ARRAY_JOIN - register( - MVJOIN, - (FunctionImp2) - (builder, array, delimiter) -> - builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter), - PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER)); + registerOperator(MVJOIN, SqlLibraryOperators.ARRAY_JOIN); // Register SPLIT with custom logic for empty delimiter // Case 1: Delimiter is not empty string, use SPLIT @@ -1048,6 +1044,7 @@ void populate() { registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE); registerOperator(ARRAY_LENGTH, SqlLibraryOperators.ARRAY_LENGTH); registerOperator(ARRAY_SLICE, SqlLibraryOperators.ARRAY_SLICE); + registerOperator(ARRAY_COMPACT, SqlLibraryOperators.ARRAY_COMPACT); registerOperator(FORALL, PPLBuiltinOperators.FORALL); registerOperator(EXISTS, PPLBuiltinOperators.EXISTS); registerOperator(FILTER, PPLBuiltinOperators.FILTER); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java new file mode 100644 index 0000000000..6dbc1901fa --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImplTest.java @@ -0,0 +1,305 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for ArrayFunctionImpl. + * + *

These tests verify that the array() function correctly handles null elements inside arrays, + * which is critical for the NOMV command's null filtering functionality via ARRAY_COMPACT. + * + *

The array() function uses NullPolicy.NONE, meaning it accepts null arguments and preserves + * them inside the resulting array. This allows ARRAY_COMPACT to filter them out later. + */ +public class ArrayFunctionImplTest { + + @Test + public void testArrayWithNoArguments() { + Object result = ArrayFunctionImpl.internalCast(Collections.emptyList(), SqlTypeName.VARCHAR); + assertNotNull(result, "Empty array should not be null"); + assertTrue(result instanceof List, "Result should be a List"); + assertEquals(0, ((List) result).size(), "Empty array should have size 0"); + } + + @Test + public void testArrayWithSingleElement() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList("test"), SqlTypeName.VARCHAR); + assertNotNull(result); + assertTrue(result instanceof List); + assertEquals(Arrays.asList("test"), result); + } + + @Test + public void testArrayWithMultipleElements() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", "b", "c"), SqlTypeName.VARCHAR); + assertNotNull(result); + assertEquals(Arrays.asList("a", "b", "c"), result); + } + + // ==================== NULL HANDLING TESTS ==================== + // These tests are critical for NOMV command's null filtering + + @Test + public void testArrayWithNullInMiddle() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", null, "b"), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with null should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(3, list.size(), "Array should preserve null element"); + assertEquals("a", list.get(0)); + assertNull(list.get(1), "Middle element should be null"); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithNullAtBeginning() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(null, "a", "b"), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with null should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(3, list.size(), "Array should preserve null element"); + assertNull(list.get(0), "First element should be null"); + assertEquals("a", list.get(1)); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithNullAtEnd() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", "b", null), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with null should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(3, list.size(), "Array should preserve null element"); + assertEquals("a", list.get(0)); + assertEquals("b", list.get(1)); + assertNull(list.get(2), "Last element should be null"); + } + + @Test + public void testArrayWithMultipleNulls() { + Object result = + ArrayFunctionImpl.internalCast( + Arrays.asList("a", null, "b", null, "c"), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with nulls should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(5, list.size(), "Array should preserve all null elements"); + assertEquals("a", list.get(0)); + assertNull(list.get(1), "Second element should be null"); + assertEquals("b", list.get(2)); + assertNull(list.get(3), "Fourth element should be null"); + assertEquals("c", list.get(4)); + } + + @Test + public void testArrayWithAllNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(null, null, null), SqlTypeName.VARCHAR); + assertNotNull(result, "Array of all nulls should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(3, list.size(), "Array should preserve all null elements"); + assertNull(list.get(0)); + assertNull(list.get(1)); + assertNull(list.get(2)); + } + + @Test + public void testArrayWithSingleNull() { + Object result = + ArrayFunctionImpl.internalCast(Collections.singletonList(null), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with single null should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(1, list.size(), "Array should contain one null element"); + assertNull(list.get(0)); + } + + @Test + public void testArrayWithMixedTypesAndNulls() { + Object result = + ArrayFunctionImpl.internalCast( + Arrays.asList(1, null, "text", null, 3.14), SqlTypeName.VARCHAR); + assertNotNull(result, "Array with mixed types and nulls should not return null"); + assertTrue(result instanceof List); + List list = (List) result; + assertEquals(5, list.size()); + assertEquals("1", list.get(0)); // Converted to string + assertNull(list.get(1)); + assertEquals("text", list.get(2)); + assertNull(list.get(3)); + assertEquals("3.14", list.get(4)); // Converted to string + } + + // ==================== INTEGRATION WITH NOMV WORKFLOW ==================== + // These tests verify the array works correctly in the NOMV workflow: + // array(fields) -> array_compact(array) -> mvjoin(compacted, '\n') -> coalesce(result, '') + + @Test + public void testArrayOutputCanBeProcessedByArrayCompact() { + // Simulate: array(field1, null, field2) -> array_compact + Object arrayResult = + ArrayFunctionImpl.internalCast( + Arrays.asList("value1", null, "value2"), SqlTypeName.VARCHAR); + assertNotNull(arrayResult); + assertTrue(arrayResult instanceof List); + + // Verify the array has the structure expected by array_compact + List list = (List) arrayResult; + assertEquals(3, list.size(), "Array should have 3 elements before compacting"); + + // Simulate what array_compact would do (filter out nulls) + List compacted = list.stream().filter(item -> item != null).collect(Collectors.toList()); + assertEquals(2, compacted.size(), "After compacting, array should have 2 elements"); + assertEquals("value1", compacted.get(0)); + assertEquals("value2", compacted.get(1)); + } + + @Test + public void testArrayWithAllNullsForNomvWorkflow() { + // RFC Example 9: array(null, null, null) should allow NOMV to return "" + Object arrayResult = + ArrayFunctionImpl.internalCast(Arrays.asList(null, null, null), SqlTypeName.VARCHAR); + assertNotNull(arrayResult); + assertTrue(arrayResult instanceof List); + + List list = (List) arrayResult; + assertEquals(3, list.size(), "Array should preserve all nulls"); + + // Simulate array_compact - should result in empty array + List compacted = + list.stream().filter(item -> item != null).collect(java.util.stream.Collectors.toList()); + assertEquals( + 0, compacted.size(), "After compacting all nulls, array should be empty for NOMV to use"); + } + + @Test + public void testArrayPreservesNullsForRFCExample5() { + // RFC Example 5: nomv should filter nulls + // array('a', null, 'b') -> array_compact -> ['a', 'b'] -> mvjoin -> "a\nb" + Object arrayResult = + ArrayFunctionImpl.internalCast(Arrays.asList("a", null, "b"), SqlTypeName.VARCHAR); + assertNotNull(arrayResult); + + List list = (List) arrayResult; + assertEquals(3, list.size(), "Original array should have 3 elements"); + assertNull(list.get(1), "Middle element should be null"); + + // After array_compact + List compacted = + list.stream().filter(item -> item != null).collect(java.util.stream.Collectors.toList()); + assertEquals(2, compacted.size()); + assertEquals("a", compacted.get(0)); + assertEquals("b", compacted.get(1)); + } + + // ==================== EDGE CASE TESTS ==================== + + @Test + public void testArrayWithNumericTypes() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList(1, 2, 3), SqlTypeName.INTEGER); + assertNotNull(result); + assertEquals(Arrays.asList(1, 2, 3), result); + } + + @Test + public void testArrayWithMixedNumericAndString() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList(1, "two", 3), SqlTypeName.VARCHAR); + assertNotNull(result); + assertEquals(Arrays.asList("1", "two", "3"), result); + } + + @Test + public void testArrayWithEmptyStrings() { + // Empty strings should be preserved (they are not null) + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", "", "b"), SqlTypeName.VARCHAR); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals("a", list.get(0)); + assertEquals("", list.get(1), "Empty string should be preserved"); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithBooleanValues() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(true, false, null), SqlTypeName.BOOLEAN); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals(true, list.get(0)); + assertEquals(false, list.get(1)); + assertNull(list.get(2)); + } + + // ==================== TYPE CONVERSION TESTS ==================== + // Test that internalCast correctly handles type conversions while preserving nulls + + @Test + public void testArrayWithDoubleTypePreservesNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(1.5, null, 2.7), SqlTypeName.DOUBLE); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals(1.5, list.get(0)); + assertNull(list.get(1), "Null should be preserved during DOUBLE type conversion"); + assertEquals(2.7, list.get(2)); + } + + @Test + public void testArrayWithFloatTypePreservesNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList(1.5f, null, 2.7f), SqlTypeName.FLOAT); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals(1.5f, list.get(0)); + assertNull(list.get(1), "Null should be preserved during FLOAT type conversion"); + assertEquals(2.7f, list.get(2)); + } + + @Test + public void testArrayWithVarcharTypePreservesNulls() { + Object result = + ArrayFunctionImpl.internalCast(Arrays.asList("a", null, "b"), SqlTypeName.VARCHAR); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals("a", list.get(0)); + assertNull(list.get(1), "Null should be preserved during VARCHAR type conversion"); + assertEquals("b", list.get(2)); + } + + @Test + public void testArrayWithCharTypePreservesNulls() { + Object result = ArrayFunctionImpl.internalCast(Arrays.asList("x", null, "y"), SqlTypeName.CHAR); + assertNotNull(result); + List list = (List) result; + assertEquals(3, list.size()); + assertEquals("x", list.get(0)); + assertNull(list.get(1), "Null should be preserved during CHAR type conversion"); + assertEquals("y", list.get(2)); + } +} diff --git a/docs/user/ppl/cmd/nomv.md b/docs/user/ppl/cmd/nomv.md index 93a04f7bbe..87c17d1247 100644 --- a/docs/user/ppl/cmd/nomv.md +++ b/docs/user/ppl/cmd/nomv.md @@ -8,11 +8,7 @@ The `nomv` command converts a multivalue (array) field into a single-value strin ### Key behaviors -- The field must be a **direct field reference** of **ARRAY type**. For scalar fields, use the `array()` function to create an array first. -- The specified field is **replaced** with a string containing all array elements joined by newline (`\n`) characters. -- **NULL values within the array are automatically filtered out** before joining. -- If the field doesn't exist, an error is returned. -- The operation uses Calcite's ARRAY_JOIN function internally (same underlying implementation as mvjoin). +- The field must be **ARRAY type**. For scalar fields, use the `array()` function to create an array first. --- @@ -71,19 +67,6 @@ fetched rows / total rows = 1/1 +----------------+----------+ ``` -## Example 3: Error when field does not exist - -```ppl -source=accounts -| nomv does_not_exist -``` - -Expected output: -```text -{'reason': 'Invalid Query', 'details': 'Field [does_not_exist] not found.', 'type': 'IllegalArgumentException'} -Error: Query returned no data -``` - --- ## Notes @@ -91,9 +74,8 @@ Error: Query returned no data - The `nomv` command is only available when the Calcite query engine is enabled. - This command is particularly useful when you need to export or display multivalue fields as single strings. - The newline delimiter (`\n`) is fixed and cannot be customized. For custom delimiters, use the `mvjoin` function directly in an eval expression. -- NULL values are automatically filtered out during the join operation, so they do not contribute empty strings to the output. +- NULL values within the array are automatically filtered out when converting the array to a string, so they do not appear in the output or contribute empty lines. ## Related commands - `mvjoin()` -- Function used by nomv internally to join array elements with a custom delimiter -- [`eval`](eval.md) -- Create computed fields using the `array()` and `mvjoin()` functions \ No newline at end of file diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index e322fdf599..262cdf4f02 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -82,7 +82,7 @@ source=accounts | [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | | [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. | | [mvcombine command](cmd/mvcombine.md) | 3.5 | stable (since 3.4) | Combines values of a specified field across rows identical on all other fields. | -| [nomv command](cmd/nomv.md) | 3.6 | experimental (since 3.6) | Converts a multivalue field to a single-value string by joining elements with newlines. | +| [nomv command](cmd/nomv.md) | 3.6 | stable (since 3.6) | Converts a multivalue field to a single-value string by joining elements with newlines. | - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting * **Functions** diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java index 47605b425f..3ad50cdb4b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNoMvCommandIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -25,6 +26,7 @@ public void init() throws Exception { super.init(); enableCalcite(); loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_NULL_VALUES); } // --------------------------- @@ -43,39 +45,40 @@ public void testSanityDatasetIsLoaded() throws IOException { // --------------------------- @Test - public void testNoMvBasicUsageWithArrayLiterals() throws IOException { + public void testNoMvBasicUsageFromRFC() throws IOException { String q = "source=" + TEST_INDEX_BANK - + " | eval arr = array('web', 'production', 'east') | nomv arr | head 1 | fields arr"; + + " | where account_number=1 | eval names = array(firstname, lastname) | nomv names |" + + " fields account_number, names"; JSONObject result = executeQuery(q); - verifySchema(result, schema("arr", null, "string")); + verifySchema(result, schema("account_number", null, "bigint"), schema("names", null, "string")); - verifyDataRows(result, rows("web\nproduction\neast")); + verifyDataRows(result, rows(1, "Amber JOHnny\nDuke Willmington")); } @Test - public void testNoMvWithArrayFromFields() throws IOException { + public void testNoMvEvalCreatedFieldFromRFC() throws IOException { String q = "source=" + TEST_INDEX_BANK - + " | eval names = array(firstname, lastname) | nomv names | head 1 | fields" - + " firstname, lastname, names"; + + " | where account_number=1 | eval location = array(city, state) | nomv location |" + + " fields account_number, location"; JSONObject result = executeQuery(q); verifySchema( - result, - schema("firstname", null, "string"), - schema("lastname", null, "string"), - schema("names", null, "string")); + result, schema("account_number", null, "bigint"), schema("location", null, "string")); - verifyDataRows( - result, rows("Amber JOHnny", "Duke Willmington", "Amber JOHnny\nDuke Willmington")); + verifyDataRows(result, rows(1, "Brogan\nIL")); } + // --------------------------- + // Additional nomv tests + // --------------------------- + @Test public void testNoMvMultipleArraysAppliedInSequence() throws IOException { String q = @@ -162,145 +165,195 @@ public void testNoMvWithEvalWorksOnComputedArrays() throws IOException { } @Test - public void testNoMvPreservesFieldInPlace() throws IOException { + public void testNoMvEmptyArray() throws IOException { String q = - "source=" - + TEST_INDEX_BANK - + " | eval arr = array('a', 'b', 'c') | nomv arr | head 1 | fields arr"; + "source=" + TEST_INDEX_BANK + " | eval arr = array() | nomv arr | head 1 | fields arr"; JSONObject result = executeQuery(q); verifySchema(result, schema("arr", null, "string")); - verifyDataRows(result, rows("a\nb\nc")); - - Assertions.assertEquals(1, result.getJSONArray("schema").length()); + verifyDataRows(result, rows("")); } - // --------------------------- - // Edge case / error semantics - // --------------------------- + @Test + public void testNoMvScalarFieldError() throws IOException { + ResponseException ex = + Assertions.assertThrows( + ResponseException.class, + () -> + executeQuery("source=" + TEST_INDEX_BANK + " | fields firstname | nomv firstname")); + + int status = ex.getResponse().getStatusLine().getStatusCode(); + Assertions.assertEquals(400, status, "Expected 400 for type mismatch"); + + String msg = ex.getMessage(); + + Assertions.assertTrue( + msg.contains("MVJOIN") || msg.contains("ARRAY") || msg.contains("type"), msg); + } @Test - public void testNoMvSingleElementArray() throws IOException { + public void testNoMvResultUsedInComparison() throws IOException { String q = "source=" + TEST_INDEX_BANK - + " | eval arr = array('single') | nomv arr | head 1 | fields arr"; + + " | eval arr = array('test') | nomv arr | where arr = 'test' | head 1 | fields" + + " account_number, arr"; JSONObject result = executeQuery(q); - verifySchema(result, schema("arr", null, "string")); + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); - verifyDataRows(result, rows("single")); + Assertions.assertTrue(result.getJSONArray("datarows").length() > 0); } @Test - public void testNoMvEmptyArray() throws IOException { + public void testNoMvMissingFieldShouldReturn4xx() throws IOException { + ResponseException ex = + Assertions.assertThrows( + ResponseException.class, + () -> executeQuery("source=" + TEST_INDEX_BANK + " | nomv does_not_exist")); + + int status = ex.getResponse().getStatusLine().getStatusCode(); + + Assertions.assertEquals(400, status, "Unexpected status. ex=" + ex.getMessage()); + + String msg = ex.getMessage(); + Assertions.assertTrue( + msg.contains("does_not_exist") + || msg.contains("field") + || msg.contains("Field") + || msg.contains("ARRAY_COMPACT") + || msg.contains("ARRAY"), + msg); + } + + @Test + public void testNoMvWithNullInMiddleOfArray() throws IOException { String q = - "source=" + TEST_INDEX_BANK + " | eval arr = array() | nomv arr | head 1 | fields arr"; + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(firstname, age, lastname) | nomv" + + " arr | fields account_number, arr"; JSONObject result = executeQuery(q); - verifySchema(result, schema("arr", null, "string")); + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); - verifyDataRows(result, rows("")); + verifyDataRows(result, rows(25, "Virginia\nAyala")); } @Test - public void testNoMvArrayWithNullValues() throws IOException { + public void testNoMvWithNullAtBeginningAndEnd() throws IOException { String q = "source=" - + TEST_INDEX_BANK - + " | eval arr = array('first', 'second', 'third') | nomv arr | head 1 | fields arr"; + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(age, firstname, age) | nomv arr |" + + " fields account_number, arr"; JSONObject result = executeQuery(q); - verifySchema(result, schema("arr", null, "string")); + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); - verifyDataRows(result, rows("first\nsecond\nthird")); + verifyDataRows(result, rows(25, "Virginia")); } @Test - public void testNoMvScalarFieldError() throws IOException { - ResponseException ex = - Assertions.assertThrows( - ResponseException.class, - () -> - executeQuery("source=" + TEST_INDEX_BANK + " | fields firstname | nomv firstname")); + public void testNoMvWithAllNulls() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(age, age, age) | nomv arr | fields" + + " account_number, arr"; - int status = ex.getResponse().getStatusLine().getStatusCode(); - Assertions.assertEquals(400, status, "Expected 400 for type mismatch"); + JSONObject result = executeQuery(q); - String msg = ex.getMessage(); + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); - Assertions.assertTrue( - msg.contains("MVJOIN") || msg.contains("ARRAY") || msg.contains("type"), msg); + verifyDataRows(result, rows(25, "")); } @Test - public void testNoMvArrayWithMixedTypes() throws IOException { + public void testNoMvArrayWithAllNulls() throws IOException { String q = "source=" - + TEST_INDEX_BANK - + " | where account_number = 1 | eval arr = array('age:', cast(age as string)) | nomv" - + " arr | fields arr"; + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 25 | eval arr = array(age, age, age) | nomv arr | fields" + + " account_number, arr"; JSONObject result = executeQuery(q); - verifySchema(result, schema("arr", null, "string")); + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); - verifyDataRows(result, rows("age:\n32")); + verifyDataRows(result, rows(25, "")); } @Test - public void testNoMvLargeArray() throws IOException { + public void testNoMvMultipleRowsRowLocalBehavior() throws IOException { String q = "source=" + TEST_INDEX_BANK - + " | eval arr = array('1', '2', '3', '4', '5', '6', '7', '8', '9', '10') | nomv arr |" - + " head 1 | fields arr"; + + " | eval tags = array(firstname, lastname) | nomv tags | sort account_number | head" + + " 3 | fields account_number, tags"; JSONObject result = executeQuery(q); - verifySchema(result, schema("arr", null, "string")); + verifySchema(result, schema("account_number", null, "bigint"), schema("tags", null, "string")); - verifyDataRows(result, rows("1\n2\n3\n4\n5\n6\n7\n8\n9\n10")); + verifyDataRows( + result, + rows(1, "Amber JOHnny\nDuke Willmington"), + rows(6, "Hattie\nBond"), + rows(13, "Nanette\nBates")); } @Test - public void testNoMvResultUsedInComparison() throws IOException { + public void testNoMvNonConsecutiveRowsNoGrouping() throws IOException { String q = "source=" + TEST_INDEX_BANK - + " | eval arr = array('test') | nomv arr | where arr = 'test' | head 1 | fields" - + " account_number, arr"; + + " | where account_number = 1 or account_number = 6 or account_number = 13 | eval" + + " tags = array(firstname, city) | nomv tags | sort account_number | fields" + + " account_number, tags"; JSONObject result = executeQuery(q); - verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + verifySchema(result, schema("account_number", null, "bigint"), schema("tags", null, "string")); - Assertions.assertTrue(result.getJSONArray("datarows").length() > 0); + verifyDataRows( + result, + rows(1, "Amber JOHnny\nBrogan"), + rows(6, "Hattie\nDante"), + rows(13, "Nanette\nNogal")); } - // --------------------------- - // Edge case / error semantics - // --------------------------- + @Test + public void testNoMvNullFieldValue() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK_WITH_NULL_VALUES + + " | where account_number = 6 | eval balance_str = cast(balance as string) | eval arr" + + " = array(balance_str) | nomv arr | fields account_number, arr"; + + JSONObject result = executeQuery(q); + + verifySchema(result, schema("account_number", null, "bigint"), schema("arr", null, "string")); + + verifyDataRows(result, rows(6, "")); + } @Test - public void testNoMvMissingFieldShouldReturn4xx() throws IOException { - // Error when field does not exist - ResponseException ex = - Assertions.assertThrows( - ResponseException.class, - () -> executeQuery("source=" + TEST_INDEX_BANK + " | nomv does_not_exist")); + public void testNoMvArrayWithEmptyStrings() throws IOException { + String q = + "source=" + + TEST_INDEX_BANK + + " | eval tags = array('a', '', 'b') | nomv tags | head 1 | fields tags"; - int status = ex.getResponse().getStatusLine().getStatusCode(); + JSONObject result = executeQuery(q); - Assertions.assertEquals(400, status, "Unexpected status. ex=" + ex.getMessage()); + verifySchema(result, schema("tags", null, "string")); - String msg = ex.getMessage(); - Assertions.assertTrue( - msg.contains("does_not_exist") || msg.contains("field") || msg.contains("Field"), msg); + verifyDataRows(result, rows("a\n\nb")); } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml index 57731fa174..e522ceb639 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_nomv.yaml @@ -1,10 +1,10 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(state=[$7], city=[$5], age=[$8], location=[ARRAY_JOIN(array($7, $5), ' - ')]) + LogicalProject(state=[$7], city=[$5], age=[$8], location=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($7, $5)), ' + '), '':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[array($t0, $t1)], expr#4=[' - '], expr#5=[ARRAY_JOIN($t3, $t4)], proj#0..2=[{exprs}], location=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[array($t0, $t1)], expr#4=[ARRAY_COMPACT($t3)], expr#5=[' + '], expr#6=[ARRAY_JOIN($t4, $t5)], expr#7=['':VARCHAR], expr#8=[COALESCE($t6, $t7)], proj#0..2=[{exprs}], location=[$t8]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml index 1a60deac23..ace49cb6b9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_nomv.yaml @@ -1,11 +1,11 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(state=[$7], city=[$5], age=[$8], location=[ARRAY_JOIN(array($7, $5), ' - ')]) + LogicalProject(state=[$7], city=[$5], age=[$8], location=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($7, $5)), ' + '), '':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=[array($t7, $t5)], expr#18=[' - '], expr#19=[ARRAY_JOIN($t17, $t18)], state=[$t7], city=[$t5], age=[$t8], location=[$t19]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[array($t7, $t5)], expr#18=[ARRAY_COMPACT($t17)], expr#19=[' + '], expr#20=[ARRAY_JOIN($t18, $t19)], expr#21=['':VARCHAR], expr#22=[COALESCE($t20, $t21)], state=[$t7], city=[$t5], age=[$t8], location=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java index 9513558952..4383acf40e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFunctionTypeTest.java @@ -298,6 +298,6 @@ public void testValuesFunctionWithArrayArgType() { public void testMvjoinRejectsNonStringValues() { verifyQueryThrowsException( "source=EMP | eval result = mvjoin(42, ',') | fields result | head 1", - "MVJOIN function expects {[ARRAY,STRING]}, but got [INTEGER,STRING]"); + "MVJOIN function expects {[ARRAY,STRING]|[ARRAY,STRING,STRING]}, but got [INTEGER,STRING]"); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java index c175b45200..5d7669d20a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLNoMvTest.java @@ -54,13 +54,16 @@ public void testNoMvBasic() { "LogicalProject(arr=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('web':VARCHAR," - + " 'production':VARCHAR, 'east':VARCHAR), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('web':VARCHAR, 'production':VARCHAR," + + " 'east':VARCHAR)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT ARRAY_JOIN(ARRAY('web', 'production', 'east'), '\n') `arr`" + "SELECT COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('web', 'production', 'east')), '\n" + + "'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -80,13 +83,16 @@ public void testNoMvMultipleDocuments() { "LogicalProject(EMPNO=[$0], arr=[$8])\n" + " LogicalSort(fetch=[2])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('web':VARCHAR," - + " 'production':VARCHAR), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('web':VARCHAR, 'production':VARCHAR))," + + " '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('web', 'production'), '\n') `arr`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('web', 'production')), '\n" + + "'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -106,14 +112,17 @@ public void testNoMvWithMultipleFields() { "LogicalProject(arr1=[$8], arr2=[$9])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr1=[ARRAY_JOIN(array('a', 'b'), '\n')]," - + " arr2=[ARRAY_JOIN(array('x', 'y'), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr1=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a', 'b')), '\n" + + "'), '':VARCHAR)], arr2=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('x', 'y')), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT ARRAY_JOIN(ARRAY('a', 'b'), '\n') `arr1`, ARRAY_JOIN(ARRAY('x', 'y'), '\n')" - + " `arr2`" + "SELECT COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b')), '\n" + + "'), '') `arr1`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('x', 'y')), '\n" + + "'), '') `arr2`" + LS + "FROM `scott`.`EMP`" + LS @@ -132,12 +141,15 @@ public void testNoMvWithArrayFromFields() { "LogicalProject(EMPNO=[$0], tags=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], tags=[ARRAY_JOIN(array($1, $2), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], tags=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1," + + " $2)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `tags`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `JOB`)), '\n" + + "'), '') `tags`" + LS + "FROM `scott`.`EMP`" + LS @@ -157,13 +169,16 @@ public void testNoMvInPipeline() { "LogicalProject(EMPNO=[$0], names=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], names=[ARRAY_JOIN(array($1, $2), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " names=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1, $2)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalFilter(condition=[=($7, 10)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '\n') `names`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `JOB`)), '\n" + + "'), '') `names`" + LS + "FROM `scott`.`EMP`" + LS @@ -181,8 +196,11 @@ public void testNoMvNonExistentField() { String msg = String.valueOf(ex.getMessage()); org.junit.Assert.assertTrue( - "Expected error message to mention missing field. Actual: " + msg, - msg.toLowerCase().contains("does_not_exist") || msg.toLowerCase().contains("field")); + "Expected error message to mention missing field or type error. Actual: " + msg, + msg.toLowerCase().contains("does_not_exist") + || msg.toLowerCase().contains("field") + || msg.contains("ARRAY_COMPACT") + || msg.contains("ARRAY")); } @Test @@ -223,14 +241,19 @@ public void testNoMvWithNestedArray() { "LogicalProject(EMPNO=[$0], arr=[$8], arr_len=[$9])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('a', 'b', 'c'), '\n')]," - + " arr_len=[CHAR_LENGTH(ARRAY_JOIN(array('a', 'b', 'c'), '\n'))])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b', 'c')), '\n" + + "'), '':VARCHAR)], arr_len=[CHAR_LENGTH(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b', 'c')), '\n" + + "'), '':VARCHAR))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n') `arr`," - + " CHAR_LENGTH(ARRAY_JOIN(ARRAY('a', 'b', 'c'), '\n')) `arr_len`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b', 'c')), '\n" + + "'), '') `arr`, CHAR_LENGTH(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b', 'c'))," + + " '\n" + + "'), '')) `arr_len`" + LS + "FROM `scott`.`EMP`" + LS @@ -251,12 +274,15 @@ public void testNoMvWithConcatInArray() { + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], full_name=[CONCAT($1, ' - ':VARCHAR, $2)]," - + " arr=[ARRAY_JOIN(array(CONCAT($1, ' - ':VARCHAR, $2)), '\n')])\n" + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array(CONCAT($1, ' - ':VARCHAR, $2))), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(CONCAT(`ENAME`, ' - ', `JOB`)), '\n') `arr`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(CONCAT(`ENAME`, ' - ', `JOB`)))," + + " '\n" + + "'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -274,13 +300,14 @@ public void testNoMvSingleElementArray() { "LogicalProject(EMPNO=[$0], arr=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('single':VARCHAR)," - + " '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7]," + + " arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('single':VARCHAR)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY('single'), '\n') `arr`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('single')), '\n'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -298,12 +325,14 @@ public void testNoMvEmptyArray() { "LogicalProject(EMPNO=[$0], arr=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array(), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array())," + + " '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(), '\n') `arr`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY()), '\n'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -323,13 +352,16 @@ public void testNoMvLargeArray() { "LogicalProject(arr=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('1', '2', '3', '4', '5'," - + " '6', '7', '8', '9', '10':VARCHAR), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('1'," + + " '2', '3', '4', '5', '6', '7', '8', '9', '10':VARCHAR)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT ARRAY_JOIN(ARRAY('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), '\n') `arr`" + "SELECT COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('1', '2', '3', '4', '5', '6', '7', '8'," + + " '9', '10')), '\n" + + "'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -349,14 +381,18 @@ public void testNoMvChainedWithOtherOperations() { "LogicalProject(arr=[$8], arr_upper=[$9])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array('a', 'b'), '\n')]," - + " arr_upper=[UPPER(ARRAY_JOIN(array('a', 'b'), '\n'))])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b')), '\n" + + "'), '':VARCHAR)], arr_upper=[UPPER(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array('a'," + + " 'b')), '\n" + + "'), '':VARCHAR))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT ARRAY_JOIN(ARRAY('a', 'b'), '\n') `arr`, UPPER(ARRAY_JOIN(ARRAY('a', 'b')," - + " '\n')) `arr_upper`" + "SELECT COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b')), '\n" + + "'), '') `arr`, UPPER(COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY('a', 'b')), '\n" + + "'), '')) `arr_upper`" + LS + "FROM `scott`.`EMP`" + LS @@ -375,12 +411,15 @@ public void testNoMvWithNullableField() { "LogicalProject(EMPNO=[$0], arr=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array($1, $6), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1," + + " $6)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`), '\n') `arr`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `COMM`)), '\n" + + "'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -398,12 +437,14 @@ public void testNoMvWithMultipleNullableFields() { "LogicalProject(EMPNO=[$0], arr=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array($3, $6), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($3," + + " $6)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`MGR`, `COMM`), '\n') `arr`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`MGR`, `COMM`)), '\n'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS @@ -422,12 +463,15 @@ public void testNoMvWithMixedNullableAndNonNullableFields() { "LogicalProject(EMPNO=[$0], arr=[$8])\n" + " LogicalSort(fetch=[1])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[ARRAY_JOIN(array($1, $6, $2), '\n')])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[COALESCE(ARRAY_JOIN(ARRAY_COMPACT(array($1," + + " $6, $2)), '\n" + + "'), '':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, ARRAY_JOIN(ARRAY(`ENAME`, `COMM`, `JOB`), '\n') `arr`" + "SELECT `EMPNO`, COALESCE(ARRAY_JOIN(ARRAY_COMPACT(ARRAY(`ENAME`, `COMM`, `JOB`)), '\n" + + "'), '') `arr`" + LS + "FROM `scott`.`EMP`" + LS