diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 5015214796..3f7f5702d1 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -81,6 +81,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -732,6 +733,11 @@ public LogicalPlan visitExpand(Expand expand, AnalysisContext context) { throw getOnlyForCalciteException("Expand"); } + @Override + public LogicalPlan visitMvExpand(MvExpand node, AnalysisContext context) { + throw getOnlyForCalciteException("MvExpand"); + } + /** Build {@link LogicalTrendline} for Trendline command. */ @Override public LogicalPlan visitTrendline(Trendline node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 2486b63791..a36112b233 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -69,6 +69,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -475,4 +476,8 @@ public T visitAddColTotals(AddColTotals node, C context) { public T visitMvCombine(MvCombine node, C context) { return visitChildren(node, context); } + + public T visitMvExpand(MvExpand node, C context) { + return visitChildren(node, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java index 0b2e05907b..67256af554 100644 --- a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java @@ -45,6 +45,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -657,6 +658,15 @@ public Node visitMvCombine(MvCombine node, FieldResolutionContext context) { return node; } + @Override + public Node visitMvExpand(MvExpand node, FieldResolutionContext context) { + Set mvExpandFields = extractFieldsFromExpression(node.getField()); + context.pushRequirements(context.getCurrentRequirements().or(mvExpandFields)); + visitChildren(node, context); + context.popRequirements(); + return node; + } + private Set extractFieldsFromAggregation(UnresolvedExpression expr) { Set fields = new HashSet<>(); if (expr instanceof Alias alias) { diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 8b129c6267..5cf98132f7 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ 
b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -63,6 +63,7 @@ import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -137,6 +138,10 @@ public Expand expand(UnresolvedPlan input, Field field, String alias) { return new Expand(field, alias).attach(input); } + public static UnresolvedPlan mvexpand(UnresolvedPlan input, Field field, Integer limit) { + return new MvExpand(field, limit).attach(input); + } + public static UnresolvedPlan projectWithArg( UnresolvedPlan input, List argList, UnresolvedExpression... projectList) { return new Project(Arrays.asList(projectList), argList).attach(input); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java new file mode 100644 index 0000000000..29dc89c541 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/MvExpand.java @@ -0,0 +1,46 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import javax.annotation.Nullable; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Field; + +/** AST node representing the {@code mvexpand} PPL command: {@code mvexpand [limit=N]}. */ +@ToString +@EqualsAndHashCode(callSuper = false) +public class MvExpand extends UnresolvedPlan { + + private UnresolvedPlan child; + @Getter private final Field field; + @Getter @Nullable private final Integer limit; + + public MvExpand(Field field, @Nullable Integer limit) { + this.field = field; + this.limit = limit; + } + + @Override + public MvExpand attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitMvExpand(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 5825011f65..3057e63007 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -126,6 +126,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -955,7 +956,11 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { .toList(); context.relBuilder.aggregate(context.relBuilder.groupKey(groupByList), aggCall); buildExpandRelNode( - context.relBuilder.field(node.getAlias()), node.getAlias(), node.getAlias(), context); + context.relBuilder.field(node.getAlias()), + node.getAlias(), + node.getAlias(), + null, + context); flattenParsedPattern( node.getAlias(), context.relBuilder.field(node.getAlias()), @@ -3166,7 +3171,7 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); String alias = expand.getAlias(); - buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, context); + buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, null, context); return context.relBuilder.peek(); } @@ -3339,6 +3344,61 @@ private void restoreColumnOrderAfterArrayAgg( relBuilder.project(projections, projectionNames, /* force= */ true); } + /** + * MVExpand command visitor. + * + *

<p>Expands a multi-value (array) field into separate rows using Calcite's CORRELATE join with + * UNCOLLECT. Each element of the array becomes a separate row while preserving all other fields + * from the original row. + * + *
<p>Implementation uses {@link #buildExpandRelNode} to create a correlate join between the + * original relation and an uncollected (unnested) version of the target array field. + * + *
<p>Behavior: + * + *
<ul> + *
  <li>Array fields: Each array element is expanded into a separate row + *
  <li>Non-array fields: Treated as single-element arrays (the original row is returned unchanged) + *
  <li>Missing fields: Throws {@link SemanticCheckException} + *
  <li>Optional limit parameter: Limits the number of expanded elements per document + *
</ul>
+ * + * @param mvExpand MVExpand command containing the field to expand and optional limit + * @param context CalcitePlanContext containing the RelBuilder and planning context + * @return RelNode representing the relation with the expanded multi-value field + * @throws SemanticCheckException if the target field does not exist in the schema + */ + @Override + public RelNode visitMvExpand(MvExpand mvExpand, CalcitePlanContext context) { + visitChildren(mvExpand, context); + + final RelBuilder relBuilder = context.relBuilder; + final Field field = mvExpand.getField(); + final String fieldName = field.getField().toString(); + + final RelDataType inputType = relBuilder.peek().getRowType(); + final RelDataTypeField inputField = + inputType.getField(fieldName, /*caseSensitive*/ true, /*elideRecord*/ false); + + if (inputField == null) { + throw new SemanticCheckException( + String.format("Field '%s' not found in the schema", fieldName)); + } + + final RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(field, context); + + final SqlTypeName actual = arrayFieldRex.getType().getSqlTypeName(); + if (actual != SqlTypeName.ARRAY) { + // For non-array fields (scalars), mvexpand just returns the field unchanged. + // This treats single-value fields as if they were arrays with one element. + return relBuilder.peek(); + } + + buildExpandRelNode(arrayFieldRex, fieldName, fieldName, mvExpand.getLimit(), context); + + return relBuilder.peek(); + } + @Override public RelNode visitValues(Values values, CalcitePlanContext context) { if (values.getValues() == null || values.getValues().isEmpty()) { @@ -3583,7 +3643,11 @@ private void flattenParsedPattern( } private void buildExpandRelNode( - RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { + RexInputRef arrayFieldRex, + String arrayFieldName, + String alias, + @Nullable Integer perDocLimit, + CalcitePlanContext context) { // 3. Capture the outer row in a CorrelationId Holder correlVariable = Holder.empty(); context.relBuilder.variable(correlVariable::set); @@ -3598,14 +3662,17 @@ private void buildExpandRelNode( RelNode leftNode = context.relBuilder.build(); // 5. Build join right node and expand the array field using uncollect - RelNode rightNode = - context - .relBuilder - // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter - .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) - .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) - .uncollect(List.of(), false) - .build(); + context + .relBuilder + // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter + .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) + .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) + .uncollect(List.of(), false); + + if (perDocLimit != null) { + context.relBuilder.limit(0, perDocLimit); + } + RelNode rightNode = context.relBuilder.build(); // 6. Perform a nested-loop join (correlate) between the original table and the expanded // array field. 
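Note that `perDocLimit` is applied to the uncollected (right) branch of the correlate, so it caps how many elements are emitted for each source row rather than the total number of output rows. The following is a minimal standalone sketch of the row-level semantics that plan produces, assuming rows are modeled as maps; it is not code from this change, and the `MvExpandSemantics` class and `mvexpandRows` helper are invented for illustration.

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/** Illustrative model of mvexpand row semantics; not the Calcite implementation. */
public class MvExpandSemantics {

  /** Expands the array under {@code field}; a null {@code limit} means no per-row cap. */
  static List<Map<String, Object>> mvexpandRows(
      List<Map<String, Object>> rows, String field, Integer limit) {
    List<Map<String, Object>> out = new ArrayList<>();
    for (Map<String, Object> row : rows) {
      Object value = row.get(field);
      if (value == null) {
        // Null or absent values yield no output rows, matching the null/empty-array
        // integration tests in this change (inner correlate over nothing).
        continue;
      }
      if (!(value instanceof List)) {
        // Scalars behave like single-element arrays: the row passes through unchanged.
        out.add(row);
        continue;
      }
      List<?> elements = (List<?>) value;
      int cap = limit == null ? elements.size() : Math.min(limit, elements.size());
      for (int i = 0; i < cap; i++) {
        // One output row per kept array element; every other field is copied as-is.
        Map<String, Object> expanded = new LinkedHashMap<>(row);
        expanded.put(field, elements.get(i));
        out.add(expanded);
      }
    }
    return out;
  }

  public static void main(String[] args) {
    Map<String, Object> doc = new LinkedHashMap<>();
    doc.put("username", "limituser");
    doc.put("skills", List.of("a", "b", "c", "d", "e"));
    // With limit=3, only the first three elements are expanded for this document.
    mvexpandRows(List.of(doc), "skills", 3).forEach(System.out::println);
  }
}
```

Running `main` prints three rows for `limituser`, mirroring the `limit=3` case in the integration tests below. Schema-level validation, where a field absent from the schema raises `SemanticCheckException`, happens in `visitMvExpand` before this expansion and is outside the sketch.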
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 13449ec304..719d3e6f89 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -1112,6 +1112,14 @@ void populate() { OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER) .or(OperandTypes.family(SqlTypeFamily.MAP, SqlTypeFamily.ANY)), false)); + // Allow using INTERNAL_ITEM when the element type is unknown/undefined at planning time. + // Some datasets (or Calcite's type inference) may give the element an UNDEFINED type. + // Accept a "ignore" first-argument family so INTERNAL_ITEM(elem, 'key') can still be planned + // and resolved at runtime (fallback semantics handled at execution side). - Used in MVEXPAND + registerOperator( + INTERNAL_ITEM, + SqlStdOperatorTable.ITEM, + PPLTypeChecker.family(SqlTypeFamily.IGNORE, SqlTypeFamily.CHARACTER)); registerOperator( XOR, SqlStdOperatorTable.NOT_EQUALS, diff --git a/docs/category.json b/docs/category.json index bcf73cb1a8..196d6e7840 100644 --- a/docs/category.json +++ b/docs/category.json @@ -31,6 +31,7 @@ "user/ppl/cmd/regex.md", "user/ppl/cmd/rename.md", "user/ppl/cmd/multisearch.md", + "user/ppl/cmd/mvexpand.md", "user/ppl/cmd/replace.md", "user/ppl/cmd/rex.md", "user/ppl/cmd/search.md", @@ -82,4 +83,4 @@ "bash_settings": [ "user/ppl/admin/settings.md" ] -} +} \ No newline at end of file diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index e4f55ef1b3..70a7440a90 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -121,3 +121,4 @@ SQL query:: | docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | +----------------+-------------+------------+-------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ + diff --git a/docs/user/ppl/cmd/mvexpand.md b/docs/user/ppl/cmd/mvexpand.md new file mode 100644 index 0000000000..0a209dc125 --- /dev/null +++ b/docs/user/ppl/cmd/mvexpand.md @@ -0,0 +1,141 @@ +# mvexpand + +## Description +The `mvexpand` command expands each value in a multivalue (array) field into a separate row. For each document, every element in the specified array field is returned as a new row. + + +## Syntax +``` +mvexpand [limit=] +``` + +- ``: The multivalue (array) field to expand. (Required) +- `limit`: Maximum number of values per document to expand. (Optional) + + +### Output field naming +After `mvexpand`, the expanded value remains under the same field name (for example, `tags` or `ids`). +If the array contains objects, you can reference subfields (for example, `skills.name`). + + +## Examples + +### Example 1: Basic Expansion (single document) +Input document (case "basic") contains three tag values. 
+ +PPL query: +```ppl +source=people +| eval tags = array('error', 'warning', 'info') +| fields tags +| head 1 +| mvexpand tags +| fields tags +``` + +Expected output: +```text +fetched rows / total rows = 3/3 ++---------+ +| tags | +|---------| +| error | +| warning | +| info | ++---------+ +``` + +### Example 2: Expansion with Limit +Input document (case "ids") contains an array of integers; expand and apply limit. + +PPL query: +```ppl +source=people +| eval ids = array(1, 2, 3, 4, 5) +| fields ids +| head 1 +| mvexpand ids limit=3 +| fields ids +``` + +Expected output: +```text +fetched rows / total rows = 3/3 ++-----+ +| ids | +|-----| +| 1 | +| 2 | +| 3 | ++-----+ +``` + +### Example 3: Expand projects +This example demonstrates expanding a multivalue `projects` field into one row per project. + +PPL query: +```ppl +source=people +| head 1 +| fields projects +| mvexpand projects +| fields projects.name +``` + +Expected output: +```text +fetched rows / total rows = 3/3 ++--------------------------------+ +| projects.name | +|--------------------------------| +| AWS Redshift Spectrum querying | +| AWS Redshift security | +| AWS Aurora security | ++--------------------------------+ +``` + +### Example 4: Single-value array (case "single") +Single-element array should expand to one row. + +PPL query: +```ppl +source=people +| eval tags = array('error') +| fields tags +| head 1 +| mvexpand tags +| fields tags +``` + +Expected output: +```text +fetched rows / total rows = 1/1 ++-------+ +| tags | +|-------| +| error | ++-------+ +``` + +### Example 5: Missing Field +If the field does not exist in the input schema (for example, it is not mapped or was projected out earlier), mvexpand throws a semantic check exception. + +PPL query: +```ppl +source=people +| eval some_field = 'x' +| fields some_field +| head 1 +| mvexpand tags +| fields tags +``` + +Expected output: +```text +{'reason': 'Invalid Query', 'details': "Field 'tags' not found in the schema", 'type': 'SemanticCheckException'} +Error: Query returned no data +``` + +## Notes about these doctests +- The examples below generate deterministic multivalue fields using `eval` + `array()` so doctests are stable. +- All examples run against a single source index (`people`) and use `head 1` to keep output predictable. \ No newline at end of file diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 12afe96eea..7e3872fb22 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -36,53 +36,54 @@ source=accounts **Note:** Experimental commands are ready for use, but specific parameters may change based on feedback. | Command Name | Version Introduced | Current Status | Command Description | -| --- | --- | --- | --- | -| [search command](cmd/search.md) | 1.0 | stable (since 1.0) | Retrieve documents from the index. | -| [where command](cmd/where.md) | 1.0 | stable (since 1.0) | Filter the search result using boolean expressions. | -| [subquery command](cmd/subquery.md) | 3.0 | experimental (since 3.0) | Embed one PPL query inside another for complex filtering and data retrieval operations. | -| [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. | -| [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. | -| [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. 
| -| [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result | -| [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. | -| [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. | -| [flatten command](cmd/flatten.md) | 3.1 | experimental (since 3.1) | Flatten a struct or an object field into separate fields in a document. | -| [table command](cmd/table.md) | 3.3 | experimental (since 3.3) | Keep or remove fields from the search result using enhanced syntax options. | -| [stats command](cmd/stats.md) | 1.0 | stable (since 1.0) | Calculate aggregation from search results. | -| [eventstats command](cmd/eventstats.md) | 3.1 | experimental (since 3.1) | Calculate aggregation statistics and add them as new fields to each event. | -| [streamstats command](cmd/streamstats.md) | 3.4 | experimental (since 3.4) | Calculate cumulative or rolling statistics as events are processed in order. | -| [bin command](cmd/bin.md) | 3.3 | experimental (since 3.3) | Group numeric values into buckets of equal intervals. | -| [timechart command](cmd/timechart.md) | 3.3 | experimental (since 3.3) | Create time-based charts and visualizations. | -| [chart command](cmd/chart.md) | 3.4 | experimental (since 3.4) | Apply statistical aggregations to search results and group the data for visualizations. | -| [trendline command](cmd/trendline.md) | 3.0 | experimental (since 3.0) | Calculate moving averages of fields. | -| [sort command](cmd/sort.md) | 1.0 | stable (since 1.0) | Sort all the search results by the specified fields. | -| [reverse command](cmd/reverse.md) | 3.2 | experimental (since 3.2) | Reverse the display order of search results. | -| [head command](cmd/head.md) | 1.0 | stable (since 1.0) | Return the first N number of specified results after an optional offset in search order. | -| [dedup command](cmd/dedup.md) | 1.0 | stable (since 1.0) | Remove identical documents defined by the field from the search result. | -| [top command](cmd/top.md) | 1.0 | stable (since 1.0) | Find the most common tuple of values of all fields in the field list. | -| [rare command](cmd/rare.md) | 1.0 | stable (since 1.0) | Find the least common tuple of values of all fields in the field list. | -| [parse command](cmd/parse.md) | 1.3 | stable (since 1.3) | Parse a text field with a regular expression and append the result to the search result. | -| [grok command](cmd/grok.md) | 2.4 | stable (since 2.4) | Parse a text field with a grok pattern and append the results to the search result. | -| [rex command](cmd/rex.md) | 3.3 | experimental (since 3.3) | Extract fields from a raw text field using regular expression named capture groups. | -| [regex command](cmd/regex.md) | 3.3 | experimental (since 3.3) | Filter search results by matching field values against a regular expression pattern. | -| [spath command](cmd/spath.md) | 3.3 | experimental (since 3.3) | Extract fields from structured text data. | -| [patterns command](cmd/patterns.md) | 2.4 | stable (since 2.4) | Extract log patterns from a text field and append the results to the search result. | -| [join command](cmd/join.md) | 3.0 | stable (since 3.0) | Combine two datasets together. 
| -| [append command](cmd/append.md) | 3.3 | experimental (since 3.3) | Append the result of a sub-search to the bottom of the input search results. | -| [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | -| [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | -| [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | -| [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | -| [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | -| [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | -| [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | -| [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | -| [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | -| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | -| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | -| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. | -| [mvcombine command](cmd/mvcombine.md) | 3.5 | stable (since 3.4) | Combines values of a specified field across rows identical on all other fields. | - +| --- |--------------------| --- | --- | +| [search command](cmd/search.md) | 1.0 | stable (since 1.0) | Retrieve documents from the index. | +| [where command](cmd/where.md) | 1.0 | stable (since 1.0) | Filter the search result using boolean expressions. | +| [subquery command](cmd/subquery.md) | 3.0 | experimental (since 3.0) | Embed one PPL query inside another for complex filtering and data retrieval operations. | +| [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. | +| [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. | +| [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. | +| [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result | +| [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. | +| [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. | +| [flatten command](cmd/flatten.md) | 3.1 | experimental (since 3.1) | Flatten a struct or an object field into separate fields in a document. | +| [table command](cmd/table.md) | 3.3 | experimental (since 3.3) | Keep or remove fields from the search result using enhanced syntax options. | +| [stats command](cmd/stats.md) | 1.0 | stable (since 1.0) | Calculate aggregation from search results. 
| +| [eventstats command](cmd/eventstats.md) | 3.1 | experimental (since 3.1) | Calculate aggregation statistics and add them as new fields to each event. | +| [streamstats command](cmd/streamstats.md) | 3.4 | experimental (since 3.4) | Calculate cumulative or rolling statistics as events are processed in order. | +| [bin command](cmd/bin.md) | 3.3 | experimental (since 3.3) | Group numeric values into buckets of equal intervals. | +| [timechart command](cmd/timechart.md) | 3.3 | experimental (since 3.3) | Create time-based charts and visualizations. | +| [chart command](cmd/chart.md) | 3.4 | experimental (since 3.4) | Apply statistical aggregations to search results and group the data for visualizations. | +| [trendline command](cmd/trendline.md) | 3.0 | experimental (since 3.0) | Calculate moving averages of fields. | +| [sort command](cmd/sort.md) | 1.0 | stable (since 1.0) | Sort all the search results by the specified fields. | +| [reverse command](cmd/reverse.md) | 3.2 | experimental (since 3.2) | Reverse the display order of search results. | +| [head command](cmd/head.md) | 1.0 | stable (since 1.0) | Return the first N number of specified results after an optional offset in search order. | +| [dedup command](cmd/dedup.md) | 1.0 | stable (since 1.0) | Remove identical documents defined by the field from the search result. | +| [top command](cmd/top.md) | 1.0 | stable (since 1.0) | Find the most common tuple of values of all fields in the field list. | +| [rare command](cmd/rare.md) | 1.0 | stable (since 1.0) | Find the least common tuple of values of all fields in the field list. | +| [parse command](cmd/parse.md) | 1.3 | stable (since 1.3) | Parse a text field with a regular expression and append the result to the search result. | +| [grok command](cmd/grok.md) | 2.4 | stable (since 2.4) | Parse a text field with a grok pattern and append the results to the search result. | +| [rex command](cmd/rex.md) | 3.3 | experimental (since 3.3) | Extract fields from a raw text field using regular expression named capture groups. | +| [regex command](cmd/regex.md) | 3.3 | experimental (since 3.3) | Filter search results by matching field values against a regular expression pattern. | +| [spath command](cmd/spath.md) | 3.3 | experimental (since 3.3) | Extract fields from structured text data. | +| [patterns command](cmd/patterns.md) | 2.4 | stable (since 2.4) | Extract log patterns from a text field and append the results to the search result. | +| [join command](cmd/join.md) | 3.0 | stable (since 3.0) | Combine two datasets together. | +| [append command](cmd/append.md) | 3.3 | experimental (since 3.3) | Append the result of a sub-search to the bottom of the input search results. | +| [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | +| [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | +| [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | +| [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | +| [mvexpand command](cmd/mvexpand.md) | 3.5 | experimental (since 3.4) | Expand a multi-valued field into separate documents (one per value). | +| [mvcombine command](cmd/mvcombine.md) | 3.5 | stable (since 3.4) | Combines values of a specified field across rows identical on all other fields. 
| +| [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | +| [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | +| [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | +| [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | +| [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | +| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | +| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | +| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. | + - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting * **Functions** - [Aggregation Functions](functions/aggregations.md) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 50cdd8d847..00a4343557 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -35,6 +35,7 @@ CalciteDescribeCommandIT.class, CalciteExpandCommandIT.class, CalciteFieldFormatCommandIT.class, + CalciteMvExpandCommandIT.class, CalciteFieldsCommandIT.class, CalciteFillNullCommandIT.class, CalciteFlattenCommandIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index eade854276..1b47eea2c5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -56,6 +56,7 @@ public void init() throws Exception { loadIndex(Index.WORKER); loadIndex(Index.WORK_INFORMATION); loadIndex(Index.WEBLOG); + loadIndex(Index.MVEXPAND_EDGE_CASES); loadIndex(Index.DATA_TYPE_ALIAS); loadIndex(Index.DEEP_NESTED); loadIndex(Index.CASCADED_NESTED); @@ -348,6 +349,14 @@ public void testExplainMultisearchTimestampInterleaving() throws IOException { assertYamlEqualsIgnoreId(expected, result); } + @Test + public void testMvexpandExplain() throws IOException { + // mvexpand explain plan validation + String expected = loadExpectedPlan("explain_mvexpand.yaml"); + explainQueryYaml( + "source=mvexpand_edge_cases | eval skills_arr = array(1, 2, 3) | mvexpand skills_arr"); + } + // Only for Calcite @Test public void testExplainIsBlank() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java new file mode 100644 index 0000000000..8832398e59 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvExpandCommandIT.java @@ -0,0 +1,263 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static 
org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import org.json.JSONObject; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteMvExpandCommandIT extends PPLIntegTestCase { + + private static final String INDEX = Index.MVEXPAND_EDGE_CASES.getName(); + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.MVEXPAND_EDGE_CASES); + } + + @Test + public void testMvexpandSingleElement() throws Exception { + String q1 = + String.format( + "source=%s | mvexpand skills | where username='single' | fields username, skills", + INDEX); + JSONObject r1 = executeQuery(q1); + + assertSingleRowNestedFieldEquals(r1, "skills", "name", "go"); + + String q2 = + String.format( + "source=%s | mvexpand skills | where username='single' | fields username, skills.name", + INDEX); + JSONObject r2 = executeQuery(q2); + verifyDataRows(r2, rows("single", "go")); + } + + /** + * Asserts the result has exactly one row and that the given column is a MAP/object containing + * nestedKey=nestedValue. + */ + private static void assertSingleRowNestedFieldEquals( + JSONObject result, String mapColumn, String nestedKey, String expectedValue) { + var dataRows = result.getJSONArray("datarows"); + Assertions.assertEquals(1, dataRows.length(), "Expected exactly one row"); + + var schema = result.getJSONArray("schema"); + + int mapIdx = -1; + for (int i = 0; i < schema.length(); i++) { + if (mapColumn.equals(schema.getJSONObject(i).getString("name"))) { + mapIdx = i; + break; + } + } + Assertions.assertTrue(mapIdx >= 0, "Column not found in schema: " + mapColumn); + + var row0 = dataRows.getJSONArray(0); + var skillsObj = row0.getJSONObject(mapIdx); // this is the MAP/object + Assertions.assertEquals(expectedValue, skillsObj.optString(nestedKey, null)); + } + + @Test + public void testMvexpandEmptyArray() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='empty' | fields username, skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result); + } + + @Test + public void testMvexpandNullArray() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='nullskills' | fields username," + + " skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result); + } + + @Test + public void testMvexpandNoArrayField() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='noskills' | fields username," + + " skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result); + } + + @Test + public void testMvexpandDuplicate() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='duplicate' | fields username," + + " skills.name | sort skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result, rows("duplicate", "dup"), rows("duplicate", "dup")); + } + + @Test + public void testMvexpandHappyMultipleElements() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='happy' | fields username, skills.name |" + + " sort skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result, rows("happy", 
"java"), rows("happy", "python"), rows("happy", "sql")); + } + + @Test + public void testMvexpandPartialElementMissingName() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='partial' | fields username, skills.name" + + " | sort skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows( + result, + rows("partial", "kotlin"), + rows("partial", (String) null), + rows("partial", (String) null)); + } + + @Test + public void testMvexpandMixedShapesKeepsAllElements() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='mixed_shapes' | fields username," + + " skills.name | sort skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyDataRows(result, rows("mixed_shapes", "elixir"), rows("mixed_shapes", "haskell")); + } + + @Test + public void testMvexpandFlattenedSchemaPresence() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='complex' | fields username," + + " skills.level, skills.name", + INDEX); + JSONObject result = executeQuery(query); + + verifySchema( + result, + schema("username", "string"), + schema("skills.level", "string"), + schema("skills.name", "string")); + + verifyDataRows( + result, + rows("complex", "expert", "ml"), + rows("complex", (String) null, "ai"), + rows("complex", "novice", (String) null)); + } + + @Test + public void testMvexpandOnNonArrayFieldMapping() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills_not_array | where username='u1' | fields username," + + " skills_not_array", + INDEX); + + JSONObject result = executeQuery(query); + + verifyNumOfRows(result, 1); + verifyDataRows(result, rows("u1", "scala")); + } + + @Test + public void testMvexpandMissingFieldReturnsEmpty() throws Exception { + // single-index version: username='noskills' doc has no "skills" field at all + String query = + String.format( + "source=%s | mvexpand skills | where username='noskills' | fields username, skills", + INDEX); + + JSONObject result = executeQuery(query); + verifyDataRows(result); + } + + @Test + public void testMvexpandLimitParameter() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills limit=3 | where username='limituser' | fields username," + + " skills.name", + INDEX); + JSONObject result = executeQuery(query); + verifyNumOfRows(result, 3); + verifyDataRows(result, rows("limituser", "a"), rows("limituser", "b"), rows("limituser", "c")); + } + + @Test + public void testMvexpandTypeInferenceForHeterogeneousSubfields() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='hetero_types' | fields username," + + " skills.level", + INDEX); + JSONObject result = executeQuery(query); + + verifyDataRows(result, rows("hetero_types", "senior"), rows("hetero_types", "3")); + } + + @Test + public void testMvexpandLargeArrayElements() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills | where username='large' | fields username, skills.name |" + + " sort skills.name", + INDEX); + JSONObject result = executeQuery(query); + + verifyNumOfRows(result, 10); + + verifyDataRows( + result, + rows("large", "s1"), + rows("large", "s2"), + rows("large", "s3"), + rows("large", "s4"), + rows("large", "s5"), + rows("large", "s6"), + rows("large", "s7"), + rows("large", "s8"), + rows("large", "s9"), + rows("large", "s10")); + } + + @Test + public void 
testMvexpandOnIntegerFieldMapping() throws Exception { + String query = + String.format( + "source=%s | mvexpand skills_int | where username='u_int' | fields username," + + " skills_int", + INDEX); + + JSONObject result = executeQuery(query); + + verifyNumOfRows(result, 1); + verifyDataRows(result, rows("u_int", 5)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index c9de7a584c..a5228c226e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -686,6 +686,11 @@ public enum Index { "_doc", getNestedSimpleIndexMapping(), "src/test/resources/nested_simple.json"), + MVEXPAND_EDGE_CASES( + "mvexpand_edge_cases", + "mvexpand_edge_cases", + getMappingFile("mvexpand_edge_cases_mapping.json"), + "src/test/resources/mvexpand_edge_cases.json"), DEEP_NESTED( TestsConstants.TEST_INDEX_DEEP_NESTED, "_doc", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index ad8a232bab..cd6c72bffa 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -92,6 +92,7 @@ public class TestsConstants { public static final String TEST_INDEX_LOGS = TEST_INDEX + "_logs"; public static final String TEST_INDEX_OTEL_LOGS = TEST_INDEX + "_otel_logs"; public static final String TEST_INDEX_TIME_DATE_NULL = TEST_INDEX + "_time_date_null"; + public static final String TEST_INDEX_MVEXPAND_EDGE_CASES = "mvexpand_edge_cases"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public static final String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index c5a1d08c37..054b1c6aef 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -10,6 +10,7 @@ import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_ENGINE_ENABLED; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MVEXPAND_EDGE_CASES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; import java.io.IOException; @@ -25,6 +26,7 @@ public void init() throws Exception { loadIndex(Index.BANK); loadIndex(Index.DOG); loadIndex(Index.STRINGS); + loadIndex(Index.MVEXPAND_EDGE_CASES); } @Test @@ -213,6 +215,19 @@ public void testTransposeCommand() throws IOException { } } + @Test + public void testMvExpandCommand() throws IOException { + JSONObject result; + try { + result = + executeQuery( + String.format("search source=%s | mvexpand skills", TEST_INDEX_MVEXPAND_EDGE_CASES)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + verifyQuery(result); + } + @Test public void testFieldFormatCommand() throws IOException { JSONObject result; diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java 
b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 571d915517..049d41c527 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -31,6 +31,8 @@ protected void init() throws Exception { loadIndex(Index.ACCOUNT, remoteClient()); loadIndex(Index.TIME_TEST_DATA); loadIndex(Index.TIME_TEST_DATA, remoteClient()); + loadIndex(Index.MVEXPAND_EDGE_CASES); + loadIndex(Index.MVEXPAND_EDGE_CASES, remoteClient()); enableCalcite(); } @@ -418,4 +420,28 @@ public void testCrossClusterFieldFormat() throws IOException { verifyDataRows( result, rows("Hattie", 36, 5686, "$5,686"), rows("Nanette", 28, 32838, "$32,838")); } + + @Test + public void testCrossClusterMvExpandBasic() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | mvexpand skills | where username='happy' | fields username," + + " skills.name | sort skills.name", + TEST_INDEX_MVEXPAND_REMOTE)); + verifySchema(result, schema("username", "string"), schema("skills.name", "string")); + verifyDataRows(result, rows("happy", "java"), rows("happy", "python"), rows("happy", "sql")); + } + + @Test + public void testCrossClusterMvExpandWithLimit() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | mvexpand skills limit=2 | where username='limituser' | fields" + + " username, skills.name | sort skills.name", + TEST_INDEX_MVEXPAND_REMOTE)); + verifySchema(result, schema("username", "string"), schema("skills.name", "string")); + verifyDataRows(result, rows("limituser", "a"), rows("limituser", "b")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterTestBase.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterTestBase.java index d9de95c663..dc4d7d0daf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterTestBase.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterTestBase.java @@ -8,6 +8,7 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_MVEXPAND_EDGE_CASES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import org.opensearch.sql.ppl.PPLIntegTestCase; @@ -36,6 +37,8 @@ public class CrossClusterTestBase extends PPLIntegTestCase { REMOTE_CLUSTER + ":" + TEST_INDEX_ACCOUNT; protected static final String TEST_INDEX_TIME_DATA_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_TIME_DATA; + protected static final String TEST_INDEX_MVEXPAND_REMOTE = + REMOTE_CLUSTER + ":" + TEST_INDEX_MVEXPAND_EDGE_CASES; @Override protected void init() throws Exception { diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml new file mode 100644 index 0000000000..3aba9e3098 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvexpand.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$2], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(username=[$0], skills_arr=[$1]) + LogicalUnnest + LogicalProject(username=[$0], skills_arr=[$1]) + CalciteLogicalIndexScan(table=[[OpenSearch, 
mvexpand_edge_cases]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}]) + EnumerableUnnest + EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml new file mode 100644 index 0000000000..3aba9e3098 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvexpand.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$2], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(username=[$0], skills_arr=[$1]) + LogicalUnnest + LogicalProject(username=[$0], skills_arr=[$1]) + CalciteLogicalIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}]) + EnumerableUnnest + EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, mvexpand_edge_cases]]) diff --git a/integ-test/src/test/resources/indexDefinitions/mvexpand_edge_cases_mapping.json b/integ-test/src/test/resources/indexDefinitions/mvexpand_edge_cases_mapping.json new file mode 100644 index 0000000000..a0b5519d17 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/mvexpand_edge_cases_mapping.json @@ -0,0 +1,16 @@ +{ + "mappings": { + "properties": { + "username": { "type": "keyword" }, + "skills": { + "type": "nested", + "properties": { + "name": { "type": "keyword" }, + "level": { "type": "keyword" } + } + }, + "skills_not_array": { "type": "keyword" }, + "skills_int": { "type": "integer" } + } + } +} diff --git a/integ-test/src/test/resources/mvexpand_edge_cases.json b/integ-test/src/test/resources/mvexpand_edge_cases.json new file mode 100644 index 0000000000..c7632bb1e2 --- /dev/null +++ b/integ-test/src/test/resources/mvexpand_edge_cases.json @@ -0,0 +1,30 @@ +{"index":{}} +{"username":"happy","skills":[{"name":"python"},{"name":"java"},{"name":"sql"}]} +{"index":{}} +{"username":"single","skills":[{"name":"go"}]} +{"index":{}} +{"username":"empty","skills":[]} +{"index":{}} +{"username":"nullskills","skills":null} +{"index":{}} +{"username":"noskills"} +{"index":{}} +{"username":"missingattr","skills":[{"name":"c"},{"level":"advanced"}]} +{"index":{}} +{"username":"complex","skills":[{"name":"ml","level":"expert"},{"name":"ai"},{"level":"novice"}]} +{"index":{}} +{"username":"duplicate","skills":[{"name":"dup"},{"name":"dup"}]} +{"index":{}} +{"username":"large","skills":[{"name":"s1"},{"name":"s2"},{"name":"s3"},{"name":"s4"},{"name":"s5"},{"name":"s6"},{"name":"s7"},{"name":"s8"},{"name":"s9"},{"name":"s10"}]} +{"index":{}} +{"username":"partial","skills":[{"name":"kotlin"},{"level":"intermediate"},{"level":"advanced"}]} +{"index":{}} +{"username":"mixed_shapes","skills":[{"name":"elixir"},{"name":"haskell"}]} +{"index":{}} +{"username":"hetero_types","skills":[{"level":"senior"},{"level":"3"}]} +{"index":{}} +{"username":"limituser","skills":[{"name":"a"},{"name":"b"},{"name":"c"},{"name":"d"},{"name":"e"}]} +{"index":{}} +{"username":"u1","skills_not_array":"scala"} +{"index":{}} +{"username":"u_int","skills_int":5} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 9113663e47..86dfeb9aa8 100644 --- 
a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -56,6 +56,7 @@ ADDCOLTOTALS: 'ADDCOLTOTALS'; ROW: 'ROW'; COL: 'COL'; EXPAND: 'EXPAND'; +MVEXPAND: 'MVEXPAND'; SIMPLE_PATTERN: 'SIMPLE_PATTERN'; BRAIN: 'BRAIN'; VARIABLE_COUNT_THRESHOLD: 'VARIABLE_COUNT_THRESHOLD'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 8cc4ed932d..73282369d1 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -80,6 +80,7 @@ commands | addcoltotalsCommand | appendCommand | expandCommand + | mvexpandCommand | flattenCommand | reverseCommand | regexCommand @@ -122,6 +123,7 @@ commandName | ML | FILLNULL | EXPAND + | MVEXPAND | FLATTEN | TRENDLINE | TIMECHART @@ -555,6 +557,10 @@ mvcombineCommand : MVCOMBINE fieldExpression (DELIM EQUAL stringLiteral)? ; +mvexpandCommand + : MVEXPAND fieldExpression (LIMIT EQUAL INTEGER_LITERAL)? + ; + flattenCommand : FLATTEN fieldExpression (AS aliases = identifierSeq)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index d4426590e0..d2fd9856b1 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -93,6 +93,7 @@ import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -908,6 +909,14 @@ public UnresolvedPlan visitMvcombineCommand(OpenSearchPPLParser.MvcombineCommand return new MvCombine(field, delim); } + @Override + public UnresolvedPlan visitMvexpandCommand(OpenSearchPPLParser.MvexpandCommandContext ctx) { + Field field = (Field) expressionBuilder.visit(ctx.fieldExpression()); + Integer limit = + ctx.INTEGER_LITERAL() != null ? 
Integer.parseInt(ctx.INTEGER_LITERAL().getText()) : null; + return new MvExpand(field, limit); + } + @Override public UnresolvedPlan visitGrokCommand(OpenSearchPPLParser.GrokCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 4376b5659d..f440930b11 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -83,6 +83,7 @@ import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; import org.opensearch.sql.ast.tree.MvCombine; +import org.opensearch.sql.ast.tree.MvExpand; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -685,6 +686,16 @@ public String visitAppend(Append node, String context) { return StringUtils.format("%s | append [%s ]", child, subsearch); } + @Override + public String visitMvExpand(MvExpand node, String context) { + String child = node.getChild().get(0).accept(this, context); + String field = MASK_COLUMN; // Always anonymize field names + if (node.getLimit() != null) { + return StringUtils.format("%s | mvexpand %s limit=%s", child, field, MASK_LITERAL); + } + return StringUtils.format("%s | mvexpand %s", child, field); + } + @Override public String visitMultisearch(Multisearch node, String context) { List anonymizedSubsearches = new ArrayList<>(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java new file mode 100644 index 0000000000..01b1bdf52d --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvExpandTest.java @@ -0,0 +1,185 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.Arrays; +import java.util.List; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.schema.Table; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Assert; +import org.junit.Test; + +public class CalcitePPLMvExpandTest extends CalcitePPLAbstractTest { + + public CalcitePPLMvExpandTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + /** + * There is no existing table with arrays. We create one for test purpose. + * + *
<p>
This mirrors CalcitePPLExpandTest.TableWithArray. + */ + public static class TableWithArray implements Table { + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("DEPTNO", SqlTypeName.INTEGER) + .add( + "EMPNOS", + factory.createArrayType(factory.createSqlType(SqlTypeName.INTEGER), -1)) + .build(); + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Add an empty table with name DEPT for test purpose + schema.add("DEPT", new TableWithArray()); + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testMvExpandBasic() { + String ppl = "source=DEPT | mvexpand EMPNOS"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(DEPTNO=[$0], EMPNOS=[$2])\n" + + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " Uncollect\n" + + " LogicalProject(EMPNOS=[$cor0.EMPNOS])\n" + + " LogicalValues(tuples=[[{ 0 }]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `$cor0`.`DEPTNO`, `t00`.`EMPNOS`\n" + + "FROM `scott`.`DEPT` `$cor0`,\n" + + "LATERAL UNNEST((SELECT `$cor0`.`EMPNOS`\n" + + "FROM (VALUES (0)) `t` (`ZERO`))) `t00` (`EMPNOS`)"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvExpandWithLimitParameter() { + String ppl = "source=DEPT | mvexpand EMPNOS limit=2"; + RelNode root = getRelNode(ppl); + + assertContains(root, "LogicalCorrelate"); + assertContains(root, "Uncollect"); + + assertAnyContains(root, "fetch=", "LIMIT", "RowNumber", "Window"); + } + + @Test + public void testMvExpandProjectNested() { + String ppl = "source=DEPT | mvexpand EMPNOS | fields DEPTNO, EMPNOS"; + RelNode root = getRelNode(ppl); + + assertContains(root, "LogicalCorrelate"); + assertContains(root, "Uncollect"); + assertContains(root, "LogicalProject"); + } + + @Test + public void testMvExpandEmptyOrNullArray() { + RelNode root = getRelNode("source=DEPT | where isnull(EMPNOS) | mvexpand EMPNOS"); + assertContains(root, "LogicalCorrelate"); + assertContains(root, "Uncollect"); + } + + @Test + public void testMvExpandWithDuplicates() { + RelNode root = getRelNode("source=DEPT | where DEPTNO in (10, 10, 20) | mvexpand EMPNOS"); + assertContains(root, "LogicalCorrelate"); + assertContains(root, "Uncollect"); + } + + @Test + public void testMvExpandLargeArray() { + RelNode root = getRelNode("source=DEPT | where DEPTNO = 999 | mvexpand EMPNOS"); + assertContains(root, "LogicalCorrelate"); + assertContains(root, "Uncollect"); + 
} + + @Test + public void testMvExpandPrimitiveArray() { + RelNode root = getRelNode("source=DEPT | mvexpand EMPNOS"); + assertContains(root, "LogicalCorrelate"); + assertContains(root, "Uncollect"); + } + + private static void assertContains(RelNode root, String token) { + String plan = root.explain(); + Assert.assertTrue( + "Expected plan to contain [" + token + "] but got:\n" + plan, plan.contains(token)); + } + + private static void assertAnyContains(RelNode root, String... tokens) { + String plan = root.explain(); + for (String token : tokens) { + if (plan.contains(token)) { + return; + } + } + Assert.fail( + "Expected plan to contain one of " + Arrays.toString(tokens) + " but got:\n" + plan); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java index 3418411f9c..ce41771bea 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java @@ -361,6 +361,16 @@ public void testSpathTwice() { "*"); } + @Test + public void testMvExpandCommand() { + assertSingleRelationFields("source=logs | mvexpand skills", Set.of("skills"), "*"); + } + + @Test + public void testMvExpandCommandWithLimit() { + assertSingleRelationFields("source=logs | mvexpand skills limit=5", Set.of("skills"), "*"); + } + @Test public void testUnimplementedVisitDetected() { assertThrows( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1e200eb092..9888916823 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -879,6 +879,18 @@ public void testMvzip() { "source=t | eval result=mvzip(array('a', 'b'), array('x', 'y'), '|') | fields result")); } + @Test + public void testMvexpandCommand() { + assertEquals("source=table | mvexpand identifier", anonymize("source=t | mvexpand skills")); + } + + @Test + public void testMvexpandCommandWithLimit() { + assertEquals( + "source=table | mvexpand identifier limit=***", + anonymize("source=t | mvexpand skills limit=5")); + } + @Test public void testSplit() { // Test split with delimiter