Skip to content

Commit a20ff96

Browse files
committed
Add replace command with Calcite
Signed-off-by: Manasvini B S <manasvis@amazon.com>
1 parent 7de8545 commit a20ff96

14 files changed

Lines changed: 744 additions & 1 deletion

File tree

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
import org.opensearch.sql.ast.tree.Relation;
8585
import org.opensearch.sql.ast.tree.RelationSubquery;
8686
import org.opensearch.sql.ast.tree.Rename;
87+
import org.opensearch.sql.ast.tree.Replace;
8788
import org.opensearch.sql.ast.tree.Reverse;
8889
import org.opensearch.sql.ast.tree.Rex;
8990
import org.opensearch.sql.ast.tree.Sort;
@@ -775,6 +776,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con
775776
return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context));
776777
}
777778

779+
@Override
780+
public LogicalPlan visitReplace(Replace node, AnalysisContext context) {
781+
throw getOnlyForCalciteException("Replace");
782+
}
783+
778784
@Override
779785
public LogicalPlan visitJoin(Join node, AnalysisContext context) {
780786
throw getOnlyForCalciteException("Join");

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
import org.opensearch.sql.ast.tree.Relation;
7373
import org.opensearch.sql.ast.tree.RelationSubquery;
7474
import org.opensearch.sql.ast.tree.Rename;
75+
import org.opensearch.sql.ast.tree.Replace;
7576
import org.opensearch.sql.ast.tree.Reverse;
7677
import org.opensearch.sql.ast.tree.Rex;
7778
import org.opensearch.sql.ast.tree.SPath;
@@ -239,6 +240,10 @@ public T visitRename(Rename node, C context) {
239240
return visitChildren(node, context);
240241
}
241242

243+
public T visitReplace(Replace node, C context) {
244+
return visitChildren(node, context);
245+
}
246+
242247
public T visitEval(Eval node, C context) {
243248
return visitChildren(node, context);
244249
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package org.opensearch.sql.ast.tree;
2+
3+
import com.google.common.collect.ImmutableList;
4+
import java.util.HashSet;
5+
import java.util.List;
6+
import java.util.Set;
7+
import java.util.stream.Collectors;
8+
import lombok.EqualsAndHashCode;
9+
import lombok.Getter;
10+
import lombok.Setter;
11+
import lombok.ToString;
12+
import org.opensearch.sql.ast.AbstractNodeVisitor;
13+
import org.opensearch.sql.ast.expression.DataType;
14+
import org.opensearch.sql.ast.expression.Field;
15+
import org.opensearch.sql.ast.expression.Literal;
16+
import org.opensearch.sql.ast.expression.UnresolvedExpression;
17+
18+
@Getter
19+
@Setter
20+
@ToString
21+
@EqualsAndHashCode(callSuper = false)
22+
public class Replace extends UnresolvedPlan {
23+
private final UnresolvedExpression pattern;
24+
private final UnresolvedExpression replacement;
25+
private final List<Field> fieldList;
26+
private UnresolvedPlan child;
27+
28+
public Replace(
29+
UnresolvedExpression pattern, UnresolvedExpression replacement, List<Field> fieldList) {
30+
this.pattern = pattern;
31+
this.replacement = replacement;
32+
this.fieldList = fieldList;
33+
validate();
34+
}
35+
36+
private void validate() {
37+
if (pattern == null) {
38+
throw new IllegalArgumentException("Pattern expression cannot be null in Replace command");
39+
}
40+
if (replacement == null) {
41+
throw new IllegalArgumentException(
42+
"Replacement expression cannot be null in Replace command");
43+
}
44+
45+
// Validate pattern is a string literal
46+
if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) {
47+
throw new IllegalArgumentException("Pattern must be a string literal in Replace command");
48+
}
49+
50+
// Validate replacement is a string literal
51+
if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) {
52+
throw new IllegalArgumentException("Replacement must be a string literal in Replace command");
53+
}
54+
55+
if (fieldList == null || fieldList.isEmpty()) {
56+
throw new IllegalArgumentException(
57+
"Field list cannot be empty in Replace command. Use IN clause to specify the field.");
58+
}
59+
60+
Set<String> uniqueFields = new HashSet<>();
61+
List<String> duplicates =
62+
fieldList.stream()
63+
.map(field -> field.getField().toString())
64+
.filter(fieldName -> !uniqueFields.add(fieldName))
65+
.collect(Collectors.toList());
66+
67+
if (!duplicates.isEmpty()) {
68+
throw new IllegalArgumentException(
69+
String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates)));
70+
}
71+
}
72+
73+
@Override
74+
public Replace attach(UnresolvedPlan child) {
75+
if (null == this.child) {
76+
this.child = child;
77+
} else {
78+
this.child.attach(child);
79+
}
80+
return this;
81+
}
82+
83+
@Override
84+
public List<UnresolvedPlan> getChild() {
85+
return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child);
86+
}
87+
88+
@Override
89+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
90+
return nodeVisitor.visitReplace(this, context);
91+
}
92+
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
import org.opensearch.sql.ast.tree.Regex;
110110
import org.opensearch.sql.ast.tree.Relation;
111111
import org.opensearch.sql.ast.tree.Rename;
112+
import org.opensearch.sql.ast.tree.Replace;
112113
import org.opensearch.sql.ast.tree.Rex;
113114
import org.opensearch.sql.ast.tree.SPath;
114115
import org.opensearch.sql.ast.tree.Sort;
@@ -140,6 +141,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor<RelNode, CalciteP
140141

141142
private final CalciteRexNodeVisitor rexVisitor;
142143
private final CalciteAggCallVisitor aggVisitor;
144+
private static final String NEW_FIELD_PREFIX = "new_";
143145

144146
public CalciteRelNodeVisitor() {
145147
this.rexVisitor = new CalciteRexNodeVisitor(this);
@@ -2136,6 +2138,40 @@ public RelNode visitValues(Values values, CalcitePlanContext context) {
21362138
}
21372139
}
21382140

2141+
@Override
2142+
public RelNode visitReplace(Replace node, CalcitePlanContext context) {
2143+
visitChildren(node, context);
2144+
2145+
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
2146+
RexNode patternNode = rexVisitor.analyze(node.getPattern(), context);
2147+
RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context);
2148+
2149+
List<RexNode> projectList = new ArrayList<>();
2150+
List<String> newFieldNames = new ArrayList<>();
2151+
2152+
// First add all original fields
2153+
for (String fieldName : fieldNames) {
2154+
RexNode fieldRef = context.relBuilder.field(fieldName);
2155+
projectList.add(fieldRef);
2156+
newFieldNames.add(fieldName);
2157+
}
2158+
2159+
// Then add new fields with replaced content using new_ prefix
2160+
for (Field field : node.getFieldList()) {
2161+
String fieldName = field.getField().toString();
2162+
RexNode fieldRef = context.relBuilder.field(fieldName);
2163+
2164+
RexNode replaceCall =
2165+
context.relBuilder.call(
2166+
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
2167+
projectList.add(replaceCall);
2168+
newFieldNames.add(NEW_FIELD_PREFIX + fieldName);
2169+
}
2170+
2171+
context.relBuilder.project(projectList, newFieldNames);
2172+
return context.relBuilder.peek();
2173+
}
2174+
21392175
private void buildParseRelNode(Parse node, CalcitePlanContext context) {
21402176
RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context);
21412177
ParseMethod parseMethod = node.getParseMethod();

docs/category.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
"user/ppl/cmd/rename.rst",
6363
"user/ppl/cmd/rex.rst",
6464
"user/ppl/cmd/stats.rst",
65-
"user/ppl/cmd/timechart.rst"
65+
"user/ppl/cmd/timechart.rst",
66+
"user/ppl/cmd/replace.rst"
6667
]
6768
}

docs/user/ppl/cmd/replace.rst

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
=============
2+
replace
3+
=============
4+
5+
.. rubric:: Table of contents
6+
7+
.. contents::
8+
:local:
9+
:depth: 2
10+
11+
12+
Description
13+
============
14+
| Use the ``replace`` command to replace text in one or more fields in the search result.
15+
* The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country')
16+
* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0')
17+
18+
19+
Version
20+
=======
21+
3.2.0
22+
23+
24+
Syntax
25+
============
26+
replace '<pattern>' WITH '<replacement>' IN <field-name>[, <field-name>]...
27+
28+
Note: This command is only available when Calcite engine is enabled.
29+
30+
* pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions).
31+
* replacement: mandatory. The text you want to replace with.
32+
* field list: mandatory. One or more field names where the replacement should occur.
33+
34+
35+
Example 1: Replace text in one field
36+
====================================
37+
38+
The example shows replacing text in one field.
39+
40+
PPL query::
41+
42+
os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state;
43+
fetched rows / total rows = 4/4
44+
+-------+-----------+
45+
| state | new_state |
46+
|-------+-----------|
47+
| IL | Illinois |
48+
| TN | TN |
49+
| VA | VA |
50+
| MD | MD |
51+
+-------+-----------+
52+
53+
54+
Example 2: Replace text in multiple fields
55+
==========================================
56+
57+
The example shows replacing text in multiple fields.
58+
59+
PPL query::
60+
61+
os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address;
62+
fetched rows / total rows = 4/4
63+
+-------+----------------------+-----------+----------------------+
64+
| state | address | new_state | new_address |
65+
|-------+----------------------+-----------+----------------------|
66+
| IL | 880 Holmes Lane | Illinois | 880 Holmes Lane |
67+
| TN | 671 Bristol Street | TN | 671 Bristol Street |
68+
| VA | 789 Madison Street | VA | 789 Madison Street |
69+
| MD | 467 Hutchinson Court | MD | 467 Hutchinson Court |
70+
+-------+----------------------+-----------+----------------------+
71+
72+
73+
Example 3: Replace with IN clause and other commands
74+
====================================================
75+
76+
The example shows using replace with other commands.
77+
78+
PPL query::
79+
80+
os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state;
81+
fetched rows / total rows = 3/3
82+
+-------+-----+-----------+
83+
| state | age | new_state |
84+
|-------+-----+-----------|
85+
| IL | 32 | Illinois |
86+
| TN | 36 | TN |
87+
| MD | 33 | MD |
88+
+-------+-----+-----------+
89+
90+
Example 4: Pattern matching with LIKE and replace
91+
=================================================
92+
93+
Since the replace command only supports plain string literals, you can combine the LIKE function (inside a where command) with replace when you need pattern matching.
94+
95+
PPL query::
96+
97+
os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address;
98+
fetched rows / total rows = 1/1
99+
+-----------------+--------+--------+--------+--------+-----------------+
100+
| address | state | gender | age | city | new_address |
101+
|-----------------+--------+--------+--------+--------+-----------------|
102+
| 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane |
103+
+-----------------+--------+--------+--------+--------+-----------------+
104+
105+
Note
106+
====
107+
* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged.

docs/user/ppl/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ The query start with search command and then flowing a set of command delimited
124124

125125
- `trendline command <cmd/trendline.rst>`_
126126

127+
- `replace command <cmd/replace.rst>`_
128+
127129
- `where command <cmd/where.rst>`_
128130

129131
* **Functions**

integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
CalciteRegexCommandIT.class,
8989
CalciteRexCommandIT.class,
9090
CalciteRenameCommandIT.class,
91+
CalciteReplaceCommandIT.class,
9192
CalciteResourceMonitorIT.class,
9293
CalciteSearchCommandIT.class,
9394
CalciteSettingsIT.class,

0 commit comments

Comments
 (0)