Skip to content

Commit 2fa3845

Browse files
ahkcsmanasvinibs
andauthored
[Backport 2.19-dev] Add replace command with Calcite (#4451) (#4604)
* Add replace command with Calcite (#4451) * Add replace command with Calcite Signed-off-by: Kai Huang <ahkcs@amazon.com> --------- Signed-off-by: Kai Huang <ahkcs@amazon.com> Co-authored-by: Manasvini B S <manasvis@amazon.com> (cherry picked from commit 5677765) Signed-off-by: Kai Huang <ahkcs@amazon.com> * fix compile Signed-off-by: Kai Huang <ahkcs@amazon.com> * backporting backslash handling from main and fix tests Signed-off-by: Kai Huang <ahkcs@amazon.com> * Fix tests Signed-off-by: Kai Huang <ahkcs@amazon.com> * fix tests Signed-off-by: Kai Huang <ahkcs@amazon.com> * compatibility across Java versions Signed-off-by: Kai Huang <ahkcs@amazon.com> --------- Signed-off-by: Kai Huang <ahkcs@amazon.com> Co-authored-by: Manasvini B S <manasvis@amazon.com>
1 parent 71b0386 commit 2fa3845

25 files changed

Lines changed: 1082 additions & 9 deletions

File tree

common/src/main/java/org/opensearch/sql/common/utils/StringUtils.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,11 @@ public static String unquoteText(String text) {
6363
for (int chIndex = 1; chIndex < text.length() - 1; chIndex++) {
6464
currentChar = text.charAt(chIndex);
6565
nextChar = text.charAt(chIndex + 1);
66-
if (currentChar == enclosingQuote && nextChar == currentChar) {
66+
67+
if ((currentChar == '\\' && (nextChar == '"' || nextChar == '\\' || nextChar == '\''))
68+
|| (currentChar == nextChar && currentChar == enclosingQuote)) {
6769
chIndex++;
70+
currentChar = nextChar;
6871
}
6972
textSB.append(currentChar);
7073
}

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import org.opensearch.sql.ast.tree.Relation;
8686
import org.opensearch.sql.ast.tree.RelationSubquery;
8787
import org.opensearch.sql.ast.tree.Rename;
88+
import org.opensearch.sql.ast.tree.Replace;
8889
import org.opensearch.sql.ast.tree.Reverse;
8990
import org.opensearch.sql.ast.tree.Rex;
9091
import org.opensearch.sql.ast.tree.SPath;
@@ -800,6 +801,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con
800801
return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context));
801802
}
802803

804+
@Override
805+
public LogicalPlan visitReplace(Replace node, AnalysisContext context) {
806+
throw getOnlyForCalciteException("Replace");
807+
}
808+
803809
@Override
804810
public LogicalPlan visitJoin(Join node, AnalysisContext context) {
805811
throw getOnlyForCalciteException("Join");

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import org.opensearch.sql.ast.tree.Relation;
7474
import org.opensearch.sql.ast.tree.RelationSubquery;
7575
import org.opensearch.sql.ast.tree.Rename;
76+
import org.opensearch.sql.ast.tree.Replace;
7677
import org.opensearch.sql.ast.tree.Reverse;
7778
import org.opensearch.sql.ast.tree.Rex;
7879
import org.opensearch.sql.ast.tree.SPath;
@@ -246,6 +247,10 @@ public T visitRename(Rename node, C context) {
246247
return visitChildren(node, context);
247248
}
248249

250+
public T visitReplace(Replace node, C context) {
251+
return visitChildren(node, context);
252+
}
253+
249254
public T visitEval(Eval node, C context) {
250255
return visitChildren(node, context);
251256
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import com.google.common.collect.ImmutableList;
9+
import java.util.List;
10+
import java.util.Set;
11+
import lombok.EqualsAndHashCode;
12+
import lombok.Getter;
13+
import lombok.Setter;
14+
import lombok.ToString;
15+
import org.jetbrains.annotations.Nullable;
16+
import org.opensearch.sql.ast.AbstractNodeVisitor;
17+
import org.opensearch.sql.ast.expression.Field;
18+
19+
@Getter
20+
@Setter
21+
@ToString
22+
@EqualsAndHashCode(callSuper = false)
23+
public class Replace extends UnresolvedPlan {
24+
private final List<ReplacePair> replacePairs;
25+
private final Set<Field> fieldList;
26+
@Nullable private UnresolvedPlan child;
27+
28+
/**
29+
* Constructor with multiple pattern/replacement pairs.
30+
*
31+
* @param replacePairs List of pattern/replacement pairs
32+
* @param fieldList Set of fields to apply replacements to
33+
*/
34+
public Replace(List<ReplacePair> replacePairs, Set<Field> fieldList) {
35+
this.replacePairs = replacePairs;
36+
this.fieldList = fieldList;
37+
}
38+
39+
@Override
40+
public Replace attach(UnresolvedPlan child) {
41+
if (null == this.child) {
42+
this.child = child;
43+
} else {
44+
this.child.attach(child);
45+
}
46+
return this;
47+
}
48+
49+
@Override
50+
public List<UnresolvedPlan> getChild() {
51+
return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child);
52+
}
53+
54+
@Override
55+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
56+
return nodeVisitor.visitReplace(this, context);
57+
}
58+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import lombok.AllArgsConstructor;
9+
import lombok.EqualsAndHashCode;
10+
import lombok.Getter;
11+
import lombok.ToString;
12+
import org.opensearch.sql.ast.expression.Literal;
13+
14+
/** A pair of pattern and replacement literals for the Replace command. */
15+
@Getter
16+
@AllArgsConstructor
17+
@EqualsAndHashCode
18+
@ToString
19+
public class ReplacePair {
20+
private final Literal pattern;
21+
private final Literal replacement;
22+
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@
121121
import org.opensearch.sql.ast.tree.Regex;
122122
import org.opensearch.sql.ast.tree.Relation;
123123
import org.opensearch.sql.ast.tree.Rename;
124+
import org.opensearch.sql.ast.tree.Replace;
125+
import org.opensearch.sql.ast.tree.ReplacePair;
124126
import org.opensearch.sql.ast.tree.Rex;
125127
import org.opensearch.sql.ast.tree.SPath;
126128
import org.opensearch.sql.ast.tree.Search;
@@ -2414,6 +2416,51 @@ public RelNode visitValues(Values values, CalcitePlanContext context) {
24142416
}
24152417
}
24162418

2419+
@Override
2420+
public RelNode visitReplace(Replace node, CalcitePlanContext context) {
2421+
visitChildren(node, context);
2422+
2423+
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
2424+
2425+
// Create a set of field names to replace for quick lookup
2426+
Set<String> fieldsToReplace =
2427+
node.getFieldList().stream().map(f -> f.getField().toString()).collect(Collectors.toSet());
2428+
2429+
// Validate that all fields to replace exist by calling field() on each
2430+
// This leverages relBuilder.field()'s built-in validation which throws
2431+
// IllegalArgumentException if any field doesn't exist
2432+
for (String fieldToReplace : fieldsToReplace) {
2433+
context.relBuilder.field(fieldToReplace);
2434+
}
2435+
2436+
List<RexNode> projectList = new ArrayList<>();
2437+
2438+
// Project all fields, replacing specified ones in-place
2439+
for (String fieldName : fieldNames) {
2440+
if (fieldsToReplace.contains(fieldName)) {
2441+
// Replace this field in-place with all pattern/replacement pairs applied sequentially
2442+
RexNode fieldRef = context.relBuilder.field(fieldName);
2443+
2444+
// Apply all replacement pairs sequentially (nested REPLACE calls)
2445+
for (ReplacePair pair : node.getReplacePairs()) {
2446+
RexNode patternNode = rexVisitor.analyze(pair.getPattern(), context);
2447+
RexNode replacementNode = rexVisitor.analyze(pair.getReplacement(), context);
2448+
fieldRef =
2449+
context.relBuilder.call(
2450+
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
2451+
}
2452+
2453+
projectList.add(fieldRef);
2454+
} else {
2455+
// Keep original field unchanged
2456+
projectList.add(context.relBuilder.field(fieldName));
2457+
}
2458+
}
2459+
2460+
context.relBuilder.project(projectList, fieldNames);
2461+
return context.relBuilder.peek();
2462+
}
2463+
24172464
private void buildParseRelNode(Parse node, CalcitePlanContext context) {
24182465
RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context);
24192466
ParseMethod parseMethod = node.getParseMethod();

docs/category.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"user/ppl/cmd/rare.rst",
4141
"user/ppl/cmd/regex.rst",
4242
"user/ppl/cmd/rename.rst",
43+
"user/ppl/cmd/replace.rst",
4344
"user/ppl/cmd/rex.rst",
4445
"user/ppl/cmd/search.rst",
4546
"user/ppl/cmd/showdatasources.rst",
@@ -68,4 +69,4 @@
6869
"bash_settings": [
6970
"user/ppl/admin/settings.rst"
7071
]
71-
}
72+
}

docs/user/dql/expressions.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ Here is an example for different type of literals::
4646
+---------+---------+---------+---------+--------+---------+---------+-----------+----------+
4747
| "Hello" | 'Hello' | "It""s" | 'It''s' | "It's" | '"Its"' | 'It\'s' | 'It\\\'s' | "\I\t\s" |
4848
|---------+---------+---------+---------+--------+---------+---------+-----------+----------|
49-
| Hello | Hello | It"s | It's | It's | "Its" | It\'s | It\\\'s | \I\t\s |
49+
| Hello | Hello | It"s | It's | It's | "Its" | It's | It\'s | \I\t\s |
5050
+---------+---------+---------+---------+--------+---------+---------+-----------+----------+
5151

5252

docs/user/ppl/cmd/replace.rst

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
=============
2+
replace
3+
=============
4+
5+
.. rubric:: Table of contents
6+
7+
.. contents::
8+
:local:
9+
:depth: 2
10+
11+
12+
Description
13+
============
14+
Use the ``replace`` command to replace text in one or more fields of the search results.
15+
16+
Note: This command is only available when Calcite engine is enabled.
17+
18+
19+
Syntax
20+
============
21+
replace '<pattern>' WITH '<replacement>' [, '<pattern>' WITH '<replacement>']... IN <field-name>[, <field-name>]...
22+
23+
24+
Parameters
25+
==========
26+
* **pattern**: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions).
27+
* **replacement**: mandatory. The text you want to replace with.
28+
* **field-name**: mandatory. One or more field names where the replacement should occur.
29+
30+
31+
Examples
32+
========
33+
34+
Example 1: Replace text in one field
35+
------------------------------------
36+
37+
The example shows replacing text in one field.
38+
39+
PPL query::
40+
41+
os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state;
42+
fetched rows / total rows = 4/4
43+
+----------+
44+
| state |
45+
|----------|
46+
| Illinois |
47+
| TN |
48+
| VA |
49+
| MD |
50+
+----------+
51+
52+
53+
Example 2: Replace text in multiple fields
54+
------------------------------------------
55+
56+
The example shows replacing text in multiple fields.
57+
58+
PPL query::
59+
60+
os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address;
61+
fetched rows / total rows = 4/4
62+
+----------+----------------------+
63+
| state | address |
64+
|----------+----------------------|
65+
| Illinois | 880 Holmes Lane |
66+
| TN | 671 Bristol Street |
67+
| VA | 789 Madison Street |
68+
| MD | 467 Hutchinson Court |
69+
+----------+----------------------+
70+
71+
72+
Example 3: Replace with other commands in a pipeline
73+
------------------------------------
74+
75+
The example shows using replace with other commands in a query pipeline.
76+
77+
PPL query::
78+
79+
os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age;
80+
fetched rows / total rows = 3/3
81+
+----------+-----+
82+
| state | age |
83+
|----------+-----|
84+
| Illinois | 32 |
85+
| TN | 36 |
86+
| MD | 33 |
87+
+----------+-----+
88+
89+
Example 4: Replace with multiple pattern/replacement pairs
90+
----------------------------------------------------------
91+
92+
The example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially.
93+
94+
PPL query::
95+
96+
os> source=accounts | replace "IL" WITH "Illinois", "TN" WITH "Tennessee" IN state | fields state;
97+
fetched rows / total rows = 4/4
98+
+-----------+
99+
| state |
100+
|-----------|
101+
| Illinois |
102+
| Tennessee |
103+
| VA |
104+
| MD |
105+
+-----------+
106+
107+
Example 5: Pattern matching with LIKE and replace
108+
-------------------------------------------------
109+
110+
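Since the replace command only supports plain string literals, you can combine it with the LIKE function in a ``where`` clause when pattern matching is needed: LIKE selects the matching rows, and replace then rewrites the literal text in them.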
111+
112+
PPL query::
113+
114+
os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city;
115+
fetched rows / total rows = 1/1
116+
+-----------------+-------+--------+-----+--------+
117+
| address | state | gender | age | city |
118+
|-----------------+-------+--------+-----+--------|
119+
| 880 HOLMES Lane | IL | M | 32 | Brogan |
120+
+-----------------+-------+--------+-----+--------+
121+
122+
123+
Limitations
124+
===========
125+
* Only supports plain text literals for pattern matching. Wildcards and regular expressions are not supported.
126+
* Pattern and replacement values must be string literals.
127+
* The replace command modifies the specified fields in-place.

docs/user/ppl/functions/string.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,14 @@ Argument type: STRING, STRING (regex pattern), STRING (replacement)
215215

216216
Return type: STRING
217217

218+
**Important - Regex Special Characters**: The pattern is interpreted as a regular expression. Characters like ``.``, ``*``, ``+``, ``[``, ``]``, ``(``, ``)``, ``{``, ``}``, ``^``, ``$``, ``|``, ``?``, and ``\`` have special meaning in regex. To match them literally, escape with backslashes:
219+
220+
* To match ``example.com``: use ``'example\\.com'`` (escape the dots)
221+
* To match ``value*``: use ``'value\\*'`` (escape the asterisk)
222+
* To match ``price+tax``: use ``'price\\+tax'`` (escape the plus)
223+
224+
For strings with many special characters, use ``\\Q...\\E`` to quote the entire literal string (e.g., ``'\\Qhttps://example.com/path?id=123\\E'`` matches that exact URL).
225+
218226
Literal String Replacement Examples::
219227

220228
os> source=people | eval `REPLACE('helloworld', 'world', 'universe')` = REPLACE('helloworld', 'world', 'universe'), `REPLACE('helloworld', 'invalid', 'universe')` = REPLACE('helloworld', 'invalid', 'universe') | fields `REPLACE('helloworld', 'world', 'universe')`, `REPLACE('helloworld', 'invalid', 'universe')`
@@ -225,6 +233,16 @@ Literal String Replacement Examples::
225233
| hellouniverse | helloworld |
226234
+--------------------------------------------+----------------------------------------------+
227235

236+
Escaping Special Characters Examples::
237+
238+
os> source=people | eval `Replace domain` = REPLACE('api.example.com', 'example\\.com', 'newsite.org'), `Replace with quote` = REPLACE('https://api.example.com/v1', '\\Qhttps://api.example.com\\E', 'http://localhost:8080') | fields `Replace domain`, `Replace with quote`
239+
fetched rows / total rows = 1/1
240+
+-----------------+--------------------------+
241+
| Replace domain | Replace with quote |
242+
|-----------------+--------------------------|
243+
| api.newsite.org | http://localhost:8080/v1 |
244+
+-----------------+--------------------------+
245+
228246
Regex Pattern Examples::
229247

230248
os> source=people | eval `Remove digits` = REPLACE('test123', '\d+', ''), `Collapse spaces` = REPLACE('hello world', ' +', ' '), `Remove special` = REPLACE('hello@world!', '[^a-zA-Z]', '') | fields `Remove digits`, `Collapse spaces`, `Remove special`

0 commit comments

Comments
 (0)