Skip to content

Commit ef12879

Browse files
committed
Add wildcard support for replace command
Signed-off-by: Manasvini B S <manasvis@amazon.com>
1 parent 966b939 commit ef12879

7 files changed

Lines changed: 619 additions & 33 deletions

File tree

core/src/main/java/org/opensearch/sql/ast/tree/Replace.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,6 @@ private void validate() {
5656
throw new IllegalArgumentException(
5757
"Field list cannot be empty in Replace command. Use IN clause to specify the field.");
5858
}
59-
6059
Set<String> uniqueFields = new HashSet<>();
6160
List<String> duplicates =
6261
fieldList.stream()

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 40 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
import org.apache.calcite.rex.RexNode;
5151
import org.apache.calcite.rex.RexVisitorImpl;
5252
import org.apache.calcite.rex.RexWindowBounds;
53+
import org.apache.calcite.sql.fun.SqlLibraryOperators;
5354
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
5455
import org.apache.calcite.sql.type.SqlTypeFamily;
5556
import org.apache.calcite.sql.type.SqlTypeName;
@@ -136,6 +137,7 @@
136137
import org.opensearch.sql.expression.parse.RegexCommonUtils;
137138
import org.opensearch.sql.utils.ParseUtils;
138139
import org.opensearch.sql.utils.WildcardRenameUtils;
140+
import org.opensearch.sql.utils.WildcardReplaceUtils;
139141

140142
public class CalciteRelNodeVisitor extends AbstractNodeVisitor<RelNode, CalcitePlanContext> {
141143

@@ -2141,33 +2143,55 @@ public RelNode visitValues(Values values, CalcitePlanContext context) {
21412143
@Override
21422144
public RelNode visitReplace(Replace node, CalcitePlanContext context) {
21432145
visitChildren(node, context);
2146+
String pattern = ((Literal) node.getPattern()).getValue().toString();
2147+
String replacement = ((Literal) node.getReplacement()).getValue().toString();
21442148

2145-
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
2146-
RexNode patternNode = rexVisitor.analyze(node.getPattern(), context);
2147-
RexNode replacementNode = rexVisitor.analyze(node.getReplacement(), context);
2149+
// Remove quotes if present
2150+
pattern = pattern.replaceAll("^[\"']|[\"']$", "");
2151+
replacement = replacement.replaceAll("^[\"']|[\"']$", "");
2152+
2153+
// Validate patterns only if wildcards are present
2154+
if (WildcardRenameUtils.isWildcardPattern(pattern)
2155+
|| WildcardRenameUtils.isWildcardPattern(replacement)) {
2156+
WildcardReplaceUtils.validatePatterns(pattern, replacement);
2157+
}
21482158

21492159
List<RexNode> projectList = new ArrayList<>();
21502160
List<String> newFieldNames = new ArrayList<>();
2151-
2152-
// First add all original fields
2153-
for (String fieldName : fieldNames) {
2154-
RexNode fieldRef = context.relBuilder.field(fieldName);
2155-
projectList.add(fieldRef);
2161+
// Add original fields
2162+
for (String fieldName : context.relBuilder.peek().getRowType().getFieldNames()) {
2163+
projectList.add(context.relBuilder.field(fieldName));
21562164
newFieldNames.add(fieldName);
21572165
}
2158-
2159-
// Then add new fields with replaced content using new_ prefix
2166+
// Process fields for replacement
21602167
for (Field field : node.getFieldList()) {
21612168
String fieldName = field.getField().toString();
21622169
RexNode fieldRef = context.relBuilder.field(fieldName);
2163-
2164-
RexNode replaceCall =
2165-
context.relBuilder.call(
2166-
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
2167-
projectList.add(replaceCall);
2170+
if (WildcardRenameUtils.isWildcardPattern(pattern)
2171+
|| WildcardRenameUtils.isWildcardPattern(replacement)) {
2172+
String regexPattern = WildcardReplaceUtils.convertToRegexPattern(pattern);
2173+
String regexReplacement = WildcardReplaceUtils.convertToRegexReplacement(replacement);
2174+
// Use REGEXP_REPLACE for wildcard patterns
2175+
RexNode replaceCall =
2176+
context.relBuilder.call(
2177+
SqlLibraryOperators.REGEXP_REPLACE_3,
2178+
fieldRef,
2179+
context.relBuilder.literal(regexPattern),
2180+
context.relBuilder.literal(regexReplacement));
2181+
projectList.add(replaceCall);
2182+
} else {
2183+
System.out.println("Using REPLACE");
2184+
// Use standard REPLACE for non-wildcard patterns
2185+
RexNode replaceCall =
2186+
context.relBuilder.call(
2187+
SqlStdOperatorTable.REPLACE,
2188+
fieldRef,
2189+
context.relBuilder.literal(pattern),
2190+
context.relBuilder.literal(replacement));
2191+
projectList.add(replaceCall);
2192+
}
21682193
newFieldNames.add(NEW_FIELD_PREFIX + fieldName);
21692194
}
2170-
21712195
context.relBuilder.project(projectList, newFieldNames);
21722196
return context.relBuilder.peek();
21732197
}
WildcardReplaceUtils.java (package org.opensearch.sql.utils)

Lines changed: 111 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,111 @@
1+
package org.opensearch.sql.utils;
2+
3+
import java.util.regex.Matcher;
import java.util.regex.Pattern;
4+
5+
/** Utility class for handling wildcard patterns in replace operations. */
6+
public class WildcardReplaceUtils {
7+
8+
/**
9+
* Convert wildcard pattern to regex pattern for REGEXP_REPLACE.
10+
*
11+
* @param pattern Pattern that may contain wildcards
12+
* @return Regex pattern
13+
*/
14+
public static String convertToRegexPattern(String pattern) {
15+
if (pattern == null || pattern.isEmpty()) {
16+
return pattern;
17+
}
18+
19+
// If not a wildcard pattern, return as is
20+
if (!WildcardRenameUtils.isWildcardPattern(pattern)) {
21+
return pattern;
22+
}
23+
24+
// Check for consecutive wildcards before any substring operations
25+
if (pattern.matches(".*\\*{2,}.*")) {
26+
throw new IllegalArgumentException("Consecutive wildcards are not supported");
27+
}
28+
29+
// Handle single wildcard pattern
30+
if (pattern.equals("*")) {
31+
return "(.*)";
32+
}
33+
34+
// Handle different wildcard positions
35+
if (pattern.startsWith("*") && pattern.endsWith("*")) {
36+
// *abc* -> Pattern matches 'abc' anywhere
37+
String middle = pattern.substring(1, pattern.length() - 1);
38+
return "(.*)" + Pattern.quote(middle) + "(.*)";
39+
} else if (pattern.startsWith("*")) {
40+
// *abc -> Pattern matches 'abc' at end
41+
String end = pattern.substring(1);
42+
return "(.*)" + Pattern.quote(end) + "$";
43+
} else if (pattern.endsWith("*")) {
44+
// abc* -> Pattern matches 'abc' at start with explicit capture group
45+
String start = pattern.substring(0, pattern.length() - 1);
46+
return "^" + Pattern.quote(start) + "(.*)"; // Explicitly create capture group
47+
}
48+
return pattern;
49+
}
50+
51+
/**
52+
* Convert wildcard replacement to regex replacement. Converts * to corresponding regex group
53+
* references ($1, $2, etc.)
54+
*
55+
* @param replacement Replacement pattern with wildcards
56+
* @return Regex replacement string
57+
*/
58+
public static String convertToRegexReplacement(String replacement) {
59+
if (!WildcardRenameUtils.isWildcardPattern(replacement)) {
60+
return replacement;
61+
}
62+
if (replacement.startsWith("*") && replacement.endsWith("*")) {
63+
// *XYZ* -> Replacement with both prefix and suffix captured content
64+
String middle = replacement.substring(1, replacement.length() - 1);
65+
return "$1" + middle + "$2";
66+
} else if (replacement.startsWith("*")) {
67+
// *XYZ -> Replacement with prefix captured content
68+
String end = replacement.substring(1);
69+
return "$1" + end;
70+
} else if (replacement.endsWith("*")) {
71+
// XYZ* -> Replacement with suffix captured content
72+
String start = replacement.substring(0, replacement.length() - 1);
73+
return start + "$1";
74+
}
75+
return replacement;
76+
}
77+
78+
/**
79+
* Validate wildcard patterns compatibility.
80+
*
81+
* @param pattern Source pattern
82+
* @param replacement Replacement pattern
83+
* @throws IllegalArgumentException if patterns are invalid
84+
*/
85+
public static void validatePatterns(String pattern, String replacement) {
86+
if (WildcardRenameUtils.isWildcardPattern(pattern)
87+
|| WildcardRenameUtils.isWildcardPattern(replacement)) {
88+
if (pattern.matches(".*\\*{2,}.*") || replacement.matches(".*\\*{2,}.*")) {
89+
throw new IllegalArgumentException("Consecutive wildcards are not supported");
90+
}
91+
}
92+
93+
// If replacement has wildcard, pattern must have wildcard
94+
if (WildcardRenameUtils.isWildcardPattern(replacement)
95+
&& !WildcardRenameUtils.isWildcardPattern(pattern)) {
96+
throw new IllegalArgumentException(
97+
"If replacement contains wildcard, pattern must contain wildcard");
98+
}
99+
100+
// Check if wildcard count matches
101+
if (WildcardRenameUtils.isWildcardPattern(replacement)) {
102+
long patternWildcards = pattern.chars().filter(ch -> ch == '*').count();
103+
long replacementWildcards = replacement.chars().filter(ch -> ch == '*').count();
104+
105+
if (replacementWildcards > patternWildcards) {
106+
throw new IllegalArgumentException(
107+
"Number of wildcards in replacement cannot exceed number of wildcards in pattern");
108+
}
109+
}
110+
}
111+
}
WildcardReplaceUtilsTest.java (package org.opensearch.sql.utils)

Lines changed: 159 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,159 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.utils;
7+
8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertThrows;
10+
11+
import org.junit.jupiter.api.Test;
12+
13+
class WildcardReplaceUtilsTest {
14+
15+
@Test
16+
void testConvertToRegexPatternNoWildcard() {
17+
assertEquals("TEST", WildcardReplaceUtils.convertToRegexPattern("TEST"));
18+
assertEquals("CLERK", WildcardReplaceUtils.convertToRegexPattern("CLERK"));
19+
assertEquals("", WildcardReplaceUtils.convertToRegexPattern(""));
20+
}
21+
22+
@Test
23+
void testConvertToRegexPatternWithWildcardEnd() {
24+
assertEquals("^\\QCLERK\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("CLERK*"));
25+
assertEquals("^\\QTEST\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("TEST*"));
26+
}
27+
28+
@Test
29+
void testConvertToRegexPatternWithWildcardStart() {
30+
assertEquals("(.*)\\QCLERK\\E$", WildcardReplaceUtils.convertToRegexPattern("*CLERK"));
31+
assertEquals("(.*)\\QMAN\\E$", WildcardReplaceUtils.convertToRegexPattern("*MAN"));
32+
}
33+
34+
@Test
35+
void testConvertToRegexPatternWithWildcardBothEnds() {
36+
assertEquals("(.*)\\QCLERK\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("*CLERK*"));
37+
assertEquals("(.*)\\QMAN\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("*MAN*"));
38+
}
39+
40+
@Test
41+
void testConvertToRegexReplacementNoWildcard() {
42+
assertEquals("EMPLOYEE", WildcardReplaceUtils.convertToRegexReplacement("EMPLOYEE"));
43+
assertEquals("PERSON", WildcardReplaceUtils.convertToRegexReplacement("PERSON"));
44+
assertEquals("", WildcardReplaceUtils.convertToRegexReplacement(""));
45+
}
46+
47+
@Test
48+
void testConvertToRegexReplacementWithWildcardEnd() {
49+
assertEquals("EMPLOYEE$1", WildcardReplaceUtils.convertToRegexReplacement("EMPLOYEE*"));
50+
assertEquals("PERSON$1", WildcardReplaceUtils.convertToRegexReplacement("PERSON*"));
51+
}
52+
53+
@Test
54+
void testConvertToRegexReplacementWithWildcardStart() {
55+
assertEquals("$1EMPLOYEE", WildcardReplaceUtils.convertToRegexReplacement("*EMPLOYEE"));
56+
assertEquals("$1PERSON", WildcardReplaceUtils.convertToRegexReplacement("*PERSON"));
57+
}
58+
59+
@Test
60+
void testConvertToRegexReplacementWithWildcardBothEnds() {
61+
assertEquals("$1EMPLOYEE$2", WildcardReplaceUtils.convertToRegexReplacement("*EMPLOYEE*"));
62+
assertEquals("$1PERSON$2", WildcardReplaceUtils.convertToRegexReplacement("*PERSON*"));
63+
}
64+
65+
@Test
66+
void testValidPatternsNoWildcard() {
67+
// Should not throw any exceptions
68+
WildcardReplaceUtils.validatePatterns("TEST", "REPLACE");
69+
WildcardReplaceUtils.validatePatterns("CLERK", "EMPLOYEE");
70+
}
71+
72+
@Test
73+
void testValidPatternsWithWildcards() {
74+
// Valid combinations
75+
WildcardReplaceUtils.validatePatterns("CLERK*", "EMPLOYEE*");
76+
WildcardReplaceUtils.validatePatterns("*MAN", "*PERSON");
77+
WildcardReplaceUtils.validatePatterns("*TEST*", "NEW*");
78+
WildcardReplaceUtils.validatePatterns("TEST*", "REPLACE");
79+
}
80+
81+
@Test
82+
void testInvalidConsecutiveWildcards() {
83+
IllegalArgumentException ex =
84+
assertThrows(
85+
IllegalArgumentException.class,
86+
() -> WildcardReplaceUtils.validatePatterns("CLERK**", "EMPLOYEE*"));
87+
assertEquals("Consecutive wildcards are not supported", ex.getMessage());
88+
89+
ex =
90+
assertThrows(
91+
IllegalArgumentException.class,
92+
() -> WildcardReplaceUtils.validatePatterns("CLERK*", "EMPLOYEE**"));
93+
assertEquals("Consecutive wildcards are not supported", ex.getMessage());
94+
}
95+
96+
@Test
97+
void testInvalidWildcardInReplacementOnly() {
98+
IllegalArgumentException ex =
99+
assertThrows(
100+
IllegalArgumentException.class,
101+
() -> WildcardReplaceUtils.validatePatterns("CLERK", "EMPLOYEE*"));
102+
assertEquals(
103+
"If replacement contains wildcard, pattern must contain wildcard", ex.getMessage());
104+
}
105+
106+
@Test
107+
void testInvalidWildcardCount() {
108+
IllegalArgumentException ex =
109+
assertThrows(
110+
IllegalArgumentException.class,
111+
() -> WildcardReplaceUtils.validatePatterns("TEST*", "NEW*TEXT*"));
112+
assertEquals(
113+
"Number of wildcards in replacement cannot exceed number of wildcards in pattern",
114+
ex.getMessage());
115+
}
116+
117+
@Test
118+
void testValidComplexPatterns() {
119+
// Pattern has more wildcards than replacement
120+
WildcardReplaceUtils.validatePatterns("*TEST*END*", "*NEW*");
121+
WildcardReplaceUtils.validatePatterns("*PRE*MID*", "START*END");
122+
WildcardReplaceUtils.validatePatterns("*START*END*", "*REPLACE");
123+
}
124+
125+
@Test
126+
void testSpecialCharactersInPatterns() {
127+
assertEquals("^\\Q$TEST\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("$TEST*"));
128+
assertEquals("(.*)\\Q[TEST]\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("*[TEST]*"));
129+
assertEquals("(.*)\\Q.TEST.\\E$", WildcardReplaceUtils.convertToRegexPattern("*.TEST."));
130+
}
131+
132+
@Test
133+
void testEmptyPatternWithWildcard() {
134+
// Test single wildcard pattern first
135+
assertEquals("(.*)", WildcardReplaceUtils.convertToRegexPattern("*"));
136+
137+
// Test wildcard at start
138+
assertEquals("(.*)\\Qa\\E$", WildcardReplaceUtils.convertToRegexPattern("*a"));
139+
140+
// Test wildcard at end
141+
assertEquals("^\\Qa\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("a*"));
142+
143+
// Test consecutive wildcards - should throw exception
144+
assertThrows(
145+
IllegalArgumentException.class, () -> WildcardReplaceUtils.validatePatterns("**", "TEST"));
146+
}
147+
148+
@Test
149+
void testEdgeCasePatterns() {
150+
// Single character patterns
151+
WildcardReplaceUtils.validatePatterns("a*", "b*");
152+
WildcardReplaceUtils.validatePatterns("*a", "*b");
153+
WildcardReplaceUtils.validatePatterns("*a*", "*b*");
154+
155+
// Empty patterns
156+
WildcardReplaceUtils.validatePatterns("", "");
157+
WildcardReplaceUtils.validatePatterns("*", "text");
158+
}
159+
}

0 commit comments

Comments
 (0)