Skip to content

Commit 67f3b96

Browse files
maskri17copybara-github
authored andcommitted
Aligning replace function in Regex ext with Google SQL
PiperOrigin-RevId: 773110937
1 parent 6d92e81 commit 67f3b96

3 files changed

Lines changed: 94 additions & 37 deletions

File tree

extensions/src/main/java/dev/cel/extensions/CelRegexExtensions.java

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ enum Function {
7878
String pattern = (String) args[1];
7979
String replaceStr = (String) args[2];
8080
long count = (long) args[3];
81-
return CelRegexExtensions.replace(target, pattern, replaceStr, count);
81+
return CelRegexExtensions.replaceN(target, pattern, replaceStr, count);
8282
}))),
8383
EXTRACT(
8484
CelFunctionDecl.newFunctionDeclaration(
@@ -153,18 +153,16 @@ private static Pattern compileRegexPattern(String regex) {
153153
}
154154

155155
private static String replace(String target, String regex, String replaceStr) {
156-
Pattern pattern = compileRegexPattern(regex);
157-
Matcher matcher = pattern.matcher(target);
158-
return matcher.replaceAll(replaceStr);
156+
return replaceN(target, regex, replaceStr, -1);
159157
}
160158

161-
private static String replace(String target, String regex, String replaceStr, long replaceCount) {
162-
Pattern pattern = compileRegexPattern(regex);
163-
159+
private static String replaceN(
160+
String target, String regex, String replaceStr, long replaceCount) {
164161
if (replaceCount == 0) {
165162
return target;
166163
}
167164

165+
Pattern pattern = compileRegexPattern(regex);
168166
Matcher matcher = pattern.matcher(target);
169167
StringBuffer sb = new StringBuffer();
170168
int counter = 0;
@@ -173,14 +171,54 @@ private static String replace(String target, String regex, String replaceStr, lo
173171
if (replaceCount != -1 && counter >= replaceCount) {
174172
break;
175173
}
176-
matcher.appendReplacement(sb, replaceStr);
174+
175+
String processedReplacement = replaceStrValidator(matcher, replaceStr);
176+
matcher.appendReplacement(sb, Matcher.quoteReplacement(processedReplacement));
177177
counter++;
178178
}
179179
matcher.appendTail(sb);
180180

181181
return sb.toString();
182182
}
183183

184+
private static String replaceStrValidator(Matcher matcher, String replacement) {
185+
StringBuilder sb = new StringBuilder();
186+
for (int i = 0; i < replacement.length(); i++) {
187+
char c = replacement.charAt(i);
188+
if (c == '\\') {
189+
if (i + 1 < replacement.length()) {
190+
char nextChar = replacement.charAt(++i);
191+
if (Character.isDigit(nextChar)) {
192+
int groupNum = Character.getNumericValue(nextChar);
193+
int groupCount = matcher.groupCount();
194+
if (groupNum > groupCount) {
195+
throw new IllegalArgumentException(
196+
"Replacement string references group "
197+
+ groupNum
198+
+ " but regex has only "
199+
+ groupCount
200+
+ " group(s)");
201+
}
202+
String groupValue = matcher.group(groupNum);
203+
if (groupValue != null) {
204+
sb.append(groupValue);
205+
}
206+
} else if (nextChar == '\\') {
207+
sb.append('\\');
208+
} else {
209+
throw new IllegalArgumentException(
210+
"Invalid replacement string: \\ must be followed by a digit");
211+
}
212+
} else {
213+
throw new IllegalArgumentException("Invalid replacement string: \\ not allowed at end");
214+
}
215+
} else {
216+
sb.append(c);
217+
}
218+
}
219+
return sb.toString();
220+
}
221+
184222
private static Optional<String> extract(String target, String regex) {
185223
Pattern pattern = compileRegexPattern(regex);
186224
Matcher matcher = pattern.matcher(target);
@@ -215,11 +253,12 @@ private static ImmutableList<String> extractAll(String target, String regex) {
215253
while (matcher.find()) {
216254
if (hasOneGroup) {
217255
String group = matcher.group(1);
218-
// Add the captured group's content only if it's not null (e.g. optional group didn't match)
256+
// Add the captured group's content only if it's not null
219257
if (group != null) {
220258
builder.add(group);
221259
}
222-
} else { // No capturing groups (matcher.groupCount() == 0)
260+
} else {
261+
// No capturing groups
223262
builder.add(matcher.group(0));
224263
}
225264
}

extensions/src/main/java/dev/cel/extensions/README.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -717,12 +717,14 @@ chance for collision.
717717

718718
### Replace
719719

720-
The `regex.replace` function replaces all occurrences of a regex pattern in a
721-
string with a replacement string. Optionally, you can limit the number of
722-
replacements by providing a count argument. Both numeric ($N) and named
723-
(${name}) capture group references are supported in the replacement string, with
724-
validation for correctness. An error will be thrown for invalid regex or replace
725-
string.
720+
The `regex.replace` function replaces all non-overlapping matches of a regex
721+
pattern in the target string with a replacement string. Optionally, you can
722+
limit the number of replacements by providing a count argument. Only numeric
723+
(\N) capture group references are supported in the replacement string, with
724+
validation for correctness. Backslash-escaped digits (\1 to \9) within the
725+
replacement argument can be used to insert text matching the corresponding
726+
parenthesized group in the regexp pattern. An error will be thrown for invalid
727+
regex or replace string.
726728

727729
```
728730
regex.replace(target: string, pattern: string, replacement: string) -> string
@@ -735,11 +737,11 @@ Examples:
735737
regex.replace('banana', 'a', 'x', 0) == 'banana'
736738
regex.replace('banana', 'a', 'x', 1) == 'bxnana'
737739
regex.replace('banana', 'a', 'x', 2) == 'bxnxna'
738-
regex.replace('foo bar', '(fo)o (ba)r', '$2 $1') == 'ba fo'
740+
regex.replace('foo bar', '(fo)o (ba)r', '\\2 \\1') == 'ba fo'
739741
740742
regex.replace('test', '(.)', '\\2') \\ Runtime Error invalid replace string
741743
regex.replace('foo bar', '(', '$2 $1') \\ Runtime Error invalid regex string
742-
regex.replace('id=123', 'id=(?P<value>\\\\d+)', 'value: ${values}') \\ Runtime Error invalid replace string
744+
regex.replace('id=123', 'id=(?P<value>\\\\d+)', 'value: \\values') \\ Runtime Error invalid replace string
743745
744746
```
745747

extensions/src/test/java/dev/cel/extensions/CelRegexExtensionsTest.java

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,31 @@ public final class CelRegexExtensionsTest {
3939
CelRuntimeFactory.standardCelRuntimeBuilder().addLibraries(CelExtensions.regex()).build();
4040

4141
@Test
42-
@TestParameters("{target: 'foo bar', regex: '(fo)o (ba)r', replaceStr: '$2 $1', res: 'ba fo'}")
42+
@TestParameters("{target: 'abc', regex: '^', replaceStr: 'start_', res: 'start_abc'}")
43+
@TestParameters("{target: 'abc', regex: '$', replaceStr: '_end', res: 'abc_end'}")
44+
@TestParameters("{target: 'a-b', regex: '\\\\b', replaceStr: '|', res: '|a|-|b|'}")
45+
@TestParameters(
46+
"{target: 'foo bar', regex: '(fo)o (ba)r', replaceStr: '\\\\2 \\\\1', res: 'ba fo'}")
47+
@TestParameters("{target: 'foo bar', regex: 'foo', replaceStr: '\\\\\\\\', res: '\\ bar'}")
4348
@TestParameters("{target: 'banana', regex: 'ana', replaceStr: 'x', res: 'bxna'}")
44-
@TestParameters("{target: 'abc', regex: 'b(.)', replaceStr: 'x$1', res: 'axc'}")
49+
@TestParameters("{target: 'abc', regex: 'b(.)', replaceStr: 'x\\\\1', res: 'axc'}")
4550
@TestParameters(
4651
"{target: 'hello world hello', regex: 'hello', replaceStr: 'hi', res: 'hi world hi'}")
52+
@TestParameters("{target: 'ac', regex: 'a(b)?c', replaceStr: '[\\\\1]', res: '[]'}")
4753
@TestParameters("{target: 'apple pie', regex: 'p', replaceStr: 'X', res: 'aXXle Xie'}")
4854
@TestParameters(
4955
"{target: 'remove all spaces', regex: '\\\\s', replaceStr: '', res: 'removeallspaces'}")
5056
@TestParameters("{target: 'digit:99919291992', regex: '\\\\d+', replaceStr: '3', res: 'digit:3'}")
5157
@TestParameters(
52-
"{target: 'foo bar baz', regex: '\\\\w+', replaceStr: '($0)', res: '(foo) (bar) (baz)'}")
58+
"{target: 'foo bar baz', regex: '\\\\w+', replaceStr: '(\\\\0)', res: '(foo) (bar) (baz)'}")
5359
@TestParameters("{target: '', regex: 'a', replaceStr: 'b', res: ''}")
5460
@TestParameters(
5561
"{target: 'User: Alice, Age: 30', regex: 'User: (?P<name>\\\\w+), Age: (?P<age>\\\\d+)',"
56-
+ " replaceStr: '${name} is ${age} years old', res: 'Alice is 30 years old'}")
62+
+ " replaceStr: '${name} is ${age} years old', res: '${name} is ${age} years old'}")
5763
@TestParameters(
58-
"{target: 'abc', regex: '(?P<letter>b)', replaceStr: '[${letter}]', res: 'a[b]c'}")
64+
"{target: 'User: Alice, Age: 30', regex: 'User: (?P<name>\\\\w+), Age: (?P<age>\\\\d+)',"
65+
+ " replaceStr: '\\\\1 is \\\\2 years old', res: 'Alice is 30 years old'}")
66+
@TestParameters("{target: 'hello ☃', regex: '☃', replaceStr: '❄', res: 'hello ❄'}")
5967
public void replaceAll_success(String target, String regex, String replaceStr, String res)
6068
throws Exception {
6169
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
@@ -70,8 +78,8 @@ public void replaceAll_success(String target, String regex, String replaceStr, S
7078
public void replace_nested_success() throws Exception {
7179
String expr =
7280
"regex.replace("
73-
+ " regex.replace('%(foo) %(bar) %2','%\\\\((\\\\w+)\\\\)','\\\\${$1}'),"
74-
+ " '%(\\\\d+)', '\\\\$$1')";
81+
+ " regex.replace('%(foo) %(bar) %2','%\\\\((\\\\w+)\\\\)','${\\\\1}'),"
82+
+ " '%(\\\\d+)', '$\\\\1')";
7583
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile(expr).getAst());
7684

7785
Object result = program.eval();
@@ -87,17 +95,16 @@ public void replace_nested_success() throws Exception {
8795
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -1, res: 'bxnxnx'}")
8896
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -100, res: 'banana'}")
8997
@TestParameters(
90-
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '$2-$1', i: 1,"
98+
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '\\\\2-\\\\1', i: 1,"
9199
+ " res: 'dog-cat dog-cat cat-dog dog-cat'}")
92100
@TestParameters(
93-
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '$2-$1', i: 2, res: 'dog-cat"
94-
+ " dog-cat dog-cat dog-cat'}")
101+
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '\\\\2-\\\\1', i: 2, res:"
102+
+ " 'dog-cat dog-cat dog-cat dog-cat'}")
95103
@TestParameters("{t: 'a.b.c', re: '\\\\.', rep: '-', i: 1, res: 'a-b.c'}")
96104
@TestParameters("{t: 'a.b.c', re: '\\\\.', rep: '-', i: -1, res: 'a-b-c'}")
97105
public void replaceCount_success(String t, String re, String rep, long i, String res)
98106
throws Exception {
99107
String expr = String.format("regex.replace('%s', '%s', '%s', %d)", t, re, rep, i);
100-
System.out.println("expr: " + expr);
101108
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile(expr).getAst());
102109

103110
Object result = program.eval();
@@ -121,23 +128,29 @@ public void replace_invalid_regex(String target, String regex, String replaceStr
121128
}
122129

123130
@Test
124-
@TestParameters("{target: 'test', regex: '(.)', replaceStr: '$2'}")
125-
public void replace_invalid_captureGroup(String target, String regex, String replaceStr)
126-
throws Exception {
131+
@TestParameters(
132+
"{target: 'test', regex: '(.)', replaceStr: '\\\\2', res: Replacement string references group"
133+
+ " 2 but regex has only 1 group(s)}")
134+
public void replace_invalid_captureGroup(
135+
String target, String regex, String replaceStr, String res) throws Exception {
127136
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
128137
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
129138

130139
CelEvaluationException e =
131140
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
132141

133-
assertThat(e).hasCauseThat().isInstanceOf(IndexOutOfBoundsException.class);
134-
assertThat(e).hasCauseThat().hasMessageThat().contains("n > number of groups");
142+
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
143+
assertThat(e).hasCauseThat().hasMessageThat().contains(res);
135144
}
136145

137146
@Test
138147
@TestParameters(
139-
"{target: 'id=123', regex: 'id=(?P<value>\\\\d+)', replaceStr: 'value: ${values}'}")
140-
public void replace_invalid_replaceStr(String target, String regex, String replaceStr)
148+
"{target: 'id=123', regex: 'id=(?P<value>\\\\d+)', replaceStr: '\\\\', res: 'Invalid"
149+
+ " replacement string: \\ not allowed at end'}")
150+
@TestParameters(
151+
"{target: 'id=123', regex: 'id=(?P<value>\\\\d+)', replaceStr: '\\\\a', res: 'Invalid"
152+
+ " replacement string: \\ must be followed by a digit'}")
153+
public void replace_invalid_replaceStr(String target, String regex, String replaceStr, String res)
141154
throws Exception {
142155
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
143156
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
@@ -146,7 +159,7 @@ public void replace_invalid_replaceStr(String target, String regex, String repla
146159
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
147160

148161
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
149-
assertThat(e).hasCauseThat().hasMessageThat().contains("group 'values' not found");
162+
assertThat(e).hasCauseThat().hasMessageThat().contains(res);
150163
}
151164

152165
@Test
@@ -208,6 +221,9 @@ private enum ExtractAllTestCase {
208221
NO_MATCH("regex.extractAll('id:123, id:456', 'assa')", ImmutableList.of()),
209222
NO_CAPTURE_GROUP(
210223
"regex.extractAll('id:123, id:456', 'id:\\\\d+')", ImmutableList.of("id:123", "id:456")),
224+
CAPTURE_GROUP(
225+
"regex.extractAll('key=\"\", key=\"val\"', 'key=\"([^\"]*)\"')",
226+
ImmutableList.of("", "val")),
211227
SINGLE_NAMED_GROUP(
212228
"regex.extractAll('testuser@testdomain', '(?P<username>.*)@')",
213229
ImmutableList.of("testuser")),

0 commit comments

Comments
 (0)