Skip to content

Commit f54cb43

Browse files
committed
string performance optimisation
1 parent 10c3f2c commit f54cb43

6 files changed

Lines changed: 88 additions & 58 deletions

File tree

src/main/java/com/dashjoin/jsonata/Functions.java

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -609,10 +609,11 @@ public static String leftPad(final String str, final int size, String padStr) {
609609
if (pads <= 0) {
610610
return str;
611611
}
612-
String padding = "";
612+
StringBuilder paddingSb = new StringBuilder();
613613
for (int i = 0; i < pads + 1; i++) {
614-
padding += padStr;
614+
paddingSb.append(padStr);
615615
}
616+
String padding = paddingSb.toString();
616617
return substr(padding, 0, pads).concat(str);
617618
}
618619

@@ -638,10 +639,11 @@ public static String rightPad(final String str, final int size, String padStr) {
638639
if (pads <= 0) {
639640
return str;
640641
}
641-
String padding = "";
642+
StringBuilder paddingSb = new StringBuilder();
642643
for (int i = 0; i < pads + 1; i++) {
643-
padding += padStr;
644+
paddingSb.append(padStr);
644645
}
646+
String padding = paddingSb.toString();
645647
return str.concat(substr(padding, 0, pads));
646648
}
647649

@@ -770,14 +772,21 @@ public static String join(List<String> strs, String separator) {
770772
return String.join(separator, strs);
771773
}
772774

775+
private static final Pattern DOLLAR_DOLLAR = Pattern.compile("\\$\\$");
776+
private static final Pattern DOLLAR_WITHOUT_ESCAPE = Pattern.compile("([^\\\\]|^)\\$([^0-9^<])");
777+
private static final Pattern DOLLAR_AT_END = Pattern.compile("\\$$");
773778
static String safeReplacement(String in) {
774779
// In JSONata and in Java the $ in the replacement text usually starts the insertion of a capturing group
775780
// In order to replace a simple $ in Java you have to escape the $ with "\$"
776781
// in JSONata you do this with a '$$'
777-
// "\$" followed any character besides '<' and and digit into $ + this character
778-
return in.replaceAll("\\$\\$", "\\\\\\$")
779-
.replaceAll("([^\\\\]|^)\\$([^0-9^<])", "$1\\\\\\$$2")
780-
.replaceAll("\\$$", "\\\\\\$"); // allow $ at end
782+
// "\$" followed by any character besides '<' or a digit is turned into $ + this character
783+
if (!in.contains("$")) {
784+
return in;
785+
}
786+
String result = DOLLAR_DOLLAR.matcher(in).replaceAll("\\\\\\$");
787+
result = DOLLAR_WITHOUT_ESCAPE.matcher(result).replaceAll("$1\\\\\\$$2");
788+
result = DOLLAR_AT_END.matcher(result).replaceAll("\\\\\\$");
789+
return result;
781790
}
782791

783792
/**
@@ -963,6 +972,12 @@ public static String base64decode(String str) {
963972
}
964973
}
965974

975+
private static final Pattern PLUS = Pattern.compile("\\+");
976+
private static final Pattern PERCENT_21 = Pattern.compile("%21");
977+
private static final Pattern PERCENT_27 = Pattern.compile("%27");
978+
private static final Pattern PERCENT_28 = Pattern.compile("%28");
979+
private static final Pattern PERCENT_29 = Pattern.compile("%29");
980+
private static final Pattern PERCENT_7E = Pattern.compile("%7E");
966981
/**
967982
* Encode a string into a component for a url
968983
* @param {String} str - String to encode
@@ -975,14 +990,20 @@ public static String encodeUrlComponent(String str) {
975990
}
976991

977992
Utils.checkUrl(str);
978-
979-
return URLEncoder.encode(str, StandardCharsets.UTF_8)
980-
.replaceAll("\\+", "%20")
981-
.replaceAll("\\%21", "!")
982-
.replaceAll("\\%27", "'")
983-
.replaceAll("\\%28", "(")
984-
.replaceAll("\\%29", ")")
985-
.replaceAll("\\%7E", "~");
993+
994+
String encoded = URLEncoder.encode(str, StandardCharsets.UTF_8);
995+
996+
if (!encoded.contains("+") && !encoded.contains("%")) {
997+
return encoded;
998+
}
999+
1000+
encoded = PLUS.matcher(encoded).replaceAll("%20");
1001+
encoded = PERCENT_21.matcher(encoded).replaceAll("!");
1002+
encoded = PERCENT_27.matcher(encoded).replaceAll("'");
1003+
encoded = PERCENT_28.matcher(encoded).replaceAll("(");
1004+
encoded = PERCENT_29.matcher(encoded).replaceAll(")");
1005+
encoded = PERCENT_7E.matcher(encoded).replaceAll("~");
1006+
return encoded;
9861007
}
9871008

9881009
/**

src/main/java/com/dashjoin/jsonata/Parser.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -590,17 +590,18 @@ Symbol led(Symbol left) {
590590
// is the next token a '<' - if so, parse the function signature
591591
if (node.id.equals("<")) {
592592
int depth = 1;
593-
String sig = "<";
593+
StringBuilder sigBuilder = new StringBuilder("<");
594594
while (depth > 0 && !node.id.equals("{") && !node.id.equals("(end)")) {
595595
Symbol tok = advance();
596596
if (tok.id.equals(">")) {
597597
depth--;
598598
} else if (tok.id.equals("<")) {
599599
depth++;
600600
}
601-
sig += tok.value;
601+
sigBuilder.append(tok.value);
602602
}
603603
advance(">");
604+
String sig = sigBuilder.toString();
604605
this.signature = new Signature(sig, "lambda");
605606
}
606607
// parse the function body

src/main/java/com/dashjoin/jsonata/Tokenizer.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ public class Tokenizer { // = function (path) {
8383
put("t", "\t");
8484
}};
8585

86+
private static final Pattern NUM_REGEX = Pattern.compile("^-?(0|([1-9][0-9]*))(\\.[0-9]+)?([Ee][-+]?[0-9]+)?");
8687
// Tokenizer (lexer) - invoked by the parser to return one token at a time
8788
String path;
8889
int position = 0;
@@ -262,20 +263,20 @@ Token next(boolean prefix) {
262263
char quoteType = currentChar;
263264
// double quoted string literal - find end of string
264265
position++;
265-
var qstr = "";
266+
var qstr = new StringBuilder();
266267
while (position < length) {
267268
currentChar = path.charAt(position);
268269
if (currentChar == '\\') { // escape sequence
269270
position++;
270271
if (position < path.length()) currentChar = path.charAt(position); else throw new JException("S0103", position, "");
271272
if (escapes.get(""+currentChar)!=null) {
272-
qstr += escapes.get(""+currentChar);
273+
qstr.append(escapes.get(""+currentChar));
273274
} else if (currentChar == 'u') {
274275
// u should be followed by 4 hex digits
275276
String octets = position+5 < path.length() ? path.substring(position + 1, (position + 1) + 4) : "";
276277
if (octets.matches("^[0-9a-fA-F]+$")) { // /^[0-9a-fA-F]+$/.test(octets)) {
277278
int codepoint = Integer.parseInt(octets, 16);
278-
qstr += Character.toString((char) codepoint);
279+
qstr.append((char) codepoint);
279280
position += 4;
280281
} else {
281282
throw new JException("S0104", position);
@@ -287,17 +288,16 @@ Token next(boolean prefix) {
287288
}
288289
} else if (currentChar == quoteType) {
289290
position++;
290-
return create("string", qstr);
291+
return create("string", qstr.toString());
291292
} else {
292-
qstr += currentChar;
293+
qstr.append(currentChar);
293294
}
294295
position++;
295296
}
296297
throw new JException("S0101", position);
297298
}
298299
// test for numbers
299-
Pattern numregex = Pattern.compile("^-?(0|([1-9][0-9]*))(\\.[0-9]+)?([Ee][-+]?[0-9]+)?");
300-
Matcher match = numregex.matcher(path.substring(position));
300+
Matcher match = NUM_REGEX.matcher(path.substring(position));
301301
if (match.find()) {
302302
double num = Double.parseDouble(match.group(0));
303303
if (!Double.isNaN(num) && Double.isFinite(num)) {

src/main/java/com/dashjoin/jsonata/json/JsonParser.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -344,22 +344,32 @@ private void readEscape() throws IOException {
344344
captureBuffer.append('\t');
345345
break;
346346
case 'u':
347-
char[] hexChars = new char[4];
347+
int value = 0;
348348
for (int i = 0; i < 4; i++) {
349349
read();
350350
if (!isHexDigit()) {
351351
throw expected("hexadecimal digit");
352352
}
353-
hexChars[i] = (char)current;
353+
value = (value << 4) | hexCharToValue((char) current);
354354
}
355-
captureBuffer.append((char)Integer.parseInt(new String(hexChars), 16));
355+
captureBuffer.append((char) value);
356356
break;
357357
default:
358358
throw expected("valid escape sequence");
359359
}
360360
read();
361361
}
362362

363+
private int hexCharToValue(char c) {
364+
if (c >= '0' && c <= '9') {
365+
return c - '0';
366+
} else if (c >= 'A' && c <= 'F') {
367+
return c - 'A' + 10;
368+
} else { // c >= 'a' && c <= 'f'
369+
return c - 'a' + 10;
370+
}
371+
}
372+
363373
private void readNumber() throws IOException {
364374
handler.startNumber();
365375
startCapture();

src/main/java/com/dashjoin/jsonata/utils/DateTimeUtils.java

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,9 @@ private static String lookup(long num, boolean prev, boolean ord) {
174174
wordValuesLong.put(lword + "th", val);
175175
}
176176
}
177-
177+
private static final Pattern SPLIT_PATTERN = Pattern.compile(",\\s|\\sand\\s|[\\s\\-]");
178178
public static int wordsToNumber(String text) {
179-
String[] parts = text.split(",\\s|\\sand\\s|[\\s\\-]");
179+
String[] parts = SPLIT_PATTERN.split(text);
180180
Integer[] values = new Integer[parts.length];
181181
for (int i = 0; i < parts.length; i++) {
182182
values[i] = wordValues.get(parts[i]);
@@ -202,7 +202,7 @@ public static int wordsToNumber(String text) {
202202
* long version of above
203203
*/
204204
public static long wordsToLong(String text) {
205-
String[] parts = text.split(",\\s|\\sand\\s|[\\s\\-]");
205+
String[] parts = SPLIT_PATTERN.split(text);
206206
Long[] values = new Long[parts.length];
207207
for (int i = 0; i < parts.length; i++) {
208208
values[i] = wordValuesLong.get(parts[i]);
@@ -788,16 +788,15 @@ public static String formatDateTime(long millis, String picture, String timezone
788788

789789
int offsetMillis = (60 * offsetHours + offsetMinutes) * 60 * 1000;
790790
LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(millis + offsetMillis), ZoneOffset.UTC);
791-
String result = "";
791+
StringBuilder resultBuilder = new StringBuilder();
792792
for (SpecPart part : formatSpec.parts) {
793793
if (part.type.equals("literal")) {
794-
result += part.value;
794+
resultBuilder.append(part.value);
795795
} else {
796-
result += formatComponent(dateTime, part, offsetHours, offsetMinutes);
796+
resultBuilder.append(formatComponent(dateTime, part, offsetHours, offsetMinutes));
797797
}
798798
}
799-
800-
return result;
799+
return resultBuilder.toString();
801800
}
802801

803802
private static String formatComponent(LocalDateTime date, SpecPart markerSpec, int offsetHours, int offsetMinutes) {
@@ -938,11 +937,12 @@ private static String getDateTimeFragment(LocalDateTime date, Character componen
938937
public static Long parseDateTime(String timestamp, String picture) {
939938
PictureFormat formatSpec = analyseDateTimePicture(picture);
940939
PictureMatcher matchSpec = generateRegex(formatSpec);
941-
String fullRegex = "^";
940+
StringBuilder fullRegexBuilder = new StringBuilder("^");
942941
for (MatcherPart part : matchSpec.parts) {
943-
fullRegex += "(" + part.regex + ")";
942+
fullRegexBuilder.append("(").append(part.regex).append(")");
944943
}
945-
fullRegex += "$";
944+
fullRegexBuilder.append("$");
945+
String fullRegex = fullRegexBuilder.toString();
946946
Pattern pattern = Pattern.compile(fullRegex, Pattern.CASE_INSENSITIVE);
947947
Matcher matcher = pattern.matcher(timestamp);
948948
if (matcher.find()) {
@@ -1060,13 +1060,13 @@ private static boolean isType(int type, int mask) {
10601060
return ((~type & mask) == 0) && (type & mask) != 0;
10611061
}
10621062

1063+
private static final Pattern LITERAL_ESCAPE_PATTERN = Pattern.compile("[.*+?^${}()|\\[\\]\\\\]");
10631064
private static PictureMatcher generateRegex(PictureFormat formatSpec) {
10641065
PictureMatcher matcher = new PictureMatcher();
10651066
for (final SpecPart part : formatSpec.parts) {
10661067
MatcherPart res;
10671068
if (part.type.equals("literal")) {
1068-
Pattern p = Pattern.compile("[.*+?^${}()|\\[\\]\\\\]");
1069-
Matcher m = p.matcher(part.value);
1069+
Matcher m = LITERAL_ESCAPE_PATTERN.matcher(part.value);
10701070

10711071
String regex = m.replaceAll("\\\\$0");
10721072
res = new MatcherPart(regex) {

src/main/java/com/dashjoin/jsonata/utils/Signature.java

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -298,13 +298,13 @@ void throwValidationError(List<?> badArgs, String badSig, String functionName) {
298298
@SuppressWarnings({"rawtypes", "unchecked"})
299299
public Object validate(Object _args, Object context) {
300300

301-
var result = new ArrayList<>();
302-
303301
var args = (List)_args;
304-
String suppliedSig = "";
305-
for (Object arg : args)
306-
suppliedSig += getSymbol(arg);
307-
302+
StringBuilder sigBuilder = new StringBuilder(args.size());
303+
for (Object arg : args) {
304+
sigBuilder.append(getSymbol(arg));
305+
}
306+
String suppliedSig = sigBuilder.toString();
307+
308308
Matcher isValid = _regex.matcher(suppliedSig);
309309
if (isValid != null && isValid.matches()) {
310310
var validatedArgs = new ArrayList<>();
@@ -337,21 +337,21 @@ public Object validate(Object _args, Object context) {
337337
} else {
338338
// may have matched multiple args (if the regex ends with a '+')
339339
// split into single tokens
340-
String[] singles = match.split("");
341-
for (String single : singles) {
340+
char[] singles = match.toCharArray();
341+
for (char single : singles) {
342342
//match.split('').forEach(function (single) {
343343
if (param.type.equals("a")) {
344-
if (single.equals("m")) {
344+
if (single == 'm') {
345345
// missing (undefined)
346346
arg = null;
347347
} else {
348348
arg = argIndex <args.size() ? args.get(argIndex) : null;
349349
var arrayOK = true;
350350
// is there type information on the contents of the array?
351351
if (param.subtype != null) {
352-
if (!single.equals("a") && !match.equals(param.subtype)) {
352+
if (single != 'a' && !match.equals(param.subtype)) {
353353
arrayOK = false;
354-
} else if (single.equals("a")) {
354+
} else if (single == 'a') {
355355
List argArr = (List)arg;
356356
if (argArr.size() > 0) {
357357
var itemType = getSymbol(argArr.get(0));
@@ -377,24 +377,22 @@ public Object validate(Object _args, Object context) {
377377
);
378378
}
379379
// the function expects an array. If it's not one, make it so
380-
if (!single.equals("a")) {
380+
if (single != 'a') {
381381
List _arg = new ArrayList<>(); _arg.add(arg);
382382
arg = _arg;
383383
}
384384
}
385-
validatedArgs.add(arg);
386-
argIndex++;
387385
} else {
388386
arg = argIndex<args.size() ? args.get(argIndex) : null;
389-
validatedArgs.add(arg);
390-
argIndex++;
391387
}
388+
validatedArgs.add(arg);
389+
argIndex++;
392390
}
393391
}
394392
index++;
395393
}
396394
return validatedArgs;
397-
}
395+
}
398396
throwValidationError(args, suppliedSig, functionName);
399397
return null; // dead code -> compiler happy
400398
}

0 commit comments

Comments
 (0)