diff --git a/src/main/java/com/googlecode/aviator/code/OptimizeCodeGenerator.java b/src/main/java/com/googlecode/aviator/code/OptimizeCodeGenerator.java index e9b316d0..ddfbfb4e 100644 --- a/src/main/java/com/googlecode/aviator/code/OptimizeCodeGenerator.java +++ b/src/main/java/com/googlecode/aviator/code/OptimizeCodeGenerator.java @@ -591,7 +591,7 @@ private void callASM(final Map variables, case Lambda_New: this.codeGen.genNewLambdaCode(delegateToken.getLambdaFunctionBootstrap()); break; - case Ternay_End: + case Ternary_End: this.codeGen.onTernaryEnd(realToken); break; } @@ -870,7 +870,7 @@ public void onTernaryRight(final Token lookahead) { @Override public void onTernaryEnd(final Token lookahead) { - this.tokenList.add(new DelegateToken(lookahead, DelegateTokenType.Ternay_End)); + this.tokenList.add(new DelegateToken(lookahead, DelegateTokenType.Ternary_End)); } diff --git a/src/main/java/com/googlecode/aviator/lexer/ExpressionLexer.java b/src/main/java/com/googlecode/aviator/lexer/ExpressionLexer.java index d4208864..e7564c37 100644 --- a/src/main/java/com/googlecode/aviator/lexer/ExpressionLexer.java +++ b/src/main/java/com/googlecode/aviator/lexer/ExpressionLexer.java @@ -115,17 +115,20 @@ public void prevChar() { this.peek = this.iterator.previous(); } - static final char[] VALID_HEX_CHAR = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'a', - 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'F', 'f'}; + // Character constants for lexical analysis + private static final String HEX_CHARS = "0123456789AaBbCcDdEeFf"; + private static final String OPERATOR_CHARS = "=><+-*/%!&|"; + + /** Suffix for BigInteger literals, e.g., 100N */ + static final char BIGINT_SUFFIX = 'N'; + /** Suffix for BigDecimal literals, e.g., 3.14M */ + static final char DECIMAL_SUFFIX = 'M'; + /** Hex number prefix after '0', e.g., 0xFF */ + static final char HEX_PREFIX = 'x'; public boolean isValidHexChar(final char ch) { - for (char c : VALID_HEX_CHAR) { - if (c == ch) { - return 
true; - } - } - return false; + return HEX_CHARS.indexOf(ch) >= 0; } @@ -136,11 +139,12 @@ public int getCurrentIndex() { public Token scan(final boolean analyse) { - // If buffer is not empty,return + // If buffer is not empty, return buffered token if (this.tokenBuffer != null && !this.tokenBuffer.isEmpty()) { return this.tokenBuffer.pop(); } - // Skip white space or line + + // Skip whitespace or return raw char when not analysing for (;; nextChar()) { if (this.peek == CharacterIterator.DONE) { return null; @@ -162,283 +166,380 @@ public Token scan(final boolean analyse) { } } - // if it is a hex digit - if (Character.isDigit(this.peek) && this.peek == '0') { - nextChar(); - if (this.peek == 'x' || this.peek == 'X') { - nextChar(); - StringBuilder sb = new StringBuilder(); - int startIndex = this.iterator.getIndex() - 2; - long value = 0L; - do { - sb.append(this.peek); - value = 16 * value + Character.digit(this.peek, 16); - nextChar(); - } while (isValidHexChar(this.peek)); - return new NumberToken(value, sb.toString(), this.lineNo, startIndex); - } else { - prevChar(); - } + // Try each token type in order + Token token; + + if ((token = scanHexNumber()) != null) { + return token; + } + if ((token = scanNumber()) != null) { + return token; + } + if ((token = scanQuoteVariable(analyse)) != null) { + return token; + } + if ((token = scanVariable()) != null) { + return token; + } + if ((token = scanOperator()) != null) { + return token; + } + if ((token = scanString()) != null) { + return token; + } + + // Fallback: return single character token (but not DONE) + if (this.peek == CharacterIterator.DONE) { + return null; + } + token = new CharToken(this.peek, this.lineNo, this.iterator.getIndex()); + nextChar(); + return token; + } + + + /** + * Scan hexadecimal number (0x...). 
+ * + * @return NumberToken if hex number found, null otherwise + */ + private Token scanHexNumber() { + if (!(Character.isDigit(this.peek) && this.peek == '0')) { + return null; } - // If it is a digit - if (Character.isDigit(this.peek) || this.peek == '.') { + nextChar(); + if (this.peek == HEX_PREFIX || this.peek == 'X') { + nextChar(); StringBuilder sb = new StringBuilder(); - int startIndex = this.iterator.getIndex(); - long lval = 0L; - - double dval = 0d; - boolean hasDot = false; - double d = 10.0; - boolean isBigInt = false; - boolean isBigDecimal = false; - boolean scientificNotation = false; - boolean negExp = false; - boolean isOverflow = false; + int startIndex = this.iterator.getIndex() - 2; + long value = 0L; do { sb.append(this.peek); - if (this.peek == '.') { - if (scientificNotation) { - throw new CompileExpressionErrorException( - "Illegal number " + sb + " at " + this.iterator.getIndex()); - } - if (hasDot) { - throw new CompileExpressionErrorException( - "Illegal Number " + sb + " at " + this.iterator.getIndex()); - } else { - hasDot = true; - nextChar(); - } + value = 16 * value + Character.digit(this.peek, 16); + nextChar(); + } while (isValidHexChar(this.peek)); + return new NumberToken(value, sb.toString(), this.lineNo, startIndex); + } else { + prevChar(); + return null; + } + } - } else if (this.peek == 'N') { - // big integer - if (hasDot) { - throw new CompileExpressionErrorException( - "Illegal number " + sb + " at " + this.iterator.getIndex()); - } - isBigInt = true; - nextChar(); - break; - } else if (this.peek == 'M') { - isBigDecimal = true; + + /** + * Scan decimal number (integer, float, scientific notation, BigInt, BigDecimal). 
+ * + * @return NumberToken or CharToken('.') if found, null otherwise + */ + private Token scanNumber() { + if (!(Character.isDigit(this.peek) || this.peek == '.')) { + return null; + } + + StringBuilder sb = new StringBuilder(); + int startIndex = this.iterator.getIndex(); + long lval = 0L; + double dval = 0d; + double d = 10.0; + boolean hasDot = false; + boolean isBigInt = false; + boolean isBigDecimal = false; + boolean scientificNotation = false; + boolean negExp = false; + boolean isOverflow = false; + + do { + sb.append(this.peek); + + if (this.peek == '.') { + if (scientificNotation) { + throw new CompileExpressionErrorException( + "Illegal number " + sb + " at " + this.iterator.getIndex()); + } + if (hasDot) { + throw new CompileExpressionErrorException( + "Illegal Number " + sb + " at " + this.iterator.getIndex()); + } + hasDot = true; + nextChar(); + + } else if (this.peek == BIGINT_SUFFIX) { + if (hasDot) { + throw new CompileExpressionErrorException( + "Illegal number " + sb + " at " + this.iterator.getIndex()); + } + isBigInt = true; + nextChar(); + break; + + } else if (this.peek == DECIMAL_SUFFIX) { + isBigDecimal = true; + nextChar(); + break; + + } else if (this.peek == 'e' || this.peek == 'E') { + if (scientificNotation) { + throw new CompileExpressionErrorException( + "Illegal number " + sb + " at " + this.iterator.getIndex()); + } + scientificNotation = true; + nextChar(); + if (this.peek == '-') { + negExp = true; + sb.append(this.peek); nextChar(); - break; - } else if (this.peek == 'e' || this.peek == 'E') { - if (scientificNotation) { - throw new CompileExpressionErrorException( - "Illegal number " + sb + " at " + this.iterator.getIndex()); - } - scientificNotation = true; + } + + } else { + int digit = Character.digit(this.peek, 10); + if (scientificNotation) { + int n = digit; nextChar(); - if (this.peek == '-') { - negExp = true; + while (Character.isDigit(this.peek)) { sb.append(this.peek); + n = 10 * n + Character.digit(this.peek, 
10); nextChar(); } - } else { - int digit = Character.digit(this.peek, 10); - if (scientificNotation) { - int n = digit; - nextChar(); - while (Character.isDigit(this.peek)) { - sb.append(this.peek); - n = 10 * n + Character.digit(this.peek, 10); - nextChar(); + while (n-- > 0) { + if (negExp) { + dval = dval / 10; + } else { + dval = 10 * dval; } - while (n-- > 0) { - if (negExp) { - dval = dval / 10; - } else { - dval = 10 * dval; - } - } - hasDot = true; - } else if (hasDot) { - dval = dval + digit / d; - d = d * 10; - nextChar(); - } else { - if (!isOverflow - && (lval > OVERFLOW_FLAG || (lval == OVERFLOW_FLAG && digit > OVERFLOW_SINGLE))) { - isOverflow = true; - } - lval = 10 * lval + digit; - dval = 10 * dval + digit; - nextChar(); } - - } - - } while (Character.isDigit(this.peek) || this.peek == '.' || this.peek == 'E' - || this.peek == 'e' || this.peek == 'M' || this.peek == 'N'); - - Number value; - if (isBigDecimal) { - value = new BigDecimal(getBigNumberLexeme(sb), this.mathContext); - } else if (isBigInt) { - value = new BigInteger(getBigNumberLexeme(sb)); - } else if (hasDot) { - if (this.parseFloatIntoDecimal && sb.length() > 1) { - value = new BigDecimal(sb.toString(), this.mathContext); - } else if (sb.length() == 1) { - // only have a dot character. - return new CharToken('.', this.lineNo, startIndex); - } else { - value = dval; - } - } else { - if (this.parseIntegralNumberIntoDecimal) { - // we make integral number as a BigDecimal. - value = new BigDecimal(sb.toString(), this.mathContext); + hasDot = true; + } else if (hasDot) { + dval = dval + digit / d; + d = d * 10; + nextChar(); } else { - // The long value is overflow, we should prompt it to be a BigInteger. 
- if (isOverflow) { - value = new BigInteger(sb.toString()); - } else { - value = lval; + if (!isOverflow + && (lval > OVERFLOW_FLAG || (lval == OVERFLOW_FLAG && digit > OVERFLOW_SINGLE))) { + isOverflow = true; } + lval = 10 * lval + digit; + dval = 10 * dval + digit; + nextChar(); } } - String lexeme = sb.toString(); - if (isBigDecimal || isBigInt) { - lexeme = lexeme.substring(0, lexeme.length() - 1); + + } while (Character.isDigit(this.peek) || this.peek == '.' || this.peek == 'E' + || this.peek == 'e' || this.peek == DECIMAL_SUFFIX || this.peek == BIGINT_SUFFIX); + + // Build final number value + Number value; + if (isBigDecimal) { + value = new BigDecimal(getBigNumberLexeme(sb), this.mathContext); + } else if (isBigInt) { + value = new BigInteger(getBigNumberLexeme(sb)); + } else if (hasDot) { + if (this.parseFloatIntoDecimal && sb.length() > 1) { + value = new BigDecimal(sb.toString(), this.mathContext); + } else if (sb.length() == 1) { + // Only a dot character + return new CharToken('.', this.lineNo, startIndex); + } else { + value = dval; + } + } else { + if (this.parseIntegralNumberIntoDecimal) { + value = new BigDecimal(sb.toString(), this.mathContext); + } else if (isOverflow) { + value = new BigInteger(sb.toString()); + } else { + value = lval; } - return new NumberToken(value, lexeme, this.lineNo, startIndex); } - // It is a variable - if (this.peek == '#') { - int startIndex = this.iterator.getIndex(); - nextChar(); // skip '#' - boolean hasBackquote = false; + String lexeme = sb.toString(); + if (isBigDecimal || isBigInt) { + lexeme = lexeme.substring(0, lexeme.length() - 1); + } + return new NumberToken(value, lexeme, this.lineNo, startIndex); + } - if (this.peek == '#') { - // ## comments - while (this.peek != CharacterIterator.DONE && this.peek != '\n') { - nextChar(); - } - return this.scan(analyse); - } else if (this.peek == '`') { - hasBackquote = true; + + /** + * Scan quote variable (#var or #`var`). 
+ * + * @param analyse whether to analyse (for recursive call on comment) + * @return Variable token if found, null otherwise + */ + private Token scanQuoteVariable(final boolean analyse) { + if (this.peek != '#') { + return null; + } + + int startIndex = this.iterator.getIndex(); + nextChar(); // skip '#' + + // ## is a comment + if (this.peek == '#') { + while (this.peek != CharacterIterator.DONE && this.peek != '\n') { nextChar(); } + return this.scan(analyse); + } - StringBuilder sb = new StringBuilder(); + // Check for backquote form #`...` + boolean hasBackquote = false; + if (this.peek == '`') { + hasBackquote = true; + nextChar(); + } - if (hasBackquote) { - while (this.peek != '`') { - if (this.peek == CharacterIterator.DONE) { - throw new CompileExpressionErrorException( - "EOF while reading string at index: " + this.iterator.getIndex()); - } - sb.append(this.peek); - nextChar(); + StringBuilder sb = new StringBuilder(); + + if (hasBackquote) { + while (this.peek != '`') { + if (this.peek == CharacterIterator.DONE) { + throw new CompileExpressionErrorException( + "EOF while reading string at index: " + this.iterator.getIndex()); } - // skip '`' + sb.append(this.peek); nextChar(); - } else { - while (Character.isJavaIdentifierPart(this.peek) || this.peek == '.' || this.peek == '[' - || this.peek == ']') { - sb.append(this.peek); - nextChar(); - } - } - String lexeme = sb.toString(); - if (lexeme.isEmpty()) { - throw new ExpressionSyntaxErrorException("Blank variable name after '#'"); } - Variable variable = new Variable(lexeme, this.lineNo, startIndex); - variable.setQuote(true); - return this.symbolTable.reserve(variable); - } - if (Character.isJavaIdentifierStart(this.peek)) { - int startIndex = this.iterator.getIndex(); - StringBuilder sb = new StringBuilder(); - do { + nextChar(); // skip closing '`' + } else { + while (Character.isJavaIdentifierPart(this.peek) || this.peek == '.' 
|| this.peek == '[' + || this.peek == ']') { sb.append(this.peek); nextChar(); - } while (Character.isJavaIdentifierPart(this.peek) || this.peek == '.'); - String lexeme = sb.toString(); - Variable variable = new Variable(lexeme, this.lineNo, startIndex); - return this.symbolTable.reserve(variable); + } } - if (isBinaryOP(this.peek)) { - CharToken opToken = new CharToken(this.peek, this.lineNo, this.iterator.getIndex()); + String lexeme = sb.toString(); + if (lexeme.isEmpty()) { + throw new ExpressionSyntaxErrorException("Blank variable name after '#'"); + } + + Variable variable = new Variable(lexeme, this.lineNo, startIndex); + variable.setQuote(true); + return this.symbolTable.reserve(variable); + } + + + /** + * Scan normal variable/identifier. + * + * @return Variable token if found, null otherwise + */ + private Token scanVariable() { + if (!Character.isJavaIdentifierStart(this.peek)) { + return null; + } + + int startIndex = this.iterator.getIndex(); + StringBuilder sb = new StringBuilder(); + boolean hasDot = false; + + do { + if (this.peek == '.') { + hasDot = true; + } + sb.append(this.peek); nextChar(); - return opToken; + // Only allow [] after a dot has been seen (property access syntax) + } while (Character.isJavaIdentifierPart(this.peek) || this.peek == '.' + || (hasDot && (this.peek == '[' || this.peek == ']'))); + + String lexeme = sb.toString(); + Variable variable = new Variable(lexeme, this.lineNo, startIndex); + return this.symbolTable.reserve(variable); + } + + + /** + * Scan operator character. 
+ * + * @return CharToken if operator found, null otherwise + */ + private Token scanOperator() { + if (!isBinaryOP(this.peek)) { + return null; } - // String - if (this.peek == '"' || this.peek == '\'') { - char left = this.peek; - int startIndex = this.iterator.getIndex(); - StringBuilder sb = new StringBuilder(); - boolean hasInterpolation = false; - // char prev = this.peek; - while ((this.peek = this.iterator.next()) != left) { - // It's not accurate,but acceptable. - if (this.peek == '#' && !hasInterpolation) { - hasInterpolation = true; - } - if (this.peek == '\\') { // escape - nextChar(); - if (this.peek == CharacterIterator.DONE) { - throw new CompileExpressionErrorException( - "EOF while reading string at index: " + this.iterator.getIndex()); - } - if (this.peek == left) { - sb.append(this.peek); - continue; - } - switch (this.peek) { - case 't': - this.peek = '\t'; - break; - case 'r': - this.peek = '\r'; - break; - case 'n': - this.peek = '\n'; - break; - case '\\': - break; - case 'b': - this.peek = '\b'; - break; - case 'f': - this.peek = '\f'; - break; - case '#': - hasInterpolation = hasInterpolation || true; - if (this.instance.isFeatureEnabled(Feature.StringInterpolation)) { - sb.append('\\'); - this.peek = '#'; - break; - } - default: { - throw new CompileExpressionErrorException( - "Unsupported escape character: \\" + this.peek); - } + CharToken opToken = new CharToken(this.peek, this.lineNo, this.iterator.getIndex()); + nextChar(); + return opToken; + } - } - } + /** + * Scan string literal ("..." or '...'). 
+ * + * @return StringToken if found, null otherwise + */ + private Token scanString() { + if (this.peek != '"' && this.peek != '\'') { + return null; + } + + char left = this.peek; + int startIndex = this.iterator.getIndex(); + StringBuilder sb = new StringBuilder(); + boolean hasInterpolation = false; + + while ((this.peek = this.iterator.next()) != left) { + // Check for interpolation marker + if (this.peek == '#' && !hasInterpolation) { + hasInterpolation = true; + } + + // Handle escape sequences + if (this.peek == '\\') { + nextChar(); if (this.peek == CharacterIterator.DONE) { throw new CompileExpressionErrorException( "EOF while reading string at index: " + this.iterator.getIndex()); } + if (this.peek == left) { + sb.append(this.peek); + continue; + } + switch (this.peek) { + case 't': + this.peek = '\t'; + break; + case 'r': + this.peek = '\r'; + break; + case 'n': + this.peek = '\n'; + break; + case '\\': + break; + case 'b': + this.peek = '\b'; + break; + case 'f': + this.peek = '\f'; + break; + case '#': + hasInterpolation = true; + if (this.instance.isFeatureEnabled(Feature.StringInterpolation)) { + sb.append('\\'); + this.peek = '#'; + break; + } + default: + throw new CompileExpressionErrorException( + "Unsupported escape character: \\" + this.peek); + } + } - sb.append(this.peek); + if (this.peek == CharacterIterator.DONE) { + throw new CompileExpressionErrorException( + "EOF while reading string at index: " + this.iterator.getIndex()); } - nextChar(); - return new StringToken(sb.toString(), this.lineNo, startIndex).withMeta(Constants.INTER_META, - hasInterpolation); + + sb.append(this.peek); } - Token token = new CharToken(this.peek, this.lineNo, this.iterator.getIndex()); nextChar(); - return token; + return new StringToken(sb.toString(), this.lineNo, startIndex).withMeta(Constants.INTER_META, + hasInterpolation); } public String getScanString() { @@ -455,16 +556,8 @@ private String getBigNumberLexeme(final StringBuilder sb) { return lexeme; } - 
static final char[] OPS = {'=', '>', '<', '+', '-', '*', '/', '%', '!', '&', '|'}; - - public static boolean isBinaryOP(final char ch) { - for (char tmp : OPS) { - if (tmp == ch) { - return true; - } - } - return false; + return OPERATOR_CHARS.indexOf(ch) >= 0; } } diff --git a/src/main/java/com/googlecode/aviator/lexer/token/DelegateToken.java b/src/main/java/com/googlecode/aviator/lexer/token/DelegateToken.java index 453329e6..02ea9795 100644 --- a/src/main/java/com/googlecode/aviator/lexer/token/DelegateToken.java +++ b/src/main/java/com/googlecode/aviator/lexer/token/DelegateToken.java @@ -33,7 +33,7 @@ public void setLambdaFunctionBootstrap(final LambdaFunctionBootstrap lambdaFunct public static enum DelegateTokenType { And_Left, Join_Left, Ternary_Boolean, Ternary_Left, Array, Index_Start, // Method_Name, Method_Param, Lambda_New, // - Ternay_End + Ternary_End } diff --git a/src/main/java/com/googlecode/aviator/lexer/token/OperatorType.java b/src/main/java/com/googlecode/aviator/lexer/token/OperatorType.java index 856b104a..484c532a 100644 --- a/src/main/java/com/googlecode/aviator/lexer/token/OperatorType.java +++ b/src/main/java/com/googlecode/aviator/lexer/token/OperatorType.java @@ -24,68 +24,53 @@ /** - * Operator type + * Operator type enumeration for AviatorScript. * - * @author dennis + *

+ * Note on token field semantics: SUB ("-sub") and NEG ("-neg") use distinct internal tokens, while ASSIGNMENT and DEFINE share "=".

* + * @author dennis */ public enum OperatorType { - BIT_OR("|", 2), - - BIT_AND("&", 2), - - BIT_XOR("^", 2), - - BIT_NOT("~", 1), - - SHIFT_LEFT("<<", 2), - - SHIFT_RIGHT(">>", 2), - - U_SHIFT_RIGHT(">>>", 2), - - NOT("!", 1), - - MULT("*", 2), - - Exponent("**", 2), - - DIV("/", 2), - - MOD("%", 2), + // Bitwise operators + BIT_OR("|", 2), BIT_AND("&", 2), BIT_XOR("^", 2), BIT_NOT("~", 1), SHIFT_LEFT("<<", + 2), SHIFT_RIGHT(">>", 2), U_SHIFT_RIGHT(">>>", 2), - ADD("+", 2), + // Logical operators + NOT("!", 1), AND("&&", 2), OR("||", 2), + // Arithmetic operators - binary + MULT("*", 2), Exponent("**", 2), DIV("/", 2), MOD("%", 2), ADD("+", 2), + /** Binary subtraction: a - b. Token differs from NEG to distinguish. */ SUB("-sub", 2), - LT("<", 2), - - LE("<=", 2), - - GT(">", 2), - - GE(">=", 2), - - EQ("==", 2), - - NEQ("!=", 2), - - AND("&&", 2), - - MATCH("=~", 2), + // Arithmetic operators - unary + /** Unary negation: -a. Token differs from SUB to distinguish. */ + NEG("-neg", 1), - OR("||", 2), + // Comparison operators + LT("<", 2), LE("<=", 2), GT(">", 2), GE(">=", 2), EQ("==", 2), NEQ("!=", 2), MATCH("=~", 2), + // Special operators + /** Array/map element access: a[i] */ INDEX("[]", 2), - + /** Function call: f(args...). Arity is MAX_VALUE for variadic. */ FUNC("()", Integer.MAX_VALUE), - - NEG("-neg", 1), - + /** Ternary conditional: a ? 
b : c */ TERNARY("?:", 3), + // Assignment operators + /** Assignment to existing variable: x = value */ ASSIGNMENT("=", 2), - + /** Variable definition with let: let x = value */ DEFINE("=", 2); public final String token; diff --git a/src/main/java/com/googlecode/aviator/lexer/token/Variable.java b/src/main/java/com/googlecode/aviator/lexer/token/Variable.java index 8f38a5f4..883aa627 100644 --- a/src/main/java/com/googlecode/aviator/lexer/token/Variable.java +++ b/src/main/java/com/googlecode/aviator/lexer/token/Variable.java @@ -353,6 +353,28 @@ public Object getValue(final Map env) { }; + /** + * Checks if this variable is one of the reserved keywords. + * + * @return true if this is a keyword (TRUE, FALSE, NIL, LAMBDA, END, IF, ELSE, etc.) + */ + public boolean isKeyword() { + return this == TRUE || this == FALSE || this == NIL || this == LAMBDA || this == END + || this == IF || this == ELSE || this == FOR || this == IN || this == RETURN + || this == BREAK || this == CONTINUE || this == LET || this == WHILE || this == FN + || this == ELSIF || this == TRY || this == CATCH || this == FINALLY || this == THROW + || this == NEW || this == USE; + } + + /** + * Checks if this variable is a literal keyword (TRUE, FALSE, NIL). + * + * @return true if this is a literal keyword + */ + public boolean isLiteralKeyword() { + return this == TRUE || this == FALSE || this == NIL; + } + @Override public com.googlecode.aviator.lexer.token.Token.TokenType getType() { return TokenType.Variable; diff --git a/src/main/java/com/googlecode/aviator/parser/ExpressionParser.java b/src/main/java/com/googlecode/aviator/parser/ExpressionParser.java index d4207bc1..6ffc1e5a 100644 --- a/src/main/java/com/googlecode/aviator/parser/ExpressionParser.java +++ b/src/main/java/com/googlecode/aviator/parser/ExpressionParser.java @@ -45,10 +45,29 @@ /** - * Syntex parser for expression + * Recursive descent parser for AviatorScript expressions. * - * @author dennis + *

+ * Operator Precedence (lowest to highest): + * + *

+ *  1. parseTernary        ?:
+ *  2. parseLogicalOr      ||        (logical or)
+ *  3. parseLogicalAnd     &&        (logical and)
+ *  4. parseBitOr          |         (bitwise or)
+ *  5. parseBitXor         ^         (bitwise xor)
+ *  6. parseBitAnd         &         (bitwise and)
+ *  7. parseEquality       == != =~ = (equality, regex match, assignment)
+ *  8. parseRelational     < <= > >= (relational)
+ *  9. parseShift          << >> >>> (bit shift)
+ * 10. parseAdditive       + -       (additive)
+ * 11. parseMultiplicative * / %     (multiplicative)
+ * 12. parseUnary          ! - ~     (unary operators)
+ * 13. parseExponent       **        (power)
+ * 14. parseFactor         literals, variables, function calls, parentheses
+ * 
* + * @author dennis */ public class ExpressionParser implements Parser { private final ExpressionLexer lexer; @@ -60,6 +79,9 @@ public class ExpressionParser implements Parser { private final ArrayDeque> prevTokens = new ArrayDeque<>(); + /** Maximum number of previous tokens to keep for lookback operations */ + private static final int MAX_PREV_TOKENS = 256; + private CodeGenerator codeGenerator; private ScopeInfo scope; @@ -186,13 +208,13 @@ public void returnStatement() { } else { if (this.scope.newLexicalScope) { cg.onMethodName(Constants.ReducerReturnFn); - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("invalid value for return, missing ';'?"); } cg.onMethodParameter(this.lookahead); cg.onMethodInvoke(this.lookahead); } else { - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("invalid value for return, missing ';'?"); } } @@ -204,7 +226,7 @@ public void returnStatement() { move(true); } - public boolean ternary() { + public boolean parseTernary() { int gcTimes = this.getCGTimes; if (this.lookahead == Variable.NEW) { @@ -212,7 +234,7 @@ public boolean ternary() { return true; } - join(); + parseLogicalOr(); if (this.lookahead == null || expectChar(':') || expectChar(',')) { return gcTimes < this.getCGTimes; } @@ -221,13 +243,13 @@ public boolean ternary() { move(true); CodeGenerator cg = getCodeGeneratorWithTimes(); cg.onTernaryBoolean(opToken); - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("invalid token for ternary operator"); } if (expectChar(':')) { move(true); cg.onTernaryLeft(this.lookahead); - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("invalid token for ternary operator"); } cg.onTernaryRight(this.lookahead); @@ -239,8 +261,8 @@ public boolean ternary() { } - public void join() { - and(); + public void parseLogicalOr() { + parseLogicalAnd(); while (true) { Token opToken = this.lookahead; if (expectChar('|')) { @@ -248,7 +270,7 @@ public void join() { move(true); if (expectChar('|')) { 
move(true); - and(); + parseLogicalAnd(); getCodeGeneratorWithTimes().onJoinRight(opToken); } else { reportSyntaxError("expect '|'"); @@ -262,7 +284,7 @@ public void join() { CodeGenerator cg = getCodeGeneratorWithTimes(); cg.onJoinLeft(opToken); move(true); - and(); + parseLogicalAnd(); cg.onJoinRight(opToken); continue; } @@ -283,8 +305,8 @@ private boolean expectChar(final char ch) { } - public void bitOr() { - xor(); + public void parseBitOr() { + parseBitXor(); while (true) { Token opToken = this.lookahead; if (expectChar('|')) { @@ -293,7 +315,7 @@ public void bitOr() { back(); break; } - xor(); + parseBitXor(); getCodeGeneratorWithTimes().onBitOr(opToken); } else { break; @@ -302,13 +324,13 @@ public void bitOr() { } - public void xor() { - bitAnd(); + public void parseBitXor() { + parseBitAnd(); while (true) { Token opToken = this.lookahead; if (expectChar('^')) { move(true); - bitAnd(); + parseBitAnd(); getCodeGeneratorWithTimes().onBitXor(opToken); } else { break; @@ -317,8 +339,8 @@ public void xor() { } - public void bitAnd() { - equality(); + public void parseBitAnd() { + parseEquality(); while (true) { Token opToken = this.lookahead; if (expectChar('&')) { @@ -327,7 +349,7 @@ public void bitAnd() { back(); break; } - equality(); + parseEquality(); getCodeGeneratorWithTimes().onBitAnd(opToken); } else { break; @@ -336,8 +358,8 @@ public void bitAnd() { } - public void and() { - bitOr(); + public void parseLogicalAnd() { + parseBitOr(); while (true) { Token opToken = this.lookahead; @@ -347,7 +369,7 @@ public void and() { move(true); if (expectChar('&')) { move(true); - bitOr(); + parseBitOr(); cg.onAndRight(opToken); } else { reportSyntaxError("expect '&'"); @@ -361,7 +383,7 @@ public void and() { CodeGenerator cg = getCodeGeneratorWithTimes(); cg.onAndLeft(opToken); move(true); - bitOr(); + parseBitOr(); cg.onAndRight(opToken); continue; } @@ -376,8 +398,8 @@ public void and() { } - public void equality() { - rel(); + public void parseEquality() { + 
parseRelational(); while (true) { Token opToken = this.lookahead; Token prevToken = getPrevToken(); @@ -385,12 +407,12 @@ public void equality() { move(true); if (expectChar('=')) { move(true); - rel(); + parseRelational(); getCodeGeneratorWithTimes().onEq(opToken); } else if (expectChar('~')) { // It is a regular expression move(true); - rel(); + parseRelational(); getCodeGeneratorWithTimes().onMatch(opToken); } else { // this.back(); @@ -451,7 +473,7 @@ public void equality() { move(true); if (expectChar('=')) { move(true); - rel(); + parseRelational(); getCodeGeneratorWithTimes().onNeq(opToken); } else { reportSyntaxError("expect '='"); @@ -480,28 +502,28 @@ private void checkVarIsInit(final Token prevToken, StatementType stmtType) { } - public void rel() { - shift(); + public void parseRelational() { + parseShift(); while (true) { Token opToken = this.lookahead; if (expectChar('<')) { move(true); if (expectChar('=')) { move(true); - expr(); + parseAdditive(); getCodeGeneratorWithTimes().onLe(opToken); } else { - expr(); + parseAdditive(); getCodeGeneratorWithTimes().onLt(opToken); } } else if (expectChar('>')) { move(true); if (expectChar('=')) { move(true); - expr(); + parseAdditive(); getCodeGeneratorWithTimes().onGe(opToken); } else { - expr(); + parseAdditive(); getCodeGeneratorWithTimes().onGt(opToken); } } else { @@ -511,15 +533,15 @@ public void rel() { } - public void shift() { - expr(); + public void parseShift() { + parseAdditive(); while (true) { Token opToken = this.lookahead; if (expectChar('<')) { move(true); if (expectChar('<')) { move(true); - expr(); + parseAdditive(); getCodeGeneratorWithTimes().onShiftLeft(opToken); } else { back(); @@ -531,10 +553,10 @@ public void shift() { move(true); if (expectChar('>')) { move(true); - expr(); + parseAdditive(); getCodeGeneratorWithTimes().onUnsignedShiftRight(opToken); } else { - expr(); + parseAdditive(); getCodeGeneratorWithTimes().onShiftRight(opToken); } @@ -549,17 +571,17 @@ public void shift() { } 
- public void expr() { - term(); + public void parseAdditive() { + parseMultiplicative(); while (true) { Token opToken = this.lookahead; if (expectChar('+')) { move(true); - term(); + parseMultiplicative(); getCodeGeneratorWithTimes().onAdd(opToken); } else if (expectChar('-')) { move(true); - term(); + parseMultiplicative(); getCodeGeneratorWithTimes().onSub(opToken); } else { break; @@ -567,15 +589,15 @@ public void expr() { } } - public void exponent() { - factor(); + public void parseExponent() { + parseFactor(); while (true) { Token opToken = this.lookahead; if (expectChar('*')) { move(true); if (expectChar('*')) { move(true); - unary(); + parseUnary(); getCodeGeneratorWithTimes().onExponent(opToken); } else { back(); @@ -588,21 +610,21 @@ public void exponent() { } - public void term() { - unary(); + public void parseMultiplicative() { + parseUnary(); while (true) { Token opToken = this.lookahead; if (expectChar('*')) { move(true); - unary(); + parseUnary(); getCodeGeneratorWithTimes().onMult(opToken); } else if (expectChar('/')) { move(true); - unary(); + parseUnary(); getCodeGeneratorWithTimes().onDiv(opToken); } else if (expectChar('%')) { move(true); - unary(); + parseUnary(); getCodeGeneratorWithTimes().onMod(opToken); } else { break; @@ -611,16 +633,16 @@ public void term() { } - public void unary() { + public void parseUnary() { Token opToken = this.lookahead; if (expectChar('!')) { move(true); // check if it is a seq function call,"!" as variable if (expectChar(',') || expectChar(')')) { back(); - exponent(); + parseExponent(); } else { - unary(); + parseUnary(); getCodeGeneratorWithTimes().onNot(opToken); } } else if (expectChar('-')) { @@ -628,9 +650,9 @@ public void unary() { // check if it is a seq function call,"!" 
as variable if (expectChar(',') || expectChar(')')) { back(); - exponent(); + parseExponent(); } else { - unary(); + parseUnary(); getCodeGeneratorWithTimes().onNeg(opToken); } } else if (expectChar('~')) { @@ -638,13 +660,13 @@ public void unary() { // check if it is a seq function call,"~" as variable if (expectChar(',') || expectChar(')')) { back(); - exponent(); + parseExponent(); } else { - unary(); + parseUnary(); getCodeGeneratorWithTimes().onBitNot(opToken); } } else { - exponent(); + parseExponent(); } } @@ -696,15 +718,15 @@ public boolean isOPVariable(final Token token) { } } - public void factor() { - if (factor0()) { + public void parseFactor() { + if (parseFactor0()) { methodInvokeOrArrayAccess(); } } - private boolean factor0() { + private boolean parseFactor0() { if (this.lookahead == null) { reportSyntaxError("illegal token"); } @@ -714,7 +736,7 @@ private boolean factor0() { if (expectChar('(')) { move(true); this.scope.enterParen(); - ternary(); + parseTernary(); if (expectChar(')')) { move(true); this.scope.leaveParen(); @@ -891,11 +913,11 @@ private boolean arrayAccess() { private void array() { this.scope.enterBracket(); - if (getPrevToken() == Variable.TRUE || getPrevToken() == Variable.FALSE - || getPrevToken() == Variable.NIL) { - reportSyntaxError(getPrevToken().getLexeme() + " could not use [] operator"); + Token prev = getPrevToken(); + if (prev instanceof Variable && ((Variable) prev).isLiteralKeyword()) { + reportSyntaxError(prev.getLexeme() + " could not use [] operator"); } - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("missing index for array access"); } if (expectChar(']')) { @@ -912,13 +934,32 @@ private void checkVariableName(final Token token) { if (!((Variable) token).isQuote()) { String[] names = token.getLexeme().split("\\."); for (String name : names) { - if (!isJavaIdentifier(name)) { + if (!isValidPropertySegment(name)) { reportSyntaxError("illegal identifier: " + name); } } } } + private boolean 
isValidPropertySegment(final String segment) { + if (segment == null || segment.isEmpty()) { + return true; // For formats like "a.[0].b" + } + + int bracketIdx = segment.indexOf('['); + if (bracketIdx < 0) { + return isJavaIdentifier(segment); + } + + if (bracketIdx == 0) { + return segment.endsWith("]"); // "[0]" format + } + + // "bars[0]" format + String baseName = segment.substring(0, bracketIdx); + return isJavaIdentifier(baseName) && segment.endsWith("]"); + } + private void methodInvokeOrArrayAccess() { while (expectChar('[') || expectChar('(')) { if (isConstant(getPrevToken(), this.instance)) { @@ -961,7 +1002,7 @@ private void method(final Token methodName) { } } - ternary(); + parseTernary(); if (isPackArgs) { withMetaEnd(Constants.UNPACK_ARGS, true); @@ -987,7 +1028,7 @@ private void method(final Token methodName) { } } - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("invalid argument"); } @@ -1109,6 +1150,10 @@ private boolean isValidLookahead() { public void move(final boolean analyse) { if (this.lookahead != null) { this.prevTokens.push(this.lookahead); + // Limit memory usage by removing oldest tokens + if (this.prevTokens.size() > MAX_PREV_TOKENS) { + this.prevTokens.pollLast(); + } this.lookahead = this.lexer.scan(analyse); if (this.lookahead != null) { this.parsedTokens++; @@ -1579,11 +1624,11 @@ private void newStatement() { this.scope.enterParen(); move(true); if (!expectChar(')')) { - ternary(); + parseTernary(); getCodeGeneratorWithTimes().onMethodParameter(this.lookahead); while (expectChar(',')) { move(true); - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("invalid argument"); } getCodeGeneratorWithTimes().onMethodParameter(this.lookahead); @@ -1706,7 +1751,7 @@ private StatementType statement() { useStatement(); return StatementType.Other; } else { - if (ternary()) { + if (parseTernary()) { return StatementType.Ternary; } else { return StatementType.Empty; @@ -1795,7 +1840,7 @@ private void forStatement() { { 
getCodeGeneratorWithTimes().onMethodName(Constants.ReducerFn); // The seq - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("missing collection"); } getCodeGeneratorWithTimes().onMethodParameter(this.lookahead); @@ -1943,7 +1988,7 @@ private boolean ifStatement(final boolean isWhile, final boolean isElsif) { getCodeGeneratorWithTimes().onMethodName(Constants.IfReturnFn); { - if (!ternary()) { + if (!parseTernary()) { reportSyntaxError("missing test statement for if"); } @@ -2134,7 +2179,7 @@ public static boolean isLiteralToken(final Token token, final AviatorEvaluatorInstance instance) { switch (token.getType()) { case Variable: - return token == Variable.TRUE || token == Variable.FALSE || token == Variable.NIL; + return ((Variable) token).isLiteralKeyword(); case Char: case Number: case Pattern: diff --git a/src/test/java/com/googlecode/aviator/lexer/ExpressionLexerUnitTest.java b/src/test/java/com/googlecode/aviator/lexer/ExpressionLexerUnitTest.java index a9977a05..242a1a64 100644 --- a/src/test/java/com/googlecode/aviator/lexer/ExpressionLexerUnitTest.java +++ b/src/test/java/com/googlecode/aviator/lexer/ExpressionLexerUnitTest.java @@ -417,6 +417,42 @@ public void testQuoteVar() { } + @Test + public void testNormalVarWithArrayIndex() { + this.lexer = new ExpressionLexer(this.instance, "foo.bars[0].name"); + Token token = this.lexer.scan(); + + assertEquals(TokenType.Variable, token.getType()); + assertEquals("foo.bars[0].name", token.getValue(null)); + assertFalse(((Variable) token).isQuote()); + assertNull(this.lexer.scan()); + } + + + @Test + public void testNormalVarWithMultipleArrayIndices() { + this.lexer = new ExpressionLexer(this.instance, "a.b[0].c[1].d"); + Token token = this.lexer.scan(); + + assertEquals(TokenType.Variable, token.getType()); + assertEquals("a.b[0].c[1].d", token.getValue(null)); + assertFalse(((Variable) token).isQuote()); + assertNull(this.lexer.scan()); + } + + + @Test + public void testNormalVarWithDotOnly() { + 
this.lexer = new ExpressionLexer(this.instance, "obj.field.nested"); + Token token = this.lexer.scan(); + + assertEquals(TokenType.Variable, token.getType()); + assertEquals("obj.field.nested", token.getValue(null)); + assertFalse(((Variable) token).isQuote()); + assertNull(this.lexer.scan()); + } + + @Test public void testExpression_Logic_Join() { this.lexer = new ExpressionLexer(this.instance, "a || c "); diff --git a/src/test/java/com/googlecode/aviator/test/function/QuoteVarTest.java b/src/test/java/com/googlecode/aviator/test/function/QuoteVarTest.java index da47fbf8..cec21a73 100644 --- a/src/test/java/com/googlecode/aviator/test/function/QuoteVarTest.java +++ b/src/test/java/com/googlecode/aviator/test/function/QuoteVarTest.java @@ -96,4 +96,50 @@ public void testQuoteVar() { assertEquals("hello,bar", AviatorEvaluator.execute("'hello,' + #foo.bars[0].name", env)); assertEquals(3, AviatorEvaluator.execute("string.length(#foo.bars[0].name)", env)); } + + + @Test + public void testPropertyAccessWithoutQuote() { + Foo foo = new Foo(100, 3.14f, new Date()); + Map env = new HashMap(); + env.put("foo", foo); + + // These should work WITHOUT # prefix + assertEquals("bar", AviatorEvaluator.execute("foo.bars[0].name", env)); + assertEquals("hello,bar", AviatorEvaluator.execute("'hello,' + foo.bars[0].name", env)); + assertEquals(3, AviatorEvaluator.execute("string.length(foo.bars[0].name)", env)); + assertEquals(100, AviatorEvaluator.execute("foo.i", env)); + assertEquals(3.14f, AviatorEvaluator.execute("foo.f", env)); + } + + + @Test + public void testFullKeyPriority() { + Map env = new HashMap(); + env.put("a.b.c", "full-key-value"); + + Map innerB = new HashMap(); + innerB.put("c", "property-chain-value"); + Map innerA = new HashMap(); + innerA.put("b", innerB); + env.put("a", innerA); + + // Full key should take priority + assertEquals("full-key-value", AviatorEvaluator.execute("a.b.c", env)); + } + + + @Test + public void testPropertyChainWhenNoFullKey() { + Map 
env = new HashMap(); + + Map innerB = new HashMap(); + innerB.put("c", "property-chain-value"); + Map innerA = new HashMap(); + innerA.put("b", innerB); + env.put("a", innerA); + + // When "a.b.c" key doesn't exist, should traverse property chain + assertEquals("property-chain-value", AviatorEvaluator.execute("a.b.c", env)); + } }