|
| 1 | +"""PQL Grammar using parsimonious. |
| 2 | +
|
| 3 | +This module defines the PQL grammar for parsing boolean queries with AND/OR |
| 4 | +operators and parentheses grouping. |
| 5 | +
|
| 6 | +Grammar design: |
| 7 | +- AND operators: `,` or `AND` keyword (case-insensitive) |
| 8 | +- OR operators: `OR` keyword (case-insensitive), or `|` followed by `(` or a `field:` prefix (after an expression the `|` must be surrounded by whitespace) |
| 9 | +- Parentheses for grouping: `(` and `)` |
| 10 | +- Expressions: `field:operation` where operation can contain `|` for IN operator |
| 11 | +
|
| 12 | +Operator precedence: AND binds tighter than OR (standard boolean precedence) |
| 13 | +
|
| 14 | +Examples: |
| 15 | + "status:running" |
| 16 | + "status:running, metrics.loss:<0.5" |
| 17 | + "status:running AND metrics.loss:<0.5" |
| 18 | + "status:running OR status:failed" |
| 19 | + "(status:running AND metrics.loss:<0.5) OR status:failed" |
| 20 | + "status:running|building" # `|` is IN operator, not OR |
| 21 | +""" |
| 22 | + |
| 23 | +from typing import List, Union |
| 24 | + |
| 25 | +from parsimonious.grammar import Grammar |
| 26 | +from parsimonious.nodes import Node, NodeVisitor |
| 27 | + |
| 28 | +from polyaxon._pql.ast import AndNode, ASTNode, ExpressionNode, OrNode |
| 29 | + |
# PQL grammar definition (parsimonious PEG syntax).
#
# Note: Order matters in PEG grammars - more specific rules must come first.
#
# Rule overview:
# - query / or_expr / and_expr: `and_expr` is nested inside `or_expr`, so AND
#   binds tighter than OR.
# - primary: a parenthesized group or a single `field:operation` expression.
# - field: an identifier, optionally followed by one dotted sub-identifier.
# - operation: lazily consumes characters other than `,`, `(` and `)` until
#   the lookahead sees a comma, a whitespace-delimited AND/OR keyword, a
#   whitespace-surrounded `|` followed by `(` or a `name:` prefix, a `)`, or
#   the end of the string. A `|` that does not satisfy that lookahead stays
#   inside the operation text (the IN operator).
# - pipe_or: `|` counts as an OR operator only when followed (after optional
#   whitespace) by `(` or a `name:` prefix.
# - The `i` suffix on a regex makes that regex case-insensitive.
PQL_GRAMMAR = Grammar(
    r"""
    query = ws or_expr ws

    or_expr = and_expr (or_op and_expr)*
    and_expr = primary (and_op primary)*

    primary = group / expression
    group = "(" ws or_expr ws ")"

    expression = field ws ":" ws operation

    field = ~r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?"

    operation = ~r"[^,()]+?(?=\s*(,|\s+AND\s|\s+AND$|\s+OR\s|\s+OR$|\s+\|\s+(\(|[a-zA-Z_][a-zA-Z0-9_.]*:)|\)|\s*$))"i

    or_op = ws (or_keyword / pipe_or) ws
    or_keyword = ~r"OR"i
    pipe_or = "|" &(ws ("(" / ~r"[a-zA-Z_][a-zA-Z0-9_.]*:"))

    and_op = ws (and_keyword / comma) ws
    and_keyword = ~r"AND"i
    comma = ","

    ws = ~r"\s*"
    """
)
| 59 | + |
| 60 | + |
class PQLVisitor(NodeVisitor):
    """Turn a parse tree produced by the PQL grammar into AST nodes."""

    def visit_query(self, node: Node, visited_children: List) -> ASTNode:
        """Return the AST of the whole query, ignoring surrounding whitespace."""
        # query = ws or_expr ws
        _leading_ws, root, _trailing_ws = visited_children
        return root

    def visit_or_expr(self, node: Node, visited_children: List) -> ASTNode:
        """Wrap OR-separated operands in an OrNode; a lone operand passes through."""
        # or_expr = and_expr (or_op and_expr)*
        head, tail = visited_children
        if not tail:
            # No `or_op and_expr` repetition followed the first operand.
            return head

        # Each repetition was visited as [or_op, and_expr]; keep the operand only.
        return OrNode([head] + [pair[1] for pair in tail])

    def visit_and_expr(self, node: Node, visited_children: List) -> ASTNode:
        """Wrap AND-separated operands in an AndNode; a lone operand passes through."""
        # and_expr = primary (and_op primary)*
        head, tail = visited_children
        if not tail:
            # No `and_op primary` repetition followed the first operand.
            return head

        # Each repetition was visited as [and_op, primary]; keep the operand only.
        return AndNode([head] + [pair[1] for pair in tail])

    def visit_primary(self, node: Node, visited_children: List) -> ASTNode:
        """Return whichever alternative matched: a group or an expression."""
        return visited_children[0]

    def visit_group(self, node: Node, visited_children: List) -> ASTNode:
        """Return the expression enclosed by the parentheses."""
        # group = "(" ws or_expr ws ")"
        _open, _ws_before, inner, _ws_after, _close = visited_children
        return inner

    def visit_expression(self, node: Node, visited_children: List) -> ExpressionNode:
        """Build an ExpressionNode from the stripped field and operation text."""
        # expression = field ws ":" ws operation
        name, _ws_before, _colon, _ws_after, op = visited_children
        return ExpressionNode(name.strip(), op.strip())

    def visit_field(self, node: Node, visited_children: List) -> str:
        """Return the matched field name as text."""
        return node.text

    def visit_operation(self, node: Node, visited_children: List) -> str:
        """Return the matched operation as text."""
        return node.text

    def generic_visit(self, node: Node, visited_children: List) -> Union[List, str]:
        """Fallback for rules without a dedicated visitor.

        Returns the visited children when there are any, otherwise the text
        matched by the node.
        """
        if visited_children:
            return visited_children
        return node.text
| 127 | + |
| 128 | + |
def parse_query(query: str) -> ASTNode:
    """Convert a PQL query string into its AST.

    Args:
        query: The PQL query string

    Returns:
        The root AST node representing the query

    Raises:
        ParseError: If the query is syntactically invalid
    """
    # Parse first, then walk the resulting tree with a fresh visitor.
    parse_tree = PQL_GRAMMAR.parse(query)
    return PQLVisitor().visit(parse_tree)
0 commit comments