Skip to content

Commit c6ffc6f

Browse files
committed
Add PQL AND/OR implementation
* Allow nesting conditions * Use AST instead of regex parsing
1 parent b95c56f commit c6ffc6f

7 files changed

Lines changed: 1260 additions & 47 deletions

File tree

cli/polyaxon/_flow/operations/operation.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
StrictStr,
88
field_validator,
99
model_validator,
10-
validation_after, validation_before,
10+
validation_after,
11+
validation_before,
1112
)
1213
from clipped.config.patch_strategy import PatchStrategy
1314
from clipped.config.schema import skip_partial, to_partial

cli/polyaxon/_pql/ast.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
"""AST (Abstract Syntax Tree) representation for PQL boolean expressions.
2+
3+
This module provides AST node types to represent complex boolean queries
4+
with AND/OR operators and parentheses grouping.
5+
6+
Example:
7+
Query: "status:running AND (tags:ml OR metrics.loss:<0.5)"
8+
AST:
9+
AndNode([
10+
ExpressionNode("status", "running"),
11+
OrNode([
12+
ExpressionNode("tags", "ml"),
13+
ExpressionNode("metrics.loss", "<0.5")
14+
])
15+
])
16+
"""
17+
18+
from typing import Any, List, Union
19+
20+
21+
class ASTNode:
    """Base class for all AST nodes.

    Holds no state or behaviour; it only gives every node kind a common
    ancestor for annotations and ``isinstance`` checks.
    """


class ExpressionNode(ASTNode):
    """Represents a single ``field:operation`` expression (a leaf node).

    Attributes:
        field: The field name (e.g., "status", "metrics.loss")
        operation: The operation string (e.g., "running", "<0.5", "~tag1|tag2")
    """

    def __init__(self, field: str, operation: str):
        self.field = field
        self.operation = operation

    def __repr__(self) -> str:
        return f"ExpressionNode({self.field!r}, {self.operation!r})"

    def __eq__(self, other: Any) -> bool:
        """Compare by field and operation.

        Returns ``NotImplemented`` for foreign types so that Python can try
        the other operand's reflected ``__eq__`` (``==`` still evaluates to
        ``False`` when neither side knows how to compare).
        """
        if not isinstance(other, ExpressionNode):
            return NotImplemented
        return self.field == other.field and self.operation == other.operation


class AndNode(ASTNode):
    """Represents an AND combination of AST nodes.

    Attributes:
        children: List of child AST nodes to be ANDed together
    """

    def __init__(self, children: List[ASTNode]):
        self.children = children

    def __repr__(self) -> str:
        return f"AndNode({self.children!r})"

    def __eq__(self, other: Any) -> bool:
        """Compare children pairwise, in order; only AndNodes can be equal.

        Returns ``NotImplemented`` for foreign types (including ``OrNode``)
        so the other operand gets a chance to answer the comparison.
        """
        if not isinstance(other, AndNode):
            return NotImplemented
        return self.children == other.children


class OrNode(ASTNode):
    """Represents an OR combination of AST nodes.

    Attributes:
        children: List of child AST nodes to be ORed together
    """

    def __init__(self, children: List[ASTNode]):
        self.children = children

    def __repr__(self) -> str:
        return f"OrNode({self.children!r})"

    def __eq__(self, other: Any) -> bool:
        """Compare children pairwise, in order; only OrNodes can be equal.

        Returns ``NotImplemented`` for foreign types (including ``AndNode``)
        so the other operand gets a chance to answer the comparison.
        """
        if not isinstance(other, OrNode):
            return NotImplemented
        return self.children == other.children


# Type alias covering every concrete node kind a parsed query can contain.
QueryAST = Union[ExpressionNode, AndNode, OrNode]

cli/polyaxon/_pql/grammar.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
"""PQL Grammar using parsimonious.
2+
3+
This module defines the PQL grammar for parsing boolean queries with AND/OR
4+
operators and parentheses grouping.
5+
6+
Grammar design:
7+
- AND operators: `,` or `AND` keyword (case-insensitive)
8+
- OR operators: `OR` keyword or `|` with whitespace (case-insensitive)
9+
- Parentheses for grouping: `(` and `)`
10+
- Expressions: `field:operation` where operation can contain `|` for IN operator
11+
12+
Operator precedence: AND binds tighter than OR (standard boolean precedence)
13+
14+
Examples:
15+
"status:running"
16+
"status:running, metrics.loss:<0.5"
17+
"status:running AND metrics.loss:<0.5"
18+
"status:running OR status:failed"
19+
"(status:running AND metrics.loss:<0.5) OR status:failed"
20+
"status:running|building" # `|` is IN operator, not OR
21+
"""
22+
23+
from typing import List, Union
24+
25+
from parsimonious.grammar import Grammar
26+
from parsimonious.nodes import Node, NodeVisitor
27+
28+
from polyaxon._pql.ast import AndNode, ASTNode, ExpressionNode, OrNode
29+
30+
# PQL Grammar Definition
# Note: Order matters in PEG grammars - more specific rules must come first
#
# Rule overview:
#   - or_expr / and_expr: AND binds tighter than OR because or_expr is built
#     out of and_expr operands.
#   - operation: a lazy match that stops right before the next separator
#     (comma, AND/OR keyword surrounded by whitespace, a `|` that introduces a
#     new `field:` or group, a closing parenthesis, or end of input).
#   - pipe_or: `|` only counts as OR when followed by `(` or a `field:`
#     prefix; otherwise it stays part of the operation (IN operator).
#   - or_keyword / and_keyword: end with `\b` so the keyword is matched as a
#     whole word. Without it, after a group a field that merely starts with
#     the keyword was split, e.g. "(a:b) order:1" parsed as "a:b OR der:1".
PQL_GRAMMAR = Grammar(
    r"""
    query       = ws or_expr ws

    or_expr     = and_expr (or_op and_expr)*
    and_expr    = primary (and_op primary)*

    primary     = group / expression
    group       = "(" ws or_expr ws ")"

    expression  = field ws ":" ws operation

    field       = ~r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?"

    operation   = ~r"[^,()]+?(?=\s*(,|\s+AND\s|\s+AND$|\s+OR\s|\s+OR$|\s+\|\s+(\(|[a-zA-Z_][a-zA-Z0-9_.]*:)|\)|\s*$))"i

    or_op       = ws (or_keyword / pipe_or) ws
    or_keyword  = ~r"OR\b"i
    pipe_or     = "|" &(ws ("(" / ~r"[a-zA-Z_][a-zA-Z0-9_.]*:"))

    and_op      = ws (and_keyword / comma) ws
    and_keyword = ~r"AND\b"i
    comma       = ","

    ws          = ~r"\s*"
    """
)
59+
60+
61+
class PQLVisitor(NodeVisitor):
    """Visitor that transforms the parse tree into an AST.

    Each ``visit_<rule>`` method receives the parse-tree node for that grammar
    rule together with the already-visited results of its children, and
    returns either an AST node or a plain string. Rules without a dedicated
    method fall through to ``generic_visit``.
    """

    @staticmethod
    def _combine(visited_children: List, node_cls: type) -> ASTNode:
        """Fold ``operand (operator operand)*`` results into one AST node.

        Args:
            visited_children: Two-item list ``[first, rest]`` where ``rest``
                holds the visited ``[operator, operand]`` pairs, or is falsy
                when the repetition matched nothing.
            node_cls: ``AndNode`` or ``OrNode``; used only when there is more
                than one operand.

        Returns:
            The lone operand unchanged, or ``node_cls`` wrapping all operands
            in source order.
        """
        head, tail = visited_children
        if not tail:
            # Single operand: do not wrap it in a one-child And/Or node.
            return head
        # Every pair is [operator, operand]; only the operand is kept.
        return node_cls([head] + [pair[1] for pair in tail])

    def visit_query(self, node: Node, visited_children: List) -> ASTNode:
        """Visit the root query node and drop the surrounding whitespace."""
        _, expr, _ = visited_children
        return expr

    def visit_or_expr(self, node: Node, visited_children: List) -> ASTNode:
        """Visit OR expression - combines children with OrNode if multiple."""
        return self._combine(visited_children, OrNode)

    def visit_and_expr(self, node: Node, visited_children: List) -> ASTNode:
        """Visit AND expression - combines children with AndNode if multiple."""
        return self._combine(visited_children, AndNode)

    def visit_primary(self, node: Node, visited_children: List) -> ASTNode:
        """Visit primary - either a group or expression."""
        return visited_children[0]

    def visit_group(self, node: Node, visited_children: List) -> ASTNode:
        """Visit group - extract the inner expression."""
        # group = "(" ws or_expr ws ")"
        _, _, expr, _, _ = visited_children
        return expr

    def visit_expression(self, node: Node, visited_children: List) -> ExpressionNode:
        """Visit expression - create ExpressionNode from field:operation."""
        # expression = field ws ":" ws operation
        field, _, _, _, operation = visited_children
        return ExpressionNode(field.strip(), operation.strip())

    def visit_field(self, node: Node, visited_children: List) -> str:
        """Visit field - return the field name."""
        return node.text

    def visit_operation(self, node: Node, visited_children: List) -> str:
        """Visit operation - return the operation string."""
        return node.text

    def generic_visit(self, node: Node, visited_children: List) -> Union[List, str]:
        """Generic visitor for nodes we don't explicitly handle.

        Returns the visited children when there are any, otherwise the node's
        matched text (which is an empty, falsy string for an empty match).
        """
        return visited_children or node.text
127+
128+
129+
def parse_query(query: str) -> ASTNode:
    """Turn a PQL query string into its AST.

    The string is first matched against ``PQL_GRAMMAR``; the resulting parse
    tree is then walked by a fresh ``PQLVisitor``.

    Args:
        query: The PQL query string

    Returns:
        The root AST node representing the query

    Raises:
        ParseError: If the query is syntactically invalid
    """
    return PQLVisitor().visit(PQL_GRAMMAR.parse(query))

0 commit comments

Comments
 (0)