-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathactual_parser.py
More file actions
278 lines (211 loc) · 8.34 KB
/
actual_parser.py
File metadata and controls
278 lines (211 loc) · 8.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
"""This is an actual recursive-descent parser.
I'm sorry I didn't find something funnier to do here."""
from abc import ABC, abstractmethod
from collections import namedtuple
import typing as T
def is_number_char(c: str) -> bool:
"""Check if char c is part of a number."""
return c.isdigit() or c == "."
def is_char_token(c: str) -> bool:
"""Return true for single character tokens."""
return c in ["+", "-", "*", "/", "(", ")"]
class ExprSyntaxError(Exception):
"""Error parsing the expression."""
class UnexpectedChar(ExprSyntaxError):
"""Syntax error due to unexpected character."""
Token = namedtuple("Token", ["tok_type", "tok"])
SyntaxNode = namedtuple("SyntaxNode", ["token", "children"])
class Tokenizer:
"""Tokenizer for calculator expressions.
It's an iterable, and it can also be used by calling next_token repeatedly.
Args:
s: string to tokenize.
"""
def __init__(self, s: str):
self.s = s
self.pos = 0
@property
def current(self) -> str:
"""Return char being pointed."""
return self.s[self.pos]
def consume(self) -> str:
"""Advance to next char, returning current one."""
c = self.current
self.pos += 1
return c
def has_finished(self) -> bool:
"""Check if we've analyzed the entire string."""
return self.pos >= len(self.s)
def __iter__(self):
self.pos = 0
return self
def __next__(self) -> Token:
tok = self.next_token()
if tok is None:
raise StopIteration
return tok
def next_token(self) -> T.Optional[Token]:
"""Returns the next token in the string, or None if there are no more."""
if self.has_finished():
return None
token_type = None
token_chars = []
if is_number_char(self.current):
token_type = "N"
while not self.has_finished() and is_number_char(self.current):
token_chars.append(self.consume())
elif is_char_token(self.current):
if self.current in ["(", ")"]:
token_type = self.current
elif self.current in ["+", "-"]:
token_type = "S"
elif self.current in ["*", "/"]:
token_type = "M"
else:
raise ExprSyntaxError
token_chars.append(self.consume())
elif self.current.isspace():
self.consume()
return self.next_token()
else:
raise UnexpectedChar
return Token(token_type, "".join(token_chars))
class Parser:
"""Used to get an AST for a calculator expression.
Args:
s: string to parse.
"""
def __init__(self, s):
tokenizer = Tokenizer(s)
self.tokens = list(iter(tokenizer))
# We store the tokens reverse to make the simple LL
# parsers work.
self.tokens.reverse()
self.pos = 0
def ahead(self, k):
"""Read k tokens ahead of the current one.
For this implementation, k should always be 1."""
assert k == 1
if self.pos + k < len(self.tokens):
return self.tokens[self.pos + k]
return None
@property
def current(self) -> Token:
"""Return token under consideration."""
return self.tokens[self.pos]
def has_finished(self) -> bool:
"""Check if the parser has finished parsing."""
return self.pos >= len(self.tokens)
def consume_if(self, tok_type: str) -> Token:
"""Advance the pointer if the current token is of the right type.
Otherwise, raise ExprSyntaxError."""
curr = self.current
if curr.tok_type != tok_type:
raise ExprSyntaxError
self.pos += 1
return curr
def _parse_cat_binary(
self, expected_type: str, next_parse_func: T.Callable
) -> SyntaxNode:
ahead = self.ahead(1)
if ahead and ahead.tok_type == expected_type:
first_tok = self.parse_value()
op_tok = self.consume_if(expected_type)
second_tok = self._parse_cat_binary(expected_type, next_parse_func)
# reverse order to make up for our reverse token order.
return SyntaxNode(token=op_tok, children=[second_tok, first_tok])
first_tok = next_parse_func()
if not self.has_finished() and self.current.tok_type == expected_type:
op_tok = self.consume_if(expected_type)
second_tok = self._parse_cat_binary(expected_type, next_parse_func)
# reverse order to make up for our reverse token order.
return SyntaxNode(token=op_tok, children=[second_tok, first_tok])
return first_tok
def parse_value(self) -> SyntaxNode:
"""Parse a value (i.e. a factor in this case).
Handles parenthesised expressions and direct numeric values."""
if self.current.tok_type == ")":
self.consume_if(")")
node = self.parse_expr()
self.consume_if("(")
return node
token = self.consume_if("N")
return SyntaxNode(token=token, children=[])
def parse_term(self) -> SyntaxNode:
"""Parse a term to get the corresponding SyntaxNode.
Handles multiplication and division."""
return self._parse_cat_binary("M", self.parse_value)
def parse_expr(self) -> SyntaxNode:
"""Parse a calculator expression.
Handles sum and subtraction."""
return self._parse_cat_binary("S", self.parse_term)
def parse(self) -> T.Optional[SyntaxNode]:
"""Parse the string with which we initialized the calculator.
Returns a SyntaxNode or None if there is no expression."""
if self.tokens:
return self.parse_expr()
return None
class EvalError(Exception):
"""Interpreter error. Something in the AST is malformed for interpretation."""
class TreeInterpreter(ABC):
"""Abstract class for SyntaxNode direct interpreters.
Overriding the *_value methods and calling eval is all that
is needed to get a working AST interpreter."""
def __init__(self, syntax_tree: SyntaxNode):
self.syntax_tree = syntax_tree
@abstractmethod
def n_value(self, token):
"""Method that returns a value from an N token."""
@abstractmethod
def sum_value(self, lv, rv):
"""Method that returns a value from a sum tree.
It takes the left and right values, already computed."""
@abstractmethod
def sub_value(self, lv, rv):
"""Method that returns a value from a subtract tree.
It takes the left and right values, already computed."""
@abstractmethod
def prod_value(self, lv, rv):
"""Method that returns a value from a prod tree.
It takes the left and right values, already computed."""
@abstractmethod
def div_value(self, lv, rv):
"""Method that returns a value from a division tree.
It takes the left and right values, already computed."""
def _eval_node(self, node: SyntaxNode):
tok_type = node.token.tok_type
tok = node.token.tok
if tok_type == "N":
return self.n_value(tok)
assert len(node.children) == 2
try:
left_val = self._eval_node(node.children[0])
right_val = self._eval_node(node.children[1])
except IndexError:
raise EvalError
if tok == "+":
return self.sum_value(left_val, right_val)
if tok == "-":
return self.sub_value(left_val, right_val)
if tok == "*":
return self.prod_value(left_val, right_val)
if tok == "/":
return self.div_value(left_val, right_val)
# If we got here, it's an unknown operation.
raise EvalError
def eval(self):
"""Returns the value of the AST as per the implementation of abstract methods.
It evaluates the ast that was set in the creation of the interpreter."""
return self._eval_node(self.syntax_tree)
class ExampleInterpreter(TreeInterpreter):
"""This is an example of an interpreter. It computes operations directly."""
def n_value(self, token):
return float(token)
def sum_value(self, lv, rv):
return lv + rv
def sub_value(self, lv, rv):
return lv - rv
def prod_value(self, lv, rv):
return lv * rv
def div_value(self, lv, rv):
return lv / rv