# lexer.py
#########################
# LEXER
#########################
from errors import (
IllegalCharError
)
from position import Position
from tokens import (
DIGITS,
LETTERS,
ALPHANUMERIC,
KEYWORDS,
UNDERSCORE_ALPHANUMERIC,
TokenTypes,
TokenObj
)
class Lexer:
    """Single-pass tokenizer: converts raw source text into a flat token list.

    Invariant: ``current_char`` always holds the character at ``self.pos``,
    or ``None`` once the input is exhausted; ``advance`` moves both forward
    in lockstep.
    """

    # Single-character operator/paren tokens, dispatched by table lookup
    # instead of a long elif chain.
    _SINGLE_CHAR_TOKENS = {
        "+": TokenTypes.TT_PLUS,
        "-": TokenTypes.TT_MINUS,
        "*": TokenTypes.TT_MUL,
        "/": TokenTypes.TT_DIV,
        "^": TokenTypes.TT_POWER,
        "=": TokenTypes.TT_EQ,
        "(": TokenTypes.TT_LPAREN,
        ")": TokenTypes.TT_RPAREN,
    }

    def __init__(self, f_name: str, text: str):
        self._f_name = f_name
        self._text = text
        # Index starts at -1 so the initial advance() lands on index 0.
        self.pos: Position = Position(-1, 0, -1, f_name, text)
        # FIX: was annotated `None`; the attribute actually holds a
        # one-character str, or None at end of input.
        self.current_char: "str | None" = None
        self.advance()

    def advance(self) -> None:
        """Step to the next character; set current_char to None at EOF."""
        self.pos.advance(self.current_char)
        self.current_char = self._text[self.pos._indx] if self.pos._indx < len(self._text) else None

    def make_number(self) -> TokenObj:
        """Scan a run of digits (at most one '.') into an INT or FLOAT token."""
        num_str = ""
        dot_count = 0
        start_pos = self.pos.copy()
        while self.current_char is not None and self.current_char in DIGITS + ".":
            if self.current_char == ".":
                if dot_count == 1:
                    # A second dot ends the number: "1.2.3" scans as "1.2".
                    break
                dot_count += 1
                num_str += "."
            else:
                num_str += self.current_char
            self.advance()
        if dot_count == 0:
            return TokenObj(TokenTypes.TT_INT, int(num_str), start_pos=start_pos, end_pos=self.pos)
        return TokenObj(TokenTypes.TT_FLOAT, float(num_str), start_pos=start_pos, end_pos=self.pos)

    def make_identifier(self) -> TokenObj:
        """Scan letters/digits/underscores into a KEYWORD or IDENTIFIER token."""
        iden_str = ""
        start_pos = self.pos.copy()
        while self.current_char is not None and self.current_char in UNDERSCORE_ALPHANUMERIC:
            iden_str += self.current_char
            self.advance()
        # Reserved words become KEYWORD tokens; everything else is IDENTIFIER.
        token_type = TokenTypes.TT_KEYWORD if iden_str in KEYWORDS else TokenTypes.TT_IDENTIFIER
        return TokenObj(token_type, iden_str, start_pos=start_pos, end_pos=self.pos)

    def make_tokens(self):
        """Tokenize the whole input.

        Returns:
            (tokens, None) on success — tokens ends with a TT_EOF token —
            or ([], IllegalCharError) on the first unrecognized character.
        """
        tokens = []
        while self.current_char is not None:
            if self.current_char in " \t":
                # Whitespace is skipped, never tokenized.
                self.advance()
            elif self.current_char in DIGITS:
                tokens.append(self.make_number())
            elif self.current_char in LETTERS:
                tokens.append(self.make_identifier())
            elif self.current_char in self._SINGLE_CHAR_TOKENS:
                # NOTE(review): start_pos is the live Position object (not a
                # .copy() like make_number uses) — assumes TokenObj copies it
                # internally; confirm against tokens.py.
                tokens.append(TokenObj(self._SINGLE_CHAR_TOKENS[self.current_char], start_pos=self.pos))
                self.advance()
            else:
                pos_start = self.pos.copy()
                char = self.current_char
                self.advance()
                return [], IllegalCharError(pos_start, self.pos, "'" + char + "'")
        tokens.append(TokenObj(TokenTypes.TT_EOF, start_pos=self.pos))
        return tokens, None
def exec_lexer(f_name, text):
    """Convenience entry point: lex *text* (from file *f_name*) in one call.

    Returns the (tokens, error) pair produced by Lexer.make_tokens().
    """
    return Lexer(f_name, text).make_tokens()