33I Language lexer.
44Version: 0.1.3
55
6- Copyright (c) 2023-present ElBe Devleopment .
6+ Copyright (c) 2023-present ElBe Development .
77
88Permission is hereby granted, free of charge, to any person obtaining a
99copy of this software and associated documentation files (the 'Software'),
2929# IMPORTS #
3030###########
3131
32+ import sys
3233from typing import (
3334 Any ,
35+ Dict ,
3436 List ,
3537 Final ,
36- final ,
3738)
3839
3940
4344
4445DIGITS_AS_STRINGS : Final [List [str ]] = ["1" , "2" , "3" , "4" , "5" , "6" , "7" , "8" , "9" , "0" ]
4546
47+ SEPARATORS : Final [List [str ]] = [" " , "\t " , "\n " ]
48+ DOUBLE_MARKS : Final [Dict [str , str ]] = {
49+ "==" : "EQUAL" ,
50+ "===" : "TYPE_EQUAL" ,
51+ "++" : "COUNT_UP" ,
52+ "--" : "COUNT_DOWN" ,
53+ }
54+ MARKS : Final [Dict [str , str ]] = {
55+ ";" : "END_CMD" ,
56+ "=" : "SET" ,
57+ "{" : "BLOCK_OPEN" , # Also dicts
58+ "}" : "BLOCK_CLOSE" , # Also dicts
59+ "(" : "CLAMP_OPEN" ,
60+ ")" : "CLAMP_CLOSE" ,
61+ "[" : "INDEX_OPEN" , # Also arrays
62+ "]" : "INDEX_CLOSE" , # Also arrays
63+ "?" : "INDEFINITE" ,
64+ "." : "SEPERATOR" ,
65+ ":" : "SLICE" ,
66+ ">" : "GREATER" ,
67+ "<" : "LESS" ,
68+ "+" : "ADD" ,
69+ "-" : "SUBTRACT" ,
70+ "*" : "MULTIPLY" ,
71+ "/" : "DIVIDE" ,
72+ "%" : "MODULO" ,
73+ " ." .replace (" " , "" ): "CHILD" , # Duplicate, needs escaping
74+ "," : "SEPERATOR" ,
75+ }
76+ KEYWORDS : Final [Dict [str , str ]] = {
77+ "class" : "CLASS" ,
78+ "use" : "USE" ,
79+ "import" : "IMPORT" ,
80+ "if" : "IF" ,
81+ "else" : "ELSE" ,
82+ "while" : "WHILE" ,
83+ "for" : "FOR" ,
84+ "return" : "RETURN" ,
85+ "delete" : "DELETE" ,
86+ "break" : "BREAK" ,
87+ "continue" : "CONTINUE" ,
88+ }
89+ BASE_TYPES : Final [List [str ]] = [
90+ "any" ,
91+ "array" ,
92+ "bool" ,
93+ "complex" ,
94+ "dict" ,
95+ "dictionary" ,
96+ "dynamic" ,
97+ "float" ,
98+ "int" ,
99+ "integer" ,
100+ "list" ,
101+ "str" ,
102+ "string" ,
103+ "None" ,
104+ "Null" ,
105+ ]
106+
46107
47108#################
48109# LEXER HELPERS #
49110#################
50111
51-
52112class LexerToken :
53113 """
54114 Represents a token for the lexer.
@@ -73,26 +133,24 @@ def __repr__(self) -> str:
73133 return f"{ self .type } : { self .value !r} "
74134
75135
class LexerError:
    """
    Represents an error while lexing.

    Instantiating this class reports the error and terminates the
    process via ``sys.exit``; it never returns a usable object.
    """

    def __init__(self, description: str, line: int, column: int, code: int = 1) -> None:
        """Initializes a lexing error, reports it and exits.

        :param description: Description of the error.
        :param line: Line the error occurred in.
        :param column: Column the error occurred in.
        :param code: The exit code of the error.
        """

        # Fix: diagnostics go to stderr, not stdout, so error output is
        # not interleaved with (or captured as) normal program output.
        print(
            f"{description} in line {line}, column {column}",
            file=sys.stderr,
        )
        sys.exit(code)
96154
97155
98156####################
@@ -107,66 +165,6 @@ class Lexer:
107165
    def __init__(self, text: str):
        """Initializes the lexer.

        :param text: The raw source text that ``lex`` will tokenize.
        """

        self.text = text
        # Filled in by ``lex``: token objects in source order (``lex``
        # appends ``gettoken`` results directly, which may include None
        # for empty buffers).
        self.tokens = []
171169
172170 def lex (self ):
@@ -194,13 +192,13 @@ def validate_integer(string: str) -> bool:
194192 return valid
195193
196194 def gettoken (string : str , line : int , column : int ) -> LexerToken | None :
197- if string in list (self . keywords . keys () ):
198- return LexerToken (self . keywords [string ], string )
195+ if string in list (KEYWORDS ):
196+ return LexerToken (KEYWORDS [string ], string )
199197 elif len (string ) > 0 and string [0 ] == "_" :
200198 return LexerToken ("BUILTIN_CONST" , string )
201199 elif string in ["true" , "false" , "True" , "False" ]:
202200 return LexerToken ("BOOL" , string )
203- elif string in self . base_types :
201+ elif string in BASE_TYPES :
204202 return LexerToken ("BASETYPE" , string )
205203 elif len (string ) == 0 :
206204 return None
@@ -213,7 +211,7 @@ def gettoken(string: str, line: int, column: int) -> LexerToken | None:
213211 return LexerToken ("NAME" , string )
214212
215213 else :
216- raise LexerError ("Unrecognized Pattern: '" + string + "'" , line , column )
214+ LexerError ("Unrecognized Pattern: '" + string + "'" , line , column )
217215
218216 line = 1
219217 comment = 0
@@ -243,26 +241,26 @@ def gettoken(string: str, line: int, column: int) -> LexerToken | None:
243241
244242 elif in_string :
245243 buffer += self .text [index ]
246- elif self .text [index ] in self . separators :
244+ elif self .text [index ] in SEPARATORS :
247245 self .tokens .append (gettoken (buffer , line , column ))
248246 buffer = ""
249247 elif len (self .text [index :]) > 1 and self .text [
250248 index : index + 2
251- ] in list (self . double_marks . keys () ):
249+ ] in list (DOUBLE_MARKS ):
252250 self .tokens .append (gettoken (buffer , line , column ))
253251 self .tokens .append (
254252 LexerToken (
255- self . double_marks [self .text [index : index + 2 ]],
253+ DOUBLE_MARKS [self .text [index : index + 2 ]],
256254 self .text [index : index + 2 ],
257255 )
258256 )
259257 buffer = ""
260258 index += 1
261259
262- elif self .text [index ] in list (self . marks . keys () ):
260+ elif self .text [index ] in list (MARKS ):
263261 self .tokens .append (gettoken (buffer , line , column ))
264262 self .tokens .append (
265- LexerToken (self . marks [self .text [index ]], self .text [index ])
263+ LexerToken (MARKS [self .text [index ]], self .text [index ])
266264 )
267265 buffer = ""
268266
@@ -275,6 +273,8 @@ def gettoken(string: str, line: int, column: int) -> LexerToken | None:
275273
276274
277275if __name__ == "__main__" :
276+ """Only for testing purposes."""
277+
278278 with open ("../test.ilang" ) as test :
279279 data = test .read ()
280280
0 commit comments