-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMEL.py
More file actions
1282 lines (1101 loc) · 51.9 KB
/
MEL.py
File metadata and controls
1282 lines (1101 loc) · 51.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import re
import math
# --- 1. Lexer (Tokenization) ---
class Token:
"""Represents a lexical token in the MEL language."""
def __init__(self, type, value, line=None, column=None):
self.type = type
self.value = value
self.line = line
self.column = column
def __repr__(self):
return f"Token({self.type}, '{self.value}', line={self.line}, col={self.column})"
def __eq__(self, other):
return self.type == other.type and self.value == other.value
# Define token types and their regular expressions.
# Order matters: longer, more specific patterns should come before shorter, general ones.
TOKEN_SPECIFICATIONS = [
# Program Structure
('PROGRAM_START', r':::\s*Program\s*start'), # Changed from :: to :::
('PROGRAM_END', r';;;\s*Program\s*end'),
# Comments (Pure comments first to consume the whole line)
('COMMENT_PURE', r'>\s*~.*'),
('COMMENT_SINGLE', r'~.*'),
# Literals
('STRING', r'"(?:[^"\\]|\\.)*"'), # Handles escaped quotes and newlines (with re.DOTALL flag)
('ARRAY_START', r'\['), # Changed from ` to [
('ARRAY_END', r'\]'), # Changed from ` to ]
('REAL_NUMBER', r'#+(?::#+)*'), # e.g., ###:#:####
('BINARY_LITERAL_PREFIX', r';;'), # e.g., ;;$$$$$$$$;$$$$$$$
# Operators (Multi-character operators before single-character ones, and before literals)
# CRITICAL: Order operators before literals that might share prefixes!
('REAL_LE_EQ', r'#<='),
('REAL_GE_EQ', r'#>='),
('REAL_NE_EQ', r'#!='),
('BINARY_LE_EQ', r'\$<='),
('BINARY_GE_EQ', r'\$>='),
('BINARY_NE_EQ', r'\$!='),
('REAL_ADD', r'#\+'),
('REAL_SUB', r'#-'),
('REAL_MUL', r'#\*'),
('REAL_DIV', r'#/'),
('REAL_FACTORIAL', r'#!'),
('REAL_SQRT', r'#\\'), # Escaped backslash
('REAL_MOD', r'#%'),
('REAL_POW', r'#\^'),
('BINARY_ADD', r'\$\+'),
('BINARY_SUB', r'\$-'),
('BINARY_MUL', r'\$\*'),
('BINARY_DIV', r'\$/'), # Ensure this comes before binary literals like $$$$$$
('BINARY_FACTORIAL', r'\$!'),
('BINARY_SQRT', r'\$\\'), # Escaped backslash
('BINARY_MOD', r'\$%'),
('BINARY_POW', r'\$\^'),
('REAL_LT', r'#<'),
('REAL_EQ', r'#='),
('REAL_GT', r'#>'),
('BINARY_LT', r'\$<'),
('BINARY_EQ', r'\$='), # Ensure this comes before binary literals like $$$
('BINARY_GT', r'\$>'),
('ASSIGN', r'='),
('UNARY_NEGATE', r'\|'),
# Specific binary literals (after operators that might share prefixes)
('BINARY_BIT_ONE', r'\$\$\$\$\$\$\$\$'), # Single binary digit 1
('BINARY_BIT_ZERO', r'\$\$\$\$\$\$\$'), # Single binary digit 0
('BINARY_ZERO_DECIMAL', r'\$\$\$\$\$\$'), # Decimal 0
('BINARY_EMPTY', r'\$\$\$\$\$'), # Empty, non-existent (e.g., empty list/string)
('BINARY_ABSENCE', r'\$\$\$\$'), # Absence of value (None)
('BINARY_TRUE', r'\$\$\$'), # Boolean true
('BINARY_FALSE', r'\$\$'), # Boolean false
# Input/Output & Function Call Prefix
('INPUT', r'<'),
('OUTPUT_REVERSED', r'>>'), # Longer operator before shorter prefix
('OUTPUT_NORMAL', r'<<'), # Longer operator before shorter prefix
('OUTPUT_CHAR', r'>'), # Shorter prefix for multi-char output
('FUNCTION_CALL_PREFIX', r'->>'), # Also used for array functions
# Control Flow
('TRY', r'%%'),
('CATCH', r'%%%%'),
('IF', r'\?'),
('ELSE_IF', r'\?\?#'),
('ELSE', r'\?\?:'),
('END_LOOP', r'!@!'), # Longer operator before shorter prefix
('END_CONDITIONAL', r'!'), # Shorter prefix for loop end
('WHILE_LOOP', r'@'),
('FOR_LOOP', r'#@#'),
# Functions
('FUNCTION_DEF', r'->'),
('RETURN', r'\^'),
# Delimiters
('COMMA', r','),
('SEMICOLON_DELIMITER', r';'), # Used in binary number construction
('LPAREN', r'\('),
('RPAREN', r'\)'),
('COLON', r':'), # Added for eval-like function call
# Identifiers (variables, function names)
('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*'),
# Whitespace (to be ignored)
('WHITESPACE', r'\s+'),
# Catch-all for any remaining character (to be ignored as per user request)
('UNKNOWN', r'.'),
]
class Lexer:
"""Converts MEL source code into a stream of Tokens."""
def __init__(self, text):
self.text = text
self.pos = 0
self.line = 1
self.column = 1
self.tokens = []
def _advance_pos(self, length):
"""Helper to update position, line, and column."""
for _ in range(length):
if self.pos < len(self.text):
if self.text[self.pos] == '\n':
self.line += 1
self.column = 1
else:
self.column += 1
self.pos += 1
def get_tokens(self):
"""Tokenizes the input text and returns a list of Tokens."""
while self.pos < len(self.text):
match = None
current_line = self.line
current_column = self.column
for token_type, pattern in TOKEN_SPECIFICATIONS:
# Use re.DOTALL for STRING pattern to allow newlines
regex_flags = re.DOTALL if token_type == 'STRING' else 0
regex = re.compile(pattern, regex_flags)
m = regex.match(self.text, self.pos)
if m:
value = m.group(0)
if token_type == 'WHITESPACE' or token_type.startswith('COMMENT'):
# Ignore whitespace and comments
pass
elif token_type == 'UNKNOWN':
# Ignore unknown characters as requested
pass
else:
token = Token(token_type, value, current_line, current_column)
self.tokens.append(token)
self._advance_pos(len(value)) # Update position, line, column
match = True
break
if not match:
raise Exception(f"Lexer Error: Unprocessable character '{self.text[self.pos]}' at line {self.line}, column {self.column}")
return self.tokens
# --- 2. Parser (AST Construction) ---
class ASTNode:
"""Base class for all Abstract Syntax Tree nodes."""
def __repr__(self):
# Generic representation for debugging
attrs = ', '.join(f"{k}={getattr(self, k)!r}" for k in self.__dict__ if not k.startswith('_'))
return f"{self.__class__.__name__}({attrs})"
# Specific AST Node Types
class ProgramNode(ASTNode):
def __init__(self, statements):
self.statements = statements
class AssignmentNode(ASTNode):
def __init__(self, var_name_token, value_expr):
self.var_name = var_name_token.value
self.value_expr = value_expr
class StringLiteralNode(ASTNode):
def __init__(self, token):
self.value = token.value[1:-1] # Remove quotes
class BinaryLiteralNode(ASTNode):
def __init__(self, token_value):
# token_value can be '$$', '$$$', ';;$$$$$$$;$$$$$$$$', etc.
self.raw_value = token_value
class RealNumberNode(ASTNode):
def __init__(self, token):
self.raw_value = token.value
class ArrayLiteralNode(ASTNode):
def __init__(self, elements):
self.elements = elements
class IdentifierNode(ASTNode):
def __init__(self, token):
self.name = token.value
class UnaryOpNode(ASTNode):
def __init__(self, op_token, operand_expr):
self.op = op_token.value
self.operand = operand_expr
class BinaryOpNode(ASTNode):
def __init__(self, left_expr, op_token, right_expr):
self.left = left_expr
self.op = op_token.value
self.right = right_expr
class InputNode(ASTNode): # For '<'
def __init__(self, var_name_token):
self.var_name = var_name_token.value
class OutputCharNode(ASTNode): # For '>'
def __init__(self, expr):
self.expression = expr
class MultiCharOutputNode(ASTNode): # For '>> N' and '<< N'
def __init__(self, type, count_expr):
self.type = type # 'reversed' or 'normal'
self.count = count_expr
class FunctionCallNode(ASTNode): # For '->> func_name args' and array functions
def __init__(self, func_name_token, args):
self.func_name = func_name_token.value
self.args = args
class FunctionDefNode(ASTNode): # For '-> func_name args'
def __init__(self, func_name_token, params, body_statements):
self.func_name = func_name_token.value
self.params = params # List of IdentifierNode for parameters
self.body = body_statements
class ReturnNode(ASTNode): # For '^'
def __init__(self, expr=None):
self.expression = expr
class IfStatementNode(ASTNode): # For '?', '??#', '??:', '!'
def __init__(self, condition, if_body, else_if_branches, else_body):
self.condition = condition
self.if_body = if_body
self.else_if_branches = else_if_branches # List of (condition, body) tuples
self.else_body = else_body
class WhileLoopNode(ASTNode): # For '@', '!@!'
def __init__(self, condition, body_statements):
self.condition = condition
self.body = body_statements
class ForLoopNode(ASTNode): # For '#@#', '!@!'
def __init__(self, iterator_var, range_expr, body_statements):
self.iterator_var = iterator_var # IdentifierNode
self.range_expr = range_expr # Expression that evaluates to an iterable (e.g., array)
self.body = body_statements
class TryCatchNode(ASTNode): # For '%%', '%%%%'
def __init__(self, try_body, catch_body, exception_var=None):
self.try_body = try_body
self.catch_body = catch_body
self.exception_var = exception_var # IdentifierNode for exception variable
class ExceptionLiteralNode(ASTNode): # For '(exception_message)'
def __init__(self, message_token):
self.message = message_token.value[1:-1] # Remove parentheses
class Parser:
"""Builds an Abstract Syntax Tree (AST) from a stream of Tokens."""
def __init__(self, tokens):
self.tokens = tokens
self.current_token_index = 0
def _error(self, message):
token = self.current_token()
line = token.line if token else 'EOF'
col = token.column if token else 'EOF'
value = token.value if token else 'EOF'
token_type = token.type if token else 'EOF'
raise Exception(f"Parser Error at line {line}, col {col}: {message}. Got '{value}' ({token_type})")
def current_token(self):
"""Returns the current token or None if end of tokens."""
if self.current_token_index < len(self.tokens):
return self.tokens[self.current_token_index]
return None
def peek_token(self, offset=1):
"""Peeks ahead without consuming tokens."""
if self.current_token_index + offset < len(self.tokens):
return self.tokens[self.current_token_index + offset]
return None
def advance(self):
"""Consumes the current token and moves to the next."""
self.current_token_index += 1
def eat(self, *expected_types):
"""Consumes the current token if its type matches one of the expected types."""
token = self.current_token()
if token and token.type in expected_types:
self.advance()
return token
else:
self._error(f"Expected one of {expected_types}, got {token.type if token else 'EOF'}")
def parse_program(self):
"""Parses the entire MEL program."""
self.eat('PROGRAM_START')
statements = []
while self.current_token() and self.current_token().type != 'PROGRAM_END':
# Skip comments that might appear within the program body
if self.current_token().type in ['COMMENT_SINGLE', 'COMMENT_PURE']:
self.advance()
continue
statements.append(self.parse_statement())
self.eat('PROGRAM_END')
return ProgramNode(statements)
def parse_statement(self):
"""Parses a single statement."""
token = self.current_token()
# Assignment
if token and token.type == 'IDENTIFIER' and self.peek_token() and self.peek_token().type == 'ASSIGN':
statement_node = self.parse_assignment()
# Function Definition
elif token and token.type == 'FUNCTION_DEF':
statement_node = self.parse_function_definition()
# Function Call / Array Function Call (can also be an expression)
elif token and token.type == 'FUNCTION_CALL_PREFIX':
statement_node = self.parse_function_call()
# Output operations
elif token and token.type == 'OUTPUT_CHAR':
statement_node = self.parse_output_char()
elif token and (token.type == 'OUTPUT_REVERSED' or token.type == 'OUTPUT_NORMAL'):
statement_node = self.parse_multi_char_output()
# Input operation
elif token and token.type == 'INPUT':
statement_node = self.parse_input()
# Control Flow
elif token and token.type == 'IF':
statement_node = self.parse_if_statement()
elif token and token.type == 'WHILE_LOOP':
statement_node = self.parse_while_loop()
elif token and token.type == 'FOR_LOOP':
statement_node = self.parse_for_loop()
elif token and token.type == 'TRY':
statement_node = self.parse_try_catch()
# Return statement
elif token and token.type == 'RETURN':
statement_node = self.parse_return()
# Exception Literal (can appear as a statement if not part of a try/catch)
elif token and token.type == 'LPAREN' and self.peek_token() and self.peek_token().type in ['IDENTIFIER', 'STRING'] and self.peek_token(2) and self.peek_token(2).type == 'RPAREN':
statement_node = self.parse_exception_literal()
else:
self._error(f"Unexpected token for statement: '{token.value}' ({token.type})")
# After parsing a statement, consume any inline comments on the same line.
# This assumes statements are implicitly terminated by newlines or comments.
# Check if the next token is on the same line and is a comment.
while self.current_token() and self.current_token().line == token.line and \
self.current_token().type in ['COMMENT_SINGLE', 'COMMENT_PURE']:
self.advance()
return statement_node
def parse_assignment(self):
"""Parses an assignment statement: IDENTIFIER = EXPRESSION."""
var_name_token = self.eat('IDENTIFIER')
self.eat('ASSIGN')
value_expr = self.parse_expression()
return AssignmentNode(var_name_token, value_expr)
def parse_expression(self):
"""
Parses an expression, handling operator precedence.
This is a simplified implementation for demonstration.
A full parser would use a more robust precedence climbing algorithm.
For now, it handles unary ops and then binary ops from left to right.
"""
expr = self._parse_comparison_expression()
return expr
def _parse_comparison_expression(self):
"""Parses comparison expressions (lowest precedence)."""
node = self._parse_arithmetic_expression()
while self.current_token() and self.current_token().type in [
'REAL_LT', 'REAL_EQ', 'REAL_GT', 'REAL_LE_EQ', 'REAL_GE_EQ', 'REAL_NE_EQ',
'BINARY_LT', 'BINARY_EQ', 'BINARY_GT', 'BINARY_LE_EQ', 'BINARY_GE_EQ', 'BINARY_NE_EQ'
]:
op_token = self.current_token()
self.advance()
right = self._parse_arithmetic_expression()
node = BinaryOpNode(node, op_token, right)
return node
def _parse_arithmetic_expression(self):
"""Parses arithmetic expressions (higher precedence than comparison)."""
node = self._parse_term() # Handles multiplication/division first
while self.current_token() and self.current_token().type in [
'REAL_ADD', 'REAL_SUB', 'BINARY_ADD', 'BINARY_SUB'
]:
op_token = self.current_token()
self.advance()
right = self._parse_term()
node = BinaryOpNode(node, op_token, right)
return node
def _parse_term(self):
"""Parses terms (multiplication, division, modulo, factorial, sqrt, power)."""
node = self._parse_unary_expression()
while self.current_token() and self.current_token().type in [
'REAL_MUL', 'REAL_DIV', 'REAL_MOD', 'REAL_FACTORIAL', 'REAL_SQRT', 'REAL_POW',
'BINARY_MUL', 'BINARY_DIV', 'BINARY_MOD', 'BINARY_FACTORIAL', 'BINARY_SQRT', 'BINARY_POW'
]:
op_token = self.current_token()
self.advance()
# Factorial/Sqrt/Power are unary-like on the right, but here treated as binary for simplicity
# A more robust parser would handle these as part of unary or post-fix ops
if op_token.type in ['REAL_FACTORIAL', 'BINARY_FACTORIAL', 'REAL_SQRT', 'BINARY_SQRT', 'REAL_POW', 'BINARY_POW']:
# For simplicity, these are treated as binary ops requiring a right operand.
# In MEL, they are likely postfix unary, but current grammar treats them as infix binary.
right = self._parse_unary_expression()
node = BinaryOpNode(node, op_token, right)
else:
right = self._parse_unary_expression()
node = BinaryOpNode(node, op_token, right)
return node
def _parse_unary_expression(self):
"""Parses unary expressions (e.g., |NUMBER)."""
token = self.current_token()
if token and token.type == 'UNARY_NEGATE':
op_token = self.current_token()
self.advance()
operand = self._parse_primary_expression()
return UnaryOpNode(op_token, operand)
return self._parse_primary_expression()
def _parse_primary_expression(self):
"""Parses basic literals, identifiers, or parenthesized expressions."""
token = self.current_token()
if token and token.type == 'STRING':
self.eat('STRING')
return StringLiteralNode(token)
elif token and token.type == 'REAL_NUMBER':
self.eat('REAL_NUMBER')
return RealNumberNode(token)
elif token and token.type.startswith('BINARY_'): # All binary literals
# Handle multi-bit binary numbers: ;;$$$$$$$;$$$$$$$$
if token.type == 'BINARY_LITERAL_PREFIX':
self.eat('BINARY_LITERAL_PREFIX') # Consume ';;'
bit_sequence_tokens = []
while self.current_token() and self.current_token().type in ['BINARY_BIT_ZERO', 'BINARY_BIT_ONE']:
bit_sequence_tokens.append(self.eat(self.current_token().type))
if self.current_token() and self.current_token().type == 'SEMICOLON_DELIMITER':
self.eat('SEMICOLON_DELIMITER') # Consume semicolon
return BinaryLiteralNode(';;' + ';'.join([t.value for t in bit_sequence_tokens]))
else: # All other single binary literals ($$, $$$, etc.)
self.eat(token.type) # Consume the specific binary token
return BinaryLiteralNode(token.value)
elif token and token.type == 'IDENTIFIER':
self.eat('IDENTIFIER')
return IdentifierNode(token)
elif token and token.type == 'ARRAY_START':
return self.parse_array_literal()
elif token and token.type == 'LPAREN':
# Check if it's an exception literal or a parenthesized expression
# Exception literal: (IDENTIFIER) or (STRING)
if self.peek_token() and self.peek_token().type in ['IDENTIFIER', 'STRING'] and \
self.peek_token(2) and self.peek_token(2).type == 'RPAREN':
return self.parse_exception_literal()
else: # Standard parenthesized expression
self.eat('LPAREN')
expr = self.parse_expression()
self.eat('RPAREN')
return expr
elif token and token.type == 'FUNCTION_CALL_PREFIX': # Added this case for function calls as expressions
return self.parse_function_call()
else:
self._error(f"Unexpected token for primary expression: '{token.value}' ({token.type})")
def parse_array_literal(self):
"""Parses an array literal: [item1, item2, ...] (using [ and ])."""
self.eat('ARRAY_START')
elements = []
if self.current_token() and self.current_token().type != 'ARRAY_END':
elements.append(self.parse_expression())
while self.current_token() and self.current_token().type == 'COMMA':
self.eat('COMMA')
elements.append(self.parse_expression())
self.eat('ARRAY_END')
return ArrayLiteralNode(elements)
def parse_input(self):
"""Parses an input statement: < IDENTIFIER."""
self.eat('INPUT')
var_name_token = self.eat('IDENTIFIER')
return InputNode(var_name_token)
def parse_output_char(self):
"""Parses single character output: > EXPRESSION."""
self.eat('OUTPUT_CHAR')
expr = self.parse_expression()
return OutputCharNode(expr)
def parse_multi_char_output(self):
"""Parses multi-character output: >> N or << N."""
output_type_token = self.eat('OUTPUT_REVERSED', 'OUTPUT_NORMAL')
expr = self.parse_expression() # This should be the count
return MultiCharOutputNode(output_type_token.type, expr)
def parse_function_call(self):
"""Parses a function call: ->> IDENTIFIER args... or ->> : args..."""
self.eat('FUNCTION_CALL_PREFIX')
func_name_token = None
# Special handling for '->> :' (eval-like functionality)
if self.current_token() and self.current_token().type == 'COLON':
func_name_token = self.eat('COLON') # Consume the ':' token
else:
func_name_token = self.eat('IDENTIFIER') # Regular function name
args = []
# Arguments are space-separated expressions until end of line or next statement
# This is a heuristic and might need refinement for complex cases
# Stop at tokens that are clearly not part of an expression or are statement delimiters
expression_end_tokens = [
'PROGRAM_END', 'COMMENT_SINGLE', 'COMMENT_PURE',
'ASSIGN', 'INPUT', 'OUTPUT_CHAR', 'OUTPUT_REVERSED', 'OUTPUT_NORMAL',
'FUNCTION_DEF', 'FUNCTION_CALL_PREFIX', 'RETURN', 'TRY',
'IF', 'ELSE_IF', 'ELSE', 'END_CONDITIONAL',
'WHILE_LOOP', 'FOR_LOOP', 'END_LOOP', 'CATCH',
'COMMA', 'SEMICOLON_DELIMITER', 'RPAREN', 'ARRAY_END', 'ARRAY_START', 'LPAREN', 'COLON'
]
while self.current_token() and self.current_token().type not in expression_end_tokens:
try:
arg_expr = self.parse_expression()
args.append(arg_expr)
except Exception as e:
# If it's a parser error that means the current token is not an expression part,
# then we stop collecting arguments.
if "Parser Error" in str(e):
break
else:
raise # Re-raise other exceptions
return FunctionCallNode(func_name_token, args)
def parse_function_definition(self):
"""Parses a function definition: -> IDENTIFIER args BODY."""
self.eat('FUNCTION_DEF')
func_name_token = self.eat('IDENTIFIER')
params = []
# Parameters are identifiers, space-separated
while self.current_token() and self.current_token().type == 'IDENTIFIER':
params.append(IdentifierNode(self.eat('IDENTIFIER')))
# Function body is a block of statements.
# It ends at a RETURN, another FUNCTION_DEF, or PROGRAM_END.
body_statements = self._parse_block(
end_tokens=['RETURN', 'FUNCTION_DEF', 'PROGRAM_END', 'IF', 'WHILE_LOOP', 'FOR_LOOP', 'TRY', 'CATCH']
)
return FunctionDefNode(func_name_token, params, body_statements)
def parse_return(self):
"""Parses a return statement: ^ [EXPRESSION]."""
self.eat('RETURN')
expr = None
# Check if there's an expression after '^'
# A return expression can be followed by comments or end of program
if self.current_token() and self.current_token().type not in ['PROGRAM_END', 'COMMENT_SINGLE', 'COMMENT_PURE']:
try:
expr = self.parse_expression()
except Exception:
pass # No expression, just a bare return
return ReturnNode(expr)
def parse_if_statement(self):
"""Parses an If-Else If-Else block."""
self.eat('IF')
condition = self.parse_expression()
if_body = self._parse_block(
# Removed 'IF' from end_tokens for if_body, as it should be a new statement
end_tokens=['END_CONDITIONAL', 'ELSE_IF', 'ELSE', 'PROGRAM_END', 'FUNCTION_DEF', 'RETURN', 'TRY', 'WHILE_LOOP', 'FOR_LOOP', 'CATCH']
)
else_if_branches = []
while self.current_token() and self.current_token().type == 'ELSE_IF':
self.eat('ELSE_IF')
else_if_condition = self.parse_expression()
else_if_body = self._parse_block(
# Removed 'IF' from end_tokens for else_if_body
end_tokens=['END_CONDITIONAL', 'ELSE_IF', 'ELSE', 'PROGRAM_END', 'FUNCTION_DEF', 'RETURN', 'TRY', 'WHILE_LOOP', 'FOR_LOOP', 'CATCH']
)
else_if_branches.append((else_if_condition, else_if_body))
else_body = None
if self.current_token() and self.current_token().type == 'ELSE':
self.eat('ELSE')
else_body = self._parse_block(
# Removed 'IF' from end_tokens for else_body
end_tokens=['END_CONDITIONAL', 'PROGRAM_END', 'FUNCTION_DEF', 'RETURN', 'TRY', 'WHILE_LOOP', 'FOR_LOOP', 'CATCH']
)
self.eat('END_CONDITIONAL')
return IfStatementNode(condition, if_body, else_if_branches, else_body)
def _parse_block(self, end_tokens=None):
"""
Helper to parse a block of statements until a specific end token.
This is crucial for control flow (if, loops, try/catch) and functions.
It stops at tokens that mark the end of a block or the start of a new top-level construct.
"""
if end_tokens is None:
# Default end tokens for general blocks (e.g., top-level statements)
end_tokens = ['PROGRAM_END']
block_statements = []
while self.current_token() and self.current_token().type not in end_tokens:
# Skip comments within blocks
if self.current_token().type in ['COMMENT_SINGLE', 'COMMENT_PURE']:
self.advance()
continue
# If we hit PROGRAM_END, it's an error if it's not in end_tokens for this block
if self.current_token().type == 'PROGRAM_END' and 'PROGRAM_END' not in end_tokens:
self._error("Unexpected end of program within a block.")
# Attempt to parse a statement. If it fails, it might be an unexpected token
# that's not explicitly in end_tokens, or a malformed statement.
try:
statement = self.parse_statement()
block_statements.append(statement)
except Exception as e:
# If the error is not due to hitting an end_token, then it's a real syntax error
if self.current_token() and self.current_token().type not in end_tokens:
self._error(f"Error parsing block statement: {e}")
else:
# We hit an end_token, so we break the loop. This is expected.
break
return block_statements
def parse_while_loop(self):
"""Parses a While loop: @ CONDITION BODY !@!."""
self.eat('WHILE_LOOP')
condition = self.parse_expression()
body = self._parse_block(
end_tokens=['END_LOOP', 'PROGRAM_END', 'FUNCTION_DEF', 'RETURN', 'TRY', 'IF', 'WHILE_LOOP', 'FOR_LOOP', 'CATCH']
)
self.eat('END_LOOP')
return WhileLoopNode(condition, body)
def parse_for_loop(self):
"""Parses a For loop: #@# IDENTIFIER EXPRESSION BODY !@!."""
self.eat('FOR_LOOP')
iterator_var = self.eat('IDENTIFIER')
range_expr = self.parse_expression() # This should evaluate to an array
body = self._parse_block(
end_tokens=['END_LOOP', 'PROGRAM_END', 'FUNCTION_DEF', 'RETURN', 'TRY', 'IF', 'WHILE_LOOP', 'FOR_LOOP', 'CATCH']
)
self.eat('END_LOOP')
return ForLoopNode(IdentifierNode(iterator_var), range_expr, body)
def parse_try_catch(self):
"""Parses a Try-Catch block: %% BODY %%%% [IDENTIFIER] BODY."""
self.eat('TRY')
try_body = self._parse_block(
end_tokens=['CATCH', 'PROGRAM_END', 'FUNCTION_DEF', 'RETURN', 'TRY', 'IF', 'WHILE_LOOP', 'FOR_LOOP']
)
self.eat('CATCH')
exception_var = None # Not explicitly supported in MEL syntax for now
catch_body = self._parse_block(
end_tokens=['PROGRAM_END', 'FUNCTION_DEF', 'RETURN', 'TRY', 'IF', 'WHILE_LOOP', 'FOR_LOOP']
)
return TryCatchNode(try_body, catch_body, exception_var)
def parse_exception_literal(self):
"""Parses an exception literal: (exception_message)."""
self.eat('LPAREN')
# Message can be an identifier or a string literal
message_token = self.eat('IDENTIFIER', 'STRING')
self.eat('RPAREN')
return ExceptionLiteralNode(message_token)
def parse(self):
"""Starts the parsing process."""
return self.parse_program()
# --- 3. Interpreter (Execution) ---
class MELRuntimeError(Exception):
"""Custom exception for MEL runtime errors."""
pass
class Environment:
"""Manages variable scopes."""
def __init__(self, parent=None):
self.values = {}
self.parent = parent
def define(self, name, value):
"""Defines a new variable in the current scope."""
self.values[name] = value
def assign(self, name, value):
"""Assigns a value to an existing variable, searching up the scope chain."""
if name in self.values:
self.values[name] = value
return
if self.parent:
self.parent.assign(name, value)
return
raise MELRuntimeError(f"Undefined variable '{name}'")
def get(self, name):
"""Retrieves a variable's value, searching up the scope chain."""
if name in self.values:
return self.values[name]
if self.parent:
return self.parent.get(name)
raise MELRuntimeError(f"Undefined variable '{name}'")
class Interpreter:
"""Executes the MEL Abstract Syntax Tree."""
def __init__(self):
self.global_env = Environment()
self.current_env = self.global_env
self.output_buffer = [] # Stores recently printed characters for >> and <<
self.functions = {} # Stores defined functions: {name: FunctionDefNode}
def _visit(self, node):
"""Dispatches to the appropriate visit method based on node type."""
method_name = f'visit_{type(node).__name__}'
visitor = getattr(self, method_name, self._generic_visit)
return visitor(node)
def _generic_visit(self, node):
raise MELRuntimeError(f"No visit method for node type: {type(node).__name__}")
def interpret(self, ast):
"""Starts the interpretation process from the root AST node."""
self._visit(ast)
def visit_ProgramNode(self, node):
for statement in node.statements:
self._visit(statement)
def visit_AssignmentNode(self, node):
value = self._visit(node.value_expr)
self.current_env.define(node.var_name, value)
def visit_StringLiteralNode(self, node):
return node.value
def visit_BinaryLiteralNode(self, node):
raw_value = node.raw_value
if raw_value == '$$': return False
if raw_value == '$$$': return True
if raw_value == '$$$$': return None
if raw_value == '$$$$$': return [] # Represents "empty, non-existent" as an empty list/array
if raw_value == '$$$$$$': return 0
if raw_value == '$$$$$$$': return 0 # Single binary 0
if raw_value == '$$$$$$$$': return 1 # Single binary 1
# Handle multi-bit binary numbers: ;;$$$$$$$;$$$$$$$$
if raw_value.startswith(';;'):
# Reconstruct the bit string from raw_value (e.g., ';;$$$$$$$$;$$$$$$$')
# Split by ';' and map to '0' or '1'
bit_parts = raw_value[2:].split(';')
bits = ''.join(['0' if b == '$$$$$$$' else '1' for b in bit_parts if b]) # Filter empty strings from split
if not bits: # Handle cases like `;;`
return 0
return int(bits, 2)
raise MELRuntimeError(f"Unknown binary literal: {raw_value}")
def visit_RealNumberNode(self, node):
parts = node.raw_value.split(":")
# Convert # to digit length, then join for float
digits = [str(len(part)) for part in parts]
return float(".".join([digits[0]] + digits[1:]))
def visit_ArrayLiteralNode(self, node):
return [self._visit(elem) for elem in node.elements]
def visit_IdentifierNode(self, node):
return self.current_env.get(node.name)
def visit_UnaryOpNode(self, node):
operand_val = self._visit(node.operand)
if node.op == '|': # Negate number
if isinstance(operand_val, (int, float)):
return -operand_val
raise MELRuntimeError(f"Unary negate operator '|' expects a number, got {type(operand_val)}")
raise MELRuntimeError(f"Unknown unary operator: {node.op}")
def visit_BinaryOpNode(self, node):
left_val = self._visit(node.left)
right_val = self._visit(node.right)
op = node.op
# Helper for type checking
def check_types(l, r, expected_type_str):
if not isinstance(l, (int, float)) or not isinstance(r, (int, float)):
raise MELRuntimeError(f"Binary operator '{op}' expects {expected_type_str} numbers, got {type(l)} and {type(r)}")
if op.startswith('$'): # Binary operators
check_types(left_val, right_val, "binary")
if not isinstance(left_val, int) or not isinstance(right_val, int):
raise MELRuntimeError(f"Binary operator '{op}' expects integers, got {type(left_val)} and {type(right_val)}")
if op == '$+': return left_val + right_val
if op == '$-': return left_val - right_val
if op == '$*': return left_val * right_val
if op == '$/':
if right_val == 0: raise MELRuntimeError("( $$$$$$/$$$$$$ ) Binary division by zero")
return left_val // right_val # Integer division for binary
if op == '$!': # Factorial (right_val is ignored, it's unary-like)
if left_val < 0: raise MELRuntimeError("( |#...#! ) Binary factorial of negative number")
res = 1
for i in range(1, left_val + 1): res *= i
return res
if op == '$\\': # Square root (right_val is ignored, it's unary-like)
if left_val < 0: raise MELRuntimeError("( #\\|#... ) Binary square root of negative number")
return int(left_val**0.5) # Integer result for binary sqrt
if op == '$%':
if right_val == 0: raise MELRuntimeError("Binary modulo by zero")
return left_val % right_val
if op == '$^': return left_val ** right_val
# Binary Comparison
if op == '$<': return left_val < right_val
if op == '$=': return left_val == right_val
if op == '$>': return left_val > right_val
if op == '$<=': return left_val <= right_val
if op == '$>=': return left_val >= right_val
if op == '$!=': return left_val != right_val
elif op.startswith('#'): # Real operators
check_types(left_val, right_val, "real")
if op == '#+': return left_val + right_val
if op == '#-': return left_val - right_val
if op == '#*': return left_val * right_val
if op == '#/':
if right_val == 0: raise MELRuntimeError("( $$$$$$/$$$$$$ ) Real division by zero")
return left_val / right_val # Float division for real
if op == '#!': # Factorial (right_val is ignored, it's unary-like)
if left_val < 0: raise MELRuntimeError("( |#...#! ) Real factorial of negative number")
# Factorial for floats is complex (Gamma function). For simplicity, only integer part.
res = 1.0
for i in range(1, int(left_val) + 1): res *= i
return res
if op == '#\\': # Square root (right_val is ignored, it's unary-like)
if left_val < 0: raise MELRuntimeError("( #\\|#... ) Real square root of negative number")
return left_val**0.5
if op == '#%':
if right_val == 0: raise MELRuntimeError("Real modulo by zero")
return left_val % right_val
if op == '#^': return left_val ** right_val
# Real Comparison
if op == '#<': return left_val < right_val
if op == '#=': return left_val == right_val
if op == '#>': return left_val > right_val
if op == '#<=': return left_val <= right_val
if op == '#>=': return left_val >= right_val
if op == '#!=': return left_val != right_val
raise MELRuntimeError(f"Unknown binary operator: {op}")
def visit_InputNode(self, node):
input_value = input(f"Enter value for '{node.var_name}': ")
self.current_env.assign(node.var_name, input_value) # Store as string, conversion handled by eval_expr if needed
def visit_OutputCharNode(self, node):
value = self._visit(node.expression)
if isinstance(value, str) and len(value) > 0:
char_to_print = value[0] # Confirmed: prints only the first character
elif isinstance(value, (int, float)):
# Convert number to character (e.g., ASCII)
try:
char_to_print = chr(int(value))
except ValueError:
raise MELRuntimeError(f"Cannot convert number {value} to a character (out of ASCII range).")
else:
# For booleans, None, etc., convert to string and take first char
char_to_print = str(value)[0] if len(str(value)) > 0 else ''
self.output_buffer.append(char_to_print)
print(char_to_print, end="")
def visit_MultiCharOutputNode(self, node):
count = self._visit(node.count)
if not isinstance(count, int) or count < 0:
raise MELRuntimeError(f"Multi-character output count must be a non-negative integer, got {count}")
if count > len(self.output_buffer):
raise MELRuntimeError(f"Cannot retrieve {count} characters, only {len(self.output_buffer)} available in buffer.")
chars_to_print = self.output_buffer[-count:]
if node.type == 'OUTPUT_REVERSED': # >> N
print("".join(reversed(chars_to_print)), end="")
else: # << N
print("".join(chars_to_print), end="")
# Clear printed characters from buffer
self.output_buffer = self.output_buffer[:-count]
def visit_FunctionCallNode(self, node):
func_name = node.func_name
args_values = [self._visit(arg) for arg in node.args]
# Handle built-in array/string functions
if func_name == 'append':
if len(args_values) < 2: raise MELRuntimeError("append requires at least array_variable_name and one item")
array_var_name = args_values[0] # Assuming first arg is variable name string
if not isinstance(array_var_name, str): raise MELRuntimeError("append: first argument must be array variable name (string)")
array = self.current_env.get(array_var_name)
if not isinstance(array, list): raise MELRuntimeError(f"append: '{array_var_name}' is not an array")
array.extend(args_values[1:])
self.current_env.assign(array_var_name, array) # Update the array in scope
return None
elif func_name == 'length':
if len(args_values) != 1: raise MELRuntimeError("length requires one argument")
val = args_values[0]
if isinstance(val, (str, list)):
return len(val)
raise MELRuntimeError(f"length expects string or array, got {type(val)}")
elif func_name == 'getitem':
if len(args_values) != 2: raise MELRuntimeError("getitem requires two arguments: array/string and index")
collection = args_values[0]
index = args_values[1]
if not isinstance(index, int): raise MELRuntimeError("getitem index must be an integer")
if not isinstance(collection, (str, list)): raise MELRuntimeError("getitem expects string or array")
if not (1 <= index <= len(collection)): raise MELRuntimeError("getitem index out of bounds (1-based)")
return collection[index - 1] # MEL is 1-based indexing
elif func_name == 'list':
if len(args_values) != 1: raise MELRuntimeError("list requires one argument")
val = args_values[0]
if isinstance(val, (str, list)):
print(val, end="") # Prints the Python representation of the list/string
else:
raise MELRuntimeError(f"list expects string or array, got {type(val)}")
return None
elif func_name == 'reverse':
if len(args_values) != 1: raise MELRuntimeError("reverse requires one argument")
val = args_values[0]
if isinstance(val, str):
return val[::-1]
elif isinstance(val, list):
return val[::-1] # Returns a new reversed list
raise MELRuntimeError(f"reverse expects string or array, got {type(val)}")
elif func_name == ':': # Special case for `->> : Run MEL string code`
if len(args_values) != 1 or not isinstance(args_values[0], str):
raise MELRuntimeError("Run MEL string code (->> :) expects a single string argument.")
embedded_code = args_values[0]
try:
# Create a new lexer/parser/interpreter for the embedded code
# This will run in its own isolated environment (global scope)
sub_lexer = Lexer(embedded_code)
sub_tokens = sub_lexer.get_tokens()
sub_parser = Parser(sub_tokens)
sub_ast = sub_parser.parse()
sub_interpreter = Interpreter()
sub_interpreter.interpret(sub_ast)
return None
except Exception as e:
raise MELRuntimeError(f"Error in embedded MEL code: {e}")
# Handle user-defined functions
if func_name not in self.functions:
raise MELRuntimeError(f"Undefined function '{func_name}'")
func_def_node = self.functions[func_name]
if len(args_values) != len(func_def_node.params):
raise MELRuntimeError(f"Function '{func_name}' expects {len(func_def_node.params)} arguments, got {len(args_values)}")
# Create a new scope for the function call
previous_env = self.current_env
self.current_env = Environment(previous_env)