-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathintermediate.py
More file actions
404 lines (311 loc) · 10.9 KB
/
intermediate.py
File metadata and controls
404 lines (311 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
"""
PHASE 4: INTERMEDIATE CODE GENERATOR
What is Intermediate Code?
---------------------------
Intermediate code is a representation between high-level source code
and low-level machine code.
Think of it as a "middle language" that's:
- Lower level than source code (simpler operations)
- Higher level than assembly (still somewhat readable)
- Platform independent (not tied to specific CPU)
Why do we need it?
------------------
1. **Optimization**: Easier to optimize than source code or machine code
2. **Portability**: One frontend can target multiple backends
3. **Simplicity**: Each operation is simple and explicit
Example:
Source: y = x + 10 * 2;
Intermediate Code (Three Address Code):
t1 = 10 * 2
t2 = x + t1
y = t2
Notice:
- Each line has at most one operator
- Temporary variables (t1, t2) store intermediate results
- Order of operations is explicit
Three Address Code (TAC)
-------------------------
The most common intermediate representation.
Format: result = operand1 operator operand2
Examples:
t1 = a + b (addition)
t2 = t1 * c (multiplication)
x = t2 (assignment)
print t2 (output)
Why "Three Address"?
- At most 3 addresses (variables/temps) per instruction
- result, operand1, operand2
Real-world examples:
- Java bytecode (for JVM)
- LLVM IR (used by Clang, Swift, Rust)
- .NET CIL (Common Intermediate Language)
Our TAC is simpler but follows the same principles!
"""
from ast_parser import (
ProgramNode, AssignmentNode, PrintNode,
BinaryOpNode, NumberNode, VariableNode
)
class TACInstruction:
"""
A single Three Address Code instruction
Format: result = arg1 op arg2
Examples:
- TACInstruction('t1', '5', '+', '3') → t1 = 5 + 3
- TACInstruction('x', 't1', None, None) → x = t1
- TACInstruction(None, 'x', 'print', None) → print x
"""
def __init__(self, result, arg1, op, arg2):
"""
Create a TAC instruction
Args:
result: Where to store the result (variable or temp)
arg1: First operand
op: Operator (+, -, *, /, or special like 'print')
arg2: Second operand (None for unary operations)
"""
self.result = result
self.arg1 = arg1
self.op = op
self.arg2 = arg2
def __repr__(self):
"""
Pretty print the instruction
Different formats for different instruction types
"""
if self.op == 'print':
return f"print {self.arg1}"
elif self.op is None:
# Simple assignment: x = y
return f"{self.result} = {self.arg1}"
else:
# Binary operation: t1 = a + b
return f"{self.result} = {self.arg1} {self.op} {self.arg2}"
class IntermediateCodeGenerator:
"""
Generates Three Address Code from AST
Process:
1. Walk through AST (like semantic analyzer)
2. For each operation, generate TAC instructions
3. Use temporary variables for intermediate results
Example:
AST for "y = (x + 5) * 2":
Assignment(y)
|
BinOp(*)
/ \
BinOp(+) 2
/ \
x 5
Generated TAC:
t1 = x + 5
t2 = t1 * 2
y = t2
"""
def __init__(self):
"""Initialize the code generator"""
self.instructions = [] # List of TACInstruction objects
self.temp_counter = 0 # Counter for generating unique temp names
def new_temp(self):
"""
Generate a new temporary variable name
Temps are named t0, t1, t2, ...
Why temporaries?
- Store intermediate results
- Make each instruction simple (one operation)
Example: x + y * z
- Can't do in one instruction (two operations: * and +)
- Need temp: t1 = y * z; t2 = x + t1
Returns:
A unique temporary variable name
"""
temp_name = f"t{self.temp_counter}"
self.temp_counter += 1
return temp_name
def emit(self, result, arg1, op, arg2):
"""
Emit (generate) a TAC instruction
Args:
result: Result variable
arg1: First operand
op: Operator
arg2: Second operand
"""
instruction = TACInstruction(result, arg1, op, arg2)
self.instructions.append(instruction)
print(f" Generated: {instruction}")
def generate(self, ast):
"""
Main entry point for code generation
Args:
ast: The root of the AST
Returns:
List of TAC instructions
"""
print("\n⚙️ Starting Intermediate Code Generation...")
print("-" * 60)
self.visit(ast)
print("-" * 60)
print(f"\n✅ Generated {len(self.instructions)} TAC instructions")
return self.instructions
def visit(self, node):
"""
Visit a node (Visitor Pattern, like semantic analyzer)
Returns:
The variable/temp that holds the result of this node
"""
method_name = f'visit_{type(node).__name__}'
visitor = getattr(self, method_name, self.generic_visit)
return visitor(node)
def generic_visit(self, node):
"""Fallback for unhandled nodes"""
raise Exception(f'No visit method for {type(node).__name__}')
# ========================================================================
# VISITOR METHODS
# ========================================================================
def visit_ProgramNode(self, node):
"""
Visit program node
Just visit all statements
"""
for statement in node.statements:
self.visit(statement)
def visit_AssignmentNode(self, node):
"""
Visit assignment: x = expression;
Steps:
1. Generate code for the expression (right side)
2. This returns a temp holding the result
3. Assign that temp to the variable
Example: y = x + 5
AST:
Assignment(y)
|
BinOp(+, x, 5)
Generated TAC:
t0 = x + 5 (from visiting BinOp)
y = t0 (assignment)
"""
print(f" Processing assignment to '{node.variable_name}'")
# Generate code for expression, get result temp
expr_result = self.visit(node.expression)
# Assign to variable
self.emit(node.variable_name, expr_result, None, None)
def visit_PrintNode(self, node):
"""
Visit print statement: print(expression);
Steps:
1. Generate code for expression
2. Emit print instruction
Example: print(x + 5)
Generated TAC:
t0 = x + 5
print t0
"""
print(" Processing print statement")
# Generate code for expression
expr_result = self.visit(node.expression)
# Emit print instruction
self.emit(None, expr_result, 'print', None)
def visit_BinaryOpNode(self, node):
"""
Visit binary operation: left op right
Steps:
1. Generate code for left operand
2. Generate code for right operand
3. Generate instruction to combine them
4. Return temp holding result
Example: x + y * 2
AST:
BinOp(+)
/ \
x BinOp(*)
/ \
y 2
Generated TAC:
t0 = y * 2 (from visiting right subtree)
t1 = x + t0 (combining left and right)
Returns:
Temp variable holding the result (t1 in example)
"""
# Generate code for left operand
left_result = self.visit(node.left)
# Generate code for right operand
right_result = self.visit(node.right)
# Create temp for result
result_temp = self.new_temp()
# Emit instruction
self.emit(result_temp, left_result, node.operator, right_result)
# Return the temp holding result
return result_temp
def visit_NumberNode(self, node):
"""
Visit number literal: 42
Numbers are used directly, no code generation needed.
Just return the number as a string.
Returns:
The number as a string
"""
return str(node.value)
def visit_VariableNode(self, node):
"""
Visit variable reference: x
Variables are used directly, no code generation needed.
Just return the variable name.
Returns:
The variable name
"""
return node.name
def print_tac(instructions):
"""
Pretty print Three Address Code
Shows the TAC in a readable format with line numbers
"""
print("\n📋 Three Address Code (TAC):")
print("=" * 60)
if not instructions:
print("(no instructions)")
return
for i, instr in enumerate(instructions):
print(f"{i:3d}: {instr}")
print("=" * 60)
def demo_intermediate_code():
"""
Demonstration of intermediate code generation
"""
print("=" * 60)
print("PHASE 4: INTERMEDIATE CODE GENERATION DEMO")
print("=" * 60)
from lexer import Lexer
from ast_parser import Parser
# Example program
source_code = """
x = 5;
y = x + 10;
z = x * y + 2;
print(z);
"""
print("\n📝 Source Code:")
print(source_code)
# Compile to AST
print("\n🔄 Compiling to AST...")
lexer = Lexer(source_code)
tokens = lexer.tokenize()
parser = Parser(tokens)
ast = parser.parse()
# Generate intermediate code
generator = IntermediateCodeGenerator()
tac_instructions = generator.generate(ast)
# Display TAC
print_tac(tac_instructions)
print("\n💡 What happened?")
print(" - AST was converted to Three Address Code (TAC)")
print(" - Each instruction has at most one operation")
print(" - Temporary variables (t0, t1, ...) store intermediate results")
print(" - TAC is easier to optimize and translate to machine code")
print(" - Notice how 'z = x * y + 2' became multiple instructions:")
print(" - First: t2 = x * y")
print(" - Then: t3 = t2 + 2")
print(" - Finally: z = t3")
print(" - This makes the order of operations explicit!\n")
if __name__ == "__main__":
demo_intermediate_code()