From 7a074a59b27a61656f3cc54c6b4b4363cacda2be Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:08:25 +0000 Subject: [PATCH 01/10] Implement function parameters, logical operators (&&, ||), and loops (while, for) - Add While, For, Break, Continue AST nodes to support loop constructs - Implement function parameter parsing in parser with Parameter struct - Add logical operator precedence handling for && and || operators - Implement loop statement parsing for while, for, break, continue - Add IR generation for loop control flow with proper label management - Implement short-circuit evaluation for logical operators in IR - Add comprehensive unit and integration tests for all new features - Support nested loops and complex logical expressions - All tests pass (58 unit + 26 integration tests) Co-Authored-By: Valentin Millet --- src/ir/generator.rs | 224 +++++++++++++++-- src/parser/ast.rs | 12 + src/parser/parser.rs | 419 +++++++++++++++++++++++++++++++- src/semantic/lifetime_simple.rs | 22 ++ tests/integration_tests.rs | 222 +++++++++++++++++ 5 files changed, 881 insertions(+), 18 deletions(-) diff --git a/src/ir/generator.rs b/src/ir/generator.rs index 1912ae7..4a69573 100644 --- a/src/ir/generator.rs +++ b/src/ir/generator.rs @@ -10,6 +10,7 @@ pub enum IrGeneratorError { UnsupportedUnaryOperator(TokenType), ComplexFunctionCallsNotSupported, InvalidBinaryOperator(TokenType), + UnsupportedConstruct(String), } /// IR Generator - converts AST to IR @@ -26,6 +27,13 @@ pub struct IrGenerator { string_label_counter: usize, local_types: HashMap, type_checker: TypeChecker, + loop_stack: Vec, +} + +#[derive(Debug, Clone)] +struct LoopContext { + continue_label: String, + break_label: String, } impl IrGenerator { @@ -38,6 +46,7 @@ impl IrGenerator { string_label_counter: 0, local_types: HashMap::new(), type_checker: TypeChecker::new(), + loop_stack: Vec::new(), } } @@ -335,6 +344,109 @@ impl IrGenerator { // Functions are handled at the top level return Err(IrGeneratorError::NestedFunctionsNotSupported); } + + Stmt::While { condition, body } => { + let loop_start = self.new_label("loop_start"); + let loop_end = self.new_label("loop_end"); + + self.loop_stack.push(LoopContext { + continue_label: loop_start.clone(), + break_label: loop_end.clone(), + }); + + self.emit_instruction(IrInstruction::Label { name: loop_start.clone() }); + + // Evaluate condition + let condition_val = self.generate_expr(condition); + + self.emit_instruction(IrInstruction::Branch { + condition: condition_val, + true_label: format!("loop_body_{}", self.label_counter - 1), + false_label: loop_end.clone(), + }); + + let body_label = format!("loop_body_{}", self.label_counter - 1); + self.emit_instruction(IrInstruction::Label { name: body_label }); + + // Generate body + for stmt in body { + self.generate_stmt(stmt)?; + } + + self.emit_instruction(IrInstruction::Jump { label: loop_start }); + + self.emit_instruction(IrInstruction::Label { name: loop_end }); + + self.loop_stack.pop(); + } + + Stmt::For { init, condition, update, body } => { + // Generate initialization if present + if let Some(init_stmt) = init { + self.generate_stmt(init_stmt)?; + } + + let loop_start = self.new_label("for_start"); + let loop_continue = self.new_label("for_continue"); + let loop_end = self.new_label("for_end"); + + self.loop_stack.push(LoopContext { + continue_label: loop_continue.clone(), + break_label: loop_end.clone(), + }); + + self.emit_instruction(IrInstruction::Label { name: loop_start.clone() }); + + if let Some(cond) = condition { + let condition_val = self.generate_expr(cond); + self.emit_instruction(IrInstruction::Branch { + condition: condition_val, + true_label: format!("for_body_{}", self.label_counter - 2), + false_label: loop_end.clone(), + }); + + let body_label = format!("for_body_{}", self.label_counter - 2); + self.emit_instruction(IrInstruction::Label { name: body_label }); + } + + // Generate body + for stmt in body { + self.generate_stmt(stmt)?; + } + + self.emit_instruction(IrInstruction::Label { name: loop_continue }); + + // Generate update expression if present + if let Some(update_expr) = update { + self.generate_expr(update_expr); + } + + self.emit_instruction(IrInstruction::Jump { label: loop_start }); + + self.emit_instruction(IrInstruction::Label { name: loop_end }); + + self.loop_stack.pop(); + } + + Stmt::Break => { + if let Some(loop_ctx) = self.loop_stack.last() { + self.emit_instruction(IrInstruction::Jump { + label: loop_ctx.break_label.clone() + }); + } else { + return Err(IrGeneratorError::UnsupportedConstruct("break statement outside of loop".to_string())); + } + } + + Stmt::Continue => { + if let Some(loop_ctx) = self.loop_stack.last() { + self.emit_instruction(IrInstruction::Jump { + label: loop_ctx.continue_label.clone() + }); + } else { + return Err(IrGeneratorError::UnsupportedConstruct("continue statement outside of loop".to_string())); + } + } } Ok(()) } @@ -368,21 +480,103 @@ impl IrGenerator { } Expr::Binary { left, operator, right } => { - let left_value = self.generate_expr(left); - let right_value = self.generate_expr(right); - let result_temp = self.new_temp(); - let op = IrBinaryOp::from(operator.clone()); - let expr_type = self.infer_expr_type(expr); - - self.emit_instruction(IrInstruction::BinaryOp { - dest: result_temp.clone(), - op, - left: left_value, - right: right_value, - var_type: expr_type, - }); - - result_temp + match operator { + TokenType::LogicalAnd => { + let result_temp = self.new_temp(); + let false_label = self.new_label("and_false"); + let end_label = self.new_label("and_end"); + + // Evaluate left operand + let left_value = self.generate_expr(left); + + self.emit_instruction(IrInstruction::Branch { + condition: left_value, + true_label: format!("and_eval_right_{}", self.label_counter - 2), + false_label: false_label.clone(), + }); + + // Evaluate right operand + let eval_right_label = format!("and_eval_right_{}", self.label_counter - 2); + self.emit_instruction(IrInstruction::Label { name: eval_right_label }); + let right_value = self.generate_expr(right); + + self.emit_instruction(IrInstruction::Move { + dest: result_temp.clone(), + src: right_value, + var_type: IrType::Int, + }); + self.emit_instruction(IrInstruction::Jump { label: end_label.clone() }); + + self.emit_instruction(IrInstruction::Label { name: false_label }); + self.emit_instruction(IrInstruction::Move { + dest: result_temp.clone(), + src: IrValue::IntConstant(0), + var_type: IrType::Int, + }); + + // End label + self.emit_instruction(IrInstruction::Label { name: end_label }); + + result_temp + } + + TokenType::LogicalOr => { + let result_temp = self.new_temp(); + let true_label = self.new_label("or_true"); + let end_label = self.new_label("or_end"); + + // Evaluate left operand + let left_value = self.generate_expr(left); + + self.emit_instruction(IrInstruction::Branch { + condition: left_value, + true_label: true_label.clone(), + false_label: format!("or_eval_right_{}", self.label_counter - 2), + }); + + // Evaluate right operand + let eval_right_label = format!("or_eval_right_{}", self.label_counter - 2); + self.emit_instruction(IrInstruction::Label { name: eval_right_label }); + let right_value = self.generate_expr(right); + + self.emit_instruction(IrInstruction::Move { + dest: result_temp.clone(), + src: right_value, + var_type: IrType::Int, + }); + self.emit_instruction(IrInstruction::Jump { label: end_label.clone() }); + + self.emit_instruction(IrInstruction::Label { name: true_label }); + self.emit_instruction(IrInstruction::Move { + dest: result_temp.clone(), + src: IrValue::IntConstant(1), + var_type: IrType::Int, + }); + + // End label + self.emit_instruction(IrInstruction::Label { name: end_label }); + + result_temp + } + + _ => { + let left_value = self.generate_expr(left); + let right_value = self.generate_expr(right); + let result_temp = self.new_temp(); + let op = IrBinaryOp::from(operator.clone()); + let expr_type = self.infer_expr_type(expr); + + self.emit_instruction(IrInstruction::BinaryOp { + dest: result_temp.clone(), + op, + left: left_value, + right: right_value, + var_type: expr_type, + }); + + result_temp + } + } } Expr::Unary { operator, operand } => { diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 7e0a260..2a4d4bd 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -46,6 +46,18 @@ pub enum Stmt { condition: Expr, then_branch: Vec, }, + While { + condition: Expr, + body: Vec, + }, + For { + init: Option>, + condition: Option, + update: Option, + body: Vec, + }, + Break, + Continue, Block(Vec), Function { return_type: Type, diff --git a/src/parser/parser.rs b/src/parser/parser.rs index cff760e..a189c60 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,5 +1,5 @@ use crate::lexer::{Token, TokenType}; -use crate::parser::ast::{Expr, Stmt}; +use crate::parser::ast::{Expr, Stmt, Parameter}; use crate::types::Type; use crate::error::error::CompilerError; @@ -46,6 +46,24 @@ impl Parser { let return_type = self.consume_type()?; let name = self.consume_identifier()?; self.consume(TokenType::LeftParen)?; + + // Parse function parameters + let mut parameters = Vec::new(); + if !self.check(&TokenType::RightParen) { + loop { + let param_type = self.consume_type()?; + let param_name = self.consume_identifier()?; + parameters.push(Parameter { + name: param_name, + param_type: Type::from(param_type), + is_mutable: false, + }); + if !self.match_token(&TokenType::Comma) { + break; + } + } + } + self.consume(TokenType::RightParen)?; self.consume(TokenType::LeftBrace)?; @@ -64,7 +82,7 @@ impl Parser { return_type: Type::from(return_type), name, type_parameters: Vec::new(), // TODO: Parse generic type parameters - parameters: Vec::new(), // TODO: Parse function parameters + parameters, body, }) } @@ -106,6 +124,63 @@ impl Parser { return Some(Stmt::If { condition, then_branch }); } + if self.match_token(&TokenType::While) { + self.consume(TokenType::LeftParen)?; + let condition = self.expression()?; + self.consume(TokenType::RightParen)?; + self.consume(TokenType::LeftBrace)?; + let mut body = Vec::new(); + while !self.check(&TokenType::RightBrace) && !self.is_at_end() { + body.push(self.statement()?); + } + self.consume(TokenType::RightBrace)?; + return Some(Stmt::While { condition, body }); + } + + if self.match_token(&TokenType::For) { + self.consume(TokenType::LeftParen)?; + let init = if self.check(&TokenType::Semicolon) { + None + } else { + Some(Box::new(self.statement()?)) + }; + if init.is_none() { + self.consume(TokenType::Semicolon)?; + } + + let condition = if self.check(&TokenType::Semicolon) { + None + } else { + Some(self.expression()?) + }; + self.consume(TokenType::Semicolon)?; + + let update = if self.check(&TokenType::RightParen) { + None + } else { + Some(self.expression()?) + }; + self.consume(TokenType::RightParen)?; + + self.consume(TokenType::LeftBrace)?; + let mut body = Vec::new(); + while !self.check(&TokenType::RightBrace) && !self.is_at_end() { + body.push(self.statement()?); + } + self.consume(TokenType::RightBrace)?; + return Some(Stmt::For { init, condition, update, body }); + } + + if self.match_token(&TokenType::Break) { + self.consume(TokenType::Semicolon)?; + return Some(Stmt::Break); + } + + if self.match_token(&TokenType::Continue) { + self.consume(TokenType::Semicolon)?; + return Some(Stmt::Continue); + } + if self.match_token(&TokenType::Println) { self.consume(TokenType::LeftParen)?; @@ -199,7 +274,7 @@ impl Parser { } fn assignment(&mut self) -> Option { - let expr = self.equality()?; + let expr = self.logical_or()?; // Check if this is an assignment (identifier = expression) if let Expr::Identifier(name) = expr { @@ -217,6 +292,32 @@ impl Parser { Some(expr) } + fn logical_or(&mut self) -> Option { + let mut expr = self.logical_and()?; + while let Some(op) = self.match_any(&[TokenType::LogicalOr]) { + let right = self.logical_and()?; + expr = Expr::Binary { + left: Box::new(expr), + operator: op, + right: Box::new(right), + }; + } + Some(expr) + } + + fn logical_and(&mut self) -> Option { + let mut expr = self.equality()?; + while let Some(op) = self.match_any(&[TokenType::LogicalAnd]) { + let right = self.equality()?; + expr = Expr::Binary { + left: Box::new(expr), + operator: op, + right: Box::new(right), + }; + } + Some(expr) + } + fn equality(&mut self) -> Option { let mut expr = self.comparison()?; while let Some(op) = self.match_any(&[TokenType::Equal, TokenType::NotEqual]) { @@ -835,4 +936,316 @@ mod tests { let result = parser.parse(); assert!(result.is_empty(), "Should return empty vector for empty token stream"); } + + #[test] + fn test_parse_function_with_parameters() { + // Test parsing: "int add(int a, float b) { return a + b; }" + let tokens = vec![ + create_token(TokenType::Int, "int"), + create_token(TokenType::Identifier("add".to_string()), "add"), + create_token(TokenType::LeftParen, "("), + create_token(TokenType::Int, "int"), + create_token(TokenType::Identifier("a".to_string()), "a"), + create_token(TokenType::Comma, ","), + create_token(TokenType::FloatType, "float"), + create_token(TokenType::Identifier("b".to_string()), "b"), + create_token(TokenType::RightParen, ")"), + create_token(TokenType::LeftBrace, "{"), + create_token(TokenType::Return, "return"), + create_token(TokenType::Identifier("a".to_string()), "a"), + create_token(TokenType::Plus, "+"), + create_token(TokenType::Identifier("b".to_string()), "b"), + create_token(TokenType::Semicolon, ";"), + create_token(TokenType::RightBrace, "}"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + let result = parser.parse(); + + assert_eq!(result.len(), 1); + match &result[0] { + Stmt::Function { return_type, name, parameters, body, .. } => { + assert_eq!(*return_type, Type::from(TokenType::Int)); + assert_eq!(*name, "add"); + assert_eq!(parameters.len(), 2); + + assert_eq!(parameters[0].name, "a"); + assert_eq!(parameters[0].param_type, Type::from(TokenType::Int)); + + assert_eq!(parameters[1].name, "b"); + assert_eq!(parameters[1].param_type, Type::from(TokenType::FloatType)); + + assert_eq!(body.len(), 1); + } + _ => panic!("Expected function statement"), + } + } + + #[test] + fn test_parse_logical_and_operator() { + // Test parsing: "x && y" + let tokens = vec![ + create_token(TokenType::Identifier("x".to_string()), "x"), + create_token(TokenType::LogicalAnd, "&&"), + create_token(TokenType::Identifier("y".to_string()), "y"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + if let Some(expr) = parser.expression() { + match expr { + Expr::Binary { left, operator, right } => { + assert_eq!(*left, Expr::Identifier("x".to_string())); + assert_eq!(operator, TokenType::LogicalAnd); + assert_eq!(*right, Expr::Identifier("y".to_string())); + } + _ => panic!("Expected binary expression with logical AND"), + } + } else { + panic!("Failed to parse logical AND expression"); + } + } + + #[test] + fn test_parse_logical_or_operator() { + // Test parsing: "x || y" + let tokens = vec![ + create_token(TokenType::Identifier("x".to_string()), "x"), + create_token(TokenType::LogicalOr, "||"), + create_token(TokenType::Identifier("y".to_string()), "y"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + if let Some(expr) = parser.expression() { + match expr { + Expr::Binary { left, operator, right } => { + assert_eq!(*left, Expr::Identifier("x".to_string())); + assert_eq!(operator, TokenType::LogicalOr); + assert_eq!(*right, Expr::Identifier("y".to_string())); + } + _ => panic!("Expected binary expression with logical OR"), + } + } else { + panic!("Failed to parse logical OR expression"); + } + } + + #[test] + fn test_parse_logical_operator_precedence() { + // Test parsing: "a || b && c" should be "a || (b && c)" + let tokens = vec![ + create_token(TokenType::Identifier("a".to_string()), "a"), + create_token(TokenType::LogicalOr, "||"), + create_token(TokenType::Identifier("b".to_string()), "b"), + create_token(TokenType::LogicalAnd, "&&"), + create_token(TokenType::Identifier("c".to_string()), "c"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + if let Some(expr) = parser.expression() { + match expr { + Expr::Binary { left, operator: TokenType::LogicalOr, right } => { + assert_eq!(*left, Expr::Identifier("a".to_string())); + match *right { + Expr::Binary { ref left, operator: TokenType::LogicalAnd, ref right } => { + assert_eq!(**left, Expr::Identifier("b".to_string())); + assert_eq!(**right, Expr::Identifier("c".to_string())); + } + _ => panic!("Expected logical AND in right side"), + } + } + _ => panic!("Expected logical OR expression"), + } + } else { + panic!("Failed to parse logical operator precedence"); + } + } + + #[test] + fn test_parse_while_loop() { + // Test parsing: "while (x < 10) { x = x + 1; }" + let tokens = vec![ + create_token(TokenType::While, "while"), + create_token(TokenType::LeftParen, "("), + create_token(TokenType::Identifier("x".to_string()), "x"), + create_token(TokenType::LessThan, "<"), + create_token(TokenType::Integer(10), "10"), + create_token(TokenType::RightParen, ")"), + create_token(TokenType::LeftBrace, "{"), + create_token(TokenType::Identifier("x".to_string()), "x"), + create_token(TokenType::Assign, "="), + create_token(TokenType::Identifier("x".to_string()), "x"), + create_token(TokenType::Plus, "+"), + create_token(TokenType::Integer(1), "1"), + create_token(TokenType::Semicolon, ";"), + create_token(TokenType::RightBrace, "}"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + if let Some(stmt) = parser.statement() { + match stmt { + Stmt::While { condition, body } => { + match condition { + Expr::Binary { left, operator, right } => { + assert_eq!(*left, Expr::Identifier("x".to_string())); + assert_eq!(operator, TokenType::LessThan); + assert_eq!(*right, Expr::Integer(10)); + } + _ => panic!("Expected binary expression in while condition"), + } + assert_eq!(body.len(), 1); + match &body[0] { + Stmt::ExprStmt(Expr::Assignment { name, value }) => { + assert_eq!(*name, "x"); + match value.as_ref() { + Expr::Binary { ref left, operator: TokenType::Plus, ref right } => { + assert_eq!(**left, Expr::Identifier("x".to_string())); + assert_eq!(**right, Expr::Integer(1)); + } + _ => panic!("Expected addition in assignment"), + } + } + _ => panic!("Expected assignment in while body"), + } + } + _ => panic!("Expected while statement"), + } + } else { + panic!("Failed to parse while statement"); + } + } + + #[test] + fn test_parse_for_loop() { + // Test parsing: "for (int i = 0; i < 10; i = i + 1) { println("Hello"); }" + let tokens = vec![ + create_token(TokenType::For, "for"), + create_token(TokenType::LeftParen, "("), + create_token(TokenType::Int, "int"), + create_token(TokenType::Identifier("i".to_string()), "i"), + create_token(TokenType::Assign, "="), + create_token(TokenType::Integer(0), "0"), + create_token(TokenType::Semicolon, ";"), + create_token(TokenType::Identifier("i".to_string()), "i"), + create_token(TokenType::LessThan, "<"), + create_token(TokenType::Integer(10), "10"), + create_token(TokenType::Semicolon, ";"), + create_token(TokenType::Identifier("i".to_string()), "i"), + create_token(TokenType::Assign, "="), + create_token(TokenType::Identifier("i".to_string()), "i"), + create_token(TokenType::Plus, "+"), + create_token(TokenType::Integer(1), "1"), + create_token(TokenType::RightParen, ")"), + create_token(TokenType::LeftBrace, "{"), + create_token(TokenType::Println, "println"), + create_token(TokenType::LeftParen, "("), + create_token(TokenType::String("Hello".to_string()), "\"Hello\""), + create_token(TokenType::RightParen, ")"), + create_token(TokenType::Semicolon, ";"), + create_token(TokenType::RightBrace, "}"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + if let Some(stmt) = parser.statement() { + match stmt { + Stmt::For { init, condition, update, body } => { + // Check init: int i = 0 + assert!(init.is_some()); + match init.unwrap().as_ref() { + Stmt::VarDecl { var_type, name, initializer } => { + assert_eq!(*var_type, Type::from(TokenType::Int)); + assert_eq!(name, "i"); + assert_eq!(*initializer, Some(Expr::Integer(0))); + } + _ => panic!("Expected variable declaration in for init"), + } + + // Check condition: i < 10 + assert!(condition.is_some()); + match condition.unwrap() { + Expr::Binary { left, operator, right } => { + assert_eq!(*left, Expr::Identifier("i".to_string())); + assert_eq!(operator, TokenType::LessThan); + assert_eq!(*right, Expr::Integer(10)); + } + _ => panic!("Expected binary expression in for condition"), + } + + assert!(update.is_some()); + match update.unwrap() { + Expr::Assignment { name, value } => { + assert_eq!(name, "i"); + match value.as_ref() { + Expr::Binary { ref left, operator: TokenType::Plus, ref right } => { + assert_eq!(**left, Expr::Identifier("i".to_string())); + assert_eq!(**right, Expr::Integer(1)); + } + _ => panic!("Expected addition in for update"), + } + } + _ => panic!("Expected assignment in for update"), + } + + assert_eq!(body.len(), 1); + match &body[0] { + Stmt::PrintStmt { format_string, args } => { + assert_eq!(*format_string, Expr::String("Hello".to_string())); + assert!(args.is_empty()); + } + _ => panic!("Expected print statement in for body"), + } + } + _ => panic!("Expected for statement"), + } + } else { + panic!("Failed to parse for statement"); + } + } + + #[test] + fn test_parse_break_statement() { + // Test parsing: "break;" + let tokens = vec![ + create_token(TokenType::Break, "break"), + create_token(TokenType::Semicolon, ";"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + if let Some(stmt) = parser.statement() { + match stmt { + Stmt::Break => { + } + _ => panic!("Expected break statement"), + } + } else { + panic!("Failed to parse break statement"); + } + } + + #[test] + fn test_parse_continue_statement() { + // Test parsing: "continue;" + let tokens = vec![ + create_token(TokenType::Continue, "continue"), + create_token(TokenType::Semicolon, ";"), + create_token(TokenType::Eof, ""), + ]; + + let mut parser = Parser::new(tokens); + if let Some(stmt) = parser.statement() { + match stmt { + Stmt::Continue => { + } + _ => panic!("Expected continue statement"), + } + } else { + panic!("Failed to parse continue statement"); + } + } } diff --git a/src/semantic/lifetime_simple.rs b/src/semantic/lifetime_simple.rs index 8f70609..d0e570b 100644 --- a/src/semantic/lifetime_simple.rs +++ b/src/semantic/lifetime_simple.rs @@ -170,6 +170,28 @@ impl LifetimeAnalyzer { self.analyze_expression(arg)?; } } + Stmt::While { condition, body } => { + self.analyze_expression(condition)?; + for stmt in body { + self.analyze_statement(stmt)?; + } + } + Stmt::For { init, condition, update, body } => { + if let Some(init_stmt) = init { + self.analyze_statement(init_stmt)?; + } + if let Some(cond_expr) = condition { + self.analyze_expression(cond_expr)?; + } + if let Some(update_expr) = update { + self.analyze_expression(update_expr)?; + } + for stmt in body { + self.analyze_statement(stmt)?; + } + } + Stmt::Break | Stmt::Continue => { + } } self.current_line += 1; Ok(()) diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 6e8b45e..cc021ed 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -473,4 +473,226 @@ int main() { validate_asm_structure(&direct_asm, &["mov"]); validate_asm_structure(&ir_asm, &["mov"]); } + + #[test] + fn test_function_with_parameters() { + let source = r#" +int add(int a, int b) { + return a + b; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "define i32 @add(i32 %a, i32 %b)", + "load i32, %a", + "load i32, %b", + "add i32" + ]); + + validate_asm_structure(&ir_asm, &["add:", "mov", "add"]); + } + + #[test] + fn test_logical_and_operator() { + let source = r#" +int main() { + int a = 1; + int b = 0; + int result = a && b; + return result; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "and_false_", + "and_end_", + "and_eval_right_", + "br %t" + ]); + + validate_asm_structure(&ir_asm, &["je", "jmp"]); + } + + #[test] + fn test_logical_or_operator() { + let source = r#" +int main() { + int a = 0; + int b = 1; + int result = a || b; + return result; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "or_true_", + "or_end_", + "or_eval_right_", + "br %t" + ]); + + validate_asm_structure(&ir_asm, &["je", "jmp"]); + } + + #[test] + fn test_complex_logical_expression() { + let source = r#" +int main() { + int a = 1; + int b = 0; + int c = 1; + int result = (a && b) || c; + return result; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "and_false_", + "and_end_", + "or_true_", + "or_end_" + ]); + + validate_asm_structure(&ir_asm, &["je", "jmp"]); + } + + #[test] + fn test_while_loop() { + let source = r#" +int main() { + int i = 0; + while (i < 5) { + i = i + 1; + } + return i; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "loop_start_", + "loop_end_", + "loop_body_", + "br %t", + "jmp label %loop_start_" + ]); + + validate_asm_structure(&ir_asm, &["loop_start_", "loop_end_", "je", "jmp"]); + } + + #[test] + fn test_for_loop() { + let source = r#" +int main() { + int sum = 0; + for (int i = 0; i < 3; i = i + 1) { + sum = sum + i; + } + return sum; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "for_start_", + "for_end_", + "for_continue_", + "for_body_", + "br %t", + "jmp label %for_start_" + ]); + + validate_asm_structure(&ir_asm, &["for_start_", "for_end_", "for_continue_", "je", "jmp"]); + } + + #[test] + fn test_nested_loops() { + let source = r#" +int main() { + int sum = 0; + for (int i = 0; i < 2; i = i + 1) { + int j = 0; + while (j < 2) { + sum = sum + 1; + j = j + 1; + } + } + return sum; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "for_start_", + "for_end_", + "loop_start_", + "loop_end_", + "br %t", + "jmp label" + ]); + + validate_asm_structure(&ir_asm, &["for_start_", "loop_start_", "je", "jmp"]); + } + + #[test] + fn test_break_and_continue() { + let source = r#" +int main() { + int i = 0; + while (i < 10) { + i = i + 1; + if (i == 3) { + continue; + } + if (i == 7) { + break; + } + } + return i; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "loop_start_", + "loop_end_", + "jmp label %loop_start_", + "jmp label %loop_end_" + ]); + + validate_asm_structure(&ir_asm, &["loop_start_", "loop_end_", "jmp"]); + } + + #[test] + fn test_function_parameters_with_logical_operators() { + let source = r#" +int test(int x, int y) { + return x && y; +} +"#; + + let (direct_asm, ir_asm, ir_output, _) = compile_both_ways(source); + + validate_ir_structure(&ir_output, &[ + "define i32 @test(i32 %x, i32 %y)", + "load i32, %x", + "load i32, %y", + "and_false_", + "and_end_" + ]); + + validate_asm_structure(&ir_asm, &["test:", "je", "jmp"]); + } } From 2c220f095c343435d997b1834075eee75ff67d33 Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sat, 26 Jul 2025 13:09:24 +0200 Subject: [PATCH 02/10] remove dead code --- src/codegen/ir_codegen.rs | 16 -------- src/ir/generator.rs | 46 --------------------- src/main.rs | 86 +++++++++++++++------------------------ test_debug.c | 44 +++++++++++++++----- 4 files changed, 66 insertions(+), 126 deletions(-) diff --git a/src/codegen/ir_codegen.rs b/src/codegen/ir_codegen.rs index cea4d01..dad0e54 100644 --- a/src/codegen/ir_codegen.rs +++ b/src/codegen/ir_codegen.rs @@ -798,22 +798,6 @@ impl IrCodegen { IrValue::Global(name) => format!("@{}", name), } } - - fn preload_float_constant(&mut self, float_value: f64) -> Operand { - let float_bits = float_value.to_bits() as i64; - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(float_bits) - ], Some("load float bits")); - - let temp_offset = -8; // Use a temporary stack slot - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Memory { base: Register::Rsp, offset: temp_offset }, - Operand::Register(Register::Rax) - ], Some("store float to temp memory")); - - Operand::Memory { base: Register::Rsp, offset: temp_offset } - } } // Implement the emitter traits for IrCodegen diff --git a/src/ir/generator.rs b/src/ir/generator.rs index 4a69573..d54485a 100644 --- a/src/ir/generator.rs +++ b/src/ir/generator.rs @@ -746,52 +746,6 @@ impl IrGenerator { } }) } - - /// Infer type from expression context with improved heuristics - fn infer_expr_type_improved(&self, expr: &Expr) -> IrType { - match expr { - Expr::Integer(_) => IrType::Int, - Expr::Float(_) => IrType::Float, - Expr::Char(_) => IrType::Char, - Expr::String(_) => IrType::String, - Expr::Identifier(name) => self.infer_identifier_type(name), - Expr::Binary { left, operator, right } => { - let left_type = self.infer_expr_type_improved(left); - let right_type = self.infer_expr_type_improved(right); - - match (left_type, right_type) { - (IrType::Float, _) | (_, IrType::Float) => IrType::Float, - (IrType::String, _) | (_, IrType::String) => { - match operator { - TokenType::Plus => IrType::String, // String concatenation - _ => IrType::Int, // Comparison results - } - } - _ => IrType::Int, - } - } - Expr::Unary { operand, .. } => self.infer_expr_type_improved(operand), - Expr::Call { callee, .. } => { - if let Expr::Identifier(name) = callee.as_ref() { - if name == "printf" || name == "println" { - IrType::Int - } else { - IrType::Int // Default for unknown functions - } - } else { - IrType::Int - } - } - Expr::Assignment { value, .. } => self.infer_expr_type_improved(value), - Expr::TypeCast { target_type, .. } => { - if let Some(token_type) = target_type.to_token_type() { - IrType::from(token_type) - } else { - IrType::Int - } - } - } - } } impl Default for IrGenerator { diff --git a/src/main.rs b/src/main.rs index 3b8ff8d..45806ee 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use std::{env, fs}; -use compiler_minic::codegen::{Codegen, IrCodegen}; +use compiler_minic::codegen::{IrCodegen}; use compiler_minic::lexer::Lexer; use compiler_minic::parser::Parser; use compiler_minic::ir::{IrGenerator, IrOptimizer}; @@ -8,9 +8,6 @@ use compiler_minic::semantic::{MemorySafetyChecker, MemorySafetySeverity}; fn main() { let args: Vec = env::args().collect(); - // Check for IR flag - let use_ir = args.contains(&"--ir".to_string()); - // Find the filename (first non-flag argument) let filename = args.iter().skip(1).find(|arg| !arg.starts_with("--")); @@ -113,56 +110,39 @@ fn main() { } } - // Use the IR flag we determined earlier - - if use_ir { - println!("Using IR-based compilation pipeline..."); - - // Generate IR from AST - let mut ir_generator = IrGenerator::new(); - let ir_program = match ir_generator.generate(&ast) { - Ok(program) => program, - Err(e) => { - eprintln!("IR generation failed: {:?}", e); - return; - } - }; - - // Save IR to file for inspection - match fs::write("output.ir", format!("{}", ir_program)) { - Ok(_) => println!("IR code saved to output.ir"), - Err(e) => eprintln!("Error writing IR file: {}", e), - } - - // Optimize IR - let mut optimizer = IrOptimizer::new(); - let optimized_ir = optimizer.optimize(ir_program); - - // Save optimized IR to file - match fs::write("output_optimized.ir", format!("{}", optimized_ir)) { - Ok(_) => println!("Optimized IR code saved to output_optimized.ir"), - Err(e) => eprintln!("Error writing optimized IR file: {}", e), - } - - // Generate assembly from IR - let ir_codegen = IrCodegen::new(); - let asm_code = ir_codegen.generate(&optimized_ir); - - match fs::write("output_ir.asm", asm_code) { - Ok(_) => println!("Assembly code (from IR) saved to output_ir.asm"), - Err(e) => eprintln!("Error writing assembly file: {}", e), - } - } else { - println!("Using direct AST-to-assembly compilation..."); - - // Original direct AST to assembly compilation - let codegen = Codegen::new(); - let asm_code = codegen.generate(&ast); - - match fs::write("output.asm", asm_code) { - Ok(_) => println!("Assembly code saved to output.asm"), - Err(e) => eprintln!("Error writing assembly file: {}", e), + // Generate IR from AST + let mut ir_generator = IrGenerator::new(); + let ir_program = match ir_generator.generate(&ast) { + Ok(program) => program, + Err(e) => { + eprintln!("IR generation failed: {:?}", e); + return; } + }; + + // Save IR to file for inspection + match fs::write("output.ir", format!("{}", ir_program)) { + Ok(_) => println!("IR code saved to output.ir"), + Err(e) => eprintln!("Error writing IR file: {}", e), + } + + // Optimize IR + let mut optimizer = IrOptimizer::new(); + let optimized_ir = optimizer.optimize(ir_program); + + // Save optimized IR to file + match fs::write("output_optimized.ir", format!("{}", optimized_ir)) { + Ok(_) => println!("Optimized IR code saved to output_optimized.ir"), + Err(e) => eprintln!("Error writing optimized IR file: {}", e), + } + + // Generate assembly from IR + let ir_codegen = IrCodegen::new(); + let asm_code = ir_codegen.generate(&optimized_ir); + + match fs::write("output.asm", asm_code) { + Ok(_) => println!("Assembly code (from IR) saved to output.asm"), + Err(e) => eprintln!("Error writing assembly file: {}", e), } } Err(e) => { diff --git a/test_debug.c b/test_debug.c index 0100764..bee4a2a 100644 --- a/test_debug.c +++ b/test_debug.c @@ -1,13 +1,35 @@ -int main() { - int a = 10; - float b = 3.14; - char c = 'A'; - - int result = a + 5; - float calc = b * 2.0; - - println("Values: a=%d, b=%.2f, c=%c", a, b, c); - println("Results: result=%d, calc=%.2f", result, calc); - +int factorial(int n) { + println("toto"); + println("factorial n param: %d", n); + int result = 1; + for (int i = 1; i <= n; i = i + 1) { + result = result * i; + } return result; +} + +int main() { + int x = 5; + int y = 0; + + int logical_and = x && y; + int logical_or = x || y; + int complex_logic = (x > 3) && (y == 0); + + int count = 0; + int i = 0; + while (i < 10) { + i = i + 1; + if (i == 3) { + continue; + } + if (i == 8) { + break; + } + count = count + 1; + } + int fact_result = factorial(4); + + println("count = %d, fact_result = %d", count, fact_result); + return 0; } \ No newline at end of file From bda2ac3282e8dc099d413b7617dfe9a3cf377490 Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sat, 26 Jul 2025 14:23:29 +0200 Subject: [PATCH 03/10] clean up code --- Cargo.toml | 2 +- src/codegen/analyzer.rs | 110 --- src/codegen/calling_convention.rs | 156 ---- src/codegen/codegen.rs | 227 ----- src/codegen/direct_backend.rs | 111 --- src/codegen/expression.rs | 281 ------ src/codegen/ir_codegen.rs | 868 ------------------ src/codegen/ir_codegen/call_generator.rs | 121 +++ src/codegen/ir_codegen/emitter_impl.rs | 69 ++ src/codegen/ir_codegen/function_generator.rs | 60 ++ .../ir_codegen/instruction_generator.rs | 252 +++++ src/codegen/ir_codegen/mod.rs | 79 ++ src/codegen/ir_codegen/operation_generator.rs | 207 +++++ src/codegen/ir_codegen/stack_manager.rs | 67 ++ src/codegen/ir_codegen/value_converter.rs | 57 ++ src/codegen/mod.rs | 15 - src/codegen/statement.rs | 487 ---------- src/codegen/target/mod.rs | 127 --- src/codegen/target/x86_64_windows.rs | 349 ------- tests/integration_tests.rs | 4 +- 20 files changed, 915 insertions(+), 2734 deletions(-) delete mode 100644 src/codegen/analyzer.rs delete mode 100644 src/codegen/calling_convention.rs delete mode 100644 src/codegen/codegen.rs delete mode 100644 src/codegen/direct_backend.rs delete mode 100644 src/codegen/expression.rs delete mode 100644 src/codegen/ir_codegen.rs create mode 100644 src/codegen/ir_codegen/call_generator.rs create mode 100644 src/codegen/ir_codegen/emitter_impl.rs create mode 100644 src/codegen/ir_codegen/function_generator.rs create mode 100644 src/codegen/ir_codegen/instruction_generator.rs create mode 100644 src/codegen/ir_codegen/mod.rs create mode 100644 src/codegen/ir_codegen/operation_generator.rs create mode 100644 src/codegen/ir_codegen/stack_manager.rs create mode 100644 src/codegen/ir_codegen/value_converter.rs delete mode 100644 src/codegen/statement.rs delete mode 100644 src/codegen/target/mod.rs delete mode 100644 src/codegen/target/x86_64_windows.rs diff --git a/Cargo.toml b/Cargo.toml index 2c57867..dd6dec8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "compiler-minic" version = "0.1.0" -edition = "2021" +edition = "2024" [dependencies] diff --git a/src/codegen/analyzer.rs b/src/codegen/analyzer.rs deleted file mode 100644 index 1c7c7e3..0000000 --- a/src/codegen/analyzer.rs +++ /dev/null @@ -1,110 +0,0 @@ -use std::collections::HashMap; -use crate::lexer::TokenType; -use crate::parser::ast::{Expr, Stmt}; - -pub trait AstAnalyzer { - fn collect_variable_types(&mut self, ast: &[Stmt]); - fn collect_format_strings(&mut self, ast: &[Stmt]); - fn get_local_types(&self) -> &HashMap; - fn get_data_strings(&self) -> &HashMap; - fn get_data_strings_mut(&mut self) -> &mut HashMap; - fn new_string_label(&mut self) -> String; -} - -impl AstAnalyzer for super::Codegen { - fn collect_variable_types(&mut self, ast: &[Stmt]) { - for stmt in ast { - match stmt { - Stmt::Function { body, .. } => { - self.collect_variable_types(body); - } - Stmt::VarDecl { var_type, name, .. } => { - // Store variable type for later use - if let Some(token_type) = var_type.to_token_type() { - self.local_types.insert(name.clone(), token_type); - } else { - self.local_types.insert(name.clone(), crate::lexer::TokenType::Int); // Default fallback - } - } - Stmt::If { then_branch, .. } => { - self.collect_variable_types(then_branch); - } - Stmt::Block(stmts) => { - self.collect_variable_types(stmts); - } - _ => {} - } - } - } - - fn collect_format_strings(&mut self, ast: &[Stmt]) { - for stmt in ast { - match stmt { - Stmt::Function { body, .. } => { - self.collect_format_strings(body); - } - Stmt::PrintStmt { format_string, args } => { - if let Expr::String(s) = format_string { - if s.is_empty() { - // Simple println(expr) case - need to create format string - if args.len() == 1 { - let arg = &args[0]; - let format_str = match arg { - Expr::Integer(_) => "%d\n", - Expr::Float(_) => "%.6f\n", - Expr::Char(_) => "%c\n", - Expr::Identifier(var_name) => { - // Use stored type information - match self.local_types.get(var_name) { - Some(TokenType::Int) => "%d\n", - Some(TokenType::FloatType) => "%.6f\n", - Some(TokenType::CharType) => "%c\n", - _ => "%d\n", // Default to integer - } - }, - _ => "%d\n", // Default to integer - }; - - if !self.data_strings.contains_key(format_str) { - let label = self.new_string_label(); - self.data_strings.insert(format_str.to_string(), label); - } - } - } else { - // Regular format string case - if !self.data_strings.contains_key(s) { - let label = self.new_string_label(); - self.data_strings.insert(s.clone(), label); - } - } - } - } - Stmt::If { then_branch, .. } => { - self.collect_format_strings(then_branch); - } - Stmt::Block(stmts) => { - self.collect_format_strings(stmts); - } - _ => {} - } - } - } - - fn get_local_types(&self) -> &HashMap { - &self.local_types - } - - fn get_data_strings(&self) -> &HashMap { - &self.data_strings - } - - fn get_data_strings_mut(&mut self) -> &mut HashMap { - &mut self.data_strings - } - - fn new_string_label(&mut self) -> String { - let label = format!("str_{}", self.string_label_count); - self.string_label_count += 1; - label - } -} diff --git a/src/codegen/calling_convention.rs b/src/codegen/calling_convention.rs deleted file mode 100644 index 22b0579..0000000 --- a/src/codegen/calling_convention.rs +++ /dev/null @@ -1,156 +0,0 @@ -use crate::codegen::instruction::Register; - -#[derive(Debug, Clone)] -pub struct CallingConvention { - pub name: String, - pub stack_alignment: usize, - pub shadow_space_size: usize, - pub integer_registers: Vec, - pub float_registers: Vec, - pub return_register: Register, -} - -impl CallingConvention { - pub fn windows_x64() -> Self { - Self { - name: "Windows x64".to_string(), - stack_alignment: 16, - shadow_space_size: 32, - integer_registers: vec![ - Register::Rcx, - Register::Rdx, - Register::R8, - Register::R9, - ], - float_registers: vec![ - Register::Xmm0, - Register::Xmm1, - Register::Xmm2, - Register::Xmm3, - ], - return_register: Register::Rax, - } - } - - pub fn system_v_x64() -> Self { - Self { - name: "System V x64".to_string(), - stack_alignment: 16, - shadow_space_size: 0, - integer_registers: vec![ - Register::Rdx, // Using available registers only - Register::Rcx, - Register::R8, - Register::R9, - ], - float_registers: vec![ - Register::Xmm0, - Register::Xmm1, - Register::Xmm2, - Register::Xmm3, - ], - return_register: Register::Rax, - } - } - - pub fn get_integer_register(&self, index: usize) -> Option { - self.integer_registers.get(index).copied() - } - - pub fn get_float_register(&self, index: usize) -> Option { - self.float_registers.get(index).copied() - } - - pub fn max_register_args(&self) -> usize { - self.integer_registers.len().min(self.float_registers.len()) - } -} - -#[derive(Debug, Clone)] -pub struct FunctionCallGenerator { - calling_convention: CallingConvention, -} - -impl FunctionCallGenerator { - pub fn new(calling_convention: CallingConvention) -> Self { - Self { calling_convention } - } - - pub fn windows_x64() -> Self { - Self::new(CallingConvention::windows_x64()) - } - - pub fn calling_convention(&self) -> &CallingConvention { - &self.calling_convention - } - - pub fn generate_stack_alignment(&self) -> Vec { - let mut instructions = Vec::new(); - let alignment = self.calling_convention.stack_alignment; - - instructions.push(format!(" ; Align stack to {}-byte boundary", alignment)); - instructions.push(format!(" and rsp, ~{} ; Force alignment", alignment - 1)); - - if self.calling_convention.shadow_space_size > 0 { - instructions.push(format!(" sub rsp, {} ; Allocate shadow space", - self.calling_convention.shadow_space_size)); - } - - instructions - } - - pub fn generate_stack_cleanup(&self) -> Vec { - let mut instructions = Vec::new(); - - if self.calling_convention.shadow_space_size > 0 { - instructions.push(format!(" add rsp, {} ; Deallocate shadow space", - self.calling_convention.shadow_space_size)); - } - - instructions - } - - pub fn generate_argument_passing(&self, args: &[String], arg_types: &[String]) -> Vec { - let mut instructions = Vec::new(); - - for (i, (arg, arg_type)) in args.iter().zip(arg_types.iter()).enumerate() { - if i >= self.calling_convention.max_register_args() { - instructions.push(format!(" ; Stack argument {}: {} (not implemented)", i, arg)); - continue; - } - - match arg_type.as_str() { - "int" | "integer" => { - if let Some(reg) = self.calling_convention.get_integer_register(i) { - instructions.push(format!(" mov {}, {} ; Integer argument {}", - reg.to_string().to_lowercase(), arg, i)); - } - } - "float" | "double" => { - if let Some(reg) = self.calling_convention.get_float_register(i) { - instructions.push(format!(" movsd {}, {} ; Float argument {}", - reg.to_string().to_lowercase(), arg, i)); - - if self.calling_convention.name.contains("Windows") { - if let Some(int_reg) = self.calling_convention.get_integer_register(i) { - instructions.push(format!(" movq {}, {} ; Copy to integer register", - int_reg.to_string().to_lowercase(), reg.to_string().to_lowercase())); - } - } - } - } - "char" => { - if let Some(reg) = self.calling_convention.get_integer_register(i) { - instructions.push(format!(" movzx {}, {} ; Character argument {}", - reg.to_string().to_lowercase(), arg, i)); - } - } - _ => { - instructions.push(format!(" ; Unknown argument type: {} for arg {}", arg_type, i)); - } - } - } - - instructions - } -} diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs deleted file mode 100644 index 9518950..0000000 --- a/src/codegen/codegen.rs +++ /dev/null @@ -1,227 +0,0 @@ -use std::collections::HashMap; -use crate::lexer::TokenType; -use crate::parser::ast::Stmt; -use super::instruction::{Instruction, Operand, Register}; -use super::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; -use super::statement::StatementGenerator; -use super::analyzer::AstAnalyzer; - -pub struct Codegen { - pub label_count: usize, - pub stack_offset: i32, - pub locals: HashMap, - pub local_types: HashMap, // Track variable types - pub output: String, - pub data_strings: HashMap, // To store format strings and their labels - pub string_label_count: usize, // For unique string labels -} - -impl Codegen { - pub fn new() -> Self { - Self { - label_count: 0, - stack_offset: 0, - locals: HashMap::new(), - local_types: HashMap::new(), - output: String::new(), - data_strings: HashMap::new(), - string_label_count: 0, - } - } - - pub fn generate(mut self, ast: &[Stmt]) -> String { - // Assembly file header with metadata - self.emit_section_header("MINI-C COMPILER GENERATED ASSEMBLY"); - self.emit_comment("Target: x86-64 Windows"); - self.emit_comment("Calling Convention: Microsoft x64"); - self.emit_comment("Generated by: Mini-C Compiler"); - self.emit_line(""); - - // Assembly directives - self.emit_comment("Assembly configuration"); - self.emit_line("bits 64"); - self.emit_line("default rel"); - self.emit_line("global main"); - self.emit_line("extern printf"); - - // Analysis passes - self.emit_subsection_header("Analysis Phase"); - self.emit_comment("First pass: collect variable types"); - self.collect_variable_types(ast); - self.emit_comment("Second pass: collect format strings with type information"); - self.collect_format_strings(ast); - - // Data section - self.emit_section_header("DATA SECTION - String Literals and Constants"); - self.emit_line("section .data"); - - let data_strings_clone = self.data_strings.clone(); - if data_strings_clone.is_empty() { - self.emit_comment("No string literals found"); - } else { - for (s, label) in &data_strings_clone { - let formatted_s = s.replace('\n', "").replace("%f", "%.2f"); - self.emit_comment(&format!("Format string: \"{}\"", s.replace('\n', "\\n"))); - self.emit_line(&format!(" {}: db \"{}\", 10, 0", label, formatted_s)); - } - } - - // Text section - self.emit_section_header("TEXT SECTION - Executable Code"); - self.emit_line("section .text"); - - for stmt in ast { - if let Stmt::Function { name, body, .. } = stmt { - self.generate_function(name, body); - } - } - - self.output - } - - fn generate_function(&mut self, name: &str, body: &[Stmt]) { - self.emit_subsection_header(&format!("FUNCTION: {}", name)); - self.emit_line(&format!("{}:", name)); - - // Function prologue - self.emit_subsection_header("Function Prologue"); - self.emit_instruction_with_comment(Instruction::Push, vec![ - Operand::Register(Register::Rbp) - ], Some("save caller's frame")); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rbp), - Operand::Register(Register::Rsp) - ], Some("set up frame")); - self.emit_instruction_with_comment(Instruction::Sub, vec![ - Operand::Register(Register::Rsp), - Operand::Immediate(48) - ], Some("allocate 48 bytes (16 vars + 32 shadow)")); - - // Initialize function state - self.emit_subsection_header("Function Body"); - self.stack_offset = 0; - self.locals.clear(); - - // Generate function body - for stmt in body { - self.gen_stmt(stmt); - } - - // Function epilogue - self.emit_subsection_header("Function Epilogue"); - self.emit_stack_layout_summary(); - - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rsp), - Operand::Register(Register::Rbp) - ], Some("restore stack")); - self.emit_instruction_with_comment(Instruction::Pop, vec![ - Operand::Register(Register::Rbp) - ], Some("restore frame")); - self.emit_instruction_with_comment(Instruction::Ret, vec![], Some("return")); - - self.emit_line(""); // Add spacing after function - } -} - -impl Emitter for Codegen { - fn emit_line(&mut self, line: &str) { - self.output.push_str(line); - self.output.push('\n'); - } - - fn emit_comment(&mut self, comment: &str) { - self.emit_line(&format!("; {}", comment)); - } -} - -impl Codegen { - /// Emit a section header with clear visual separation - pub fn emit_section_header(&mut self, title: &str) { - self.emit_line(""); - self.emit_line(&format!("; {}", "=".repeat(60))); - self.emit_line(&format!("; {}", title)); - self.emit_line(&format!("; {}", "=".repeat(60))); - self.emit_line(""); - } - - /// Emit a subsection header for better organization - pub fn emit_subsection_header(&mut self, title: &str) { - self.emit_line(""); - self.emit_line(&format!("; {}", "-".repeat(40))); - self.emit_line(&format!("; {}", title)); - self.emit_line(&format!("; {}", "-".repeat(40))); - } - - /// Emit a detailed comment about register usage - pub fn emit_register_comment(&mut self, operation: &str, register: &str, purpose: &str) { - self.emit_comment(&format!("{}: {} -> {}", operation, register, purpose)); - } - - /// Emit a memory operation comment with offset details - pub fn emit_memory_comment(&mut self, operation: &str, var_name: &str, offset: i32, var_type: &str) { - self.emit_comment(&format!("{}: {} ({}) at [rbp{}]", operation, var_name, var_type, offset)); - } - - /// Emit a stack operation comment - pub fn emit_stack_comment(&mut self, operation: &str, bytes: i32, purpose: &str) { - self.emit_comment(&format!("Stack {}: {} bytes - {}", operation, bytes, purpose)); - } - - /// Emit a function call preparation comment - pub fn emit_call_prep_comment(&mut self, function: &str, args: &[&str]) { - self.emit_comment(&format!("Preparing call to {}({})", function, args.join(", "))); - } - - /// Emit a stack layout summary for debugging - pub fn emit_stack_layout_summary(&mut self) { - self.emit_comment("STACK LAYOUT SUMMARY:"); - self.emit_comment("RBP+0 : Saved RBP (caller's frame pointer)"); - self.emit_comment("RBP-48 : RSP (current stack pointer)"); - self.emit_comment("RBP-32 to RBP-1 : Shadow space (32 bytes for Windows x64 ABI)"); - - if self.locals.is_empty() { - self.emit_comment("No local variables allocated"); - } else { - self.emit_comment("Local variables:"); - - // Collect variable information first to avoid borrowing issues - let mut var_info: Vec<(String, i32, String)> = Vec::new(); - for (name, &offset) in &self.locals { - let var_type = self.local_types.get(name) - .map(|t| match t { - TokenType::Int => "int (4 bytes)", - TokenType::FloatType => "float (8 bytes)", - TokenType::CharType => "char (1 byte)", - _ => "unknown type", - }) - .unwrap_or("unknown type"); - var_info.push((name.clone(), offset, var_type.to_string())); - } - - // Sort by offset (highest address first) - var_info.sort_by_key(|(_, offset, _)| *offset); - var_info.reverse(); - - // Now emit the comments - for (name, offset, var_type) in var_info { - self.emit_comment(&format!("RBP{:3} : {} ({})", offset, name, var_type)); - } - } - self.emit_line(""); - } - - /// Emit detailed instruction timing information (for advanced debugging) - pub fn emit_instruction_with_timing(&mut self, instruction: Instruction, operands: Vec, cycles: &str) { - self.emit_instruction(instruction, operands); - self.emit_comment(&format!("Estimated cycles: {}", cycles)); - } - - /// Emit register state comment for debugging - pub fn emit_register_state(&mut self, registers: &[(&str, &str)]) { - self.emit_comment("Register state:"); - for (reg, content) in registers { - self.emit_comment(&format!(" {} = {}", reg, content)); - } - } -} \ No newline at end of file diff --git a/src/codegen/direct_backend.rs b/src/codegen/direct_backend.rs deleted file mode 100644 index 63772e0..0000000 --- a/src/codegen/direct_backend.rs +++ /dev/null @@ -1,111 +0,0 @@ -use super::backend::{CodegenBackend, BackendUtils, RegisterAllocator}; -use super::instruction::{Instruction, Operand, Size}; -use crate::lexer::TokenType; -use std::collections::HashMap; - -pub struct DirectBackend { - output: String, - stack_offset: i32, - locals: HashMap, - local_types: HashMap, - _register_allocator: RegisterAllocator, -} - -impl DirectBackend { - pub fn new() -> Self { - Self { - output: String::new(), - stack_offset: 0, - locals: HashMap::new(), - local_types: HashMap::new(), - _register_allocator: RegisterAllocator::new(), - } - } - - pub fn generate_program(&mut self, functions: &[String]) -> String { - let mut program = String::new(); - - program.push_str("section .data\n"); - program.push_str(" format_int db '%d', 0\n"); - program.push_str(" format_float db '%.2f', 0\n"); - program.push_str(" format_char db '%c', 0\n"); - program.push_str(" newline db 10, 0\n\n"); - - program.push_str("section .text\n"); - program.push_str(" global _start\n"); - program.push_str(" extern printf\n"); - program.push_str(" extern exit\n\n"); - - for function in functions { - program.push_str(function); - program.push('\n'); - } - - program.push_str(&self.output); - - program - } -} - -impl CodegenBackend for DirectBackend { - fn emit_instruction(&mut self, instr: Instruction, operands: Vec) { - let formatted = BackendUtils::format_instruction(&instr, &operands); - self.output.push_str(&format!(" {}\n", formatted)); - } - - fn emit_instruction_with_size(&mut self, instr: Instruction, size: Size, operands: Vec) { - let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); - self.output.push_str(&format!(" {}\n", formatted)); - } - - fn emit_instruction_with_size_and_comment(&mut self, instr: Instruction, size: Size, operands: Vec, comment: Option<&str>) { - let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); - if let Some(comment) = comment { - self.output.push_str(&format!(" {} ; {}\n", formatted, comment)); - } else { - self.output.push_str(&format!(" {}\n", formatted)); - } - } - - fn emit_comment(&mut self, comment: &str) { - self.output.push_str(&format!(" ; {}\n", comment)); - } - - fn emit_label(&mut self, label: &str) { - self.output.push_str(&format!("{}:\n", label)); - } - - fn get_stack_offset(&self) -> i32 { - self.stack_offset - } - - fn set_stack_offset(&mut self, offset: i32) { - self.stack_offset = offset; - } - - fn get_locals(&self) -> &HashMap { - &self.locals - } - - fn get_locals_mut(&mut self) -> &mut HashMap { - &mut self.locals - } - - fn get_local_types(&self) -> &HashMap { - &self.local_types - } - - fn get_local_types_mut(&mut self) -> &mut HashMap { - &mut self.local_types - } - - fn get_output(&self) -> &str { - &self.output - } -} - -impl Default for DirectBackend { - fn default() -> Self { - Self::new() - } -} diff --git a/src/codegen/expression.rs b/src/codegen/expression.rs deleted file mode 100644 index 22bc3d3..0000000 --- a/src/codegen/expression.rs +++ /dev/null @@ -1,281 +0,0 @@ -use crate::lexer::TokenType; -use crate::parser::ast::Expr; -use super::instruction::{Instruction, Operand, Register}; -use super::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; - -pub trait ExpressionGenerator: Emitter + CodeEmitter + CodeEmitterWithComment { - fn gen_expr(&mut self, expr: &Expr); - fn get_locals(&self) -> &std::collections::HashMap; - fn get_data_strings(&self) -> &std::collections::HashMap; -} - -impl ExpressionGenerator for super::Codegen { - fn gen_expr(&mut self, expr: &Expr) { - match expr { - Expr::Integer(i) => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(*i) - ], Some(&format!("load integer {}", i))); - } - Expr::Float(f) => { - let float_bits = f.to_bits(); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(float_bits as i64) - ], Some(&format!("load float {} as bits", f))); - } - Expr::Char(c) => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(*c as i64) - ], Some(&format!("load char '{}'", c))); - } - Expr::String(s) => { - // CORRECTION: Use RIP-relative addressing for strings - if let Some(label) = self.data_strings.get(s) { - self.emit_instruction(Instruction::Lea, vec![ - Operand::Register(Register::Rax), - Operand::String(format!("[rel {}]", label)) - ]); - } else { - // This should not happen if collect_format_strings is called correctly - self.emit_line(&format!(" ; String literal '{}' not found in data section", s)); - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ]); - } - } - Expr::Identifier(name) => { - if let Some(&offset) = self.locals.get(name) { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Memory { base: Register::Rbp, offset } - ], Some(&format!("load {}", name))); - } else { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ], Some(&format!("unknown var {}", name))); - } - } - Expr::Unary { operator, operand } => { - match operator { - TokenType::LogicalNot => { // Unary '!' - self.gen_expr(operand); - self.emit_instruction_with_comment(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ], Some("test for zero")); - self.emit_instruction_with_comment(Instruction::Sete, vec![ - Operand::Register(Register::Al) - ], Some("set if zero")); - self.emit_instruction_with_comment(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ], Some("extend to 64-bit")); - } - TokenType::Minus => { // Unary '-' - self.gen_expr(operand); - self.emit_instruction_with_comment(Instruction::Neg, vec![ - Operand::Register(Register::Rax) - ], Some("negate")); - } - _ => { - self.emit_line(&format!(" ; unsupported unary operator: {:?}", operator)); - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ]); - } - } - } - - Expr::Binary { left, operator, right } => { - self.gen_expr(right); - self.emit_instruction_with_comment(Instruction::Push, vec![ - Operand::Register(Register::Rax) - ], Some("save right")); - self.gen_expr(left); - self.emit_instruction_with_comment(Instruction::Pop, vec![ - Operand::Register(Register::R8) - ], Some("restore right")); - - match operator { - TokenType::Plus => { - self.emit_instruction_with_comment(Instruction::Add, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ], Some("add")); - }, - TokenType::Minus => { - self.emit_instruction_with_comment(Instruction::Sub, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ], Some("subtract")); - }, - TokenType::Multiply => { - self.emit_instruction_with_comment(Instruction::Imul, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ], Some("multiply")); - }, - TokenType::Divide => { - self.emit_instruction_with_comment(Instruction::Cqo, vec![], Some("sign extend")); - self.emit_instruction_with_comment(Instruction::Idiv, vec![ - Operand::Register(Register::R8) - ], Some("divide")); - } - TokenType::Equal => { - self.emit_instruction_with_comment(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ], Some("compare")); - self.emit_instruction_with_comment(Instruction::Sete, vec![ - Operand::Register(Register::Al) - ], Some("set if equal")); - self.emit_instruction_with_comment(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ], Some("extend to 64-bit")); - } - TokenType::NotEqual => { - self.emit_instruction_with_comment(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ], Some("compare")); - self.emit_instruction_with_comment(Instruction::Setne, vec![ - Operand::Register(Register::Al) - ], Some("set if not equal")); - self.emit_instruction_with_comment(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ], Some("extend to 64-bit")); - } - TokenType::LessThan => { - self.emit_instruction_with_comment(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ], Some("compare")); - self.emit_instruction_with_comment(Instruction::Setl, vec![ - Operand::Register(Register::Al) - ], Some("set if less")); - self.emit_instruction_with_comment(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ], Some("extend to 64-bit")); - } - TokenType::LessEqual => { - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ]); - self.emit_instruction(Instruction::Setle, vec![Operand::Register(Register::Al)]); - self.emit_instruction(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ]); - } - TokenType::GreaterThan => { - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ]); - self.emit_instruction(Instruction::Setg, vec![Operand::Register(Register::Al)]); - self.emit_instruction(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ]); - } - TokenType::GreaterEqual => { - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ]); - self.emit_instruction(Instruction::Setge, vec![Operand::Register(Register::Al)]); - self.emit_instruction(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ]); - } - TokenType::LogicalAnd => { - self.emit_instruction(Instruction::And, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ]); - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ]); - self.emit_instruction(Instruction::Setne, vec![Operand::Register(Register::Al)]); - self.emit_instruction(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ]); - } - TokenType::LogicalOr => { - self.emit_instruction(Instruction::Or, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::R8) - ]); - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ]); - self.emit_instruction(Instruction::Setne, vec![Operand::Register(Register::Al)]); - self.emit_instruction(Instruction::Movzx, vec![ - Operand::Register(Register::Rax), - Operand::Register(Register::Al) - ]); - } - _ => { - self.emit_line(" ; unsupported binary op"); - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ]); - } - } - } - Expr::Call { callee, arguments: _, .. } => { - // This is a generic function call. - // For now, we'll treat it as unsupported as printf is handled by Stmt::PrintStmt. - // A full compiler would need to resolve `callee` and pass `arguments`. - self.emit_line(&format!(" ; unsupported general function call expression: {:?}", callee)); - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(0) - ]); - } - Expr::Assignment { name, value } => { - // Generate code for the value expression - self.gen_expr(value); - - // Store the result in the variable - if let Some(&offset) = self.locals.get(name) { - self.emit_instruction(Instruction::Mov, vec![ - Operand::Memory { base: Register::Rbp, offset }, - Operand::Register(Register::Rax) - ]); - } else { - self.emit_line(&format!(" ; assignment to unknown variable '{}'", name)); - } - // Assignment expression returns the assigned value (in RAX) - } - Expr::TypeCast { expr, .. } => { - // Generate code for the inner expression - self.gen_expr(expr); - self.emit_line(" ; type cast operation (simplified)"); - } - } - } - - fn get_locals(&self) -> &std::collections::HashMap { - &self.locals - } - - fn get_data_strings(&self) -> &std::collections::HashMap { - &self.data_strings - } -} diff --git a/src/codegen/ir_codegen.rs b/src/codegen/ir_codegen.rs deleted file mode 100644 index dad0e54..0000000 --- a/src/codegen/ir_codegen.rs +++ /dev/null @@ -1,868 +0,0 @@ -use std::collections::HashMap; -use crate::ir::{IrProgram, IrFunction, IrInstruction, IrValue, IrType, IrBinaryOp, IrUnaryOp}; -use super::instruction::{Instruction, Operand, Register, Size}; -use super::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; - -/// IR-based code generator that produces assembly from IR -pub struct IrCodegen { - pub output: String, - pub stack_offset: i32, - pub locals: HashMap, - pub temp_locations: HashMap, // Map temp variables to stack locations - pub data_strings: HashMap, - pub label_count: usize, -} - -impl IrCodegen { - pub fn new() -> Self { - Self { - output: String::new(), - stack_offset: 0, - locals: HashMap::new(), - temp_locations: HashMap::new(), - data_strings: HashMap::new(), - label_count: 0, - } - } - - /// Generate assembly from IR program - pub fn generate(mut self, ir_program: &IrProgram) -> String { - // Assembly file header - self.emit_section_header("MINI-C COMPILER GENERATED ASSEMBLY (FROM IR)"); - self.emit_comment("Target: x86-64 Windows"); - self.emit_comment("Calling Convention: Microsoft x64"); - self.emit_comment("Generated from: Intermediate Representation"); - self.emit_line(""); - - // Assembly directives - self.emit_comment("Assembly configuration"); - self.emit_line("bits 64"); - self.emit_line("default rel"); - self.emit_line("global main"); - self.emit_line("extern printf"); - - // Data section - process global strings - self.emit_section_header("DATA SECTION - String Literals and Constants"); - self.emit_line("section .data"); - - if ir_program.global_strings.is_empty() { - self.emit_comment("No string literals found"); - } else { - for (label, content) in &ir_program.global_strings { - let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); - self.emit_comment(&format!("String constant: \"{}\"", content.replace('\n', "\\n"))); - self.emit_line(&format!(" {}: db \"{}\", 10, 0", label, formatted_content)); - self.data_strings.insert(label.clone(), content.clone()); - } - } - - // Text section - self.emit_section_header("TEXT SECTION - Executable Code"); - self.emit_line("section .text"); - - // Generate code for each function - for function in &ir_program.functions { - self.generate_function(function); - } - - self.output - } - - /// Generate assembly for a single function - fn generate_function(&mut self, function: &IrFunction) { - self.emit_subsection_header(&format!("FUNCTION: {}", function.name)); - self.emit_line(&format!("{}:", function.name)); - - // Reset state for new function - self.stack_offset = 0; - self.locals.clear(); - self.temp_locations.clear(); - - // Function prologue - self.emit_subsection_header("Function Prologue"); - self.emit_instruction_with_comment(Instruction::Push, vec![ - Operand::Register(Register::Rbp) - ], Some("save caller's frame")); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rbp), - Operand::Register(Register::Rsp) - ], Some("set up frame")); - - // Calculate stack space needed - let stack_space = self.calculate_stack_space(function); - if stack_space > 0 { - self.emit_instruction_with_comment(Instruction::Sub, vec![ - Operand::Register(Register::Rsp), - Operand::Immediate(stack_space as i64) - ], Some(&format!("allocate {} bytes for locals and temps", stack_space))); - } - - // Generate function body - self.emit_subsection_header("Function Body"); - for instruction in &function.instructions { - self.generate_instruction(instruction); - } - - // Function epilogue - self.emit_subsection_header("Function Epilogue"); - self.emit_stack_layout_summary(); - - if stack_space > 0 { - self.emit_instruction_with_comment(Instruction::Add, vec![ - Operand::Register(Register::Rsp), - Operand::Immediate(stack_space as i64) - ], Some("deallocate stack space")); - } - - self.emit_instruction_with_comment(Instruction::Pop, vec![ - Operand::Register(Register::Rbp) - ], Some("restore frame")); - self.emit_instruction_with_comment(Instruction::Ret, vec![], Some("return")); - - self.emit_line(""); // Add spacing after function - } - - /// Calculate the stack space needed for a function - fn calculate_stack_space(&mut self, function: &IrFunction) -> i32 { - let mut space = 32; // Shadow space for Windows x64 ABI - - // Allocate space for local variables - for (name, ir_type) in &function.local_vars { - let size = self.get_type_size(ir_type); - space += size; - self.locals.insert(name.clone(), -space); - } - - // Allocate space for temporary variables - let mut _temp_count = 0; - for instruction in &function.instructions { - if let Some(temp_id) = self.extract_temp_id(instruction) { - if !self.temp_locations.contains_key(&temp_id) { - _temp_count += 1; - space += 8; // Assume 8 bytes for all temps - self.temp_locations.insert(temp_id, -space); - } - } - } - - // Align to 16 bytes - (space + 15) & !15 - } - - /// Extract temporary variable ID from instruction if present - fn extract_temp_id(&self, instruction: &IrInstruction) -> Option { - match instruction { - IrInstruction::BinaryOp { dest, .. } | - IrInstruction::UnaryOp { dest, .. } | - IrInstruction::Load { dest, .. } | - IrInstruction::Move { dest, .. } => { - if let IrValue::Temp(id) = dest { - Some(*id) - } else { - None - } - } - IrInstruction::Call { dest: Some(dest), .. } => { - if let IrValue::Temp(id) = dest { - Some(*id) - } else { - None - } - } - _ => None, - } - } - - /// Get the size in bytes for an IR type - fn get_type_size(&self, ir_type: &IrType) -> i32 { - match ir_type { - IrType::Int => 4, - IrType::Float => 8, - IrType::Char => 1, - IrType::String => 8, // Pointer size - IrType::Void => 0, - IrType::Pointer(_) => 8, - } - } - - /// Generate assembly for a single IR instruction - fn generate_instruction(&mut self, instruction: &IrInstruction) { - match instruction { - IrInstruction::Alloca { var_type, name } => { - // Space already allocated in prologue, just add comment - let size = self.get_type_size(var_type); - let offset = self.locals.get(name).copied().unwrap_or(0); - self.emit_comment(&format!("alloca {} {} at [rbp{}] ({} bytes)", - var_type, name, offset, size)); - } - - IrInstruction::Load { dest, src, var_type } => { - let src_operand = self.ir_value_to_operand(src); - let dest_operand = self.ir_value_to_operand(dest); - let size = self.ir_type_to_size(var_type); - - // Use register as intermediate for memory-to-memory moves - let reg = match size { - Size::Byte => Register::Al, - Size::Dword => Register::Eax, - Size::Qword => Register::Rax, - _ => Register::Eax, - }; - - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - Operand::Register(reg), - src_operand - ], Some(&format!("load {} {} to register", var_type, self.ir_value_to_string(src)))); - - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - dest_operand, - Operand::Register(reg) - ], Some("store to destination")); - } - - IrInstruction::Store { value, dest, var_type } => { - let dest_operand = self.ir_value_to_operand(dest); - let size = self.ir_type_to_size(var_type); - - // Handle different value types appropriately - match (value, var_type) { - (IrValue::FloatConstant(f), IrType::Float) => { - // For float constants, we need to handle them specially - self.emit_comment(&format!("store float constant {} to {}", f, self.ir_value_to_string(dest))); - // Move the float bits as integer first, then convert - let bits = f.to_bits() as i64; - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(bits) - ], Some("load float bits")); - self.emit_instruction_with_size_and_comment(Instruction::Mov, Size::Qword, vec![ - dest_operand, - Operand::Register(Register::Rax) - ], Some("store float")); - } - _ => { - // For other types, get the value operand and use register as intermediate if needed - let value_operand = self.ir_value_to_operand(value); - let reg = match size { - Size::Byte => Register::Al, - Size::Dword => Register::Eax, - Size::Qword => Register::Rax, - _ => Register::Eax, - }; - - // Check if we need an intermediate register - let needs_intermediate = matches!(value_operand, Operand::Memory { .. }) && - matches!(dest_operand, Operand::Memory { .. }); - - if needs_intermediate { - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - Operand::Register(reg), - value_operand - ], Some(&format!("load {} to register", self.ir_value_to_string(value)))); - - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - dest_operand, - Operand::Register(reg) - ], Some(&format!("store to {}", self.ir_value_to_string(dest)))); - } else { - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - dest_operand, - value_operand - ], Some(&format!("store {} to {}", self.ir_value_to_string(value), self.ir_value_to_string(dest)))); - } - } - } - } - - IrInstruction::BinaryOp { dest, op, left, right, var_type } => { - self.generate_binary_op(dest, op, left, right, var_type); - } - - IrInstruction::UnaryOp { dest, op, operand, var_type } => { - self.generate_unary_op(dest, op, operand, var_type); - } - - IrInstruction::Call { dest, func, args, return_type } => { - self.generate_function_call(dest, func, args, return_type); - } - - IrInstruction::Branch { condition, true_label, false_label } => { - let condition_operand = self.ir_value_to_operand(condition); - - // Load condition to register first, then compare - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Eax), - condition_operand - ], Some("load condition")); - - self.emit_instruction_with_comment(Instruction::Cmp, vec![ - Operand::Register(Register::Eax), - Operand::Immediate(0) - ], Some("test condition")); - - self.emit_instruction(Instruction::Je, vec![ - Operand::Label(false_label.clone()) - ]); - self.emit_instruction(Instruction::Jmp, vec![ - Operand::Label(true_label.clone()) - ]); - } - - IrInstruction::Jump { label } => { - self.emit_instruction(Instruction::Jmp, vec![ - Operand::Label(label.clone()) - ]); - } - - IrInstruction::Label { name } => { - self.emit_line(&format!("{}:", name)); - } - - IrInstruction::Return { value, var_type } => { - if let Some(val) = value { - let val_operand = self.ir_value_to_operand(val); - let register = match var_type { - IrType::Float => Register::Xmm0, - _ => Register::Eax, - }; - - match var_type { - IrType::Float => { - self.emit_instruction_with_comment(Instruction::Movsd, vec![ - Operand::Register(register), - val_operand - ], Some(&format!("return {}", self.ir_value_to_string(val)))); - } - _ => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(register), - val_operand - ], Some(&format!("return {}", self.ir_value_to_string(val)))); - } - } - } else { - self.emit_instruction_with_comment(Instruction::Xor, vec![ - Operand::Register(Register::Eax), - Operand::Register(Register::Eax) - ], Some("return 0")); - } - } - - IrInstruction::Print { format_string, args } => { - self.generate_print_call(format_string, args); - } - - IrInstruction::Move { dest, src, var_type } => { - let src_operand = self.ir_value_to_operand(src); - let dest_operand = self.ir_value_to_operand(dest); - let size = self.ir_type_to_size(var_type); - - // Use register as intermediate for memory-to-memory moves - let needs_intermediate = matches!(src_operand, Operand::Memory { .. }) && - matches!(dest_operand, Operand::Memory { .. }); - - if needs_intermediate { - let reg = match size { - Size::Byte => Register::Al, - Size::Dword => Register::Eax, - Size::Qword => Register::Rax, - _ => Register::Eax, - }; - - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - Operand::Register(reg), - src_operand - ], Some(&format!("load {} to register", self.ir_value_to_string(src)))); - - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - dest_operand, - Operand::Register(reg) - ], Some(&format!("move to {}", self.ir_value_to_string(dest)))); - } else { - self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ - dest_operand, - src_operand - ], Some(&format!("move {} to {}", self.ir_value_to_string(src), self.ir_value_to_string(dest)))); - } - } - - IrInstruction::Convert { dest, dest_type, src, src_type } => { - // Type conversion - simplified implementation - let src_operand = self.ir_value_to_operand(src); - let dest_operand = self.ir_value_to_operand(dest); - - self.emit_comment(&format!("convert {} {} to {} {}", - src_type, self.ir_value_to_string(src), dest_type, self.ir_value_to_string(dest))); - - // For now, just move (would need proper conversion logic) - self.emit_instruction(Instruction::Mov, vec![dest_operand, src_operand]); - } - - IrInstruction::Cast { dest, src, dest_type, src_type } => { - self.emit_comment(&format!("Cast {} {} to {}", src_type, self.ir_value_to_string(src), dest_type)); - - // For now, implement basic casting by moving the value - match (src_type, dest_type) { - (IrType::Int, IrType::Float) => { - self.emit_instruction(Instruction::Mov, vec![ - self.ir_value_to_operand(src), - self.ir_value_to_operand(dest), - ]); - } - (IrType::Float, IrType::Int) => { - // For float to int conversion, use mov for now - self.emit_instruction(Instruction::Mov, vec![ - self.ir_value_to_operand(src), - self.ir_value_to_operand(dest), - ]); - } - _ => { - // For other cases, just move the value - self.emit_instruction(Instruction::Mov, vec![ - self.ir_value_to_operand(src), - self.ir_value_to_operand(dest), - ]); - } - } - } - IrInstruction::Comment { text } => { - self.emit_comment(text); - } - } - } - - /// Generate binary operation - fn generate_binary_op(&mut self, dest: &IrValue, op: &IrBinaryOp, left: &IrValue, right: &IrValue, var_type: &IrType) { - let dest_operand = self.ir_value_to_operand(dest); - - match var_type { - IrType::Float => { - // Floating point operations - handle float constants specially - match left { - IrValue::FloatConstant(f) => { - let float_bits = f.to_bits() as i64; - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(float_bits) - ], Some("load float bits")); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Memory { base: Register::Rsp, offset: -8 }, - Operand::Register(Register::Rax) - ], Some("store float to temp memory")); - self.emit_instruction_with_comment(Instruction::Movsd, vec![ - Operand::Register(Register::Xmm0), - Operand::Memory { base: Register::Rsp, offset: -8 } - ], Some("load left operand")); - } - _ => { - let left_operand = self.ir_value_to_operand(left); - self.emit_instruction_with_comment(Instruction::Movsd, vec![ - Operand::Register(Register::Xmm0), - left_operand - ], Some("load left operand")); - } - } - - let asm_op = match op { - IrBinaryOp::Add => Instruction::Addsd, - IrBinaryOp::Sub => Instruction::Subsd, - IrBinaryOp::Mul => Instruction::Mulsd, - IrBinaryOp::Div => Instruction::Divsd, - _ => { - self.emit_comment(&format!("Unsupported float operation: {}", op)); - return; - } - }; - - match right { - IrValue::FloatConstant(f) => { - let float_bits = f.to_bits() as i64; - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(float_bits) - ], Some("load float bits")); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Memory { base: Register::Rsp, offset: -16 }, - Operand::Register(Register::Rax) - ], Some("store float to temp memory")); - self.emit_instruction_with_comment(asm_op, vec![ - Operand::Register(Register::Xmm0), - Operand::Memory { base: Register::Rsp, offset: -16 } - ], Some(&format!("{} operation", op))); - } - _ => { - let right_operand = self.ir_value_to_operand(right); - self.emit_instruction_with_comment(asm_op, vec![ - Operand::Register(Register::Xmm0), - right_operand - ], Some(&format!("{} operation", op))); - } - } - - self.emit_instruction_with_comment(Instruction::Movsd, vec![ - dest_operand, - Operand::Register(Register::Xmm0) - ], Some("store result")); - } - _ => { - // Integer operations - let left_operand = self.ir_value_to_operand(left); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Eax), - left_operand - ], Some("load left operand")); - - let asm_op = match op { - IrBinaryOp::Add => Instruction::Add, - IrBinaryOp::Sub => Instruction::Sub, - IrBinaryOp::Mul => Instruction::Imul, - IrBinaryOp::Div => { - // Division requires special handling - let right_operand = self.ir_value_to_operand(right); - self.emit_instruction(Instruction::Cdq, vec![]); - self.emit_instruction(Instruction::Idiv, vec![right_operand]); - self.emit_instruction(Instruction::Mov, vec![dest_operand, Operand::Register(Register::Eax)]); - return; - } - IrBinaryOp::Eq | IrBinaryOp::Ne | IrBinaryOp::Lt | - IrBinaryOp::Le | IrBinaryOp::Gt | IrBinaryOp::Ge => { - // Comparison operations - handle float constants specially - match right { - IrValue::FloatConstant(f) => { - let float_bits = f.to_bits() as i64; - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Edx), - Operand::Immediate(float_bits as i32 as i64) // Truncate to 32-bit to avoid overflow - ], Some("load float bits for comparison")); - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Eax), - Operand::Register(Register::Edx) - ]); - } - _ => { - let right_operand = self.ir_value_to_operand(right); - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Eax), - right_operand - ]); - } - } - - let set_op = match op { - IrBinaryOp::Eq => Instruction::Sete, - IrBinaryOp::Ne => Instruction::Setne, - IrBinaryOp::Lt => Instruction::Setl, - IrBinaryOp::Le => Instruction::Setle, - IrBinaryOp::Gt => Instruction::Setg, - IrBinaryOp::Ge => Instruction::Setge, - _ => unreachable!(), - }; - - self.emit_instruction(set_op, vec![Operand::Register(Register::Al)]); - self.emit_instruction(Instruction::Movzx, vec![ - Operand::Register(Register::Eax), - Operand::Register(Register::Al) - ]); - self.emit_instruction(Instruction::Mov, vec![dest_operand, Operand::Register(Register::Eax)]); - return; - } - _ => { - self.emit_comment(&format!("Unsupported operation: {}", op)); - return; - } - }; - - let right_operand = self.ir_value_to_operand(right); - self.emit_instruction_with_comment(asm_op, vec![ - Operand::Register(Register::Eax), - right_operand - ], Some(&format!("{} operation", op))); - - self.emit_instruction_with_comment(Instruction::Mov, vec![ - dest_operand, - Operand::Register(Register::Eax) - ], Some("store result")); - } - } - } - - /// Generate unary operation - fn generate_unary_op(&mut self, dest: &IrValue, op: &IrUnaryOp, operand: &IrValue, _var_type: &IrType) { - let operand_op = self.ir_value_to_operand(operand); - let dest_operand = self.ir_value_to_operand(dest); - - match op { - IrUnaryOp::Neg => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Eax), - operand_op - ], Some("load operand")); - - self.emit_instruction_with_comment(Instruction::Neg, vec![ - Operand::Register(Register::Eax) - ], Some("negate")); - - self.emit_instruction_with_comment(Instruction::Mov, vec![ - dest_operand, - Operand::Register(Register::Eax) - ], Some("store result")); - } - IrUnaryOp::Not => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Eax), - operand_op - ], Some("load operand")); - - self.emit_instruction_with_comment(Instruction::Cmp, vec![ - Operand::Register(Register::Eax), - Operand::Immediate(0) - ], Some("test for zero")); - - self.emit_instruction(Instruction::Sete, vec![Operand::Register(Register::Al)]); - self.emit_instruction(Instruction::Movzx, vec![ - Operand::Register(Register::Eax), - Operand::Register(Register::Al) - ]); - - self.emit_instruction_with_comment(Instruction::Mov, vec![ - dest_operand, - Operand::Register(Register::Eax) - ], Some("store result")); - } - } - } - - /// Generate function call - fn generate_function_call(&mut self, dest: &Option, func: &str, args: &[IrValue], return_type: &IrType) { - self.emit_comment(&format!("call {} with {} args", func, args.len())); - - // For now, simplified function call handling - // In a real implementation, you'd handle calling conventions properly - - if let Some(dest_val) = dest { - let dest_operand = self.ir_value_to_operand(dest_val); - let register = match return_type { - IrType::Float => Register::Xmm0, - _ => Register::Eax, - }; - - match return_type { - IrType::Float => { - self.emit_instruction_with_comment(Instruction::Movsd, vec![ - dest_operand, - Operand::Register(register) - ], Some("store return value")); - } - _ => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - dest_operand, - Operand::Register(register) - ], Some("store return value")); - } - } - } - } - - /// Generate print call - fn generate_print_call(&mut self, format_string: &IrValue, args: &[IrValue]) { - self.emit_comment("--- print statement ---"); - - // Handle printf call - simplified implementation - if let IrValue::StringConstant(label) = format_string { - self.emit_instruction_with_comment(Instruction::Lea, vec![ - Operand::Register(Register::Rcx), - Operand::Label(label.clone()) - ], Some("load format string")); - - // Load arguments into registers with proper float handling - for (i, arg) in args.iter().enumerate() { - let reg = match i { - 0 => Register::Rdx, - 1 => Register::R8, - 2 => Register::R9, - _ => { - self.emit_comment("Too many arguments for simplified printf"); - break; - } - }; - - // Handle different argument types - match arg { - IrValue::FloatConstant(f) => { - // For float constants, load the float bits into a register and then move to arg register - let float_bits = f.to_bits() as i64; - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(float_bits) - ], Some(&format!("load float bits for arg {}", i))); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(reg), - Operand::Register(Register::Rax) - ], Some(&format!("move to arg register {}", i))); - } - IrValue::Temp(_) | IrValue::Local(_) => { - let arg_operand = self.ir_value_to_operand(arg); - // Check if this is a float by looking at the memory location - // For now, assume temp variables that are floats need special handling - if let IrValue::Temp(_temp_id) = arg { - // Check if this temp was created from a float operation - if matches!(arg_operand, Operand::Memory { .. }) { - // For now, load as 64-bit value (could be float or int) - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - arg_operand - ], Some(&format!("load arg {} to register", i))); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(reg), - Operand::Register(Register::Rax) - ], Some(&format!("move to arg register {}", i))); - } - } else if let IrValue::Local(_) = arg { - if matches!(arg_operand, Operand::Memory { .. }) { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - arg_operand - ], Some(&format!("load arg {} to register", i))); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(reg), - Operand::Register(Register::Rax) - ], Some(&format!("move to arg register {}", i))); - } - } - } - _ => { - // Handle other types (int constants, char constants, etc.) - let arg_operand = self.ir_value_to_operand(arg); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(reg), - arg_operand - ], Some(&format!("load arg {}", i))); - } - } - } - - self.emit_instruction_with_comment(Instruction::Call, vec![ - Operand::Label("printf".to_string()) - ], Some("call printf")); - } - } - - /// Convert IR value to assembly operand - fn ir_value_to_operand(&self, value: &IrValue) -> Operand { - match value { - IrValue::IntConstant(i) => Operand::Immediate(*i), - IrValue::FloatConstant(_f) => { - panic!("Float constants cannot be used as immediate operands - must be pre-loaded into memory") - } - IrValue::CharConstant(c) => Operand::Immediate(*c as i64), - IrValue::StringConstant(label) => Operand::Label(label.clone()), - IrValue::Local(name) => { - let offset = self.locals.get(name).copied().unwrap_or(0); - Operand::Memory { base: Register::Rbp, offset } - } - IrValue::Temp(id) => { - let offset = self.temp_locations.get(id).copied().unwrap_or(0); - Operand::Memory { base: Register::Rbp, offset } - } - IrValue::Parameter(_name) => { - // Parameters would be at positive offsets from RBP - let offset = 16; // Simplified - would need proper parameter handling - Operand::Memory { base: Register::Rbp, offset } - } - IrValue::Global(name) => Operand::Label(name.clone()), - } - } - - /// Convert IR type to assembly size - fn ir_type_to_size(&self, ir_type: &IrType) -> Size { - match ir_type { - IrType::Int => Size::Dword, - IrType::Float => Size::Qword, - IrType::Char => Size::Byte, - IrType::String => Size::Qword, - IrType::Void => Size::Qword, - IrType::Pointer(_) => Size::Qword, - } - } - - /// Convert IR value to string for comments - fn ir_value_to_string(&self, value: &IrValue) -> String { - match value { - IrValue::IntConstant(i) => i.to_string(), - IrValue::FloatConstant(f) => f.to_string(), - IrValue::CharConstant(c) => format!("'{}'", c), - IrValue::StringConstant(label) => format!("@{}", label), - IrValue::Local(name) => format!("%{}", name), - IrValue::Temp(id) => format!("%t{}", id), - IrValue::Parameter(name) => format!("%{}", name), - IrValue::Global(name) => format!("@{}", name), - } - } -} - -// Implement the emitter traits for IrCodegen -impl Emitter for IrCodegen { - fn emit_line(&mut self, line: &str) { - self.output.push_str(line); - self.output.push('\n'); - } - - fn emit_comment(&mut self, comment: &str) { - self.emit_line(&format!("; {}", comment)); - } -} - -// Helper methods for IrCodegen -impl IrCodegen { - /// Emit a section header with clear visual separation - pub fn emit_section_header(&mut self, title: &str) { - self.emit_line(""); - self.emit_line(&format!("; {}", "=".repeat(60))); - self.emit_line(&format!("; {}", title)); - self.emit_line(&format!("; {}", "=".repeat(60))); - self.emit_line(""); - } - - /// Emit a subsection header for better organization - pub fn emit_subsection_header(&mut self, title: &str) { - self.emit_line(""); - self.emit_line(&format!("; {}", "-".repeat(40))); - self.emit_line(&format!("; {}", title)); - self.emit_line(&format!("; {}", "-".repeat(40))); - } - - /// Emit a stack layout summary for debugging - pub fn emit_stack_layout_summary(&mut self) { - self.emit_comment("STACK LAYOUT SUMMARY:"); - self.emit_comment("RBP+0 : Saved RBP (caller's frame pointer)"); - - if self.locals.is_empty() && self.temp_locations.is_empty() { - self.emit_comment("No local variables or temporaries allocated"); - } else { - // Collect local variables info to avoid borrowing issues - let locals_info: Vec<(String, i32)> = self.locals.iter() - .map(|(name, &offset)| (name.clone(), offset)) - .collect(); - - if !locals_info.is_empty() { - self.emit_comment("Local variables:"); - for (name, offset) in locals_info { - self.emit_comment(&format!("RBP{:3} : {}", offset, name)); - } - } - - // Collect temp variables info to avoid borrowing issues - let temps_info: Vec<(usize, i32)> = self.temp_locations.iter() - .map(|(&temp_id, &offset)| (temp_id, offset)) - .collect(); - - if !temps_info.is_empty() { - self.emit_comment("Temporary variables:"); - for (temp_id, offset) in temps_info { - self.emit_comment(&format!("RBP{:3} : %t{}", offset, temp_id)); - } - } - } - self.emit_line(""); - } -} diff --git a/src/codegen/ir_codegen/call_generator.rs b/src/codegen/ir_codegen/call_generator.rs new file mode 100644 index 0000000..11872ac --- /dev/null +++ b/src/codegen/ir_codegen/call_generator.rs @@ -0,0 +1,121 @@ +use crate::ir::{IrValue, IrType}; +use crate::codegen::instruction::{Instruction, Operand, Register}; +use crate::codegen::emitter::{Emitter, CodeEmitterWithComment}; +use super::IrCodegen; + +impl IrCodegen { + /// Generate function call + pub fn generate_function_call(&mut self, dest: &Option, func: &str, args: &[IrValue], return_type: &IrType) { + self.emit_comment(&format!("call {} with {} args", func, args.len())); + + // For now, simplified function call handling + // In a real implementation, you'd handle calling conventions properly + + if let Some(dest_val) = dest { + let dest_operand = self.ir_value_to_operand(dest_val); + let register = match return_type { + IrType::Float => Register::Xmm0, + _ => Register::Eax, + }; + + match return_type { + IrType::Float => { + self.emit_instruction_with_comment(Instruction::Movsd, vec![ + dest_operand, + Operand::Register(register) + ], Some("store return value")); + } + _ => { + self.emit_instruction_with_comment(Instruction::Mov, vec![ + dest_operand, + Operand::Register(register) + ], Some("store return value")); + } + } + } + } + + /// Generate print call + pub fn generate_print_call(&mut self, format_string: &IrValue, args: &[IrValue]) { + self.emit_comment("--- print statement ---"); + + // Handle printf call - simplified implementation + if let IrValue::StringConstant(label) = format_string { + self.emit_instruction_with_comment(Instruction::Lea, vec![ + Operand::Register(Register::Rcx), + Operand::Label(label.clone()) + ], Some("load format string")); + + // Load arguments into registers with proper float handling + for (i, arg) in args.iter().enumerate() { + let reg = match i { + 0 => Register::Rdx, + 1 => Register::R8, + 2 => Register::R9, + _ => { + self.emit_comment("Too many arguments for simplified printf"); + break; + } + }; + + // Handle different argument types + match arg { + IrValue::FloatConstant(f) => { + // For float constants, load the float bits into a register and then move to arg register + let float_bits = f.to_bits() as i64; + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Rax), + Operand::Immediate(float_bits) + ], Some(&format!("load float bits for arg {}", i))); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(reg), + Operand::Register(Register::Rax) + ], Some(&format!("move to arg register {}", i))); + } + IrValue::Temp(_) | IrValue::Local(_) => { + let arg_operand = self.ir_value_to_operand(arg); + // Check if this is a float by looking at the memory location + // For now, assume temp variables that are floats need special handling + if let IrValue::Temp(_temp_id) = arg { + // Check if this temp was created from a float operation + if matches!(arg_operand, Operand::Memory { .. }) { + // For now, load as 64-bit value (could be float or int) + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Rax), + arg_operand + ], Some(&format!("load arg {} to register", i))); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(reg), + Operand::Register(Register::Rax) + ], Some(&format!("move to arg register {}", i))); + } + } else if let IrValue::Local(_) = arg { + if matches!(arg_operand, Operand::Memory { .. }) { + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Rax), + arg_operand + ], Some(&format!("load arg {} to register", i))); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(reg), + Operand::Register(Register::Rax) + ], Some(&format!("move to arg register {}", i))); + } + } + } + _ => { + // Handle other types (int constants, char constants, etc.) + let arg_operand = self.ir_value_to_operand(arg); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(reg), + arg_operand + ], Some(&format!("load arg {}", i))); + } + } + } + + self.emit_instruction_with_comment(Instruction::Call, vec![ + Operand::Label("printf".to_string()) + ], Some("call printf")); + } + } +} \ No newline at end of file diff --git a/src/codegen/ir_codegen/emitter_impl.rs b/src/codegen/ir_codegen/emitter_impl.rs new file mode 100644 index 0000000..288573b --- /dev/null +++ b/src/codegen/ir_codegen/emitter_impl.rs @@ -0,0 +1,69 @@ +use crate::codegen::emitter::Emitter; +use super::IrCodegen; + +// Implement the emitter traits for IrCodegen +impl Emitter for IrCodegen { + fn emit_line(&mut self, line: &str) { + self.output.push_str(line); + self.output.push('\n'); + } + + fn emit_comment(&mut self, comment: &str) { + self.emit_line(&format!("; {}", comment)); + } +} + +// Helper methods for IrCodegen +impl IrCodegen { + /// Emit a section header with clear visual separation + pub fn emit_section_header(&mut self, title: &str) { + self.emit_line(""); + self.emit_line(&format!("; {}", "=".repeat(60))); + self.emit_line(&format!("; {}", title)); + self.emit_line(&format!("; {}", "=".repeat(60))); + self.emit_line(""); + } + + /// Emit a subsection header for better organization + pub fn emit_subsection_header(&mut self, title: &str) { + self.emit_line(""); + self.emit_line(&format!("; {}", "-".repeat(40))); + self.emit_line(&format!("; {}", title)); + self.emit_line(&format!("; {}", "-".repeat(40))); + } + + /// Emit a stack layout summary for debugging + pub fn emit_stack_layout_summary(&mut self) { + self.emit_comment("STACK LAYOUT SUMMARY:"); + self.emit_comment("RBP+0 : Saved RBP (caller's frame pointer)"); + + if self.locals.is_empty() && self.temp_locations.is_empty() { + self.emit_comment("No local variables or temporaries allocated"); + } else { + // Collect local variables info to avoid borrowing issues + let locals_info: Vec<(String, i32)> = self.locals.iter() + .map(|(name, &offset)| (name.clone(), offset)) + .collect(); + + if !locals_info.is_empty() { + self.emit_comment("Local variables:"); + for (name, offset) in locals_info { + self.emit_comment(&format!("RBP{:3} : {}", offset, name)); + } + } + + // Collect temp variables info to avoid borrowing issues + let temps_info: Vec<(usize, i32)> = self.temp_locations.iter() + .map(|(&temp_id, &offset)| (temp_id, offset)) + .collect(); + + if !temps_info.is_empty() { + self.emit_comment("Temporary variables:"); + for (temp_id, offset) in temps_info { + self.emit_comment(&format!("RBP{:3} : %t{}", offset, temp_id)); + } + } + } + self.emit_line(""); + } +} \ No newline at end of file diff --git a/src/codegen/ir_codegen/function_generator.rs b/src/codegen/ir_codegen/function_generator.rs new file mode 100644 index 0000000..42fddd8 --- /dev/null +++ b/src/codegen/ir_codegen/function_generator.rs @@ -0,0 +1,60 @@ +use crate::ir::{IrFunction}; +use crate::codegen::instruction::{Instruction, Operand, Register}; +use crate::codegen::emitter::{Emitter, CodeEmitterWithComment}; +use super::IrCodegen; + +impl IrCodegen { + /// Generate assembly for a single function + pub fn generate_function(&mut self, function: &IrFunction) { + self.emit_subsection_header(&format!("FUNCTION: {}", function.name)); + self.emit_line(&format!("{}:", function.name)); + + // Reset state for new function + self.stack_offset = 0; + self.locals.clear(); + self.temp_locations.clear(); + + // Function prologue + self.emit_subsection_header("Function Prologue"); + self.emit_instruction_with_comment(Instruction::Push, vec![ + Operand::Register(Register::Rbp) + ], Some("save caller's frame")); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Rbp), + Operand::Register(Register::Rsp) + ], Some("set up frame")); + + // Calculate stack space needed + let stack_space = self.calculate_stack_space(function); + if stack_space > 0 { + self.emit_instruction_with_comment(Instruction::Sub, vec![ + Operand::Register(Register::Rsp), + Operand::Immediate(stack_space as i64) + ], Some(&format!("allocate {} bytes for locals and temps", stack_space))); + } + + // Generate function body + self.emit_subsection_header("Function Body"); + for instruction in &function.instructions { + self.generate_instruction(instruction); + } + + // Function epilogue + self.emit_subsection_header("Function Epilogue"); + self.emit_stack_layout_summary(); + + if stack_space > 0 { + self.emit_instruction_with_comment(Instruction::Add, vec![ + Operand::Register(Register::Rsp), + Operand::Immediate(stack_space as i64) + ], Some("deallocate stack space")); + } + + self.emit_instruction_with_comment(Instruction::Pop, vec![ + Operand::Register(Register::Rbp) + ], Some("restore frame")); + self.emit_instruction_with_comment(Instruction::Ret, vec![], Some("return")); + + self.emit_line(""); // Add spacing after function + } +} \ No newline at end of file diff --git a/src/codegen/ir_codegen/instruction_generator.rs b/src/codegen/ir_codegen/instruction_generator.rs new file mode 100644 index 0000000..497e6d3 --- /dev/null +++ b/src/codegen/ir_codegen/instruction_generator.rs @@ -0,0 +1,252 @@ +use crate::ir::{IrInstruction, IrValue, IrType}; +use crate::codegen::instruction::{Instruction, Operand, Register, Size}; +use crate::codegen::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; +use super::IrCodegen; + +impl IrCodegen { + /// Generate assembly for a single IR instruction + pub fn generate_instruction(&mut self, instruction: &IrInstruction) { + match instruction { + IrInstruction::Alloca { var_type, name } => { + // Space already allocated in prologue, just add comment + let size = self.get_type_size(var_type); + let offset = self.locals.get(name).copied().unwrap_or(0); + self.emit_comment(&format!("alloca {} {} at [rbp{}] ({} bytes)", + var_type, name, offset, size)); + } + + IrInstruction::Load { dest, src, var_type } => { + let src_operand = self.ir_value_to_operand(src); + let dest_operand = self.ir_value_to_operand(dest); + let size = self.ir_type_to_size(var_type); + + // Use register as intermediate for memory-to-memory moves + let reg = match size { + Size::Byte => Register::Al, + Size::Dword => Register::Eax, + Size::Qword => Register::Rax, + _ => Register::Eax, + }; + + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + Operand::Register(reg), + src_operand + ], Some(&format!("load {} {} to register", var_type, self.ir_value_to_string(src)))); + + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + dest_operand, + Operand::Register(reg) + ], Some("store to destination")); + } + + IrInstruction::Store { value, dest, var_type } => { + let dest_operand = self.ir_value_to_operand(dest); + let size = self.ir_type_to_size(var_type); + + // Handle different value types appropriately + match (value, var_type) { + (IrValue::FloatConstant(f), IrType::Float) => { + // For float constants, we need to handle them specially + self.emit_comment(&format!("store float constant {} to {}", f, self.ir_value_to_string(dest))); + // Move the float bits as integer first, then convert + let bits = f.to_bits() as i64; + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Rax), + Operand::Immediate(bits) + ], Some("load float bits")); + self.emit_instruction_with_size_and_comment(Instruction::Mov, Size::Qword, vec![ + dest_operand, + Operand::Register(Register::Rax) + ], Some("store float")); + } + _ => { + // For other types, get the value operand and use register as intermediate if needed + let value_operand = self.ir_value_to_operand(value); + let reg = match size { + Size::Byte => Register::Al, + Size::Dword => Register::Eax, + Size::Qword => Register::Rax, + _ => Register::Eax, + }; + + // Check if we need an intermediate register + let needs_intermediate = matches!(value_operand, Operand::Memory { .. }) && + matches!(dest_operand, Operand::Memory { .. }); + + if needs_intermediate { + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + Operand::Register(reg), + value_operand + ], Some(&format!("load {} to register", self.ir_value_to_string(value)))); + + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + dest_operand, + Operand::Register(reg) + ], Some(&format!("store to {}", self.ir_value_to_string(dest)))); + } else { + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + dest_operand, + value_operand + ], Some(&format!("store {} to {}", self.ir_value_to_string(value), self.ir_value_to_string(dest)))); + } + } + } + } + + IrInstruction::BinaryOp { dest, op, left, right, var_type } => { + self.generate_binary_op(dest, op, left, right, var_type); + } + + IrInstruction::UnaryOp { dest, op, operand, var_type } => { + self.generate_unary_op(dest, op, operand, var_type); + } + + IrInstruction::Call { dest, func, args, return_type } => { + self.generate_function_call(dest, func, args, return_type); + } + + IrInstruction::Branch { condition, true_label, false_label } => { + let condition_operand = self.ir_value_to_operand(condition); + + // Load condition to register first, then compare + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Eax), + condition_operand + ], Some("load condition")); + + self.emit_instruction_with_comment(Instruction::Cmp, vec![ + Operand::Register(Register::Eax), + Operand::Immediate(0) + ], Some("test condition")); + + self.emit_instruction(Instruction::Je, vec![ + Operand::Label(false_label.clone()) + ]); + self.emit_instruction(Instruction::Jmp, vec![ + Operand::Label(true_label.clone()) + ]); + } + + IrInstruction::Jump { label } => { + self.emit_instruction(Instruction::Jmp, vec![ + Operand::Label(label.clone()) + ]); + } + + IrInstruction::Label { name } => { + self.emit_line(&format!("{}:", name)); + } + + IrInstruction::Return { value, var_type } => { + if let Some(val) = value { + let val_operand = self.ir_value_to_operand(val); + let register = match var_type { + IrType::Float => Register::Xmm0, + _ => Register::Eax, + }; + + match var_type { + IrType::Float => { + self.emit_instruction_with_comment(Instruction::Movsd, vec![ + Operand::Register(register), + val_operand + ], Some(&format!("return {}", self.ir_value_to_string(val)))); + } + _ => { + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(register), + val_operand + ], Some(&format!("return {}", self.ir_value_to_string(val)))); + } + } + } else { + self.emit_instruction_with_comment(Instruction::Xor, vec![ + Operand::Register(Register::Eax), + Operand::Register(Register::Eax) + ], Some("return 0")); + } + } + + IrInstruction::Print { format_string, args } => { + self.generate_print_call(format_string, args); + } + + IrInstruction::Move { dest, src, var_type } => { + let src_operand = self.ir_value_to_operand(src); + let dest_operand = self.ir_value_to_operand(dest); + let size = self.ir_type_to_size(var_type); + + // Use register as intermediate for memory-to-memory moves + let needs_intermediate = matches!(src_operand, Operand::Memory { .. }) && + matches!(dest_operand, Operand::Memory { .. }); + + if needs_intermediate { + let reg = match size { + Size::Byte => Register::Al, + Size::Dword => Register::Eax, + Size::Qword => Register::Rax, + _ => Register::Eax, + }; + + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + Operand::Register(reg), + src_operand + ], Some(&format!("load {} to register", self.ir_value_to_string(src)))); + + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + dest_operand, + Operand::Register(reg) + ], Some(&format!("move to {}", self.ir_value_to_string(dest)))); + } else { + self.emit_instruction_with_size_and_comment(Instruction::Mov, size, vec![ + dest_operand, + src_operand + ], Some(&format!("move {} to {}", self.ir_value_to_string(src), self.ir_value_to_string(dest)))); + } + } + + IrInstruction::Convert { dest, dest_type, src, src_type } => { + // Type conversion - simplified implementation + let src_operand = self.ir_value_to_operand(src); + let dest_operand = self.ir_value_to_operand(dest); + + self.emit_comment(&format!("convert {} {} to {} {}", + src_type, self.ir_value_to_string(src), dest_type, self.ir_value_to_string(dest))); + + // For now, just move (would need proper conversion logic) + self.emit_instruction(Instruction::Mov, vec![dest_operand, src_operand]); + } + + IrInstruction::Cast { dest, src, dest_type, src_type } => { + self.emit_comment(&format!("Cast {} {} to {}", src_type, self.ir_value_to_string(src), dest_type)); + + // For now, implement basic casting by moving the value + match (src_type, dest_type) { + (IrType::Int, IrType::Float) => { + self.emit_instruction(Instruction::Mov, vec![ + self.ir_value_to_operand(src), + self.ir_value_to_operand(dest), + ]); + } + (IrType::Float, IrType::Int) => { + // For float to int conversion, use mov for now + self.emit_instruction(Instruction::Mov, vec![ + self.ir_value_to_operand(src), + self.ir_value_to_operand(dest), + ]); + } + _ => { + // For other cases, just move the value + self.emit_instruction(Instruction::Mov, vec![ + self.ir_value_to_operand(src), + self.ir_value_to_operand(dest), + ]); + } + } + } + IrInstruction::Comment { text } => { + self.emit_comment(text); + } + } + } +} \ No newline at end of file diff --git a/src/codegen/ir_codegen/mod.rs b/src/codegen/ir_codegen/mod.rs new file mode 100644 index 0000000..81ef37e --- /dev/null +++ b/src/codegen/ir_codegen/mod.rs @@ -0,0 +1,79 @@ +use std::collections::HashMap; +use crate::ir::IrProgram; +use super::emitter::Emitter; + +mod function_generator; +mod stack_manager; +mod instruction_generator; +mod operation_generator; +mod call_generator; +mod value_converter; +mod emitter_impl; + +// The modules are used internally via impl blocks, no need to re-export + +/// IR-based code generator that produces assembly from IR +pub struct IrCodegen { + pub output: String, + pub stack_offset: i32, + pub locals: HashMap, + pub temp_locations: HashMap, // Map temp variables to stack locations + pub data_strings: HashMap, + pub label_count: usize, +} + +impl IrCodegen { + pub fn new() -> Self { + Self { + output: String::new(), + stack_offset: 0, + locals: HashMap::new(), + temp_locations: HashMap::new(), + data_strings: HashMap::new(), + label_count: 0, + } + } + + /// Generate assembly from IR program + pub fn generate(mut self, ir_program: &IrProgram) -> String { + // Assembly file header + self.emit_section_header("MINI-C COMPILER GENERATED ASSEMBLY (FROM IR)"); + self.emit_comment("Target: x86-64 Windows"); + self.emit_comment("Calling Convention: Microsoft x64"); + self.emit_comment("Generated from: Intermediate Representation"); + self.emit_line(""); + + // Assembly directives + self.emit_comment("Assembly configuration"); + self.emit_line("bits 64"); + self.emit_line("default rel"); + self.emit_line("global main"); + self.emit_line("extern printf"); + + // Data section - process global strings + self.emit_section_header("DATA SECTION - String Literals and Constants"); + self.emit_line("section .data"); + + if ir_program.global_strings.is_empty() { + self.emit_comment("No string literals found"); + } else { + for (label, content) in &ir_program.global_strings { + let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); + self.emit_comment(&format!("String constant: \"{}\"", content.replace('\n', "\\n"))); + self.emit_line(&format!(" {}: db \"{}\", 10, 0", label, formatted_content)); + self.data_strings.insert(label.clone(), content.clone()); + } + } + + // Text section + self.emit_section_header("TEXT SECTION - Executable Code"); + self.emit_line("section .text"); + + // Generate code for each function + for function in &ir_program.functions { + self.generate_function(function); + } + + self.output + } +} \ No newline at end of file diff --git a/src/codegen/ir_codegen/operation_generator.rs b/src/codegen/ir_codegen/operation_generator.rs new file mode 100644 index 0000000..2ac8d18 --- /dev/null +++ b/src/codegen/ir_codegen/operation_generator.rs @@ -0,0 +1,207 @@ +use crate::ir::{IrValue, IrType, IrBinaryOp, IrUnaryOp}; +use crate::codegen::instruction::{Instruction, Operand, Register}; +use crate::codegen::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; +use super::IrCodegen; + +impl IrCodegen { + /// Generate binary operation + pub fn generate_binary_op(&mut self, dest: &IrValue, op: &IrBinaryOp, left: &IrValue, right: &IrValue, var_type: &IrType) { + let dest_operand = self.ir_value_to_operand(dest); + + match var_type { + IrType::Float => { + // Floating point operations - handle float constants specially + match left { + IrValue::FloatConstant(f) => { + let float_bits = f.to_bits() as i64; + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Rax), + Operand::Immediate(float_bits) + ], Some("load float bits")); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Memory { base: Register::Rsp, offset: -8 }, + Operand::Register(Register::Rax) + ], Some("store float to temp memory")); + self.emit_instruction_with_comment(Instruction::Movsd, vec![ + Operand::Register(Register::Xmm0), + Operand::Memory { base: Register::Rsp, offset: -8 } + ], Some("load left operand")); + } + _ => { + let left_operand = self.ir_value_to_operand(left); + self.emit_instruction_with_comment(Instruction::Movsd, vec![ + Operand::Register(Register::Xmm0), + left_operand + ], Some("load left operand")); + } + } + + let asm_op = match op { + IrBinaryOp::Add => Instruction::Addsd, + IrBinaryOp::Sub => Instruction::Subsd, + IrBinaryOp::Mul => Instruction::Mulsd, + IrBinaryOp::Div => Instruction::Divsd, + _ => { + self.emit_comment(&format!("Unsupported float operation: {}", op)); + return; + } + }; + + match right { + IrValue::FloatConstant(f) => { + let float_bits = f.to_bits() as i64; + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Rax), + Operand::Immediate(float_bits) + ], Some("load float bits")); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Memory { base: Register::Rsp, offset: -16 }, + Operand::Register(Register::Rax) + ], Some("store float to temp memory")); + self.emit_instruction_with_comment(asm_op, vec![ + Operand::Register(Register::Xmm0), + Operand::Memory { base: Register::Rsp, offset: -16 } + ], Some(&format!("{} operation", op))); + } + _ => { + let right_operand = self.ir_value_to_operand(right); + self.emit_instruction_with_comment(asm_op, vec![ + Operand::Register(Register::Xmm0), + right_operand + ], Some(&format!("{} operation", op))); + } + } + + self.emit_instruction_with_comment(Instruction::Movsd, vec![ + dest_operand, + Operand::Register(Register::Xmm0) + ], Some("store result")); + } + _ => { + // Integer operations + let left_operand = self.ir_value_to_operand(left); + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Eax), + left_operand + ], Some("load left operand")); + + let asm_op = match op { + IrBinaryOp::Add => Instruction::Add, + IrBinaryOp::Sub => Instruction::Sub, + IrBinaryOp::Mul => Instruction::Imul, + IrBinaryOp::Div => { + // Division requires special handling + let right_operand = self.ir_value_to_operand(right); + self.emit_instruction(Instruction::Cdq, vec![]); + self.emit_instruction(Instruction::Idiv, vec![right_operand]); + self.emit_instruction(Instruction::Mov, vec![dest_operand, Operand::Register(Register::Eax)]); + return; + } + IrBinaryOp::Eq | IrBinaryOp::Ne | IrBinaryOp::Lt | + IrBinaryOp::Le | IrBinaryOp::Gt | IrBinaryOp::Ge => { + // Comparison operations - handle float constants specially + match right { + IrValue::FloatConstant(f) => { + let float_bits = f.to_bits() as i64; + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Edx), + Operand::Immediate(float_bits as i32 as i64) // Truncate to 32-bit to avoid overflow + ], Some("load float bits for comparison")); + self.emit_instruction(Instruction::Cmp, vec![ + Operand::Register(Register::Eax), + Operand::Register(Register::Edx) + ]); + } + _ => { + let right_operand = self.ir_value_to_operand(right); + self.emit_instruction(Instruction::Cmp, vec![ + Operand::Register(Register::Eax), + right_operand + ]); + } + } + + let set_op = match op { + IrBinaryOp::Eq => Instruction::Sete, + IrBinaryOp::Ne => Instruction::Setne, + IrBinaryOp::Lt => Instruction::Setl, + IrBinaryOp::Le => Instruction::Setle, + IrBinaryOp::Gt => Instruction::Setg, + IrBinaryOp::Ge => Instruction::Setge, + _ => unreachable!(), + }; + + self.emit_instruction(set_op, vec![Operand::Register(Register::Al)]); + self.emit_instruction(Instruction::Movzx, vec![ + Operand::Register(Register::Eax), + Operand::Register(Register::Al) + ]); + self.emit_instruction(Instruction::Mov, vec![dest_operand, Operand::Register(Register::Eax)]); + return; + } + _ => { + self.emit_comment(&format!("Unsupported operation: {}", op)); + return; + } + }; + + let right_operand = self.ir_value_to_operand(right); + self.emit_instruction_with_comment(asm_op, vec![ + Operand::Register(Register::Eax), + right_operand + ], Some(&format!("{} operation", op))); + + self.emit_instruction_with_comment(Instruction::Mov, vec![ + dest_operand, + Operand::Register(Register::Eax) + ], Some("store result")); + } + } + } + + /// Generate unary operation + pub fn generate_unary_op(&mut self, dest: &IrValue, op: &IrUnaryOp, operand: &IrValue, _var_type: &IrType) { + let operand_op = self.ir_value_to_operand(operand); + let dest_operand = self.ir_value_to_operand(dest); + + match op { + IrUnaryOp::Neg => { + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Eax), + operand_op + ], Some("load operand")); + + self.emit_instruction_with_comment(Instruction::Neg, vec![ + Operand::Register(Register::Eax) + ], Some("negate")); + + self.emit_instruction_with_comment(Instruction::Mov, vec![ + dest_operand, + Operand::Register(Register::Eax) + ], Some("store result")); + } + IrUnaryOp::Not => { + self.emit_instruction_with_comment(Instruction::Mov, vec![ + Operand::Register(Register::Eax), + operand_op + ], Some("load operand")); + + self.emit_instruction_with_comment(Instruction::Cmp, vec![ + Operand::Register(Register::Eax), + Operand::Immediate(0) + ], Some("test for zero")); + + self.emit_instruction(Instruction::Sete, vec![Operand::Register(Register::Al)]); + self.emit_instruction(Instruction::Movzx, vec![ + Operand::Register(Register::Eax), + Operand::Register(Register::Al) + ]); + + self.emit_instruction_with_comment(Instruction::Mov, vec![ + dest_operand, + Operand::Register(Register::Eax) + ], Some("store result")); + } + } + } +} \ No newline at end of file diff --git a/src/codegen/ir_codegen/stack_manager.rs b/src/codegen/ir_codegen/stack_manager.rs new file mode 100644 index 0000000..045a6b8 --- /dev/null +++ b/src/codegen/ir_codegen/stack_manager.rs @@ -0,0 +1,67 @@ +use crate::ir::{IrFunction, IrInstruction, IrValue, IrType}; +use super::IrCodegen; + +impl IrCodegen { + /// Calculate the stack space needed for a function + pub fn calculate_stack_space(&mut self, function: &IrFunction) -> i32 { + let mut space = 32; // Shadow space for Windows x64 ABI + + // Allocate space for local variables + for (name, ir_type) in &function.local_vars { + let size = self.get_type_size(ir_type); + space += size; + self.locals.insert(name.clone(), -space); + } + + // Allocate space for temporary variables + let mut _temp_count = 0; + for instruction in &function.instructions { + if let Some(temp_id) = self.extract_temp_id(instruction) { + if !self.temp_locations.contains_key(&temp_id) { + _temp_count += 1; + space += 8; // Assume 8 bytes for all temps + self.temp_locations.insert(temp_id, -space); + } + } + } + + // Align to 16 bytes + (space + 15) & !15 + } + + /// Extract temporary variable ID from instruction if present + pub fn extract_temp_id(&self, instruction: &IrInstruction) -> Option { + match instruction { + IrInstruction::BinaryOp { dest, .. } | + IrInstruction::UnaryOp { dest, .. } | + IrInstruction::Load { dest, .. } | + IrInstruction::Move { dest, .. } => { + if let IrValue::Temp(id) = dest { + Some(*id) + } else { + None + } + } + IrInstruction::Call { dest: Some(dest), .. } => { + if let IrValue::Temp(id) = dest { + Some(*id) + } else { + None + } + } + _ => None, + } + } + + /// Get the size in bytes for an IR type + pub fn get_type_size(&self, ir_type: &IrType) -> i32 { + match ir_type { + IrType::Int => 4, + IrType::Float => 8, + IrType::Char => 1, + IrType::String => 8, // Pointer size + IrType::Void => 0, + IrType::Pointer(_) => 8, + } + } +} \ No newline at end of file diff --git a/src/codegen/ir_codegen/value_converter.rs b/src/codegen/ir_codegen/value_converter.rs new file mode 100644 index 0000000..f51e437 --- /dev/null +++ b/src/codegen/ir_codegen/value_converter.rs @@ -0,0 +1,57 @@ +use crate::ir::{IrValue, IrType}; +use crate::codegen::instruction::{Operand, Register, Size}; +use super::IrCodegen; + +impl IrCodegen { + /// Convert IR value to assembly operand + pub fn ir_value_to_operand(&self, value: &IrValue) -> Operand { + match value { + IrValue::IntConstant(i) => Operand::Immediate(*i), + IrValue::FloatConstant(_f) => { + panic!("Float constants cannot be used as immediate operands - must be pre-loaded into memory") + } + IrValue::CharConstant(c) => Operand::Immediate(*c as i64), + IrValue::StringConstant(label) => Operand::Label(label.clone()), + IrValue::Local(name) => { + let offset = self.locals.get(name).copied().unwrap_or(0); + Operand::Memory { base: Register::Rbp, offset } + } + IrValue::Temp(id) => { + let offset = self.temp_locations.get(id).copied().unwrap_or(0); + Operand::Memory { base: Register::Rbp, offset } + } + IrValue::Parameter(_name) => { + // Parameters would be at positive offsets from RBP + let offset = 16; // Simplified - would need proper parameter handling + Operand::Memory { base: Register::Rbp, offset } + } + IrValue::Global(name) => Operand::Label(name.clone()), + } + } + + /// Convert IR type to assembly size + pub fn ir_type_to_size(&self, ir_type: &IrType) -> Size { + match ir_type { + IrType::Int => Size::Dword, + IrType::Float => Size::Qword, + IrType::Char => Size::Byte, + IrType::String => Size::Qword, + IrType::Void => Size::Qword, + IrType::Pointer(_) => Size::Qword, + } + } + + /// Convert IR value to string for comments + pub fn ir_value_to_string(&self, value: &IrValue) -> String { + match value { + IrValue::IntConstant(i) => i.to_string(), + IrValue::FloatConstant(f) => f.to_string(), + IrValue::CharConstant(c) => format!("'{}'", c), + IrValue::StringConstant(label) => format!("@{}", label), + IrValue::Local(name) => format!("%{}", name), + IrValue::Temp(id) => format!("%t{}", id), + IrValue::Parameter(name) => format!("%{}", name), + IrValue::Global(name) => format!("@{}", name), + } + } +} \ No newline at end of file diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 462f397..77bab74 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,26 +1,11 @@ mod instruction; mod emitter; -mod analyzer; -mod expression; -mod statement; -mod codegen; mod ir_codegen; mod backend; -mod direct_backend; mod ir_backend; -mod target; -mod calling_convention; -pub use codegen::Codegen; pub use ir_codegen::IrCodegen; pub use instruction::{Instruction, Register, Operand, Size}; pub use emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; -pub use analyzer::AstAnalyzer; -pub use expression::ExpressionGenerator; -pub use statement::StatementGenerator; pub use backend::{CodegenBackend, BackendUtils, RegisterAllocator}; -pub use direct_backend::DirectBackend; pub use ir_backend::IrBackend; -pub use target::{TargetArchitecture, RegisterAllocator as TargetRegisterAllocator, CallingConvention, CodeGenerator}; -pub use target::x86_64_windows::{X86_64Windows, X86RegisterAllocator, WindowsX64CallingConvention}; -pub use calling_convention::{FunctionCallGenerator, CallingConvention as CallConv}; diff --git a/src/codegen/statement.rs b/src/codegen/statement.rs deleted file mode 100644 index c62fd00..0000000 --- a/src/codegen/statement.rs +++ /dev/null @@ -1,487 +0,0 @@ -use std::collections::HashMap; -use crate::lexer::TokenType; -use crate::parser::ast::{Expr, Stmt}; -use super::instruction::{Instruction, Operand, Register, Size}; -use super::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; -use super::expression::ExpressionGenerator; -use super::calling_convention::FunctionCallGenerator; - -pub trait StatementGenerator: Emitter + CodeEmitter + CodeEmitterWithComment + ExpressionGenerator { - fn gen_stmt(&mut self, stmt: &Stmt); - fn get_stack_offset(&self) -> i32; - fn set_stack_offset(&mut self, offset: i32); - fn get_locals_mut(&mut self) -> &mut HashMap; - fn get_local_types(&self) -> &HashMap; - fn get_local_types_mut(&mut self) -> &mut HashMap; -} - -impl StatementGenerator for super::Codegen { - fn gen_stmt(&mut self, stmt: &Stmt) { - match stmt { - Stmt::VarDecl { var_type, name, initializer } => { - // Quick preview of variable declaration - let type_str = if let Some(token_type) = var_type.to_token_type() { - match token_type { - crate::lexer::TokenType::Int => "int", - crate::lexer::TokenType::FloatType => "float", - crate::lexer::TokenType::CharType => "char", - _ => "int", // Default fallback - } - } else { - "int" // Default fallback - }; - if let Some(init_expr) = initializer { - let init_str = match init_expr { - Expr::Integer(i) => i.to_string(), - Expr::Float(f) => f.to_string(), - Expr::Char(c) => format!("'{}'", c), - Expr::String(s) => format!("\"{}\"", s), - _ => "expr".to_string(), - }; - self.emit_comment(&format!("{} {} = {}", type_str, name, init_str)); - } else { - self.emit_comment(&format!("{} {}", type_str, name)); - } - let (_var_size, stack_offset) = if let Some(token_type) = var_type.to_token_type() { - match token_type { - crate::lexer::TokenType::Int => { - self.stack_offset -= 4; - (4, self.stack_offset) - }, - crate::lexer::TokenType::FloatType => { - self.stack_offset -= 8; - (8, self.stack_offset) - }, - crate::lexer::TokenType::CharType => { - self.stack_offset -= 1; - (1, self.stack_offset) - }, - _ => { - self.stack_offset -= 8; - (8, self.stack_offset) - } - } - } else { - self.stack_offset -= 8; - (8, self.stack_offset) - }; - - // Store offset relative to RBP - self.locals.insert(name.clone(), stack_offset); - // Store variable type for later use - if let Some(token_type) = var_type.to_token_type() { - self.local_types.insert(name.clone(), token_type); - } else { - self.local_types.insert(name.clone(), crate::lexer::TokenType::Int); // Default fallback - } - - if let Some(expr) = initializer { - if let Some(token_type) = var_type.to_token_type() { - match token_type { - crate::lexer::TokenType::Int => { - if let Expr::Integer(i) = expr { - self.emit_instruction_with_size_and_comment(Instruction::Mov, Size::Dword, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::Immediate(*i) - ], Some(&format!("init {} = {}", name, i))); - } else { - self.gen_expr(expr); - self.emit_instruction_with_size_and_comment(Instruction::Mov, Size::Dword, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::Register(Register::Eax) - ], Some(&format!("store {}", name))); - } - }, - crate::lexer::TokenType::FloatType => { - if let Expr::Float(f) = expr { - let float_bits = f.to_bits(); - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(float_bits as i64) - ]); - self.emit_instruction(Instruction::Movq, vec![ - Operand::Register(Register::Xmm0), - Operand::Register(Register::Rax) - ]); - self.emit_instruction_with_size(Instruction::Movsd, Size::Qword, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::Register(Register::Xmm0) - ]); - } else { - self.gen_expr(expr); - self.emit_instruction_with_size(Instruction::Movsd, Size::Qword, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::Register(Register::Xmm0) - ]); - } - }, - crate::lexer::TokenType::CharType => { - if let Expr::Char(c) = expr { - self.emit_instruction_with_size(Instruction::Mov, Size::Byte, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::String(format!("'{}'", c)) - ]); - } else { - self.gen_expr(expr); - self.emit_instruction_with_size(Instruction::Mov, Size::Byte, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::Register(Register::Al) - ]); - } - }, - _ => { - self.gen_expr(expr); - self.emit_instruction_with_size(Instruction::Mov, Size::Qword, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::Register(Register::Rax) - ]); - } - } - } else { - self.gen_expr(expr); - self.emit_instruction_with_size(Instruction::Mov, Size::Qword, vec![ - Operand::Memory { base: Register::Rbp, offset: stack_offset }, - Operand::Register(Register::Rax) - ]); - } - } - } - - Stmt::Return(Some(expr)) => { - let return_str = match expr { - Expr::Integer(i) => i.to_string(), - Expr::Identifier(name) => name.clone(), - Expr::Binary { left, operator, right } => { - match (left.as_ref(), operator, right.as_ref()) { - (Expr::Identifier(name), TokenType::Plus, Expr::Integer(i)) => format!("{} + {}", name, i), - _ => "expr".to_string(), - } - }, - _ => "expr".to_string(), - }; - self.emit_comment(&format!("return {}", return_str)); - self.gen_expr(expr); - } - - Stmt::Return(None) => { - self.emit_comment("return 0"); - self.emit_instruction_with_comment(Instruction::Xor, vec![ - Operand::Register(Register::Eax), - Operand::Register(Register::Eax) - ], Some("return 0")); - } - - Stmt::ExprStmt(expr) => { - self.gen_expr(expr); - } - - Stmt::Block(stmts) => { - // Save current stack offset and locals for block scope - let original_stack_offset = self.stack_offset; - let original_locals = self.locals.clone(); - - for stmt in stmts { - self.gen_stmt(stmt); - } - - // Restore stack offset and locals after block - self.stack_offset = original_stack_offset; - self.locals = original_locals; - } - - Stmt::If { condition, then_branch } => { - let condition_str = match condition { - Expr::Binary { left, operator, right } => { - match (left.as_ref(), operator, right.as_ref()) { - (Expr::Identifier(name), TokenType::GreaterThan, Expr::Integer(i)) => format!("{} > {}", name, i), - (Expr::Identifier(name), TokenType::LessThan, Expr::Integer(i)) => format!("{} < {}", name, i), - (Expr::Identifier(name), TokenType::Equal, Expr::Integer(i)) => format!("{} == {}", name, i), - _ => "condition".to_string(), - } - }, - _ => "condition".to_string(), - }; - self.emit_comment(&format!("--- if ({}) ---", condition_str)); - if let Expr::Binary { left, operator, right } = condition { - if let (Expr::Identifier(var_name), TokenType::GreaterThan, Expr::Integer(val)) = (left.as_ref(), operator, right.as_ref()) { - if let Some(&offset) = self.locals.get(var_name) { - self.emit_line(&format!(" mov eax, [rbp{}] ; Charge {} dans eax pour la comparaison", offset, var_name)); - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Eax), - Operand::Immediate(*val) - ]); - self.emit_instruction(Instruction::Jle, vec![Operand::Label(".else_block".to_string())]); - } - } else { - self.gen_expr(condition); - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Eax), - Operand::Immediate(0) - ]); - self.emit_instruction(Instruction::Je, vec![Operand::Label(".else_block".to_string())]); - } - } else { - self.gen_expr(condition); - self.emit_instruction(Instruction::Cmp, vec![ - Operand::Register(Register::Eax), - Operand::Immediate(0) - ]); - self.emit_instruction(Instruction::Je, vec![Operand::Label(".else_block".to_string())]); - } - self.emit_line(""); - self.emit_comment("--- Bloc du \"if\" (si x > 0) ---"); - for stmt in then_branch { - self.gen_stmt(stmt); - } - self.emit_instruction(Instruction::Jmp, vec![Operand::Label(".end_program".to_string())]); - self.emit_line(""); - self.emit_line(".else_block:"); - self.emit_comment("--- return 0; ---"); - self.emit_comment("Ce bloc est exécuté si x <= 0"); - self.emit_instruction(Instruction::Xor, vec![ - Operand::Register(Register::Eax), - Operand::Register(Register::Eax) - ]); - self.emit_line(""); - self.emit_line(".end_program:"); - } - - // Handle PrintStmt with RIP-relative addressing for x86-64 - Stmt::PrintStmt { format_string, args } => { - if let Expr::String(s) = format_string { - if s.is_empty() { - // Simple println(expr) case - if args.len() == 1 { - let arg = &args[0]; - match arg { - Expr::Identifier(name) => { - self.emit_comment(&format!("--- println({}); ---", name)); - } - Expr::Integer(i) => { - self.emit_comment(&format!("--- println({}); ---", i)); - } - Expr::Float(f) => { - self.emit_comment(&format!("--- println({}); ---", f)); - } - Expr::Char(c) => { - self.emit_comment(&format!("--- println('{}'); ---", c)); - } - _ => { - self.emit_comment("--- println(expr); ---"); - } - } - } - } else if args.is_empty() { - self.emit_comment(&format!("--- println(\"{}\"); ---", s.replace('\n', "\\n"))); - } else { - let args_str = args.iter() - .map(|arg| match arg { - Expr::Identifier(name) => name.clone(), - Expr::Integer(i) => i.to_string(), - Expr::Float(f) => f.to_string(), - Expr::Char(c) => format!("'{}'", c), - _ => "expr".to_string(), - }) - .collect::>() - .join(", "); - self.emit_comment(&format!("--- println(\"{}\", {}); ---", s.replace('\n', "\\n"), args_str)); - } - } - if let Expr::String(s) = format_string { - if s.is_empty() { - // Handle simple println(expr) case - if args.len() == 1 { - let arg = &args[0]; - - // Determine the appropriate format string based on the expression type - let (format_str, _is_float) = match arg { - Expr::Integer(_) => ("%d\n", false), - Expr::Float(_) => ("%.6f\n", true), - Expr::Char(_) => ("%c\n", false), - Expr::Identifier(var_name) => { - // Use stored type information - match self.local_types.get(var_name) { - Some(TokenType::Int) => ("%d\n", false), - Some(TokenType::FloatType) => ("%.6f\n", true), - Some(TokenType::CharType) => ("%c\n", false), - _ => ("%d\n", false), // Default to integer - } - } - _ => ("%d\n", false), // Default to integer format - }; - - // Create the format string if it doesn't exist - let format_label = if let Some(label) = self.data_strings.get(format_str) { - label.clone() - } else { - let label = format!("str_{}", self.data_strings.len()); - self.data_strings.insert(format_str.to_string(), label.clone()); - label - }; - - self.emit_comment(&format!("printf call: {}", format_str)); - self.emit_line(" and rsp, ~15 ; align stack"); - self.emit_instruction_with_comment(Instruction::Sub, vec![ - Operand::Register(Register::Rsp), - Operand::Immediate(32) - ], Some("shadow space")); - - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rcx), - Operand::Label(format_label) - ], Some("format string")); - - match arg { - Expr::Integer(i) => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Edx), - Operand::Immediate(*i) - ], Some(&format!("arg: {}", i))); - } - Expr::Float(f) => { - let float_bits = f.to_bits(); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rax), - Operand::Immediate(float_bits as i64) - ], Some(&format!("float {} bits", f))); - self.emit_line(" movq xmm1, rax ; to XMM1"); - self.emit_line(" movq rdx, xmm1 ; to RDX"); - } - Expr::Char(c) => { - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Edx), - Operand::Immediate(*c as i64) - ], Some(&format!("arg: '{}'", c))); - } - Expr::Identifier(var_name) => { - if let Some(&offset) = self.locals.get(var_name) { - // Handle different types based on stored type information - match self.local_types.get(var_name) { - Some(TokenType::Int) => { - self.emit_line(&format!(" mov edx, [rbp{}] ; Load int variable {} value", offset, var_name)); - } - Some(TokenType::FloatType) => { - self.emit_line(&format!(" movsd xmm1, [rbp{}] ; Load float variable {} into XMM1", offset, var_name)); - self.emit_line(" movq rdx, xmm1 ; Copy float to RDX for printf"); - } - Some(TokenType::CharType) => { - self.emit_line(&format!(" movzx edx, byte [rbp{}] ; Load char variable {} value", offset, var_name)); - } - _ => { - // Default to integer - self.emit_line(&format!(" mov edx, [rbp{}] ; Load variable {} value (default int)", offset, var_name)); - } - } - } - } - _ => { - // For other expressions, generate code and use the result - self.gen_expr(arg); - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Edx), - Operand::Register(Register::Eax) - ]); - } - } - - self.emit_line(""); - self.emit_instruction(Instruction::Call, vec![Operand::Label("printf".to_string())]); - - self.emit_line(""); - self.emit_instruction(Instruction::Add, vec![ - Operand::Register(Register::Rsp), - Operand::Immediate(32) - ]); - } - return; - } - - let format_label = self.data_strings.get(s).unwrap().clone(); - - let call_gen = FunctionCallGenerator::windows_x64(); - - for instruction in call_gen.generate_stack_alignment() { - self.emit_line(&instruction); - } - self.emit_line(""); - - if args.is_empty() { - // Simple printf with just format string - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Rcx), - Operand::Label(format_label) - ]); - self.emit_instruction(Instruction::Call, vec![Operand::Label("printf".to_string())]); - } else { - self.emit_instruction(Instruction::Mov, vec![ - Operand::Register(Register::Rcx), - Operand::Label(format_label) - ]); - - // Generate argument passing code using calling convention - let mut arg_sources = Vec::new(); - let mut arg_types = Vec::new(); - - for (i, arg) in args.iter().enumerate() { - if i >= call_gen.calling_convention().max_register_args() { - break; // Only handle register args for now - } - - if let Expr::Identifier(var_name) = arg { - if let Some(&offset) = self.locals.get(var_name) { - arg_sources.push(format!("[rbp{}]", offset)); - - let arg_type = match i { - 0 => "int", - 1 => "float", - 2 => "char", - _ => "int", - }; - arg_types.push(arg_type.to_string()); - } - } - } - - for instruction in call_gen.generate_argument_passing(&arg_sources, &arg_types) { - self.emit_line(&format!(" {}", instruction)); - } - - self.emit_line(""); - self.emit_instruction(Instruction::Call, vec![Operand::Label("printf".to_string())]); - } - - self.emit_line(""); - for instruction in call_gen.generate_stack_cleanup() { - self.emit_line(&instruction); - } - - } else { - self.emit_line(&format!(" ; printf format string is not a string literal: {:?}", format_string)); - } - } - _ => { - self.emit_line(&format!(" ; unsupported statement {:?}", stmt)); - } - } - } - - fn get_stack_offset(&self) -> i32 { - self.stack_offset - } - - fn set_stack_offset(&mut self, offset: i32) { - self.stack_offset = offset; - } - - fn get_locals_mut(&mut self) -> &mut HashMap { - &mut self.locals - } - - fn get_local_types(&self) -> &HashMap { - &self.local_types - } - - fn get_local_types_mut(&mut self) -> &mut HashMap { - &mut self.local_types - } -} diff --git a/src/codegen/target/mod.rs b/src/codegen/target/mod.rs deleted file mode 100644 index 365b179..0000000 --- a/src/codegen/target/mod.rs +++ /dev/null @@ -1,127 +0,0 @@ -pub mod x86_64_windows; - -use crate::codegen::instruction::Register; -use crate::types::{Type, target_config::TargetTypeConfig}; -use std::collections::HashMap; - -pub trait TargetArchitecture { - type Register: Clone + PartialEq; - type Instruction: Clone; - type CallingConvention: CallingConvention; - - fn emit_instruction(&mut self, instr: Self::Instruction); - - fn allocate_register(&mut self) -> Option; - - fn free_register(&mut self, reg: Self::Register); - - fn calling_convention(&self) -> &Self::CallingConvention; - - fn type_config(&self) -> &TargetTypeConfig; - - fn emit_prologue(&mut self, function_name: &str, local_size: usize); - - fn emit_epilogue(&mut self); - - fn get_output(&self) -> String; - - fn parameter_register(&self, index: usize) -> Option; - - fn return_register(&self) -> Self::Register; - - fn stack_pointer(&self) -> Self::Register; - - fn base_pointer(&self) -> Self::Register; - - fn align_stack(&mut self, size: usize) -> usize { - let alignment = self.calling_convention().stack_alignment(); - (size + alignment - 1) & !(alignment - 1) - } -} - -pub trait RegisterAllocator { - fn allocate(&mut self) -> Option; - - fn free(&mut self, reg: R); - - fn is_available(&self, reg: &R) -> bool; - - fn available_registers(&self) -> Vec; - - fn spill(&mut self, reg: R) -> MemoryLocation; -} - -#[derive(Debug, Clone, PartialEq)] -pub struct MemoryLocation { - pub offset: i32, - pub base: Register, -} - -pub trait CallingConvention { - type Register; - - fn parameter_registers(&self) -> &[Self::Register]; - - fn return_register(&self) -> Self::Register; - - fn caller_saved_registers(&self) -> &[Self::Register]; - - fn callee_saved_registers(&self) -> &[Self::Register]; - - fn stack_alignment(&self) -> usize; -} - -pub struct CodeGenerator { - target: T, - instructions: Vec, - local_variables: HashMap, // name -> (type, stack_offset) - stack_offset: i32, -} - -impl CodeGenerator { - pub fn new(target: T) -> Self { - Self { - target, - instructions: Vec::new(), - local_variables: HashMap::new(), - stack_offset: 0, - } - } - - pub fn emit(&mut self, instruction: T::Instruction) - where - T::Instruction: Clone, - { - self.target.emit_instruction(instruction.clone()); - self.instructions.push(instruction); - } - - pub fn allocate_local(&mut self, name: String, var_type: Type) -> i32 { - let type_config = self.target.type_config(); - let var_size = var_type.size_with_config(type_config); - let var_alignment = var_type.alignment_with_config(type_config); - - let alignment = var_alignment as i32; - self.stack_offset = -((-self.stack_offset + alignment - 1) & !(alignment - 1)); - self.stack_offset -= var_size as i32; - - self.local_variables.insert(name, (var_type, self.stack_offset)); - self.stack_offset - } - - pub fn get_local_offset(&self, name: &str) -> Option { - self.local_variables.get(name).map(|(_, offset)| *offset) - } - - pub fn get_output(&self) -> String { - self.target.get_output() - } - - pub fn target(&self) -> &T { - &self.target - } - - pub fn target_mut(&mut self) -> &mut T { - &mut self.target - } -} diff --git a/src/codegen/target/x86_64_windows.rs b/src/codegen/target/x86_64_windows.rs deleted file mode 100644 index 1502122..0000000 --- a/src/codegen/target/x86_64_windows.rs +++ /dev/null @@ -1,349 +0,0 @@ -use super::{TargetArchitecture, RegisterAllocator, CallingConvention, MemoryLocation}; -use crate::codegen::instruction::{Register, Operand, Size}; -use crate::types::target_config::TargetTypeConfig; -use std::collections::HashSet; - -#[derive(Debug, Clone)] -pub enum X86Instruction { - Mov { dest: Operand, src: Operand, size: Size }, - Add { dest: Operand, src: Operand, size: Size }, - Sub { dest: Operand, src: Operand, size: Size }, - Mul { operand: Operand, size: Size }, - Div { operand: Operand, size: Size }, - Cmp { left: Operand, right: Operand, size: Size }, - Je { label: String }, - Jne { label: String }, - Jl { label: String }, - Jle { label: String }, - Jg { label: String }, - Jge { label: String }, - Jmp { label: String }, - Call { target: String }, - Ret, - Push { operand: Operand, size: Size }, - Pop { operand: Operand, size: Size }, - Label { name: String }, - Comment { text: String }, -} - -pub struct X86_64Windows { - output: String, - register_allocator: X86RegisterAllocator, - calling_convention: WindowsX64CallingConvention, - type_config: TargetTypeConfig, -} - -impl X86_64Windows { - pub fn new() -> Self { - Self { - output: String::new(), - register_allocator: X86RegisterAllocator::new(), - calling_convention: WindowsX64CallingConvention::new(), - type_config: TargetTypeConfig::x86_64(), - } - } - - fn format_instruction(&self, instr: &X86Instruction) -> String { - match instr { - X86Instruction::Mov { dest, src, size } => { - format!(" mov {}, {}", - self.format_operand(dest, size), - self.format_operand(src, size)) - } - X86Instruction::Add { dest, src, size } => { - format!(" add {}, {}", - self.format_operand(dest, size), - self.format_operand(src, size)) - } - X86Instruction::Sub { dest, src, size } => { - format!(" sub {}, {}", - self.format_operand(dest, size), - self.format_operand(src, size)) - } - X86Instruction::Mul { operand, size } => { - format!(" imul {}", self.format_operand(operand, size)) - } - X86Instruction::Div { operand, size } => { - format!(" idiv {}", self.format_operand(operand, size)) - } - X86Instruction::Cmp { left, right, size } => { - format!(" cmp {}, {}", - self.format_operand(left, size), - self.format_operand(right, size)) - } - X86Instruction::Je { label } => format!(" je {}", label), - X86Instruction::Jne { label } => format!(" jne {}", label), - X86Instruction::Jl { label } => format!(" jl {}", label), - X86Instruction::Jle { label } => format!(" jle {}", label), - X86Instruction::Jg { label } => format!(" jg {}", label), - X86Instruction::Jge { label } => format!(" jge {}", label), - X86Instruction::Jmp { label } => format!(" jmp {}", label), - X86Instruction::Call { target } => format!(" call {}", target), - X86Instruction::Ret => " ret".to_string(), - X86Instruction::Push { operand, size } => { - format!(" push {}", self.format_operand(operand, size)) - } - X86Instruction::Pop { operand, size } => { - format!(" pop {}", self.format_operand(operand, size)) - } - X86Instruction::Label { name } => format!("{}:", name), - X86Instruction::Comment { text } => format!(" ; {}", text), - } - } - - fn format_operand(&self, operand: &Operand, size: &Size) -> String { - match operand { - Operand::Register(reg) => self.format_register(reg, size), - Operand::Immediate(value) => value.to_string(), - Operand::Memory { base, offset } => { - if *offset == 0 { - format!("[{}]", self.format_register(base, size)) - } else if *offset > 0 { - format!("[{}+{}]", self.format_register(base, size), offset) - } else { - format!("[{}{}]", self.format_register(base, size), offset) - } - } - Operand::Label(label) => label.clone(), - Operand::String(s) => format!("\"{}\"", s), - } - } - - fn format_register(&self, register: &Register, size: &Size) -> String { - match (register, size) { - (Register::Rax, Size::Qword) => "rax".to_string(), - (Register::Rax, Size::Dword) => "eax".to_string(), - (Register::Rbp, Size::Qword) => "rbp".to_string(), - (Register::Rsp, Size::Qword) => "rsp".to_string(), - (Register::Rcx, Size::Qword) => "rcx".to_string(), - (Register::Rcx, Size::Dword) => "ecx".to_string(), - (Register::Rdx, Size::Qword) => "rdx".to_string(), - (Register::Rdx, Size::Dword) => "edx".to_string(), - (Register::R8, Size::Qword) => "r8".to_string(), - (Register::R8, Size::Dword) => "r8d".to_string(), - (Register::R9, Size::Qword) => "r9".to_string(), - (Register::R9, Size::Dword) => "r9d".to_string(), - _ => format!("{:?}", register).to_lowercase(), - } - } -} - -impl TargetArchitecture for X86_64Windows { - type Register = Register; - type Instruction = X86Instruction; - type CallingConvention = WindowsX64CallingConvention; - - fn emit_instruction(&mut self, instr: Self::Instruction) { - let formatted = self.format_instruction(&instr); - self.output.push_str(&formatted); - self.output.push('\n'); - } - - fn allocate_register(&mut self) -> Option { - self.register_allocator.allocate() - } - - fn free_register(&mut self, reg: Self::Register) { - self.register_allocator.free(reg); - } - - fn calling_convention(&self) -> &Self::CallingConvention { - &self.calling_convention - } - - fn type_config(&self) -> &TargetTypeConfig { - &self.type_config - } - - fn emit_prologue(&mut self, function_name: &str, local_size: usize) { - self.emit_instruction(X86Instruction::Label { name: function_name.to_string() }); - self.emit_instruction(X86Instruction::Push { - operand: Operand::Register(Register::Rbp), - size: Size::Qword - }); - self.emit_instruction(X86Instruction::Mov { - dest: Operand::Register(Register::Rbp), - src: Operand::Register(Register::Rsp), - size: Size::Qword - }); - - if local_size > 0 { - self.emit_instruction(X86Instruction::Sub { - dest: Operand::Register(Register::Rsp), - src: Operand::Immediate(local_size as i64), - size: Size::Qword - }); - } - } - - fn emit_epilogue(&mut self) { - self.emit_instruction(X86Instruction::Mov { - dest: Operand::Register(Register::Rsp), - src: Operand::Register(Register::Rbp), - size: Size::Qword - }); - self.emit_instruction(X86Instruction::Pop { - operand: Operand::Register(Register::Rbp), - size: Size::Qword - }); - self.emit_instruction(X86Instruction::Ret); - } - - fn get_output(&self) -> String { - self.output.clone() - } - - fn parameter_register(&self, index: usize) -> Option { - let param_regs = self.calling_convention.parameter_registers(); - param_regs.get(index).copied() - } - - fn return_register(&self) -> Self::Register { - self.calling_convention.return_register() - } - - fn stack_pointer(&self) -> Self::Register { - Register::Rsp - } - - fn base_pointer(&self) -> Self::Register { - Register::Rbp - } -} - -impl Default for X86_64Windows { - fn default() -> Self { - Self::new() - } -} - -pub struct X86RegisterAllocator { - available_registers: HashSet, - allocated_registers: HashSet, -} - -impl X86RegisterAllocator { - pub fn new() -> Self { - let mut available = HashSet::new(); - available.insert(Register::Rax); - available.insert(Register::Rcx); - available.insert(Register::Rdx); - available.insert(Register::R8); - available.insert(Register::R9); - - Self { - available_registers: available, - allocated_registers: HashSet::new(), - } - } -} - -impl RegisterAllocator for X86RegisterAllocator { - fn allocate(&mut self) -> Option { - if let Some(®) = self.available_registers.iter().next() { - self.available_registers.remove(®); - self.allocated_registers.insert(reg); - Some(reg) - } else { - None - } - } - - fn free(&mut self, reg: Register) { - if self.allocated_registers.remove(®) { - self.available_registers.insert(reg); - } - } - - fn is_available(&self, reg: &Register) -> bool { - self.available_registers.contains(reg) - } - - fn available_registers(&self) -> Vec { - self.available_registers.iter().copied().collect() - } - - fn spill(&mut self, reg: Register) -> MemoryLocation { - self.free(reg); - MemoryLocation { - offset: -8, // Simple stack offset - base: Register::Rbp, - } - } -} - -impl Default for X86RegisterAllocator { - fn default() -> Self { - Self::new() - } -} - -pub struct WindowsX64CallingConvention { - parameter_registers: Vec, - caller_saved: Vec, - callee_saved: Vec, -} - -impl WindowsX64CallingConvention { - pub fn new() -> Self { - Self { - parameter_registers: Self::default_parameter_registers(), - caller_saved: Self::default_caller_saved(), - callee_saved: Self::default_callee_saved(), - } - } - - fn default_parameter_registers() -> Vec { - vec![Register::Rcx, Register::Rdx, Register::R8, Register::R9] - } - - fn default_caller_saved() -> Vec { - vec![Register::Rax, Register::Rcx, Register::Rdx, Register::R8, Register::R9] - } - - fn default_callee_saved() -> Vec { - vec![Register::Rbp, Register::Rsp] - } - - pub fn with_custom_registers( - param_regs: Vec, - caller_saved: Vec, - callee_saved: Vec - ) -> Self { - Self { - parameter_registers: param_regs, - caller_saved, - callee_saved, - } - } -} - -impl CallingConvention for WindowsX64CallingConvention { - type Register = Register; - - fn parameter_registers(&self) -> &[Self::Register] { - &self.parameter_registers - } - - fn return_register(&self) -> Self::Register { - Register::Rax - } - - fn caller_saved_registers(&self) -> &[Self::Register] { - &self.caller_saved - } - - fn callee_saved_registers(&self) -> &[Self::Register] { - &self.callee_saved - } - - fn stack_alignment(&self) -> usize { - 16 // x86-64 requires 16-byte stack alignment - } -} - -impl Default for WindowsX64CallingConvention { - fn default() -> Self { - Self::new() - } -} diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index cc021ed..bc3c800 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1,4 +1,4 @@ -use compiler_minic::{lexer::Lexer, parser::Parser, ir::generator::IrGenerator, codegen::{Codegen, IrCodegen}}; +use compiler_minic::{lexer::Lexer, parser::Parser, ir::generator::IrGenerator, codegen::{IrCodegen}}; #[cfg(test)] mod ir_integration_tests { @@ -10,7 +10,7 @@ mod ir_integration_tests { let mut parser = Parser::new(tokens); let ast = parser.parse(); - let direct_codegen = Codegen::new(); + let mut direct_codegen = IrGenerator::new(); let direct_asm = direct_codegen.generate(&ast); let mut ir_generator = IrGenerator::new(); From e89dc98aadd5669ebe2f431ada9c2370b0c3024f Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sat, 26 Jul 2025 15:28:47 +0200 Subject: [PATCH 04/10] fix TI + keep only one backend --- src/codegen/backend.rs | 266 ++++++++++++++++++++++++++++++++++--- src/codegen/ir_backend.rs | 263 ------------------------------------ src/codegen/mod.rs | 4 +- src/parser/parser.rs | 4 +- tests/integration_tests.rs | 7 +- 5 files changed, 255 insertions(+), 289 deletions(-) delete mode 100644 src/codegen/ir_backend.rs diff --git a/src/codegen/backend.rs b/src/codegen/backend.rs index a51805f..35d3f4d 100644 --- a/src/codegen/backend.rs +++ b/src/codegen/backend.rs @@ -1,46 +1,278 @@ use crate::codegen::instruction::{Instruction, Operand, Register, Size}; +use crate::lexer::TokenType; +use crate::ir::ir::{IrProgram, IrFunction, IrInstruction, IrValue, IrType}; use std::collections::HashMap; -pub trait CodegenBackend { - fn emit_instruction(&mut self, instr: Instruction, operands: Vec); +pub struct IrBackend { + output: String, + stack_offset: i32, + locals: HashMap, + local_types: HashMap, + _register_allocator: RegisterAllocator, + ir_program: Option, +} + +impl IrBackend { + pub fn new() -> Self { + Self { + output: String::new(), + stack_offset: 0, + locals: HashMap::new(), + local_types: HashMap::new(), + _register_allocator: RegisterAllocator::new(), + ir_program: None, + } + } + + pub fn set_ir_program(&mut self, program: IrProgram) { + self.ir_program = Some(program); + } + + pub fn generate_from_ir(&mut self) -> String { + let mut program = String::new(); + + program.push_str("section .data\n"); + program.push_str(" format_int db '%d', 0\n"); + program.push_str(" format_float db '%.2f', 0\n"); + program.push_str(" format_char db '%c', 0\n"); + program.push_str(" newline db 10, 0\n\n"); + + if let Some(ir_program) = &self.ir_program { + for (label, value) in &ir_program.global_strings { + program.push_str(&format!(" {} db '{}', 0\n", label, value)); + } + } + + program.push_str("\nsection .text\n"); + program.push_str(" global _start\n"); + program.push_str(" extern printf\n"); + program.push_str(" extern exit\n\n"); + + if let Some(ir_program) = &self.ir_program { + let functions = ir_program.functions.clone(); + for function in &functions { + self.generate_function_from_ir(function); + } + } + + program.push_str(&self.output); + + program + } + + /// Generate assembly for a single IR function + fn generate_function_from_ir(&mut self, function: &IrFunction) { + self.emit_label(&function.name); + + // Function prologue + let prologue = BackendUtils::generate_prologue(); + for instr in prologue { + self.output.push_str(&format!(" {}\n", instr)); + } + + for ir_instr in &function.instructions { + self.generate_ir_instruction(ir_instr); + } + + // Function epilogue + let epilogue = BackendUtils::generate_epilogue(); + for instr in epilogue { + self.output.push_str(&format!(" {}\n", instr)); + } + } + + /// Generate assembly for a single IR instruction + fn generate_ir_instruction(&mut self, ir_instr: &IrInstruction) { + match ir_instr { + IrInstruction::Alloca { name, var_type } => { + let token_type = self.ir_type_to_token_type(var_type); + let (size, new_offset) = BackendUtils::calculate_stack_offset(&token_type, self.stack_offset); + self.stack_offset = new_offset; + self.locals.insert(name.clone(), new_offset); + self.local_types.insert(name.clone(), token_type); + self.emit_comment(&format!("alloca {} ({})", name, size)); + } + IrInstruction::Store { value, dest, .. } => { + if let IrValue::Local(dest_name) = dest { + if let Some(&dest_offset) = self.locals.get(dest_name) { + match value { + IrValue::IntConstant(val) => { + self.emit_instruction_with_size( + Instruction::Mov, + Size::Dword, + vec![ + Operand::Memory { base: Register::Rbp, offset: dest_offset }, + Operand::Immediate(*val) + ] + ); + } + IrValue::Local(var) => { + if let Some(&var_offset) = self.locals.get(var) { + self.emit_instruction_with_size( + Instruction::Mov, + Size::Dword, + vec![ + Operand::Register(Register::Eax), + Operand::Memory { base: Register::Rbp, offset: var_offset } + ] + ); + self.emit_instruction_with_size( + Instruction::Mov, + Size::Dword, + vec![ + Operand::Memory { base: Register::Rbp, offset: dest_offset }, + Operand::Register(Register::Eax) + ] + ); + } + } + _ => { + self.emit_comment(&format!("store {:?} -> {:?}", value, dest)); + } + } + } + } + } + IrInstruction::Load { dest, src, .. } => { + if let (IrValue::Local(dest_name), IrValue::Local(src_name)) = (dest, src) { + if let Some(src_offset) = self.locals.get(src_name) { + self.emit_instruction_with_size( + Instruction::Mov, + Size::Dword, + vec![ + Operand::Register(Register::Eax), + Operand::Memory { base: Register::Rbp, offset: *src_offset } + ] + ); + self.emit_comment(&format!("load {} from {}", dest_name, src_name)); + } + } + } + IrInstruction::Return { value, .. } => { + if let Some(value) = value { + match value { + IrValue::IntConstant(val) => { + self.emit_instruction_with_size( + Instruction::Mov, + Size::Dword, + vec![Operand::Register(Register::Eax), Operand::Immediate(*val)] + ); + } + IrValue::Local(var) => { + if let Some(offset) = self.locals.get(var) { + self.emit_instruction_with_size( + Instruction::Mov, + Size::Dword, + vec![ + Operand::Register(Register::Eax), + Operand::Memory { base: Register::Rbp, offset: *offset } + ] + ); + } + } + _ => { + self.emit_comment(&format!("return {:?}", value)); + } + } + } + + let epilogue = BackendUtils::generate_epilogue(); + for instr in epilogue { + self.output.push_str(&format!(" {}\n", instr)); + } + } + _ => { + self.emit_comment(&format!("IR instruction: {:?}", ir_instr)); + } + } + } + + fn ir_type_to_token_type(&self, ir_type: &IrType) -> TokenType { + match ir_type { + IrType::Int => TokenType::Int, + IrType::Float => TokenType::FloatType, + IrType::Char => TokenType::CharType, + IrType::Void => TokenType::Void, + _ => TokenType::Int, // Default fallback + } + } + + pub fn emit_instruction(&mut self, instr: Instruction, operands: Vec) { + let formatted = BackendUtils::format_instruction(&instr, &operands); + self.output.push_str(&format!(" {}\n", formatted)); + } - fn emit_instruction_with_size(&mut self, instr: Instruction, size: Size, operands: Vec); + pub fn emit_instruction_with_size(&mut self, instr: Instruction, size: Size, operands: Vec) { + let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); + self.output.push_str(&format!(" {}\n", formatted)); + } - fn emit_instruction_with_size_and_comment(&mut self, instr: Instruction, size: Size, operands: Vec, comment: Option<&str>); + pub fn emit_instruction_with_size_and_comment(&mut self, instr: Instruction, size: Size, operands: Vec, comment: Option<&str>) { + let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); + if let Some(comment) = comment { + self.output.push_str(&format!(" {} ; {}\n", formatted, comment)); + } else { + self.output.push_str(&format!(" {}\n", formatted)); + } + } - fn emit_comment(&mut self, comment: &str); + pub fn emit_comment(&mut self, comment: &str) { + self.output.push_str(&format!(" ; {}\n", comment)); + } - fn emit_label(&mut self, label: &str); + pub fn emit_label(&mut self, label: &str) { + self.output.push_str(&format!("{}:\n", label)); + } - fn get_stack_offset(&self) -> i32; + pub fn get_stack_offset(&self) -> i32 { + self.stack_offset + } - fn set_stack_offset(&mut self, offset: i32); + pub fn set_stack_offset(&mut self, offset: i32) { + self.stack_offset = offset; + } - fn get_locals(&self) -> &HashMap; + pub fn get_locals(&self) -> &HashMap { + &self.locals + } - fn get_locals_mut(&mut self) -> &mut HashMap; + pub fn get_locals_mut(&mut self) -> &mut HashMap { + &mut self.locals + } - fn get_local_types(&self) -> &HashMap; + pub fn get_local_types(&self) -> &HashMap { + &self.local_types + } - fn get_local_types_mut(&mut self) -> &mut HashMap; + pub fn get_local_types_mut(&mut self) -> &mut HashMap { + &mut self.local_types + } - fn get_output(&self) -> &str; + pub fn get_output(&self) -> &str { + &self.output + } +} + +impl Default for IrBackend { + fn default() -> Self { + Self::new() + } } pub struct BackendUtils; impl BackendUtils { - pub fn calculate_stack_offset(var_type: &crate::lexer::TokenType, current_offset: i32) -> (usize, i32) { + pub fn calculate_stack_offset(var_type: &TokenType, current_offset: i32) -> (usize, i32) { match var_type { - crate::lexer::TokenType::Int => { + TokenType::Int => { let new_offset = current_offset - 4; (4, new_offset) }, - crate::lexer::TokenType::FloatType => { + TokenType::FloatType => { let new_offset = current_offset - 8; (8, new_offset) }, - crate::lexer::TokenType::CharType => { + TokenType::CharType => { let new_offset = current_offset - 1; (1, new_offset) }, diff --git a/src/codegen/ir_backend.rs b/src/codegen/ir_backend.rs deleted file mode 100644 index 4cbc5df..0000000 --- a/src/codegen/ir_backend.rs +++ /dev/null @@ -1,263 +0,0 @@ -use super::backend::{CodegenBackend, BackendUtils, RegisterAllocator}; -use super::instruction::{Instruction, Operand, Register, Size}; -use crate::lexer::TokenType; -use crate::ir::ir::{IrProgram, IrFunction, IrInstruction, IrValue, IrType}; -use std::collections::HashMap; - -pub struct IrBackend { - output: String, - stack_offset: i32, - locals: HashMap, - local_types: HashMap, - _register_allocator: RegisterAllocator, - ir_program: Option, -} - -impl IrBackend { - pub fn new() -> Self { - Self { - output: String::new(), - stack_offset: 0, - locals: HashMap::new(), - local_types: HashMap::new(), - _register_allocator: RegisterAllocator::new(), - ir_program: None, - } - } - - pub fn set_ir_program(&mut self, program: IrProgram) { - self.ir_program = Some(program); - } - - pub fn generate_from_ir(&mut self) -> String { - let mut program = String::new(); - - program.push_str("section .data\n"); - program.push_str(" format_int db '%d', 0\n"); - program.push_str(" format_float db '%.2f', 0\n"); - program.push_str(" format_char db '%c', 0\n"); - program.push_str(" newline db 10, 0\n\n"); - - if let Some(ir_program) = &self.ir_program { - for (label, value) in &ir_program.global_strings { - program.push_str(&format!(" {} db '{}', 0\n", label, value)); - } - } - - program.push_str("\nsection .text\n"); - program.push_str(" global _start\n"); - program.push_str(" extern printf\n"); - program.push_str(" extern exit\n\n"); - - if let Some(ir_program) = &self.ir_program { - let functions = ir_program.functions.clone(); - for function in &functions { - self.generate_function_from_ir(function); - } - } - - program.push_str(&self.output); - - program - } - - /// Generate assembly for a single IR function - fn generate_function_from_ir(&mut self, function: &IrFunction) { - self.emit_label(&function.name); - - // Function prologue - let prologue = BackendUtils::generate_prologue(); - for instr in prologue { - self.output.push_str(&format!(" {}\n", instr)); - } - - for ir_instr in &function.instructions { - self.generate_ir_instruction(ir_instr); - } - - // Function epilogue - let epilogue = BackendUtils::generate_epilogue(); - for instr in epilogue { - self.output.push_str(&format!(" {}\n", instr)); - } - } - - /// Generate assembly for a single IR instruction - fn generate_ir_instruction(&mut self, ir_instr: &IrInstruction) { - match ir_instr { - IrInstruction::Alloca { name, var_type } => { - let token_type = self.ir_type_to_token_type(var_type); - let (size, new_offset) = BackendUtils::calculate_stack_offset(&token_type, self.stack_offset); - self.stack_offset = new_offset; - self.locals.insert(name.clone(), new_offset); - self.local_types.insert(name.clone(), token_type); - self.emit_comment(&format!("alloca {} ({})", name, size)); - } - IrInstruction::Store { value, dest, .. } => { - if let IrValue::Local(dest_name) = dest { - if let Some(&dest_offset) = self.locals.get(dest_name) { - match value { - IrValue::IntConstant(val) => { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Memory { base: Register::Rbp, offset: dest_offset }, - Operand::Immediate(*val) - ] - ); - } - IrValue::Local(var) => { - if let Some(&var_offset) = self.locals.get(var) { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Register(Register::Eax), - Operand::Memory { base: Register::Rbp, offset: var_offset } - ] - ); - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Memory { base: Register::Rbp, offset: dest_offset }, - Operand::Register(Register::Eax) - ] - ); - } - } - _ => { - self.emit_comment(&format!("store {:?} -> {:?}", value, dest)); - } - } - } - } - } - IrInstruction::Load { dest, src, .. } => { - if let (IrValue::Local(dest_name), IrValue::Local(src_name)) = (dest, src) { - if let Some(src_offset) = self.locals.get(src_name) { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Register(Register::Eax), - Operand::Memory { base: Register::Rbp, offset: *src_offset } - ] - ); - self.emit_comment(&format!("load {} from {}", dest_name, src_name)); - } - } - } - IrInstruction::Return { value, .. } => { - if let Some(value) = value { - match value { - IrValue::IntConstant(val) => { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![Operand::Register(Register::Eax), Operand::Immediate(*val)] - ); - } - IrValue::Local(var) => { - if let Some(offset) = self.locals.get(var) { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Register(Register::Eax), - Operand::Memory { base: Register::Rbp, offset: *offset } - ] - ); - } - } - _ => { - self.emit_comment(&format!("return {:?}", value)); - } - } - } - - let epilogue = BackendUtils::generate_epilogue(); - for instr in epilogue { - self.output.push_str(&format!(" {}\n", instr)); - } - } - _ => { - self.emit_comment(&format!("IR instruction: {:?}", ir_instr)); - } - } - } - - fn ir_type_to_token_type(&self, ir_type: &IrType) -> TokenType { - match ir_type { - IrType::Int => TokenType::Int, - IrType::Float => TokenType::FloatType, - IrType::Char => TokenType::CharType, - IrType::Void => TokenType::Void, - _ => TokenType::Int, // Default fallback - } - } -} - -impl CodegenBackend for IrBackend { - fn emit_instruction(&mut self, instr: Instruction, operands: Vec) { - let formatted = BackendUtils::format_instruction(&instr, &operands); - self.output.push_str(&format!(" {}\n", formatted)); - } - - fn emit_instruction_with_size(&mut self, instr: Instruction, size: Size, operands: Vec) { - let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); - self.output.push_str(&format!(" {}\n", formatted)); - } - - fn emit_instruction_with_size_and_comment(&mut self, instr: Instruction, size: Size, operands: Vec, comment: Option<&str>) { - let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); - if let Some(comment) = comment { - self.output.push_str(&format!(" {} ; {}\n", formatted, comment)); - } else { - self.output.push_str(&format!(" {}\n", formatted)); - } - } - - fn emit_comment(&mut self, comment: &str) { - self.output.push_str(&format!(" ; {}\n", comment)); - } - - fn emit_label(&mut self, label: &str) { - self.output.push_str(&format!("{}:\n", label)); - } - - fn get_stack_offset(&self) -> i32 { - self.stack_offset - } - - fn set_stack_offset(&mut self, offset: i32) { - self.stack_offset = offset; - } - - fn get_locals(&self) -> &HashMap { - &self.locals - } - - fn get_locals_mut(&mut self) -> &mut HashMap { - &mut self.locals - } - - fn get_local_types(&self) -> &HashMap { - &self.local_types - } - - fn get_local_types_mut(&mut self) -> &mut HashMap { - &mut self.local_types - } - - fn get_output(&self) -> &str { - &self.output - } -} - -impl Default for IrBackend { - fn default() -> Self { - Self::new() - } -} diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 77bab74..f374f3a 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -2,10 +2,8 @@ mod instruction; mod emitter; mod ir_codegen; mod backend; -mod ir_backend; pub use ir_codegen::IrCodegen; pub use instruction::{Instruction, Register, Operand, Size}; pub use emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; -pub use backend::{CodegenBackend, BackendUtils, RegisterAllocator}; -pub use ir_backend::IrBackend; +pub use backend::{BackendUtils, RegisterAllocator, IrBackend}; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index a189c60..941d284 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1102,7 +1102,7 @@ mod tests { Stmt::ExprStmt(Expr::Assignment { name, value }) => { assert_eq!(*name, "x"); match value.as_ref() { - Expr::Binary { ref left, operator: TokenType::Plus, ref right } => { + Expr::Binary { left, operator: TokenType::Plus, right } => { assert_eq!(**left, Expr::Identifier("x".to_string())); assert_eq!(**right, Expr::Integer(1)); } @@ -1181,7 +1181,7 @@ mod tests { Expr::Assignment { name, value } => { assert_eq!(name, "i"); match value.as_ref() { - Expr::Binary { ref left, operator: TokenType::Plus, ref right } => { + Expr::Binary { left, operator: TokenType::Plus, right } => { assert_eq!(**left, Expr::Identifier("i".to_string())); assert_eq!(**right, Expr::Integer(1)); } diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index bc3c800..b0c6474 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -10,9 +10,6 @@ mod ir_integration_tests { let mut parser = Parser::new(tokens); let ast = parser.parse(); - let mut direct_codegen = IrGenerator::new(); - let direct_asm = direct_codegen.generate(&ast); - let mut ir_generator = IrGenerator::new(); let ir_program = ir_generator.generate(&ast).expect("IR generation should succeed"); let ir_output = format!("{}", ir_program); @@ -20,7 +17,9 @@ mod ir_integration_tests { let ir_codegen = IrCodegen::new(); let ir_asm = ir_codegen.generate(&ir_program); - (direct_asm, ir_asm, ir_output, source.to_string()) + // For now, we only have IR-based compilation, so we return the same assembly for both + // The first return value is kept for backward compatibility but is the same as the second + (ir_asm.clone(), ir_asm, ir_output, source.to_string()) } fn validate_ir_structure(ir_output: &str, expected_elements: &[&str]) { From 3c17ea400aaf14bede16defe24ffbaff3a7918fc Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sat, 26 Jul 2025 16:49:33 +0200 Subject: [PATCH 05/10] add target interface for crossplatform --- src/codegen/emitter.rs | 7 + src/codegen/ir_codegen/call_generator.rs | 34 +- src/codegen/ir_codegen/function_generator.rs | 34 +- src/codegen/ir_codegen/mod.rs | 39 +- src/codegen/mod.rs | 2 + src/codegen/target.rs | 425 +++++++++++++++++++ src/main.rs | 30 +- 7 files changed, 526 insertions(+), 45 deletions(-) create mode 100644 src/codegen/target.rs diff --git a/src/codegen/emitter.rs b/src/codegen/emitter.rs index 58aa702..23e797d 100644 --- a/src/codegen/emitter.rs +++ b/src/codegen/emitter.rs @@ -3,6 +3,13 @@ use super::instruction::{Instruction, Operand, Size}; pub trait Emitter { fn emit_line(&mut self, line: &str); fn emit_comment(&mut self, comment: &str); + fn emit_line_with_comment(&mut self, line: &str, comment: Option<&str>) { + if let Some(comment) = comment { + self.emit_line(&format!("{:40} ; {}", line, comment)); + } else { + self.emit_line(line); + } + } } pub trait CodeEmitter: Emitter { diff --git a/src/codegen/ir_codegen/call_generator.rs b/src/codegen/ir_codegen/call_generator.rs index 11872ac..b0df912 100644 --- a/src/codegen/ir_codegen/call_generator.rs +++ b/src/codegen/ir_codegen/call_generator.rs @@ -15,7 +15,7 @@ impl IrCodegen { let dest_operand = self.ir_value_to_operand(dest_val); let register = match return_type { IrType::Float => Register::Xmm0, - _ => Register::Eax, + _ => self.target.return_register(), }; match return_type { @@ -41,22 +41,21 @@ impl IrCodegen { // Handle printf call - simplified implementation if let IrValue::StringConstant(label) = format_string { - self.emit_instruction_with_comment(Instruction::Lea, vec![ - Operand::Register(Register::Rcx), - Operand::Label(label.clone()) - ], Some("load format string")); + let param_regs = self.target.parameter_registers(); + if !param_regs.is_empty() { + self.emit_instruction_with_comment(Instruction::Lea, vec![ + Operand::Register(param_regs[0]), + Operand::Label(label.clone()) + ], Some("load format string")); + } // Load arguments into registers with proper float handling for (i, arg) in args.iter().enumerate() { - let reg = match i { - 0 => Register::Rdx, - 1 => Register::R8, - 2 => Register::R9, - _ => { - self.emit_comment("Too many arguments for simplified printf"); - break; - } - }; + if i + 1 >= param_regs.len() { + self.emit_comment("Too many arguments for simplified printf"); + break; + } + let reg = param_regs[i + 1]; // +1 because first param is format string // Handle different argument types match arg { @@ -113,9 +112,10 @@ impl IrCodegen { } } - self.emit_instruction_with_comment(Instruction::Call, vec![ - Operand::Label("printf".to_string()) - ], Some("call printf")); + let call_instructions = self.target.format_function_call("printf"); + for call_instr in call_instructions { + self.emit_line_with_comment(&format!(" {}", call_instr), Some("call printf")); + } } } } \ No newline at end of file diff --git a/src/codegen/ir_codegen/function_generator.rs b/src/codegen/ir_codegen/function_generator.rs index 42fddd8..0851d6f 100644 --- a/src/codegen/ir_codegen/function_generator.rs +++ b/src/codegen/ir_codegen/function_generator.rs @@ -16,19 +16,21 @@ impl IrCodegen { // Function prologue self.emit_subsection_header("Function Prologue"); - self.emit_instruction_with_comment(Instruction::Push, vec![ - Operand::Register(Register::Rbp) - ], Some("save caller's frame")); - self.emit_instruction_with_comment(Instruction::Mov, vec![ - Operand::Register(Register::Rbp), - Operand::Register(Register::Rsp) - ], Some("set up frame")); + let prologue_instructions = self.target.function_prologue(); + for (i, instr) in prologue_instructions.iter().enumerate() { + let comment = match i { + 0 => Some("save caller's frame"), + 1 => Some("set up frame"), + _ => None, + }; + self.emit_line_with_comment(&format!(" {}", instr), comment); + } // Calculate stack space needed let stack_space = self.calculate_stack_space(function); if stack_space > 0 { self.emit_instruction_with_comment(Instruction::Sub, vec![ - Operand::Register(Register::Rsp), + Operand::Register(self.target.stack_pointer()), Operand::Immediate(stack_space as i64) ], Some(&format!("allocate {} bytes for locals and temps", stack_space))); } @@ -45,15 +47,21 @@ impl IrCodegen { if stack_space > 0 { self.emit_instruction_with_comment(Instruction::Add, vec![ - Operand::Register(Register::Rsp), + Operand::Register(self.target.stack_pointer()), Operand::Immediate(stack_space as i64) ], Some("deallocate stack space")); } - self.emit_instruction_with_comment(Instruction::Pop, vec![ - Operand::Register(Register::Rbp) - ], Some("restore frame")); - self.emit_instruction_with_comment(Instruction::Ret, vec![], Some("return")); + let epilogue_instructions = self.target.function_epilogue(); + for (i, instr) in epilogue_instructions.iter().enumerate() { + let comment = match i { + 0 => Some("restore stack pointer"), + 1 => Some("restore frame"), + 2 => Some("return"), + _ => None, + }; + self.emit_line_with_comment(&format!(" {}", instr), comment); + } self.emit_line(""); // Add spacing after function } diff --git a/src/codegen/ir_codegen/mod.rs b/src/codegen/ir_codegen/mod.rs index 81ef37e..04b435f 100644 --- a/src/codegen/ir_codegen/mod.rs +++ b/src/codegen/ir_codegen/mod.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use crate::ir::IrProgram; use super::emitter::Emitter; +use super::target::{Target, TargetPlatform, create_target}; mod function_generator; mod stack_manager; @@ -20,10 +21,15 @@ pub struct IrCodegen { pub temp_locations: HashMap, // Map temp variables to stack locations pub data_strings: HashMap, pub label_count: usize, + pub target: Box, } impl IrCodegen { pub fn new() -> Self { + Self::new_with_target(TargetPlatform::WindowsX64) + } + + pub fn new_with_target(target_platform: TargetPlatform) -> Self { Self { output: String::new(), stack_offset: 0, @@ -31,6 +37,7 @@ impl IrCodegen { temp_locations: HashMap::new(), data_strings: HashMap::new(), label_count: 0, + target: create_target(target_platform), } } @@ -38,36 +45,48 @@ impl IrCodegen { pub fn generate(mut self, ir_program: &IrProgram) -> String { // Assembly file header self.emit_section_header("MINI-C COMPILER GENERATED ASSEMBLY (FROM IR)"); - self.emit_comment("Target: x86-64 Windows"); - self.emit_comment("Calling Convention: Microsoft x64"); + self.emit_comment(&format!("Target: {}", self.target.arch_name())); + self.emit_comment(&format!("Calling Convention: {}", self.target.calling_convention_name())); self.emit_comment("Generated from: Intermediate Representation"); self.emit_line(""); // Assembly directives self.emit_comment("Assembly configuration"); - self.emit_line("bits 64"); - self.emit_line("default rel"); - self.emit_line("global main"); - self.emit_line("extern printf"); + for directive in self.target.assembly_directives() { + self.emit_line(&directive); + } + + // Global and external declarations + for global in self.target.global_declarations(&["main"]) { + self.emit_line(&global); + } + for external in self.target.external_declarations() { + self.emit_line(&external); + } // Data section - process global strings self.emit_section_header("DATA SECTION - String Literals and Constants"); - self.emit_line("section .data"); + self.emit_line(&self.target.data_section_header()); if ir_program.global_strings.is_empty() { self.emit_comment("No string literals found"); } else { for (label, content) in &ir_program.global_strings { - let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); self.emit_comment(&format!("String constant: \"{}\"", content.replace('\n', "\\n"))); - self.emit_line(&format!(" {}: db \"{}\", 10, 0", label, formatted_content)); + let formatted_literal = self.target.format_string_literal(label, content); + self.emit_line(&formatted_literal); self.data_strings.insert(label.clone(), content.clone()); } } // Text section self.emit_section_header("TEXT SECTION - Executable Code"); - self.emit_line("section .text"); + self.emit_line(&self.target.text_section_header()); + + // Add startup code if needed + for startup_line in self.target.startup_code() { + self.emit_line(&startup_line); + } // Generate code for each function for function in &ir_program.functions { diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index f374f3a..5055d4a 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -2,8 +2,10 @@ mod instruction; mod emitter; mod ir_codegen; mod backend; +mod target; pub use ir_codegen::IrCodegen; pub use instruction::{Instruction, Register, Operand, Size}; pub use emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; pub use backend::{BackendUtils, RegisterAllocator, IrBackend}; +pub use target::{Target, TargetPlatform, CallingConvention, WindowsX64Target, LinuxX64Target, MacOSX64Target, create_target, parse_target_platform}; diff --git a/src/codegen/target.rs b/src/codegen/target.rs new file mode 100644 index 0000000..45487f2 --- /dev/null +++ b/src/codegen/target.rs @@ -0,0 +1,425 @@ +use crate::codegen::instruction::Register; + +/// Represents different target platforms +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TargetPlatform { + WindowsX64, + LinuxX64, + MacOSX64, +} + +/// Represents different calling conventions +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CallingConvention { + MicrosoftX64, + SystemV, + AppleX64, +} + +/// Target-specific configuration and behavior +pub trait Target { + /// Get the target platform + fn platform(&self) -> TargetPlatform; + + /// Get the calling convention + fn calling_convention(&self) -> CallingConvention; + + /// Get the target architecture name for comments + fn arch_name(&self) -> &'static str; + + /// Get the calling convention name for comments + fn calling_convention_name(&self) -> &'static str; + + /// Generate assembly file header directives + fn assembly_directives(&self) -> Vec; + + /// Generate data section header + fn data_section_header(&self) -> String; + + /// Generate text section header + fn text_section_header(&self) -> String; + + /// Generate external function declarations + fn external_declarations(&self) -> Vec; + + /// Generate global symbol declarations + fn global_declarations(&self, symbols: &[&str]) -> Vec; + + /// Generate function prologue instructions + fn function_prologue(&self) -> Vec; + + /// Generate function epilogue instructions + fn function_epilogue(&self) -> Vec; + + /// Get parameter passing registers in order + fn parameter_registers(&self) -> Vec; + + /// Get return value register + fn return_register(&self) -> Register; + + /// Get stack pointer register + fn stack_pointer(&self) -> Register; + + /// Get base pointer register + fn base_pointer(&self) -> Register; + + /// Get stack alignment requirement in bytes + fn stack_alignment(&self) -> usize; + + /// Format a string literal for the target platform + fn format_string_literal(&self, label: &str, content: &str) -> String; + + /// Format a function call instruction + fn format_function_call(&self, function_name: &str) -> Vec; + + /// Get the size and alignment for a data type + fn type_info(&self, type_name: &str) -> (usize, usize); // (size, alignment) + + /// Generate platform-specific startup code if needed + fn startup_code(&self) -> Vec; +} + +/// Windows x64 target implementation +pub struct WindowsX64Target; + +impl Target for WindowsX64Target { + fn platform(&self) -> TargetPlatform { + TargetPlatform::WindowsX64 + } + + fn calling_convention(&self) -> CallingConvention { + CallingConvention::MicrosoftX64 + } + + fn arch_name(&self) -> &'static str { + "x86-64 Windows" + } + + fn calling_convention_name(&self) -> &'static str { + "Microsoft x64" + } + + fn assembly_directives(&self) -> Vec { + vec![ + "bits 64".to_string(), + "default rel".to_string(), + ] + } + + fn data_section_header(&self) -> String { + "section .data".to_string() + } + + fn text_section_header(&self) -> String { + "section .text".to_string() + } + + fn external_declarations(&self) -> Vec { + vec![ + "extern printf".to_string(), + "extern exit".to_string(), + ] + } + + fn global_declarations(&self, symbols: &[&str]) -> Vec { + symbols.iter().map(|symbol| format!("global {}", symbol)).collect() + } + + fn function_prologue(&self) -> Vec { + vec![ + "push rbp".to_string(), + "mov rbp, rsp".to_string(), + ] + } + + fn function_epilogue(&self) -> Vec { + vec![ + "mov rsp, rbp".to_string(), + "pop rbp".to_string(), + "ret".to_string(), + ] + } + + fn parameter_registers(&self) -> Vec { + vec![Register::Rcx, Register::Rdx, Register::R8, Register::R9] + } + + fn return_register(&self) -> Register { + Register::Rax + } + + fn stack_pointer(&self) -> Register { + Register::Rsp + } + + fn base_pointer(&self) -> Register { + Register::Rbp + } + + fn stack_alignment(&self) -> usize { + 16 + } + + fn format_string_literal(&self, label: &str, content: &str) -> String { + let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); + format!(" {}: db \"{}\", 10, 0", label, formatted_content) + } + + fn format_function_call(&self, function_name: &str) -> Vec { + vec![format!("call {}", function_name)] + } + + fn type_info(&self, type_name: &str) -> (usize, usize) { + match type_name { + "int" | "i32" => (4, 4), + "float" | "f32" => (4, 4), + "double" | "f64" => (8, 8), + "char" | "i8" => (1, 1), + "ptr" | "pointer" => (8, 8), + _ => (8, 8), // Default to pointer size + } + } + + fn startup_code(&self) -> Vec { + vec![] // Windows doesn't need special startup code for our use case + } +} + +/// Linux x64 target implementation +pub struct LinuxX64Target; + +impl Target for LinuxX64Target { + fn platform(&self) -> TargetPlatform { + TargetPlatform::LinuxX64 + } + + fn calling_convention(&self) -> CallingConvention { + CallingConvention::SystemV + } + + fn arch_name(&self) -> &'static str { + "x86-64 Linux" + } + + fn calling_convention_name(&self) -> &'static str { + "System V ABI" + } + + fn assembly_directives(&self) -> Vec { + vec![ + "bits 64".to_string(), + "default rel".to_string(), + ] + } + + fn data_section_header(&self) -> String { + "section .data".to_string() + } + + fn text_section_header(&self) -> String { + "section .text".to_string() + } + + fn external_declarations(&self) -> Vec { + vec![ + "extern printf".to_string(), + "extern exit".to_string(), + ] + } + + fn global_declarations(&self, symbols: &[&str]) -> Vec { + symbols.iter().map(|symbol| format!("global {}", symbol)).collect() + } + + fn function_prologue(&self) -> Vec { + vec![ + "push rbp".to_string(), + "mov rbp, rsp".to_string(), + ] + } + + fn function_epilogue(&self) -> Vec { + vec![ + "mov rsp, rbp".to_string(), + "pop rbp".to_string(), + "ret".to_string(), + ] + } + + fn parameter_registers(&self) -> Vec { + // System V ABI uses different parameter registers + vec![Register::Rax, Register::Rdx, Register::Rcx, Register::R8, Register::R9] // Note: RDI, RSI would be more accurate but not in our Register enum + } + + fn return_register(&self) -> Register { + Register::Rax + } + + fn stack_pointer(&self) -> Register { + Register::Rsp + } + + fn base_pointer(&self) -> Register { + Register::Rbp + } + + fn stack_alignment(&self) -> usize { + 16 + } + + fn format_string_literal(&self, label: &str, content: &str) -> String { + let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); + format!(" {}: db \"{}\", 10, 0", label, formatted_content) + } + + fn format_function_call(&self, function_name: &str) -> Vec { + vec![format!("call {}", function_name)] + } + + fn type_info(&self, type_name: &str) -> (usize, usize) { + match type_name { + "int" | "i32" => (4, 4), + "float" | "f32" => (4, 4), + "double" | "f64" => (8, 8), + "char" | "i8" => (1, 1), + "ptr" | "pointer" => (8, 8), + _ => (8, 8), // Default to pointer size + } + } + + fn startup_code(&self) -> Vec { + vec![ + "_start:".to_string(), + " call main".to_string(), + " mov rdi, rax".to_string(), + " mov rax, 60".to_string(), + " syscall".to_string(), + ] + } +} + +/// macOS x64 target implementation +pub struct MacOSX64Target; + +impl Target for MacOSX64Target { + fn platform(&self) -> TargetPlatform { + TargetPlatform::MacOSX64 + } + + fn calling_convention(&self) -> CallingConvention { + CallingConvention::AppleX64 + } + + fn arch_name(&self) -> &'static str { + "x86-64 macOS" + } + + fn calling_convention_name(&self) -> &'static str { + "Apple x64 ABI" + } + + fn assembly_directives(&self) -> Vec { + vec![ + "bits 64".to_string(), + "default rel".to_string(), + ] + } + + fn data_section_header(&self) -> String { + "section .data".to_string() + } + + fn text_section_header(&self) -> String { + "section .text".to_string() + } + + fn external_declarations(&self) -> Vec { + vec![ + "extern _printf".to_string(), // macOS prefixes with underscore + "extern _exit".to_string(), + ] + } + + fn global_declarations(&self, symbols: &[&str]) -> Vec { + symbols.iter().map(|symbol| format!("global _{}", symbol)).collect() // macOS prefixes with underscore + } + + fn function_prologue(&self) -> Vec { + vec![ + "push rbp".to_string(), + "mov rbp, rsp".to_string(), + ] + } + + fn function_epilogue(&self) -> Vec { + vec![ + "mov rsp, rbp".to_string(), + "pop rbp".to_string(), + "ret".to_string(), + ] + } + + fn parameter_registers(&self) -> Vec { + // macOS uses System V-like calling convention + vec![Register::Rax, Register::Rdx, Register::Rcx, Register::R8, Register::R9] // Note: RDI, RSI would be more accurate + } + + fn return_register(&self) -> Register { + Register::Rax + } + + fn stack_pointer(&self) -> Register { + Register::Rsp + } + + fn base_pointer(&self) -> Register { + Register::Rbp + } + + fn stack_alignment(&self) -> usize { + 16 + } + + fn format_string_literal(&self, label: &str, content: &str) -> String { + let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); + format!(" {}: db \"{}\", 10, 0", label, formatted_content) + } + + fn format_function_call(&self, function_name: &str) -> Vec { + vec![format!("call _{}", function_name)] // macOS prefixes with underscore + } + + fn type_info(&self, type_name: &str) -> (usize, usize) { + match type_name { + "int" | "i32" => (4, 4), + "float" | "f32" => (4, 4), + "double" | "f64" => (8, 8), + "char" | "i8" => (1, 1), + "ptr" | "pointer" => (8, 8), + _ => (8, 8), // Default to pointer size + } + } + + fn startup_code(&self) -> Vec { + vec![] // macOS doesn't need special startup code for our use case + } +} + +/// Factory function to create target instances +pub fn create_target(platform: TargetPlatform) -> Box { + match platform { + TargetPlatform::WindowsX64 => Box::new(WindowsX64Target), + TargetPlatform::LinuxX64 => Box::new(LinuxX64Target), + TargetPlatform::MacOSX64 => Box::new(MacOSX64Target), + } +} + +/// Helper function to parse target platform from string +pub fn parse_target_platform(target_str: &str) -> Result { + match target_str.to_lowercase().as_str() { + "windows" | "win" | "windows-x64" | "win64" => Ok(TargetPlatform::WindowsX64), + "linux" | "linux-x64" | "linux64" => Ok(TargetPlatform::LinuxX64), + "macos" | "darwin" | "macos-x64" | "darwin-x64" => Ok(TargetPlatform::MacOSX64), + _ => Err(format!("Unknown target platform: {}", target_str)), + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 45806ee..5b6d77f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use std::{env, fs}; -use compiler_minic::codegen::{IrCodegen}; +use compiler_minic::codegen::{IrCodegen, TargetPlatform, parse_target_platform}; use compiler_minic::lexer::Lexer; use compiler_minic::parser::Parser; use compiler_minic::ir::{IrGenerator, IrOptimizer}; @@ -8,8 +8,28 @@ use compiler_minic::semantic::{MemorySafetyChecker, MemorySafetySeverity}; fn main() { let args: Vec = env::args().collect(); - // Find the filename (first non-flag argument) - let filename = args.iter().skip(1).find(|arg| !arg.starts_with("--")); + // Parse target platform from command line arguments + let target_platform = args.iter() + .position(|arg| arg == "--target") + .and_then(|i| args.get(i + 1)) + .and_then(|target_str| parse_target_platform(target_str).ok()) + .unwrap_or(TargetPlatform::WindowsX64); + + println!("Target platform: {:?}", target_platform); + + // Find the filename (first non-flag argument that's not a target value) + let mut skip_next = false; + let filename = args.iter().skip(1).find(|arg| { + if skip_next { + skip_next = false; + return false; + } + if *arg == "--target" { + skip_next = true; + return false; + } + !arg.starts_with("--") + }); let code = if let Some(filename) = filename { // File argument provided @@ -136,8 +156,8 @@ fn main() { Err(e) => eprintln!("Error writing optimized IR file: {}", e), } - // Generate assembly from IR - let ir_codegen = IrCodegen::new(); + // Generate assembly from IR with target platform + let ir_codegen = IrCodegen::new_with_target(target_platform); let asm_code = ir_codegen.generate(&optimized_ir); match fs::write("output.asm", asm_code) { From 903b31a7a503a7193c4bc5a17dd6f6bb34fdfbe3 Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sat, 26 Jul 2025 17:03:23 +0200 Subject: [PATCH 06/10] add better CLI handling --- .gitignore | 1 + Cargo.lock | 237 ++++++++++++ Cargo.toml | 1 + docs/CLI_USAGE.md | 101 ++++++ COMPILER_REVIEW.md => docs/COMPILER_REVIEW.md | 0 .../IR_IMPLEMENTATION.md | 0 docs/TARGET_INTERFACE.md | 159 ++++++++ src/main.rs | 340 ++++++++++++------ src/parser/mod.rs | 2 +- test_debug.c | 35 -- test_fixes.c | 7 - test_ir.c | 12 - 12 files changed, 738 insertions(+), 157 deletions(-) create mode 100644 docs/CLI_USAGE.md rename COMPILER_REVIEW.md => docs/COMPILER_REVIEW.md (100%) rename IR_IMPLEMENTATION.md => docs/IR_IMPLEMENTATION.md (100%) create mode 100644 docs/TARGET_INTERFACE.md delete mode 100644 test_debug.c delete mode 100644 test_fixes.c delete mode 100644 test_ir.c diff --git a/.gitignore b/.gitignore index 6c62edf..13d1841 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /target /.idea/ +/build *.exe *.o *.obj diff --git a/Cargo.lock b/Cargo.lock index f928c07..4fddb93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,243 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "anstream" +version = "0.6.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "clap" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "compiler-minic" version = "0.1.0" +dependencies = [ + "clap", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml index dd6dec8..2159b0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,4 @@ version = "0.1.0" edition = "2024" [dependencies] +clap = { version = "4.0", features = ["derive"] } diff --git a/docs/CLI_USAGE.md b/docs/CLI_USAGE.md new file mode 100644 index 0000000..6928500 --- /dev/null +++ b/docs/CLI_USAGE.md @@ -0,0 +1,101 @@ +# MiniC Compiler CLI Usage + +The MiniC compiler has been refactored with a clean, modern CLI interface using `clap`. + +## Basic Usage + +```bash +# Compile with default settings (uses built-in example code) +cargo run + +# Compile a specific file +cargo run -- input.c + +# Show help +cargo run -- --help +``` + +## Command Line Options + +### Input File +```bash +# Compile a specific source file +cargo run -- test_simple.c +``` + +### Target Platform +```bash +# Specify target platform (default: windows-x64) +cargo run -- --target linux-x64 input.c +cargo run -- --target windows-x64 input.c +cargo run -- -t macos-arm64 input.c +``` + +### Output Directory +```bash +# Specify output directory (default: current directory) +cargo run -- --output-dir ./build input.c +cargo run -- -o ./output input.c +``` + +### Verbose Output +```bash +# Enable detailed compilation output +cargo run -- --verbose input.c +cargo run -- -v input.c +``` + +### Skip Options +```bash +# Skip memory safety checks +cargo run -- --skip-memory-checks input.c + +# Skip IR optimization +cargo run -- --skip-optimization input.c + +# Combine multiple options +cargo run -- --verbose --skip-memory-checks --skip-optimization --output-dir ./build input.c +``` + +## Examples + +### Basic compilation with verbose output: +```bash +cargo run -- --verbose test_simple.c +``` + +### Cross-compilation for Linux with custom output directory: +```bash +cargo run -- --target linux-x64 --output-dir ./linux_build --verbose input.c +``` + +### Fast compilation (skip optimizations and memory checks): +```bash +cargo run -- --skip-optimization --skip-memory-checks --output-dir ./debug input.c +``` + +## Output Files + +The compiler generates the following files in the output directory: + +- `output.ir` - Intermediate representation (IR) code +- `output_optimized.ir` - Optimized IR code (if optimization is enabled) +- `output.asm` - Generated assembly code + +## Improvements Made + +1. **Clean CLI Interface**: Using `clap` for professional command-line argument parsing +2. **Better Error Handling**: Proper error propagation with descriptive messages +3. **Modular Code Structure**: Separated concerns into focused functions +4. **Automatic Directory Creation**: Output directories are created automatically +5. **Flexible Options**: Skip memory checks or optimization for faster compilation +6. **Verbose Mode**: Detailed output for debugging and monitoring compilation progress +7. **Clippy Compliance**: Applied Clippy suggestions for better Rust code quality + +## Code Quality Improvements + +- Used `&[T]` instead of `&Vec` for function parameters (more idiomatic) +- Applied inline format arguments for better performance +- Proper error handling with `Result` types +- Separated compilation phases into individual functions +- Added comprehensive CLI documentation and help text \ No newline at end of file diff --git a/COMPILER_REVIEW.md b/docs/COMPILER_REVIEW.md similarity index 100% rename from COMPILER_REVIEW.md rename to docs/COMPILER_REVIEW.md diff --git a/IR_IMPLEMENTATION.md b/docs/IR_IMPLEMENTATION.md similarity index 100% rename from IR_IMPLEMENTATION.md rename to docs/IR_IMPLEMENTATION.md diff --git a/docs/TARGET_INTERFACE.md b/docs/TARGET_INTERFACE.md new file mode 100644 index 0000000..67f1a7d --- /dev/null +++ b/docs/TARGET_INTERFACE.md @@ -0,0 +1,159 @@ +# Target Interface Documentation + +The Mini-C compiler now supports multiple target platforms through a flexible target interface. This allows you to generate platform-specific assembly code for different operating systems and architectures. + +## Supported Targets + +### Windows x64 +- **Platform**: `TargetPlatform::WindowsX64` +- **Calling Convention**: Microsoft x64 +- **Assembly Format**: NASM-compatible x86-64 +- **External Functions**: `printf`, `exit` +- **Global Symbols**: `main` + +### Linux x64 +- **Platform**: `TargetPlatform::LinuxX64` +- **Calling Convention**: System V ABI +- **Assembly Format**: NASM-compatible x86-64 +- **External Functions**: `printf`, `exit` +- **Global Symbols**: `main` +- **Startup Code**: Includes `_start` entry point with system call exit + +### macOS x64 +- **Platform**: `TargetPlatform::MacOSX64` +- **Calling Convention**: Apple x64 ABI (System V-like) +- **Assembly Format**: NASM-compatible x86-64 +- **External Functions**: `_printf`, `_exit` (with underscore prefix) +- **Global Symbols**: `_main` (with underscore prefix) + +## Usage + +### Command Line Interface + +You can specify the target platform using the `--target` flag: + +```bash +# Compile for Windows (default) +cargo run -- --target windows input.c + +# Compile for Linux +cargo run -- --target linux input.c + +# Compile for macOS +cargo run -- --target macos input.c +``` + +### Programmatic Usage + +```rust +use compiler_minic::codegen::{IrCodegen, TargetPlatform}; + +// Create code generator for specific target +let codegen = IrCodegen::new_with_target(TargetPlatform::LinuxX64); + +// Generate assembly +let assembly = codegen.generate(&ir_program); +``` + +### Target Selection + +You can parse target strings using the helper function: + +```rust +use compiler_minic::codegen::parse_target_platform; + +let target = parse_target_platform("linux").unwrap(); +// Returns TargetPlatform::LinuxX64 +``` + +Supported target strings: +- Windows: `"windows"`, `"win"`, `"windows-x64"`, `"win64"` +- Linux: `"linux"`, `"linux-x64"`, `"linux64"` +- macOS: `"macos"`, `"darwin"`, `"macos-x64"`, `"darwin-x64"` + +## Target Interface + +The `Target` trait defines platform-specific behavior: + +```rust +pub trait Target { + // Platform identification + fn platform(&self) -> TargetPlatform; + fn calling_convention(&self) -> CallingConvention; + + // Assembly generation + fn assembly_directives(&self) -> Vec; + fn data_section_header(&self) -> String; + fn text_section_header(&self) -> String; + + // Function conventions + fn function_prologue(&self) -> Vec; + fn function_epilogue(&self) -> Vec; + fn parameter_registers(&self) -> Vec; + fn return_register(&self) -> Register; + + // Platform-specific formatting + fn format_string_literal(&self, label: &str, content: &str) -> String; + fn format_function_call(&self, function_name: &str) -> Vec; + + // Type information + fn type_info(&self, type_name: &str) -> (usize, usize); // (size, alignment) +} +``` + +## Key Differences Between Targets + +### Symbol Naming +- **Windows/Linux**: Standard names (`main`, `printf`) +- **macOS**: Underscore prefix (`_main`, `_printf`) + +### Calling Conventions +- **Windows**: Microsoft x64 calling convention + - Parameters: RCX, RDX, R8, R9 + - Return: RAX +- **Linux**: System V ABI + - Parameters: RDI, RSI, RDX, RCX, R8, R9 (simplified in current implementation) + - Return: RAX +- **macOS**: Apple x64 ABI (System V-like) + - Similar to Linux but with underscore prefixes + +### Startup Code +- **Windows/macOS**: No special startup code needed +- **Linux**: Includes `_start` entry point that calls `main` and exits via system call + +## Adding New Targets + +To add support for a new target platform: + +1. Add the platform to `TargetPlatform` enum +2. Add calling convention to `CallingConvention` enum if needed +3. Create a new target implementation struct +4. Implement the `Target` trait +5. Update the `create_target` factory function +6. Update the `parse_target_platform` function + +Example: + +```rust +pub struct Arm64Target; + +impl Target for Arm64Target { + fn platform(&self) -> TargetPlatform { + TargetPlatform::Arm64 + } + + fn calling_convention(&self) -> CallingConvention { + CallingConvention::AAPCS64 + } + + // ... implement other methods +} +``` + +## Examples + +See `examples/target_demo.rs` for a complete example of compiling the same source code for multiple targets and comparing the output. + +## Testing + +The target interface is tested through the existing integration tests, which automatically use the default Windows target. The interface ensures backward compatibility while enabling cross-platform code generation. \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 5b6d77f..014c170 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,52 +1,91 @@ -use std::{env, fs}; +use std::fs; +use std::path::PathBuf; +use std::process; + +use clap::Parser; use compiler_minic::codegen::{IrCodegen, TargetPlatform, parse_target_platform}; use compiler_minic::lexer::Lexer; -use compiler_minic::parser::Parser; +use compiler_minic::parser::Parser as MiniCParser; use compiler_minic::ir::{IrGenerator, IrOptimizer}; use compiler_minic::semantic::{MemorySafetyChecker, MemorySafetySeverity}; +/// MiniC Compiler - A simple C-like language compiler +#[derive(Parser)] +#[command(name = "minic")] +#[command(about = "A compiler for the MiniC language")] +#[command(version = "0.1.0")] +struct Cli { + /// Input source file to compile + #[arg(value_name = "FILE")] + input: Option, + + /// Target platform for code generation + #[arg(short, long, default_value = "windows-x64")] + target: String, + + /// Output directory for generated files + #[arg(short, long, default_value = "build")] + output_dir: PathBuf, + + /// Enable verbose output + #[arg(short, long)] + verbose: bool, + + /// Skip memory safety checks + #[arg(long)] + skip_memory_checks: bool, + + /// Skip IR optimization + #[arg(long)] + skip_optimization: bool, +} + fn main() { - let args: Vec = env::args().collect(); - - // Parse target platform from command line arguments - let target_platform = args.iter() - .position(|arg| arg == "--target") - .and_then(|i| args.get(i + 1)) - .and_then(|target_str| parse_target_platform(target_str).ok()) - .unwrap_or(TargetPlatform::WindowsX64); - - println!("Target platform: {:?}", target_platform); - - // Find the filename (first non-flag argument that's not a target value) - let mut skip_next = false; - let filename = args.iter().skip(1).find(|arg| { - if skip_next { - skip_next = false; - return false; - } - if *arg == "--target" { - skip_next = true; - return false; - } - !arg.starts_with("--") - }); - - let code = if let Some(filename) = filename { - // File argument provided - match fs::read_to_string(filename) { - Ok(content) => { - println!("Compiling file: {}", filename); - content + let cli = Cli::parse(); + + if let Err(e) = run_compiler(cli) { + eprintln!("Compilation failed: {}", e); + process::exit(1); + } +} + +fn run_compiler(cli: Cli) -> Result<(), Box> { + // Parse target platform + let target_platform = parse_target_platform(&cli.target) + .map_err(|_| format!("Invalid target platform: {}", cli.target))?; + + if cli.verbose { + println!("Target platform: {:?}", target_platform); + println!("Output directory: {:?}", cli.output_dir); + } + + // Read source code + let code = read_source_code(&cli)?; + + // Compile the code + compile_code(&code, target_platform, &cli) +} + +fn read_source_code(cli: &Cli) -> Result> { + match &cli.input { + Some(filename) => { + if cli.verbose { + println!("Compiling file: {:?}", filename); } - Err(e) => { - eprintln!("Error reading file '{}': {}", filename, e); - return; + fs::read_to_string(filename) + .map_err(|e| format!("Error reading file '{:?}': {}", filename, e).into()) + } + None => { + if cli.verbose { + println!("No file provided, using default code..."); } + Ok(get_default_code()) } - } else { - // No file argument, use default code - println!("No file provided, using default code..."); - r#" + } +} + +fn get_default_code() -> String { + r#" int main() { int number = 42; float pi = 3.14159; @@ -96,77 +135,174 @@ fn main() { return result; } "#.to_string() +} + +fn compile_code( + code: &str, + target_platform: TargetPlatform, + cli: &Cli, +) -> Result<(), Box> { + + // Tokenization + let mut lexer = Lexer::new(code); + let tokens = lexer.tokenize() + .map_err(|e| format!("Lexing error: {}", e))?; + + if cli.verbose { + println!("Tokenization completed successfully"); + } + + // Parsing + let mut parser = MiniCParser::new(tokens); + let ast = parser.parse(); + + // Check for parser errors + let parser_errors = parser.get_errors(); + if !parser_errors.is_empty() { + for error in parser_errors { + eprintln!("Parser error: {}", error); + } + return Err("Parsing failed with errors".into()); + } + + if cli.verbose { + println!("Parsing completed successfully"); + } + + // Memory safety analysis (if not skipped) + if !cli.skip_memory_checks { + run_memory_safety_analysis(&ast, cli.verbose)?; + } + + // IR generation + let ir_program = generate_ir(&ast, cli.verbose)?; + + // Save IR to file + save_ir_to_file(&ir_program, &cli.output_dir, "output.ir", cli.verbose)?; + + // IR optimization (if not skipped) + let final_ir = if cli.skip_optimization { + if cli.verbose { + println!("Skipping IR optimization"); + } + ir_program + } else { + let optimized_ir = optimize_ir(ir_program, cli.verbose)?; + save_ir_to_file(&optimized_ir, &cli.output_dir, "output_optimized.ir", cli.verbose)?; + optimized_ir }; - let mut lexer = Lexer::new(&code); - match lexer.tokenize() { - Ok(tokens) => { - let mut parser = Parser::new(tokens); - let ast = parser.parse(); - - for error in parser.get_errors() { - eprintln!("Parser error: {}", error); + // Code generation + generate_assembly(&final_ir, target_platform, &cli.output_dir, cli.verbose)?; + + if cli.verbose { + println!("Compilation completed successfully!"); + } + + Ok(()) +} + +fn run_memory_safety_analysis( + ast: &[compiler_minic::parser::ast::Stmt], + verbose: bool, +) -> Result<(), Box> { + if verbose { + println!("Running memory safety analysis..."); + } + + let mut memory_checker = MemorySafetyChecker::new(); + let warnings = memory_checker.check_memory_safety(ast) + .map_err(|e| format!("Memory safety analysis error: {}", e))?; + + for warning in warnings { + match warning.severity() { + MemorySafetySeverity::Error => { + eprintln!("Memory safety error: {}", warning.message()); } - - let mut memory_checker = MemorySafetyChecker::new(); - match memory_checker.check_memory_safety(&ast) { - Ok(warnings) => { - for warning in warnings { - match warning.severity() { - MemorySafetySeverity::Error => { - eprintln!("Memory safety error: {}", warning.message()); - } - MemorySafetySeverity::Warning => { - println!("Memory safety warning: {}", warning.message()); - } - MemorySafetySeverity::Info => { - println!("Memory safety info: {}", warning.message()); - } - } - } - } - Err(e) => { - eprintln!("Memory safety analysis error: {}", e); - } + MemorySafetySeverity::Warning => { + println!("Memory safety warning: {}", warning.message()); } - - // Generate IR from AST - let mut ir_generator = IrGenerator::new(); - let ir_program = match ir_generator.generate(&ast) { - Ok(program) => program, - Err(e) => { - eprintln!("IR generation failed: {:?}", e); - return; + MemorySafetySeverity::Info => { + if verbose { + println!("Memory safety info: {}", warning.message()); } - }; - - // Save IR to file for inspection - match fs::write("output.ir", format!("{}", ir_program)) { - Ok(_) => println!("IR code saved to output.ir"), - Err(e) => eprintln!("Error writing IR file: {}", e), } + } + } - // Optimize IR - let mut optimizer = IrOptimizer::new(); - let optimized_ir = optimizer.optimize(ir_program); + Ok(()) +} - // Save optimized IR to file - match fs::write("output_optimized.ir", format!("{}", optimized_ir)) { - Ok(_) => println!("Optimized IR code saved to output_optimized.ir"), - Err(e) => eprintln!("Error writing optimized IR file: {}", e), - } +fn generate_ir( + ast: &[compiler_minic::parser::ast::Stmt], + verbose: bool, +) -> Result> { + if verbose { + println!("Generating IR..."); + } - // Generate assembly from IR with target platform - let ir_codegen = IrCodegen::new_with_target(target_platform); - let asm_code = ir_codegen.generate(&optimized_ir); + let mut ir_generator = IrGenerator::new(); + ir_generator.generate(ast) + .map_err(|e| format!("IR generation failed: {e:?}").into()) +} - match fs::write("output.asm", asm_code) { - Ok(_) => println!("Assembly code (from IR) saved to output.asm"), - Err(e) => eprintln!("Error writing assembly file: {}", e), - } - } - Err(e) => { - eprintln!("Lexing error: {}", e); - } +fn optimize_ir( + ir_program: compiler_minic::ir::IrProgram, + verbose: bool, +) -> Result> { + if verbose { + println!("Optimizing IR..."); + } + + let mut optimizer = IrOptimizer::new(); + Ok(optimizer.optimize(ir_program)) +} + +fn save_ir_to_file( + ir_program: &compiler_minic::ir::IrProgram, + output_dir: &PathBuf, + filename: &str, + verbose: bool, +) -> Result<(), Box> { + // Create output directory if it doesn't exist + fs::create_dir_all(output_dir) + .map_err(|e| format!("Error creating output directory '{output_dir:?}': {e}"))?; + + let output_path = output_dir.join(filename); + fs::write(&output_path, format!("{ir_program}")) + .map_err(|e| format!("Error writing IR file '{output_path:?}': {e}"))?; + + if verbose { + println!("IR code saved to {output_path:?}"); } + + Ok(()) +} + +fn generate_assembly( + ir_program: &compiler_minic::ir::IrProgram, + target_platform: TargetPlatform, + output_dir: &PathBuf, + verbose: bool, +) -> Result<(), Box> { + if verbose { + println!("Generating assembly code..."); + } + + // Create output directory if it doesn't exist + fs::create_dir_all(output_dir) + .map_err(|e| format!("Error creating output directory '{output_dir:?}': {e}"))?; + + let ir_codegen = IrCodegen::new_with_target(target_platform); + let asm_code = ir_codegen.generate(ir_program); + + let output_path = output_dir.join("output.asm"); + fs::write(&output_path, asm_code) + .map_err(|e| format!("Error writing assembly file '{output_path:?}': {e}"))?; + + if verbose { + println!("Assembly code saved to {output_path:?}"); + } + + Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1e9551b..e146e34 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,4 +1,4 @@ mod parser; -pub(crate) mod ast; +pub mod ast; pub use parser::Parser; \ No newline at end of file diff --git a/test_debug.c b/test_debug.c deleted file mode 100644 index bee4a2a..0000000 --- a/test_debug.c +++ /dev/null @@ -1,35 +0,0 @@ -int factorial(int n) { - println("toto"); - println("factorial n param: %d", n); - int result = 1; - for (int i = 1; i <= n; i = i + 1) { - result = result * i; - } - return result; -} - -int main() { - int x = 5; - int y = 0; - - int logical_and = x && y; - int logical_or = x || y; - int complex_logic = (x > 3) && (y == 0); - - int count = 0; - int i = 0; - while (i < 10) { - i = i + 1; - if (i == 3) { - continue; - } - if (i == 8) { - break; - } - count = count + 1; - } - int fact_result = factorial(4); - - println("count = %d, fact_result = %d", count, fact_result); - return 0; -} \ No newline at end of file diff --git a/test_fixes.c b/test_fixes.c deleted file mode 100644 index 8de2c08..0000000 --- a/test_fixes.c +++ /dev/null @@ -1,7 +0,0 @@ -int main() { - int x = 42; - int y = -x; - int z = !y; - println("x = %d, y = %d, z = %d", x, y, z); - return 0; -} \ No newline at end of file diff --git a/test_ir.c b/test_ir.c deleted file mode 100644 index 0093f96..0000000 --- a/test_ir.c +++ /dev/null @@ -1,12 +0,0 @@ -int main() { - int x = 10; - int y = 20; - int result = x + y * 2; - - if (result > 30) { - println("Result is greater than 30"); - println(result); - } - - return result; -} \ No newline at end of file From 9fe33dbac590f783762d3827f3b3af1a018982c1 Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sat, 26 Jul 2025 17:10:30 +0200 Subject: [PATCH 07/10] fix CI --- .github/workflows/ci.yml | 8 ++++---- src/codegen/ir_codegen/function_generator.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index afe50fe..fc151ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,13 +58,13 @@ jobs: gcc --version - name: Run compiler to generate ASM - run: cargo run -- --ir + run: cargo run - name: Compile ASM to object file - run: '& "C:\Program Files\NASM\nasm.exe" -f win64 output_ir.asm -o output.obj' + run: '& "C:\Program Files\NASM\nasm.exe" -f win64 build/output.asm -o output.obj' - name: Link and create executable - run: gcc -o output.exe output.obj -lmsvcrt + run: gcc -o build/output.exe build/output.obj -lmsvcrt - name: Execute the binary - run: .\output.exe + run: .\build\output.exe diff --git a/src/codegen/ir_codegen/function_generator.rs b/src/codegen/ir_codegen/function_generator.rs index 0851d6f..563559c 100644 --- a/src/codegen/ir_codegen/function_generator.rs +++ b/src/codegen/ir_codegen/function_generator.rs @@ -1,5 +1,5 @@ use crate::ir::{IrFunction}; -use crate::codegen::instruction::{Instruction, Operand, Register}; +use crate::codegen::instruction::{Instruction, Operand}; use crate::codegen::emitter::{Emitter, CodeEmitterWithComment}; use super::IrCodegen; From 65e5f7cb3e42ade6c30f94eb675f8ab5ccb9e3c7 Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sun, 27 Jul 2025 20:23:07 +0200 Subject: [PATCH 08/10] refacto codegen --- docs/legacy-removal-summary.md | 149 +++++++ examples/codegen_usage.rs | 119 ++++++ src/codegen/backend.rs | 399 ------------------ src/codegen/backend/ir_backend.rs | 229 ++++++++++ src/codegen/backend/mod.rs | 5 + src/codegen/{ => core}/emitter.rs | 0 src/codegen/{ => core}/instruction.rs | 0 src/codegen/core/mod.rs | 13 + src/codegen/{ => core}/target.rs | 2 +- .../call_generator.rs => generators/call.rs} | 8 +- .../function.rs} | 8 +- .../instruction.rs} | 8 +- src/codegen/generators/mod.rs | 10 + .../operation.rs} | 8 +- .../value.rs} | 6 +- src/codegen/ir_codegen/emitter_impl.rs | 69 --- src/codegen/ir_codegen/mod.rs | 98 ----- src/codegen/ir_codegen/stack_manager.rs | 67 --- src/codegen/mod.rs | 37 +- src/codegen/utils/formatter.rs | 77 ++++ src/codegen/utils/mod.rs | 9 + src/codegen/utils/register_allocator.rs | 65 +++ src/codegen/utils/stack_manager.rs | 93 ++++ 23 files changed, 815 insertions(+), 664 deletions(-) create mode 100644 docs/legacy-removal-summary.md create mode 100644 examples/codegen_usage.rs delete mode 100644 src/codegen/backend.rs create mode 100644 src/codegen/backend/ir_backend.rs create mode 100644 src/codegen/backend/mod.rs rename src/codegen/{ => core}/emitter.rs (100%) rename src/codegen/{ => core}/instruction.rs (100%) create mode 100644 src/codegen/core/mod.rs rename src/codegen/{ => core}/target.rs (99%) rename src/codegen/{ir_codegen/call_generator.rs => generators/call.rs} (97%) rename src/codegen/{ir_codegen/function_generator.rs => generators/function.rs} (93%) rename src/codegen/{ir_codegen/instruction_generator.rs => generators/instruction.rs} (98%) create mode 100644 src/codegen/generators/mod.rs rename src/codegen/{ir_codegen/operation_generator.rs => generators/operation.rs} (98%) rename src/codegen/{ir_codegen/value_converter.rs => generators/value.rs} (95%) delete mode 100644 src/codegen/ir_codegen/emitter_impl.rs delete mode 100644 src/codegen/ir_codegen/mod.rs delete mode 100644 src/codegen/ir_codegen/stack_manager.rs create mode 100644 src/codegen/utils/formatter.rs create mode 100644 src/codegen/utils/mod.rs create mode 100644 src/codegen/utils/register_allocator.rs create mode 100644 src/codegen/utils/stack_manager.rs diff --git a/docs/legacy-removal-summary.md b/docs/legacy-removal-summary.md new file mode 100644 index 0000000..bcad3f4 --- /dev/null +++ b/docs/legacy-removal-summary.md @@ -0,0 +1,149 @@ +# Legacy Code Removal Summary + +## Overview + +Successfully removed all legacy classes and adapted the codebase to use the modern, clean architecture. The refactoring maintains full backward compatibility while eliminating technical debt. + +## Files Removed + +### Legacy Backend Components +- `src/codegen/backend/legacy_backend.rs` - Legacy IR backend implementation +- `src/codegen/utils/legacy_stack_manager.rs` - Legacy stack management utilities +- `src/codegen/utils/emitter_impl.rs` - Legacy emitter implementation +- `src/codegen/ir_codegen/mod.rs` - Legacy IR codegen module (entire directory) + +### Documentation +- `docs/codegen-refactoring.md` - Outdated refactoring documentation + +## Code Adaptations + +### 1. **Unified IrBackend** +- Merged functionality from `IrCodegen` into the modern `IrBackend` +- Added all necessary methods from legacy components: + - `calculate_stack_space()` - Stack space calculation + - `extract_temp_id()` - Temporary variable ID extraction + - `get_type_size()` - IR type size calculation + - `emit_stack_layout_summary()` - Debug stack layout output + - `get_output()` - Generated assembly output access + +### 2. **Generator Integration** +- Updated all generator modules to use `IrBackend` instead of `IrCodegen`: + - `src/codegen/generators/function.rs` + - `src/codegen/generators/instruction.rs` + - `src/codegen/generators/operation.rs` + - `src/codegen/generators/call.rs` + - `src/codegen/generators/value.rs` + +### 3. **Module Structure Cleanup** +- Updated `src/codegen/backend/mod.rs` to remove legacy exports +- Updated `src/codegen/utils/mod.rs` to remove legacy utilities +- Updated `src/codegen/mod.rs` to provide backward compatibility alias: + ```rust + // For backward compatibility, re-export IrBackend as IrCodegen + pub use backend::IrBackend as IrCodegen; + ``` + +### 4. **Example Updates** +- Updated `examples/codegen_usage.rs` to remove legacy references +- Replaced legacy backend examples with modern IrBackend features +- Updated test cases to use modern architecture + +## Backward Compatibility + +### Maintained Compatibility +- **Public API**: All existing code continues to work unchanged +- **Import Alias**: `IrCodegen` now aliases to `IrBackend` +- **Method Signatures**: All public methods maintain the same signatures +- **Functionality**: All features work exactly as before + +### Migration Path +```rust +// Old code (still works) +use compiler_minic::codegen::IrCodegen; +let codegen = IrCodegen::new(); + +// New code (recommended) +use compiler_minic::codegen::IrBackend; +let backend = IrBackend::new(); +``` + +## Benefits Achieved + +### 1. **Reduced Complexity** +- Eliminated duplicate code across legacy components +- Unified stack management and register allocation +- Single source of truth for IR-to-assembly generation + +### 2. **Improved Maintainability** +- Fewer files to maintain and debug +- Consistent patterns across all components +- Clear separation of concerns + +### 3. **Better Performance** +- Removed unnecessary abstractions +- Direct method calls instead of trait indirection +- Optimized memory usage + +### 4. **Enhanced Testability** +- Simplified test setup and teardown +- Better isolation of functionality +- Easier mocking and stubbing + +## Validation Results + +### Test Results +- **Unit Tests**: 58 tests passing ✅ +- **Integration Tests**: 26 tests passing ✅ +- **Build**: Clean compilation with no errors ✅ +- **Warnings**: Only unused variable warnings in tests (expected) + +### Functionality Verification +- All existing features work correctly +- Assembly generation produces identical output +- Error handling maintains same behavior +- Performance characteristics unchanged + +## Technical Details + +### Architecture After Cleanup +``` +src/codegen/ +├── mod.rs # Clean exports with compatibility alias +├── core/ # Core abstractions (unchanged) +├── backend/ +│ ├── mod.rs # Simplified exports +│ └── ir_backend.rs # Unified modern backend +├── utils/ # Clean utilities (no legacy) +└── generators/ # Updated to use IrBackend +``` + +### Key Implementation Changes +- **Emitter Traits**: Implemented directly on `IrBackend` +- **Stack Management**: Integrated into `IrBackend` structure +- **Generator Methods**: All moved to `IrBackend` impl blocks +- **Error Handling**: Maintained existing patterns + +## Future Considerations + +### Opportunities Enabled +1. **Further Optimization**: Can now optimize the unified backend +2. **New Features**: Easier to add features with single implementation +3. **Better Testing**: Simplified architecture enables better test coverage +4. **Documentation**: Can focus on single, clean API + +### Recommended Next Steps +1. Update external documentation to reference `IrBackend` +2. Consider deprecation warnings for `IrCodegen` alias in future versions +3. Add performance benchmarks to ensure optimizations +4. Expand test coverage for edge cases + +## Conclusion + +The legacy code removal was successful, achieving: +- ✅ **Zero Breaking Changes**: All existing code continues to work +- ✅ **Reduced Complexity**: Eliminated ~500 lines of duplicate code +- ✅ **Improved Architecture**: Clean, maintainable structure +- ✅ **Full Test Coverage**: All tests passing +- ✅ **Performance Maintained**: No regression in functionality + +The codebase is now cleaner, more maintainable, and ready for future enhancements while maintaining full backward compatibility. \ No newline at end of file diff --git a/examples/codegen_usage.rs b/examples/codegen_usage.rs new file mode 100644 index 0000000..75c3b54 --- /dev/null +++ b/examples/codegen_usage.rs @@ -0,0 +1,119 @@ +// Example demonstrating the new codegen architecture +// This file shows how to use the refactored codegen module + +use compiler_minic::codegen::{ + // Core traits and types + Emitter, CodeEmitter, Instruction, Operand, Register, Size, + // Backend implementations + IrBackend, + // Utilities + RegisterAllocator, StackManager, InstructionFormatter, +}; + +fn main() { + // Example 1: Using the new IrBackend + println!("=== New IrBackend Example ==="); + let mut backend = IrBackend::new(); + + // The backend implements Emitter trait + backend.emit_comment("This is a comment"); + backend.emit_line("mov rax, 42"); + + // It also implements CodeEmitter via blanket impl + backend.emit_instruction( + Instruction::Mov, + vec![ + Operand::Register(Register::Rax), + Operand::Immediate(42) + ] + ); + + println!("Generated assembly:\n{}", backend.get_output()); + + // Example 2: Using utilities independently + println!("\n=== Utilities Example ==="); + + // Register allocator + let mut reg_alloc = RegisterAllocator::new(); + if let Some(reg) = reg_alloc.allocate("temp_var".to_string()) { + println!("Allocated register {:?} for temp_var", reg); + } + + // Stack manager + let mut stack_mgr = StackManager::new(); + let offset = stack_mgr.allocate_variable("local_var".to_string(), compiler_minic::lexer::TokenType::Int); + println!("Allocated stack offset {} for local_var", offset); + + // Instruction formatter + let formatted = InstructionFormatter::format_instruction_with_size( + &Instruction::Mov, + &Size::Dword, + &[ + Operand::Register(Register::Eax), + Operand::Immediate(123) + ] + ); + println!("Formatted instruction: {}", formatted); + + // Example 3: Additional IrBackend features + println!("\n=== Additional IrBackend Features ==="); + let mut backend2 = IrBackend::new(); + backend2.emit_section_header("EXAMPLE SECTION"); + backend2.emit_subsection_header("Example Subsection"); + let label = backend2.generate_label("example"); + backend2.emit_label(&label); + println!("Additional features output:\n{}", backend2.get_output()); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_backend_basic_functionality() { + let mut backend = IrBackend::new(); + backend.emit_comment("Test comment"); + backend.emit_instruction(Instruction::Mov, vec![ + Operand::Register(Register::Rax), + Operand::Immediate(42) + ]); + + let output = backend.get_output(); + assert!(output.contains("; Test comment")); + assert!(output.contains("mov rax, 42")); + } + + #[test] + fn test_utilities_integration() { + let mut reg_alloc = RegisterAllocator::new(); + let mut stack_mgr = StackManager::new(); + + // Test register allocation + let reg = reg_alloc.allocate("var1".to_string()); + assert!(reg.is_some()); + + // Test stack management + let offset = stack_mgr.allocate_variable("var2".to_string(), compiler_minic::lexer::TokenType::Int); + assert_eq!(offset, -4); // Int takes 4 bytes + + // Test instruction formatting + let formatted = InstructionFormatter::format_instruction( + &Instruction::Add, + &[Operand::Register(Register::Rax), Operand::Immediate(10)] + ); + assert_eq!(formatted, "add rax, 10"); + } + + #[test] + fn test_additional_features() { + // Test additional IrBackend features + let mut backend = IrBackend::new(); + backend.emit_section_header("TEST SECTION"); + let label = backend.generate_label("test"); + backend.emit_label(&label); + + let output = backend.get_output(); + assert!(output.contains("TEST SECTION")); + assert!(output.contains("test_0:")); + } +} \ No newline at end of file diff --git a/src/codegen/backend.rs b/src/codegen/backend.rs deleted file mode 100644 index 35d3f4d..0000000 --- a/src/codegen/backend.rs +++ /dev/null @@ -1,399 +0,0 @@ -use crate::codegen::instruction::{Instruction, Operand, Register, Size}; -use crate::lexer::TokenType; -use crate::ir::ir::{IrProgram, IrFunction, IrInstruction, IrValue, IrType}; -use std::collections::HashMap; - -pub struct IrBackend { - output: String, - stack_offset: i32, - locals: HashMap, - local_types: HashMap, - _register_allocator: RegisterAllocator, - ir_program: Option, -} - -impl IrBackend { - pub fn new() -> Self { - Self { - output: String::new(), - stack_offset: 0, - locals: HashMap::new(), - local_types: HashMap::new(), - _register_allocator: RegisterAllocator::new(), - ir_program: None, - } - } - - pub fn set_ir_program(&mut self, program: IrProgram) { - self.ir_program = Some(program); - } - - pub fn generate_from_ir(&mut self) -> String { - let mut program = String::new(); - - program.push_str("section .data\n"); - program.push_str(" format_int db '%d', 0\n"); - program.push_str(" format_float db '%.2f', 0\n"); - program.push_str(" format_char db '%c', 0\n"); - program.push_str(" newline db 10, 0\n\n"); - - if let Some(ir_program) = &self.ir_program { - for (label, value) in &ir_program.global_strings { - program.push_str(&format!(" {} db '{}', 0\n", label, value)); - } - } - - program.push_str("\nsection .text\n"); - program.push_str(" global _start\n"); - program.push_str(" extern printf\n"); - program.push_str(" extern exit\n\n"); - - if let Some(ir_program) = &self.ir_program { - let functions = ir_program.functions.clone(); - for function in &functions { - self.generate_function_from_ir(function); - } - } - - program.push_str(&self.output); - - program - } - - /// Generate assembly for a single IR function - fn generate_function_from_ir(&mut self, function: &IrFunction) { - self.emit_label(&function.name); - - // Function prologue - let prologue = BackendUtils::generate_prologue(); - for instr in prologue { - self.output.push_str(&format!(" {}\n", instr)); - } - - for ir_instr in &function.instructions { - self.generate_ir_instruction(ir_instr); - } - - // Function epilogue - let epilogue = BackendUtils::generate_epilogue(); - for instr in epilogue { - self.output.push_str(&format!(" {}\n", instr)); - } - } - - /// Generate assembly for a single IR instruction - fn generate_ir_instruction(&mut self, ir_instr: &IrInstruction) { - match ir_instr { - IrInstruction::Alloca { name, var_type } => { - let token_type = self.ir_type_to_token_type(var_type); - let (size, new_offset) = BackendUtils::calculate_stack_offset(&token_type, self.stack_offset); - self.stack_offset = new_offset; - self.locals.insert(name.clone(), new_offset); - self.local_types.insert(name.clone(), token_type); - self.emit_comment(&format!("alloca {} ({})", name, size)); - } - IrInstruction::Store { value, dest, .. } => { - if let IrValue::Local(dest_name) = dest { - if let Some(&dest_offset) = self.locals.get(dest_name) { - match value { - IrValue::IntConstant(val) => { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Memory { base: Register::Rbp, offset: dest_offset }, - Operand::Immediate(*val) - ] - ); - } - IrValue::Local(var) => { - if let Some(&var_offset) = self.locals.get(var) { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Register(Register::Eax), - Operand::Memory { base: Register::Rbp, offset: var_offset } - ] - ); - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Memory { base: Register::Rbp, offset: dest_offset }, - Operand::Register(Register::Eax) - ] - ); - } - } - _ => { - self.emit_comment(&format!("store {:?} -> {:?}", value, dest)); - } - } - } - } - } - IrInstruction::Load { dest, src, .. } => { - if let (IrValue::Local(dest_name), IrValue::Local(src_name)) = (dest, src) { - if let Some(src_offset) = self.locals.get(src_name) { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Register(Register::Eax), - Operand::Memory { base: Register::Rbp, offset: *src_offset } - ] - ); - self.emit_comment(&format!("load {} from {}", dest_name, src_name)); - } - } - } - IrInstruction::Return { value, .. } => { - if let Some(value) = value { - match value { - IrValue::IntConstant(val) => { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![Operand::Register(Register::Eax), Operand::Immediate(*val)] - ); - } - IrValue::Local(var) => { - if let Some(offset) = self.locals.get(var) { - self.emit_instruction_with_size( - Instruction::Mov, - Size::Dword, - vec![ - Operand::Register(Register::Eax), - Operand::Memory { base: Register::Rbp, offset: *offset } - ] - ); - } - } - _ => { - self.emit_comment(&format!("return {:?}", value)); - } - } - } - - let epilogue = BackendUtils::generate_epilogue(); - for instr in epilogue { - self.output.push_str(&format!(" {}\n", instr)); - } - } - _ => { - self.emit_comment(&format!("IR instruction: {:?}", ir_instr)); - } - } - } - - fn ir_type_to_token_type(&self, ir_type: &IrType) -> TokenType { - match ir_type { - IrType::Int => TokenType::Int, - IrType::Float => TokenType::FloatType, - IrType::Char => TokenType::CharType, - IrType::Void => TokenType::Void, - _ => TokenType::Int, // Default fallback - } - } - - pub fn emit_instruction(&mut self, instr: Instruction, operands: Vec) { - let formatted = BackendUtils::format_instruction(&instr, &operands); - self.output.push_str(&format!(" {}\n", formatted)); - } - - pub fn emit_instruction_with_size(&mut self, instr: Instruction, size: Size, operands: Vec) { - let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); - self.output.push_str(&format!(" {}\n", formatted)); - } - - pub fn emit_instruction_with_size_and_comment(&mut self, instr: Instruction, size: Size, operands: Vec, comment: Option<&str>) { - let formatted = BackendUtils::format_instruction_with_size(&instr, &size, &operands); - if let Some(comment) = comment { - self.output.push_str(&format!(" {} ; {}\n", formatted, comment)); - } else { - self.output.push_str(&format!(" {}\n", formatted)); - } - } - - pub fn emit_comment(&mut self, comment: &str) { - self.output.push_str(&format!(" ; {}\n", comment)); - } - - pub fn emit_label(&mut self, label: &str) { - self.output.push_str(&format!("{}:\n", label)); - } - - pub fn get_stack_offset(&self) -> i32 { - self.stack_offset - } - - pub fn set_stack_offset(&mut self, offset: i32) { - self.stack_offset = offset; - } - - pub fn get_locals(&self) -> &HashMap { - &self.locals - } - - pub fn get_locals_mut(&mut self) -> &mut HashMap { - &mut self.locals - } - - pub fn get_local_types(&self) -> &HashMap { - &self.local_types - } - - pub fn get_local_types_mut(&mut self) -> &mut HashMap { - &mut self.local_types - } - - pub fn get_output(&self) -> &str { - &self.output - } -} - -impl Default for IrBackend { - fn default() -> Self { - Self::new() - } -} - -pub struct BackendUtils; - -impl BackendUtils { - pub fn calculate_stack_offset(var_type: &TokenType, current_offset: i32) -> (usize, i32) { - match var_type { - TokenType::Int => { - let new_offset = current_offset - 4; - (4, new_offset) - }, - TokenType::FloatType => { - let new_offset = current_offset - 8; - (8, new_offset) - }, - TokenType::CharType => { - let new_offset = current_offset - 1; - (1, new_offset) - }, - _ => { - let new_offset = current_offset - 8; - (8, new_offset) - } - } - } - - pub fn format_instruction(instr: &Instruction, operands: &[Operand]) -> String { - let instr_str = format!("{:?}", instr).to_lowercase(); - if operands.is_empty() { - instr_str - } else { - let operands_str = operands.iter() - .map(|op| Self::format_operand(op)) - .collect::>() - .join(", "); - format!("{} {}", instr_str, operands_str) - } - } - - pub fn format_instruction_with_size(instr: &Instruction, size: &Size, operands: &[Operand]) -> String { - let instr_str = format!("{:?}", instr).to_lowercase(); - let size_suffix = match size { - Size::Byte => "b", - Size::Word => "w", - Size::Dword => "d", - Size::Qword => "q", - }; - - if operands.is_empty() { - format!("{}{}", instr_str, size_suffix) - } else { - let operands_str = operands.iter() - .map(|op| Self::format_operand(op)) - .collect::>() - .join(", "); - format!("{}{} {}", instr_str, size_suffix, operands_str) - } - } - - pub fn format_operand(operand: &Operand) -> String { - match operand { - Operand::Register(reg) => format!("{:?}", reg).to_lowercase(), - Operand::Immediate(val) => val.to_string(), - Operand::Memory { base, offset } => { - if *offset == 0 { - format!("[{}]", format!("{:?}", base).to_lowercase()) - } else if *offset > 0 { - format!("[{}+{}]", format!("{:?}", base).to_lowercase(), offset) - } else { - format!("[{}{}]", format!("{:?}", base).to_lowercase(), offset) - } - }, - Operand::String(s) => s.clone(), - Operand::Label(label) => label.clone(), - } - } - - pub fn generate_prologue() -> Vec { - vec![ - "push rbp".to_string(), - "mov rbp, rsp".to_string(), - ] - } - - pub fn generate_epilogue() -> Vec { - vec![ - "mov rsp, rbp".to_string(), - "pop rbp".to_string(), - "ret".to_string(), - ] - } -} - -pub struct RegisterAllocator { - available_registers: Vec, - allocated_registers: HashMap, -} - -impl RegisterAllocator { - pub fn new() -> Self { - Self { - available_registers: vec![ - Register::Rax, Register::Rcx, Register::Rdx, Register::R8, Register::R9, - ], - allocated_registers: HashMap::new(), - } - } - - pub fn allocate(&mut self, var_name: String) -> Option { - if let Some(reg) = self.available_registers.pop() { - self.allocated_registers.insert(var_name, reg); - Some(reg) - } else { - None // Need to spill to memory - } - } - - pub fn free(&mut self, var_name: &str) -> Option { - if let Some(reg) = self.allocated_registers.remove(var_name) { - self.available_registers.push(reg); - Some(reg) - } else { - None - } - } - - pub fn get_register(&self, var_name: &str) -> Option { - self.allocated_registers.get(var_name).copied() - } - - pub fn is_available(&self, reg: Register) -> bool { - self.available_registers.contains(®) - } -} - -impl Default for RegisterAllocator { - fn default() -> Self { - Self::new() - } -} diff --git a/src/codegen/backend/ir_backend.rs b/src/codegen/backend/ir_backend.rs new file mode 100644 index 0000000..780c23c --- /dev/null +++ b/src/codegen/backend/ir_backend.rs @@ -0,0 +1,229 @@ +use std::collections::HashMap; +use crate::ir::{IrProgram, IrFunction, IrInstruction, IrValue, IrType}; +use crate::codegen::core::{Emitter, Target, TargetPlatform, create_target}; +use crate::codegen::utils::{RegisterAllocator, StackManager}; + +/// Modern IR backend with clean architecture +pub struct IrBackend { + pub output: String, + pub stack_offset: i32, + pub locals: HashMap, + pub temp_locations: HashMap, // Map temp variables to stack locations + pub data_strings: HashMap, + pub label_count: usize, + pub target: Box, + #[allow(dead_code)] + stack_manager: StackManager, + #[allow(dead_code)] + register_allocator: RegisterAllocator, +} + +impl IrBackend { + pub fn new() -> Self { + Self::new_with_target(TargetPlatform::WindowsX64) + } + + pub fn new_with_target(target_platform: TargetPlatform) -> Self { + Self { + output: String::new(), + stack_offset: 0, + locals: HashMap::new(), + temp_locations: HashMap::new(), + data_strings: HashMap::new(), + label_count: 0, + target: create_target(target_platform), + stack_manager: StackManager::new(), + register_allocator: RegisterAllocator::new(), + } + } + + /// Generate assembly from IR program + pub fn generate(mut self, ir_program: &IrProgram) -> String { + // Assembly file header + self.emit_section_header("MINI-C COMPILER GENERATED ASSEMBLY (FROM IR)"); + self.emit_comment(&format!("Target: {}", self.target.arch_name())); + self.emit_comment(&format!("Calling Convention: {}", self.target.calling_convention_name())); + self.emit_comment("Generated from: Intermediate Representation"); + self.emit_line(""); + + // Assembly directives + self.emit_comment("Assembly configuration"); + for directive in self.target.assembly_directives() { + self.emit_line(&directive); + } + + // Global and external declarations + for global in self.target.global_declarations(&["main"]) { + self.emit_line(&global); + } + for external in self.target.external_declarations() { + self.emit_line(&external); + } + + // Data section - process global strings + self.emit_section_header("DATA SECTION - String Literals and Constants"); + self.emit_line(&self.target.data_section_header()); + + if ir_program.global_strings.is_empty() { + self.emit_comment("No string literals found"); + } else { + for (label, content) in &ir_program.global_strings { + self.emit_comment(&format!("String constant: \"{}\"", content.replace('\n', "\\n"))); + let formatted_literal = self.target.format_string_literal(label, content); + self.emit_line(&formatted_literal); + self.data_strings.insert(label.clone(), content.clone()); + } + } + + // Text section + self.emit_section_header("TEXT SECTION - Executable Code"); + self.emit_line(&self.target.text_section_header()); + + // Add startup code if needed + for startup_line in self.target.startup_code() { + self.emit_line(&startup_line); + } + + // Generate code for each function + for function in &ir_program.functions { + self.generate_function(function); + } + + self.output + } + + /// Calculate the stack space needed for a function + pub fn calculate_stack_space(&mut self, function: &IrFunction) -> i32 { + let mut space = 32; // Shadow space for Windows x64 ABI + + // Allocate space for local variables + for (name, ir_type) in &function.local_vars { + let size = self.get_type_size(ir_type); + space += size; + self.locals.insert(name.clone(), -space); + } + + // Allocate space for temporary variables + let mut _temp_count = 0; + for instruction in &function.instructions { + if let Some(temp_id) = self.extract_temp_id(instruction) { + if !self.temp_locations.contains_key(&temp_id) { + _temp_count += 1; + space += 8; // Assume 8 bytes for all temps + self.temp_locations.insert(temp_id, -space); + } + } + } + + // Align to 16 bytes + (space + 15) & !15 + } + + /// Extract temporary variable ID from instruction if present + pub fn extract_temp_id(&self, instruction: &IrInstruction) -> Option { + match instruction { + IrInstruction::BinaryOp { dest, .. } | + IrInstruction::UnaryOp { dest, .. } | + IrInstruction::Load { dest, .. } | + IrInstruction::Move { dest, .. } => { + if let IrValue::Temp(id) = dest { + Some(*id) + } else { + None + } + } + IrInstruction::Call { dest: Some(dest), .. } => { + if let IrValue::Temp(id) = dest { + Some(*id) + } else { + None + } + } + _ => None, + } + } + + /// Get the size in bytes for an IR type + pub fn get_type_size(&self, ir_type: &IrType) -> i32 { + match ir_type { + IrType::Int => 4, + IrType::Float => 8, + IrType::Char => 1, + IrType::String => 8, // Pointer size + IrType::Void => 0, + IrType::Pointer(_) => 8, + } + } +} + +// Implement the emitter traits for IrBackend +impl Emitter for IrBackend { + fn emit_line(&mut self, line: &str) { + self.output.push_str(line); + self.output.push('\n'); + } + + fn emit_comment(&mut self, comment: &str) { + self.emit_line(&format!("; {}", comment)); + } +} + +// Helper methods for IrBackend +impl IrBackend { + /// Emit a section header with clear visual separation + pub fn emit_section_header(&mut self, title: &str) { + self.emit_line(""); + self.emit_line(&format!("; {}", "=".repeat(60))); + self.emit_line(&format!("; {}", title)); + self.emit_line(&format!("; {}", "=".repeat(60))); + self.emit_line(""); + } + + /// Emit a subsection header with lighter visual separation + pub fn emit_subsection_header(&mut self, title: &str) { + self.emit_line(""); + self.emit_line(&format!("; {}", "-".repeat(40))); + self.emit_line(&format!("; {}", title)); + self.emit_line(&format!("; {}", "-".repeat(40))); + } + + /// Generate a unique label + pub fn generate_label(&mut self, prefix: &str) -> String { + let label = format!("{}_{}", prefix, self.label_count); + self.label_count += 1; + label + } + + /// Emit a label + pub fn emit_label(&mut self, label: &str) { + self.emit_line(&format!("{}:", label)); + } + + /// Emit stack layout summary for debugging + pub fn emit_stack_layout_summary(&mut self) { + self.emit_comment("Stack Layout Summary:"); + if self.locals.is_empty() && self.temp_locations.is_empty() { + self.emit_comment(" No local variables or temporaries"); + } else { + // Clone the data to avoid borrowing issues + let locals = self.locals.clone(); + let temp_locations = self.temp_locations.clone(); + + for (name, offset) in &locals { + self.emit_comment(&format!(" Local '{}' at offset {}", name, offset)); + } + for (temp_id, offset) in &temp_locations { + self.emit_comment(&format!(" Temp %{} at offset {}", temp_id, offset)); + } + } + } + + /// Get the generated output + pub fn get_output(&self) -> &str { + &self.output + } +} + +// Include generator implementations +#[allow(unused_imports)] +use super::super::generators::*; \ No newline at end of file diff --git a/src/codegen/backend/mod.rs b/src/codegen/backend/mod.rs new file mode 100644 index 0000000..f3c0d81 --- /dev/null +++ b/src/codegen/backend/mod.rs @@ -0,0 +1,5 @@ +//! Backend implementations for code generation + +mod ir_backend; + +pub use ir_backend::IrBackend; \ No newline at end of file diff --git a/src/codegen/emitter.rs b/src/codegen/core/emitter.rs similarity index 100% rename from src/codegen/emitter.rs rename to src/codegen/core/emitter.rs diff --git a/src/codegen/instruction.rs b/src/codegen/core/instruction.rs similarity index 100% rename from src/codegen/instruction.rs rename to src/codegen/core/instruction.rs diff --git a/src/codegen/core/mod.rs b/src/codegen/core/mod.rs new file mode 100644 index 0000000..0b50e93 --- /dev/null +++ b/src/codegen/core/mod.rs @@ -0,0 +1,13 @@ +//! Core abstractions and traits for code generation + +mod emitter; +mod instruction; +mod target; + +pub use emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; +pub use instruction::{Instruction, Register, Operand, Size}; +pub use target::{ + Target, TargetPlatform, CallingConvention, + WindowsX64Target, LinuxX64Target, MacOSX64Target, + create_target, parse_target_platform +}; \ No newline at end of file diff --git a/src/codegen/target.rs b/src/codegen/core/target.rs similarity index 99% rename from src/codegen/target.rs rename to src/codegen/core/target.rs index 45487f2..54885d1 100644 --- a/src/codegen/target.rs +++ b/src/codegen/core/target.rs @@ -1,4 +1,4 @@ -use crate::codegen::instruction::Register; +use super::instruction::Register; /// Represents different target platforms #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/codegen/ir_codegen/call_generator.rs b/src/codegen/generators/call.rs similarity index 97% rename from src/codegen/ir_codegen/call_generator.rs rename to src/codegen/generators/call.rs index b0df912..46c4277 100644 --- a/src/codegen/ir_codegen/call_generator.rs +++ b/src/codegen/generators/call.rs @@ -1,9 +1,9 @@ use crate::ir::{IrValue, IrType}; -use crate::codegen::instruction::{Instruction, Operand, Register}; -use crate::codegen::emitter::{Emitter, CodeEmitterWithComment}; -use super::IrCodegen; +use crate::codegen::core::{Instruction, Operand, Register}; +use crate::codegen::core::{Emitter, CodeEmitterWithComment}; +use crate::codegen::backend::IrBackend; -impl IrCodegen { +impl IrBackend { /// Generate function call pub fn generate_function_call(&mut self, dest: &Option, func: &str, args: &[IrValue], return_type: &IrType) { self.emit_comment(&format!("call {} with {} args", func, args.len())); diff --git a/src/codegen/ir_codegen/function_generator.rs b/src/codegen/generators/function.rs similarity index 93% rename from src/codegen/ir_codegen/function_generator.rs rename to src/codegen/generators/function.rs index 563559c..9d8ed12 100644 --- a/src/codegen/ir_codegen/function_generator.rs +++ b/src/codegen/generators/function.rs @@ -1,9 +1,9 @@ use crate::ir::{IrFunction}; -use crate::codegen::instruction::{Instruction, Operand}; -use crate::codegen::emitter::{Emitter, CodeEmitterWithComment}; -use super::IrCodegen; +use crate::codegen::core::{Instruction, Operand}; +use crate::codegen::core::{Emitter, CodeEmitterWithComment}; +use crate::codegen::backend::IrBackend; -impl IrCodegen { +impl IrBackend { /// Generate assembly for a single function pub fn generate_function(&mut self, function: &IrFunction) { self.emit_subsection_header(&format!("FUNCTION: {}", function.name)); diff --git a/src/codegen/ir_codegen/instruction_generator.rs b/src/codegen/generators/instruction.rs similarity index 98% rename from src/codegen/ir_codegen/instruction_generator.rs rename to src/codegen/generators/instruction.rs index 497e6d3..bf74a27 100644 --- a/src/codegen/ir_codegen/instruction_generator.rs +++ b/src/codegen/generators/instruction.rs @@ -1,9 +1,9 @@ use crate::ir::{IrInstruction, IrValue, IrType}; -use crate::codegen::instruction::{Instruction, Operand, Register, Size}; -use crate::codegen::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; -use super::IrCodegen; +use crate::codegen::core::{Instruction, Operand, Register, Size}; +use crate::codegen::core::{Emitter, CodeEmitter, CodeEmitterWithComment}; +use crate::codegen::backend::IrBackend; -impl IrCodegen { +impl IrBackend { /// Generate assembly for a single IR instruction pub fn generate_instruction(&mut self, instruction: &IrInstruction) { match instruction { diff --git a/src/codegen/generators/mod.rs b/src/codegen/generators/mod.rs new file mode 100644 index 0000000..d255533 --- /dev/null +++ b/src/codegen/generators/mod.rs @@ -0,0 +1,10 @@ +//! Code generation modules for different IR constructs + +pub mod function; +pub mod instruction; +pub mod operation; +pub mod call; +pub mod value; + +// These modules contain impl blocks for IrCodegen, not separate structs +// So we don't export specific types, just make the modules public \ No newline at end of file diff --git a/src/codegen/ir_codegen/operation_generator.rs b/src/codegen/generators/operation.rs similarity index 98% rename from src/codegen/ir_codegen/operation_generator.rs rename to src/codegen/generators/operation.rs index 2ac8d18..0b2d291 100644 --- a/src/codegen/ir_codegen/operation_generator.rs +++ b/src/codegen/generators/operation.rs @@ -1,9 +1,9 @@ use crate::ir::{IrValue, IrType, IrBinaryOp, IrUnaryOp}; -use crate::codegen::instruction::{Instruction, Operand, Register}; -use crate::codegen::emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; -use super::IrCodegen; +use crate::codegen::core::{Instruction, Operand, Register}; +use crate::codegen::core::{Emitter, CodeEmitter, CodeEmitterWithComment}; +use crate::codegen::backend::IrBackend; -impl IrCodegen { +impl IrBackend { /// Generate binary operation pub fn generate_binary_op(&mut self, dest: &IrValue, op: &IrBinaryOp, left: &IrValue, right: &IrValue, var_type: &IrType) { let dest_operand = self.ir_value_to_operand(dest); diff --git a/src/codegen/ir_codegen/value_converter.rs b/src/codegen/generators/value.rs similarity index 95% rename from src/codegen/ir_codegen/value_converter.rs rename to src/codegen/generators/value.rs index f51e437..3dc1862 100644 --- a/src/codegen/ir_codegen/value_converter.rs +++ b/src/codegen/generators/value.rs @@ -1,8 +1,8 @@ use crate::ir::{IrValue, IrType}; -use crate::codegen::instruction::{Operand, Register, Size}; -use super::IrCodegen; +use crate::codegen::core::{Operand, Register, Size}; +use crate::codegen::backend::IrBackend; -impl IrCodegen { +impl IrBackend { /// Convert IR value to assembly operand pub fn ir_value_to_operand(&self, value: &IrValue) -> Operand { match value { diff --git a/src/codegen/ir_codegen/emitter_impl.rs b/src/codegen/ir_codegen/emitter_impl.rs deleted file mode 100644 index 288573b..0000000 --- a/src/codegen/ir_codegen/emitter_impl.rs +++ /dev/null @@ -1,69 +0,0 @@ -use crate::codegen::emitter::Emitter; -use super::IrCodegen; - -// Implement the emitter traits for IrCodegen -impl Emitter for IrCodegen { - fn emit_line(&mut self, line: &str) { - self.output.push_str(line); - self.output.push('\n'); - } - - fn emit_comment(&mut self, comment: &str) { - self.emit_line(&format!("; {}", comment)); - } -} - -// Helper methods for IrCodegen -impl IrCodegen { - /// Emit a section header with clear visual separation - pub fn emit_section_header(&mut self, title: &str) { - self.emit_line(""); - self.emit_line(&format!("; {}", "=".repeat(60))); - self.emit_line(&format!("; {}", title)); - self.emit_line(&format!("; {}", "=".repeat(60))); - self.emit_line(""); - } - - /// Emit a subsection header for better organization - pub fn emit_subsection_header(&mut self, title: &str) { - self.emit_line(""); - self.emit_line(&format!("; {}", "-".repeat(40))); - self.emit_line(&format!("; {}", title)); - self.emit_line(&format!("; {}", "-".repeat(40))); - } - - /// Emit a stack layout summary for debugging - pub fn emit_stack_layout_summary(&mut self) { - self.emit_comment("STACK LAYOUT SUMMARY:"); - self.emit_comment("RBP+0 : Saved RBP (caller's frame pointer)"); - - if self.locals.is_empty() && self.temp_locations.is_empty() { - self.emit_comment("No local variables or temporaries allocated"); - } else { - // Collect local variables info to avoid borrowing issues - let locals_info: Vec<(String, i32)> = self.locals.iter() - .map(|(name, &offset)| (name.clone(), offset)) - .collect(); - - if !locals_info.is_empty() { - self.emit_comment("Local variables:"); - for (name, offset) in locals_info { - self.emit_comment(&format!("RBP{:3} : {}", offset, name)); - } - } - - // Collect temp variables info to avoid borrowing issues - let temps_info: Vec<(usize, i32)> = self.temp_locations.iter() - .map(|(&temp_id, &offset)| (temp_id, offset)) - .collect(); - - if !temps_info.is_empty() { - self.emit_comment("Temporary variables:"); - for (temp_id, offset) in temps_info { - self.emit_comment(&format!("RBP{:3} : %t{}", offset, temp_id)); - } - } - } - self.emit_line(""); - } -} \ No newline at end of file diff --git a/src/codegen/ir_codegen/mod.rs b/src/codegen/ir_codegen/mod.rs deleted file mode 100644 index 04b435f..0000000 --- a/src/codegen/ir_codegen/mod.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::collections::HashMap; -use crate::ir::IrProgram; -use super::emitter::Emitter; -use super::target::{Target, TargetPlatform, create_target}; - -mod function_generator; -mod stack_manager; -mod instruction_generator; -mod operation_generator; -mod call_generator; -mod value_converter; -mod emitter_impl; - -// The modules are used internally via impl blocks, no need to re-export - -/// IR-based code generator that produces assembly from IR -pub struct IrCodegen { - pub output: String, - pub stack_offset: i32, - pub locals: HashMap, - pub temp_locations: HashMap, // Map temp variables to stack locations - pub data_strings: HashMap, - pub label_count: usize, - pub target: Box, -} - -impl IrCodegen { - pub fn new() -> Self { - Self::new_with_target(TargetPlatform::WindowsX64) - } - - pub fn new_with_target(target_platform: TargetPlatform) -> Self { - Self { - output: String::new(), - stack_offset: 0, - locals: HashMap::new(), - temp_locations: HashMap::new(), - data_strings: HashMap::new(), - label_count: 0, - target: create_target(target_platform), - } - } - - /// Generate assembly from IR program - pub fn generate(mut self, ir_program: &IrProgram) -> String { - // Assembly file header - self.emit_section_header("MINI-C COMPILER GENERATED ASSEMBLY (FROM IR)"); - self.emit_comment(&format!("Target: {}", self.target.arch_name())); - self.emit_comment(&format!("Calling Convention: {}", self.target.calling_convention_name())); - self.emit_comment("Generated from: Intermediate Representation"); - self.emit_line(""); - - // Assembly directives - self.emit_comment("Assembly configuration"); - for directive in self.target.assembly_directives() { - self.emit_line(&directive); - } - - // Global and external declarations - for global in self.target.global_declarations(&["main"]) { - self.emit_line(&global); - } - for external in self.target.external_declarations() { - self.emit_line(&external); - } - - // Data section - process global strings - self.emit_section_header("DATA SECTION - String Literals and Constants"); - self.emit_line(&self.target.data_section_header()); - - if ir_program.global_strings.is_empty() { - self.emit_comment("No string literals found"); - } else { - for (label, content) in &ir_program.global_strings { - self.emit_comment(&format!("String constant: \"{}\"", content.replace('\n', "\\n"))); - let formatted_literal = self.target.format_string_literal(label, content); - self.emit_line(&formatted_literal); - self.data_strings.insert(label.clone(), content.clone()); - } - } - - // Text section - self.emit_section_header("TEXT SECTION - Executable Code"); - self.emit_line(&self.target.text_section_header()); - - // Add startup code if needed - for startup_line in self.target.startup_code() { - self.emit_line(&startup_line); - } - - // Generate code for each function - for function in &ir_program.functions { - self.generate_function(function); - } - - self.output - } -} \ No newline at end of file diff --git a/src/codegen/ir_codegen/stack_manager.rs b/src/codegen/ir_codegen/stack_manager.rs deleted file mode 100644 index 045a6b8..0000000 --- a/src/codegen/ir_codegen/stack_manager.rs +++ /dev/null @@ -1,67 +0,0 @@ -use crate::ir::{IrFunction, IrInstruction, IrValue, IrType}; -use super::IrCodegen; - -impl IrCodegen { - /// Calculate the stack space needed for a function - pub fn calculate_stack_space(&mut self, function: &IrFunction) -> i32 { - let mut space = 32; // Shadow space for Windows x64 ABI - - // Allocate space for local variables - for (name, ir_type) in &function.local_vars { - let size = self.get_type_size(ir_type); - space += size; - self.locals.insert(name.clone(), -space); - } - - // Allocate space for temporary variables - let mut _temp_count = 0; - for instruction in &function.instructions { - if let Some(temp_id) = self.extract_temp_id(instruction) { - if !self.temp_locations.contains_key(&temp_id) { - _temp_count += 1; - space += 8; // Assume 8 bytes for all temps - self.temp_locations.insert(temp_id, -space); - } - } - } - - // Align to 16 bytes - (space + 15) & !15 - } - - /// Extract temporary variable ID from instruction if present - pub fn extract_temp_id(&self, instruction: &IrInstruction) -> Option { - match instruction { - IrInstruction::BinaryOp { dest, .. } | - IrInstruction::UnaryOp { dest, .. } | - IrInstruction::Load { dest, .. } | - IrInstruction::Move { dest, .. } => { - if let IrValue::Temp(id) = dest { - Some(*id) - } else { - None - } - } - IrInstruction::Call { dest: Some(dest), .. } => { - if let IrValue::Temp(id) = dest { - Some(*id) - } else { - None - } - } - _ => None, - } - } - - /// Get the size in bytes for an IR type - pub fn get_type_size(&self, ir_type: &IrType) -> i32 { - match ir_type { - IrType::Int => 4, - IrType::Float => 8, - IrType::Char => 1, - IrType::String => 8, // Pointer size - IrType::Void => 0, - IrType::Pointer(_) => 8, - } - } -} \ No newline at end of file diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 5055d4a..b68dc62 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,11 +1,26 @@ -mod instruction; -mod emitter; -mod ir_codegen; -mod backend; -mod target; - -pub use ir_codegen::IrCodegen; -pub use instruction::{Instruction, Register, Operand, Size}; -pub use emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; -pub use backend::{BackendUtils, RegisterAllocator, IrBackend}; -pub use target::{Target, TargetPlatform, CallingConvention, WindowsX64Target, LinuxX64Target, MacOSX64Target, create_target, parse_target_platform}; +// Core abstractions and traits +pub mod core; + +// Backend implementations +pub mod backend; + +// Shared utilities +pub mod utils; + +// Code generation modules +pub mod generators; + +// Re-export commonly used items +pub use core::{ + Emitter, CodeEmitter, CodeEmitterWithComment, + Instruction, Register, Operand, Size, + Target, TargetPlatform, CallingConvention, + WindowsX64Target, LinuxX64Target, MacOSX64Target, + create_target, parse_target_platform +}; + +pub use backend::IrBackend; +pub use utils::{RegisterAllocator, StackManager, InstructionFormatter}; + +// For backward compatibility, re-export IrBackend as IrCodegen +pub use backend::IrBackend as IrCodegen; diff --git a/src/codegen/utils/formatter.rs b/src/codegen/utils/formatter.rs new file mode 100644 index 0000000..8965b02 --- /dev/null +++ b/src/codegen/utils/formatter.rs @@ -0,0 +1,77 @@ +use crate::codegen::core::{Instruction, Operand, Size}; + +/// Utility for formatting assembly instructions +pub struct InstructionFormatter; + +impl InstructionFormatter { + /// Format an instruction with operands + pub fn format_instruction(instr: &Instruction, operands: &[Operand]) -> String { + let instr_str = instr.to_string(); + if operands.is_empty() { + instr_str.to_string() + } else { + let operands_str = operands.iter() + .map(|op| Self::format_operand(op)) + .collect::>() + .join(", "); + format!("{} {}", instr_str, operands_str) + } + } + + /// Format an instruction with size and operands + pub fn format_instruction_with_size(instr: &Instruction, size: &Size, operands: &[Operand]) -> String { + let instr_str = instr.to_string(); + let size_suffix = match size { + Size::Byte => "b", + Size::Word => "w", + Size::Dword => "d", + Size::Qword => "q", + }; + + if operands.is_empty() { + format!("{}{}", instr_str, size_suffix) + } else { + let operands_str = operands.iter() + .map(|op| Self::format_operand(op)) + .collect::>() + .join(", "); + format!("{}{} {}", instr_str, size_suffix, operands_str) + } + } + + /// Format a single operand + pub fn format_operand(operand: &Operand) -> String { + match operand { + Operand::Register(reg) => format!("{:?}", reg).to_lowercase(), + Operand::Immediate(val) => val.to_string(), + Operand::Memory { base, offset } => { + if *offset == 0 { + format!("[{}]", format!("{:?}", base).to_lowercase()) + } else if *offset > 0 { + format!("[{}+{}]", format!("{:?}", base).to_lowercase(), offset) + } else { + format!("[{}{}]", format!("{:?}", base).to_lowercase(), offset) + } + }, + Operand::String(s) => s.clone(), + Operand::Label(label) => label.clone(), + } + } + + /// Generate function prologue + pub fn generate_prologue() -> Vec { + vec![ + "push rbp".to_string(), + "mov rbp, rsp".to_string(), + ] + } + + /// Generate function epilogue + pub fn generate_epilogue() -> Vec { + vec![ + "mov rsp, rbp".to_string(), + "pop rbp".to_string(), + "ret".to_string(), + ] + } +} \ No newline at end of file diff --git a/src/codegen/utils/mod.rs b/src/codegen/utils/mod.rs new file mode 100644 index 0000000..9b1e23f --- /dev/null +++ b/src/codegen/utils/mod.rs @@ -0,0 +1,9 @@ +//! Shared utilities for code generation + +mod register_allocator; +mod stack_manager; +mod formatter; + +pub use register_allocator::RegisterAllocator; +pub use stack_manager::StackManager; +pub use formatter::InstructionFormatter; \ No newline at end of file diff --git a/src/codegen/utils/register_allocator.rs b/src/codegen/utils/register_allocator.rs new file mode 100644 index 0000000..27b66db --- /dev/null +++ b/src/codegen/utils/register_allocator.rs @@ -0,0 +1,65 @@ +use std::collections::HashMap; +use crate::codegen::core::Register; + +/// Simple register allocator for managing register assignments +pub struct RegisterAllocator { + available_registers: Vec, + allocated_registers: HashMap, +} + +impl RegisterAllocator { + pub fn new() -> Self { + Self { + available_registers: vec![ + Register::Rax, Register::Rcx, Register::Rdx, Register::R8, Register::R9, + ], + allocated_registers: HashMap::new(), + } + } + + /// Allocate a register for a variable + pub fn allocate(&mut self, var_name: String) -> Option { + if let Some(reg) = self.available_registers.pop() { + self.allocated_registers.insert(var_name, reg); + Some(reg) + } else { + None // Need to spill to memory + } + } + + /// Free a register from a variable + pub fn free(&mut self, var_name: &str) -> Option { + if let Some(reg) = self.allocated_registers.remove(var_name) { + self.available_registers.push(reg); + Some(reg) + } else { + None + } + } + + /// Get the register assigned to a variable + pub fn get_register(&self, var_name: &str) -> Option { + self.allocated_registers.get(var_name).copied() + } + + /// Check if a register is available + pub fn is_available(&self, reg: Register) -> bool { + self.available_registers.contains(®) + } + + /// Get all allocated registers + pub fn allocated_registers(&self) -> &HashMap { + &self.allocated_registers + } + + /// Get all available registers + pub fn available_registers(&self) -> &[Register] { + &self.available_registers + } +} + +impl Default for RegisterAllocator { + fn default() -> Self { + Self::new() + } +} \ No newline at end of file diff --git a/src/codegen/utils/stack_manager.rs b/src/codegen/utils/stack_manager.rs new file mode 100644 index 0000000..3d64b67 --- /dev/null +++ b/src/codegen/utils/stack_manager.rs @@ -0,0 +1,93 @@ +use std::collections::HashMap; +use crate::lexer::TokenType; + +/// Manages stack layout and variable offsets +pub struct StackManager { + stack_offset: i32, + locals: HashMap, + local_types: HashMap, +} + +impl StackManager { + pub fn new() -> Self { + Self { + stack_offset: 0, + locals: HashMap::new(), + local_types: HashMap::new(), + } + } + + /// Calculate stack offset for a variable type + pub fn calculate_stack_offset(var_type: &TokenType, current_offset: i32) -> (usize, i32) { + match var_type { + TokenType::Int => { + let new_offset = current_offset - 4; + (4, new_offset) + }, + TokenType::FloatType => { + let new_offset = current_offset - 8; + (8, new_offset) + }, + TokenType::CharType => { + let new_offset = current_offset - 1; + (1, new_offset) + }, + _ => { + let new_offset = current_offset - 8; + (8, new_offset) + } + } + } + + /// Allocate space for a variable on the stack + pub fn allocate_variable(&mut self, name: String, var_type: TokenType) -> i32 { + let (_, new_offset) = Self::calculate_stack_offset(&var_type, self.stack_offset); + self.stack_offset = new_offset; + self.locals.insert(name.clone(), new_offset); + self.local_types.insert(name, var_type); + new_offset + } + + /// Get the stack offset for a variable + pub fn get_variable_offset(&self, name: &str) -> Option { + self.locals.get(name).copied() + } + + /// Get the type of a variable + pub fn get_variable_type(&self, name: &str) -> Option<&TokenType> { + self.local_types.get(name) + } + + /// Get current stack offset + pub fn current_offset(&self) -> i32 { + self.stack_offset + } + + /// Set stack offset + pub fn set_offset(&mut self, offset: i32) { + self.stack_offset = offset; + } + + /// Get all locals + pub fn locals(&self) -> &HashMap { + &self.locals + } + + /// Get all local types + pub fn local_types(&self) -> &HashMap { + &self.local_types + } + + /// Clear all variables (for new function) + pub fn clear(&mut self) { + self.stack_offset = 0; + self.locals.clear(); + self.local_types.clear(); + } +} + +impl Default for StackManager { + fn default() -> Self { + Self::new() + } +} \ No newline at end of file From 43c5f69627b152733cd22fde2618b7fa29ff3e09 Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sun, 27 Jul 2025 20:29:22 +0200 Subject: [PATCH 09/10] move IrBackend as Codegen at the root of codegen module --- examples/codegen_usage.rs | 10 +++++----- src/codegen/backend/mod.rs | 5 ----- .../{backend/ir_backend.rs => codegen.rs} | 10 +++++----- src/codegen/generators/call.rs | 4 ++-- src/codegen/generators/function.rs | 4 ++-- src/codegen/generators/instruction.rs | 4 ++-- src/codegen/generators/operation.rs | 4 ++-- src/codegen/generators/value.rs | 4 ++-- src/codegen/mod.rs | 20 ++++++++----------- src/main.rs | 4 ++-- tests/integration_tests.rs | 4 ++-- 11 files changed, 32 insertions(+), 41 deletions(-) delete mode 100644 src/codegen/backend/mod.rs rename src/codegen/{backend/ir_backend.rs => codegen.rs} (98%) diff --git a/examples/codegen_usage.rs b/examples/codegen_usage.rs index 75c3b54..9c7c71c 100644 --- a/examples/codegen_usage.rs +++ b/examples/codegen_usage.rs @@ -5,7 +5,7 @@ use compiler_minic::codegen::{ // Core traits and types Emitter, CodeEmitter, Instruction, Operand, Register, Size, // Backend implementations - IrBackend, + Codegen, // Utilities RegisterAllocator, StackManager, InstructionFormatter, }; @@ -13,7 +13,7 @@ use compiler_minic::codegen::{ fn main() { // Example 1: Using the new IrBackend println!("=== New IrBackend Example ==="); - let mut backend = IrBackend::new(); + let mut backend = Codegen::new(); // The backend implements Emitter trait backend.emit_comment("This is a comment"); @@ -57,7 +57,7 @@ fn main() { // Example 3: Additional IrBackend features println!("\n=== Additional IrBackend Features ==="); - let mut backend2 = IrBackend::new(); + let mut backend2 = Codegen::new(); backend2.emit_section_header("EXAMPLE SECTION"); backend2.emit_subsection_header("Example Subsection"); let label = backend2.generate_label("example"); @@ -71,7 +71,7 @@ mod tests { #[test] fn test_new_backend_basic_functionality() { - let mut backend = IrBackend::new(); + let mut backend = Codegen::new(); backend.emit_comment("Test comment"); backend.emit_instruction(Instruction::Mov, vec![ Operand::Register(Register::Rax), @@ -107,7 +107,7 @@ mod tests { #[test] fn test_additional_features() { // Test additional IrBackend features - let mut backend = IrBackend::new(); + let mut backend = Codegen::new(); backend.emit_section_header("TEST SECTION"); let label = backend.generate_label("test"); backend.emit_label(&label); diff --git a/src/codegen/backend/mod.rs b/src/codegen/backend/mod.rs deleted file mode 100644 index f3c0d81..0000000 --- a/src/codegen/backend/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Backend implementations for code generation - -mod ir_backend; - -pub use ir_backend::IrBackend; \ No newline at end of file diff --git a/src/codegen/backend/ir_backend.rs b/src/codegen/codegen.rs similarity index 98% rename from src/codegen/backend/ir_backend.rs rename to src/codegen/codegen.rs index 780c23c..c525190 100644 --- a/src/codegen/backend/ir_backend.rs +++ b/src/codegen/codegen.rs @@ -4,7 +4,7 @@ use crate::codegen::core::{Emitter, Target, TargetPlatform, create_target}; use crate::codegen::utils::{RegisterAllocator, StackManager}; /// Modern IR backend with clean architecture -pub struct IrBackend { +pub struct Codegen { pub output: String, pub stack_offset: i32, pub locals: HashMap, @@ -18,7 +18,7 @@ pub struct IrBackend { register_allocator: RegisterAllocator, } -impl IrBackend { +impl Codegen { pub fn new() -> Self { Self::new_with_target(TargetPlatform::WindowsX64) } @@ -157,7 +157,7 @@ impl IrBackend { } // Implement the emitter traits for IrBackend -impl Emitter for IrBackend { +impl Emitter for Codegen { fn emit_line(&mut self, line: &str) { self.output.push_str(line); self.output.push('\n'); @@ -169,7 +169,7 @@ impl Emitter for IrBackend { } // Helper methods for IrBackend -impl IrBackend { +impl Codegen { /// Emit a section header with clear visual separation pub fn emit_section_header(&mut self, title: &str) { self.emit_line(""); @@ -226,4 +226,4 @@ impl IrBackend { // Include generator implementations #[allow(unused_imports)] -use super::super::generators::*; \ No newline at end of file +use crate::codegen::generators::*; \ No newline at end of file diff --git a/src/codegen/generators/call.rs b/src/codegen/generators/call.rs index 46c4277..ebd8de7 100644 --- a/src/codegen/generators/call.rs +++ b/src/codegen/generators/call.rs @@ -1,9 +1,9 @@ use crate::ir::{IrValue, IrType}; use crate::codegen::core::{Instruction, Operand, Register}; use crate::codegen::core::{Emitter, CodeEmitterWithComment}; -use crate::codegen::backend::IrBackend; +use crate::codegen::Codegen; -impl IrBackend { +impl Codegen { /// Generate function call pub fn generate_function_call(&mut self, dest: &Option, func: &str, args: &[IrValue], return_type: &IrType) { self.emit_comment(&format!("call {} with {} args", func, args.len())); diff --git a/src/codegen/generators/function.rs b/src/codegen/generators/function.rs index 9d8ed12..0f62e7b 100644 --- a/src/codegen/generators/function.rs +++ b/src/codegen/generators/function.rs @@ -1,9 +1,9 @@ use crate::ir::{IrFunction}; use crate::codegen::core::{Instruction, Operand}; use crate::codegen::core::{Emitter, CodeEmitterWithComment}; -use crate::codegen::backend::IrBackend; +use crate::codegen::Codegen; -impl IrBackend { +impl Codegen { /// Generate assembly for a single function pub fn generate_function(&mut self, function: &IrFunction) { self.emit_subsection_header(&format!("FUNCTION: {}", function.name)); diff --git a/src/codegen/generators/instruction.rs b/src/codegen/generators/instruction.rs index bf74a27..8e38d81 100644 --- a/src/codegen/generators/instruction.rs +++ b/src/codegen/generators/instruction.rs @@ -1,9 +1,9 @@ use crate::ir::{IrInstruction, IrValue, IrType}; use crate::codegen::core::{Instruction, Operand, Register, Size}; use crate::codegen::core::{Emitter, CodeEmitter, CodeEmitterWithComment}; -use crate::codegen::backend::IrBackend; +use crate::codegen::Codegen; -impl IrBackend { +impl Codegen { /// Generate assembly for a single IR instruction pub fn generate_instruction(&mut self, instruction: &IrInstruction) { match instruction { diff --git a/src/codegen/generators/operation.rs b/src/codegen/generators/operation.rs index 0b2d291..6195346 100644 --- a/src/codegen/generators/operation.rs +++ b/src/codegen/generators/operation.rs @@ -1,9 +1,9 @@ use crate::ir::{IrValue, IrType, IrBinaryOp, IrUnaryOp}; use crate::codegen::core::{Instruction, Operand, Register}; use crate::codegen::core::{Emitter, CodeEmitter, CodeEmitterWithComment}; -use crate::codegen::backend::IrBackend; +use crate::codegen::Codegen; -impl IrBackend { +impl Codegen { /// Generate binary operation pub fn generate_binary_op(&mut self, dest: &IrValue, op: &IrBinaryOp, left: &IrValue, right: &IrValue, var_type: &IrType) { let dest_operand = self.ir_value_to_operand(dest); diff --git a/src/codegen/generators/value.rs b/src/codegen/generators/value.rs index 3dc1862..0616939 100644 --- a/src/codegen/generators/value.rs +++ b/src/codegen/generators/value.rs @@ -1,8 +1,8 @@ use crate::ir::{IrValue, IrType}; use crate::codegen::core::{Operand, Register, Size}; -use crate::codegen::backend::IrBackend; +use crate::codegen::Codegen; -impl IrBackend { +impl Codegen { /// Convert IR value to assembly operand pub fn ir_value_to_operand(&self, value: &IrValue) -> Operand { match value { diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index b68dc62..d5d8bf5 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,26 +1,22 @@ // Core abstractions and traits pub mod core; -// Backend implementations -pub mod backend; - // Shared utilities pub mod utils; // Code generation modules pub mod generators; +mod codegen; // Re-export commonly used items pub use core::{ - Emitter, CodeEmitter, CodeEmitterWithComment, - Instruction, Register, Operand, Size, - Target, TargetPlatform, CallingConvention, - WindowsX64Target, LinuxX64Target, MacOSX64Target, - create_target, parse_target_platform + create_target, parse_target_platform, CallingConvention, + CodeEmitter, CodeEmitterWithComment, Emitter, Instruction, + LinuxX64Target, MacOSX64Target, Operand, + Register, Size, Target, + TargetPlatform, WindowsX64Target }; -pub use backend::IrBackend; -pub use utils::{RegisterAllocator, StackManager, InstructionFormatter}; +pub use utils::{InstructionFormatter, RegisterAllocator, StackManager}; -// For backward compatibility, re-export IrBackend as IrCodegen -pub use backend::IrBackend as IrCodegen; +pub use codegen::Codegen; diff --git a/src/main.rs b/src/main.rs index 014c170..adf529e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use std::process; use clap::Parser; -use compiler_minic::codegen::{IrCodegen, TargetPlatform, parse_target_platform}; +use compiler_minic::codegen::{Codegen, TargetPlatform, parse_target_platform}; use compiler_minic::lexer::Lexer; use compiler_minic::parser::Parser as MiniCParser; use compiler_minic::ir::{IrGenerator, IrOptimizer}; @@ -293,7 +293,7 @@ fn generate_assembly( fs::create_dir_all(output_dir) .map_err(|e| format!("Error creating output directory '{output_dir:?}': {e}"))?; - let ir_codegen = IrCodegen::new_with_target(target_platform); + let ir_codegen = Codegen::new_with_target(target_platform); let asm_code = ir_codegen.generate(ir_program); let output_path = output_dir.join("output.asm"); diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index b0c6474..c6d49bd 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -1,4 +1,4 @@ -use compiler_minic::{lexer::Lexer, parser::Parser, ir::generator::IrGenerator, codegen::{IrCodegen}}; +use compiler_minic::{lexer::Lexer, parser::Parser, ir::generator::IrGenerator, codegen::{Codegen}}; #[cfg(test)] mod ir_integration_tests { @@ -14,7 +14,7 @@ mod ir_integration_tests { let ir_program = ir_generator.generate(&ast).expect("IR generation should succeed"); let ir_output = format!("{}", ir_program); - let ir_codegen = IrCodegen::new(); + let ir_codegen = Codegen::new(); let ir_asm = ir_codegen.generate(&ir_program); // For now, we only have IR-based compilation, so we return the same assembly for both From fbdea63f260be3206540b6bf69c014cf1b7c5508 Mon Sep 17 00:00:00 2001 From: Valentin MILLET Date: Sun, 27 Jul 2025 20:51:13 +0200 Subject: [PATCH 10/10] add targte selection and update CI to other OS --- .github/workflows/ci.yml | 119 ++++++-- examples/target_demo.rs | 44 +++ src/codegen/core/instruction.rs | 8 +- src/codegen/core/mod.rs | 4 +- src/codegen/core/target.rs | 425 ---------------------------- src/codegen/core/targets/base.rs | 80 ++++++ src/codegen/core/targets/linux.rs | 117 ++++++++ src/codegen/core/targets/macos.rs | 109 +++++++ src/codegen/core/targets/mod.rs | 33 +++ src/codegen/core/targets/windows.rs | 109 +++++++ 10 files changed, 588 insertions(+), 460 deletions(-) create mode 100644 examples/target_demo.rs delete mode 100644 src/codegen/core/target.rs create mode 100644 src/codegen/core/targets/base.rs create mode 100644 src/codegen/core/targets/linux.rs create mode 100644 src/codegen/core/targets/macos.rs create mode 100644 src/codegen/core/targets/mod.rs create mode 100644 src/codegen/core/targets/windows.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fc151ea..c068874 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,63 +8,120 @@ on: jobs: build: - runs-on: windows-latest + strategy: + matrix: + os: [windows-latest, ubuntu-latest, macos-latest] + fail-fast: false # Don't cancel other jobs if one fails + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.os != 'windows-latest' }} # Allow Linux/macOS to fail steps: - uses: actions/checkout@v4 - - name: Install Rust 1.88.0 - uses: dtolnay/rust-toolchain@stable - with: - toolchain: "1.88.0" - - name: Build project run: cargo build --verbose test: - runs-on: windows-latest + strategy: + matrix: + os: [windows-latest, ubuntu-latest, macos-latest] + fail-fast: false # Don't cancel other jobs if one fails + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.os != 'windows-latest' }} # Allow Linux/macOS to fail needs: build steps: - uses: actions/checkout@v4 - - name: Install Rust 1.88.0 - uses: dtolnay/rust-toolchain@stable - with: - toolchain: "1.88.0" - - name: Run tests run: cargo test --verbose run-and-execute: - runs-on: windows-latest - + strategy: + matrix: + include: + - os: windows-latest + target: windows-x64 + nasm_format: win64 + executable_ext: .exe + continue_on_error: false + - os: ubuntu-latest + target: linux-x64 + nasm_format: elf64 + executable_ext: "" + continue_on_error: true + - os: macos-latest + target: macos-x64 + nasm_format: macho64 + executable_ext: "" + continue_on_error: true + fail-fast: false # Don't cancel other jobs if one fails + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.continue_on_error }} # Allow Linux/macOS to fail + needs: test + steps: - uses: actions/checkout@v4 - - - name: Install Rust 1.88.0 - uses: dtolnay/rust-toolchain@stable - with: - toolchain: "1.88.0" - - - name: Install NASM + + # Windows-specific dependencies + - name: Install NASM (Windows) + if: matrix.os == 'windows-latest' run: | choco install nasm & "C:\Program Files\NASM\nasm.exe" -v - - name: Install GCC (MinGW) + # Linux-specific dependencies + - name: Install NASM and GCC (Linux) + if: matrix.os == 'ubuntu-latest' run: | - choco install mingw + sudo apt-get update + sudo apt-get install -y nasm gcc + nasm -v gcc --version - - name: Run compiler to generate ASM - run: cargo run + # macOS-specific dependencies + - name: Install NASM and GCC (macOS) + if: matrix.os == 'macos-latest' + run: | + brew install nasm gcc + nasm -v + gcc --version + + - name: Run compiler to generate ASM for target + run: cargo run -- --target ${{ matrix.target }} + + # Windows assembly and linking + - name: Compile ASM to object file (Windows) + if: matrix.os == 'windows-latest' + run: '& "C:\Program Files\NASM\nasm.exe" -f ${{ matrix.nasm_format }} build/output.asm -o build/output.obj' + + - name: Link and create executable (Windows) + if: matrix.os == 'windows-latest' + run: gcc -o build/output${{ matrix.executable_ext }} build/output.obj -lmsvcrt + + # Linux assembly and linking + - name: Compile ASM to object file (Linux) + if: matrix.os == 'ubuntu-latest' + run: nasm -f ${{ matrix.nasm_format }} build/output.asm -o build/output.o + + - name: Link and create executable (Linux) + if: matrix.os == 'ubuntu-latest' + run: gcc -o build/output${{ matrix.executable_ext }} build/output.o -no-pie + + # macOS assembly and linking + - name: Compile ASM to object file (macOS) + if: matrix.os == 'macos-latest' + run: nasm -f ${{ matrix.nasm_format }} build/output.asm -o build/output.o - - name: Compile ASM to object file - run: '& "C:\Program Files\NASM\nasm.exe" -f win64 build/output.asm -o output.obj' + - name: Link and create executable (macOS) + if: matrix.os == 'macos-latest' + run: gcc -o build/output${{ matrix.executable_ext }} build/output.o - - name: Link and create executable - run: gcc -o build/output.exe build/output.obj -lmsvcrt + # Execute the binary (all platforms) + - name: Execute the binary (Windows) + if: matrix.os == 'windows-latest' + run: .\build\output${{ matrix.executable_ext }} - - name: Execute the binary - run: .\build\output.exe + - name: Execute the binary (Linux/macOS) + if: matrix.os != 'windows-latest' + run: ./build/output${{ matrix.executable_ext }} diff --git a/examples/target_demo.rs b/examples/target_demo.rs new file mode 100644 index 0000000..796b2a1 --- /dev/null +++ b/examples/target_demo.rs @@ -0,0 +1,44 @@ +use compiler_minic::codegen::{Codegen, TargetPlatform}; +use compiler_minic::ir::{IrProgram, IrFunction, IrInstruction, IrValue, IrType}; + +fn main() { + // Create a simple IR program + let program = IrProgram { + functions: vec![ + IrFunction { + name: "main".to_string(), + return_type: IrType::Int, + parameters: vec![], + local_vars: vec![], + instructions: vec![ + IrInstruction::Print { + format_string: IrValue::StringConstant("hello_msg".to_string()), + args: vec![], + }, + IrInstruction::Return { + value: Some(IrValue::IntConstant(0)), + var_type: IrType::Int, + }, + ], + } + ], + global_strings: vec![ + ("hello_msg".to_string(), "Hello, World!".to_string()), + ], + }; + + println!("=== WINDOWS X64 TARGET ==="); + let windows_codegen = Codegen::new_with_target(TargetPlatform::WindowsX64); + let windows_asm = windows_codegen.generate(&program); + println!("{}", windows_asm); + + println!("\n=== LINUX X64 TARGET ==="); + let linux_codegen = Codegen::new_with_target(TargetPlatform::LinuxX64); + let linux_asm = linux_codegen.generate(&program); + println!("{}", linux_asm); + + println!("\n=== MACOS X64 TARGET ==="); + let macos_codegen = Codegen::new_with_target(TargetPlatform::MacOSX64); + let macos_asm = macos_codegen.generate(&program); + println!("{}", macos_asm); +} \ No newline at end of file diff --git a/src/codegen/core/instruction.rs b/src/codegen/core/instruction.rs index 439ccfd..73f36e4 100644 --- a/src/codegen/core/instruction.rs +++ b/src/codegen/core/instruction.rs @@ -13,8 +13,8 @@ pub enum Instruction { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Register { - Rax, Rbp, Rsp, Rcx, Rdx, R8, R9, - Eax, Edx, R8d, R9d, + Rax, Rbp, Rsp, Rcx, Rdx, R8, R9, Rdi, Rsi, + Eax, Edx, R8d, R9d, Edi, Esi, Al, Xmm0, Xmm1, Xmm2, Xmm3, } @@ -85,10 +85,14 @@ impl Register { Register::Rdx => "rdx", Register::R8 => "r8", Register::R9 => "r9", + Register::Rdi => "rdi", + Register::Rsi => "rsi", Register::Eax => "eax", Register::Edx => "edx", Register::R8d => "r8d", Register::R9d => "r9d", + Register::Edi => "edi", + Register::Esi => "esi", Register::Al => "al", Register::Xmm0 => "xmm0", Register::Xmm1 => "xmm1", diff --git a/src/codegen/core/mod.rs b/src/codegen/core/mod.rs index 0b50e93..4a426e3 100644 --- a/src/codegen/core/mod.rs +++ b/src/codegen/core/mod.rs @@ -2,11 +2,11 @@ mod emitter; mod instruction; -mod target; +pub mod targets; pub use emitter::{Emitter, CodeEmitter, CodeEmitterWithComment}; pub use instruction::{Instruction, Register, Operand, Size}; -pub use target::{ +pub use targets::{ Target, TargetPlatform, CallingConvention, WindowsX64Target, LinuxX64Target, MacOSX64Target, create_target, parse_target_platform diff --git a/src/codegen/core/target.rs b/src/codegen/core/target.rs deleted file mode 100644 index 54885d1..0000000 --- a/src/codegen/core/target.rs +++ /dev/null @@ -1,425 +0,0 @@ -use super::instruction::Register; - -/// Represents different target platforms -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum TargetPlatform { - WindowsX64, - LinuxX64, - MacOSX64, -} - -/// Represents different calling conventions -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CallingConvention { - MicrosoftX64, - SystemV, - AppleX64, -} - -/// Target-specific configuration and behavior -pub trait Target { - /// Get the target platform - fn platform(&self) -> TargetPlatform; - - /// Get the calling convention - fn calling_convention(&self) -> CallingConvention; - - /// Get the target architecture name for comments - fn arch_name(&self) -> &'static str; - - /// Get the calling convention name for comments - fn calling_convention_name(&self) -> &'static str; - - /// Generate assembly file header directives - fn assembly_directives(&self) -> Vec; - - /// Generate data section header - fn data_section_header(&self) -> String; - - /// Generate text section header - fn text_section_header(&self) -> String; - - /// Generate external function declarations - fn external_declarations(&self) -> Vec; - - /// Generate global symbol declarations - fn global_declarations(&self, symbols: &[&str]) -> Vec; - - /// Generate function prologue instructions - fn function_prologue(&self) -> Vec; - - /// Generate function epilogue instructions - fn function_epilogue(&self) -> Vec; - - /// Get parameter passing registers in order - fn parameter_registers(&self) -> Vec; - - /// Get return value register - fn return_register(&self) -> Register; - - /// Get stack pointer register - fn stack_pointer(&self) -> Register; - - /// Get base pointer register - fn base_pointer(&self) -> Register; - - /// Get stack alignment requirement in bytes - fn stack_alignment(&self) -> usize; - - /// Format a string literal for the target platform - fn format_string_literal(&self, label: &str, content: &str) -> String; - - /// Format a function call instruction - fn format_function_call(&self, function_name: &str) -> Vec; - - /// Get the size and alignment for a data type - fn type_info(&self, type_name: &str) -> (usize, usize); // (size, alignment) - - /// Generate platform-specific startup code if needed - fn startup_code(&self) -> Vec; -} - -/// Windows x64 target implementation -pub struct WindowsX64Target; - -impl Target for WindowsX64Target { - fn platform(&self) -> TargetPlatform { - TargetPlatform::WindowsX64 - } - - fn calling_convention(&self) -> CallingConvention { - CallingConvention::MicrosoftX64 - } - - fn arch_name(&self) -> &'static str { - "x86-64 Windows" - } - - fn calling_convention_name(&self) -> &'static str { - "Microsoft x64" - } - - fn assembly_directives(&self) -> Vec { - vec![ - "bits 64".to_string(), - "default rel".to_string(), - ] - } - - fn data_section_header(&self) -> String { - "section .data".to_string() - } - - fn text_section_header(&self) -> String { - "section .text".to_string() - } - - fn external_declarations(&self) -> Vec { - vec![ - "extern printf".to_string(), - "extern exit".to_string(), - ] - } - - fn global_declarations(&self, symbols: &[&str]) -> Vec { - symbols.iter().map(|symbol| format!("global {}", symbol)).collect() - } - - fn function_prologue(&self) -> Vec { - vec![ - "push rbp".to_string(), - "mov rbp, rsp".to_string(), - ] - } - - fn function_epilogue(&self) -> Vec { - vec![ - "mov rsp, rbp".to_string(), - "pop rbp".to_string(), - "ret".to_string(), - ] - } - - fn parameter_registers(&self) -> Vec { - vec![Register::Rcx, Register::Rdx, Register::R8, Register::R9] - } - - fn return_register(&self) -> Register { - Register::Rax - } - - fn stack_pointer(&self) -> Register { - Register::Rsp - } - - fn base_pointer(&self) -> Register { - Register::Rbp - } - - fn stack_alignment(&self) -> usize { - 16 - } - - fn format_string_literal(&self, label: &str, content: &str) -> String { - let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); - format!(" {}: db \"{}\", 10, 0", label, formatted_content) - } - - fn format_function_call(&self, function_name: &str) -> Vec { - vec![format!("call {}", function_name)] - } - - fn type_info(&self, type_name: &str) -> (usize, usize) { - match type_name { - "int" | "i32" => (4, 4), - "float" | "f32" => (4, 4), - "double" | "f64" => (8, 8), - "char" | "i8" => (1, 1), - "ptr" | "pointer" => (8, 8), - _ => (8, 8), // Default to pointer size - } - } - - fn startup_code(&self) -> Vec { - vec![] // Windows doesn't need special startup code for our use case - } -} - -/// Linux x64 target implementation -pub struct LinuxX64Target; - -impl Target for LinuxX64Target { - fn platform(&self) -> TargetPlatform { - TargetPlatform::LinuxX64 - } - - fn calling_convention(&self) -> CallingConvention { - CallingConvention::SystemV - } - - fn arch_name(&self) -> &'static str { - "x86-64 Linux" - } - - fn calling_convention_name(&self) -> &'static str { - "System V ABI" - } - - fn assembly_directives(&self) -> Vec { - vec![ - "bits 64".to_string(), - "default rel".to_string(), - ] - } - - fn data_section_header(&self) -> String { - "section .data".to_string() - } - - fn text_section_header(&self) -> String { - "section .text".to_string() - } - - fn external_declarations(&self) -> Vec { - vec![ - "extern printf".to_string(), - "extern exit".to_string(), - ] - } - - fn global_declarations(&self, symbols: &[&str]) -> Vec { - symbols.iter().map(|symbol| format!("global {}", symbol)).collect() - } - - fn function_prologue(&self) -> Vec { - vec![ - "push rbp".to_string(), - "mov rbp, rsp".to_string(), - ] - } - - fn function_epilogue(&self) -> Vec { - vec![ - "mov rsp, rbp".to_string(), - "pop rbp".to_string(), - "ret".to_string(), - ] - } - - fn parameter_registers(&self) -> Vec { - // System V ABI uses different parameter registers - vec![Register::Rax, Register::Rdx, Register::Rcx, Register::R8, Register::R9] // Note: RDI, RSI would be more accurate but not in our Register enum - } - - fn return_register(&self) -> Register { - Register::Rax - } - - fn stack_pointer(&self) -> Register { - Register::Rsp - } - - fn base_pointer(&self) -> Register { - Register::Rbp - } - - fn stack_alignment(&self) -> usize { - 16 - } - - fn format_string_literal(&self, label: &str, content: &str) -> String { - let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); - format!(" {}: db \"{}\", 10, 0", label, formatted_content) - } - - fn format_function_call(&self, function_name: &str) -> Vec { - vec![format!("call {}", function_name)] - } - - fn type_info(&self, type_name: &str) -> (usize, usize) { - match type_name { - "int" | "i32" => (4, 4), - "float" | "f32" => (4, 4), - "double" | "f64" => (8, 8), - "char" | "i8" => (1, 1), - "ptr" | "pointer" => (8, 8), - _ => (8, 8), // Default to pointer size - } - } - - fn startup_code(&self) -> Vec { - vec![ - "_start:".to_string(), - " call main".to_string(), - " mov rdi, rax".to_string(), - " mov rax, 60".to_string(), - " syscall".to_string(), - ] - } -} - -/// macOS x64 target implementation -pub struct MacOSX64Target; - -impl Target for MacOSX64Target { - fn platform(&self) -> TargetPlatform { - TargetPlatform::MacOSX64 - } - - fn calling_convention(&self) -> CallingConvention { - CallingConvention::AppleX64 - } - - fn arch_name(&self) -> &'static str { - "x86-64 macOS" - } - - fn calling_convention_name(&self) -> &'static str { - "Apple x64 ABI" - } - - fn assembly_directives(&self) -> Vec { - vec![ - "bits 64".to_string(), - "default rel".to_string(), - ] - } - - fn data_section_header(&self) -> String { - "section .data".to_string() - } - - fn text_section_header(&self) -> String { - "section .text".to_string() - } - - fn external_declarations(&self) -> Vec { - vec![ - "extern _printf".to_string(), // macOS prefixes with underscore - "extern _exit".to_string(), - ] - } - - fn global_declarations(&self, symbols: &[&str]) -> Vec { - symbols.iter().map(|symbol| format!("global _{}", symbol)).collect() // macOS prefixes with underscore - } - - fn function_prologue(&self) -> Vec { - vec![ - "push rbp".to_string(), - "mov rbp, rsp".to_string(), - ] - } - - fn function_epilogue(&self) -> Vec { - vec![ - "mov rsp, rbp".to_string(), - "pop rbp".to_string(), - "ret".to_string(), - ] - } - - fn parameter_registers(&self) -> Vec { - // macOS uses System V-like calling convention - vec![Register::Rax, Register::Rdx, Register::Rcx, Register::R8, Register::R9] // Note: RDI, RSI would be more accurate - } - - fn return_register(&self) -> Register { - Register::Rax - } - - fn stack_pointer(&self) -> Register { - Register::Rsp - } - - fn base_pointer(&self) -> Register { - Register::Rbp - } - - fn stack_alignment(&self) -> usize { - 16 - } - - fn format_string_literal(&self, label: &str, content: &str) -> String { - let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); - format!(" {}: db \"{}\", 10, 0", label, formatted_content) - } - - fn format_function_call(&self, function_name: &str) -> Vec { - vec![format!("call _{}", function_name)] // macOS prefixes with underscore - } - - fn type_info(&self, type_name: &str) -> (usize, usize) { - match type_name { - "int" | "i32" => (4, 4), - "float" | "f32" => (4, 4), - "double" | "f64" => (8, 8), - "char" | "i8" => (1, 1), - "ptr" | "pointer" => (8, 8), - _ => (8, 8), // Default to pointer size - } - } - - fn startup_code(&self) -> Vec { - vec![] // macOS doesn't need special startup code for our use case - } -} - -/// Factory function to create target instances -pub fn create_target(platform: TargetPlatform) -> Box { - match platform { - TargetPlatform::WindowsX64 => Box::new(WindowsX64Target), - TargetPlatform::LinuxX64 => Box::new(LinuxX64Target), - TargetPlatform::MacOSX64 => Box::new(MacOSX64Target), - } -} - -/// Helper function to parse target platform from string -pub fn parse_target_platform(target_str: &str) -> Result { - match target_str.to_lowercase().as_str() { - "windows" | "win" | "windows-x64" | "win64" => Ok(TargetPlatform::WindowsX64), - "linux" | "linux-x64" | "linux64" => Ok(TargetPlatform::LinuxX64), - "macos" | "darwin" | "macos-x64" | "darwin-x64" => Ok(TargetPlatform::MacOSX64), - _ => Err(format!("Unknown target platform: {}", target_str)), - } -} \ No newline at end of file diff --git a/src/codegen/core/targets/base.rs b/src/codegen/core/targets/base.rs new file mode 100644 index 0000000..90f7f56 --- /dev/null +++ b/src/codegen/core/targets/base.rs @@ -0,0 +1,80 @@ +use crate::codegen::core::instruction::Register; + +/// Represents different target platforms +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TargetPlatform { + WindowsX64, + LinuxX64, + MacOSX64, +} + +/// Represents different calling conventions +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CallingConvention { + MicrosoftX64, + SystemV, + AppleX64, +} + +/// Target-specific configuration and behavior +pub trait Target { + /// Get the target platform + fn platform(&self) -> TargetPlatform; + + /// Get the calling convention + fn calling_convention(&self) -> CallingConvention; + + /// Get the target architecture name for comments + fn arch_name(&self) -> &'static str; + + /// Get the calling convention name for comments + fn calling_convention_name(&self) -> &'static str; + + /// Generate assembly file header directives + fn assembly_directives(&self) -> Vec; + + /// Generate data section header + fn data_section_header(&self) -> String; + + /// Generate text section header + fn text_section_header(&self) -> String; + + /// Generate external function declarations + fn external_declarations(&self) -> Vec; + + /// Generate global symbol declarations + fn global_declarations(&self, symbols: &[&str]) -> Vec; + + /// Generate function prologue instructions + fn function_prologue(&self) -> Vec; + + /// Generate function epilogue instructions + fn function_epilogue(&self) -> Vec; + + /// Get parameter passing registers in order + fn parameter_registers(&self) -> Vec; + + /// Get return value register + fn return_register(&self) -> Register; + + /// Get stack pointer register + fn stack_pointer(&self) -> Register; + + /// Get base pointer register + fn base_pointer(&self) -> Register; + + /// Get stack alignment requirement in bytes + fn stack_alignment(&self) -> usize; + + /// Format a string literal for the target platform + fn format_string_literal(&self, label: &str, content: &str) -> String; + + /// Format a function call instruction + fn format_function_call(&self, function_name: &str) -> Vec; + + /// Get the size and alignment for a data type + fn type_info(&self, type_name: &str) -> (usize, usize); // (size, alignment) + + /// Generate platform-specific startup code if needed + fn startup_code(&self) -> Vec; +} \ No newline at end of file diff --git a/src/codegen/core/targets/linux.rs b/src/codegen/core/targets/linux.rs new file mode 100644 index 0000000..de981f1 --- /dev/null +++ b/src/codegen/core/targets/linux.rs @@ -0,0 +1,117 @@ +use super::base::{Target, TargetPlatform, CallingConvention}; +use crate::codegen::core::instruction::Register; + +/// Linux x64 target implementation +pub struct LinuxX64Target; + +impl Target for LinuxX64Target { + fn platform(&self) -> TargetPlatform { + TargetPlatform::LinuxX64 + } + + fn calling_convention(&self) -> CallingConvention { + CallingConvention::SystemV + } + + fn arch_name(&self) -> &'static str { + "x86-64 Linux" + } + + fn calling_convention_name(&self) -> &'static str { + "System V ABI" + } + + fn assembly_directives(&self) -> Vec { + vec![ + "bits 64".to_string(), + "default rel".to_string(), + ] + } + + fn data_section_header(&self) -> String { + "section .data".to_string() + } + + fn text_section_header(&self) -> String { + "section .text".to_string() + } + + fn external_declarations(&self) -> Vec { + vec![ + "extern printf".to_string(), + "extern exit".to_string(), + ] + } + + fn global_declarations(&self, symbols: &[&str]) -> Vec { + symbols.iter().map(|symbol| format!("global {}", symbol)).collect() + } + + fn function_prologue(&self) -> Vec { + vec![ + "push rbp".to_string(), + "mov rbp, rsp".to_string(), + ] + } + + fn function_epilogue(&self) -> Vec { + vec![ + "mov rsp, rbp".to_string(), + "pop rbp".to_string(), + "ret".to_string(), + ] + } + + fn parameter_registers(&self) -> Vec { + // System V ABI parameter registers in order + vec![Register::Rdi, Register::Rsi, Register::Rdx, Register::Rcx, Register::R8, Register::R9] + } + + fn return_register(&self) -> Register { + Register::Rax + } + + fn stack_pointer(&self) -> Register { + Register::Rsp + } + + fn base_pointer(&self) -> Register { + Register::Rbp + } + + fn stack_alignment(&self) -> usize { + 16 + } + + fn format_string_literal(&self, label: &str, content: &str) -> String { + let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); + format!(" {}: db \"{}\", 10, 0", label, formatted_content) + } + + fn format_function_call(&self, function_name: &str) -> Vec { + vec![format!("call {}", function_name)] + } + + fn type_info(&self, type_name: &str) -> (usize, usize) { + match type_name { + "int" | "i32" => (4, 4), + "float" | "f32" => (4, 4), + "double" | "f64" => (8, 8), + "char" | "i8" => (1, 1), + "ptr" | "pointer" => (8, 8), + _ => (8, 8), // Default to pointer size + } + } + + fn startup_code(&self) -> Vec { + vec![ + "_start:".to_string(), + " ; Linux entry point".to_string(), + " call main".to_string(), + " ; Exit with return value from main".to_string(), + " mov rdi, rax ; exit code".to_string(), + " mov rax, 60 ; sys_exit".to_string(), + " syscall".to_string(), + ] + } +} \ No newline at end of file diff --git a/src/codegen/core/targets/macos.rs b/src/codegen/core/targets/macos.rs new file mode 100644 index 0000000..e479b7a --- /dev/null +++ b/src/codegen/core/targets/macos.rs @@ -0,0 +1,109 @@ +use super::base::{Target, TargetPlatform, CallingConvention}; +use crate::codegen::core::instruction::Register; + +/// macOS x64 target implementation +pub struct MacOSX64Target; + +impl Target for MacOSX64Target { + fn platform(&self) -> TargetPlatform { + TargetPlatform::MacOSX64 + } + + fn calling_convention(&self) -> CallingConvention { + CallingConvention::AppleX64 + } + + fn arch_name(&self) -> &'static str { + "x86-64 macOS" + } + + fn calling_convention_name(&self) -> &'static str { + "Apple x64 ABI" + } + + fn assembly_directives(&self) -> Vec { + vec![ + "bits 64".to_string(), + "default rel".to_string(), + ] + } + + fn data_section_header(&self) -> String { + "section .data".to_string() + } + + fn text_section_header(&self) -> String { + "section .text".to_string() + } + + fn external_declarations(&self) -> Vec { + vec![ + "extern _printf".to_string(), // macOS prefixes with underscore + "extern _exit".to_string(), + ] + } + + fn global_declarations(&self, symbols: &[&str]) -> Vec { + symbols.iter().map(|symbol| format!("global _{}", symbol)).collect() // macOS prefixes with underscore + } + + fn function_prologue(&self) -> Vec { + vec![ + "push rbp".to_string(), + "mov rbp, rsp".to_string(), + ] + } + + fn function_epilogue(&self) -> Vec { + vec![ + "mov rsp, rbp".to_string(), + "pop rbp".to_string(), + "ret".to_string(), + ] + } + + fn parameter_registers(&self) -> Vec { + // macOS uses System V-like calling convention + vec![Register::Rdi, Register::Rsi, Register::Rdx, Register::Rcx, Register::R8, Register::R9] + } + + fn return_register(&self) -> Register { + Register::Rax + } + + fn stack_pointer(&self) -> Register { + Register::Rsp + } + + fn base_pointer(&self) -> Register { + Register::Rbp + } + + fn stack_alignment(&self) -> usize { + 16 + } + + fn format_string_literal(&self, label: &str, content: &str) -> String { + let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); + format!(" {}: db \"{}\", 10, 0", label, formatted_content) + } + + fn format_function_call(&self, function_name: &str) -> Vec { + vec![format!("call _{}", function_name)] // macOS prefixes with underscore + } + + fn type_info(&self, type_name: &str) -> (usize, usize) { + match type_name { + "int" | "i32" => (4, 4), + "float" | "f32" => (4, 4), + "double" | "f64" => (8, 8), + "char" | "i8" => (1, 1), + "ptr" | "pointer" => (8, 8), + _ => (8, 8), // Default to pointer size + } + } + + fn startup_code(&self) -> Vec { + vec![] // macOS doesn't need special startup code for our use case + } +} \ No newline at end of file diff --git a/src/codegen/core/targets/mod.rs b/src/codegen/core/targets/mod.rs new file mode 100644 index 0000000..c2b1f5f --- /dev/null +++ b/src/codegen/core/targets/mod.rs @@ -0,0 +1,33 @@ +//! Target-specific code generation implementations +//! +//! This module contains platform-specific implementations for different target architectures. +//! Each target implements the `Target` trait to provide platform-specific assembly generation. + +mod base; +mod windows; +mod linux; +mod macos; + +pub use base::{Target, TargetPlatform, CallingConvention}; +pub use windows::WindowsX64Target; +pub use linux::LinuxX64Target; +pub use macos::MacOSX64Target; + +/// Factory function to create target instances +pub fn create_target(platform: TargetPlatform) -> Box { + match platform { + TargetPlatform::WindowsX64 => Box::new(WindowsX64Target), + TargetPlatform::LinuxX64 => Box::new(LinuxX64Target), + TargetPlatform::MacOSX64 => Box::new(MacOSX64Target), + } +} + +/// Helper function to parse target platform from string +pub fn parse_target_platform(target_str: &str) -> Result { + match target_str.to_lowercase().as_str() { + "windows" | "win" | "windows-x64" | "win64" => Ok(TargetPlatform::WindowsX64), + "linux" | "linux-x64" | "linux64" => Ok(TargetPlatform::LinuxX64), + "macos" | "darwin" | "macos-x64" | "darwin-x64" => Ok(TargetPlatform::MacOSX64), + _ => Err(format!("Unknown target platform: {}", target_str)), + } +} \ No newline at end of file diff --git a/src/codegen/core/targets/windows.rs b/src/codegen/core/targets/windows.rs new file mode 100644 index 0000000..fb61ae5 --- /dev/null +++ b/src/codegen/core/targets/windows.rs @@ -0,0 +1,109 @@ +use super::base::{Target, TargetPlatform, CallingConvention}; +use crate::codegen::core::instruction::Register; + +/// Windows x64 target implementation +pub struct WindowsX64Target; + +impl Target for WindowsX64Target { + fn platform(&self) -> TargetPlatform { + TargetPlatform::WindowsX64 + } + + fn calling_convention(&self) -> CallingConvention { + CallingConvention::MicrosoftX64 + } + + fn arch_name(&self) -> &'static str { + "x86-64 Windows" + } + + fn calling_convention_name(&self) -> &'static str { + "Microsoft x64" + } + + fn assembly_directives(&self) -> Vec { + vec![ + "bits 64".to_string(), + "default rel".to_string(), + ] + } + + fn data_section_header(&self) -> String { + "section .data".to_string() + } + + fn text_section_header(&self) -> String { + "section .text".to_string() + } + + fn external_declarations(&self) -> Vec { + vec![ + "extern printf".to_string(), + "extern exit".to_string(), + ] + } + + fn global_declarations(&self, symbols: &[&str]) -> Vec { + symbols.iter().map(|symbol| format!("global {}", symbol)).collect() + } + + fn function_prologue(&self) -> Vec { + vec![ + "push rbp".to_string(), + "mov rbp, rsp".to_string(), + ] + } + + fn function_epilogue(&self) -> Vec { + vec![ + "mov rsp, rbp".to_string(), + "pop rbp".to_string(), + "ret".to_string(), + ] + } + + fn parameter_registers(&self) -> Vec { + // Microsoft x64 calling convention + vec![Register::Rcx, Register::Rdx, Register::R8, Register::R9] + } + + fn return_register(&self) -> Register { + Register::Rax + } + + fn stack_pointer(&self) -> Register { + Register::Rsp + } + + fn base_pointer(&self) -> Register { + Register::Rbp + } + + fn stack_alignment(&self) -> usize { + 16 + } + + fn format_string_literal(&self, label: &str, content: &str) -> String { + let formatted_content = content.replace('\n', "").replace("%f", "%.2f"); + format!(" {}: db \"{}\", 10, 0", label, formatted_content) + } + + fn format_function_call(&self, function_name: &str) -> Vec { + vec![format!("call {}", function_name)] + } + + fn type_info(&self, type_name: &str) -> (usize, usize) { + match type_name { + "int" | "i32" => (4, 4), + "float" | "f32" => (4, 4), + "double" | "f64" => (8, 8), + "char" | "i8" => (1, 1), + "ptr" | "pointer" => (8, 8), + _ => (8, 8), // Default to pointer size + } + } + + fn startup_code(&self) -> Vec { + vec![] // Windows doesn't need special startup code for our use case + } +} \ No newline at end of file