From fb75861a0ef6343215150e4af8f41e2eaeb220cb Mon Sep 17 00:00:00 2001 From: "XU, Hui" Date: Tue, 7 Apr 2026 19:13:21 +0800 Subject: [PATCH] Change hard coded `use std` (#12) * Add English doc comments to `src/ir/gen/conversions.rs` (#4) * Initial plan * Add English comments to src/ir/gen/conversions.rs Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/7792cd15-2408-40a6-9f66-65c0304b2f83 Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * Fix comment: default type is i32, not int Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/4b6b2ee3-8cff-4b11-8274-8dd2d93eda4f Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * Add comprehensive English comments to `src/ir/gen/function_gen.rs` (#5) * Initial plan * Add comprehensive English comments to src/ir/gen/function_gen.rs Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/f0385687-16d6-48bf-a37a-1b55fde78f5b Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * Add comprehensive English doc comments to `src/ir/function.rs` (#6) * Initial plan * Add comprehensive English comments to src/ir/function.rs Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/88a5174f-8e69-4cec-8288-e496f8f1d06b Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * Update the src/ir/module.rs file with new core IR data structures * Add comprehensive English doc comments to `src/ir/gen/module_gen.rs` (#7) * Initial plan * Add comprehensive English comments to 
src/ir/gen/module_gen.rs Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/04d277ba-dda4-4f00-bd5f-66e52c85d27d Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * Add English doc comments to `src/ir/gen/static_eval.rs` (#8) * Initial plan * Add English doc comments to src/ir/gen/static_eval.rs Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/b08d71ed-943f-436e-b293-679a0cd077b9 Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * Replace hardcoded `register_std_functions` with `.teah` file parsing in `handle_use_stmt` (#9) * Initial plan * Refactor handle_use_stmt to parse .teah files instead of hardcoding std functions Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/b7a14ea2-5d90-4b24-9ea4-28fe9a163830 Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * Add comprehensive English comments to tests/tests.rs (#10) * Initial plan * Add comprehensive English comments to tests/tests.rs Agent-Logs-Url: https://github.com/hxuhack/teac/sessions/7f28adf1-1a4a-49d1-bd99-627d65604cb6 Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> * aarch64: skip external function declarations in assembly codegen (#11) * Initial plan * Fix linker duplicate-symbol errors by skipping external function declarations in aarch64 codegen Agent-Logs-Url: 
https://github.com/hxuhack/teac/sessions/5fc2e1bd-cf6a-455a-add3-2ea52a82c97e Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --------- Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com> Co-authored-by: hxuhack <13302178+hxuhack@users.noreply.github.com> --- src/asm/aarch64.rs | 6 + src/ir/error.rs | 13 +++ src/ir/function.rs | 109 ++++++++++++++++++ src/ir/gen/conversions.rs | 56 +++++++++ src/ir/gen/function_gen.rs | 207 +++++++++++++++++++++++++++++++++ src/ir/gen/module_gen.rs | 230 ++++++++++++++++++++++++++++--------- src/ir/gen/static_eval.rs | 51 ++++++++ src/ir/module.rs | 36 +++++- src/main.rs | 12 +- tests/tests.rs | 164 +++++++++++++++++++++++++- 10 files changed, 825 insertions(+), 59 deletions(-) diff --git a/src/asm/aarch64.rs b/src/asm/aarch64.rs index 549dc40..0e5ce44 100644 --- a/src/asm/aarch64.rs +++ b/src/asm/aarch64.rs @@ -78,6 +78,12 @@ impl<'a> Generator for AArch64AsmGenerator<'a> { self.functions.clear(); for func in self.module.function_list.values() { + // Skip external declarations (blocks == None); they are provided by + // the linked object file (e.g. std.o) and must not be emitted as + // assembly symbols, otherwise the linker will report duplicate definitions. 
+ if func.blocks.is_none() { + continue; + } self.functions .push(Self::handle_function(&layouts, func, self.target)?); } diff --git a/src/ir/error.rs b/src/ir/error.rs index 68d72a2..ea487d9 100644 --- a/src/ir/error.rs +++ b/src/ir/error.rs @@ -1,4 +1,5 @@ use crate::ast; +use std::path::PathBuf; use thiserror::Error; #[derive(Debug, Error)] @@ -6,6 +7,18 @@ pub enum Error { #[error("Initialization of structs not supported")] StructInitialization, + #[error("Module '{module_name}' not found: expected header file at '{}'", path.display())] + ModuleNotFound { + module_name: String, + path: PathBuf, + }, + + #[error("Failed to parse module '{module_name}': {message}")] + ModuleParseError { + module_name: String, + message: String, + }, + #[error("Division by zero in constant expression")] DivisionByZero, diff --git a/src/ir/function.rs b/src/ir/function.rs index 723a5e2..5fd87ab 100644 --- a/src/ir/function.rs +++ b/src/ir/function.rs @@ -1,3 +1,11 @@ +//! IR-level function representation and code generation for the `teac` compiler. +//! +//! This module defines the data structures used to represent functions in the +//! intermediate representation (IR), as well as [`FunctionGenerator`], a stateful +//! builder that translates source-level constructs into a flat list of IR statements +//! similar to LLVM IR. Each function is eventually assembled from basic blocks, each +//! of which contains a sequence of [`Stmt`] instructions. + use super::error::Error; use super::module::Registry; use super::stmt::{ArithBinOp, CmpPredicate, Stmt}; @@ -6,9 +14,15 @@ use super::value::{GlobalVariable, LocalVariable, Operand}; use indexmap::IndexMap; use std::fmt::{Display, Formatter}; +/// A label that identifies either a numbered basic block or a named function entry point. +/// +/// Labels are used as branch targets in control-flow instructions and as keys +/// when building a basic-block map during IR lowering. 
#[derive(Clone)] pub enum BlockLabel { + /// A numbered basic block label, displayed as `bb0`, `bb1`, etc. BasicBlock(usize), + /// A named function entry label, displayed as the function's identifier string. Function(String), } @@ -22,37 +36,83 @@ impl Display for BlockLabel { } impl BlockLabel { + /// Returns the string representation of this label, used as a map key. pub fn key(&self) -> String { format!("{}", self) } } +/// A basic block: an ordered sequence of IR statements preceded by a label. +/// +/// In well-formed IR each basic block ends with a terminator instruction +/// (jump, conditional jump, or return). #[derive(Clone)] pub struct BasicBlock { + /// The label that identifies this basic block and serves as a branch target. pub label: BlockLabel, + /// The ordered list of IR statements that form the body of this basic block. pub stmts: Vec, } +/// The final IR representation of a compiled function. +/// +/// This struct is produced after code generation is complete and holds all +/// information needed for subsequent optimization and assembly-emission passes. pub struct Function { + /// The source-level name of the function. pub identifier: String, + /// All local variables declared in the function, keyed by their source-level + /// name. `None` before code generation has finished populating the map. pub local_variables: Option>, + /// The basic blocks that make up the function body, in emission order. + /// `None` before the flat IR statement list has been split into blocks. pub blocks: Option>, + /// The function's formal parameters as a list of local variables. pub arguments: Vec, + /// The next available virtual register index; preserved so that further + /// passes can allocate new temporaries without colliding with existing ones. pub next_vreg: usize, } +/// Stateful builder used during IR generation to translate source-level constructs +/// into a flat list of IR statements for a single function. 
+/// +/// After all statements have been emitted the caller splits the flat `irs` list +/// into [`BasicBlock`]s and wraps them together with the remaining fields into a +/// [`Function`]. pub struct FunctionGenerator<'ir> { + /// Shared type registry containing struct and function type definitions for + /// the whole module. pub registry: &'ir Registry, + /// Reference to the module's global variable map, used during variable lookup. pub global_variables: &'ir IndexMap, + /// Map of currently visible local variables, keyed by their source-level name. + /// Variables are inserted when declared and removed when their enclosing scope + /// exits. pub local_variables: IndexMap, + /// A stack of scopes. Each entry is the list of local variable names introduced + /// in that scope, enabling bulk removal when the scope exits via [`exit_scope`]. + /// + /// [`exit_scope`]: FunctionGenerator::exit_scope scope_locals: Vec>, + /// The flat list of IR statements being accumulated for the current function. pub irs: Vec, + /// The function's formal parameters as local variables. pub arguments: Vec, + /// Counter for allocating unique virtual register indices; incremented by + /// [`alloc_vreg`]. + /// + /// [`alloc_vreg`]: FunctionGenerator::alloc_vreg pub next_vreg: usize, + /// Counter for allocating unique basic block label indices; starts at `1` + /// because index `0` is reserved for the implicit function-entry block. pub next_basic_block: usize, } impl<'ir> FunctionGenerator<'ir> { + /// Constructs a new [`FunctionGenerator`] with empty state, ready to build a + /// function body. Virtual register allocation starts at `0` and basic block + /// label allocation starts at `1`. pub fn new( registry: &'ir Registry, global_variables: &'ir IndexMap, @@ -69,22 +129,38 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Allocates and returns the next unique virtual register index, then advances + /// the internal counter. 
pub fn alloc_vreg(&mut self) -> usize { let idx = self.next_vreg; self.next_vreg += 1; idx } + /// Creates an unnamed temporary [`Operand`] of the given data type, backed by a + /// freshly allocated virtual register. pub fn alloc_temporary(&mut self, dtype: Dtype) -> Operand { Operand::from(LocalVariable::new(dtype, self.alloc_vreg(), None)) } + /// Allocates and returns a new unique [`BlockLabel::BasicBlock`] label, then + /// advances the internal counter. pub fn alloc_basic_block(&mut self) -> BlockLabel { let idx = self.next_basic_block; self.next_basic_block += 1; BlockLabel::BasicBlock(idx) } + /// Resolves a variable name to an [`Operand`]. + /// + /// Lookup order: + /// 1. Local variables in [`local_variables`] (innermost scope wins). + /// 2. Global variables in [`global_variables`]. + /// + /// Returns [`Error::VariableNotDefined`] if the name is not found in either map. + /// + /// [`local_variables`]: FunctionGenerator::local_variables + /// [`global_variables`]: FunctionGenerator::global_variables pub fn lookup_variable(&self, id: &str) -> Result { if let Some(local) = self.local_variables.get(id) { Ok(Operand::from(local)) @@ -97,10 +173,21 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Pushes a new empty lexical scope onto the scope stack. + /// + /// Call this before entering any block (e.g., `{` in the source language) so + /// that variables declared inside can be tracked and later removed by + /// [`exit_scope`]. + /// + /// [`exit_scope`]: FunctionGenerator::exit_scope pub fn enter_scope(&mut self) { self.scope_locals.push(Vec::new()); } + /// Pops the innermost lexical scope from the scope stack and removes all local + /// variables that were introduced in that scope from [`local_variables`]. 
+ /// + /// [`local_variables`]: FunctionGenerator::local_variables pub fn exit_scope(&mut self) { if let Some(locals) = self.scope_locals.pop() { for id in locals { @@ -109,6 +196,11 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Registers a local variable name in the current (innermost) scope so that it + /// will be removed from [`local_variables`] when [`exit_scope`] is called. + /// + /// [`local_variables`]: FunctionGenerator::local_variables + /// [`exit_scope`]: FunctionGenerator::exit_scope pub fn record_scoped_local(&mut self, id: String) { if let Some(scope) = self.scope_locals.last_mut() { scope.push(id); @@ -117,46 +209,63 @@ impl<'ir> FunctionGenerator<'ir> { } impl FunctionGenerator<'_> { + /// Emits a stack-allocation (`alloca`) instruction that reserves space for `dst`. pub fn emit_alloca(&mut self, dst: Operand) { self.irs.push(Stmt::as_alloca(dst)); } + /// Emits a memory-load instruction that reads a value from `ptr` into `dst`. pub fn emit_load(&mut self, dst: Operand, ptr: Operand) { self.irs.push(Stmt::as_load(dst, ptr)); } + /// Emits a memory-store instruction that writes `src` to the address `ptr`. pub fn emit_store(&mut self, src: Operand, ptr: Operand) { self.irs.push(Stmt::as_store(src, ptr)); } + /// Emits a get-element-pointer (GEP) instruction that computes the address of + /// `base_ptr[index]` and stores it in `new_ptr`. pub fn emit_gep(&mut self, new_ptr: Operand, base_ptr: Operand, index: Operand) { self.irs.push(Stmt::as_gep(new_ptr, base_ptr, index)); } + /// Emits an arithmetic binary operation (`op`) on `left` and `right`, storing + /// the result in `dst`. pub fn emit_biop(&mut self, op: ArithBinOp, left: Operand, right: Operand, dst: Operand) { self.irs.push(Stmt::as_biop(op, left, right, dst)); } + /// Emits an integer comparison instruction using predicate `op` on `left` and + /// `right`, storing the boolean result in `dst`. 
pub fn emit_cmp(&mut self, op: CmpPredicate, left: Operand, right: Operand, dst: Operand) { self.irs.push(Stmt::as_cmp(op, left, right, dst)); } + /// Emits a conditional branch instruction that jumps to `true_label` when `cond` + /// is non-zero and to `false_label` otherwise. pub fn emit_cjump(&mut self, cond: Operand, true_label: BlockLabel, false_label: BlockLabel) { self.irs.push(Stmt::as_cjump(cond, true_label, false_label)); } + /// Emits an unconditional branch instruction that transfers control to `target`. pub fn emit_jump(&mut self, target: BlockLabel) { self.irs.push(Stmt::as_jump(target)); } + /// Emits a basic block label marker, signalling the start of a new basic block + /// identified by `label`. pub fn emit_label(&mut self, label: BlockLabel) { self.irs.push(Stmt::as_label(label)); } + /// Emits a function-call instruction that invokes `func_name` with `args`, + /// optionally storing the return value in `result`. pub fn emit_call(&mut self, func_name: String, result: Option, args: Vec) { self.irs.push(Stmt::as_call(func_name, result, args)); } + /// Emits a return instruction, optionally carrying a return value `val`. pub fn emit_return(&mut self, val: Option) { self.irs.push(Stmt::as_return(val)); } diff --git a/src/ir/gen/conversions.rs b/src/ir/gen/conversions.rs index f87b46a..94b1fb4 100644 --- a/src/ir/gen/conversions.rs +++ b/src/ir/gen/conversions.rs @@ -1,7 +1,22 @@ +//! Conversions between AST types and IR types. +//! +//! This module provides trait implementations and helper functions to convert +//! AST-level type representations (`ast::TypeSpecifier`, `ast::VarDecl`, +//! `ast::VarDef`, `ast::VarDeclStmt`) into their corresponding IR-level +//! data types (`Dtype`), as well as implementations of the `Named` trait +//! for extracting identifiers from AST declaration nodes. 
+ use crate::ast; use crate::ir::types::Dtype; use crate::ir::value::Named; +/// Converts an optional AST type specifier into the corresponding base IR data type (`Dtype`). +/// +/// - `Composite` type specifiers (e.g., user-defined structs) map to `Dtype::Struct`. +/// - `Reference` type specifiers (e.g., `&[T]`) map to a pointer to an unsized array, +/// where the element type is resolved recursively. +/// - `BuiltIn` type specifiers (e.g., `i32`) and `None` (absent specifier) both default +/// to `Dtype::I32`. fn base_dtype(type_specifier: &Option) -> Dtype { match type_specifier.as_ref().map(|t| &t.inner) { Some(ast::TypeSpecifierInner::Composite(name)) => Dtype::Struct { @@ -15,18 +30,29 @@ fn base_dtype(type_specifier: &Option) -> Dtype { } } +// --- `Named` trait implementations --- +// These allow AST declaration nodes to expose their identifier strings +// in a uniform way, which is used during IR generation to name variables. + +/// Implements `Named` for a variable declaration (without initializer), +/// returning the declared identifier. impl Named for ast::VarDecl { fn identifier(&self) -> Option { Some(self.identifier.clone()) } } +/// Implements `Named` for a variable definition (declaration with initializer), +/// returning the defined identifier. impl Named for ast::VarDef { fn identifier(&self) -> Option { Some(self.identifier.clone()) } } +/// Implements `Named` for a variable declaration statement, which may be +/// either a pure declaration or a definition. Delegates to the inner variant +/// to retrieve the identifier. impl Named for ast::VarDeclStmt { fn identifier(&self) -> Option { match &self.inner { @@ -36,12 +62,23 @@ impl Named for ast::VarDeclStmt { } } +// --- `From` trait implementations: AST TypeSpecifier -> IR Dtype --- +// These provide infallible conversions from AST type specifiers to IR types. + +/// Converts an owned `ast::TypeSpecifier` into a `Dtype` by delegating to the +/// by-reference implementation. 
impl From for Dtype { fn from(a: ast::TypeSpecifier) -> Self { Self::from(&a) } } +/// Converts a reference to an `ast::TypeSpecifier` into the corresponding `Dtype`. +/// +/// - `BuiltIn` maps to `Dtype::I32` (the only built-in type is `i32`). +/// - `Composite` maps to `Dtype::Struct` with the user-defined type name. +/// - `Reference` maps to a pointer to an unsized array whose element type +/// is recursively converted from the inner type specifier. impl From<&ast::TypeSpecifier> for Dtype { fn from(a: &ast::TypeSpecifier) -> Self { match &a.inner { @@ -57,6 +94,15 @@ impl From<&ast::TypeSpecifier> for Dtype { } } +// --- `TryFrom` trait implementations: AST declarations -> IR Dtype --- +// These are fallible conversions because certain combinations (e.g., struct +// definitions with initializers) are not supported and produce an error. + +/// Converts a variable declaration (`VarDecl`) to its IR data type. +/// +/// First resolves the base type from the optional type specifier, then wraps it +/// in an array type if the declaration is for an array (with a known length), +/// or returns the base type directly for scalar declarations. impl TryFrom<&ast::VarDecl> for Dtype { type Error = crate::ir::Error; @@ -69,10 +115,16 @@ impl TryFrom<&ast::VarDecl> for Dtype { } } +/// Converts a variable definition (`VarDef`) to its IR data type. +/// +/// Similar to the `VarDecl` conversion, but additionally rejects struct types +/// with initializers—struct variables cannot be initialized inline, so +/// attempting to do so returns `Error::StructInitialization`. impl TryFrom<&ast::VarDef> for Dtype { type Error = crate::ir::Error; fn try_from(def: &ast::VarDef) -> Result { + // Struct types cannot have inline initializers; reject early. if let Dtype::Struct { .. 
} = &base_dtype(&def.type_specifier) { return Err(crate::ir::Error::StructInitialization); } @@ -84,6 +136,10 @@ impl TryFrom<&ast::VarDef> for Dtype { } } +/// Converts a variable declaration statement (`VarDeclStmt`) to its IR data type. +/// +/// Delegates to the `TryFrom<&VarDecl>` or `TryFrom<&VarDef>` implementation +/// depending on whether the statement is a pure declaration or a definition. impl TryFrom<&ast::VarDeclStmt> for Dtype { type Error = crate::ir::Error; diff --git a/src/ir/gen/function_gen.rs b/src/ir/gen/function_gen.rs index aab972e..7d2ba84 100644 --- a/src/ir/gen/function_gen.rs +++ b/src/ir/gen/function_gen.rs @@ -1,3 +1,11 @@ +//! IR generation for function bodies. +//! +//! This module translates an AST function definition ([`ast::FnDef`]) into a +//! flat sequence of IR statements by walking each statement and expression node. +//! It handles local variable allocation, control-flow (if / while / break / +//! continue), arithmetic and boolean expressions, array and struct member +//! access, and function calls. + use crate::ast::{self, ArrayInitializer, AssignmentStmt, RightValList}; use crate::ir::function::{BlockLabel, FunctionGenerator}; use crate::ir::stmt::{ArithBinOp, CmpPredicate, StmtInner}; @@ -5,12 +13,29 @@ use crate::ir::types::Dtype; use crate::ir::value::{LocalVariable, Operand}; use crate::ir::Error; +/// Describes how stack storage for a local variable should be handled during IR generation. enum LocalStoragePlan { + /// Storage is deferred: the variable's type will be inferred from its first assignment. Deferred, + /// Emit an `alloca` instruction immediately for the given element type. Alloca(Dtype), } +// ── Function entry-point generation ────────────────────────────────────────── + impl<'ir> FunctionGenerator<'ir> { + /// Generates IR for a complete function definition. 
+ /// + /// Emits the function entry label, allocates stack slots for every argument + /// (alloca + store pattern), lowers each statement in the body, and appends + /// an implicit return if the last instruction is not already a `return`. + /// + /// # Parameters + /// - `from`: the AST node for the function being compiled. + /// + /// # Errors + /// Returns an error if the function is not registered in the type registry, + /// an argument name is redefined, or the return type is unsupported. pub fn generate(&mut self, from: &ast::FnDef) -> Result<(), Error> { let identifier = &from.fn_decl.identifier; let function_type = self @@ -23,30 +48,37 @@ impl<'ir> FunctionGenerator<'ir> { let arguments = function_type.arguments.clone(); let return_dtype = function_type.return_dtype.clone(); + // Emit the function entry label. self.emit_label(BlockLabel::Function(identifier.clone())); + // Spill every argument to the stack (alloca + store) so they are addressable. for (id, dtype) in arguments.iter() { if self.local_variables.contains_key(id) { return Err(Error::VariableRedefinition { symbol: id.clone() }); } + // Allocate a virtual register that carries the incoming argument value. let var = LocalVariable::new(dtype.clone(), self.alloc_vreg(), Some(id.to_string())); self.arguments.push(var.clone()); + // Allocate a stack slot (pointer to the argument type) for the argument. let alloca_var = LocalVariable::new( Dtype::ptr_to(dtype.clone()), self.alloc_vreg(), Some(id.to_string()), ); self.emit_alloca(Operand::from(alloca_var.clone())); + // Store the incoming value into the newly allocated stack slot. self.emit_store(Operand::from(var), Operand::from(alloca_var.clone())); self.local_variables.insert(id.clone(), alloca_var); } + // Lower the function body statement by statement. for stmt in from.stmts.iter() { self.handle_block(stmt, None, None)?; } + // Append an implicit return if the last instruction is not already a return. 
if let Some(stmt) = self.irs.last() { if !matches!(stmt.inner, StmtInner::Return(_)) { match &return_dtype { @@ -65,7 +97,14 @@ impl<'ir> FunctionGenerator<'ir> { } } +// ── Statement handlers ──────────────────────────────────────────────────────── + impl<'ir> FunctionGenerator<'ir> { + /// Dispatches a single code-block statement to the appropriate handler. + /// + /// `con_label` and `bre_label` are the jump targets for `continue` and + /// `break` inside the current loop, respectively. Both are `None` when the + /// statement is not nested inside a loop. pub fn handle_block( &mut self, stmt: &ast::CodeBlockStmt, @@ -88,15 +127,23 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Lowers an assignment statement (`left = right`). + /// + /// If the left-hand side resolves to an `Undecided` type (a bare identifier + /// that has not been declared yet), a new stack slot is allocated with the + /// type inferred from the right-hand side, and the variable is registered as + /// a new scoped local. pub fn handle_assignment_stmt(&mut self, stmt: &AssignmentStmt) -> Result<(), Error> { let mut left = self.handle_left_val(&stmt.left_val)?; let right = self.handle_right_val(&stmt.right_val)?; + // Left side has no concrete type yet — allocate a slot and register as a new local. if left.dtype() == &Dtype::Undecided { let left_name = match &stmt.left_val.inner { ast::LeftValInner::Id(id) => Some(id.clone()), _ => None, }; + // Infer the stack-slot type from the right-hand-side value. let right_type = right.dtype(); let local_val = LocalVariable::new( Dtype::ptr_to(right_type.clone()), @@ -113,10 +160,16 @@ impl<'ir> FunctionGenerator<'ir> { } } + // Write the right-hand-side value into the destination slot. self.emit_store(right, left); Ok(()) } + /// Inserts a local variable into the current scope's symbol table. + /// + /// Records the identifier for scope-exit cleanup via [`record_scoped_local`]. 
+ /// Returns `VariableRedefinition` if a variable with the same name already + /// exists in the symbol table. fn insert_scoped_local( &mut self, identifier: &str, @@ -135,6 +188,9 @@ impl<'ir> FunctionGenerator<'ir> { Ok(()) } + /// Creates a new pointer-typed local variable and emits an `alloca` for it. + /// + /// Returns the resulting [`LocalVariable`] whose type is `*pointee`. fn allocate_pointer_local(&mut self, identifier: &str, pointee: Dtype) -> LocalVariable { let variable = LocalVariable::new( Dtype::ptr_to(pointee), @@ -145,6 +201,9 @@ impl<'ir> FunctionGenerator<'ir> { variable } + /// Allocates stack space for a scalar local and initializes it with `right_val`. + /// + /// Combines [`allocate_pointer_local`] with an immediate `store` instruction. fn define_scalar_local( &mut self, identifier: &str, @@ -156,6 +215,10 @@ impl<'ir> FunctionGenerator<'ir> { local } + /// Determines the storage strategy for a variable *declaration* (no initialiser). + /// + /// Returns `Deferred` for untyped scalars (type to be resolved at first + /// assignment), and `Alloca` for typed scalars and all arrays. fn plan_local_decl_storage(decl: &ast::VarDecl) -> Result { let dtype = decl.type_specifier.as_ref().map(Dtype::from); match (&decl.inner, dtype.as_ref()) { @@ -183,6 +246,10 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Determines the storage strategy for a scalar variable *definition*. + /// + /// Returns `Deferred` when no explicit type annotation is present so that + /// the type is inferred from the initialiser expression. fn plan_local_scalar_def_storage(dtype: &Option) -> Result { match dtype.as_ref() { None => Ok(LocalStoragePlan::Deferred), @@ -194,6 +261,10 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Returns the concrete array [`Dtype`] for an array variable definition. + /// + /// Only `i32` element arrays are currently supported; other element types + /// return `LocalVarDefinitionUnsupported`. 
fn plan_local_array_def_storage(dtype: &Option, len: usize) -> Result { match dtype.as_ref() { None | Some(Dtype::I32) => Ok(Dtype::array_of(Dtype::I32, len)), @@ -201,6 +272,10 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Lowers a local variable declaration (without an initialiser). + /// + /// Allocates storage according to [`plan_local_decl_storage`] and registers + /// the variable in the current scope. pub fn handle_local_var_decl(&mut self, decl: &ast::VarDecl) -> Result<(), Error> { let identifier = decl.identifier.as_str(); let variable = match Self::plan_local_decl_storage(decl)? { @@ -214,6 +289,14 @@ impl<'ir> FunctionGenerator<'ir> { self.insert_scoped_local(identifier, variable) } + /// Stores a flat list of values into a stack-allocated array. + /// + /// For each value, computes an element pointer via GEP using the value's + /// position as the index and emits a `store` instruction. + /// + /// # Parameters + /// - `base_ptr`: operand pointing to the first element of the array. + /// - `vals`: list of right-hand-side values to store sequentially. pub fn init_array(&mut self, base_ptr: Operand, vals: &RightValList) -> Result<(), Error> { for (i, val) in vals.iter().enumerate() { let element_ptr = self.alloc_temporary(Dtype::ptr_to(Dtype::I32)); @@ -229,6 +312,11 @@ impl<'ir> FunctionGenerator<'ir> { Ok(()) } + /// Initializes an array from an [`ArrayInitializer`]. + /// + /// Delegates to [`init_array`] for explicit element lists. For fill + /// initializers, evaluates the fill value once and repeats the store for + /// every index up to `count`. pub fn init_array_from( &mut self, base_ptr: Operand, @@ -252,6 +340,10 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Lowers a local variable definition (declaration with an initializer). + /// + /// Handles both scalar and array initializers, then registers the resulting + /// local variable in the current scope. 
pub fn handle_local_var_def(&mut self, def: &ast::VarDef) -> Result<(), Error> { let identifier = def.identifier.as_str(); let dtype = def.type_specifier.as_ref().map(Dtype::from); @@ -279,6 +371,10 @@ impl<'ir> FunctionGenerator<'ir> { self.insert_scoped_local(identifier, variable) } + /// Lowers a standalone function call statement. + /// + /// Evaluates each argument, allocates a temporary for a non-void return + /// value (which is subsequently discarded), and emits the `call` instruction. pub fn handle_call_stmt(&mut self, stmt: &ast::CallStmt) -> Result<(), Error> { let function_name = stmt.fn_call.qualified_name(); let mut args = Vec::new(); @@ -305,6 +401,15 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Lowers an `if` / `else` statement into branching IR. + /// + /// Allocates three basic blocks (`true_label`, `false_label`, `after_label`) + /// and emits a conditional branch on the boolean condition. Both the + /// then-branch and the (possibly absent) else-branch jump to `after_label` + /// when they finish. + /// + /// `con_label` and `bre_label` are threaded through to nested statements so + /// that `continue` / `break` inside the branches target the correct loop. pub fn handle_if_stmt( &mut self, stmt: &ast::IfStmt, @@ -315,16 +420,20 @@ impl<'ir> FunctionGenerator<'ir> { let false_label = self.alloc_basic_block(); let after_label = self.alloc_basic_block(); + // Evaluate the condition; jump to the appropriate branch. self.handle_bool_unit(&stmt.bool_unit, true_label.clone(), false_label.clone())?; + // Emit the then-branch; a new scope is opened so that any locals are cleaned up. self.emit_label(true_label); self.enter_scope(); for s in stmt.if_stmts.iter() { self.handle_block(s, con_label.clone(), bre_label.clone())?; } self.exit_scope(); + // Jump past the else-branch to the merge point. self.emit_jump(after_label.clone()); + // Emit the (possibly absent) else-branch in its own scope. 
self.emit_label(false_label); self.enter_scope(); if let Some(else_stmts) = &stmt.else_stmts { @@ -335,33 +444,51 @@ impl<'ir> FunctionGenerator<'ir> { self.exit_scope(); self.emit_jump(after_label.clone()); + // Merge point reached by both branches. self.emit_label(after_label); Ok(()) } + /// Lowers a `while` loop into branching IR. + /// + /// Structure: + /// ```text + /// entry → test_label ←── back-edge + /// ↓ true ↓ false + /// true_label false_label + /// ``` + /// `continue` inside the body targets `test_label`; `break` targets `false_label`. pub fn handle_while_stmt(&mut self, stmt: &ast::WhileStmt) -> Result<(), Error> { let test_label = self.alloc_basic_block(); let true_label = self.alloc_basic_block(); let false_label = self.alloc_basic_block(); + // Jump unconditionally into the loop test from the predecessor block. self.emit_jump(test_label.clone()); + // Emit the loop condition test. self.emit_label(test_label.clone()); self.handle_bool_unit(&stmt.bool_unit, true_label.clone(), false_label.clone())?; + // Loop body; `continue` → test_label, `break` → false_label. self.emit_label(true_label); self.enter_scope(); for s in stmt.stmts.iter() { self.handle_block(s, Some(test_label.clone()), Some(false_label.clone()))?; } self.exit_scope(); + // Back-edge: jump back to the loop condition. self.emit_jump(test_label); self.emit_label(false_label); Ok(()) } + /// Lowers a `return` statement. + /// + /// Emits a void `return` when no value is present, or evaluates the return + /// expression and emits a value-carrying `return` otherwise. pub fn handle_return_stmt(&mut self, stmt: &ast::ReturnStmt) -> Result<(), Error> { match &stmt.val { None => { @@ -375,12 +502,18 @@ impl<'ir> FunctionGenerator<'ir> { Ok(()) } + /// Lowers a `continue` statement by jumping to the enclosing loop's test label. + /// + /// Returns `InvalidContinueInst` if called outside of a loop context. 
pub fn handle_continue_stmt(&mut self, con_label: Option) -> Result<(), Error> { let label = con_label.ok_or(Error::InvalidContinueInst)?; self.emit_jump(label); Ok(()) } + /// Lowers a `break` statement by jumping to the enclosing loop's exit label. + /// + /// Returns `InvalidBreakInst` if called outside of a loop context. pub fn handle_break_stmt(&mut self, bre_label: Option) -> Result<(), Error> { let label = bre_label.ok_or(Error::InvalidBreakInst)?; self.emit_jump(label); @@ -388,7 +521,13 @@ impl<'ir> FunctionGenerator<'ir> { } } +// ── Expression and value handlers ───────────────────────────────────────────── + impl<'ir> FunctionGenerator<'ir> { + /// Lowers a comparison expression into a conditional branch. + /// + /// Emits a `cmp` instruction (result type `i1`) followed by a conditional + /// jump to `true_label` or `false_label`. fn handle_com_op_expr( &mut self, expr: &ast::ComExpr, @@ -410,11 +549,17 @@ impl<'ir> FunctionGenerator<'ir> { Ok(()) } + /// Lowers a single expression unit to an [`Operand`]. + /// + /// After resolving the unit's inner form, performs an implicit load for + /// addressable scalar pointers and for global `i32` values, so the caller + /// always receives a value-typed operand rather than a pointer. fn handle_expr_unit(&mut self, unit: &ast::ExprUnit) -> Result { let operand = match &unit.inner { ast::ExprUnitInner::Num(num) => Ok(Operand::from(*num)), ast::ExprUnitInner::Id(id) => { let op = self.lookup_variable(id)?; + // Arrays cannot be used directly as scalar values. let is_array = matches!( op.dtype(), Dtype::Pointer { pointee } if matches!(pointee.as_ref(), Dtype::Array { .. }) @@ -462,6 +607,7 @@ impl<'ir> FunctionGenerator<'ir> { }?; Ok(match operand.dtype() { + // Auto-load: dereference addressable scalar pointers (but leave arrays/structs as-is). Dtype::Pointer { pointee } if operand.is_addressable() && !matches!(pointee.as_ref(), Dtype::Array { .. } | Dtype::Struct { .. 
}) => @@ -470,6 +616,7 @@ impl<'ir> FunctionGenerator<'ir> { self.emit_load(dst.clone(), operand); dst } + // Auto-load global i32 values which are stored behind a pointer. Dtype::I32 if matches!(&operand, Operand::Global(_)) => { let dst = self.alloc_temporary(Dtype::I32); self.emit_load(dst.clone(), operand); @@ -479,6 +626,10 @@ impl<'ir> FunctionGenerator<'ir> { }) } + /// Lowers a reference expression (`&id`) to a pointer to the array's first element. + /// + /// The variable must be (or point to) an array; emits a GEP with index 0 to + /// yield a `*[element_type; ?]` operand. fn handle_reference_expr(&mut self, id: &str) -> Result { let operand = self.lookup_variable(id)?; let element_type = match operand.dtype() { @@ -505,6 +656,7 @@ impl<'ir> FunctionGenerator<'ir> { Ok(target) } + /// Lowers an arithmetic expression (binary operation or a single unit). fn handle_arith_expr(&mut self, expr: &ast::ArithExpr) -> Result { match &expr.inner { ast::ArithExprInner::ArithBiOpExpr(expr) => self.handle_arith_biop_expr(expr), @@ -512,6 +664,7 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Lowers a right-hand-side value (arithmetic or boolean expression) to an [`Operand`]. fn handle_right_val(&mut self, val: &ast::RightVal) -> Result { match &val.inner { ast::RightValInner::ArithExpr(expr) => self.handle_arith_expr(expr), @@ -519,9 +672,15 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Lowers an array element access expression (`arr[idx]`) to an element pointer. + /// + /// Loads the base pointer if it is itself pointer-typed (e.g., a parameter + /// passed as a pointer-to-pointer), then computes the element address via GEP. fn handle_array_expr(&mut self, expr: &ast::ArrayExpr) -> Result { let arr = self.handle_left_val(&expr.arr)?; + // If the array is accessed through a pointer-to-pointer (e.g., a function parameter + // holding a pointer to an array), load the inner pointer first. 
let (arr, arr_dtype) = match arr.dtype() { Dtype::Pointer { pointee } if matches!(pointee.as_ref(), Dtype::Pointer { .. }) => { let loaded = self.alloc_temporary(pointee.as_ref().clone()); @@ -550,6 +709,10 @@ impl<'ir> FunctionGenerator<'ir> { Ok(target) } + /// Lowers a struct member access expression (`s.member`) to a member pointer. + /// + /// Looks up the struct type in the registry, finds the member's byte offset, + /// and emits a GEP to yield a pointer to that member. fn handle_member_expr(&mut self, expr: &ast::MemberExpr) -> Result { let s = self.handle_left_val(&expr.struct_id)?; @@ -583,6 +746,10 @@ impl<'ir> FunctionGenerator<'ir> { Ok(target) } + /// Resolves a left-hand-side value to an addressable [`Operand`] (a pointer). + /// + /// For a simple identifier, looks up the symbol; for array and member + /// expressions, delegates to the respective handlers. fn handle_left_val(&mut self, val: &ast::LeftVal) -> Result { match &val.inner { ast::LeftValInner::Id(id) => self.lookup_variable(id), @@ -591,6 +758,7 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Lowers a binary arithmetic expression (`left op right`) to an `i32` temporary. fn handle_arith_biop_expr(&mut self, expr: &ast::ArithBiOpExpr) -> Result { let left = self.handle_arith_expr(&expr.left)?; let right = self.handle_arith_expr(&expr.right)?; @@ -599,6 +767,10 @@ impl<'ir> FunctionGenerator<'ir> { Ok(dst) } + /// Lowers an array index expression to an `i32` operand. + /// + /// Variable indices are loaded from their stack slot; numeric literals are + /// returned directly as immediate operands. fn handle_index_expr(&mut self, expr: &ast::IndexExpr) -> Result { match &expr.inner { ast::IndexExprInner::Id(id) => { @@ -612,15 +784,24 @@ impl<'ir> FunctionGenerator<'ir> { } } +// ── Boolean expression handlers ─────────────────────────────────────────────── + impl<'ir> FunctionGenerator<'ir> { + /// Lowers a boolean expression to a materialized `i32` value (0 or 1). 
+ /// + /// Allocates a temporary `i32` stack slot, evaluates the expression as a + /// branch (writing 1 on the true path and 0 on the false path via + /// [`emit_bool_materialization`]), then loads and returns the result. fn handle_bool_expr_as_value(&mut self, expr: &ast::BoolExpr) -> Result { let true_label = self.alloc_basic_block(); let false_label = self.alloc_basic_block(); let after_label = self.alloc_basic_block(); + // Allocate stack storage for the materialised boolean result. let bool_evaluated = self.alloc_temporary(Dtype::ptr_to(Dtype::I32)); self.emit_alloca(bool_evaluated.clone()); + // Branch-based evaluation; result is written into bool_evaluated. self.handle_bool_expr_as_branch(expr, true_label.clone(), false_label.clone())?; self.emit_bool_materialization( true_label, @@ -629,12 +810,17 @@ impl<'ir> FunctionGenerator<'ir> { bool_evaluated.clone(), ); + // Load the materialised 0/1 value back into a register. let loaded = self.alloc_temporary(Dtype::I32); self.emit_load(loaded.clone(), bool_evaluated); Ok(loaded) } + /// Lowers a boolean expression as a branching construct. + /// + /// Jumps to `true_label` if the expression evaluates to true, or to + /// `false_label` otherwise. fn handle_bool_expr_as_branch( &mut self, expr: &ast::BoolExpr, @@ -651,6 +837,12 @@ impl<'ir> FunctionGenerator<'ir> { } } + /// Emits the true/false branches that write an integer 0 or 1 into `bool_ptr`. + /// + /// - True path: stores 1 and jumps to `after_label`. + /// - False path: stores 0 and jumps to `after_label`. + /// + /// Finishes by emitting `after_label` as the merge point. fn emit_bool_materialization( &mut self, true_label: BlockLabel, @@ -658,10 +850,12 @@ impl<'ir> FunctionGenerator<'ir> { after_label: BlockLabel, bool_ptr: Operand, ) { + // True path: store 1 and jump to the merge point. 
self.emit_label(true_label); self.emit_store(Operand::from(1), bool_ptr.clone()); self.emit_jump(after_label.clone()); + // False path: store 0 and jump to the merge point. self.emit_label(false_label); self.emit_store(Operand::from(0), bool_ptr); self.emit_jump(after_label.clone()); @@ -669,6 +863,13 @@ impl<'ir> FunctionGenerator<'ir> { self.emit_label(after_label); } + /// Lowers a binary boolean expression (`&&` or `||`) using short-circuit evaluation. + /// + /// For `&&`: evaluate the left operand; jump to `false_label` immediately if + /// false, otherwise fall through to evaluate the right operand. + /// + /// For `||`: evaluate the left operand; jump to `true_label` immediately if + /// true, otherwise fall through to evaluate the right operand. fn handle_bool_biop_expr( &mut self, expr: &ast::BoolBiOpExpr, @@ -678,6 +879,7 @@ impl<'ir> FunctionGenerator<'ir> { let eval_right_label = self.alloc_basic_block(); match &expr.op { ast::BoolBiOp::And => { + // Short-circuit AND: only evaluate the right side if the left side is true. self.handle_bool_expr_as_branch( &expr.left, eval_right_label.clone(), @@ -688,6 +890,7 @@ impl<'ir> FunctionGenerator<'ir> { self.handle_bool_expr_as_branch(&expr.right, true_label, false_label)?; } ast::BoolBiOp::Or => { + // Short-circuit OR: only evaluate the right side if the left side is false. self.handle_bool_expr_as_branch( &expr.left, true_label.clone(), @@ -701,6 +904,10 @@ impl<'ir> FunctionGenerator<'ir> { Ok(()) } + /// Lowers a boolean unit (comparison, sub-expression, or negation) as a branch. + /// + /// For a negation (`!expr`), the true and false labels are swapped so that + /// the inner expression's result is inverted. fn handle_bool_unit( &mut self, unit: &ast::BoolUnit, diff --git a/src/ir/gen/module_gen.rs b/src/ir/gen/module_gen.rs index 163637a..475675d 100644 --- a/src/ir/gen/module_gen.rs +++ b/src/ir/gen/module_gen.rs @@ -1,3 +1,12 @@ +//! IR generation from the AST at the module (translation-unit) level. 
+//! +//! This module implements the [`Generator`] trait for [`IrGenerator`], driving +//! the full compilation pipeline from a parsed AST program to a populated IR +//! module ready for emission. The implementation also contains a collection +//! of private helper methods on `IrGenerator` that handle each category of +//! top-level AST node (use statements, global variable declarations, function +//! declarations, function definitions, and struct definitions). + use crate::ast; use crate::ir::function::{BasicBlock, BlockLabel, Function, FunctionGenerator}; use crate::ir::module::IrGenerator; @@ -9,18 +18,35 @@ use crate::ir::Error; use crate::common::Generator; use crate::ir::value::Named; +use std::fs; use std::io::Write; +/// Implements the two-phase `Generator` trait for the module-level IR generator. impl<'a> Generator for IrGenerator<'a> { type Error = Error; + /// Drive the three-pass IR generation pipeline for the whole program: + /// + /// 1. **Use-statement pass** — process every `use` statement so that + /// external symbols (e.g. the standard library) are pre-registered + /// before any declarations reference them. + /// 2. **Declaration/definition registration pass** — iterate over all + /// top-level elements and register global variables, function + /// declarations, function definitions (signature only), and struct + /// definitions into the module and type registry. + /// 3. **Function body generation pass** — for each function definition, + /// invoke the `FunctionGenerator` to emit flat IR statements, then + /// convert those statements into structured basic blocks via + /// `harvest_function_irs`, and store the result back into the module. fn generate(&mut self) -> Result<(), Error> { let input = self.input; + // Pass 1: handle `use` statements so imported symbols are available. for use_stmt in input.use_stmts.iter() { self.handle_use_stmt(use_stmt)?; } + // Pass 2: register all declarations and definitions (signatures only). 
for elem in input.elements.iter() { use ast::ProgramElementInner::*; match &elem.inner { @@ -31,21 +57,27 @@ impl<'a> Generator for IrGenerator<'a> { } } + // Pass 3: generate IR bodies for every function definition. for elem in input.elements.iter() { use ast::ProgramElementInner::*; if let FnDef(fn_def) = &elem.inner { + // Use a scoped FunctionGenerator so its temporary state is + // dropped before we mutably borrow `self.module` below. let (next_vreg, blocks, local_variables, arguments) = { let mut function_generator = FunctionGenerator::new(&self.registry, &self.module.global_list); function_generator.generate(fn_def)?; let next_vreg = function_generator.next_vreg; + // Convert the flat IR statement list into basic blocks. let blocks = Self::harvest_function_irs(function_generator.irs); let local_variables = function_generator.local_variables; let arguments = function_generator.arguments; (next_vreg, blocks, local_variables, arguments) }; + // Look up the Function entry that was created during pass 2 + // and populate it with the generated body. let func = self .module .function_list @@ -67,21 +99,34 @@ impl<'a> Generator for IrGenerator<'a> { Ok(()) } + /// Emit the complete IR module to the provided writer in textual form. + /// + /// The output is structured as follows: + /// 1. **Header** — target triple and data-layout string. + /// 2. **Struct type definitions** — one line per registered struct type. + /// 3. **Global variables** — all global variable declarations/definitions. + /// 4. **Functions** — for each function, either a full definition (if a + /// body is present) or an external declaration (if body is absent). fn output(&self, w: &mut W) -> Result<(), Error> { let mut printer = IrPrinter::new(w); + // Emit the LLVM-style target triple and data layout header. printer.emit_header(Self::TARGET_TRIPLE, Self::TARGET_DATALAYOUT)?; + // Emit all struct type definitions collected during IR generation. 
for (name, st) in self.registry.struct_types.iter() { printer.emit_struct_type(name, st)?; } printer.emit_newline()?; + // Emit all global variable declarations and definitions. for global in self.module.global_list.values() { printer.emit_global(global)?; } printer.emit_newline()?; + // Emit each function — as a definition if it has a body, or as an + // external declaration otherwise. for func in self.module.function_list.values() { let func_type = self .registry @@ -101,67 +146,95 @@ impl<'a> Generator for IrGenerator<'a> { } } +/// Private helper methods on `IrGenerator` for each category of top-level AST node. impl<'a> IrGenerator<'a> { + /// Process a single `use` statement from the source program. + /// + /// Resolves the path to `{module_name}.teah` relative to the source + /// file's directory (`self.source_dir`), reads and parses it using + /// the existing [`crate::parser::Parser`], and registers every + /// `fn` declaration found in that header into the type registry + /// and module function list. + /// + /// The function name stored in the registry is qualified with the + /// module prefix — e.g. a declaration `fn getint() -> i32;` in + /// `std.teah` is registered as `"std::getint"` — to match the + /// `std::getint()` call-site syntax used in TeaLang source files. + /// + /// # Errors + /// + /// Returns [`Error::ModuleNotFound`] if the `.teah` file does not + /// exist, [`Error::ModuleParseError`] if it cannot be parsed, and + /// propagates any [`Error::Io`] encountered while reading the file.
fn handle_use_stmt(&mut self, use_stmt: &ast::UseStmt) -> Result<(), Error> { - if use_stmt.module_name == "std" { - self.register_std_functions()?; + let module_name = &use_stmt.module_name; + let header_path = self.source_dir.join(format!("{module_name}.teah")); + + if !header_path.exists() { + return Err(Error::ModuleNotFound { + module_name: module_name.clone(), + path: header_path, + }); } - Ok(()) - } - fn register_std_functions(&mut self) -> Result<(), Error> { - let std_functions = vec![ - ("std::getint", vec![], Dtype::I32), - ("std::getch", vec![], Dtype::I32), - ( - "std::putint", - vec![("a".to_string(), Dtype::I32)], - Dtype::Void, - ), - ( - "std::putch", - vec![("a".to_string(), Dtype::I32)], - Dtype::Void, - ), - ( - "std::timer_start", - vec![("lineno".to_string(), Dtype::I32)], - Dtype::Void, - ), - ( - "std::timer_stop", - vec![("lineno".to_string(), Dtype::I32)], - Dtype::Void, - ), - ( - "std::putarray", - vec![ - ("n".to_string(), Dtype::I32), - ( - "a".to_string(), - Dtype::ptr_to(Dtype::Array { - element: Box::new(Dtype::I32), - length: None, - }), - ), - ], - Dtype::Void, - ), - ]; - - for (name, arguments, return_dtype) in std_functions { - self.registry.function_types.insert( - name.to_string(), - FunctionType { - return_dtype, - arguments, - }, - ); + let source = fs::read_to_string(&header_path)?; + let mut parser = crate::parser::Parser::new(&source); + parser.generate().map_err(|e| Error::ModuleParseError { + module_name: module_name.clone(), + message: e.to_string(), + })?; + + if let Some(program) = parser.program { + for elem in program.elements.iter() { + // Header files are expected to contain only `fn` declarations. + // Other element kinds (global variables, struct definitions, + // function bodies) are not valid in a `.teah` file and are + // silently skipped. 
+ if let ast::ProgramElementInner::FnDeclStmt(fn_decl_stmt) = &elem.inner { + // Qualify the function name with the module prefix so that + // call sites such as `std::getint()` resolve correctly. + let mut prefixed_decl = fn_decl_stmt.fn_decl.as_ref().clone(); + prefixed_decl.identifier = + format!("{module_name}::{}", prefixed_decl.identifier); + self.handle_fn_decl(&prefixed_decl)?; + } + } } Ok(()) } + /// Convert a flat list of IR statements produced by `FunctionGenerator` + /// into a list of [`BasicBlock`]s. + /// + /// ## Basic-block construction + /// + /// The flat statement list uses `Label` pseudo-instructions as block + /// boundaries. This function walks the list and starts a new basic block + /// each time it encounters a `Label`. When the *next* label is seen (or + /// the end of the list is reached) the accumulated statements are flushed + /// into the current block. + /// + /// Statements that appear before the first label, or after a terminator + /// (`Return`, `CJump`, or `Jump`) within the same block, are **dead code** + /// and are silently dropped. The `terminated` flag tracks whether the + /// current block has already received a terminator so that subsequent + /// statements can be skipped until the next label is seen. + /// + /// ## Alloca hoisting + /// + /// After all blocks are formed, any `Alloca` instruction found in a + /// non-entry block is moved to the *beginning* of the entry block (block + /// index 0). This matches the LLVM convention that all stack allocations + /// should appear in the function's entry block, which simplifies later + /// analyses and code generation. + /// + /// ## Empty-block cleanup + /// + /// Once allocas have been hoisted some non-entry blocks may be left with + /// no statements (e.g. a block that contained *only* allocas). Such + /// blocks are removed because an empty basic block (one with a label but + /// no terminator) is invalid IR. 
fn harvest_function_irs(irs: Vec) -> Vec { let mut blocks = Vec::new(); let mut label: Option = None; @@ -171,6 +244,7 @@ impl<'a> IrGenerator<'a> { for stmt in irs { match &stmt.inner { StmtInner::Label(l) => { + // Finalise the previous block (if any) and start a new one. if let Some(prev_label) = label.take() { blocks.push(BasicBlock { label: prev_label, @@ -181,6 +255,8 @@ impl<'a> IrGenerator<'a> { terminated = false; } _ => { + // Drop statements that precede the first label or follow a + // terminator — they are unreachable (dead) code. if label.is_none() || terminated { continue; } @@ -225,6 +301,19 @@ impl<'a> IrGenerator<'a> { blocks } + /// Process a global variable declaration or definition. + /// + /// * Extracts the identifier and resolves the data type from the AST node. + /// * If the node is a *definition* (not just a declaration), the + /// initializer list is evaluated as a vector of static (compile-time + /// constant) values: + /// - **Array — explicit list**: each element value is evaluated + /// individually via `handle_right_val_static`. + /// - **Array — fill**: a single value is repeated `count` times. + /// - **Scalar**: a single-element vector wrapping the scalar value. + /// * A [`GlobalVariable`] is inserted into the module's global list. + /// If an entry with the same identifier already exists the function + /// returns a [`Error::VariableRedefinition`] error. fn handle_global_var_decl(&mut self, stmt: &ast::VarDeclStmt) -> Result<(), Error> { let identifier = match stmt.identifier() { Some(id) => id, @@ -270,6 +359,19 @@ impl<'a> IrGenerator<'a> { }) } + /// Process a function declaration (`fn foo(...) -> T;`). + /// + /// Steps: + /// 1. Collect each parameter's name and data type. Array parameters are + /// rejected outright (`Error::ArrayParameterNotAllowed`). + /// 2. Build a [`FunctionType`] from the parameter list and the optional + /// return type (defaults to `void` if absent). + /// 3. 
Insert the function type into the registry. If a type with the + /// same identifier already exists and *differs* from the new one, a + /// [`Error::ConflictedFunction`] error is returned. Identical + /// re-declarations are silently accepted. + /// 4. Insert a skeleton [`Function`] (body-less) into the module's + /// function list so that the printer can emit an external declaration. fn handle_fn_decl(&mut self, decl: &ast::FnDecl) -> Result<(), Error> { let identifier = decl.identifier.clone(); @@ -317,6 +419,16 @@ impl<'a> IrGenerator<'a> { Ok(()) } + /// Process a function definition (`fn foo(...) -> T { ... }`). + /// + /// This pass only handles the *signature*; the body is generated later in + /// `generate`'s third pass. + /// + /// * If no prior declaration exists for this function, delegate to + /// `handle_fn_decl` to register the signature. + /// * If a prior declaration already exists, verify that the definition's + /// signature matches it exactly; a mismatch yields + /// [`Error::DeclDefMismatch`]. fn handle_fn_def(&mut self, stmt: &ast::FnDef) -> Result<(), Error> { let identifier = stmt.fn_decl.identifier.clone(); @@ -334,6 +446,20 @@ impl<'a> IrGenerator<'a> { Ok(()) } + /// Process a struct type definition. + /// + /// Iterates over the struct's member declarations in order, resolving each + /// member's base type and computing its layout offset (zero-based index + /// within the struct). + /// + /// * If a member's type is itself a struct, the referenced struct type + /// must already be registered in the type registry; otherwise an + /// [`Error::UndefinedStructMemberType`] error is returned. This + /// enforces forward-declaration ordering for nested struct types. + /// * Array members are expanded to a `Dtype::Array` wrapping the base + /// element type and the declared length. + /// * The completed [`StructType`] is inserted into the registry under the + /// struct's identifier. 
fn handle_struct_def(&mut self, struct_def: &ast::StructDef) -> Result<(), Error> { let identifier = struct_def.identifier.clone(); let mut elements = Vec::new(); diff --git a/src/ir/gen/static_eval.rs b/src/ir/gen/static_eval.rs index 16747a0..9120c3c 100644 --- a/src/ir/gen/static_eval.rs +++ b/src/ir/gen/static_eval.rs @@ -2,7 +2,17 @@ use crate::ast; use crate::ir::module::IrGenerator; use crate::ir::Error; +/// Static evaluation methods for the IR generator. +/// +/// These functions perform compile-time (static) evaluation of constant expressions +/// from the AST, folding them into concrete `i32` values. This is used for constant +/// folding during IR generation — expressions composed entirely of literals and +/// constant operations can be reduced to a single integer value at compile time. impl IrGenerator<'_> { + /// Statically evaluates a right-hand-side value. + /// + /// Dispatches to either arithmetic or boolean expression evaluation + /// depending on the variant of the right value. pub fn handle_right_val_static(r: &ast::RightVal) -> Result { match &r.inner { ast::RightValInner::ArithExpr(expr) => Self::handle_arith_expr_static(expr), @@ -10,6 +20,10 @@ impl IrGenerator<'_> { } } + /// Statically evaluates an arithmetic expression. + /// + /// An arithmetic expression is either a binary operation (e.g., `a + b`) + /// or a single expression unit (e.g., a literal number or parenthesized expression). pub fn handle_arith_expr_static(expr: &ast::ArithExpr) -> Result { match &expr.inner { ast::ArithExprInner::ArithBiOpExpr(expr) => Self::handle_arith_biop_expr_static(expr), @@ -17,6 +31,10 @@ impl IrGenerator<'_> { } } + /// Statically evaluates a boolean expression. + /// + /// A boolean expression is either a binary boolean operation (e.g., `a && b`) + /// or a single boolean unit (e.g., a comparison or negation). 
pub fn handle_bool_expr_static(expr: &ast::BoolExpr) -> Result { match &expr.inner { ast::BoolExprInner::BoolBiOpExpr(expr) => Self::handle_bool_biop_expr_static(expr), @@ -24,6 +42,11 @@ impl IrGenerator<'_> { } } + /// Statically evaluates an arithmetic binary operation expression. + /// + /// Recursively evaluates both the left and right operands, then applies + /// the operator (Add, Sub, Mul, Div). Uses checked arithmetic to detect + /// integer overflow and division by zero, returning appropriate errors. pub fn handle_arith_biop_expr_static(expr: &ast::ArithBiOpExpr) -> Result { let left = Self::handle_arith_expr_static(&expr.left)?; let right = Self::handle_arith_expr_static(&expr.right)?; @@ -35,6 +58,13 @@ impl IrGenerator<'_> { } } + /// Statically evaluates an expression unit. + /// + /// An expression unit can be: + /// - A numeric literal (`Num`), which is returned directly. + /// - A parenthesized arithmetic expression, which is evaluated recursively. + /// - Any other variant (e.g., a variable reference), which cannot be statically + /// evaluated and results in an `InvalidExprUnit` error. pub fn handle_expr_unit_static(expr: &ast::ExprUnit) -> Result { match &expr.inner { ast::ExprUnitInner::Num(num) => Ok(*num), @@ -45,6 +75,11 @@ impl IrGenerator<'_> { } } + /// Statically evaluates a boolean binary operation expression (AND / OR). + /// + /// Evaluates both operands, converts them to booleans (non-zero is true), + /// and applies the logical AND or OR operator. The result is returned as + /// an `i32` (1 for true, 0 for false). pub fn handle_bool_biop_expr_static(expr: &ast::BoolBiOpExpr) -> Result { let left = Self::handle_bool_expr_static(&expr.left)? != 0; let right = Self::handle_bool_expr_static(&expr.right)? != 0; @@ -55,6 +90,12 @@ impl IrGenerator<'_> { } } + /// Statically evaluates a boolean unit. + /// + /// A boolean unit can be: + /// - A comparison expression (e.g., `a < b`). + /// - A nested boolean expression (parenthesized). 
+ /// - A unary boolean operation (e.g., `!cond`). pub fn handle_bool_unit_static(unit: &ast::BoolUnit) -> Result { match &unit.inner { ast::BoolUnitInner::ComExpr(expr) => Self::handle_com_op_expr_static(expr), @@ -63,6 +104,11 @@ impl IrGenerator<'_> { } } + /// Statically evaluates a comparison expression. + /// + /// Evaluates both the left and right operands as arithmetic expression units, + /// then applies the comparison operator (Lt, Eq, Ge, Gt, Le, Ne). + /// Returns 1 if the comparison is true, 0 otherwise. pub fn handle_com_op_expr_static(expr: &ast::ComExpr) -> Result { let left = Self::handle_expr_unit_static(&expr.left)?; let right = Self::handle_expr_unit_static(&expr.right)?; @@ -76,6 +122,11 @@ impl IrGenerator<'_> { } } + /// Statically evaluates a boolean unary operation expression. + /// + /// Currently only the `Not` operator is supported, which inverts the boolean + /// value of the inner condition (0 becomes 1, non-zero becomes 0). + /// For any other unary operator, returns 0 as a default. pub fn handle_bool_uop_expr_static(expr: &ast::BoolUOpExpr) -> Result { if expr.op == ast::BoolUOp::Not { Ok((Self::handle_bool_unit_static(&expr.cond)? == 0) as i32) diff --git a/src/ir/module.rs b/src/ir/module.rs index 94893c4..d754553 100644 --- a/src/ir/module.rs +++ b/src/ir/module.rs @@ -1,33 +1,64 @@ +//! This module defines the core IR (Intermediate Representation) data structures, +//! including the Module, Registry, and IrGenerator used for code generation. + use super::function::Function; use super::types::FunctionType; use super::value::GlobalVariable; use crate::ast; use indexmap::IndexMap; +use std::path::PathBuf; use super::types::StructType; +/// A registry that holds type definitions used during IR generation. +/// It stores struct type definitions and function type signatures +/// that are referenced throughout the compilation process. pub struct Registry { + /// A map of struct type names to their corresponding struct type definitions. 
pub struct_types: IndexMap, + /// A map of function type names to their corresponding function type signatures. pub function_types: IndexMap, } +/// Represents a compiled module containing all global variables and functions. +/// This is the top-level container for the generated IR output. pub struct Module { + /// A map of global variable names to their definitions. pub global_list: IndexMap, + /// A map of function names to their compiled function representations. pub function_list: IndexMap, } +/// The main IR generator that transforms an AST program into IR. +/// It holds a reference to the input AST, the output module, and +/// a registry of type definitions used during the generation process. pub struct IrGenerator<'a> { + /// A reference to the input AST program to be compiled. pub input: &'a ast::Program, + /// The directory containing the source file being compiled. + /// Used to resolve module header files (e.g. `std.teah`) relative + /// to the source file when processing `use` statements. + pub source_dir: PathBuf, + /// The output module that accumulates generated IR constructs. pub module: Module, + /// The registry of type definitions available during IR generation. pub registry: Registry, } impl<'a> IrGenerator<'a> { + /// The target triple specifying the architecture, vendor, and OS for code generation. pub(crate) const TARGET_TRIPLE: &'static str = "aarch64-unknown-linux-gnu"; + /// The target data layout string describing the memory layout conventions + /// (endianness, alignment, pointer sizes, etc.) for the target platform. pub(crate) const TARGET_DATALAYOUT: &'static str = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; - pub fn new(input: &'a ast::Program) -> Self { + /// Creates a new `IrGenerator` with the given AST program as input + /// and the directory containing the source file. + /// The `source_dir` is used to resolve module header files when + /// processing `use` statements. 
+ /// Initializes an empty module and an empty type registry. + pub fn new(input: &'a ast::Program, source_dir: PathBuf) -> Self { let module = Module { global_list: IndexMap::new(), function_list: IndexMap::new(), @@ -38,8 +69,9 @@ impl<'a> IrGenerator<'a> { }; Self { input, + source_dir, module, registry, } } -} +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 47a324f..ae5c051 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,7 @@ use common::{Generator, Target}; use std::{ fs::{self, File}, io::{self, BufWriter, Write}, - path::Path, + path::{Path, PathBuf}, }; #[derive(Copy, Clone, Debug, PartialEq, ValueEnum)] @@ -79,7 +79,15 @@ fn run() -> Result<()> { .program .as_ref() .context("internal parser state missing AST after parse")?; - let mut ir_gen = ir::IrGenerator::new(ast); + let input_path = Path::new(&cli.input); + // `Path::parent()` returns `Some("")` for a bare filename (e.g. "main.tea"), + // not `None`, so we filter the empty case and fall back to the current directory. + let source_dir = input_path + .parent() + .filter(|p| !p.as_os_str().is_empty()) + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| PathBuf::from(".")); + let mut ir_gen = ir::IrGenerator::new(ast, source_dir); ir_gen.generate().context("failed to generate IR")?; let pass_manager = opt::FunctionPassManager::with_default_pipeline(); diff --git a/tests/tests.rs b/tests/tests.rs index 33827da..44cc205 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,6 +1,34 @@ -// Integration tests for the TeaLang compiler. -// Supports: Native AArch64 Linux, x86/x86_64 Linux (cross-compile + QEMU), -// macOS AArch64 (native), macOS x86_64 (Docker). +/* + * Integration tests for the TeaLang compiler. 
+ * + * Supported platforms: + * - Native AArch64 Linux + * - x86/x86_64 Linux (cross-compile with aarch64-linux-gnu-gcc + QEMU) + * - macOS AArch64 / Apple Silicon (native cc toolchain) + * - macOS x86_64 (Docker linux/arm64 emulation) + * + * Manual compilation equivalent (using the `dfs` test case as an example): + * + * Step 1 – Compile TeaLang source to assembly. + * The compiler must be invoked from inside the test-case directory so that + * `std.teah` is resolved as `./std.teah`: + * + * cd tests/dfs && mkdir -p build + * ../../target/debug/teac dfs.tea --emit asm -o build/dfs.s + * + * Step 2 – Compile the C standard library to an object file (once per platform): + * + * gcc -c tests/std/std.c -o tests/std/std.o # Linux native + * cc -c tests/std/std.c -o tests/std/std-macos.o # macOS AArch64 + * aarch64-linux-gnu-gcc -c tests/std/std.c -o tests/std/std-linux.o # cross-compile + * + * Step 3 – Link assembly + stdlib object into an executable and run: + * + * gcc build/dfs.s ../std/std.o -o build/dfs # Linux native + * cc build/dfs.s ../std/std-macos.o -o build/dfs # macOS AArch64 + * aarch64-linux-gnu-gcc build/dfs.s ../std/std-linux.o -o build/dfs -static # cross-compile + * qemu-aarch64 build/dfs < dfs.in # run via QEMU + */ use std::fs::{self, File, OpenOptions}; use std::io::{self, Read, Write}; @@ -10,14 +38,20 @@ use std::sync::Once; static INIT: Once = Once::new(); +/// Returns `true` when running natively on macOS AArch64 (Apple Silicon). fn is_native_macos() -> bool { cfg!(all(target_os = "macos", target_arch = "aarch64")) } +/// Returns `true` when running on macOS with a non-AArch64 host (e.g., Intel Mac). +/// In this configuration Docker is used to emulate an AArch64 Linux environment. fn is_docker_macos() -> bool { cfg!(all(target_os = "macos", not(target_arch = "aarch64"))) } +/// Returns `true` when running on x86 or x86_64 Linux. +/// In this configuration the AArch64 cross-compiler and QEMU are used to +/// build and run the test binaries. 
fn is_cross_linux() -> bool { cfg!(all( target_os = "linux", @@ -25,6 +59,8 @@ fn is_cross_linux() -> bool { )) } +/// Checks whether `cmd` is available on PATH by running `which <cmd>`. +/// Returns `true` if the command is found and `false` otherwise. fn command_exists(cmd: &str) -> bool { Command::new("which") .arg(cmd) @@ -35,6 +71,9 @@ fn command_exists(cmd: &str) -> bool { .unwrap_or(false) } +/// Verifies that all external tools required for the current platform are +/// installed. Panics with a human-readable install hint if a required tool +/// is missing. fn ensure_cross_tools() { if is_native_macos() { if !command_exists("cc") { @@ -79,6 +118,10 @@ fn ensure_cross_tools() { } } +/// Returns the platform-specific path for the compiled std object file: +/// - macOS AArch64 → `tests/std/std-macos.o` +/// - Docker macOS or cross-compile Linux → `tests/std/std-linux.o` +/// - Native AArch64 Linux → `tests/std/std.o` fn get_std_o_path() -> PathBuf { let project_root = Path::new(env!("CARGO_MANIFEST_DIR")); let std_dir = project_root.join("tests").join("std"); @@ -91,6 +134,8 @@ fn get_std_o_path() -> PathBuf { } } +/// Compiles `tests/std/std.c` using the system `cc` toolchain on macOS AArch64, +/// producing the object file at `o_path`. fn compile_std_native_macos(std_dir: &Path, o_path: &Path) { let status = Command::new("cc") .arg("-c") @@ -110,6 +155,9 @@ fn compile_std_native_macos(std_dir: &Path, o_path: &Path) { ); } +/// Compiles `tests/std/std.c` inside a `linux/arm64` Docker container so that +/// the resulting object file is an AArch64 ELF object (compatible with the +/// cross-compiled test binaries). fn compile_std_in_docker(std_dir: &Path, o_path: &Path) { let o_name = o_path.file_name().unwrap().to_str().unwrap(); @@ -138,6 +186,8 @@ fn compile_std_in_docker(std_dir: &Path, o_path: &Path) { ); } +/// Compiles `tests/std/std.c` using the AArch64 cross-compiler +/// (`aarch64-linux-gnu-gcc`) on an x86/x86_64 Linux host.
fn compile_std_cross_linux(std_dir: &Path, o_path: &Path) { let status = Command::new("aarch64-linux-gnu-gcc") .arg("-c") @@ -157,7 +207,13 @@ fn compile_std_cross_linux(std_dir: &Path, o_path: &Path) { ); } +/// Ensures the standard-library object file is up-to-date before any test +/// runs. Uses a [`Once`] guard so the build happens **at most once** per +/// process even when multiple tests run in parallel. Rebuilds the object +/// only when `std.c` is newer than the existing `.o` file. fn ensure_std() { + // INIT is a process-wide Once flag; the closure runs exactly once no + // matter how many tests call ensure_std() concurrently. INIT.call_once(|| { ensure_cross_tools(); @@ -166,6 +222,8 @@ fn ensure_std() { let c_path = std_dir.join("std.c"); let o_path = get_std_o_path(); + // Rebuild only when std.c is newer than the existing .o (mtime comparison), + // or when the .o does not yet exist. Missing std.c is a fatal error. let needs_build = match (fs::metadata(&c_path), fs::metadata(&o_path)) { (Ok(c_meta), Ok(o_meta)) => match (c_meta.modified(), o_meta.modified()) { (Ok(c_m), Ok(o_m)) => c_m > o_m, @@ -211,6 +269,18 @@ fn ensure_std() { }); } +/// Invokes the `teac` compiler binary with `--emit asm` to produce an assembly +/// file from a TeaLang source file. +/// +/// `dir` is set as the working directory for the compiler process. The +/// `input_file` argument is intentionally passed as a **bare filename** (no +/// directory component) so that `teac` resolves `source_dir` to `.`, which +/// is the working directory — i.e. `dir`. This ensures that the `use std` +/// statement in the source file finds `std.teah` as `./std.teah` inside the +/// test-case directory. +/// +/// On Docker-macOS hosts the `--target linux` flag is added so that `teac` +/// emits Linux AArch64 assembly instead of macOS assembly. 
#[inline(always)] fn launch(dir: &PathBuf, input_file: &str, output_file: &str) -> Output { let tool = Path::new(env!("CARGO_BIN_EXE_teac")); @@ -230,6 +300,10 @@ fn launch(dir: &PathBuf, input_file: &str, output_file: &str) -> Output { .expect("Failed to execute teac") } +/// Normalizes a string for whitespace-insensitive comparison: collapses runs +/// of whitespace within each line into a single space, drops blank lines, and +/// appends a trailing newline. Used to compare expected vs. actual output +/// without being sensitive to trailing spaces or blank lines. fn normalize_for_diff_bb(s: &str) -> String { let mut out = Vec::new(); for line in s.lines() { @@ -246,6 +320,9 @@ fn normalize_for_diff_bb(s: &str) -> String { } } +/// Reads the file at `path` into a `String`. Returns `Ok(None)` if the file +/// does not exist (instead of propagating a `NotFound` error), and `Err` for +/// any other I/O error. fn read_to_string_if_exists(path: &Path) -> io::Result<Option<String>> { match fs::read_to_string(path) { Ok(s) => Ok(Some(s)), @@ -254,12 +331,17 @@ fn read_to_string_if_exists(path: &Path) -> io::Result<Option<String>> { } } +/// Runs a pre-configured [`Command`] and returns `(exit_code, stdout, stderr)`. fn run_capture(cmd: &mut Command) -> io::Result<(i32, Vec<u8>, Vec<u8>)> { let output = cmd.output()?; let code = output.status.code().unwrap_or(-1); Ok((code, output.stdout, output.stderr)) } +/// Appends `line` followed by a newline to the file at `path`, creating the +/// file if it does not exist. Used to append the program's exit code as the +/// final line of the actual output file so it can be compared with the golden +/// `*.out` file.
fn append_line<P: AsRef<Path>>(path: P, line: &str) { let mut f = OpenOptions::new() .create(true) @@ -269,6 +351,17 @@ fn append_line<P: AsRef<Path>>(path: P, line: &str) { writeln!(f, "{line}").expect("Failed to append line"); } +/// Links assembly with `std_o` inside a `linux/arm64` Docker container +/// producing a statically-linked AArch64 executable, then runs it inside a +/// minimal Debian container. +/// +/// Two-phase design: +/// - **Link phase**: uses the `gcc:latest` Docker image which contains the +/// full GNU toolchain. +/// - **Run phase**: uses `debian:bookworm-slim` (no compiler) for a lighter +/// runtime image; the executable is mounted read-only from the host. +/// +/// Optional `input` is piped to the program's stdin when provided. fn link_and_run_in_docker( build_dir: &Path, asm_name: &str, @@ -351,6 +444,9 @@ fn link_and_run_in_docker( } } +/// Links assembly and the stdlib object using the AArch64 cross-linker +/// (`aarch64-linux-gnu-gcc`) with `-static`, producing a statically-linked +/// AArch64 ELF executable. Returns `(exit_code, stderr_bytes)`. fn link_cross_linux( build_dir: &Path, asm_path: &Path, @@ -371,6 +467,8 @@ fn link_cross_linux( Ok((output.status.code().unwrap_or(-1), output.stderr)) } +/// Runs an AArch64 ELF executable under `qemu-aarch64`. Optional `input` is +/// piped to stdin. Returns `(exit_code, stdout_bytes, stderr_bytes)`. fn run_with_qemu(exe: &Path, input: Option<&Path>) -> io::Result<(i32, Vec<u8>, Vec<u8>)> { if let Some(input_path) = input { let mut data = Vec::new(); @@ -404,6 +502,8 @@ fn run_with_qemu(exe: &Path, input: Option<&Path>) -> io::Result<(i32, Vec<u8>, } } +/// Links assembly and the stdlib object using `cc` on macOS AArch64, producing +/// a native AArch64 Mach-O executable. Returns `(exit_code, stderr_bytes)`.
fn link_native_macos( build_dir: &Path, asm_path: &Path, @@ -423,6 +523,9 @@ fn link_native_macos( Ok((output.status.code().unwrap_or(-1), output.stderr)) } +/// Links assembly and the stdlib object using `gcc` on a native AArch64 Linux +/// host, producing a native AArch64 ELF executable. Returns +/// `(exit_code, stderr_bytes)`. fn link_native( build_dir: &Path, asm_path: &Path, @@ -442,6 +545,8 @@ fn link_native( Ok((output.status.code().unwrap_or(-1), output.stderr)) } +/// Runs a native executable (AArch64 ELF or Mach-O). Optional `input` is +/// piped to stdin. Returns `(exit_code, stdout_bytes, stderr_bytes)`. fn run_native(exe: &Path, input: Option<&Path>) -> io::Result<(i32, Vec<u8>, Vec<u8>)> { if let Some(input_path) = input { let mut data = Vec::new(); @@ -473,6 +578,17 @@ fn run_native(exe: &Path, input: Option<&Path>) -> io::Result<(i32, Vec<u8>, Vec } } +/// Runs a parse-only (AST-emit) test for the given `test_name`. +/// +/// Locates `tests/<test_name>/<test_name>.tea`, invokes `teac --emit ast` on +/// it, checks that the command succeeds and produces no stderr output, and +/// then verifies that every identifier in `must_contain` appears somewhere in +/// the AST output. +/// +/// Note: `teac` is given the **absolute path** to the source file, so +/// `source_dir` inside the compiler is set to the test-case directory and +/// `std.teah` is found there automatically — no `current_dir` override is +/// needed. fn test_ast_parse(test_name: &str, must_contain: &[&str]) { let base_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests"); let case_dir = base_dir.join(test_name); @@ -524,6 +640,24 @@ fn test_ast_parse(test_name: &str, must_contain: &[&str]) { } } +/// Runs a full compile-link-execute test for `test_name`.
+/// +/// Expected directory layout under `tests/<test_name>/`: +/// ```text +/// tests/<test_name>/ +/// <test_name>.tea — TeaLang source file (required) +/// std.teah — standard-library header (required by `use std`) +/// <test_name>.in — stdin input for the program (optional) +/// <test_name>.out — golden stdout + exit-code output (required) +/// build/ — created automatically; receives <test_name>.s and executable +/// ``` +/// +/// Five-step pipeline: +/// 1. Compile `<test_name>.tea` → `<test_name>.s` using `teac --emit asm` +/// 2. Locate the pre-built stdlib object file +/// 3. Link `<test_name>.s` + stdlib → executable (and run it), platform-specific +/// 4. Write actual stdout + exit code to `build/<test_name>.out` +/// 5. Compare actual output against the golden `<test_name>.out` file fn test_single(test_name: &str) { let base_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests"); let case_dir = base_dir.join(test_name); @@ -538,6 +672,11 @@ fn test_single(test_name: &str) { tea.display() ); + // --- Step 1: Compile TeaLang source to assembly --- + // case_dir is used as the working directory so that `teac` resolves + // `std.teah` relative to `./` (i.e. the test-case directory). + // The source file is passed as a bare filename (no directory prefix) so + // that `source_dir` inside the compiler falls back to `.` = case_dir. let output_name = format!("{test_name}.s"); let output_path = out_dir.join(&output_name); let output = launch( @@ -562,6 +701,7 @@ fn test_single(test_name: &str) { output_path.display() ); + // --- Step 2: Locate the pre-built stdlib object file --- let stdlib = get_std_o_path(); assert!( stdlib.is_file(), @@ -573,12 +713,15 @@ fn test_single(test_name: &str) { let expected_out = case_dir.join(format!("{test_name}.out")); let actual_out = out_dir.join(format!("{test_name}.out")); + // input_path is None when no `.in` file exists; the program reads from + // /dev/null (or equivalent) in that case.
let input_path = if input.is_file() { Some(input.as_path()) } else { None }; + // --- Step 3: Link assembly + stdlib → executable and run (platform-specific) --- let (run_code, run_stdout, run_stderr) = if is_native_macos() { let exe = out_dir.join(test_name); let (link_code, link_err) = @@ -614,6 +757,9 @@ fn test_single(test_name: &str) { run_native(&exe, input_path).expect("Failed to run executable") }; + // On Docker macOS, linking errors are reported via stderr of the run + // phase rather than as a non-zero link exit code; propagate them as a + // test failure here. if !run_stderr.is_empty() { let stderr_str = String::from_utf8_lossy(&run_stderr); if stderr_str.contains("Linking failed") { @@ -621,10 +767,12 @@ fn test_single(test_name: &str) { } } + // --- Step 4: Write actual output (stdout + exit code) to file --- fs::write(&actual_out, &run_stdout) .unwrap_or_else(|e| panic!("Failed to write {}: {e}", actual_out.display())); append_line(&actual_out, &run_code.to_string()); + // --- Step 5: Compare actual output against the golden .out file --- match read_to_string_if_exists(&expected_out).expect("Failed to read expected output file") { Some(exp) => { let got = fs::read_to_string(&actual_out) @@ -649,6 +797,11 @@ fn test_single(test_name: &str) { } } +// ── Full compile-link-run tests ────────────────────────────────────────────── +// Each test calls ensure_std() to build std.o if needed, then test_single() +// which compiles the .tea file, links it with std.o, runs the result, and +// compares stdout+exit-code against the golden *.out file. + #[test] fn dfs() { ensure_std(); @@ -799,6 +952,11 @@ fn type_infer() { ensure_std(); test_single("type_infer"); } +// ── AST parse-only tests ───────────────────────────────────────────────────── +// These tests only verify that teac can parse the source file and produce a +// non-empty AST containing the expected identifiers. They do NOT link or run +// the program, so no std.o is needed. 
+ #[test] fn float_basic() { test_ast_parse("float_basic", &["main"]);