From d08e3438f61e3b4af01add2060e0b9b2fd5ca90e Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Thu, 20 Mar 2025 11:06:39 +0100 Subject: [PATCH 01/12] feat(vm): adding LOAD_SYMBOL_BY_INDEX instruction --- include/Ark/Compiler/Instructions.hpp | 135 +++++++++++++------------- src/arkreactor/VM/VM.cpp | 11 +++ 2 files changed, 81 insertions(+), 65 deletions(-) diff --git a/include/Ark/Compiler/Instructions.hpp b/include/Ark/Compiler/Instructions.hpp index aede62115..28beb6fd1 100644 --- a/include/Ark/Compiler/Instructions.hpp +++ b/include/Ark/Compiler/Instructions.hpp @@ -39,234 +39,239 @@ namespace Ark::internal // @role Load a symbol from its ID onto the stack LOAD_SYMBOL = 0x01, + // @args stack index + // @role Load a symbol from the locals stack by its index + LOAD_SYMBOL_BY_INDEX = 0x02, + // @args symbol id // @role Load a constant from its ID onto the stack - LOAD_CONST = 0x02, + LOAD_CONST = 0x03, // @args absolute address to jump to // @role Jump to the provided address if the last value on the stack was equal to true. Remove the value from the stack no matter what it is - POP_JUMP_IF_TRUE = 0x03, + POP_JUMP_IF_TRUE = 0x04, // @args symbol id // @role Take the value on top of the stack and create a variable in the current scope, named following the given symbol id (cf symbols table) - STORE = 0x04, + STORE = 0x05, // @args symbol id // @role Take the value on top of the stack and put it inside a variable named following the symbol id (cf symbols table), in the nearest scope. Raise an error if it couldn't find a scope where the variable exists - SET_VAL = 0x05, + SET_VAL = 0x06, // @args absolute address to jump to // @role Jump to the provided address if the last value on the stack was equal to false. 
Remove the value from the stack no matter what it is - POP_JUMP_IF_FALSE = 0x06, + POP_JUMP_IF_FALSE = 0x07, // @args absolute address to jump to // @role Jump to the provided address - JUMP = 0x07, + JUMP = 0x08, // @role If in a code segment other than the main one, quit it, and push the value on top of the stack to the new stack; should as well delete the current environment. Otherwise, acts as a #[code HALT] - RET = 0x08, + RET = 0x09, // @role Stop the Virtual Machine - HALT = 0x09, + HALT = 0x0a, // @args argument count // @role Call function from its symbol id located on top of the stack. Take the given number of arguments from the top of stack and give them to the function (the first argument taken from the stack will be the last one of the function). The stack of the function is now composed of its arguments, from the first to the last one - CALL = 0x0a, + CALL = 0x0b, // @args symbol id // @role Tell the Virtual Machine to capture the variable from the current environment. Main goal is to be able to handle closures, which need to save the environment in which they were created - CAPTURE = 0x0b, + CAPTURE = 0x0c, // @args builtin id // @role Push the corresponding builtin function object on the stack - BUILTIN = 0x0c, + BUILTIN = 0x0d, // @args symbol id // @role Remove a variable/constant named following the given symbol id (cf symbols table) - DEL = 0x0d, + DEL = 0x0e, // @args constant id // @role Push a Closure with the page address pointed by the constant, along with the saved scope created by CAPTURE instruction(s) - MAKE_CLOSURE = 0x0e, + MAKE_CLOSURE = 0x0f, // @args symbol id // @role Read the field named following the given symbol id (cf symbols table) of a #[code Closure] stored in TS. 
Pop TS and push the value of field read on the stack - GET_FIELD = 0x0f, + GET_FIELD = 0x10, // @args constant id // @role Load a plugin dynamically, plugin name is stored as a string in the constants table - PLUGIN = 0x10, + PLUGIN = 0x11, // @args number of elements // @role Create a list from the N elements pushed on the stack. Follows the function calling convention - LIST = 0x11, + LIST = 0x12, // @args number of elements // @role Append N elements to a list (TS). Elements are stored in TS(1)..TS(N). Follows the function calling convention - APPEND = 0x12, + APPEND = 0x13, // @args number of elements // @role Concatenate N lists to a list (TS). Lists to concat to TS are stored in TS(1)..TS(N). Follows the function calling convention - CONCAT = 0x13, + CONCAT = 0x14, // @args number of elements // @role Append N elements to a reference to a list (TS), the list is being mutated in-place, no new object created. Elements are stored in TS(1)..TS(N). Follows the function calling convention - APPEND_IN_PLACE = 0x14, + APPEND_IN_PLACE = 0x15, // @args number of elements // @role Concatenate N lists to a reference to a list (TS), the list is being mutated in-place, no new object created. Lists to concat to TS are stored in TS(1)..TS(N). Follows the function calling convention - CONCAT_IN_PLACE = 0x15, + CONCAT_IN_PLACE = 0x16, // @role Remove an element from a list (TS), given an index (TS1). Push a new list without the removed element to the stack - POP_LIST = 0x16, + POP_LIST = 0x17, // @role Remove an element from a reference to a list (TS), given an index (TS1). The list is mutated in-place, no new object created - POP_LIST_IN_PLACE = 0x17, + POP_LIST_IN_PLACE = 0x18, // @role Modify a reference to a list or string (TS) by replacing the element at TS1 (must be a number) by the value in TS2. 
The object is mutated in-place, no new object created - SET_AT_INDEX = 0x18, + SET_AT_INDEX = 0x19, // @role Modify a reference to a list (TS) by replacing TS[TS2][TS1] by the value in TS3. TS[TS2] can be a string (if it is, TS3 must be a string). The object is mutated in-place, no new object created - SET_AT_2_INDEX = 0x19, + SET_AT_2_INDEX = 0x1a, // @role Remove the top of the stack - POP = 0x1a, + POP = 0x1b, // @role Duplicate the top of the stack - DUP = 0x1b, + DUP = 0x1c, // @role Create a new local scope - CREATE_SCOPE = 0x1c, + CREATE_SCOPE = 0x1d, // @role Destroy the last local scope - POP_SCOPE = 0x1d, + POP_SCOPE = 0x1e, - FIRST_OPERATOR = 0x1e, + FIRST_OPERATOR = 0x1f, // @role Push #[code TS1 + TS] - ADD = 0x1e, + ADD = 0x1f, // @role Push #[code TS1 - TS] - SUB = 0x1f, + SUB = 0x20, // @role Push #[code TS1 * TS] - MUL = 0x20, + MUL = 0x21, // @role Push #[code TS1 / TS] - DIV = 0x21, + DIV = 0x22, // @role Push #[code TS1 > TS] - GT = 0x22, + GT = 0x23, // @role Push #[code TS1 < TS] - LT = 0x23, + LT = 0x24, // @role Push #[code TS1 <= TS] - LE = 0x24, + LE = 0x25, // @role Push #[code TS1 >= TS] - GE = 0x25, + GE = 0x26, // @role Push #[code TS1 != TS] - NEQ = 0x26, + NEQ = 0x27, // @role Push #[code TS1 == TS] - EQ = 0x27, + EQ = 0x28, // @role Push #[code len(TS)], TS must be a list - LEN = 0x28, + LEN = 0x29, // @role Push #[code empty?(TS)], TS must be a list or string - EMPTY = 0x29, + EMPTY = 0x2a, // @role Push #[code tail(TS)], all the elements of TS except the first one. TS must be a list or string - TAIL = 0x2a, + TAIL = 0x2b, // @role Push #[code head(TS)], the first element of TS or nil if empty. TS must be a list or string - HEAD = 0x2b, + HEAD = 0x2c, // @role Push true if TS is nil, false otherwise - ISNIL = 0x2c, + ISNIL = 0x2d, // @role Throw an exception if TS1 is false, and display TS (must be a string). 
Do not push anything on the stack - ASSERT = 0x2d, + ASSERT = 0x2e, // @role Convert TS to number (must be a string) - TO_NUM = 0x2e, + TO_NUM = 0x2f, // @role Convert TS to string - TO_STR = 0x2f, + TO_STR = 0x30, // @role Push the value at index TS (must be a number) in TS1, which must be a list or string - AT = 0x30, + AT = 0x31, // @role Push the value at index TS (must be a number), inside the list or string at index TS1 (must be a number) in the list at TS2 - AT_AT = 0x31, + AT_AT = 0x32, // @role Push #[code TS1 % TS] - MOD = 0x32, + MOD = 0x33, // @role Push the type of TS as a string - TYPE = 0x33, + TYPE = 0x34, // @role Check if TS1 is a closure field of TS. TS must be a Closure, TS1 a String - HASFIELD = 0x34, + HASFIELD = 0x35, // @role Push #[code !TS] - NOT = 0x35, + NOT = 0x36, // @args constant id, constant id // @role Load two consts (#[code primary] then #[code secondary]) on the stack in one instruction - LOAD_CONST_LOAD_CONST = 0x36, + LOAD_CONST_LOAD_CONST = 0x37, // @args constant id, symbol id // @role Load const #[code primary] into the symbol #[code secondary] (create a variable) - LOAD_CONST_STORE = 0x37, + LOAD_CONST_STORE = 0x38, // @args constant id, symbol id // @role Load const #[code primary] into the symbol #[code secondary] (search for the variable with the given symbol id) - LOAD_CONST_SET_VAL = 0x38, + LOAD_CONST_SET_VAL = 0x39, // @args symbol id, symbol id // @role Store the value of the symbol #[code primary] into a new variable #[code secondary] - STORE_FROM = 0x39, + STORE_FROM = 0x3a, // @args symbol id, symbol id // @role Store the value of the symbol #[code primary] into an existing variable #[code secondary] - SET_VAL_FROM = 0x3a, + SET_VAL_FROM = 0x3b, // @args symbol id, count // @role Increment the variable #[code primary] by #[code count] and push its value on the stack - INCREMENT = 0x3b, + INCREMENT = 0x3c, // @args symbol id, count // @role Decrement the variable #[code primary] by #[code count] and push its 
value on the stack - DECREMENT = 0x3c, + DECREMENT = 0x3d, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its tail, store it in a new variable #[code secondary] - STORE_TAIL = 0x3d, + STORE_TAIL = 0x3e, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its head, store it in a new variable #[code secondary] - STORE_HEAD = 0x3e, + STORE_HEAD = 0x3f, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its tail, store it in an existing variable #[code secondary] - SET_VAL_TAIL = 0x3f, + SET_VAL_TAIL = 0x40, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its head, store it in an existing variable #[code secondary] - SET_VAL_HEAD = 0x40, + SET_VAL_HEAD = 0x41, // @args builtin id, argument count // @role Call a builtin by its id in #[code primary], with #[code secondary] arguments. Bypass the stack size check because we do not push IP/PP since builtins calls do not alter the stack - CALL_BUILTIN = 0x41 + CALL_BUILTIN = 0x42 }; constexpr std::array InstructionNames = { "NOP", "LOAD_SYMBOL", + "LOAD_SYMBOL_BY_INDEX", "LOAD_CONST", "POP_JUMP_IF_TRUE", "STORE", diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index 92c1b61ea..ce5afe7bd 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -363,6 +363,7 @@ namespace Ark constexpr std::array opcode_targets = { &&TARGET_NOP, &&TARGET_LOAD_SYMBOL, + &&TARGET_LOAD_SYMBOL_BY_INDEX, &&TARGET_LOAD_CONST, &&TARGET_POP_JUMP_IF_TRUE, &&TARGET_STORE, @@ -460,6 +461,16 @@ namespace Ark DISPATCH(); } + TARGET(LOAD_SYMBOL_BY_INDEX) + { + Value& var = context.scopes_storage[arg].second; + if (var.valueType() == ValueType::Reference) + push(var.reference(), context); + else + push(var, context); + DISPATCH(); + } + TARGET(LOAD_CONST) { push(loadConstAsPtr(arg), context); From 7ba2926e97118c28eedcce1fdda1c7283b13b5c3 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Thu, 20 Mar 2025 
11:06:56 +0100 Subject: [PATCH 02/12] refactor: rename Compiler to ASTLowerer --- include/Ark/Ark.hpp | 2 +- .../{Compiler.hpp => Lowerer/ASTLowerer.hpp} | 22 ++-- include/Ark/Compiler/Welder.hpp | 4 +- .../{Compiler.cpp => Lowerer/ASTLowerer.cpp} | 115 +++++++++--------- src/arkreactor/Compiler/Welder.cpp | 12 +- 5 files changed, 75 insertions(+), 80 deletions(-) rename include/Ark/Compiler/{Compiler.hpp => Lowerer/ASTLowerer.hpp} (93%) rename src/arkreactor/Compiler/{Compiler.cpp => Lowerer/ASTLowerer.cpp} (84%) diff --git a/include/Ark/Ark.hpp b/include/Ark/Ark.hpp index 03df5faa8..6a44707a3 100644 --- a/include/Ark/Ark.hpp +++ b/include/Ark/Ark.hpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #endif diff --git a/include/Ark/Compiler/Compiler.hpp b/include/Ark/Compiler/Lowerer/ASTLowerer.hpp similarity index 93% rename from include/Ark/Compiler/Compiler.hpp rename to include/Ark/Compiler/Lowerer/ASTLowerer.hpp index 7e9f1ee2e..ddbd4bc2d 100644 --- a/include/Ark/Compiler/Compiler.hpp +++ b/include/Ark/Compiler/Lowerer/ASTLowerer.hpp @@ -1,16 +1,16 @@ /** - * @file Compiler.hpp + * @file ASTLowerer.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) - * @brief ArkScript compiler is in charge of transforming the AST into bytecode + * @brief ArkScript compiler is in charge of transforming the AST into IR * @version 3.1 * @date 2020-10-27 * - * @copyright Copyright (c) 2020-2024 + * @copyright Copyright (c) 2020-2025 * */ -#ifndef ARK_COMPILER_COMPILER_HPP -#define ARK_COMPILER_COMPILER_HPP +#ifndef ARK_COMPILER_LOWERER_ASTLOWERER_HPP +#define ARK_COMPILER_LOWERER_ASTLOWERER_HPP #include #include @@ -30,18 +30,18 @@ namespace Ark::internal class Welder; /** - * @brief The ArkScript bytecode compiler + * @brief The ArkScript AST to IR compiler * */ - class ARK_API Compiler final + class ARK_API ASTLowerer final { public: /** - * @brief Construct a new Compiler object + * @brief Construct a new ASTLowerer object * * @param debug the
debug level */ - explicit Compiler(unsigned debug); + explicit ASTLowerer(unsigned debug); /** * @brief Start the compilation @@ -156,7 +156,7 @@ namespace Ark::internal * @param message * @param node */ - static void compilerWarning(const std::string& message, const Node& node); + static void warning(const std::string& message, const Node& node); /** * @brief Throw a nice error message @@ -164,7 +164,7 @@ namespace Ark::internal * @param message * @param node */ - [[noreturn]] static void throwCompilerError(const std::string& message, const Node& node); + [[noreturn]] static void buildAndThrowError(const std::string& message, const Node& node); /** * @brief Compile an expression (a node) recursively diff --git a/include/Ark/Compiler/Welder.hpp b/include/Ark/Compiler/Welder.hpp index 2249bc4b3..4ee0b46e3 100644 --- a/include/Ark/Compiler/Welder.hpp +++ b/include/Ark/Compiler/Welder.hpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -100,9 +100,9 @@ namespace Ark internal::NameResolutionPass m_name_resolver; internal::Logger m_logger; + internal::ASTLowerer m_lowerer; internal::IROptimizer m_ir_optimizer; internal::IRCompiler m_ir_compiler; - internal::Compiler m_compiler; void dumpIRToFile() const; diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp similarity index 84% rename from src/arkreactor/Compiler/Compiler.cpp rename to src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp index 91308f9ad..3d3a6f0d9 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp @@ -1,28 +1,23 @@ -#include +#include -#include #include #include -#include #include #include #include -#include #include -#include #include -#include namespace Ark::internal { using namespace literals; - Compiler::Compiler(const unsigned debug) : - m_logger("Compiler", debug) + ASTLowerer::ASTLowerer(const unsigned debug) : + m_logger("ASTLowerer", debug) {} - void 
Compiler::process(const Node& ast) + void ASTLowerer::process(const Node& ast) { m_logger.traceStart("process"); m_code_pages.emplace_back(); // create empty page @@ -36,22 +31,22 @@ namespace Ark::internal m_logger.traceEnd(); } - const std::vector& Compiler::intermediateRepresentation() const noexcept + const std::vector& ASTLowerer::intermediateRepresentation() const noexcept { return m_code_pages; } - const std::vector& Compiler::symbols() const noexcept + const std::vector& ASTLowerer::symbols() const noexcept { return m_symbols; } - const std::vector& Compiler::values() const noexcept + const std::vector& ASTLowerer::values() const noexcept { return m_values; } - std::optional Compiler::getOperator(const std::string& name) noexcept + std::optional ASTLowerer::getOperator(const std::string& name) noexcept { const auto it = std::ranges::find(Language::operators, name); if (it != Language::operators.end()) @@ -59,7 +54,7 @@ namespace Ark::internal return std::nullopt; } - std::optional Compiler::getBuiltin(const std::string& name) noexcept + std::optional ASTLowerer::getBuiltin(const std::string& name) noexcept { const auto it = std::ranges::find_if(Builtins::builtins, [&name](const std::pair& element) -> bool { @@ -70,7 +65,7 @@ namespace Ark::internal return std::nullopt; } - std::optional Compiler::getListInstruction(const std::string& name) noexcept + std::optional ASTLowerer::getListInstruction(const std::string& name) noexcept { const auto it = std::ranges::find(Language::listInstructions, name); if (it != Language::listInstructions.end()) @@ -78,7 +73,7 @@ namespace Ark::internal return std::nullopt; } - bool Compiler::nodeProducesOutput(const Node& node) + bool ASTLowerer::nodeProducesOutput(const Node& node) { if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword) return (node.constList()[0].keyword() == Keyword::Begin && node.constList().size() > 1) || @@ -87,7 +82,7 @@ namespace 
Ark::internal return true; // any other node, function call, symbol, number... } - bool Compiler::isUnaryInst(const Instruction inst) noexcept + bool ASTLowerer::isUnaryInst(const Instruction inst) noexcept { switch (inst) { @@ -107,7 +102,7 @@ namespace Ark::internal } } - bool Compiler::isTernaryInst(const Instruction inst) noexcept + bool ASTLowerer::isTernaryInst(const Instruction inst) noexcept { switch (inst) { @@ -119,17 +114,17 @@ namespace Ark::internal } } - void Compiler::compilerWarning(const std::string& message, const Node& node) + void ASTLowerer::warning(const std::string& message, const Node& node) { fmt::println("{} {}", fmt::styled("Warning", fmt::fg(fmt::color::dark_orange)), Diagnostics::makeContextWithNode(message, node)); } - void Compiler::throwCompilerError(const std::string& message, const Node& node) + void ASTLowerer::buildAndThrowError(const std::string& message, const Node& node) { throw CodeError(message, node.filename(), node.line(), node.col(), node.repr()); } - void Compiler::compileExpression(const Node& x, const Page p, const bool is_result_unused, const bool is_terminal, const std::string& var_name) + void ASTLowerer::compileExpression(const Node& x, const Page p, const bool is_result_unused, const bool is_terminal, const std::string& var_name) { // register symbols if (x.nodeType() == NodeType::Symbol) @@ -226,32 +221,32 @@ namespace Ark::internal handleCalls(x, p, is_result_unused, is_terminal, var_name); } else - throwCompilerError( + buildAndThrowError( fmt::format( - "NodeType `{}' not handled in Compiler::compileExpression. Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark", + "NodeType `{}' not handled in ASTLowerer::compileExpression. 
Please fill an issue on GitHub: https://github.com/ArkScript-lang/Ark", typeToString(x)), x); } - void Compiler::compileSymbol(const Node& x, const Page p, const bool is_result_unused) + void ASTLowerer::compileSymbol(const Node& x, const Page p, const bool is_result_unused) { const std::string& name = x.string(); if (const auto it_builtin = getBuiltin(name)) page(p).emplace_back(Instruction::BUILTIN, it_builtin.value()); else if (getOperator(name).has_value()) - throwCompilerError(fmt::format("Found a free standing operator: `{}`", name), x); + buildAndThrowError(fmt::format("Found a free standing operator: `{}`", name), x); else page(p).emplace_back(LOAD_SYMBOL, addSymbol(x)); // using the variable if (is_result_unused) { - compilerWarning("Statement has no effect", x); + warning("Statement has no effect", x); page(p).emplace_back(POP); } } - void Compiler::compileListInstruction(const Node& c0, const Node& x, const Page p, const bool is_result_unused) + void ASTLowerer::compileListInstruction(const Node& c0, const Node& x, const Page p, const bool is_result_unused) { std::string name = c0.string(); Instruction inst = getListInstruction(name).value(); @@ -260,13 +255,13 @@ namespace Ark::internal const auto argc = x.constList().size() - 1u; // error, can not use append/concat/pop (and their in place versions) with a <2 length argument list if (argc < 2 && APPEND <= inst && inst <= POP) - throwCompilerError(fmt::format("Can not use {} with less than 2 arguments", name), c0); + buildAndThrowError(fmt::format("Can not use {} with less than 2 arguments", name), c0); if (inst <= POP && std::cmp_greater(argc, std::numeric_limits::max())) - throwCompilerError(fmt::format("Too many arguments ({}), exceeds 65'535", argc), x); + buildAndThrowError(fmt::format("Too many arguments ({}), exceeds 65'535", argc), x); if (argc != 3 && inst == SET_AT_INDEX) - throwCompilerError(fmt::format("Expected 3 arguments (list, index, value) for {}, got {}", name, argc), c0); + 
buildAndThrowError(fmt::format("Expected 3 arguments (list, index, value) for {}, got {}", name, argc), c0); if (argc != 4 && inst == SET_AT_2_INDEX) - throwCompilerError(fmt::format("Expected 4 arguments (list, y, x, value) for {}, got {}", name, argc), c0); + buildAndThrowError(fmt::format("Expected 4 arguments (list, y, x, value) for {}, got {}", name, argc), c0); // compile arguments in reverse order for (std::size_t i = x.constList().size() - 1u; i > 0; --i) @@ -275,7 +270,7 @@ namespace Ark::internal if (nodeProducesOutput(node)) compileExpression(node, p, false, false); else - throwCompilerError(fmt::format("Invalid node inside call to {}", name), node); + buildAndThrowError(fmt::format("Invalid node inside call to {}", name), node); } // put inst and number of arguments @@ -305,12 +300,12 @@ namespace Ark::internal if (is_result_unused && name.back() != '!' && inst <= POP_LIST_IN_PLACE) // in-place functions never push a value { - compilerWarning("Ignoring return value of function", x); + warning("Ignoring return value of function", x); page(p).emplace_back(POP); } } - void Compiler::compileIf(const Node& x, const Page p, const bool is_result_unused, const bool is_terminal, const std::string& var_name) + void ASTLowerer::compileIf(const Node& x, const Page p, const bool is_result_unused, const bool is_terminal, const std::string& var_name) { // compile condition compileExpression(x.constList()[1], p, false, false); @@ -335,12 +330,12 @@ namespace Ark::internal page(p).emplace_back(label_end); } - void Compiler::compileFunction(const Node& x, const Page p, const bool is_result_unused, const std::string& var_name) + void ASTLowerer::compileFunction(const Node& x, const Page p, const bool is_result_unused, const std::string& var_name) { if (const auto args = x.constList()[1]; args.nodeType() != NodeType::List) - throwCompilerError(fmt::format("Expected a well formed argument(s) list, got a {}", typeToString(args)), args); + 
buildAndThrowError(fmt::format("Expected a well formed argument(s) list, got a {}", typeToString(args)), args); if (x.constList().size() != 3) - throwCompilerError("Invalid node ; if it was computed by a macro, check that a node is returned", x); + buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x); // capture, if needed bool is_closure = false; @@ -375,17 +370,17 @@ namespace Ark::internal // if the computed function is unused, pop it if (is_result_unused) { - compilerWarning("Unused declared function", x); + warning("Unused declared function", x); page(p).emplace_back(POP); } } - void Compiler::compileLetMutSet(const Keyword n, const Node& x, const Page p) + void ASTLowerer::compileLetMutSet(const Keyword n, const Node& x, const Page p) { if (const auto sym = x.constList()[1]; sym.nodeType() != NodeType::Symbol) - throwCompilerError(fmt::format("Expected a symbol, got a {}", typeToString(sym)), sym); + buildAndThrowError(fmt::format("Expected a symbol, got a {}", typeToString(sym)), sym); if (x.constList().size() != 3) - throwCompilerError("Invalid node ; if it was computed by a macro, check that a node is returned", x); + buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x); const std::string name = x.constList()[1].string(); uint16_t i = addSymbol(x.constList()[1]); @@ -401,10 +396,10 @@ namespace Ark::internal page(p).emplace_back(SET_VAL, i); } - void Compiler::compileWhile(const Node& x, const Page p) + void ASTLowerer::compileWhile(const Node& x, const Page p) { if (x.constList().size() != 3) - throwCompilerError("Invalid node ; if it was computed by a macro, check that a node is returned", x); + buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x); page(p).emplace_back(CREATE_SCOPE); @@ -428,7 +423,7 @@ namespace Ark::internal page(p).emplace_back(POP_SCOPE); } - void Compiler::compilePluginImport(const Node& x, 
const Page p) + void ASTLowerer::compilePluginImport(const Node& x, const Page p) { std::string path; const Node package_node = x.constList()[1]; @@ -446,7 +441,7 @@ namespace Ark::internal page(p).emplace_back(PLUGIN, id); } - void Compiler::handleCalls(const Node& x, const Page p, bool is_result_unused, const bool is_terminal, const std::string& var_name) + void ASTLowerer::handleCalls(const Node& x, const Page p, bool is_result_unused, const bool is_terminal, const std::string& var_name) { constexpr std::size_t start_index = 1; @@ -471,7 +466,7 @@ namespace Ark::internal { // short circuit implementation if (x.constList().size() < 3) - throwCompilerError( + buildAndThrowError( fmt::format( "Expected at least 2 arguments while compiling '{}', got {}", node.string(), @@ -512,7 +507,7 @@ namespace Ark::internal if (nodeProducesOutput(x.constList()[i])) compileExpression(x.constList()[i], p, false, false); else - throwCompilerError(fmt::format("Invalid node inside tail call to `{}'", node.repr()), x); + buildAndThrowError(fmt::format("Invalid node inside tail call to `{}'", node.repr()), x); } // jump to the top of the function @@ -526,7 +521,7 @@ namespace Ark::internal // closure chains have been handled (eg: closure.field.field.function) compileExpression(node, proc_page, false, false); // storing proc if (m_temp_pages.back().empty()) - throwCompilerError(fmt::format("Can not call {}", x.constList()[0].repr()), x); + buildAndThrowError(fmt::format("Can not call {}", x.constList()[0].repr()), x); // push arguments on current page for (auto exp = x.constList().begin() + start_index, exp_end = x.constList().end(); exp != exp_end; ++exp) @@ -534,7 +529,7 @@ namespace Ark::internal if (nodeProducesOutput(*exp)) compileExpression(*exp, p, false, false); else - throwCompilerError(fmt::format("Invalid node inside call to `{}'", node.repr()), x); + buildAndThrowError(fmt::format("Invalid node inside call to `{}'", node.repr()), x); } // push proc from temp page for (const 
auto& inst : m_temp_pages.back()) @@ -567,7 +562,7 @@ namespace Ark::internal if (nodeProducesOutput(x.constList()[index])) compileExpression(x.constList()[index], p, false, false); else - throwCompilerError(fmt::format("Invalid node inside call to operator `{}'", node.repr()), x); + buildAndThrowError(fmt::format("Invalid node inside call to operator `{}'", node.repr()), x); if ((index + 1 < size && x.constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size) exp_count++; @@ -581,17 +576,17 @@ namespace Ark::internal if (isUnaryInst(op)) { if (exp_count != 1) - throwCompilerError(fmt::format("Operator needs one argument, but was called with {}", exp_count), x.constList()[0]); + buildAndThrowError(fmt::format("Operator needs one argument, but was called with {}", exp_count), x.constList()[0]); page(p).emplace_back(op); } else if (isTernaryInst(op)) { if (exp_count != 3) - throwCompilerError(fmt::format("Operator needs three arguments, but was called with {}", exp_count), x.constList()[0]); + buildAndThrowError(fmt::format("Operator needs three arguments, but was called with {}", exp_count), x.constList()[0]); page(p).emplace_back(op); } else if (exp_count <= 1) - throwCompilerError(fmt::format("Operator needs two arguments, but was called with {}", exp_count), x.constList()[0]); + buildAndThrowError(fmt::format("Operator needs two arguments, but was called with {}", exp_count), x.constList()[0]); // need to check we didn't push the (op A B C D...) 
things for operators not supporting it if (exp_count > 2) @@ -608,7 +603,7 @@ namespace Ark::internal break; default: - throwCompilerError( + buildAndThrowError( fmt::format( "`{}' requires 2 arguments, but got {}.", Language::operators[static_cast(op - FIRST_OPERATOR)], @@ -622,7 +617,7 @@ namespace Ark::internal page(p).emplace_back(POP); } - uint16_t Compiler::addSymbol(const Node& sym) + uint16_t ASTLowerer::addSymbol(const Node& sym) { // otherwise, add the symbol, and return its id in the table auto it = std::ranges::find(m_symbols, sym.string()); @@ -635,10 +630,10 @@ namespace Ark::internal const auto distance = std::distance(m_symbols.begin(), it); if (distance < std::numeric_limits::max()) return static_cast(distance); - throwCompilerError("Too many symbols (exceeds 65'536), aborting compilation.", sym); + buildAndThrowError("Too many symbols (exceeds 65'536), aborting compilation.", sym); } - uint16_t Compiler::addValue(const Node& x) + uint16_t ASTLowerer::addValue(const Node& x) { const ValTableElem v(x); auto it = std::ranges::find(m_values, v); @@ -651,10 +646,10 @@ namespace Ark::internal const auto distance = std::distance(m_values.begin(), it); if (distance < std::numeric_limits::max()) return static_cast(distance); - throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", x); + buildAndThrowError("Too many values (exceeds 65'536), aborting compilation.", x); } - uint16_t Compiler::addValue(const std::size_t page_id, const Node& current) + uint16_t ASTLowerer::addValue(const std::size_t page_id, const Node& current) { const ValTableElem v(page_id); auto it = std::ranges::find(m_values, v); @@ -667,6 +662,6 @@ namespace Ark::internal const auto distance = std::distance(m_values.begin(), it); if (distance < std::numeric_limits::max()) return static_cast(distance); - throwCompilerError("Too many values (exceeds 65'536), aborting compilation.", current); + buildAndThrowError("Too many values (exceeds 65'536), aborting 
compilation.", current); } } diff --git a/src/arkreactor/Compiler/Welder.cpp b/src/arkreactor/Compiler/Welder.cpp index b51ed5779..fecb77ef0 100644 --- a/src/arkreactor/Compiler/Welder.cpp +++ b/src/arkreactor/Compiler/Welder.cpp @@ -22,9 +22,9 @@ namespace Ark m_ast_optimizer(debug), m_name_resolver(debug), m_logger("Welder", debug), + m_lowerer(debug), m_ir_optimizer(debug), - m_ir_compiler(debug), - m_compiler(debug) + m_ir_compiler(debug) {} void Welder::registerSymbol(const std::string& name) @@ -51,16 +51,16 @@ namespace Ark { try { - m_compiler.process(m_computed_ast); - m_ir = m_compiler.intermediateRepresentation(); + m_lowerer.process(m_computed_ast); + m_ir = m_lowerer.intermediateRepresentation(); if ((m_features & FeatureIROptimizer) != 0) { - m_ir_optimizer.process(m_ir, m_compiler.symbols(), m_compiler.values()); + m_ir_optimizer.process(m_ir, m_lowerer.symbols(), m_lowerer.values()); m_ir = m_ir_optimizer.intermediateRepresentation(); } - m_ir_compiler.process(m_ir, m_compiler.symbols(), m_compiler.values()); + m_ir_compiler.process(m_ir, m_lowerer.symbols(), m_lowerer.values()); m_bytecode = m_ir_compiler.bytecode(); if ((m_features & FeatureDumpIR) != 0) From 944e5e8c99888913861c007cd05b7fd484e0e7c1 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Thu, 20 Mar 2025 18:40:59 +0100 Subject: [PATCH 03/12] refactor: moving Word.hpp to Compiler/IR/ --- include/Ark/Compiler/IntermediateRepresentation/Entity.hpp | 2 +- include/Ark/Compiler/{ => IntermediateRepresentation}/Word.hpp | 0 tests/unittests/CompilerSuite.cpp | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename include/Ark/Compiler/{ => IntermediateRepresentation}/Word.hpp (100%) diff --git a/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp index 24220e86d..b27b5bcf4 100644 --- a/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp +++ b/include/Ark/Compiler/IntermediateRepresentation/Entity.hpp @@ -15,7 
+15,7 @@ #include #include -#include +#include #include namespace Ark::internal::IR diff --git a/include/Ark/Compiler/Word.hpp b/include/Ark/Compiler/IntermediateRepresentation/Word.hpp similarity index 100% rename from include/Ark/Compiler/Word.hpp rename to include/Ark/Compiler/IntermediateRepresentation/Word.hpp diff --git a/tests/unittests/CompilerSuite.cpp b/tests/unittests/CompilerSuite.cpp index df78bc315..180285d71 100644 --- a/tests/unittests/CompilerSuite.cpp +++ b/tests/unittests/CompilerSuite.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include From 8a561b021201bd93dfbf0b8ba9bdb1ffc88d40f1 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Thu, 20 Mar 2025 18:41:48 +0100 Subject: [PATCH 04/12] feat(scope): using symbol loading by stack index when the symbol is known in a given scope --- include/Ark/Compiler/Lowerer/ASTLowerer.hpp | 3 ++ .../Ark/Compiler/Lowerer/LocalsLocator.hpp | 50 +++++++++++++++++++ include/Ark/VM/ScopeView.hpp | 14 +++++- src/arkreactor/Compiler/BytecodeReader.cpp | 1 + .../Compiler/Lowerer/ASTLowerer.cpp | 27 ++++++++-- .../Compiler/Lowerer/LocalsLocator.cpp | 45 +++++++++++++++++ src/arkreactor/VM/VM.cpp | 8 ++- 7 files changed, 138 insertions(+), 10 deletions(-) create mode 100644 include/Ark/Compiler/Lowerer/LocalsLocator.hpp create mode 100644 src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp diff --git a/include/Ark/Compiler/Lowerer/ASTLowerer.hpp b/include/Ark/Compiler/Lowerer/ASTLowerer.hpp index ddbd4bc2d..25cfd0866 100644 --- a/include/Ark/Compiler/Lowerer/ASTLowerer.hpp +++ b/include/Ark/Compiler/Lowerer/ASTLowerer.hpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace Ark::internal { @@ -78,6 +79,8 @@ namespace Ark::internal bool is_temp; }; + LocalsLocator m_locals_locator; + // tables: symbols, values, plugins and codes std::vector m_symbols; std::vector m_values; diff --git a/include/Ark/Compiler/Lowerer/LocalsLocator.hpp b/include/Ark/Compiler/Lowerer/LocalsLocator.hpp new 
file mode 100644 index 000000000..01b59172f --- /dev/null +++ b/include/Ark/Compiler/Lowerer/LocalsLocator.hpp @@ -0,0 +1,50 @@ +/** + * @file LocalsLocator.hpp + * @author Alexandre Plateau (lexplt.dev@gmail.com) + * @brief Track locals at compile + * @version 0.1 + * @date 2025-03-20 + * + * @copyright Copyright (c) 2025 + * + */ + +#ifndef ARK_COMPILER_LOWERER_LOCALSLOCATOR_HPP +#define ARK_COMPILER_LOWERER_LOCALSLOCATOR_HPP + +#include +#include +#include + +namespace Ark::internal +{ + class LocalsLocator + { + public: + enum class ScopeType + { + Default, + Function, + Closure + }; + + LocalsLocator(); + + void addLocal(const std::string& name); + std::optional lookupLastScopeByName(const std::string& name); + + void createScope(ScopeType type = ScopeType::Default); + void deleteScope(); + + private: + struct Scope + { + std::vector data; + ScopeType type; + }; + + std::vector m_scopes; + }; +} + +#endif // ARK_COMPILER_LOWERER_LOCALSLOCATOR_HPP diff --git a/include/Ark/VM/ScopeView.hpp b/include/Ark/VM/ScopeView.hpp index 547402b19..909a78808 100644 --- a/include/Ark/VM/ScopeView.hpp +++ b/include/Ark/VM/ScopeView.hpp @@ -92,15 +92,25 @@ namespace Ark::internal [[nodiscard]] uint16_t idFromValue(const Value& val) const noexcept; /** - * @brief Return the start index of the current + * @brief Return the element at index in scope * - * @return const std::size_t + * @return const pair_t& */ [[nodiscard]] inline const pair_t& atPos(const std::size_t i) const noexcept { return m_storage[m_start + i]; } + /** + * @brief Return the element at index, starting from the end + * + * @return const pair_t& + */ + [[nodiscard]] inline pair_t& atPosReverse(const std::size_t i) noexcept + { + return m_storage[m_start + m_size - 1 - i]; + } + /** * @brief Return the size of the scope * diff --git a/src/arkreactor/Compiler/BytecodeReader.cpp b/src/arkreactor/Compiler/BytecodeReader.cpp index 780c7bfae..2bbbd7220 100644 --- a/src/arkreactor/Compiler/BytecodeReader.cpp +++ 
b/src/arkreactor/Compiler/BytecodeReader.cpp @@ -344,6 +344,7 @@ namespace Ark const std::unordered_map arg_kinds = { { LOAD_SYMBOL, ArgKind::Symbol }, + { LOAD_SYMBOL_BY_INDEX, ArgKind::Raw }, { LOAD_CONST, ArgKind::Value }, { POP_JUMP_IF_TRUE, ArgKind::Raw }, { STORE, ArgKind::Symbol }, diff --git a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp index 3d3a6f0d9..b21297e65 100644 --- a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp +++ b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp @@ -237,7 +237,13 @@ namespace Ark::internal else if (getOperator(name).has_value()) buildAndThrowError(fmt::format("Found a free standing operator: `{}`", name), x); else - page(p).emplace_back(LOAD_SYMBOL, addSymbol(x)); // using the variable + { + const std::optional maybe_local_idx = m_locals_locator.lookupLastScopeByName(name); + if (maybe_local_idx.has_value()) + page(p).emplace_back(LOAD_SYMBOL_BY_INDEX, static_cast(maybe_local_idx.value())); + else + page(p).emplace_back(LOAD_SYMBOL, addSymbol(x)); + } if (is_result_unused) { @@ -338,15 +344,21 @@ namespace Ark::internal buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x); // capture, if needed - bool is_closure = false; + std::size_t capture_inst_count = 0; for (const auto& node : x.constList()[1].constList()) { if (node.nodeType() == NodeType::Capture) { page(p).emplace_back(CAPTURE, addSymbol(node)); - is_closure = true; + ++capture_inst_count; } } + const bool is_closure = capture_inst_count > 0; + + m_locals_locator.createScope( + is_closure + ? 
LocalsLocator::ScopeType::Closure + : LocalsLocator::ScopeType::Function); // create new page for function body m_code_pages.emplace_back(); @@ -358,7 +370,10 @@ namespace Ark::internal for (const auto& node : x.constList()[1].constList()) { if (node.nodeType() == NodeType::Symbol) + { page(function_body_page).emplace_back(STORE, addSymbol(node)); + m_locals_locator.addLocal(node.string()); + } } // push body of the function @@ -366,6 +381,7 @@ namespace Ark::internal // return last value on the stack page(function_body_page).emplace_back(RET); + m_locals_locator.deleteScope(); // if the computed function is unused, pop it if (is_result_unused) @@ -391,7 +407,10 @@ namespace Ark::internal compileExpression(x.constList()[idx], p, false, false, name); if (n == Keyword::Let || n == Keyword::Mut) + { page(p).emplace_back(STORE, i); + m_locals_locator.addLocal(name); + } else page(p).emplace_back(SET_VAL, i); } @@ -401,6 +420,7 @@ namespace Ark::internal if (x.constList().size() != 3) buildAndThrowError("Invalid node ; if it was computed by a macro, check that a node is returned", x); + m_locals_locator.createScope(); page(p).emplace_back(CREATE_SCOPE); // save current position to jump there at the end of the loop @@ -421,6 +441,7 @@ namespace Ark::internal page(p).emplace_back(label_end); page(p).emplace_back(POP_SCOPE); + m_locals_locator.deleteScope(); } void ASTLowerer::compilePluginImport(const Node& x, const Page p) diff --git a/src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp b/src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp new file mode 100644 index 000000000..42fb11d72 --- /dev/null +++ b/src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp @@ -0,0 +1,45 @@ +#include + +#include + +namespace Ark::internal +{ + LocalsLocator::LocalsLocator() + { + // create a default scope + m_scopes.emplace_back(); + } + + void LocalsLocator::addLocal(const std::string& name) + { + auto& scope = m_scopes.back(); + if (std::ranges::find(scope.data, name) == scope.data.end()) + 
scope.data.push_back(name); + } + + std::optional LocalsLocator::lookupLastScopeByName(const std::string& name) + { + auto& back = m_scopes.back(); + + if (back.type != ScopeType::Closure) + { + // Compute the index of the variable in the active scope from the end. + if (const auto it = std::ranges::find(back.data, name); it != back.data.end()) + return static_cast(std::distance(it, back.data.end())) - 1; + } + + return std::nullopt; + } + + void LocalsLocator::createScope(const ScopeType type) + { + m_scopes.emplace_back(Scope { + .data = {}, + .type = type }); + } + + void LocalsLocator::deleteScope() + { + m_scopes.pop_back(); + } +} diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index ce5afe7bd..ea9f4abc7 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -463,11 +463,9 @@ namespace Ark TARGET(LOAD_SYMBOL_BY_INDEX) { - Value& var = context.scopes_storage[arg].second; - if (var.valueType() == ValueType::Reference) - push(var.reference(), context); - else - push(var, context); + auto& [id, value] = context.locals.back().atPosReverse(arg); + context.last_symbol = id; + push(value, context); DISPATCH(); } From cffc6401dc254b1159aaa94d2a667d65e2d33223 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 21 Mar 2025 10:21:18 +0100 Subject: [PATCH 05/12] feat(compiler, optimizer): add new super instructions using the _BY_INDEX version of LOAD_SYMBOL, fix a corner case in the index computation inside if branches --- CHANGELOG.md | 5 ++ include/Ark/Compiler/Instructions.hpp | 40 +++++++++--- .../Ark/Compiler/Lowerer/LocalsLocator.hpp | 35 ++++++++++ include/Ark/VM/VM.hpp | 1 + include/Ark/VM/VM.inl | 7 ++ .../IROptimizer.cpp | 15 ++++- .../Compiler/Lowerer/ASTLowerer.cpp | 6 ++ .../Compiler/Lowerer/LocalsLocator.cpp | 25 ++++++-- src/arkreactor/VM/VM.cpp | 64 ++++++++++++++++++- .../CompilerSuite/ir/99bottles.expected | 6 +- .../CompilerSuite/ir/ackermann.expected | 16 ++--- .../CompilerSuite/ir/closures.expected | 22 
+++---- .../CompilerSuite/ir/factorial.expected | 4 +- .../optimized_ir/99bottles.expected | 6 +- .../optimized_ir/ackermann.expected | 16 ++--- .../optimized_ir/closures.expected | 22 +++---- .../optimized_ir/factorial.expected | 4 +- .../optimized_ir/increments.expected | 10 +-- 18 files changed, 235 insertions(+), 69 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39b84917a..062eeb5d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,9 @@ - new operator `@@` to get elements in list of lists / list of strings - new builtin `random`, returning a random number between INT_MIN and INT_MAX, or in a custom range - `$as-is` to paste a node inside a maro without evaluating it further ; useful to stop recursive evaluation of nodes inside function macros +- `LOAD_SYMBOL_BY_INDEX` instruction, loading a local from the current scope by an index (0 being the last element added to the scope) +- `STORE_FROM_INDEX` and `SET_VAL_FROM_INDEX` instructions for parity with the super instructions not using load by index +- `INCREMENT_BY_INDEX` and `DECREMENT_BY_INDEX` instructions for parity with the super instructions not using load by index ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument @@ -109,6 +112,8 @@ - magic numbers for value types in bytecode files have been changed from 0x01, 0x02, 0x03 to 0xF1, 0xF2, 0xF3 (number, string, function) - numbers in the values table in bytecode files are no longer stringified but their IEEE754 representation is now encoded on 12 bytes (4 for the exponent, 8 for the mantissa) - changed how scopes are stored inside the VM to enhance performances. All scope data are now contiguous! 
+- when possible, accessing variables from the current scope is compiled to a new instruction `LOAD_SYMBOL_BY_INDEX`, to avoid the sometimes expensive lookup by id + - this works inside normal scopes (introduced by while loops) and function scopes, but not for closures ### Removed - removed unused `NodeType::Closure` diff --git a/include/Ark/Compiler/Instructions.hpp b/include/Ark/Compiler/Instructions.hpp index 28beb6fd1..7f992c2f5 100644 --- a/include/Ark/Compiler/Instructions.hpp +++ b/include/Ark/Compiler/Instructions.hpp @@ -235,37 +235,55 @@ namespace Ark::internal // @role Store the value of the symbol #[code primary] into a new variable #[code secondary] STORE_FROM = 0x3a, + // @args symbol index, symbol id + // @role Store the value of the symbol #[code primary] into a new variable #[code secondary] + STORE_FROM_INDEX = 0x3b, + // @args symbol id, symbol id // @role Store the value of the symbol #[code primary] into an existing variable #[code secondary] - SET_VAL_FROM = 0x3b, + SET_VAL_FROM = 0x3c, + + // @args symbol index, symbol id + // @role Store the value of the symbol #[code primary] into an existing variable #[code secondary] + SET_VAL_FROM_INDEX = 0x3d, // @args symbol id, count // @role Increment the variable #[code primary] by #[code count] and push its value on the stack - INCREMENT = 0x3c, + INCREMENT = 0x3e, + + // @args symbol index, count + // @role Increment the variable #[code primary] by #[code count] and push its value on the stack + INCREMENT_BY_INDEX = 0x3f, // @args symbol id, count // @role Decrement the variable #[code primary] by #[code count] and push its value on the stack - DECREMENT = 0x3d, + DECREMENT = 0x40, + + // @args symbol index, count + // @role Decrement the variable #[code primary] by #[code count] and push its value on the stack + DECREMENT_BY_INDEX = 0x41, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its tail, store it in a new variable #[code secondary] - STORE_TAIL = 0x3e, +
STORE_TAIL = 0x42, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its head, store it in a new variable #[code secondary] - STORE_HEAD = 0x3f, + STORE_HEAD = 0x43, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its tail, store it in an existing variable #[code secondary] - SET_VAL_TAIL = 0x40, + SET_VAL_TAIL = 0x44, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its head, store it in an existing variable #[code secondary] - SET_VAL_HEAD = 0x41, + SET_VAL_HEAD = 0x45, // @args builtin id, argument count // @role Call a builtin by its id in #[code primary], with #[code secondary] arguments. Bypass the stack size check because we do not push IP/PP since builtins calls do not alter the stack - CALL_BUILTIN = 0x42 + CALL_BUILTIN = 0x46, + + LAST }; constexpr std::array InstructionNames = { @@ -330,15 +348,21 @@ namespace Ark::internal "LOAD_CONST_STORE", "LOAD_CONST_SET_VAL", "STORE_FROM", + "STORE_FROM_INDEX", "SET_VAL_FROM", + "SET_VAL_FROM_INDEX", "INCREMENT", + "INCREMENT_BY_INDEX", "DECREMENT", + "DECREMENT_BY_INDEX", "STORE_TAIL", "STORE_HEAD", "SET_VAL_TAIL", "SET_VAL_HEAD", "CALL_BUILTIN" }; + + static_assert(InstructionNames.size() == static_cast(Instruction::LAST) && "Some instruction names appear to be missing"); } #endif diff --git a/include/Ark/Compiler/Lowerer/LocalsLocator.hpp b/include/Ark/Compiler/Lowerer/LocalsLocator.hpp index 01b59172f..be526bd14 100644 --- a/include/Ark/Compiler/Lowerer/LocalsLocator.hpp +++ b/include/Ark/Compiler/Lowerer/LocalsLocator.hpp @@ -28,14 +28,48 @@ namespace Ark::internal Closure }; + /** + * @brief Create a new LocalsLocator to track the position of variables in the scope stack + */ LocalsLocator(); + /** + * @brief Register a local in the current scope, triggered by a STORE instruction. If the local already exists, it won't be added. 
+ * + * @param name local's name + */ void addLocal(const std::string& name); + + /** + * @brief Search for a local in the current scope. Returns std::nullopt in case of closure scopes or if the variable is in a parent scope. + * + * @param name local's name + * @return std::optional + */ std::optional lookupLastScopeByName(const std::string& name); + /** + * @brief Create a new scope + * + * @param type scope type, default `ScopeType::Default` + */ void createScope(ScopeType type = ScopeType::Default); + + /** + * @brief Delete the last scope + */ void deleteScope(); + /** + * @brief Save the current scope length before entering a branch, so that we can ignore variable definitions inside the branch and generate valid indices + */ + void saveScopeLengthForBranch(); + + /** + * @brief Drop potentially defined variables in the last saved branch + */ + void dropVarsForBranch(); + private: struct Scope { @@ -44,6 +78,7 @@ namespace Ark::internal }; std::vector m_scopes; + std::vector m_drop_for_conds; ///< Needed to drop variables inside if/else branches since they don't have their own scope }; } diff --git a/include/Ark/VM/VM.hpp b/include/Ark/VM/VM.hpp index 2a87b95e4..efb79d4e9 100644 --- a/include/Ark/VM/VM.hpp +++ b/include/Ark/VM/VM.hpp @@ -188,6 +188,7 @@ namespace Ark // ================================================ [[nodiscard]] inline Value* loadSymbol(uint16_t id, internal::ExecutionContext& context); + [[nodiscard]] inline Value* loadSymbolFromIndex(uint16_t index, internal::ExecutionContext& context); [[nodiscard]] inline Value* loadConstAsPtr(uint16_t id) const; inline void store(uint16_t id, const Value* val, internal::ExecutionContext& context); inline void setVal(uint16_t id, const Value* val, internal::ExecutionContext& context); diff --git a/include/Ark/VM/VM.inl b/include/Ark/VM/VM.inl index 77b16b016..36c326d75 100644 --- a/include/Ark/VM/VM.inl +++ b/include/Ark/VM/VM.inl @@ -136,6 +136,13 @@ inline Value* VM::loadSymbol(const uint16_t id, 
internal::ExecutionContext& cont return nullptr; } +inline Value* VM::loadSymbolFromIndex(const uint16_t index, internal::ExecutionContext& context) +{ + auto& [id, value] = context.locals.back().atPosReverse(index); + context.last_symbol = id; + return &value; +} + inline Value* VM::loadConstAsPtr(const uint16_t id) const { return &m_state.m_constants[id]; diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp index 4b3d486ad..e845c1337 100644 --- a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp +++ b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp @@ -101,13 +101,17 @@ namespace Ark::internal return IR::Entity(LOAD_CONST_STORE, first.primaryArg(), second.primaryArg()); if (first.inst() == LOAD_CONST && second.inst() == SET_VAL) return IR::Entity(LOAD_CONST_SET_VAL, first.primaryArg(), second.primaryArg()); - // LOAD_SYMBOL a + // LOAD_SYMBOL / LOAD_SYMBOL_BY_INDEX a // STORE / SET_VAL b // ---> STORE_FROM a b ; SET_VAL_FROM a b if (first.inst() == LOAD_SYMBOL && second.inst() == STORE) return IR::Entity(STORE_FROM, first.primaryArg(), second.primaryArg()); + if (first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == STORE) + return IR::Entity(STORE_FROM_INDEX, first.primaryArg(), second.primaryArg()); if (first.inst() == LOAD_SYMBOL && second.inst() == SET_VAL) return IR::Entity(SET_VAL_FROM, first.primaryArg(), second.primaryArg()); + if (first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == SET_VAL) + return IR::Entity(SET_VAL_FROM_INDEX, first.primaryArg(), second.primaryArg()); // BUILTIN i // CALL n // ---> CALL_BUILTIN i n @@ -132,6 +136,15 @@ namespace Ark::internal return IR::Entity(INCREMENT, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); if (third.inst() == SUB && first.inst() == LOAD_SYMBOL && second.inst() == LOAD_CONST && isPositiveNumberInlinable(second.primaryArg())) return 
IR::Entity(DECREMENT, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); + + // todo: refactor + if (third.inst() == ADD && first.inst() == LOAD_CONST && second.inst() == LOAD_SYMBOL_BY_INDEX && isPositiveNumberInlinable(first.primaryArg())) + return IR::Entity(INCREMENT_BY_INDEX, second.primaryArg(), static_cast(std::get(m_values[first.primaryArg()].value))); + if (third.inst() == ADD && first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == LOAD_CONST && isPositiveNumberInlinable(second.primaryArg())) + return IR::Entity(INCREMENT_BY_INDEX, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); + if (third.inst() == SUB && first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == LOAD_CONST && isPositiveNumberInlinable(second.primaryArg())) + return IR::Entity(DECREMENT_BY_INDEX, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); + // LOAD_SYMBOL list // TAIL / HEAD // STORE / SET_VAL a diff --git a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp index b21297e65..dd19e1c23 100644 --- a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp +++ b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp @@ -322,7 +322,11 @@ namespace Ark::internal // else code if (x.constList().size() == 4) // we have an else clause + { + m_locals_locator.saveScopeLengthForBranch(); compileExpression(x.constList()[3], p, is_result_unused, is_terminal, var_name); + m_locals_locator.dropVarsForBranch(); + } // when else is finished, jump to end const auto label_end = IR::Entity::Label(m_current_label++); @@ -331,7 +335,9 @@ namespace Ark::internal // absolute address to jump to if condition is true page(p).emplace_back(label_then); // if code + m_locals_locator.saveScopeLengthForBranch(); compileExpression(x.constList()[2], p, is_result_unused, is_terminal, var_name); + m_locals_locator.dropVarsForBranch(); // set jump to end pos page(p).emplace_back(label_end); } diff 
--git a/src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp b/src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp index 42fb11d72..7611c200c 100644 --- a/src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp +++ b/src/arkreactor/Compiler/Lowerer/LocalsLocator.cpp @@ -19,13 +19,13 @@ namespace Ark::internal std::optional LocalsLocator::lookupLastScopeByName(const std::string& name) { - auto& back = m_scopes.back(); + auto& [data, type] = m_scopes.back(); - if (back.type != ScopeType::Closure) + if (type != ScopeType::Closure) { // Compute the index of the variable in the active scope from the end. - if (const auto it = std::ranges::find(back.data, name); it != back.data.end()) - return static_cast(std::distance(it, back.data.end())) - 1; + if (const auto it = std::ranges::find(data, name); it != data.end()) + return static_cast(std::distance(it, data.end())) - 1; } return std::nullopt; @@ -42,4 +42,21 @@ namespace Ark::internal { m_scopes.pop_back(); } + + void LocalsLocator::saveScopeLengthForBranch() + { + m_drop_for_conds.push_back(m_scopes.back().data.size()); + } + + void LocalsLocator::dropVarsForBranch() + { + const auto old_length = m_drop_for_conds.back(); + m_drop_for_conds.pop_back(); + + auto& back = m_scopes.back(); + if (back.data.size() > old_length) + back.data.erase( + back.data.begin() + static_cast(old_length), + back.data.end()); + } } diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index ea9f4abc7..d3775719b 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -420,9 +420,13 @@ namespace Ark &&TARGET_LOAD_CONST_STORE, &&TARGET_LOAD_CONST_SET_VAL, &&TARGET_STORE_FROM, + &&TARGET_STORE_FROM_INDEX, &&TARGET_SET_VAL_FROM, + &&TARGET_SET_VAL_FROM_INDEX, &&TARGET_INCREMENT, + &&TARGET_INCREMENT_BY_INDEX, &&TARGET_DECREMENT, + &&TARGET_DECREMENT_BY_INDEX, &&TARGET_STORE_TAIL, &&TARGET_STORE_HEAD, &&TARGET_SET_VAL_TAIL, @@ -463,9 +467,7 @@ namespace Ark TARGET(LOAD_SYMBOL_BY_INDEX) { - auto& [id, value] = 
context.locals.back().atPosReverse(arg); - context.last_symbol = id; - push(value, context); + push(loadSymbolFromIndex(arg, context), context); DISPATCH(); } @@ -1319,6 +1321,13 @@ namespace Ark DISPATCH(); } + TARGET(STORE_FROM_INDEX) + { + UNPACK_ARGS(); + store(secondary_arg, loadSymbolFromIndex(primary_arg, context), context); + DISPATCH(); + } + TARGET(SET_VAL_FROM) { UNPACK_ARGS(); @@ -1326,6 +1335,13 @@ namespace Ark DISPATCH(); } + TARGET(SET_VAL_FROM_INDEX) + { + UNPACK_ARGS(); + setVal(secondary_arg, loadSymbolFromIndex(primary_arg, context), context); + DISPATCH(); + } + TARGET(INCREMENT) { UNPACK_ARGS(); @@ -1347,6 +1363,27 @@ namespace Ark DISPATCH(); } + TARGET(INCREMENT_BY_INDEX) + { + UNPACK_ARGS(); + { + Value* var = loadSymbolFromIndex(primary_arg, context); + + // use internal reference, shouldn't break anything so far, unless it's already a ref + if (var->valueType() == ValueType::Reference) + var = var->reference(); + + if (var->valueType() == ValueType::Number) + push(Value(var->number() + secondary_arg), context); + else + types::generateError( + "+", + { { types::Contract { { types::Typedef("a", ValueType::Number), types::Typedef("b", ValueType::Number) } } } }, + { *var, Value(secondary_arg) }); + } + DISPATCH(); + } + TARGET(DECREMENT) { UNPACK_ARGS(); @@ -1368,6 +1405,27 @@ namespace Ark DISPATCH(); } + TARGET(DECREMENT_BY_INDEX) + { + UNPACK_ARGS(); + { + Value* var = loadSymbolFromIndex(primary_arg, context); + + // use internal reference, shouldn't break anything so far, unless it's already a ref + if (var->valueType() == ValueType::Reference) + var = var->reference(); + + if (var->valueType() == ValueType::Number) + push(Value(var->number() - secondary_arg), context); + else + types::generateError( + "-", + { { types::Contract { { types::Typedef("a", ValueType::Number), types::Typedef("b", ValueType::Number) } } } }, + { *var, Value(secondary_arg) }); + } + DISPATCH(); + } + TARGET(STORE_TAIL) { UNPACK_ARGS(); diff --git 
a/tests/unittests/resources/CompilerSuite/ir/99bottles.expected b/tests/unittests/resources/CompilerSuite/ir/99bottles.expected index dab5d2fb4..ec7a15d1b 100644 --- a/tests/unittests/resources/CompilerSuite/ir/99bottles.expected +++ b/tests/unittests/resources/CompilerSuite/ir/99bottles.expected @@ -13,16 +13,16 @@ page_0 TO_NUM 0 .L1: STORE 0 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 ISNIL 0 GOTO_IF_TRUE L2 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 GOTO L3 .L2: LOAD_CONST 2 .L3: STORE 2 - LOAD_SYMBOL 2 + LOAD_SYMBOL_BY_INDEX 0 STORE 3 CREATE_SCOPE 0 .L4: diff --git a/tests/unittests/resources/CompilerSuite/ir/ackermann.expected b/tests/unittests/resources/CompilerSuite/ir/ackermann.expected index 5adc0ecf5..f22f1b29a 100644 --- a/tests/unittests/resources/CompilerSuite/ir/ackermann.expected +++ b/tests/unittests/resources/CompilerSuite/ir/ackermann.expected @@ -4,7 +4,7 @@ page_0 LOAD_CONST 3 LOAD_CONST 4 LOAD_CONST 5 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 CALL 2 BUILTIN 9 CALL 2 @@ -13,33 +13,33 @@ page_0 page_1 STORE 1 STORE 2 - LOAD_SYMBOL 1 + LOAD_SYMBOL_BY_INDEX 1 LOAD_CONST 1 GT 0 GOTO_IF_TRUE L0 LOAD_CONST 2 - LOAD_SYMBOL 2 + LOAD_SYMBOL_BY_INDEX 0 ADD 0 GOTO L1 .L0: LOAD_CONST 1 - LOAD_SYMBOL 2 + LOAD_SYMBOL_BY_INDEX 0 EQ 0 GOTO_IF_TRUE L2 - LOAD_SYMBOL 1 - LOAD_SYMBOL 2 + LOAD_SYMBOL_BY_INDEX 1 + LOAD_SYMBOL_BY_INDEX 0 LOAD_CONST 2 SUB 0 LOAD_SYMBOL 0 CALL 2 - LOAD_SYMBOL 1 + LOAD_SYMBOL_BY_INDEX 1 LOAD_CONST 2 SUB 0 JUMP 0 GOTO L3 .L2: LOAD_CONST 2 - LOAD_SYMBOL 1 + LOAD_SYMBOL_BY_INDEX 1 LOAD_CONST 2 SUB 0 JUMP 0 diff --git a/tests/unittests/resources/CompilerSuite/ir/closures.expected b/tests/unittests/resources/CompilerSuite/ir/closures.expected index e24d36ccd..2418a1f58 100644 --- a/tests/unittests/resources/CompilerSuite/ir/closures.expected +++ b/tests/unittests/resources/CompilerSuite/ir/closures.expected @@ -4,17 +4,17 @@ page_0 LOAD_CONST 3 LOAD_CONST 4 LOAD_CONST 5 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 CALL 3 STORE 6 LOAD_CONST 6 
LOAD_CONST 7 LOAD_CONST 8 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 1 CALL 3 STORE 7 LOAD_CONST 9 - LOAD_SYMBOL 6 + LOAD_SYMBOL_BY_INDEX 1 GET_FIELD 2 BUILTIN 9 CALL 2 @@ -24,18 +24,18 @@ page_0 CALL 1 POP 0 LOAD_CONST 11 - LOAD_SYMBOL 6 + LOAD_SYMBOL_BY_INDEX 1 GET_FIELD 4 CALL 1 POP 0 LOAD_CONST 12 - LOAD_SYMBOL 6 + LOAD_SYMBOL_BY_INDEX 1 GET_FIELD 2 BUILTIN 9 CALL 2 POP 0 LOAD_CONST 13 - LOAD_SYMBOL 7 + LOAD_SYMBOL_BY_INDEX 0 GET_FIELD 2 BUILTIN 9 CALL 2 @@ -43,23 +43,23 @@ page_0 LOAD_CONST 14 STORE 8 LOAD_CONST 17 - LOAD_SYMBOL 8 + LOAD_SYMBOL_BY_INDEX 0 CALL 1 STORE 10 LOAD_CONST 18 - LOAD_SYMBOL 10 + LOAD_SYMBOL_BY_INDEX 0 CALL 0 BUILTIN 9 CALL 2 POP 0 LOAD_CONST 18 - LOAD_SYMBOL 10 + LOAD_SYMBOL_BY_INDEX 0 CALL 0 BUILTIN 9 CALL 2 POP 0 LOAD_CONST 18 - LOAD_SYMBOL 10 + LOAD_SYMBOL_BY_INDEX 0 CALL 0 BUILTIN 9 CALL 2 @@ -81,7 +81,7 @@ page_1 page_2 STORE 5 - LOAD_SYMBOL 5 + LOAD_SYMBOL_BY_INDEX 0 SET_VAL 2 RET 0 HALT 0 diff --git a/tests/unittests/resources/CompilerSuite/ir/factorial.expected b/tests/unittests/resources/CompilerSuite/ir/factorial.expected index 65d34e3f3..21dc15162 100644 --- a/tests/unittests/resources/CompilerSuite/ir/factorial.expected +++ b/tests/unittests/resources/CompilerSuite/ir/factorial.expected @@ -3,7 +3,7 @@ page_0 STORE 0 LOAD_CONST 3 LOAD_CONST 4 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 CALL 1 BUILTIN 9 CALL 2 @@ -32,6 +32,6 @@ page_1 GOTO L0 .L1: POP_SCOPE 0 - LOAD_SYMBOL 2 + LOAD_SYMBOL_BY_INDEX 1 RET 0 HALT 0 diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/99bottles.expected b/tests/unittests/resources/CompilerSuite/optimized_ir/99bottles.expected index 118f1b9fe..43cbbc15e 100644 --- a/tests/unittests/resources/CompilerSuite/optimized_ir/99bottles.expected +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/99bottles.expected @@ -13,16 +13,16 @@ page_0 TO_NUM 0 .L1: STORE 0 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 ISNIL 0 GOTO_IF_TRUE L2 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 GOTO L3 .L2: LOAD_CONST 2 
.L3: STORE 2 - STORE_FROM 2, 3 + STORE_FROM_INDEX 0, 3 CREATE_SCOPE 0 .L4: LOAD_SYMBOL 3 diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/ackermann.expected b/tests/unittests/resources/CompilerSuite/optimized_ir/ackermann.expected index 72a7c8687..15703fad0 100644 --- a/tests/unittests/resources/CompilerSuite/optimized_ir/ackermann.expected +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/ackermann.expected @@ -2,7 +2,7 @@ page_0 LOAD_CONST_STORE 0, 0 LOAD_CONST_LOAD_CONST 3, 4 LOAD_CONST 5 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 CALL 2 CALL_BUILTIN 9, 2 HALT 0 @@ -10,27 +10,27 @@ page_0 page_1 STORE 1 STORE 2 - LOAD_SYMBOL 1 + LOAD_SYMBOL_BY_INDEX 1 LOAD_CONST 1 GT 0 GOTO_IF_TRUE L0 - INCREMENT 2, 1 + INCREMENT_BY_INDEX 0, 1 GOTO L1 .L0: LOAD_CONST 1 - LOAD_SYMBOL 2 + LOAD_SYMBOL_BY_INDEX 0 EQ 0 GOTO_IF_TRUE L2 - LOAD_SYMBOL 1 - DECREMENT 2, 1 + LOAD_SYMBOL_BY_INDEX 1 + DECREMENT_BY_INDEX 0, 1 LOAD_SYMBOL 0 CALL 2 - DECREMENT 1, 1 + DECREMENT_BY_INDEX 1, 1 JUMP 0 GOTO L3 .L2: LOAD_CONST 2 - DECREMENT 1, 1 + DECREMENT_BY_INDEX 1, 1 JUMP 0 .L3: .L1: diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/closures.expected b/tests/unittests/resources/CompilerSuite/optimized_ir/closures.expected index 165ba8804..34ab4efa2 100644 --- a/tests/unittests/resources/CompilerSuite/optimized_ir/closures.expected +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/closures.expected @@ -2,16 +2,16 @@ page_0 LOAD_CONST_STORE 0, 0 LOAD_CONST_LOAD_CONST 3, 4 LOAD_CONST 5 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 CALL 3 STORE 6 LOAD_CONST_LOAD_CONST 6, 7 LOAD_CONST 8 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 1 CALL 3 STORE 7 LOAD_CONST 9 - LOAD_SYMBOL 6 + LOAD_SYMBOL_BY_INDEX 1 GET_FIELD 2 CALL_BUILTIN 9, 2 POP 0 @@ -19,37 +19,37 @@ page_0 CALL_BUILTIN 9, 1 POP 0 LOAD_CONST 11 - LOAD_SYMBOL 6 + LOAD_SYMBOL_BY_INDEX 1 GET_FIELD 4 CALL 1 POP 0 LOAD_CONST 12 - LOAD_SYMBOL 6 + LOAD_SYMBOL_BY_INDEX 1 GET_FIELD 2 CALL_BUILTIN 9, 2 POP 0 LOAD_CONST 
13 - LOAD_SYMBOL 7 + LOAD_SYMBOL_BY_INDEX 0 GET_FIELD 2 CALL_BUILTIN 9, 2 POP 0 LOAD_CONST_STORE 14, 8 LOAD_CONST 17 - LOAD_SYMBOL 8 + LOAD_SYMBOL_BY_INDEX 0 CALL 1 STORE 10 LOAD_CONST 18 - LOAD_SYMBOL 10 + LOAD_SYMBOL_BY_INDEX 0 CALL 0 CALL_BUILTIN 9, 2 POP 0 LOAD_CONST 18 - LOAD_SYMBOL 10 + LOAD_SYMBOL_BY_INDEX 0 CALL 0 CALL_BUILTIN 9, 2 POP 0 LOAD_CONST 18 - LOAD_SYMBOL 10 + LOAD_SYMBOL_BY_INDEX 0 CALL 0 CALL_BUILTIN 9, 2 HALT 0 @@ -69,7 +69,7 @@ page_1 page_2 STORE 5 - SET_VAL_FROM 5, 2 + SET_VAL_FROM_INDEX 0, 2 RET 0 HALT 0 diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/factorial.expected b/tests/unittests/resources/CompilerSuite/optimized_ir/factorial.expected index cc5a8d2a8..0574a6144 100644 --- a/tests/unittests/resources/CompilerSuite/optimized_ir/factorial.expected +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/factorial.expected @@ -1,7 +1,7 @@ page_0 LOAD_CONST_STORE 0, 0 LOAD_CONST_LOAD_CONST 3, 4 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 CALL 1 CALL_BUILTIN 9, 2 HALT 0 @@ -25,6 +25,6 @@ page_1 GOTO L0 .L1: POP_SCOPE 0 - LOAD_SYMBOL 2 + LOAD_SYMBOL_BY_INDEX 1 RET 0 HALT 0 diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/increments.expected b/tests/unittests/resources/CompilerSuite/optimized_ir/increments.expected index a51b2a45d..567b42cdb 100644 --- a/tests/unittests/resources/CompilerSuite/optimized_ir/increments.expected +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/increments.expected @@ -1,16 +1,16 @@ page_0 LOAD_CONST_STORE 0, 0 - INCREMENT 0, 4 + INCREMENT_BY_INDEX 0, 4 SET_VAL 0 - INCREMENT 0, 6 + INCREMENT_BY_INDEX 0, 6 SET_VAL 0 - DECREMENT 0, 8 + DECREMENT_BY_INDEX 0, 8 SET_VAL 0 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 LOAD_CONST 4 ADD 0 SET_VAL 0 - LOAD_SYMBOL 0 + LOAD_SYMBOL_BY_INDEX 0 LOAD_CONST 5 ADD 0 SET_VAL 0 From 78bb993e75f7195be5c4ae3fdba42426df1f00f1 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 21 Mar 2025 11:28:13 +0100 Subject: [PATCH 06/12] feat(ir 
optimizer): improve instructions merging to use a list of rules, easier to maintain with less code repetition --- .../IROptimizer.hpp | 25 ++- .../IROptimizer.cpp | 180 +++++++++++------- 2 files changed, 134 insertions(+), 71 deletions(-) diff --git a/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp b/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp index b761868d8..da8242193 100644 --- a/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp +++ b/include/Ark/Compiler/IntermediateRepresentation/IROptimizer.hpp @@ -17,6 +17,7 @@ #include #include +#include namespace Ark::internal { @@ -47,15 +48,35 @@ namespace Ark::internal [[nodiscard]] const std::vector& intermediateRepresentation() const noexcept; private: + using Entities = std::vector; + using DualArgs = std::pair; + + struct Rule + { + std::vector expected; + Instruction replacement; + std::function condition = [](const Entities&) { + return true; + }; ///< Additional condition to match + std::function createReplacement = + [](const Entities& entities) { + return std::make_pair(entities[0].primaryArg(), entities[1].primaryArg()); + }; ///< Create the replacement instructions from given context + }; + + std::vector m_ruleset_two; + std::vector m_ruleset_three; + Logger m_logger; std::vector m_ir; std::vector m_symbols; std::vector m_values; - [[nodiscard]] std::optional compactEntities(const IR::Entity& first, const IR::Entity& second); - [[nodiscard]] std::optional compactEntities(const IR::Entity& first, const IR::Entity& second, const IR::Entity& third); + [[nodiscard]] bool match(const std::vector& expected_insts, const Entities& entities) const; + std::optional replaceWithRules(const std::vector& rules, const Entities& entities); [[nodiscard]] bool isPositiveNumberInlinable(uint16_t id) const; + [[nodiscard]] uint16_t numberAsArg(uint16_t id) const; }; } diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp 
b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp index e845c1337..9bede8ccf 100644 --- a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp +++ b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp @@ -13,7 +13,88 @@ namespace Ark::internal IROptimizer::IROptimizer(const unsigned debug) : m_logger("IROptimizer", debug) - {} + { + m_ruleset_two = { + Rule { + { LOAD_CONST, LOAD_CONST }, LOAD_CONST_LOAD_CONST }, + Rule { + { LOAD_CONST, STORE }, LOAD_CONST_STORE }, + Rule { + { LOAD_CONST, SET_VAL }, LOAD_CONST_SET_VAL }, + Rule { + { LOAD_SYMBOL, STORE }, STORE_FROM }, + Rule { + { LOAD_SYMBOL_BY_INDEX, STORE }, STORE_FROM_INDEX }, + Rule { + { LOAD_SYMBOL, SET_VAL }, SET_VAL_FROM }, + Rule { + { LOAD_SYMBOL_BY_INDEX, SET_VAL }, SET_VAL_FROM_INDEX }, + Rule { + { BUILTIN, CALL }, CALL_BUILTIN, [](const Entities& entities) { + return Builtins::builtins[entities[0].primaryArg()].second.isFunction(); + } } + }; + + m_ruleset_three = { + // LOAD_SYMBOL a / LOAD_SYMBOL_BY_INDEX index + // LOAD_CONST n (1) + // ADD / SUB + // ---> INCREMENT / DECREMENT a value + Rule { + { LOAD_CONST, LOAD_SYMBOL, ADD }, INCREMENT, [this](const Entities& e) { + return isPositiveNumberInlinable(e[0].primaryArg()); + }, + [this](const Entities& e) { + return std::make_pair(e[1].primaryArg(), numberAsArg(e[0].primaryArg())); + } }, + Rule { { LOAD_SYMBOL, LOAD_CONST, ADD }, INCREMENT, [this](const Entities& e) { + return isPositiveNumberInlinable(e[1].primaryArg()); + }, + [this](const Entities& e) { + return std::make_pair(e[0].primaryArg(), numberAsArg(e[1].primaryArg())); + } }, + Rule { { LOAD_SYMBOL, LOAD_CONST, SUB }, DECREMENT, [this](const Entities& e) { + return isPositiveNumberInlinable(e[1].primaryArg()); + }, + [this](const Entities& e) { + return std::make_pair(e[0].primaryArg(), numberAsArg(e[1].primaryArg())); + } }, + Rule { { LOAD_CONST, LOAD_SYMBOL_BY_INDEX, ADD }, INCREMENT_BY_INDEX, [this](const Entities& e) { + return 
isPositiveNumberInlinable(e[0].primaryArg()); + }, + [this](const Entities& e) { + return std::make_pair(e[1].primaryArg(), numberAsArg(e[0].primaryArg())); + } }, + Rule { { LOAD_SYMBOL_BY_INDEX, LOAD_CONST, ADD }, INCREMENT_BY_INDEX, [this](const Entities& e) { + return isPositiveNumberInlinable(e[1].primaryArg()); + }, + [this](const Entities& e) { + return std::make_pair(e[0].primaryArg(), numberAsArg(e[1].primaryArg())); + } }, + Rule { { LOAD_SYMBOL_BY_INDEX, LOAD_CONST, SUB }, DECREMENT_BY_INDEX, [this](const Entities& e) { + return isPositiveNumberInlinable(e[1].primaryArg()); + }, + [this](const Entities& e) { + return std::make_pair(e[0].primaryArg(), numberAsArg(e[1].primaryArg())); + } }, + // LOAD_SYMBOL list + // TAIL / HEAD + // STORE / SET_VAL a + // ---> STORE_TAIL list a ; STORE_HEAD ; SET_VAL_TAIL ; SET_VAL_HEAD + Rule { .expected = { LOAD_SYMBOL, TAIL, STORE }, .replacement = STORE_TAIL, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + } }, + Rule { .expected = { LOAD_SYMBOL, TAIL, SET_VAL }, .replacement = SET_VAL_TAIL, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + } }, + Rule { .expected = { LOAD_SYMBOL, HEAD, STORE }, .replacement = STORE_HEAD, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + } }, + Rule { .expected = { LOAD_SYMBOL, HEAD, SET_VAL }, .replacement = SET_VAL_HEAD, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + } } + }; + } void IROptimizer::process(const std::vector& pages, const std::vector& symbols, const std::vector& values) { @@ -47,7 +128,7 @@ namespace Ark::internal if (i + 1 < end) maybe_compacted = map( - compactEntities(block[i], block[i + 1]), + replaceWithRules(m_ruleset_two, { block[i], block[i + 1] }), [](const auto& entity) { return std::make_optional(entity, 2); }); @@ 
-56,7 +137,7 @@ namespace Ark::internal maybe_compacted, [&, this]() { return map( - compactEntities(block[i], block[i + 1], block[i + 2]), + replaceWithRules(m_ruleset_three, { block[i], block[i + 1], block[i + 2] }), [](const auto& entity) { return std::make_optional(entity, 3); }); @@ -84,79 +165,35 @@ namespace Ark::internal return m_ir; } - std::optional IROptimizer::compactEntities(const IR::Entity& first, const IR::Entity& second) + bool IROptimizer::match(const std::vector& expected_insts, const Entities& entities) const { - if (first.primaryArg() > IR::MaxValueForDualArg || second.primaryArg() > IR::MaxValueForDualArg) - return std::nullopt; + assert(expected_insts.size() == entities.size() && "Mismatching size between expected instructions and given entities"); - // LOAD_CONST x - // LOAD_CONST y - // ---> LOAD_CONST_LOAD_CONST x y - if (first.inst() == LOAD_CONST && second.inst() == LOAD_CONST) - return IR::Entity(LOAD_CONST_LOAD_CONST, first.primaryArg(), second.primaryArg()); - // LOAD_CONST x - // STORE / SET_VAL a - // ---> LOAD_CONST_STORE x a ; LOAD_CONST_SET_VAL x a - if (first.inst() == LOAD_CONST && second.inst() == STORE) - return IR::Entity(LOAD_CONST_STORE, first.primaryArg(), second.primaryArg()); - if (first.inst() == LOAD_CONST && second.inst() == SET_VAL) - return IR::Entity(LOAD_CONST_SET_VAL, first.primaryArg(), second.primaryArg()); - // LOAD_SYMBOL / LOAD_SYMBOL_BY_INDEX a - // STORE / SET_VAL b - // ---> STORE_FROM a b ; SET_VAL_FROM a b - if (first.inst() == LOAD_SYMBOL && second.inst() == STORE) - return IR::Entity(STORE_FROM, first.primaryArg(), second.primaryArg()); - if (first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == STORE) - return IR::Entity(STORE_FROM_INDEX, first.primaryArg(), second.primaryArg()); - if (first.inst() == LOAD_SYMBOL && second.inst() == SET_VAL) - return IR::Entity(SET_VAL_FROM, first.primaryArg(), second.primaryArg()); - if (first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == SET_VAL) - return 
IR::Entity(SET_VAL_FROM_INDEX, first.primaryArg(), second.primaryArg()); - // BUILTIN i - // CALL n - // ---> CALL_BUILTIN i n - if (first.inst() == BUILTIN && second.inst() == CALL && Builtins::builtins[first.primaryArg()].second.isFunction()) - return IR::Entity(CALL_BUILTIN, first.primaryArg(), second.primaryArg()); + for (std::size_t i = 0; i < expected_insts.size(); ++i) + { + if (expected_insts[i] != entities[i].inst()) + return false; + } - return std::nullopt; + return true; } - std::optional IROptimizer::compactEntities(const IR::Entity& first, const IR::Entity& second, const IR::Entity& third) + std::optional IROptimizer::replaceWithRules(const std::vector& rules, const Entities& entities) { - if (first.primaryArg() > IR::MaxValueForDualArg || second.primaryArg() > IR::MaxValueForDualArg || third.primaryArg() > IR::MaxValueForDualArg) - return std::nullopt; + for (auto&& entity : entities) + { + if (entity.primaryArg() > IR::MaxValueForDualArg) + return std::nullopt; + } - // LOAD_SYMBOL a - // LOAD_CONST n (1) - // ADD / SUB - // ---> INCREMENT / DECREMENT a value - if (third.inst() == ADD && first.inst() == LOAD_CONST && second.inst() == LOAD_SYMBOL && isPositiveNumberInlinable(first.primaryArg())) - return IR::Entity(INCREMENT, second.primaryArg(), static_cast(std::get(m_values[first.primaryArg()].value))); - if (third.inst() == ADD && first.inst() == LOAD_SYMBOL && second.inst() == LOAD_CONST && isPositiveNumberInlinable(second.primaryArg())) - return IR::Entity(INCREMENT, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); - if (third.inst() == SUB && first.inst() == LOAD_SYMBOL && second.inst() == LOAD_CONST && isPositiveNumberInlinable(second.primaryArg())) - return IR::Entity(DECREMENT, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); - - // todo: refactor - if (third.inst() == ADD && first.inst() == LOAD_CONST && second.inst() == LOAD_SYMBOL_BY_INDEX && 
isPositiveNumberInlinable(first.primaryArg())) - return IR::Entity(INCREMENT_BY_INDEX, second.primaryArg(), static_cast(std::get(m_values[first.primaryArg()].value))); - if (third.inst() == ADD && first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == LOAD_CONST && isPositiveNumberInlinable(second.primaryArg())) - return IR::Entity(INCREMENT_BY_INDEX, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); - if (third.inst() == SUB && first.inst() == LOAD_SYMBOL_BY_INDEX && second.inst() == LOAD_CONST && isPositiveNumberInlinable(second.primaryArg())) - return IR::Entity(DECREMENT_BY_INDEX, first.primaryArg(), static_cast(std::get(m_values[second.primaryArg()].value))); - - // LOAD_SYMBOL list - // TAIL / HEAD - // STORE / SET_VAL a - // ---> STORE_TAIL list a ; STORE_HEAD ; SET_VAL_TAIL ; SET_VAL_HEAD - if (first.inst() == LOAD_SYMBOL && second.inst() == TAIL && third.inst() == STORE) - return IR::Entity(STORE_TAIL, first.primaryArg(), third.primaryArg()); - if (first.inst() == LOAD_SYMBOL && second.inst() == TAIL && third.inst() == SET_VAL) - return IR::Entity(SET_VAL_TAIL, first.primaryArg(), third.primaryArg()); - if (first.inst() == LOAD_SYMBOL && second.inst() == HEAD && third.inst() == STORE) - return IR::Entity(STORE_HEAD, first.primaryArg(), third.primaryArg()); - if (first.inst() == LOAD_SYMBOL && second.inst() == HEAD && third.inst() == SET_VAL) - return IR::Entity(SET_VAL_HEAD, first.primaryArg(), third.primaryArg()); + for (const auto& [expected, replacement, condition, createReplacement] : rules) + { + if (match(expected, entities) && condition(entities)) + { + auto [first, second] = createReplacement(entities); + return IR::Entity(replacement, first, second); + } + } return std::nullopt; } @@ -172,4 +209,9 @@ namespace Ark::internal } return false; } + + uint16_t IROptimizer::numberAsArg(const uint16_t id) const + { + return static_cast(std::get(m_values[id].value)); + } } From 463f70a1726412451a5249cb31c61b051b23768f Mon 
Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 21 Mar 2025 15:41:27 +0100 Subject: [PATCH 07/12] feat(compiler, vm, instructions): adding super instructions for list manipulation using load by index --- CHANGELOG.md | 1 + include/Ark/Compiler/Instructions.hpp | 32 +++++++++--- .../IROptimizer.cpp | 20 ++++++-- src/arkreactor/VM/VM.cpp | 50 +++++++++++++++++++ .../CompilerSuite/optimized_ir/lists.ark | 17 +++++++ .../CompilerSuite/optimized_ir/lists.expected | 24 +++++++++ 6 files changed, 134 insertions(+), 10 deletions(-) create mode 100644 tests/unittests/resources/CompilerSuite/optimized_ir/lists.ark create mode 100644 tests/unittests/resources/CompilerSuite/optimized_ir/lists.expected diff --git a/CHANGELOG.md b/CHANGELOG.md index 062eeb5d7..4c38db3f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ - `LOAD_SYMBOL_BY_INDEX` instruction, loading a local from the current scope by an index (0 being the last element added to the scope) - `STORE_FROM_INDEX` and `SET_VAL_FROM_INDEX` instructions for parity with the super instructions not using load by index - `INCREMENT_BY_INDEX` and `DECREMENT_BY_INDEX` instructions for parity with the super instructions not using load by index +- `STORE_TAIL_BY_INDEX`, `STORE_HEAD_BY_INDEX`, `SET_VAL_TAIL_BY_INDEX`, `SET_VAL_HEAD_BY_INDEX` super instructions added for parity with the super instructions not using load by index ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument diff --git a/include/Ark/Compiler/Instructions.hpp b/include/Ark/Compiler/Instructions.hpp index 7f992c2f5..f789a3726 100644 --- a/include/Ark/Compiler/Instructions.hpp +++ b/include/Ark/Compiler/Instructions.hpp @@ -267,23 +267,39 @@ namespace Ark::internal // @role Load the symbol #[code primary], compute its tail, store it in a new variable #[code secondary] STORE_TAIL = 0x42, + // @args symbol index, symbol id + // @role Load the symbol #[code primary], compute 
its tail, store it in a new variable #[code secondary] + STORE_TAIL_BY_INDEX = 0x43, + // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its head, store it in a new variable #[code secondary] - STORE_HEAD = 0x43, + STORE_HEAD = 0x44, + + // @args symbol index, symbol id + // @role Load the symbol #[code primary], compute its head, store it in a new variable #[code secondary] + STORE_HEAD_BY_INDEX = 0x45, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its tail, store it in an existing variable #[code secondary] - SET_VAL_TAIL = 0x44, + SET_VAL_TAIL = 0x46, + + // @args symbol index, symbol id + // @role Load the symbol #[code primary], compute its tail, store it in an existing variable #[code secondary] + SET_VAL_TAIL_BY_INDEX = 0x47, // @args symbol id, symbol id // @role Load the symbol #[code primary], compute its head, store it in an existing variable #[code secondary] - SET_VAL_HEAD = 0x45, + SET_VAL_HEAD = 0x48, + + // @args symbol index, symbol id + // @role Load the symbol #[code primary], compute its head, store it in an existing variable #[code secondary] + SET_VAL_HEAD_BY_INDEX = 0x49, // @args builtin id, argument count // @role Call a builtin by its id in #[code primary], with #[code secondary] arguments. 
Bypass the stack size check because we do not push IP/PP since builtins calls do not alter the stack - CALL_BUILTIN = 0x46, + CALL_BUILTIN = 0x4a, - LAST + InstructionsCount }; constexpr std::array InstructionNames = { @@ -356,13 +372,17 @@ namespace Ark::internal "DECREMENT", "DECREMENT_BY_INDEX", "STORE_TAIL", + "STORE_TAIL_BY_INDEX", "STORE_HEAD", + "STORE_HEAD_BY_INDEX", "SET_VAL_TAIL", + "SET_VAL_TAIL_BY_INDEX", "SET_VAL_HEAD", + "SET_VAL_HEAD_BY_INDEX", "CALL_BUILTIN" }; - static_assert(InstructionNames.size() == static_cast(Instruction::LAST) && "Some instruction names appear to be missing"); + static_assert(InstructionNames.size() == static_cast(Instruction::InstructionsCount) && "Some instruction names appear to be missing"); } #endif diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp index 9bede8ccf..ad5b05ad2 100644 --- a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp +++ b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp @@ -82,16 +82,28 @@ namespace Ark::internal // STORE / SET_VAL a // ---> STORE_TAIL list a ; STORE_HEAD ; SET_VAL_TAIL ; SET_VAL_HEAD Rule { .expected = { LOAD_SYMBOL, TAIL, STORE }, .replacement = STORE_TAIL, .createReplacement = [](const Entities& e) { - return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); } }, Rule { .expected = { LOAD_SYMBOL, TAIL, SET_VAL }, .replacement = SET_VAL_TAIL, .createReplacement = [](const Entities& e) { - return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); } }, Rule { .expected = { LOAD_SYMBOL, HEAD, STORE }, .replacement = STORE_HEAD, .createReplacement = [](const Entities& e) { - return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); } }, Rule { .expected = { 
LOAD_SYMBOL, HEAD, SET_VAL }, .replacement = SET_VAL_HEAD, .createReplacement = [](const Entities& e) { - return std::make_pair(e[0].primaryArg(), e[1].primaryArg()); + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); + } }, + Rule { .expected = { LOAD_SYMBOL_BY_INDEX, TAIL, STORE }, .replacement = STORE_TAIL_BY_INDEX, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); + } }, + Rule { .expected = { LOAD_SYMBOL_BY_INDEX, TAIL, SET_VAL }, .replacement = SET_VAL_TAIL_BY_INDEX, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); + } }, + Rule { .expected = { LOAD_SYMBOL_BY_INDEX, HEAD, STORE }, .replacement = STORE_HEAD_BY_INDEX, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); + } }, + Rule { .expected = { LOAD_SYMBOL_BY_INDEX, HEAD, SET_VAL }, .replacement = SET_VAL_HEAD_BY_INDEX, .createReplacement = [](const Entities& e) { + return std::make_pair(e[0].primaryArg(), e[2].primaryArg()); } } }; } diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index d3775719b..a689b1c73 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -428,11 +428,17 @@ namespace Ark &&TARGET_DECREMENT, &&TARGET_DECREMENT_BY_INDEX, &&TARGET_STORE_TAIL, + &&TARGET_STORE_TAIL_BY_INDEX, &&TARGET_STORE_HEAD, + &&TARGET_STORE_HEAD_BY_INDEX, &&TARGET_SET_VAL_TAIL, + &&TARGET_SET_VAL_TAIL_BY_INDEX, &&TARGET_SET_VAL_HEAD, + &&TARGET_SET_VAL_HEAD_BY_INDEX, &&TARGET_CALL_BUILTIN }; + + static_assert(opcode_targets.size() == static_cast(Instruction::InstructionsCount) && "Some instructions are not implemented in the VM"); # pragma GCC diagnostic pop #endif @@ -1437,6 +1443,17 @@ namespace Ark DISPATCH(); } + TARGET(STORE_TAIL_BY_INDEX) + { + UNPACK_ARGS(); + { + Value* list = loadSymbolFromIndex(primary_arg, context); + Value tail = helper::tail(list); + store(secondary_arg, &tail, context); 
+ } + DISPATCH(); + } + TARGET(STORE_HEAD) { UNPACK_ARGS(); @@ -1448,6 +1465,17 @@ namespace Ark DISPATCH(); } + TARGET(STORE_HEAD_BY_INDEX) + { + UNPACK_ARGS(); + { + Value* list = loadSymbolFromIndex(primary_arg, context); + Value head = helper::head(list); + store(secondary_arg, &head, context); + } + DISPATCH(); + } + TARGET(SET_VAL_TAIL) { UNPACK_ARGS(); @@ -1459,6 +1487,17 @@ namespace Ark DISPATCH(); } + TARGET(SET_VAL_TAIL_BY_INDEX) + { + UNPACK_ARGS(); + { + Value* list = loadSymbolFromIndex(primary_arg, context); + Value tail = helper::tail(list); + setVal(secondary_arg, &tail, context); + } + DISPATCH(); + } + TARGET(SET_VAL_HEAD) { UNPACK_ARGS(); @@ -1470,6 +1509,17 @@ namespace Ark DISPATCH(); } + TARGET(SET_VAL_HEAD_BY_INDEX) + { + UNPACK_ARGS(); + { + Value* list = loadSymbolFromIndex(primary_arg, context); + Value head = helper::head(list); + setVal(secondary_arg, &head, context); + } + DISPATCH(); + } + TARGET(CALL_BUILTIN) { UNPACK_ARGS(); diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/lists.ark b/tests/unittests/resources/CompilerSuite/optimized_ir/lists.ark new file mode 100644 index 000000000..9955ffefa --- /dev/null +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/lists.ark @@ -0,0 +1,17 @@ +(let source [1 2 3 4]) + +(let foo (fun () { + (mut head_1 (head source)) + (mut tail_1 (tail source)) + + (mut copy_1 source) + (set copy_1 (head source)) + (set copy_1 (tail source)) })) +(foo) + +(mut head_2 (head source)) +(mut tail_2 (tail source)) + +(mut copy_2 source) +(set copy_2 (head source)) +(set copy_2 (tail source)) diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/lists.expected b/tests/unittests/resources/CompilerSuite/optimized_ir/lists.expected new file mode 100644 index 000000000..90059d824 --- /dev/null +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/lists.expected @@ -0,0 +1,24 @@ +page_0 + LOAD_CONST_LOAD_CONST 0, 1 + LOAD_CONST_LOAD_CONST 2, 3 + LIST 4 + STORE 0 + 
LOAD_CONST_STORE 4, 1 + LOAD_SYMBOL_BY_INDEX 0 + CALL 0 + POP 0 + STORE_HEAD_BY_INDEX 1, 5 + STORE_TAIL_BY_INDEX 2, 6 + STORE_FROM_INDEX 3, 7 + SET_VAL_HEAD_BY_INDEX 4, 7 + SET_VAL_TAIL_BY_INDEX 4, 7 + HALT 0 + +page_1 + STORE_HEAD 0, 2 + STORE_TAIL 0, 3 + STORE_FROM 0, 4 + SET_VAL_HEAD 0, 4 + SET_VAL_TAIL 0, 4 + RET 0 + HALT 0 From 9f0e0c3e02af42691e82562a72ed37aea6bc4b83 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 21 Mar 2025 16:56:02 +0100 Subject: [PATCH 08/12] refactor(vm): removing dead code in the VM do {} while value != InstPtr is not needed anymore as we have solved the stack trashing problem about two years ago, we only have useful values on the stack. --- src/arkreactor/VM/VM.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index a689b1c73..0fbf9e1b7 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -533,12 +533,8 @@ namespace Ark // value on the stack else [[likely]] { - const Value* ip; - do - { - ip = popAndResolveAsPtr(context); - } while (ip->valueType() != ValueType::InstPtr); - + const Value* ip = popAndResolveAsPtr(context); + assert(ip->valueType() == ValueType::InstPtr && "Expected instruction pointer on the stack (is the stack trashed?)"); context.ip = ip->pageAddr(); context.pp = pop(context)->pageAddr(); From 81ed54d3b2dde920d938e7e596617340637bd8af Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 21 Mar 2025 17:46:50 +0100 Subject: [PATCH 09/12] feat(tests): printing a diff when multiline strings do not match in the tests --- .gitmodules | 3 +++ CMakeLists.txt | 4 ++++ lib/README.md | 3 ++- lib/dtl | 1 + .../{ => Suites}/BytecodeReaderSuite.cpp | 2 +- .../unittests/{ => Suites}/CompilerSuite.cpp | 6 ++--- .../{ => Suites}/DiagnosticsSuite.cpp | 6 ++--- .../unittests/{ => Suites}/EmbeddingSuite.cpp | 0 .../unittests/{ => Suites}/ExamplesSuite.cpp | 1 - .../unittests/{ => Suites}/FormatterSuite.cpp | 6 ++--- 
tests/unittests/{ => Suites}/LangSuite.cpp | 2 +- .../{ => Suites}/NameResolutionSuite.cpp | 2 +- .../unittests/{ => Suites}/OptimizerSuite.cpp | 4 ++-- tests/unittests/{ => Suites}/ParserSuite.cpp | 6 ++--- tests/unittests/{ => Suites}/ReplSuite.cpp | 0 tests/unittests/{ => Suites}/RosettaSuite.cpp | 2 +- tests/unittests/{ => Suites}/ToolsSuite.cpp | 0 tests/unittests/{ => Suites}/Utf8Suite.cpp | 0 .../unittests/{ => Suites}/ValidAstSuite.cpp | 4 ++-- tests/unittests/TestsHelper.cpp | 23 ++++++++++++++++++- tests/unittests/TestsHelper.hpp | 2 ++ 21 files changed, 54 insertions(+), 23 deletions(-) create mode 160000 lib/dtl rename tests/unittests/{ => Suites}/BytecodeReaderSuite.cpp (99%) rename tests/unittests/{ => Suites}/CompilerSuite.cpp (97%) rename tests/unittests/{ => Suites}/DiagnosticsSuite.cpp (93%) rename tests/unittests/{ => Suites}/EmbeddingSuite.cpp (100%) rename tests/unittests/{ => Suites}/ExamplesSuite.cpp (97%) rename tests/unittests/{ => Suites}/FormatterSuite.cpp (87%) rename tests/unittests/{ => Suites}/LangSuite.cpp (98%) rename tests/unittests/{ => Suites}/NameResolutionSuite.cpp (99%) rename tests/unittests/{ => Suites}/OptimizerSuite.cpp (90%) rename tests/unittests/{ => Suites}/ParserSuite.cpp (94%) rename tests/unittests/{ => Suites}/ReplSuite.cpp (100%) rename tests/unittests/{ => Suites}/RosettaSuite.cpp (97%) rename tests/unittests/{ => Suites}/ToolsSuite.cpp (100%) rename tests/unittests/{ => Suites}/Utf8Suite.cpp (100%) rename tests/unittests/{ => Suites}/ValidAstSuite.cpp (90%) diff --git a/.gitmodules b/.gitmodules index 94cc07e61..1d0ff2632 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,3 +19,6 @@ [submodule "lib/ut"] path = lib/ut url = https://github.com/boost-ext/ut.git +[submodule "lib/dtl"] + path = lib/dtl + url = https://github.com/cubicdaiya/dtl.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 10e8e6414..6cabb1269 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,11 +186,15 @@ endif () if (ARK_TESTS) 
file(GLOB_RECURSE SOURCES ${ark_SOURCE_DIR}/tests/unittests/*.cpp + ${ark_SOURCE_DIR}/tests/unittests/Suites/*.cpp ${ark_SOURCE_DIR}/lib/fmt/src/format.cc ${ark_SOURCE_DIR}/src/arkscript/Formatter.cpp ${ark_SOURCE_DIR}/src/arkscript/JsonCompiler.cpp ${ark_SOURCE_DIR}/src/arkscript/REPL/Utils.cpp) add_executable(unittests ${SOURCES}) + target_include_directories(unittests PUBLIC ${ark_SOURCE_DIR}/tests/unittests) + + target_include_directories(unittests SYSTEM PUBLIC ${ark_SOURCE_DIR}/lib/dtl/dtl) add_subdirectory(${ark_SOURCE_DIR}/lib/ut) target_include_directories(unittests PUBLIC ${ark_SOURCE_DIR}/include) diff --git a/lib/README.md b/lib/README.md index a2c3566ce..4b6d1b9e8 100644 --- a/lib/README.md +++ b/lib/README.md @@ -2,7 +2,8 @@ Includes -* [clipp](https://github.com/GerHobbelt/clipp), MIT License +* [clipp](https://github.com/SuperFola/clipp), MIT License +* [dtl](https://github.com/cubicdaiya/dtl/), BSD License * [fmt](https://github.com/fmtlib/fmt), MIT License * [picosha2](https://github.com/okdshin/PicoSHA2), MIT License * [replxx](https://github.com/AmokHuginnsson/replxx/blob/master/LICENSE.md), MIT License + specifities diff --git a/lib/dtl b/lib/dtl new file mode 160000 index 000000000..32567bb9e --- /dev/null +++ b/lib/dtl @@ -0,0 +1 @@ +Subproject commit 32567bb9ec704f09040fb1ed7431a3d967e3df03 diff --git a/tests/unittests/BytecodeReaderSuite.cpp b/tests/unittests/Suites/BytecodeReaderSuite.cpp similarity index 99% rename from tests/unittests/BytecodeReaderSuite.cpp rename to tests/unittests/Suites/BytecodeReaderSuite.cpp index 062e34030..3c84d5314 100644 --- a/tests/unittests/BytecodeReaderSuite.cpp +++ b/tests/unittests/Suites/BytecodeReaderSuite.cpp @@ -7,7 +7,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; diff --git a/tests/unittests/CompilerSuite.cpp b/tests/unittests/Suites/CompilerSuite.cpp similarity index 97% rename from tests/unittests/CompilerSuite.cpp rename to 
tests/unittests/Suites/CompilerSuite.cpp index 180285d71..d7fea5078 100644 --- a/tests/unittests/CompilerSuite.cpp +++ b/tests/unittests/Suites/CompilerSuite.cpp @@ -5,7 +5,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; @@ -93,7 +93,7 @@ ut::suite<"Compiler"> compiler_suite = [] { std::string ir = welder.textualIR(); ltrim(rtrim(ir)); - expect(that % ir == data.expected); + expect_or_diff(data.expected, ir); }; }); }; @@ -115,7 +115,7 @@ ut::suite<"Compiler"> compiler_suite = [] { std::string ir = welder.textualIR(); ltrim(rtrim(ir)); - expect(that % ir == data.expected); + expect_or_diff(data.expected, ir); }; }); }; diff --git a/tests/unittests/DiagnosticsSuite.cpp b/tests/unittests/Suites/DiagnosticsSuite.cpp similarity index 93% rename from tests/unittests/DiagnosticsSuite.cpp rename to tests/unittests/Suites/DiagnosticsSuite.cpp index ceba6047c..af7f10a49 100644 --- a/tests/unittests/DiagnosticsSuite.cpp +++ b/tests/unittests/Suites/DiagnosticsSuite.cpp @@ -3,7 +3,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; @@ -27,7 +27,7 @@ ut::suite<"Diagnostics"> diagnostics_suite = [] { { std::string diag = sanitize_error(e, /* remove_in_file_line= */ true); rtrim(diag); - expect(that % diag == data.expected); + expect_or_diff(data.expected, diag); } }; }); @@ -54,7 +54,7 @@ ut::suite<"Diagnostics"> diagnostics_suite = [] { if (diag.find_first_of('\n') != std::string::npos) diag.erase(diag.find_first_of('\n'), diag.size() - 1); ltrim(rtrim(diag)); - expect(that % diag == data.expected); + expect_or_diff(data.expected, diag); } }; }); diff --git a/tests/unittests/EmbeddingSuite.cpp b/tests/unittests/Suites/EmbeddingSuite.cpp similarity index 100% rename from tests/unittests/EmbeddingSuite.cpp rename to tests/unittests/Suites/EmbeddingSuite.cpp diff --git a/tests/unittests/ExamplesSuite.cpp b/tests/unittests/Suites/ExamplesSuite.cpp similarity index 97% rename from tests/unittests/ExamplesSuite.cpp 
rename to tests/unittests/Suites/ExamplesSuite.cpp index 7264ee893..78c430ba4 100644 --- a/tests/unittests/ExamplesSuite.cpp +++ b/tests/unittests/Suites/ExamplesSuite.cpp @@ -3,7 +3,6 @@ #include #include -#include "TestsHelper.hpp" using namespace boost; diff --git a/tests/unittests/FormatterSuite.cpp b/tests/unittests/Suites/FormatterSuite.cpp similarity index 87% rename from tests/unittests/FormatterSuite.cpp rename to tests/unittests/Suites/FormatterSuite.cpp index 9785a0a0a..c2e7e6e39 100644 --- a/tests/unittests/FormatterSuite.cpp +++ b/tests/unittests/Suites/FormatterSuite.cpp @@ -2,7 +2,7 @@ #include -#include "TestsHelper.hpp" +#include using namespace boost; @@ -23,7 +23,7 @@ ut::suite<"Formatter"> formatter_suite = [] { formatted_code = formatter.output(); // data.expected is ltrim(rtrim(file content)) // we want to ensure that a blank line has been added - expect(that % formatted_code == (data.expected + "\n")); + expect_or_diff((data.expected + "\n"), formatted_code); }; should("not update an already correctly formatted code (" + data.stem + ")") = [&] { @@ -33,7 +33,7 @@ ut::suite<"Formatter"> formatter_suite = [] { })); const std::string code = formatter.output(); - expect(that % code == formatted_code); + expect_or_diff(formatted_code, code); }; }); }; diff --git a/tests/unittests/LangSuite.cpp b/tests/unittests/Suites/LangSuite.cpp similarity index 98% rename from tests/unittests/LangSuite.cpp rename to tests/unittests/Suites/LangSuite.cpp index e8b6cf97e..8a2b91b39 100644 --- a/tests/unittests/LangSuite.cpp +++ b/tests/unittests/Suites/LangSuite.cpp @@ -3,7 +3,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; diff --git a/tests/unittests/NameResolutionSuite.cpp b/tests/unittests/Suites/NameResolutionSuite.cpp similarity index 99% rename from tests/unittests/NameResolutionSuite.cpp rename to tests/unittests/Suites/NameResolutionSuite.cpp index bf18e169d..6478bed90 100644 --- 
a/tests/unittests/NameResolutionSuite.cpp +++ b/tests/unittests/Suites/NameResolutionSuite.cpp @@ -3,7 +3,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; diff --git a/tests/unittests/OptimizerSuite.cpp b/tests/unittests/Suites/OptimizerSuite.cpp similarity index 90% rename from tests/unittests/OptimizerSuite.cpp rename to tests/unittests/Suites/OptimizerSuite.cpp index 2193db39f..96b2b2f3f 100644 --- a/tests/unittests/OptimizerSuite.cpp +++ b/tests/unittests/Suites/OptimizerSuite.cpp @@ -3,7 +3,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; @@ -25,7 +25,7 @@ ut::suite<"Optimizer"> optimizer_suite = [] { }; should("output the expected AST for " + data.stem) = [&] { - expect(that % json == data.expected); + expect_or_diff(data.expected, json); }; }, /* expected_ext= */ "json"); diff --git a/tests/unittests/ParserSuite.cpp b/tests/unittests/Suites/ParserSuite.cpp similarity index 94% rename from tests/unittests/ParserSuite.cpp rename to tests/unittests/Suites/ParserSuite.cpp index 2d4ef2bfa..e362a1d77 100644 --- a/tests/unittests/ParserSuite.cpp +++ b/tests/unittests/Suites/ParserSuite.cpp @@ -7,7 +7,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; @@ -60,7 +60,7 @@ ut::suite<"Parser"> parser_suite = [] { ltrim(rtrim(ast)); should("output the same AST and imports (" + data.stem + ")") = [&] { - expect(that % ast == data.expected); + expect_or_diff(data.expected, ast); }; }); }; @@ -80,7 +80,7 @@ ut::suite<"Parser"> parser_suite = [] { should("output the same error message (" + data.stem + ")") = [&] { std::string tested = sanitize_error(e); ltrim(rtrim(tested)); - expect(that % tested == data.expected); + expect_or_diff(data.expected, tested); }; } catch (...) 
diff --git a/tests/unittests/ReplSuite.cpp b/tests/unittests/Suites/ReplSuite.cpp similarity index 100% rename from tests/unittests/ReplSuite.cpp rename to tests/unittests/Suites/ReplSuite.cpp diff --git a/tests/unittests/RosettaSuite.cpp b/tests/unittests/Suites/RosettaSuite.cpp similarity index 97% rename from tests/unittests/RosettaSuite.cpp rename to tests/unittests/Suites/RosettaSuite.cpp index 187e6bac6..ccaddd310 100644 --- a/tests/unittests/RosettaSuite.cpp +++ b/tests/unittests/Suites/RosettaSuite.cpp @@ -3,7 +3,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; diff --git a/tests/unittests/ToolsSuite.cpp b/tests/unittests/Suites/ToolsSuite.cpp similarity index 100% rename from tests/unittests/ToolsSuite.cpp rename to tests/unittests/Suites/ToolsSuite.cpp diff --git a/tests/unittests/Utf8Suite.cpp b/tests/unittests/Suites/Utf8Suite.cpp similarity index 100% rename from tests/unittests/Utf8Suite.cpp rename to tests/unittests/Suites/Utf8Suite.cpp diff --git a/tests/unittests/ValidAstSuite.cpp b/tests/unittests/Suites/ValidAstSuite.cpp similarity index 90% rename from tests/unittests/ValidAstSuite.cpp rename to tests/unittests/Suites/ValidAstSuite.cpp index 121577bcb..53398d520 100644 --- a/tests/unittests/ValidAstSuite.cpp +++ b/tests/unittests/Suites/ValidAstSuite.cpp @@ -3,7 +3,7 @@ #include #include -#include "TestsHelper.hpp" +#include using namespace boost; @@ -25,7 +25,7 @@ ut::suite<"AST"> ast_suite = [] { }; should("output the expected AST for " + data.stem) = [&] { - expect(that % json == data.expected); + expect_or_diff(data.expected, json); }; }, /* expected_ext= */ "json"); diff --git a/tests/unittests/TestsHelper.cpp b/tests/unittests/TestsHelper.cpp index f31a58a68..17a2426a9 100644 --- a/tests/unittests/TestsHelper.cpp +++ b/tests/unittests/TestsHelper.cpp @@ -1,6 +1,10 @@ #include "TestsHelper.hpp" +#include + +#include #include +#include void iter_test_files(const std::string& folder, std::function&& test, 
const std::string& expected_ext) { @@ -13,7 +17,7 @@ void iter_test_files(const std::string& folder, std::function& expected_path.replace_extension(expected_ext); std::string expected = Ark::Utils::readFile(expected_path.generic_string()); // getting rid of the \r because of Windows - expected.erase(std::remove(expected.begin(), expected.end(), '\r'), expected.end()); + std::erase(expected, '\r'); ltrim(rtrim(expected)); auto data = TestData { @@ -46,3 +50,20 @@ std::string sanitize_error(const Ark::CodeError& e, const bool remove_in_file_li return diag; } + +void expect_or_diff(const std::string& expected, const std::string& received) +{ + const bool comparison = expected == received; + boost::ut::expect(comparison) << [&] { + dtl::Diff> d( + Ark::Utils::splitString(received, '\n'), + Ark::Utils::splitString(expected, '\n')); + d.enableHuge(); + d.compose(); + d.composeUnifiedHunks(); + std::stringstream stream; + d.printUnifiedFormat(stream); + + return stream.str(); + }; +} diff --git a/tests/unittests/TestsHelper.hpp b/tests/unittests/TestsHelper.hpp index a0d7d67e7..02e945a0f 100644 --- a/tests/unittests/TestsHelper.hpp +++ b/tests/unittests/TestsHelper.hpp @@ -46,4 +46,6 @@ inline std::string& rtrim(std::string& s) std::string sanitize_error(const Ark::CodeError& e, bool remove_in_file_line = false); +void expect_or_diff(const std::string& expected, const std::string& received); + #endif // ARK_TESTSHELPER_HPP From 64ba997a425373b811967700764c632f72cdf025 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 21 Mar 2025 18:09:24 +0100 Subject: [PATCH 10/12] fix: adding missing cassert include --- .../Compiler/IntermediateRepresentation/IROptimizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp index ad5b05ad2..c80677952 100644 --- a/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp +++ 
b/src/arkreactor/Compiler/IntermediateRepresentation/IROptimizer.cpp @@ -1,5 +1,6 @@ #include +#include #include #include From 230e7146a80b74e72088a410b9b7e3eb475b6f76 Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 21 Mar 2025 18:05:28 +0100 Subject: [PATCH 11/12] refactor: created Proxy/MiniWindows.h to include only what we need for loading DLLs --- include/Ark/VM/Plugin.hpp | 5 +---- include/Proxy/MiniWindows.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 include/Proxy/MiniWindows.h diff --git a/include/Ark/VM/Plugin.hpp b/include/Ark/VM/Plugin.hpp index ec8c21317..855b4e09b 100644 --- a/include/Ark/VM/Plugin.hpp +++ b/include/Ark/VM/Plugin.hpp @@ -15,10 +15,7 @@ #include #if defined(ARK_OS_WINDOWS) -// do not include winsock.h -# define WIN32_LEAN_AND_MEAN -# define NOMINMAX -# include +# include #elif defined(ARK_OS_LINUX) # include #else diff --git a/include/Proxy/MiniWindows.h b/include/Proxy/MiniWindows.h new file mode 100644 index 000000000..952078cdd --- /dev/null +++ b/include/Proxy/MiniWindows.h @@ -0,0 +1,29 @@ +// Inspired by https://aras-p.info/blog/2018/01/12/Minimizing-windows.h/ + +#ifndef MINIWINDOWS_H +#define MINIWINDOWS_H + +#if defined(_WIN32) || defined(_WIN64) + +# if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# define _AMD64_ +# elif defined(i386) || defined(__i386) || defined(__i386__) || defined(__i386__) || defined(_M_IX86) +# define _X86_ +# elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARMT) +# define _ARM_ +# endif + +# define NOMINMAX + +// https://learn.microsoft.com/en-us/windows/win32/api/errhandlingapi/nf-errhandlingapi-getlasterror +# include + +// FreeLibrary +// LoadLibrary +// GetProcAddress +// -> https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/ +# include +# include +#endif + +#endif // MINIWINDOWS_H From 8dcea74d78b7b238c353a61d930d7102d4b913ed 
Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Sat, 22 Mar 2025 22:14:36 +0100 Subject: [PATCH 12/12] chore: cleaning up dead code and old todos --- .github/workflows/ci.yml | 2 +- include/Ark/Compiler/BytecodeReader.hpp | 11 ++-------- include/Ark/Files.hpp | 1 - src/arkreactor/Builtins/List.cpp | 9 ++------- src/arkreactor/Builtins/Mathematics.cpp | 6 +++--- src/arkreactor/Compiler/AST/Parser.cpp | 2 +- src/arkreactor/Compiler/BytecodeReader.cpp | 5 ----- src/arkreactor/Compiler/Macros/Processor.cpp | 6 +++--- src/arkreactor/Exceptions.cpp | 1 - src/arkreactor/VM/VM.cpp | 20 ++++++------------- .../resources/CompilerSuite/ir/plugin.ark | 3 +++ .../CompilerSuite/ir/plugin.expected | 10 ++++++++++ 12 files changed, 31 insertions(+), 45 deletions(-) create mode 100644 tests/unittests/resources/CompilerSuite/ir/plugin.ark create mode 100644 tests/unittests/resources/CompilerSuite/ir/plugin.expected diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0aba7f69..c1fa7144d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -176,7 +176,7 @@ jobs: compiler_version: 16 sanitizers: On coverage: On - with_deps: false + with_deps: true - name: Run tests run: | diff --git a/include/Ark/Compiler/BytecodeReader.hpp b/include/Ark/Compiler/BytecodeReader.hpp index b819145af..48d4b288f 100644 --- a/include/Ark/Compiler/BytecodeReader.hpp +++ b/include/Ark/Compiler/BytecodeReader.hpp @@ -2,10 +2,10 @@ * @file BytecodeReader.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) * @brief A bytecode disassembler for ArkScript - * @version 0.5 + * @version 1.0 * @date 2020-10-27 * - * @copyright Copyright (c) 2020-2024 + * @copyright Copyright (c) 2020-2025 * */ @@ -96,13 +96,6 @@ namespace Ark */ [[nodiscard]] bool checkMagic() const; - /** - * @brief Return the bytecode object constructed - * - * @return const bytecode_t& - */ - [[nodiscard]] const bytecode_t& bytecode() noexcept; - /** * * @return Version compiler version used to 
create the given bytecode file diff --git a/include/Ark/Files.hpp b/include/Ark/Files.hpp index 06477f700..74a00a692 100644 --- a/include/Ark/Files.hpp +++ b/include/Ark/Files.hpp @@ -77,7 +77,6 @@ namespace Ark::Utils ifs.close(); auto bytecode = std::vector(static_cast(pos)); - // TODO would it be faster to memcpy? for (std::size_t i = 0; i < static_cast(pos); ++i) bytecode[i] = static_cast(temp[i]); return bytecode; diff --git a/src/arkreactor/Builtins/List.cpp b/src/arkreactor/Builtins/List.cpp index 050c8d613..f74e81f76 100644 --- a/src/arkreactor/Builtins/List.cpp +++ b/src/arkreactor/Builtins/List.cpp @@ -51,13 +51,8 @@ namespace Ark::internal::Builtins::List { { types::Contract { { types::Typedef("list", ValueType::List), types::Typedef("value", ValueType::Any) } } } }, n); - std::vector& l = n[0].list(); - for (auto it = l.begin(), it_end = l.end(); it != it_end; ++it) - { - if (*it == n[1]) // FIXME cast - return Value(static_cast(std::distance(l.begin(), it))); - } - + if (const auto it = std::ranges::find(n[0].list(), n[1]); it != n[0].list().end()) + return Value(static_cast(std::distance(n[0].list().begin(), it))); return Value(-1); } diff --git a/src/arkreactor/Builtins/Mathematics.cpp b/src/arkreactor/Builtins/Mathematics.cpp index 25a7c9853..9679ba440 100644 --- a/src/arkreactor/Builtins/Mathematics.cpp +++ b/src/arkreactor/Builtins/Mathematics.cpp @@ -43,12 +43,12 @@ namespace Ark::internal::Builtins::Mathematics { if (!types::check(n, ValueType::Number)) types::generateError( - "math:log", + "math:ln", { { types::Contract { { types::Typedef("value", ValueType::Number) } } } }, n); if (n[0].number() <= 0.0) - throw std::runtime_error(fmt::format("math:log: value {} must be greater than 0", n[0].number())); + throw std::runtime_error(fmt::format("math:ln: value {} must be greater than 0", n[0].number())); return Value(std::log(n[0].number())); } @@ -128,7 +128,7 @@ namespace Ark::internal::Builtins::Mathematics { if (!types::check(n, 
ValueType::Any)) types::generateError( - "math:exp", + "math:NaN?", { { types::Contract { { types::Typedef("value", ValueType::Any) } } } }, n); diff --git a/src/arkreactor/Compiler/AST/Parser.cpp b/src/arkreactor/Compiler/AST/Parser.cpp index 7ba4d896a..a37c6363f 100644 --- a/src/arkreactor/Compiler/AST/Parser.cpp +++ b/src/arkreactor/Compiler/AST/Parser.cpp @@ -353,7 +353,7 @@ namespace Ark::internal Node symbols(NodeType::List); setNodePosAndFilename(symbols); // then parse the symbols to import, if any - if (space()) // FIXME: potential regression introduced here + if (space()) { comment.clear(); newlineOrComment(&comment); diff --git a/src/arkreactor/Compiler/BytecodeReader.cpp b/src/arkreactor/Compiler/BytecodeReader.cpp index 2bbbd7220..762605bdb 100644 --- a/src/arkreactor/Compiler/BytecodeReader.cpp +++ b/src/arkreactor/Compiler/BytecodeReader.cpp @@ -47,11 +47,6 @@ namespace Ark m_bytecode[3] == bytecode::Magic[3]; } - const bytecode_t& BytecodeReader::bytecode() noexcept - { - return m_bytecode; - } - Version BytecodeReader::version() const { if (!checkMagic() || m_bytecode.size() < bytecode::Magic.size() + bytecode::Version.size()) diff --git a/src/arkreactor/Compiler/Macros/Processor.cpp b/src/arkreactor/Compiler/Macros/Processor.cpp index b1a7fc152..7bbe92747 100644 --- a/src/arkreactor/Compiler/Macros/Processor.cpp +++ b/src/arkreactor/Compiler/Macros/Processor.cpp @@ -79,7 +79,7 @@ namespace Ark::internal { if (node.nodeType() == NodeType::List && node.constList().size() == 3 && node.constList()[0].nodeType() == NodeType::Keyword) { - Keyword kw = node.constList()[0].keyword(); + const Keyword kw = node.constList()[0].keyword(); // checking for function definition, which can occur only inside an assignment node if (kw != Keyword::Let && kw != Keyword::Mut && kw != Keyword::Set) return; @@ -99,7 +99,7 @@ namespace Ark::internal } } - void MacroProcessor::processNode(Node& node, unsigned depth, bool is_processing_namespace) + void 
MacroProcessor::processNode(Node& node, unsigned depth, const bool is_processing_namespace) { if (depth >= MaxMacroProcessingDepth) throwMacroProcessingError( @@ -666,7 +666,7 @@ namespace Ark::internal node.constList()[0].keyword() == Keyword::Begin; } - void MacroProcessor::removeBegin(Node& node, std::size_t i) + void MacroProcessor::removeBegin(Node& node, const std::size_t i) { if (node.isListLike() && node.list()[i].nodeType() == NodeType::List && !node.list()[i].list().empty()) { diff --git a/src/arkreactor/Exceptions.cpp b/src/arkreactor/Exceptions.cpp index 77a46c2ae..781d560b6 100644 --- a/src/arkreactor/Exceptions.cpp +++ b/src/arkreactor/Exceptions.cpp @@ -194,7 +194,6 @@ namespace Ark::Diagnostics if (e.filename != ARK_NO_NAME_FILE) file_content = Utils::readFile(e.filename); - // TODO enhance the error messages helper( os, e.what(), diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index 0fbf9e1b7..1a4a8c7ca 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -1637,21 +1637,13 @@ namespace Ark fmt::styled(m_state.m_symbols[old_scope.atPos(i).first], fmt::fg(fmt::color::cyan)), old_scope.atPos(i).second.toString(*this)); } - - while (context.fc != 1) - { - Value* tmp = pop(context); - if (tmp->valueType() == ValueType::InstPtr) - --context.fc; - *tmp = m_no_value; - } - // pop the PP as well - pop(context); } - std::cerr << "At IP: " << (saved_ip / 4) // dividing by 4 because the instructions are actually on 4 bytes - << ", PP: " << saved_pp - << ", SP: " << saved_sp - << "\n"; + fmt::println( + "At IP: {}, PP: {}, SP: {}", + // dividing by 4 because the instructions are actually on 4 bytes + fmt::styled(saved_ip / 4, fmt::fg(fmt::color::cyan)), + fmt::styled(saved_pp, fmt::fg(fmt::color::green)), + fmt::styled(saved_sp, fmt::fg(fmt::color::yellow))); } } diff --git a/tests/unittests/resources/CompilerSuite/ir/plugin.ark b/tests/unittests/resources/CompilerSuite/ir/plugin.ark new file mode 100644 index 
000000000..637de913e --- /dev/null +++ b/tests/unittests/resources/CompilerSuite/ir/plugin.ark @@ -0,0 +1,3 @@ +(import hash) + +(assert (= "5d41402abc4b2a76b9719d911017c592" (hash:md5 "hello")) "md5 'hello'") diff --git a/tests/unittests/resources/CompilerSuite/ir/plugin.expected b/tests/unittests/resources/CompilerSuite/ir/plugin.expected new file mode 100644 index 000000000..7e0662b1a --- /dev/null +++ b/tests/unittests/resources/CompilerSuite/ir/plugin.expected @@ -0,0 +1,10 @@ +page_0 + PLUGIN 0 + LOAD_CONST 1 + LOAD_CONST 2 + LOAD_SYMBOL 0 + CALL 1 + EQ 0 + LOAD_CONST 3 + ASSERT 0 + HALT 0