From 7d637fdbd5c4e4833e66d63a19b053ce343481fd Mon Sep 17 00:00:00 2001 From: Lexy Plt Date: Fri, 20 Mar 2026 16:55:30 +0100 Subject: [PATCH 1/5] fix(ast lowerer): track how many arguments function have when performing tail calls, to run some arity checks --- include/Ark/Compiler/Lowerer/ASTLowerer.hpp | 10 +++- lib/std | 2 +- .../Compiler/Lowerer/ASTLowerer.cpp | 59 +++++++++++++++---- .../compileTime/too_many_args_tail_call.ark | 3 + .../too_many_args_tail_call.expected | 7 +++ .../runtime/stackoverflow_recur.ark | 9 --- .../runtime/stackoverflow_recur.expected | 16 ----- 7 files changed, 67 insertions(+), 39 deletions(-) create mode 100644 tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.ark create mode 100644 tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.expected delete mode 100644 tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark delete mode 100644 tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.expected diff --git a/include/Ark/Compiler/Lowerer/ASTLowerer.hpp b/include/Ark/Compiler/Lowerer/ASTLowerer.hpp index 820ed47e3..ad3d897c8 100644 --- a/include/Ark/Compiler/Lowerer/ASTLowerer.hpp +++ b/include/Ark/Compiler/Lowerer/ASTLowerer.hpp @@ -98,6 +98,12 @@ namespace Ark::internal bool is_temp; }; + struct Var + { + std::string name; + std::size_t argument_count; + }; + LocalsLocator m_locals_locator; // tables: symbols, values, plugins and codes @@ -107,7 +113,7 @@ namespace Ark::internal std::vector m_code_pages; std::vector m_temp_pages; ///< we need temporary code pages for some compilations passes IR::label_t m_current_label = 0; - std::stack m_opened_vars; ///< stack of vars we are currently declaring + std::stack m_opened_vars; ///< stack of vars we are currently declaring enum class ErrorKind { @@ -150,7 +156,7 @@ namespace Ark::internal */ [[nodiscard]] bool isFunctionCallingItself(const std::string& name) noexcept { - return 
!m_opened_vars.empty() && m_opened_vars.top() == name; + return !m_opened_vars.empty() && m_opened_vars.top().name == name; } /** diff --git a/lib/std b/lib/std index 0585f3ccc..820dca491 160000 --- a/lib/std +++ b/lib/std @@ -1 +1 @@ -Subproject commit 0585f3cccf6cf7faeeccd7377c362d1c95e0ca31 +Subproject commit 820dca491d37a7a4e730cecbc29c56eb2722aec4 diff --git a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp index 124e1fb23..7eddf690a 100644 --- a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp +++ b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -531,6 +532,7 @@ namespace Ark::internal // save page_id into the constants table as PageAddr and load the const page(p).emplace_back(is_closure ? MAKE_CLOSURE : LOAD_CONST, addValue(function_body_page.index, x)); + std::size_t arg_count = 0; // pushing arguments from the stack into variables in the new scope for (const auto& node : x.constList()[1].constList() | std::ranges::views::reverse) { @@ -538,11 +540,13 @@ namespace Ark::internal { page(function_body_page).emplace_back(STORE, addSymbol(node)); m_locals_locator.addLocal(node.string()); + arg_count++; } else if (node.nodeType() == NodeType::RefArg) { page(function_body_page).emplace_back(STORE_REF, addSymbol(node)); m_locals_locator.addLocal(node.string()); + arg_count++; } } @@ -551,7 +555,7 @@ namespace Ark::internal // (let name (fun (e) (map lst (fun (e) (name e))))) // Otherwise, `name` would have been optimized to a CALL_CURRENT_PAGE, which would have returned the wrong page. 
if (x.isAnonymousFunction()) - m_opened_vars.emplace("#anonymous"); + m_opened_vars.emplace("#anonymous", arg_count); // push body of the function compileExpression(x.list()[2], function_body_page, false, true); if (x.isAnonymousFunction()) @@ -579,13 +583,23 @@ namespace Ark::internal const std::string name = x.constList()[1].string(); uint16_t i = addSymbol(x.constList()[1]); - if (!m_opened_vars.empty() && m_opened_vars.top() == name) + if (!m_opened_vars.empty() && m_opened_vars.top().name == name) buildAndThrowError("Can not define a variable using the same name as the function it is defined inside. You need to rename the function or the variable", x); const bool is_function = x.constList()[2].isFunction(); if (is_function) { - m_opened_vars.push(name); + std::size_t arg_count = 0; + if (x.constList()[2].nodeType() == NodeType::List && x.constList()[2].constList().size() >= 2 && + x.constList()[2].constList()[1].nodeType() == NodeType::List) + { + for (const auto& node : x.constList()[2].constList()[1].constList()) + { + if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::MutArg || node.nodeType() == NodeType::RefArg) + arg_count++; + } + } + m_opened_vars.push(Var(name, arg_count)); x.list()[2].setFunctionKind(/* anonymous= */ false); } @@ -815,10 +829,41 @@ namespace Ark::internal constexpr std::size_t start_index = 1; Node& node = x.list()[0]; + // number of arguments + std::size_t args_count = 0; + for (auto it = x.constList().begin() + start_index, it_end = x.constList().end(); it != it_end; ++it) + { + if (it->nodeType() != NodeType::Capture && !isBreakpoint(*it)) + args_count++; + } + if (is_terminal && node.nodeType() == NodeType::Symbol && isFunctionCallingItself(node.string())) { pushFunctionCallArguments(x, p, /* is_tail_call= */ true); + if (const std::size_t expected_arg_count = m_opened_vars.top().argument_count; args_count != expected_arg_count) + { + std::vector arg_names; + if (expected_arg_count > 0) + { + 
arg_names.reserve(expected_arg_count + 1); + arg_names.emplace_back(""); + for (std::size_t i = 0; i < expected_arg_count; ++i) + arg_names.emplace_back(1, static_cast('a' + i)); + } + + buildAndThrowError( + fmt::format( + "When performing tail-call `{}', received {} argument{}, but expected {}: `({}{})'", + x.repr(), + args_count, + args_count > 1 ? "s" : "", + expected_arg_count, + node.string(), + fmt::join(arg_names, " ")), + x); + } + // jump to the top of the function page(p).emplace_back(TAIL_CALL_SELF); page(p).back().setSourceLocation(node.filename(), node.position().start.line); @@ -889,14 +934,6 @@ namespace Ark::internal pushFunctionCallArguments(x, p, /* is_tail_call= */ false); - // number of arguments - std::size_t args_count = 0; - for (auto it = x.constList().begin() + start_index, it_end = x.constList().end(); it != it_end; ++it) - { - if (it->nodeType() != NodeType::Capture && !isBreakpoint(*it)) - args_count++; - } - // call the procedure switch (call_type) { diff --git a/tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.ark b/tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.ark new file mode 100644 index 000000000..93910f030 --- /dev/null +++ b/tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.ark @@ -0,0 +1,3 @@ +(let f (fun(i) + (f (print i) (+ 1 i)))) +(f 1) diff --git a/tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.expected b/tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.expected new file mode 100644 index 000000000..5a5fae973 --- /dev/null +++ b/tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.expected @@ -0,0 +1,7 @@ +In file tests/unittests/resources/DiagnosticsSuite/compileTime/too_many_args_tail_call.ark:2 + 1 | (let f (fun(i) + 2 | (f (print i) (+ 1 i)))) + | ^~~~~~~~~~~~~~~~~~~~~ + 3 | (f 1) + 4 | + When performing tail-call `(f (print i) (+ 1 i))', 
received 2 arguments, but expected 1: `(f a)' diff --git a/tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark b/tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark deleted file mode 100644 index a81548daa..000000000 --- a/tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark +++ /dev/null @@ -1,9 +0,0 @@ -(let A (fun ((mut k) x1 x0 x3 x4 x5) { - (let B (fun () { - (set k (- k 1)) - (B k A x1 x0 x3 x4) })) - (if (<= k 0) - (+ (x4) (x5)) - (B)) })) - -(A 9 (fun () 0) (fun () -1) (fun () -1) (fun () 0) (fun () 0)) diff --git a/tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.expected b/tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.expected deleted file mode 100644 index ffe39c724..000000000 --- a/tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.expected +++ /dev/null @@ -1,16 +0,0 @@ -Stack overflow. You could consider rewriting your function to make use of tail-call optimization. 
- -In file tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark:4 - 1 | (let A (fun ((mut k) x1 x0 x3 x4 x5) { - 2 | (let B (fun () { - 3 | (set k (- k 1)) - 4 | (B k A x1 x0 x3 x4) })) - | ^~~~~~~~~~~~~~~~~~~~~~ - 5 | (if (<= k 0) - 6 | (+ (x4) (x5)) - -[ 3] In function `B' (tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark:4) -[ 2] In function `A' (tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark:7) -[ 1] In global scope (tests/unittests/resources/DiagnosticsSuite/runtime/stackoverflow_recur.ark:9) - -Current scope variables values: From c7aba84ccb71e49ff7eb6349781df741c4a13ac8 Mon Sep 17 00:00:00 2001 From: Lexy Plt Date: Sun, 22 Mar 2026 11:30:38 +0100 Subject: [PATCH 2/5] fix(ast lowerer): all paths in 'if' should return something (closes #661) --- CHANGELOG.md | 1 + src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp | 5 +++++ .../CompilerSuite/optimized_ir/type.expected | 1 + tests/unittests/resources/LangSuite/weird-tests.ark | 11 ++++++++++- 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb565ca14..b77b6b461 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Added ### Changed +- all paths inside `if` should return a value, when used as an expression. 
If an `else` branch is missing, `nil` will be returned ### Removed diff --git a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp index 7eddf690a..ac983aa85 100644 --- a/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp +++ b/src/arkreactor/Compiler/Lowerer/ASTLowerer.cpp @@ -474,6 +474,11 @@ namespace Ark::internal page(p).back().setSourceLocation(x.constList()[3].filename(), x.constList()[3].position().start.line); m_locals_locator.dropVarsForBranch(); } + else + { + Node tmp = Node(NodeType::List); + compileExpression(tmp, p, is_result_unused, is_terminal); + } // when else is finished, jump to end const auto label_end = IR::Entity::Label(m_current_label++); diff --git a/tests/unittests/resources/CompilerSuite/optimized_ir/type.expected b/tests/unittests/resources/CompilerSuite/optimized_ir/type.expected index 6094a62a8..b864d6a6c 100644 --- a/tests/unittests/resources/CompilerSuite/optimized_ir/type.expected +++ b/tests/unittests/resources/CompilerSuite/optimized_ir/type.expected @@ -40,6 +40,7 @@ page_1 .L7: CHECK_TYPE_OF 0, 1 POP_JUMP_IF_TRUE L9 + BUILTIN 2 JUMP L10 .L9: PUSH_RETURN_ADDRESS L11 diff --git a/tests/unittests/resources/LangSuite/weird-tests.ark b/tests/unittests/resources/LangSuite/weird-tests.ark index db7dd5453..da0811b06 100644 --- a/tests/unittests/resources/LangSuite/weird-tests.ark +++ b/tests/unittests/resources/LangSuite/weird-tests.ark @@ -54,4 +54,13 @@ (test:case "(f4 (mut n 1) (+ 0 n) n (set n 2))" { (mut output []) (f4 (mut n 1) (+ 0 n) n (set n 2)) - (test:eq output [1 1 2 2]) }) }) + (test:eq output [1 1 2 2]) }) + + # not all paths in `or` return, and we have a POP at the end, that would pop the function and leave + # the instruction pointer alone, messing up the call stack + (test:case "(or false (if 0 0))" { + (let g (fun (a b) { + (or false (if 0 0)) + b })) + + (test:eq (g 1 2) 2) }) }) From 3983ae8d410d98612ac7933b6c8ecd178a37821e Mon Sep 17 00:00:00 2001 From: Lexy Plt Date: Sun, 
22 Mar 2026 12:08:39 +0100 Subject: [PATCH 3/5] feat(macro processor): add new macro $gensym --- CHANGELOG.md | 1 + docs/arkdoc/Macros.txt | 11 +++++++++++ include/Ark/Compiler/Macros/Processor.hpp | 1 + src/arkreactor/Compiler/Macros/Processor.cpp | 8 +++++++- .../compileTime/macro_gensym_too_many_args.ark | 1 + .../compileTime/macro_gensym_too_many_args.expected | 5 +++++ 6 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.ark create mode 100644 tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.expected diff --git a/CHANGELOG.md b/CHANGELOG.md index b77b6b461..10c40f1e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ### Deprecations ### Added +- added new macro `$gensym`, to generate a unique symbol identifier to use in macros ### Changed - all paths inside `if` should return a value, when used as an expression. If an `else` branch is missing, `nil` will be returned diff --git a/docs/arkdoc/Macros.txt b/docs/arkdoc/Macros.txt index baf9cf75f..e54775575 100644 --- a/docs/arkdoc/Macros.txt +++ b/docs/arkdoc/Macros.txt @@ -122,3 +122,14 @@ * (print (one 1 5 6 7 8)) # 5 * =end #-- + +--# +* @name $gensym +* @brief Return a new unique symbol identifier to use in macros +* =begin +* (macro switch (value case then ...cases) { +* (macro var ($gensym)) +* (let var value) +* (_switch_impl var case then ...cases) }) +* =end +#-- diff --git a/include/Ark/Compiler/Macros/Processor.hpp b/include/Ark/Compiler/Macros/Processor.hpp index 9d065a29f..b7138e5b1 100644 --- a/include/Ark/Compiler/Macros/Processor.hpp +++ b/include/Ark/Compiler/Macros/Processor.hpp @@ -61,6 +61,7 @@ namespace Ark::internal std::shared_ptr m_conditional_executor; std::vector> m_executors; std::unordered_map m_defined_functions; + std::size_t m_genned_sym; /** * @brief Return std::nullopt if the function isn't registered, otherwise return its node diff --git 
a/src/arkreactor/Compiler/Macros/Processor.cpp b/src/arkreactor/Compiler/Macros/Processor.cpp index a6152b6bb..2c2e0314c 100644 --- a/src/arkreactor/Compiler/Macros/Processor.cpp +++ b/src/arkreactor/Compiler/Macros/Processor.cpp @@ -18,7 +18,7 @@ namespace Ark::internal { MacroProcessor::MacroProcessor(const unsigned debug) noexcept : - Pass("MacroProcessor", debug) + Pass("MacroProcessor", debug), m_genned_sym(0) { // create executors pipeline m_conditional_executor = std::make_shared(this); @@ -518,6 +518,12 @@ namespace Ark::internal node.push_back(getListNode()); } } + else if (name == "$gensym") + { + checkMacroArgCountEq(node, 0, "$gensym", true); + node.updateValueAndType(Node(NodeType::Symbol, fmt::format("#gensym-{}", m_genned_sym))); + ++m_genned_sym; + } else if (name == Language::Symcat) { if (node.list().size() <= 2) diff --git a/tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.ark b/tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.ark new file mode 100644 index 000000000..0049f9e94 --- /dev/null +++ b/tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.ark @@ -0,0 +1 @@ +(macro a ($gensym 1)) diff --git a/tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.expected b/tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.expected new file mode 100644 index 000000000..dcace069a --- /dev/null +++ b/tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.expected @@ -0,0 +1,5 @@ +In file tests/unittests/resources/DiagnosticsSuite/compileTime/macro_gensym_too_many_args.ark:1 + 1 | (macro a ($gensym 1)) + | ^~~~~~~~~~~ + 2 | + When expanding `$gensym' inside a macro, got 1 argument, expected 0 From 72184299e5842053b7dd68b4c6f3191c974398e6 Mon Sep 17 00:00:00 2001 From: Lexy Plt Date: Sun, 22 Mar 2026 12:09:27 +0100 Subject: [PATCH 4/5] chore(docs): add documentation for apply --- 
docs/arkdoc/Builtins.txt | 12 ++++++++++++ lib/std | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/arkdoc/Builtins.txt b/docs/arkdoc/Builtins.txt index 27f046818..3bbc400a0 100644 --- a/docs/arkdoc/Builtins.txt +++ b/docs/arkdoc/Builtins.txt @@ -115,3 +115,15 @@ * (print (hasField closure "B")) # false, field names are case-sensitive * =end #-- + +--# +* @name apply +* @brief Call a function with a list of arguments +* @param f function +* @param args list, can be empty if the function takes no argument +* =begin +* (print (apply + [1 2])) # 3 +* (let foo (fun (a b c) (+ a b c))) +* (print (apply foo [1 2 3])) # 6 +* =end +#-- diff --git a/lib/std b/lib/std index 820dca491..5f9c9be86 160000 --- a/lib/std +++ b/lib/std @@ -1 +1 @@ -Subproject commit 820dca491d37a7a4e730cecbc29c56eb2722aec4 +Subproject commit 5f9c9be867b764fe51aa19deeb99131387c7d796 From e1d7b111de837e94507fe9e9d24891ab87c5815e Mon Sep 17 00:00:00 2001 From: Lexy Plt Date: Sun, 22 Mar 2026 12:53:46 +0100 Subject: [PATCH 5/5] feat(tool): improve ark_frequent_instructions.py script for the CI --- .github/workflows/ci.yml | 5 +---- tools/ark_frequent_instructions.py | 35 +++++++++++++++++------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 801531b78..63f209826 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -272,11 +272,8 @@ jobs: python-version: '3.13' - run: | - KO=0 - python3 tools/ark_frequent_instructions.py super_insts_usage > output.txt || KO=1 - echo "SUPER_INSTS_REPORT_KO=$KO" >> $GITHUB_ENV echo "SUPER_INSTS_REPORT<<EOF" >> $GITHUB_ENV - cat output.txt >> $GITHUB_ENV + python3 tools/ark_frequent_instructions.py super_insts_usage >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV - uses: 8BitJonny/gh-get-current-pr@4.0.0 diff --git a/tools/ark_frequent_instructions.py b/tools/ark_frequent_instructions.py index 452ac05ea..8a40b7d11 100755 --- a/tools/ark_frequent_instructions.py
+++ b/tools/ark_frequent_instructions.py @@ -149,26 +149,31 @@ def print_most_freqs(data, max_percent=10): most = sorted(data.items(), key=lambda e: e[1], reverse=True) interesting = most[:(len(most) * max_percent) // 100] if compute_super_insts_usage: + threshold = 10 + over, under = [(x, c) for (x, c) in most if c > threshold], [(x, c) for (x, c) in most if c <= threshold] + + if under: + print(f"Some Super Instructions are under the usage threshold ({threshold}).\n") + print("| Super Instruction | Uses in compiled code |") + print("| ----------------- | --------------------- |") + print("\n".join(f"| {insts} | {count} |" for (insts, count) in under)) + + print("
<details><summary>Super Instructions over the threshold</summary>\n") print("| Super Instruction | Uses in compiled code |") print("| ----------------- | --------------------- |") - print("\n".join(f"| {insts} | {count} |" for (insts, count) in interesting)) + print("\n".join(f"| {insts} | {count} |" for (insts, count) in over)) + print("\n</details>
") else: print("\n".join(f"{insts} -> {count}" for (insts, count) in interesting)) - if compute_super_insts_usage: - threshold = 10 - for (inst, count) in most: - if count <= threshold: - sys.exit(1) - - -if not compute_super_insts_usage: - print("Super instructions present:") -print_most_freqs(super_insts_freqs, max_percent=100) if compute_super_insts_usage: - sys.exit(0) + print_most_freqs(super_insts_freqs, max_percent=100) +else: + print("Super instructions present:") + print_most_freqs(super_insts_freqs, max_percent=100) -for i in (2, 3, 4): - print(f"\nPairs of {i}:") - print_most_freqs(frequent[i]) + print("Potential pairs of instructions that could be optimized:") + for i in (2, 3, 4): + print(f"\nPairs of {i}:") + print_most_freqs(frequent[i])