From a70dbe1065f58a23a85e576d19e3434e6dd4bb64 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Thu, 12 Mar 2026 13:27:40 -0400
Subject: [PATCH 1/7] Use Prism for mk_builtin_loader.rb

---
 .github/actions/setup/directories/action.yml |   1 +
 .github/workflows/wasm.yml                   |   1 +
 common.mk                                    |  24 +-
 configure.ac                                 |   6 +
 defs/gmake.mk                                |   4 +
 depend                                       |  24 ++
 prism/templates/src/node.c.erb               |   4 +
 template/Makefile.in                         |   1 +
 tool/dump_ast.c                              |  64 +++
 tool/mk_builtin_loader.rb                    | 390 +++++++++----------
 tool/prereq.status                           |   1 +
 win32/Makefile.sub                           |   2 +
 12 files changed, 310 insertions(+), 212 deletions(-)
 create mode 100644 tool/dump_ast.c

diff --git a/.github/actions/setup/directories/action.yml b/.github/actions/setup/directories/action.yml
index aeaa8d9783ba68..6b569d66ac7c8d 100644
--- a/.github/actions/setup/directories/action.yml
+++ b/.github/actions/setup/directories/action.yml
@@ -145,6 +145,7 @@ runs:
       run: |
         rm -f config.status .rbconfig.time \
           Makefile GNUmakefile uncommon.mk enc.mk noarch-fake.rb
+        rm -f prism/.time prism/util/.time
 
     - if: steps.which.outputs.sudo
       shell: bash
diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml
index 9eb7976a613832..e77336225a628c 100644
--- a/.github/workflows/wasm.yml
+++ b/.github/workflows/wasm.yml
@@ -124,6 +124,7 @@ jobs:
           ../src/configure \
             --host wasm32-unknown-wasi \
             --with-baseruby=$PWD/../baseruby/install/bin/ruby \
+            --with-dump-ast=$PWD/../baseruby/dump_ast \
             --with-static-linked-ext \
             --with-ext=cgi/escape,continuation,coverage,date,digest/bubblebabble,digest,digest/md5,digest/rmd160,digest/sha1,digest/sha2,etc,fcntl,json,json/generator,json/parser,objspace,pathname,rbconfig/sizeof,ripper,stringio,strscan,monitor \
             LDFLAGS=" \
diff --git a/common.mk b/common.mk
index 374b722349c3f1..4b8791efe0d731 100644
--- a/common.mk
+++ b/common.mk
@@ -90,10 +90,8 @@ MAKE_ENC      = -f $(ENC_MK) V="$(V)" UNICODE_HDR_DIR="$(UNICODE_HDR_DIR)" \
 
 PRISM_BUILD_DIR = prism
 
-PRISM_FILES = prism/api_node.$(OBJEXT) \
-		prism/diagnostic.$(OBJEXT) \
+LIBPRISM_OBJS = prism/diagnostic.$(OBJEXT) \
 		prism/encoding.$(OBJEXT) \
-		prism/extension.$(OBJEXT) \
 		prism/node.$(OBJEXT) \
 		prism/options.$(OBJEXT) \
 		prism/prettyprint.$(OBJEXT) \
@@ -112,9 +110,14 @@ PRISM_FILES = prism/api_node.$(OBJEXT) \
 		prism/util/pm_string.$(OBJEXT) \
 		prism/util/pm_strncasecmp.$(OBJEXT) \
 		prism/util/pm_strpbrk.$(OBJEXT) \
-		prism/prism.$(OBJEXT) \
+		prism/prism.$(OBJEXT)
+
+EXTPRISM_OBJS = prism/api_node.$(OBJEXT) \
+		prism/extension.$(OBJEXT) \
 		prism_init.$(OBJEXT)
 
+PRISM_OBJS = $(LIBPRISM_OBJS) $(EXTPRISM_OBJS)
+
 COMMONOBJS    = \
 		array.$(OBJEXT) \
 		ast.$(OBJEXT) \
@@ -192,7 +195,7 @@ COMMONOBJS    = \
 		vm_sync.$(OBJEXT) \
 		vm_trace.$(OBJEXT) \
 		weakmap.$(OBJEXT) \
-		$(PRISM_FILES) \
+		$(PRISM_OBJS) \
 		$(YJIT_OBJ) \
 		$(ZJIT_OBJ) \
 		$(JIT_OBJ) \
@@ -203,7 +206,7 @@ COMMONOBJS    = \
 		$(BUILTIN_TRANSOBJS) \
 		$(MISSING)
 
-$(PRISM_FILES): $(PRISM_BUILD_DIR)/.time $(PRISM_BUILD_DIR)/util/.time
+$(PRISM_OBJS): $(PRISM_BUILD_DIR)/.time $(PRISM_BUILD_DIR)/util/.time
 
 $(PRISM_BUILD_DIR)/.time $(PRISM_BUILD_DIR)/util/.time:
 	$(Q) $(MAKEDIRS) $(@D)
@@ -1292,7 +1295,8 @@ preludes: {$(VPATH)}miniprelude.c
 
 {$(srcdir)}.rb.rbinc:
 	$(ECHO) making $@
-	$(Q) $(BASERUBY) $(tooldir)/mk_builtin_loader.rb $(SRC_FILE)
+	-$(Q) $(MAKE) $(DUMP_AST)
+	$(Q) $(BASERUBY) $(tooldir)/mk_builtin_loader.rb $(DUMP_AST) $(SRC_FILE)
 
 $(BUILTIN_BINARY:yes=built)in_binary.rbbin: $(PREP) $(BUILTIN_RB_SRCS) $(srcdir)/template/builtin_binary.rbbin.tmpl
 	$(Q) $(MINIRUBY) $(tooldir)/generic_erb.rb -o $@ \
@@ -1302,7 +1306,11 @@ $(BUILTIN_BINARY:yes=built)in_binary.rbbin: $(PREP) $(BUILTIN_RB_SRCS) $(srcdir)
 $(BUILTIN_BINARY:no=builtin)_binary.rbbin:
 	$(Q) echo> $@ // empty $(@F)
 
-$(BUILTIN_RB_INCS): $(top_srcdir)/tool/mk_builtin_loader.rb
+$(BUILTIN_RB_INCS): $(tooldir)/mk_builtin_loader.rb
+
+dump_ast$(EXEEXT): $(tooldir)/dump_ast.c $(LIBPRISM_OBJS)
+	$(ECHO) compiling $@
+	$(Q) $(CC) $(CFLAGS) $(OUTFLAG)$@ $(INCFLAGS) $(tooldir)/dump_ast.c $(LIBPRISM_OBJS)
 
 $(srcdir)/revision.h$(no_baseruby:no=~disabled~): $(REVISION_H)
 
diff --git a/configure.ac b/configure.ac
index 1b59942463bad6..1dd17e06528518 100644
--- a/configure.ac
+++ b/configure.ac
@@ -111,6 +111,12 @@ AS_IF([test "$HAVE_BASERUBY" = no], [
 AC_SUBST(BASERUBY)
 AC_SUBST(HAVE_BASERUBY)
 
+AC_ARG_WITH(dump-ast,
+	AS_HELP_STRING([--with-dump-ast=DUMP_AST], [use DUMP_AST as dump_ast; for cross-compiling with a host-built dump_ast]),
+	[DUMP_AST=$withval],
+	[DUMP_AST='./dump_ast$(EXEEXT)'])
+AC_SUBST(DUMP_AST)
+
 : ${GIT=git}
 HAVE_GIT=yes
 AC_ARG_WITH(git,
diff --git a/defs/gmake.mk b/defs/gmake.mk
index 718131e937a4ae..2131d24631f714 100644
--- a/defs/gmake.mk
+++ b/defs/gmake.mk
@@ -413,6 +413,10 @@ ifneq ($(DOT_WAIT),)
 up:: $(DOT_WAIT) after-update
 endif
 
+ifneq ($(CC),false)
+$(BUILTIN_RB_INCS): $(DUMP_AST)
+endif
+
 ifneq ($(filter update-bundled_gems refresh-gems,$(MAKECMDGOALS)),)
 update-gems: update-bundled_gems
 endif
diff --git a/depend b/depend
index f1e2433346826d..3d050525fb655a 100644
--- a/depend
+++ b/depend
@@ -3639,6 +3639,30 @@ dmydln.$(OBJEXT): {$(VPATH)}st.h
 dmydln.$(OBJEXT): {$(VPATH)}subst.h
 dmyenc.$(OBJEXT): {$(VPATH)}dmyenc.c
 dmyext.$(OBJEXT): {$(VPATH)}dmyext.c
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/ast.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/defines.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/diagnostic.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/encoding.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/node.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/options.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/parser.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/prettyprint.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/prism.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/regexp.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/static_literals.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_arena.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_buffer.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_char.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_constant_pool.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_integer.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_line_offset_list.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_list.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_memchr.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_string.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_strncasecmp.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_strpbrk.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/version.h
+dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/tool/dump_ast.c
 enc/ascii.$(OBJEXT): $(hdrdir)/ruby/ruby.h
 enc/ascii.$(OBJEXT): {$(VPATH)}assert.h
 enc/ascii.$(OBJEXT): {$(VPATH)}backward/2/assume.h
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
index 5806742612066c..df59545129afba 100644
--- a/prism/templates/src/node.c.erb
+++ b/prism/templates/src/node.c.erb
@@ -173,7 +173,11 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no
 
             // Dump the <%= field.name %> field
             pm_buffer_append_byte(buffer, ',');
+            <%- if field.is_a?(Prism::Template::Flags) -%>
+            pm_buffer_append_string(buffer, "\"flags\":", 8);
+            <%- else -%>
             pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
+            <%- end -%>
             <%- case field -%>
             <%- when Prism::Template::NodeField -%>
             pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
diff --git a/template/Makefile.in b/template/Makefile.in
index 8e93efc310cd97..3226daa7917b80 100644
--- a/template/Makefile.in
+++ b/template/Makefile.in
@@ -37,6 +37,7 @@ CONFIGURE = @CONFIGURE@
 MKFILES = @MAKEFILES@
 BASERUBY = @BASERUBY@
 HAVE_BASERUBY = @HAVE_BASERUBY@
+DUMP_AST = @DUMP_AST@
 TEST_RUNNABLE = @TEST_RUNNABLE@
 CROSS_COMPILING = @CROSS_COMPILING@
 DOXYGEN = @DOXYGEN@
diff --git a/tool/dump_ast.c b/tool/dump_ast.c
new file mode 100644
index 00000000000000..593ecce8c44215
--- /dev/null
+++ b/tool/dump_ast.c
@@ -0,0 +1,64 @@
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * When prism is compiled as part of CRuby, the xmalloc/xfree/etc. macros are
+ * redirected to ruby_xmalloc/ruby_xfree/etc. Since this is a standalone
+ * program that links against those same object files, we need to provide
+ * implementations of these functions.
+ */
+void *ruby_xmalloc(size_t size) { return malloc(size); }
+void *ruby_xcalloc(size_t nelems, size_t elemsiz) { return calloc(nelems, elemsiz); }
+void *ruby_xrealloc(void *ptr, size_t newsiz) { return realloc(ptr, newsiz); }
+void ruby_xfree(void *ptr) { free(ptr); }
+
+#include "prism.h"
+
+int
+main(int argc, const char *argv[]) {
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    const char *filepath = argv[1];
+    pm_string_t input;
+
+    if (pm_string_mapped_init(&input, filepath) != PM_STRING_INIT_SUCCESS) {
+        fprintf(stderr, "unable to map file: %s\n", filepath);
+        return EXIT_FAILURE;
+    }
+
+    pm_options_t options = { 0 };
+    pm_options_line_set(&options, 1);
+    pm_options_filepath_set(&options, filepath);
+
+    pm_arena_t arena = { 0 };
+    pm_parser_t parser;
+    pm_parser_init(&arena, &parser, pm_string_source(&input), pm_string_length(&input), &options);
+
+    pm_node_t *node = pm_parse(&parser);
+    int exit_status;
+
+    if (parser.error_list.size > 0) {
+        fprintf(stderr, "error parsing %s\n", filepath);
+        for (const pm_diagnostic_t *diagnostic = (const pm_diagnostic_t *) parser.error_list.head; diagnostic != NULL; diagnostic = (const pm_diagnostic_t *) diagnostic->node.next) {
+            const pm_line_column_t line_column = pm_line_offset_list_line_column(&parser.line_offsets, diagnostic->location.start, parser.start_line);
+            fprintf(stderr, "%" PRIi32 ":%" PRIu32 ":%s\n", line_column.line, line_column.column, diagnostic->message);
+        }
+        exit_status = EXIT_FAILURE;
+    } else {
+        pm_buffer_t json = { 0 };
+        pm_dump_json(&json, &parser, node);
+        printf("%.*s\n", (int) pm_buffer_length(&json), pm_buffer_value(&json));
+        pm_buffer_free(&json);
+        exit_status = EXIT_SUCCESS;
+    }
+
+    pm_parser_free(&parser);
+    pm_arena_free(&arena);
+    pm_string_free(&input);
+    pm_options_free(&options);
+
+    return exit_status;
+}
diff --git a/tool/mk_builtin_loader.rb b/tool/mk_builtin_loader.rb
index 3ab36ec45c323d..2965c651e9834b 100644
--- a/tool/mk_builtin_loader.rb
+++ b/tool/mk_builtin_loader.rb
@@ -1,6 +1,7 @@
 # Parse built-in script and make rbinc file
 
-require 'ripper'
+require 'json'
+require 'open3'
 require 'stringio'
 require_relative 'ruby_vm/helpers/c_escape'
 
@@ -24,231 +25,204 @@ def self.reset
 
 Warning.extend CompileWarning
 
-def string_literal(lit, str = [])
-  while lit
-    case lit.first
-    when :string_concat, :string_embexpr, :string_content
-      _, *lit = lit
-      lit.each {|s| string_literal(s, str)}
-      return str
-    when :string_literal
-      _, lit = lit
-    when :@tstring_content
-      str << lit[1]
-      return str
-    else
-      raise "unexpected #{lit.first}"
-    end
-  end
-end
+# ruby mk_builtin_loader.rb path/to/dump_ast TARGET_FILE.rb
+# #=> generate TARGET_FILE.rbinc
+#
+# dump_ast is a standalone C program (tool/dump_ast.c) that parses Ruby files
+# with prism and dumps the AST as JSON. It must be compiled with CC before this
+# script can run, which means rbinc generation is skipped during `make up`
+# (where CC=false). The rbinc files are gitignored build artifacts, so they do
+# not need to be present in srcdir after `make up` — they will be generated in
+# the build directory during `make all` once dump_ast has been compiled.
+
+LOCALS_DB = {} # [method_name, first_line] = locals
 
-# e.g. [:symbol_literal, [:symbol, [:@ident, "inline", [19, 21]]]]
-def symbol_literal(lit)
-  symbol_literal, symbol_lit = lit
-  raise "#{lit.inspect} was not :symbol_literal" if symbol_literal != :symbol_literal
-  symbol, ident_lit = symbol_lit
-  raise "#{symbol_lit.inspect} was not :symbol" if symbol != :symbol
-  ident, symbol_name, = ident_lit
-  raise "#{ident.inspect} was not :@ident" if ident != :@ident
-  symbol_name
+# Extract the contents of the given string node.
+def extract_string_literal(node)
+  case node["type"]
+  when "StringNode"
+    node["unescaped"]
+  when "InterpolatedStringNode"
+    node["parts"].map { |part| extract_string_literal(part) }.join
+  else
+    raise "unexpected #{node["type"]}"
+  end
 end
 
-def inline_text argc, arg1
-  raise "argc (#{argc}) of inline! should be 1" unless argc == 1
-  arg1 = string_literal(arg1)
-  raise "1st argument should be string literal" unless arg1
-  arg1.join("").rstrip
+# Retrieve the line number of the given node in the source.
+def line_number(source, node)
+  source.b.byteslice(0, node["location"]["start"]).count("\n") + 1
 end
 
-def inline_attrs(args)
-  raise "args was empty" if args.empty?
-  args.each do |arg|
-    attr = symbol_literal(arg)
-    unless BUILTIN_ATTRS.include?(attr)
-      raise "attr (#{attr}) was not in: #{BUILTIN_ATTRS.join(', ')}"
-    end
+def visit_call_node(source, node, name, locals, requires, bs, inlines)
+  # If this is a call to require or require_relative with a single string node
+  # argument, then we will attempt to find the file that is being required and
+  # add it to the files that should be processed.
+  if %w[require require_relative].include?(node["name"]) && !node["arguments"].nil? && (argument = node["arguments"]["arguments"][0])["type"] == "StringNode"
+    requires << argument["unescaped"]
+    return true
   end
-end
 
-def make_cfunc_name inlines, name, lineno
-  case name
-  when /\[\]/
-    name = '_GETTER'
-  when /\[\]=/
-    name = '_SETTER'
+  primitive_name = nil
+
+  receiver = node["receiver"]
+
+  if (!receiver.nil? && receiver["type"] == "ConstantReadNode" && receiver["name"] == "Primitive") ||
+     (!receiver.nil? && receiver["type"] == "CallNode" && receiver["flags"].include?("VARIABLE_CALL") && receiver["name"] == "__builtin")
+    primitive_name = node["name"]
+  elsif node["name"].start_with?("__builtin_")
+    primitive_name = node["name"][10..-1]
   else
-    name = name.tr('!?', 'EP')
+    # If we get here, then this isn't a primitive function call and we can
+    # continue the visit.
+    return true
   end
 
-  base = "builtin_inline_#{name}_#{lineno}"
-  if inlines[base]
-    1000.times{|i|
-      name = "#{base}_#{i}"
-      return name unless inlines[name]
-    }
-    raise "too many functions in same line..."
-  else
-    base
+  # The name of the C function that we will be calling for this call node. It
+  # may change later in this method depending on the type of primitive.
+  cfunction_name = primitive_name
+
+  args = node["arguments"].nil? ? [] : node["arguments"]["arguments"]
+  argc = args.size
+
+  if primitive_name.match?(/[\!\?]$/)
+    case (primitive_macro = primitive_name[0...-1])
+    when "arg"
+      # This is a call to Primitive.arg!, which expects a single symbol argument
+      # detailing the name of the argument.
+      raise "unexpected argument number #{argc}" if argc != 1
+      raise "symbol literal expected, got #{args[0]["type"]}" if args[0]["type"] != "SymbolNode"
+      return true
+    when "attr"
+      # This is a call to Primitive.attr!, which expects one or more symbol
+      # arguments. Each must be a SymbolNode naming an entry in BUILTIN_ATTRS;
+      # the error message lists BUILTIN_ATTRS itself so it cannot go stale.
+      raise "args was empty" if argc == 0
+
+      args.each do |arg|
+        raise "#{arg["type"]} was not a SymbolNode" if arg["type"] != "SymbolNode"
+        raise "attr (#{arg["unescaped"]}) was not in: #{BUILTIN_ATTRS.join(', ')}" unless BUILTIN_ATTRS.include?(arg["unescaped"])
+      end
+
+      return true
+    when "mandatory_only"
+      # This is a call to Primitive.mandatory_only?. This method does not
+      # require any further processing.
+      return true
+    when "cstmt", "cexpr", "cconst", "cinit"
+      # This is a call to Primitive.cstmt!, Primitive.cexpr!, Primitive.cconst!,
+      # or Primitive.cinit!. These methods expect a single string argument that
+      # is the C code that should be executed. We will extract the string, emit
+      # an inline function, and then continue the visit.
+      raise "argc (#{argc}) of inline! should be 1" if argc != 1
+
+      text = extract_string_literal(args[0]).rstrip
+      lineno = line_number(source, node)
+
+      case primitive_macro
+      when "cstmt", "cexpr", "cconst"
+        cfunction_name = "builtin_inline_#{name}_#{lineno}"
+        primitive_name = "_bi#{lineno}"
+
+        if primitive_macro == "cstmt"
+          inlines << [cfunction_name, lineno, text, locals, primitive_name]
+        else
+          inlines << [cfunction_name, lineno, "return #{text};", primitive_macro == "cexpr" ? locals : nil, primitive_name]
+        end
+      when "cinit"
+        inlines << [inlines.size, lineno, text, nil, nil]
+        return true
+      end
+
+      argc -= 1
+    else
+      # This is a call to Primitive that is not a known method, so it must be a
+      # regular C function. In this case we do not need any special processing.
+    end
   end
+
+  bs << [primitive_name, argc, cfunction_name]
+  return true
 end
 
-def collect_locals tree
-  _type, name, (line, _cols) = tree
-  if locals = LOCALS_DB[[name, line]]
-    locals
-  else
-    if false # for debugging
-      pp LOCALS_DB
-      raise "not found: [#{name}, #{line}]"
+def each_node(root, &blk)
+  return unless yield root
+
+  root.each do |key, value|
+    next if key == "type" || key == "location"
+
+    if value.is_a?(Hash)
+      each_node(value, &blk) if value.key?("type")
+    elsif value.is_a?(Array) && value[0].is_a?(Hash)
+      value.each { |node| each_node(node, &blk) }
     end
   end
 end
 
-def collect_builtin base, tree, name, bs, inlines, locals = nil
-  while tree
-    recv = sep = mid = args = nil
-    case tree.first
-    when :def
-      locals = collect_locals(tree[1])
-      tree = tree[3]
-      next
-    when :defs
-      locals = collect_locals(tree[3])
-      tree = tree[5]
-      next
-    when :class
-      name = 'class'
-      tree = tree[3]
-      next
-    when :sclass, :module
-      name = 'class'
-      tree = tree[2]
-      next
-    when :method_add_arg
-      _method_add_arg, mid, (_arg_paren, args) = tree
-      case mid.first
-      when :call
-        _, recv, sep, mid = mid
-      when :fcall
-        _, mid = mid
-      else
-        mid = nil
-      end
-      # w/  trailing comma: [[:method_add_arg, ...]]
-      # w/o trailing comma: [:args_add_block, [[:method_add_arg, ...]], false]
-      if args && args.first == :args_add_block
-        args = args[1]
-      end
-    when :vcall
-      _, mid = tree
-    when :command               # FCALL
-      _, mid, (_, args) = tree
-    when :call, :command_call   # CALL
-      _, recv, sep, mid, (_, args) = tree
+def visit_node(source, root, name, locals, requires, bs, inlines)
+  each_node(root) do |node|
+    case node["type"]
+    when "CallNode"
+      visit_call_node(source, node, name, locals, requires, bs, inlines)
+    when "DefNode"
+      lineno = line_number(source, node)
+      visit_node(source, node["body"], name, LOCALS_DB[[node["name"], lineno]], requires, bs, inlines) if node["body"]
+      false
+    when "ClassNode", "ModuleNode", "SingletonClassNode"
+      visit_node(source, node["body"], "class", nil, requires, bs, inlines) if node["body"]
+      false
+    else
+      true
     end
+  end
+end
 
-    if mid
-      raise "unknown sexp: #{mid.inspect}" unless %i[@ident @const].include?(mid.first)
-      _, mid, (lineno,) = mid
-      if recv
-        func_name = nil
-        case recv.first
-        when :var_ref
-          _, recv = recv
-          if recv.first == :@const and recv[1] == "Primitive"
-            func_name = mid.to_s
-          end
-        when :vcall
-          _, recv = recv
-          if recv.first == :@ident and recv[1] == "__builtin"
-            func_name = mid.to_s
-          end
-        end
-        collect_builtin(base, recv, name, bs, inlines) unless func_name
-      else
-        func_name = mid[/\A__builtin_(.+)/, 1]
-      end
-      if func_name
-        cfunc_name = func_name
-        args.pop unless (args ||= []).last
-        argc = args.size
-
-        if /(.+)[\!\?]\z/ =~ func_name
-          case $1
-          when 'attr'
-            # Compile-time validation only. compile.c will parse them.
-            inline_attrs(args)
-            break
-          when 'cstmt'
-            text = inline_text argc, args.first
-
-            func_name = "_bi#{lineno}"
-            cfunc_name = make_cfunc_name(inlines, name, lineno)
-            inlines[cfunc_name] = [lineno, text, locals, func_name]
-            argc -= 1
-          when 'cexpr', 'cconst'
-            text = inline_text argc, args.first
-            code = "return #{text};"
-
-            func_name = "_bi#{lineno}"
-            cfunc_name = make_cfunc_name(inlines, name, lineno)
-
-            locals = [] if $1 == 'cconst'
-            inlines[cfunc_name] = [lineno, code, locals, func_name]
-            argc -= 1
-          when 'cinit'
-            text = inline_text argc, args.first
-            func_name = nil # required
-            inlines[inlines.size] = [lineno, text, nil, nil]
-            argc -= 1
-          when 'mandatory_only'
-            func_name = nil
-          when 'arg'
-            argc == 1 or raise "unexpected argument number #{argc}"
-            (arg = args.first)[0] == :symbol_literal or raise "symbol literal expected #{args}"
-            (arg = arg[1])[0] == :symbol or raise "symbol expected #{arg}"
-            (var = arg[1] and var = var[1]) or raise "argument name expected #{arg}"
-            func_name = nil
-          end
-        end
+def collect_builtins(dump_ast, file)
+  stdout, stderr, status = Open3.capture3(dump_ast, file)
+  unless status.success?
+    warn(stderr)
+    exit(1)
+  end
 
-        if bs[func_name] &&
-           bs[func_name] != [argc, cfunc_name]
-          raise "same builtin function \"#{func_name}\", but different arity (was #{bs[func_name]} but #{argc})"
-        end
+  source = File.read(file)
+  root = JSON.parse(stdout)
+  visit_node(source, root, "top", nil, requires = [], builtins = [], inlines = [])
 
-        bs[func_name] = [argc, cfunc_name] if func_name
-      elsif /\Arequire(?:_relative)\z/ =~ mid and args.size == 1 and
-           (arg1 = args[0])[0] == :string_literal and
-           (arg1 = arg1[1])[0] == :string_content and
-           (arg1 = arg1[1])[0] == :@tstring_content and
-           sublib = arg1[1]
-        if File.exist?(f = File.join(@dir, sublib)+".rb")
-          puts "- #{@base}.rb requires #{sublib}"
-          if REQUIRED[sublib]
-            warn "!!! #{sublib} is required from #{REQUIRED[sublib]} already; ignored"
-          else
-            REQUIRED[sublib] = @base
-            (SUBLIBS[@base] ||= []) << sublib
-          end
-          ARGV.push(f)
-        end
+  requires.each do |sublib|
+    if File.exist?(f = File.join(@dir, sublib)+".rb")
+      puts "- #{@base}.rb requires #{sublib}"
+      if REQUIRED[sublib]
+        warn "!!! #{sublib} is required from #{REQUIRED[sublib]} already; ignored"
+      else
+        REQUIRED[sublib] = @base
+        (SUBLIBS[@base] ||= []) << sublib
       end
-      break unless tree = args
+      ARGV.push(f)
     end
+  end
 
-    tree.each do |t|
-      collect_builtin base, t, name, bs, inlines, locals if Array === t
+  processed_builtins = {}
+  builtins.each do |(primitive_name, argc, cfunction_name)|
+    if processed_builtins.key?(primitive_name) && processed_builtins[primitive_name] != [argc, cfunction_name]
+      raise "same builtin function \"#{primitive_name}\", but different arity (was #{processed_builtins[primitive_name]} but #{argc})"
     end
-    break
+
+    processed_builtins[primitive_name] = [argc, cfunction_name]
   end
-end
 
-# ruby mk_builtin_loader.rb TARGET_FILE.rb
-# #=> generate TARGET_FILE.rbinc
-#
+  processed_inlines = {}
+  inlines.each do |(cfunction_name, lineno, text, locals, primitive_name)|
+    if processed_inlines.key?(cfunction_name)
+      found = 1000.times.find { |i| !processed_inlines.key?("#{cfunction_name}_#{i}") }
+      raise "too many functions in same line..." unless found
+      cfunction_name = "#{cfunction_name}_#{found}"
+    end
 
-LOCALS_DB = {} # [method_name, first_line] = locals
+    processed_inlines[cfunction_name] = [lineno, text, locals, primitive_name]
+  end
+
+  [processed_builtins, processed_inlines]
+end
 
 def collect_iseq iseq_ary
   # iseq_ary.each_with_index{|e, i| p [i, e]}
@@ -313,24 +287,24 @@ def generate_cexpr(ofile, lineno, line_file, body_lineno, text, locals, func_nam
   return lineno, f.string
 end
 
-def mk_builtin_header file
+def mk_builtin_header dump_ast, file
   @dir = File.dirname(file)
   base = File.basename(file, '.rb')
   @base = base
   ofile = "#{file}inc"
 
-  # bs = { func_name => argc }
-  code = File.read(file)
   begin
     verbose, $VERBOSE = $VERBOSE, true
-    collect_iseq RubyVM::InstructionSequence.compile(code, base).to_a
+    collect_iseq RubyVM::InstructionSequence.compile_file(file).to_a
   ensure
     $VERBOSE = verbose
   end
   if warnings = CompileWarning.reset
     raise "#{warnings} warnings in #{file}"
   end
-  collect_builtin(base, Ripper.sexp(code), 'top', bs = {}, inlines = {})
+
+  # bs = { func_name => [argc, cfunc_name] }
+  bs, inlines = collect_builtins(dump_ast, file)
 
   StringIO.open do |f|
     if File::ALT_SEPARATOR
@@ -423,7 +397,15 @@ def mk_builtin_header file
   end
 end
 
+dump_ast = ARGV.shift
+if dump_ast.nil? || !File.executable?(dump_ast)
+  # dump_ast may be missing (no arguments given) or not yet built, e.g. during
+  # `make up` (CC=false). In that case, silently skip rbinc generation — the
+  # files are generated during the actual build once dump_ast is compiled.
+  exit
+end
+
 ARGV.each{|file|
   # feature.rb => load_feature.inc
-  mk_builtin_header file
+  mk_builtin_header dump_ast, file
 }
diff --git a/tool/prereq.status b/tool/prereq.status
index 6aca615e90ba4d..78b5c2228bf5a9 100644
--- a/tool/prereq.status
+++ b/tool/prereq.status
@@ -14,6 +14,7 @@ s,@CPPFLAGS@,,g
 s,@CXXFLAGS@,,g
 s,@DLDFLAGS@,,g
 s,@DTRACE_EXT@,dmyh,g
+s,@DUMP_AST@,./dump_ast,g
 s,@EXEEXT@,,g
 s,@HAVE_BASERUBY@,yes,g
 s,@IFCHANGE@,tool/ifchange,g
diff --git a/win32/Makefile.sub b/win32/Makefile.sub
index 1115dd60c0d99f..d3c8475fdbaab3 100644
--- a/win32/Makefile.sub
+++ b/win32/Makefile.sub
@@ -558,6 +558,8 @@ ACTIONS_ENDGROUP = @::
 
 ABI_VERSION_HDR = $(hdrdir)/ruby/internal/abi.h
 
+DUMP_AST = dump_ast$(EXEEXT)
+
 !include $(srcdir)/common.mk
 
 !ifdef SCRIPTPROGRAMS

From c0e41097b0c815049e6290e0a3b212b829292bad Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Thu, 12 Mar 2026 11:38:40 -0400
Subject: [PATCH 2/7] Speed up memmem on Apple

Apple's libc implementation of memmem is very slow (it is a fork of
FreeBSD's that never got vectorized). Instead, we should fall back to
the rolling hash on Apple. In the attached benchmark, I'm seeing
memmem come out 1.07% to 30.34% slower than the rolling hash,
depending on the haystack.

For reference, here are the various implementations I checked:

* musl: https://git.musl-libc.org/cgit/musl/tree/src/string/memmem.c
* freebsd: https://github.com/freebsd/freebsd-src/blob/main/lib/libc/string/memmem.c
* apple: https://github.com/apple-oss-distributions/Libc/blob/main/string/FreeBSD/memmem.c

You can see Apple just linearly searches through the string and
calls memcmp each time, whereas the other two do a window'd rolling
hash similar to the fallback Ruby already has.
---
 benchmark/string_memsearch.yml | 75 ++++++++++++++++++++++++++++++++++
 re.c                           |  2 +-
 2 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 benchmark/string_memsearch.yml

diff --git a/benchmark/string_memsearch.yml b/benchmark/string_memsearch.yml
new file mode 100644
index 00000000000000..cde363289a5187
--- /dev/null
+++ b/benchmark/string_memsearch.yml
@@ -0,0 +1,75 @@
+prelude: |
+  # Haystacks of various sizes
+  small_hay  = "a" * 256
+  medium_hay = "a" * 4096
+  large_hay  = "a" * 65536
+
+  # Short needles (2-8 bytes) that exercise rb_memsearch_ss
+  needle_2 = "xy"
+  needle_4 = "xyzw"
+  needle_8 = "xyzwabcd"
+
+  # Needle whose first byte is absent from the haystack (memchr fast-path)
+  # vs needle whose first byte is common (rolling hash comparison)
+  first_byte_absent  = "x" + "a" * 3
+  first_byte_common  = "a" + "x" * 3
+
+  # Haystack with match at the end
+  hay_match_end = "a" * 4095 + "xy"
+
+  # Haystack with match at the start
+  hay_match_start = "xy" + "a" * 4094
+
+  # Mixed content haystack (more realistic)
+  mixed_hay = (("abcdefghij" * 100) + "z") * 10
+
+benchmark:
+  # === First byte absent from haystack (biggest win for rolling hash) ===
+  index_first_byte_absent_small: |
+    small_hay.index(first_byte_absent)
+  index_first_byte_absent_medium: |
+    medium_hay.index(first_byte_absent)
+  index_first_byte_absent_large: |
+    large_hay.index(first_byte_absent)
+
+  # === First byte common in haystack (stresses comparison loop) ===
+  index_first_byte_common_small: |
+    small_hay.index(first_byte_common)
+  index_first_byte_common_medium: |
+    medium_hay.index(first_byte_common)
+  index_first_byte_common_large: |
+    large_hay.index(first_byte_common)
+
+  # === Needle length variations (all absent) ===
+  index_needle_2_absent: |
+    medium_hay.index(needle_2)
+  index_needle_4_absent: |
+    medium_hay.index(needle_4)
+  index_needle_8_absent: |
+    medium_hay.index(needle_8)
+
+  # === Match at end of haystack ===
+  index_match_at_end: |
+    hay_match_end.index(needle_2)
+
+  # === Match at start of haystack ===
+  index_match_at_start: |
+    hay_match_start.index(needle_2)
+
+  # === include? (same code path) ===
+  include_first_byte_absent: |
+    medium_hay.include?(first_byte_absent)
+  include_first_byte_common: |
+    medium_hay.include?(first_byte_common)
+
+  # === byteindex ===
+  byteindex_first_byte_absent: |
+    medium_hay.byteindex(first_byte_absent)
+  byteindex_first_byte_common: |
+    medium_hay.byteindex(first_byte_common)
+
+  # === Mixed/realistic haystack ===
+  index_mixed_absent: |
+    mixed_hay.index(needle_4)
+  index_mixed_present: |
+    mixed_hay.index("ijab")
diff --git a/re.c b/re.c
index 0e169694d4e536..9d50ae2d7e882a 100644
--- a/re.c
+++ b/re.c
@@ -106,7 +106,7 @@ rb_memcicmp(const void *x, const void *y, long len)
     return 0;
 }
 
-#ifdef HAVE_MEMMEM
+#if defined(HAVE_MEMMEM) && !defined(__APPLE__)
 static inline long
 rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
 {

From 30ec9c089e322a05f89bf5fc3830b6ce0b4ab45c Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Wed, 11 Mar 2026 07:47:16 -0400
Subject: [PATCH 3/7] Fix use-after-poison in compile.c and prism_compile.c

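Prevent the GC from collecting the temporary array while
rb_hash_bulk_insert is still reading elements through its
RARRAY_CONST_PTR pointer.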
---
 compile.c       | 1 +
 prism_compile.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/compile.c b/compile.c
index ecc19f6b651d72..1cd1e8e492f9e1 100644
--- a/compile.c
+++ b/compile.c
@@ -5375,6 +5375,7 @@ compile_hash(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int meth
                 }
                 VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary) / 2);
                 rb_hash_bulk_insert(RARRAY_LEN(ary), RARRAY_CONST_PTR(ary), hash);
+                RB_GC_GUARD(ary);
                 hash = RB_OBJ_SET_FROZEN_SHAREABLE(rb_obj_hide(hash));
 
                 /* Emit optimized code */
diff --git a/prism_compile.c b/prism_compile.c
index 6bc1da58d0bab0..3fa24029412308 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -863,6 +863,7 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, pm_scope_node_t
 
         VALUE value = rb_hash_new_with_size(elements->size);
         rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), value);
+        RB_GC_GUARD(array);
 
         value = rb_obj_hide(value);
         RB_OBJ_SET_FROZEN_SHAREABLE(value);
@@ -1533,6 +1534,7 @@ pm_compile_hash_elements(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l
 
                     VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary) / 2);
                     rb_hash_bulk_insert(RARRAY_LEN(ary), RARRAY_CONST_PTR(ary), hash);
+                    RB_GC_GUARD(ary);
                     hash = rb_obj_hide(hash);
                     RB_OBJ_SET_FROZEN_SHAREABLE(hash);
 

From 9246b770cd2cfd93fb1849f8b413d159d1fd256b Mon Sep 17 00:00:00 2001
From: Luke Gruber <luke.gruber@shopify.com>
Date: Fri, 13 Mar 2026 09:02:35 -0400
Subject: [PATCH 4/7] Bump timeout for
 TestGC#test_finalizer_not_run_with_vm_lock

This test should be redesigned, but increasing the timeout
should probably be good enough for now to not see CI failures related
to it. I can see how this could time out after 10s.

Failing test after 10s timeout:
https://ci.rvm.jp/results/trunk_gcc10@ruby-sp2-noble-docker/6247393

According to the stack trace at time of abort, there was no deadlock and
it was in the middle of a GC. Everything looks fine. This is assuming
the fatal signal came from `EnvUtil.terminate`.
---
 test/ruby/test_gc.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb
index 627b3227ee872a..dc26efada0a058 100644
--- a/test/ruby/test_gc.rb
+++ b/test/ruby/test_gc.rb
@@ -897,7 +897,7 @@ def test_old_to_young_reference
   end
 
   def test_finalizer_not_run_with_vm_lock
-    assert_ractor(<<~'RUBY')
+    assert_ractor(<<~'RUBY', timeout: 30)
       Thread.new do
         loop do
           Encoding.list.each do |enc|

From 17747554a64559565518495ba7544c114b74869c Mon Sep 17 00:00:00 2001
From: Benoit Daloze <eregontp@gmail.com>
Date: Tue, 10 Mar 2026 15:21:14 +0100
Subject: [PATCH 5/7] [ruby/prism] Make it possible to lazily deserialize
 DefNode in Loader.java

* TRUFFLERUBY_METRICS_REPS=5 jt metrics time --experimental-options -e0
  For parsing-core:
  before: 0.097 0.099 0.092 0.096
  after:  0.061 0.063 0.066 0.059
* Remove extra trailing spaces by using `<%-#`.

https://github.com/ruby/prism/commit/e08b47e26c
---
 prism/templates/src/serialize.c.erb | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index 1f90a2160ea4d2..78e4f348932f45 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -50,8 +50,6 @@ static void
 pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
     pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
 
-    size_t offset = buffer->length;
-
     <%- if Prism::Template::INCLUDE_NODE_ID -%>
     pm_buffer_append_varuint(buffer, node->node_id);
     <%- end -%>
@@ -126,7 +124,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
             <%- end -%>
             <%- if node.needs_serialized_length? -%>
             // serialize length
-            uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
+            uint32_t length = pm_sizet_to_u32(buffer->length - length_offset);
             memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
             <%- end -%>
             break;

From fc16d959d01bae0694b863d4cfa7802130b875fb Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Fri, 13 Mar 2026 11:29:29 -0400
Subject: [PATCH 6/7] [ruby/prism] Expose parse options to Rust

https://github.com/ruby/prism/commit/0f1500ce92
---
 prism/util/pm_string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
index d8456ff2947eb8..76942180b6eecb 100644
--- a/prism/util/pm_string.h
+++ b/prism/util/pm_string.h
@@ -94,7 +94,7 @@ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
  * @param source The source of the string.
  * @param length The length of the string.
  */
-void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
+PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
 
 /**
  * Represents the result of calling pm_string_mapped_init or

From ee275b41215315236158827baaa3cc042865ce43 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Fri, 13 Mar 2026 12:41:52 -0400
Subject: [PATCH 7/7] Git ignore dump_ast if you are doing an in-place build

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 7bf0420d87b744..0671971dbc0217 100644
--- a/.gitignore
+++ b/.gitignore
@@ -273,6 +273,7 @@ lcov*.info
 /prism/serialize.c
 /prism/token_type.c
 /prism/srcs.mk
+/dump_ast
 
 # tool/update-NEWS-gemlist.rb
 /bundled_gems.json